From 0357e488b825313db3d574137337557f404e59ed Mon Sep 17 00:00:00 2001 From: Stefan Popa Date: Wed, 11 Apr 2018 14:53:17 +0300 Subject: iio:dac:ad5686: Refactor the driver This patch restructures the existing ad5686 driver by adding a module for SPI and a header file, while the baseline module deals with the chip-logic. This is a necessary step, as this driver should support in the future similar devices which differ only in the type of interface used (I2C instead of SPI). Signed-off-by: Stefan Popa Signed-off-by: Jonathan Cameron --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'MAINTAINERS') diff --git a/MAINTAINERS b/MAINTAINERS index 473ac00dcfb4..637e62d5f7ee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -791,6 +791,13 @@ M: Michael Hanselmann S: Supported F: drivers/macintosh/ams/ +ANALOG DEVICES INC AD5686 DRIVER +M: Stefan Popa +L: linux-pm@vger.kernel.org +W: http://ez.analog.com/community/linux-device-drivers +S: Supported +F: drivers/iio/dac/ad5686* + ANALOG DEVICES INC AD9389B DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org -- cgit v1.2.3-70-g09d2 From 4177381b440130ccb686712aaa09b45539114698 Mon Sep 17 00:00:00 2001 From: Stefan Popa Date: Wed, 11 Apr 2018 14:53:39 +0300 Subject: iio:dac:ad5686: Add AD5671R/75R/94/94R/95R/96/96R support The AD5694/AD5694R/AD5695R/AD5696/AD5696R are a family of 4 channel DACs with 12-bit, 14-bit and 16-bit precision respectively. The devices have either no built-in reference, or built-in 2.5V reference. The AD5671R/AD5675R are similar, except that they have 8 instead of 4 channels. These devices are similar to AD5672R/AD5676/AD5676R and AD5684/AD5684R/AD5684/AD5685R/AD5686/AD5686R, except that they use i2c instead of spi.
Datasheets: http://www.analog.com/media/en/technical-documentation/data-sheets/AD5671R_5675R.pdf http://www.analog.com/media/en/technical-documentation/data-sheets/AD5696R_5695R_5694R.pdf Signed-off-by: Stefan Popa Signed-off-by: Jonathan Cameron --- MAINTAINERS | 1 + drivers/iio/dac/Kconfig | 10 +++++ drivers/iio/dac/Makefile | 1 + drivers/iio/dac/ad5686.c | 28 +++++++++++++ drivers/iio/dac/ad5686.h | 7 ++++ drivers/iio/dac/ad5696-i2c.c | 97 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 144 insertions(+) create mode 100644 drivers/iio/dac/ad5696-i2c.c (limited to 'MAINTAINERS') diff --git a/MAINTAINERS b/MAINTAINERS index 637e62d5f7ee..002cb013b000 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -797,6 +797,7 @@ L: linux-pm@vger.kernel.org W: http://ez.analog.com/community/linux-device-drivers S: Supported F: drivers/iio/dac/ad5686* +F: drivers/iio/dac/ad5696* ANALOG DEVICES INC AD9389B DRIVER M: Hans Verkuil diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig index 7a81f1e11b22..3ff8a32f1385 100644 --- a/drivers/iio/dac/Kconfig +++ b/drivers/iio/dac/Kconfig @@ -145,6 +145,16 @@ config AD5686_SPI To compile this driver as a module, choose M here: the module will be called ad5686. +config AD5696_I2C + tristate "Analog Devices AD5696 and similar multi-channel DACs (I2C)" + depends on I2C + select AD5686 + help + Say yes here to build support for Analog Devices AD5671R, AD5675R, + AD5694, AD5694R, AD5695R, AD5696, AD5696R Voltage Output Digital to + Analog Converter. + To compile this driver as a module, choose M here: the module will be + called ad5696-i2c.
config AD5755 tristate "Analog Devices AD5755/AD5755-1/AD5757/AD5735/AD5737 DAC driver" diff --git a/drivers/iio/dac/Makefile b/drivers/iio/dac/Makefile index 07db92e19490..4397e2114344 100644 --- a/drivers/iio/dac/Makefile +++ b/drivers/iio/dac/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_AD5764) += ad5764.o obj-$(CONFIG_AD5791) += ad5791.o obj-$(CONFIG_AD5686) += ad5686.o obj-$(CONFIG_AD5686_SPI) += ad5686-spi.o +obj-$(CONFIG_AD5696_I2C) += ad5696-i2c.o obj-$(CONFIG_AD7303) += ad7303.o obj-$(CONFIG_AD8801) += ad8801.o obj-$(CONFIG_CIO_DAC) += cio-dac.o diff --git a/drivers/iio/dac/ad5686.c b/drivers/iio/dac/ad5686.c index 79abff55a702..89c5f089ae7f 100644 --- a/drivers/iio/dac/ad5686.c +++ b/drivers/iio/dac/ad5686.c @@ -202,11 +202,21 @@ DECLARE_AD5686_CHANNELS(ad5685r_channels, 14, 2); DECLARE_AD5686_CHANNELS(ad5686_channels, 16, 0); static const struct ad5686_chip_info ad5686_chip_info_tbl[] = { + [ID_AD5671R] = { + .channels = ad5672_channels, + .int_vref_mv = 2500, + .num_channels = 8, + }, [ID_AD5672R] = { .channels = ad5672_channels, .int_vref_mv = 2500, .num_channels = 8, }, + [ID_AD5675R] = { + .channels = ad5676_channels, + .int_vref_mv = 2500, + .num_channels = 8, + }, [ID_AD5676] = { .channels = ad5676_channels, .num_channels = 8, @@ -239,6 +249,24 @@ static const struct ad5686_chip_info ad5686_chip_info_tbl[] = { .int_vref_mv = 2500, .num_channels = 4, }, + [ID_AD5694] = { + .channels = ad5684_channels, + .num_channels = 4, + }, + [ID_AD5694R] = { + .channels = ad5684_channels, + .int_vref_mv = 2500, + .num_channels = 4, + }, + [ID_AD5696] = { + .channels = ad5686_channels, + .num_channels = 4, + }, + [ID_AD5696R] = { + .channels = ad5686_channels, + .int_vref_mv = 2500, + .num_channels = 4, + }, }; int ad5686_probe(struct device *dev, diff --git a/drivers/iio/dac/ad5686.h b/drivers/iio/dac/ad5686.h index c8e1565391ca..05f0ce9d2de1 100644 --- a/drivers/iio/dac/ad5686.h +++ b/drivers/iio/dac/ad5686.h @@ -39,7 +39,9 @@ * ad5686_supported_device_ids: */ enum 
ad5686_supported_device_ids { + ID_AD5671R, ID_AD5672R, + ID_AD5675R, ID_AD5676, ID_AD5676R, ID_AD5684, @@ -47,6 +49,11 @@ enum ad5686_supported_device_ids { ID_AD5685R, ID_AD5686, ID_AD5686R, + ID_AD5694, + ID_AD5694R, + ID_AD5695R, + ID_AD5696, + ID_AD5696R, }; struct ad5686_state; diff --git a/drivers/iio/dac/ad5696-i2c.c b/drivers/iio/dac/ad5696-i2c.c new file mode 100644 index 000000000000..275e0321bcf8 --- /dev/null +++ b/drivers/iio/dac/ad5696-i2c.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * AD5671R, AD5675R, AD5694, AD5694R, AD5695R, AD5696, AD5696R + * Digital to analog converters driver + * + * Copyright 2018 Analog Devices Inc. + */ + +#include "ad5686.h" + +#include +#include + +static int ad5686_i2c_read(struct ad5686_state *st, u8 addr) +{ + struct i2c_client *i2c = to_i2c_client(st->dev); + struct i2c_msg msg[2] = { + { + .addr = i2c->addr, + .flags = i2c->flags, + .len = 3, + .buf = &st->data[0].d8[1], + }, + { + .addr = i2c->addr, + .flags = i2c->flags | I2C_M_RD, + .len = 2, + .buf = (char *)&st->data[0].d16, + }, + }; + int ret; + + st->data[0].d32 = cpu_to_be32(AD5686_CMD(AD5686_CMD_NOOP) | + AD5686_ADDR(addr) | + 0x00); + + ret = i2c_transfer(i2c->adapter, msg, 2); + if (ret < 0) + return ret; + + return be16_to_cpu(st->data[0].d16); +} + +static int ad5686_i2c_write(struct ad5686_state *st, + u8 cmd, u8 addr, u16 val) +{ + struct i2c_client *i2c = to_i2c_client(st->dev); + int ret; + + st->data[0].d32 = cpu_to_be32(AD5686_CMD(cmd) | AD5686_ADDR(addr) + | val); + + ret = i2c_master_send(i2c, &st->data[0].d8[1], 3); + if (ret < 0) + return ret; + + return (ret != 3) ? 
-EIO : 0; +} + +static int ad5686_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + return ad5686_probe(&i2c->dev, id->driver_data, id->name, + ad5686_i2c_write, ad5686_i2c_read); +} + +static int ad5686_i2c_remove(struct i2c_client *i2c) +{ + return ad5686_remove(&i2c->dev); +} + +static const struct i2c_device_id ad5686_i2c_id[] = { + {"ad5671r", ID_AD5671R}, + {"ad5675r", ID_AD5675R}, + {"ad5694", ID_AD5694}, + {"ad5694r", ID_AD5694R}, + {"ad5695r", ID_AD5695R}, + {"ad5696", ID_AD5696}, + {"ad5696r", ID_AD5696R}, + {} +}; +MODULE_DEVICE_TABLE(i2c, ad5686_i2c_id); + +static struct i2c_driver ad5686_i2c_driver = { + .driver = { + .name = "ad5696", + }, + .probe = ad5686_i2c_probe, + .remove = ad5686_i2c_remove, + .id_table = ad5686_i2c_id, +}; + +module_i2c_driver(ad5686_i2c_driver); + +MODULE_AUTHOR("Stefan Popa "); +MODULE_DESCRIPTION("Analog Devices AD5686 and similar multi-channel DACs"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-70-g09d2 From 7fd899fff5907dbb02089494102ef628988f2330 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Mon, 23 Apr 2018 11:55:01 +0800 Subject: MAINTAINERS: add maintainer for the DPAA2 PTP clock driver This patch is to add maintainer for the DPAA2 PTP clock driver. 
Signed-off-by: Yangbo Lu Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'MAINTAINERS') diff --git a/MAINTAINERS b/MAINTAINERS index 0a1410d5a621..7733efa0db92 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4395,6 +4395,12 @@ L: linux-kernel@vger.kernel.org S: Maintained F: drivers/staging/fsl-dpaa2/ethsw +DPAA2 PTP CLOCK DRIVER +M: Yangbo Lu +L: linux-kernel@vger.kernel.org +S: Maintained +F: drivers/staging/fsl-dpaa2/rtc + DPT_I2O SCSI RAID DRIVER M: Adaptec OEM Raid Solutions L: linux-scsi@vger.kernel.org -- cgit v1.2.3-70-g09d2 From 1351b50cc4b8abcab128febf3a27d01af44697b3 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 23 Apr 2018 23:08:06 +0200 Subject: dt-bindings: iio: afe: add binding for current-sense-shunt An ADC is often used to measure other quantities indirectly. This binding describes one case, a current through a shunt resistor measured by the voltage over it. Signed-off-by: Peter Rosin Reviewed-by: Rob Herring Signed-off-by: Jonathan Cameron --- .../bindings/iio/afe/current-sense-shunt.txt | 41 ++++++++++++++++++++++ MAINTAINERS | 6 ++++ 2 files changed, 47 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt (limited to 'MAINTAINERS') diff --git a/Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt b/Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt new file mode 100644 index 000000000000..8e7b3e408a52 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt @@ -0,0 +1,41 @@ +Current Sense Shunt +=================== + +When an io-channel measures the voltage over a current sense shunt, +the interesting measurement is almost always the current through the +shunt, not the voltage over it. This binding describes such a current +sense circuit. + +Required properties: +- compatible : "current-sense-shunt" +- io-channels : Channel node of a voltage io-channel.
+- shunt-resistor-micro-ohms : The shunt resistance in microohms. + +Example: +The system current is measured by measuring the voltage over a +3.3 ohm shunt resistor. + +sysi { + compatible = "current-sense-shunt"; + io-channels = <&tiadc 0>; + + /* Divide the voltage by 3300000/1000000 (or 3.3) for the current. */ + shunt-resistor-micro-ohms = <3300000>; +}; + +&i2c { + tiadc: adc@48 { + compatible = "ti,ads1015"; + reg = <0x48>; + #io-channel-cells = <1>; + + #address-cells = <1>; + #size-cells = <0>; + + channel@0 { /* IN0,IN1 differential */ + reg = <0>; + ti,gain = <1>; + ti,datarate = <4>; + }; + }; +}; diff --git a/MAINTAINERS b/MAINTAINERS index 002cb013b000..d3052bd4a752 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6898,6 +6898,12 @@ F: drivers/staging/iio/ F: include/linux/iio/ F: tools/iio/ +IIO UNIT CONVERTER +M: Peter Rosin +L: linux-iio@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt + IKANOS/ADI EAGLE ADSL USB DRIVER M: Matthieu Castet M: Stanislaw Gruszka -- cgit v1.2.3-70-g09d2 From ff915802fb7f2bb2fa9890cb88dab9cdabb466b8 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 23 Apr 2018 23:08:07 +0200 Subject: dt-bindings: iio: afe: add binding for voltage-divider An ADC is often used to measure other quantities indirectly. This binding describes one case, a "big" voltage measured with the help of a voltage divider.
Signed-off-by: Peter Rosin Reviewed-by: Rob Herring Signed-off-by: Jonathan Cameron --- .../bindings/iio/afe/voltage-divider.txt | 53 ++++++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 54 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/afe/voltage-divider.txt (limited to 'MAINTAINERS') diff --git a/Documentation/devicetree/bindings/iio/afe/voltage-divider.txt b/Documentation/devicetree/bindings/iio/afe/voltage-divider.txt new file mode 100644 index 000000000000..b452a8406107 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/afe/voltage-divider.txt @@ -0,0 +1,53 @@ +Voltage divider +=============== + +When an io-channel measures the midpoint of a voltage divider, the +interesting voltage is often the voltage over the full resistance +of the divider. This binding describes the voltage divider in such +a circuit. + + Vin ----. + | + .-----. + | R | + '-----' + | + +---- Vout + | + .-----. + | Rout| + '-----' + | + GND + +Required properties: +- compatible : "voltage-divider" +- io-channels : Channel node of a voltage io-channel measuring Vout. +- output-ohms : Resistance Rout over which the output voltage is measured. + See full-ohms. +- full-ohms : Resistance R + Rout for the full divider. The io-channel + is scaled by the Rout / (R + Rout) quotient. + +Example: +The system voltage is circa 12V, but divided down with a 22/222 +voltage divider (R = 200 Ohms, Rout = 22 Ohms) and fed to an ADC. + +sysv { + compatible = "voltage-divider"; + io-channels = <&maxadc 1>; + + /* Scale the system voltage by 22/222 to fit the ADC range.
*/ + output-ohms = <22>; + full-ohms = <222>; /* 200 + 22 */ +}; + +&spi { + maxadc: adc@0 { + compatible = "maxim,max1027"; + reg = <0>; + #io-channel-cells = <1>; + interrupt-parent = <&gpio5>; + interrupts = <15 IRQ_TYPE_EDGE_RISING>; + spi-max-frequency = <1000000>; + }; +}; diff --git a/MAINTAINERS b/MAINTAINERS index d3052bd4a752..35987f60649b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6903,6 +6903,7 @@ M: Peter Rosin L: linux-iio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt +F: Documentation/devicetree/bindings/iio/afe/voltage-divider.txt IKANOS/ADI EAGLE ADSL USB DRIVER M: Matthieu Castet -- cgit v1.2.3-70-g09d2 From 2e9a128f359c1baa8f0fbfdb95a1b40f84244801 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 23 Apr 2018 23:08:08 +0200 Subject: dt-bindings: iio: afe: add binding for current-sense-amplifier Similar to current sense shunts, but an amplifier enables the use of a smaller sense resistance. Signed-off-by: Peter Rosin Reviewed-by: Rob Herring Signed-off-by: Jonathan Cameron --- .../bindings/iio/afe/current-sense-amplifier.txt | 26 ++++++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 27 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.txt (limited to 'MAINTAINERS') diff --git a/Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.txt b/Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.txt new file mode 100644 index 000000000000..0ddbaebba8ce --- /dev/null +++ b/Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.txt @@ -0,0 +1,26 @@ +Current Sense Amplifier +======================= + +When an io-channel measures the output voltage from a current sense +amplifier, the interesting measurement is almost always the current +through the sense resistor, not the voltage output. This binding +describes such a current sense circuit.
+ +Required properties: +- compatible : "current-sense-amplifier" +- io-channels : Channel node of a voltage io-channel. +- sense-resistor-micro-ohms : The sense resistance in microohms. + +Optional properties: +- sense-gain-mult: Amplifier gain multiplier. The default is <1>. +- sense-gain-div: Amplifier gain divider. The default is <1>. + +Example: + +sysi { + compatible = "current-sense-amplifier"; + io-channels = <&tiadc 0>; + + sense-resistor-micro-ohms = <20000>; + sense-gain-mult = <50>; +}; diff --git a/MAINTAINERS b/MAINTAINERS index 35987f60649b..d1c0f58cf8a0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6902,6 +6902,7 @@ IIO UNIT CONVERTER M: Peter Rosin L: linux-iio@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.txt F: Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt F: Documentation/devicetree/bindings/iio/afe/voltage-divider.txt -- cgit v1.2.3-70-g09d2 From 8b74816b5a9adac4629f0f072c122d57b8f0eb78 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 23 Apr 2018 23:08:09 +0200 Subject: iio: afe: rescale: new driver If an ADC channel measures the midpoint of a voltage divider, the interesting voltage is often the voltage over the full resistance. E.g. if the full voltage is too big for the ADC to handle. Likewise, if an ADC channel measures the voltage across a shunt resistor, with or without amplification, the interesting value is often the current through the resistor. This driver solves these problems by allowing to linearly scale a channel and/or by allowing changes to the type of the channel.
Signed-off-by: Peter Rosin Signed-off-by: Jonathan Cameron --- MAINTAINERS | 1 + drivers/iio/Kconfig | 1 + drivers/iio/Makefile | 1 + drivers/iio/afe/Kconfig | 19 +++ drivers/iio/afe/Makefile | 6 + drivers/iio/afe/iio-rescale.c | 359 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 387 insertions(+) create mode 100644 drivers/iio/afe/Kconfig create mode 100644 drivers/iio/afe/Makefile create mode 100644 drivers/iio/afe/iio-rescale.c (limited to 'MAINTAINERS') diff --git a/MAINTAINERS b/MAINTAINERS index d1c0f58cf8a0..ad4c68af122a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6905,6 +6905,7 @@ S: Maintained F: Documentation/devicetree/bindings/iio/afe/current-sense-amplifier.txt F: Documentation/devicetree/bindings/iio/afe/current-sense-shunt.txt F: Documentation/devicetree/bindings/iio/afe/voltage-divider.txt +F: drivers/iio/afe/iio-rescale.c IKANOS/ADI EAGLE ADSL USB DRIVER M: Matthieu Castet diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig index b3c8c6ef0dff..d69e85a8bdc3 100644 --- a/drivers/iio/Kconfig +++ b/drivers/iio/Kconfig @@ -70,6 +70,7 @@ config IIO_TRIGGERED_EVENT source "drivers/iio/accel/Kconfig" source "drivers/iio/adc/Kconfig" +source "drivers/iio/afe/Kconfig" source "drivers/iio/amplifiers/Kconfig" source "drivers/iio/chemical/Kconfig" source "drivers/iio/common/Kconfig" diff --git a/drivers/iio/Makefile b/drivers/iio/Makefile index b16b2e9ddc40..d8cba9c229c0 100644 --- a/drivers/iio/Makefile +++ b/drivers/iio/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_IIO_TRIGGERED_EVENT) += industrialio-triggered-event.o obj-y += accel/ obj-y += adc/ +obj-y += afe/ obj-y += amplifiers/ obj-y += buffer/ obj-y += chemical/ diff --git a/drivers/iio/afe/Kconfig b/drivers/iio/afe/Kconfig new file mode 100644 index 000000000000..c91eef04825a --- /dev/null +++ b/drivers/iio/afe/Kconfig @@ -0,0 +1,19 @@ +# +# Analog Front End drivers +# +# When adding new entries keep the list in alphabetical order + +menu "Analog Front Ends" + +config IIO_RESCALE + 
tristate "IIO rescale" + depends on OF || COMPILE_TEST + help + Say yes here to build support for the IIO rescaling + that handles voltage dividers, current sense shunts and + current sense amplifiers. + + To compile this driver as a module, choose M here: the + module will be called iio-rescale. + +endmenu diff --git a/drivers/iio/afe/Makefile b/drivers/iio/afe/Makefile new file mode 100644 index 000000000000..5fabb7bcac47 --- /dev/null +++ b/drivers/iio/afe/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for industrial I/O Analog Front Ends (AFE) +# + +# When adding new entries keep the list in alphabetical order +obj-$(CONFIG_IIO_RESCALE) += iio-rescale.o diff --git a/drivers/iio/afe/iio-rescale.c b/drivers/iio/afe/iio-rescale.c new file mode 100644 index 000000000000..e9ceee66d1e7 --- /dev/null +++ b/drivers/iio/afe/iio-rescale.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IIO rescale driver + * + * Copyright (C) 2018 Axentia Technologies AB + * + * Author: Peter Rosin + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct rescale; + +struct rescale_cfg { + enum iio_chan_type type; + int (*props)(struct device *dev, struct rescale *rescale); +}; + +struct rescale { + const struct rescale_cfg *cfg; + struct iio_channel *source; + struct iio_chan_spec chan; + struct iio_chan_spec_ext_info *ext_info; + s32 numerator; + s32 denominator; +}; + +static int rescale_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + struct rescale *rescale = iio_priv(indio_dev); + unsigned long long tmp; + int ret; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + return iio_read_channel_raw(rescale->source, val); + + case IIO_CHAN_INFO_SCALE: + ret = iio_read_channel_scale(rescale->source, val, val2); + switch (ret) { + case IIO_VAL_FRACTIONAL: + *val *= rescale->numerator; + *val2 *= rescale->denominator; + return ret; + case IIO_VAL_INT: + *val *= 
rescale->numerator; + if (rescale->denominator == 1) + return ret; + *val2 = rescale->denominator; + return IIO_VAL_FRACTIONAL; + case IIO_VAL_FRACTIONAL_LOG2: + tmp = *val * 1000000000LL; + do_div(tmp, rescale->denominator); + tmp *= rescale->numerator; + do_div(tmp, 1000000000LL); + *val = tmp; + return ret; + default: + return -EOPNOTSUPP; + } + default: + return -EINVAL; + } +} + +static int rescale_read_avail(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + const int **vals, int *type, int *length, + long mask) +{ + struct rescale *rescale = iio_priv(indio_dev); + + switch (mask) { + case IIO_CHAN_INFO_RAW: + *type = IIO_VAL_INT; + return iio_read_avail_channel_raw(rescale->source, + vals, length); + default: + return -EINVAL; + } +} + +static const struct iio_info rescale_info = { + .read_raw = rescale_read_raw, + .read_avail = rescale_read_avail, +}; + +static ssize_t rescale_read_ext_info(struct iio_dev *indio_dev, + uintptr_t private, + struct iio_chan_spec const *chan, + char *buf) +{ + struct rescale *rescale = iio_priv(indio_dev); + + return iio_read_channel_ext_info(rescale->source, + rescale->ext_info[private].name, + buf); +} + +static ssize_t rescale_write_ext_info(struct iio_dev *indio_dev, + uintptr_t private, + struct iio_chan_spec const *chan, + const char *buf, size_t len) +{ + struct rescale *rescale = iio_priv(indio_dev); + + return iio_write_channel_ext_info(rescale->source, + rescale->ext_info[private].name, + buf, len); +} + +static int rescale_configure_channel(struct device *dev, + struct rescale *rescale) +{ + struct iio_chan_spec *chan = &rescale->chan; + struct iio_chan_spec const *schan = rescale->source->channel; + + chan->indexed = 1; + chan->output = schan->output; + chan->ext_info = rescale->ext_info; + chan->type = rescale->cfg->type; + + if (!iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) || + !iio_channel_has_info(schan, IIO_CHAN_INFO_SCALE)) { + dev_err(dev, "source channel does not support raw/scale\n"); + 
return -EINVAL; + } + + chan->info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_SCALE); + + if (iio_channel_has_available(schan, IIO_CHAN_INFO_RAW)) + chan->info_mask_separate_available |= BIT(IIO_CHAN_INFO_RAW); + + return 0; +} + +static int rescale_current_sense_amplifier_props(struct device *dev, + struct rescale *rescale) +{ + u32 sense; + u32 gain_mult = 1; + u32 gain_div = 1; + u32 factor; + int ret; + + ret = device_property_read_u32(dev, "sense-resistor-micro-ohms", + &sense); + if (ret) { + dev_err(dev, "failed to read the sense resistance: %d\n", ret); + return ret; + } + + device_property_read_u32(dev, "sense-gain-mult", &gain_mult); + device_property_read_u32(dev, "sense-gain-div", &gain_div); + + /* + * Calculate the scaling factor, 1 / (gain * sense), or + * gain_div / (gain_mult * sense), while trying to keep the + * numerator/denominator from overflowing. + */ + factor = gcd(sense, 1000000); + rescale->numerator = 1000000 / factor; + rescale->denominator = sense / factor; + + factor = gcd(rescale->numerator, gain_mult); + rescale->numerator /= factor; + rescale->denominator *= gain_mult / factor; + + factor = gcd(rescale->denominator, gain_div); + rescale->numerator *= gain_div / factor; + rescale->denominator /= factor; + + return 0; +} + +static int rescale_current_sense_shunt_props(struct device *dev, + struct rescale *rescale) +{ + u32 shunt; + u32 factor; + int ret; + + ret = device_property_read_u32(dev, "shunt-resistor-micro-ohms", + &shunt); + if (ret) { + dev_err(dev, "failed to read the shunt resistance: %d\n", ret); + return ret; + } + + factor = gcd(shunt, 1000000); + rescale->numerator = 1000000 / factor; + rescale->denominator = shunt / factor; + + return 0; +} + +static int rescale_voltage_divider_props(struct device *dev, + struct rescale *rescale) +{ + int ret; + u32 factor; + + ret = device_property_read_u32(dev, "output-ohms", + &rescale->denominator); + if (ret) { + dev_err(dev, "failed to read output-ohms: 
%d\n", ret); + return ret; + } + + ret = device_property_read_u32(dev, "full-ohms", + &rescale->numerator); + if (ret) { + dev_err(dev, "failed to read full-ohms: %d\n", ret); + return ret; + } + + factor = gcd(rescale->numerator, rescale->denominator); + rescale->numerator /= factor; + rescale->denominator /= factor; + + return 0; +} + +enum rescale_variant { + CURRENT_SENSE_AMPLIFIER, + CURRENT_SENSE_SHUNT, + VOLTAGE_DIVIDER, +}; + +static const struct rescale_cfg rescale_cfg[] = { + [CURRENT_SENSE_AMPLIFIER] = { + .type = IIO_CURRENT, + .props = rescale_current_sense_amplifier_props, + }, + [CURRENT_SENSE_SHUNT] = { + .type = IIO_CURRENT, + .props = rescale_current_sense_shunt_props, + }, + [VOLTAGE_DIVIDER] = { + .type = IIO_VOLTAGE, + .props = rescale_voltage_divider_props, + }, +}; + +static const struct of_device_id rescale_match[] = { + { .compatible = "current-sense-amplifier", + .data = &rescale_cfg[CURRENT_SENSE_AMPLIFIER], }, + { .compatible = "current-sense-shunt", + .data = &rescale_cfg[CURRENT_SENSE_SHUNT], }, + { .compatible = "voltage-divider", + .data = &rescale_cfg[VOLTAGE_DIVIDER], }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, rescale_match); + +static int rescale_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct iio_dev *indio_dev; + struct iio_channel *source; + struct rescale *rescale; + int sizeof_ext_info; + int sizeof_priv; + int i; + int ret; + + source = devm_iio_channel_get(dev, NULL); + if (IS_ERR(source)) { + if (PTR_ERR(source) != -EPROBE_DEFER) + dev_err(dev, "failed to get source channel\n"); + return PTR_ERR(source); + } + + sizeof_ext_info = iio_get_channel_ext_info_count(source); + if (sizeof_ext_info) { + sizeof_ext_info += 1; /* one extra entry for the sentinel */ + sizeof_ext_info *= sizeof(*rescale->ext_info); + } + + sizeof_priv = sizeof(*rescale) + sizeof_ext_info; + + indio_dev = devm_iio_device_alloc(dev, sizeof_priv); + if (!indio_dev) + return -ENOMEM; + + rescale = 
iio_priv(indio_dev); + + rescale->cfg = of_device_get_match_data(dev); + rescale->numerator = 1; + rescale->denominator = 1; + + ret = rescale->cfg->props(dev, rescale); + if (ret) + return ret; + + if (!rescale->numerator || !rescale->denominator) { + dev_err(dev, "invalid scaling factor.\n"); + return -EINVAL; + } + + platform_set_drvdata(pdev, indio_dev); + + rescale->source = source; + + indio_dev->name = dev_name(dev); + indio_dev->dev.parent = dev; + indio_dev->info = &rescale_info; + indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = &rescale->chan; + indio_dev->num_channels = 1; + if (sizeof_ext_info) { + rescale->ext_info = devm_kmemdup(dev, + source->channel->ext_info, + sizeof_ext_info, GFP_KERNEL); + if (!rescale->ext_info) + return -ENOMEM; + + for (i = 0; rescale->ext_info[i].name; ++i) { + struct iio_chan_spec_ext_info *ext_info = + &rescale->ext_info[i]; + + if (source->channel->ext_info[i].read) + ext_info->read = rescale_read_ext_info; + if (source->channel->ext_info[i].write) + ext_info->write = rescale_write_ext_info; + ext_info->private = i; + } + } + + ret = rescale_configure_channel(dev, rescale); + if (ret) + return ret; + + return devm_iio_device_register(dev, indio_dev); +} + +static struct platform_driver rescale_driver = { + .probe = rescale_probe, + .driver = { + .name = "iio-rescale", + .of_match_table = rescale_match, + }, +}; +module_platform_driver(rescale_driver); + +MODULE_DESCRIPTION("IIO rescale driver"); +MODULE_AUTHOR("Peter Rosin "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-70-g09d2 From be65f9ed267fd7d8b3146b7c4be9ecdd3e0aa3ed Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Jun 2018 10:59:48 +0200 Subject: staging: lustre: delete the filesystem from the tree. The Lustre filesystem has been in the kernel tree for over 5 years now. 
While it has been an endless source of enjoyment for new kernel developers learning how to do basic codingstyle cleanups, as well as a semi-entertaining source of bewilderment from the vfs developers any time they have looked into the codebase to try to figure out how to port their latest api changes to this filesystem, it has not really moved forward into the "this is in shape to get out of staging" despite many half-completed attempts. And getting code out of staging is the main goal of that portion of the kernel tree. Code should not stagnate and it feels like having this code in staging is only causing the development cycle of the filesystem to take longer than it should. There is a whole separate out-of-tree copy of this codebase where the developers work on it, and then random changes are thrown over the wall at staging at some later point in time. This dual-tree development model has never worked, and the state of this codebase is proof of that. So, let's just delete the whole mess. Now the lustre developers can go off and work in their out-of-tree codebase and not have to worry about providing valid changelog entries and breaking their patches up into logical pieces. They can take the time they have spent doing those types of housekeeping chores and get the codebase into a much better shape, and it can be submitted for inclusion into the real part of the kernel tree when ready.
Cc: Oleg Drokin Cc: Andreas Dilger Cc: James Simmons Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 9 - drivers/staging/Kconfig | 2 - drivers/staging/Makefile | 1 - drivers/staging/lustre/Kconfig | 3 - drivers/staging/lustre/Makefile | 2 - drivers/staging/lustre/README.txt | 83 - drivers/staging/lustre/TODO | 302 -- .../staging/lustre/include/linux/libcfs/libcfs.h | 76 - .../lustre/include/linux/libcfs/libcfs_cpu.h | 434 -- .../lustre/include/linux/libcfs/libcfs_crypto.h | 208 - .../lustre/include/linux/libcfs/libcfs_debug.h | 207 - .../lustre/include/linux/libcfs/libcfs_fail.h | 194 - .../lustre/include/linux/libcfs/libcfs_hash.h | 869 ---- .../lustre/include/linux/libcfs/libcfs_private.h | 200 - .../lustre/include/linux/libcfs/libcfs_string.h | 102 - drivers/staging/lustre/include/linux/lnet/api.h | 212 - .../staging/lustre/include/linux/lnet/lib-lnet.h | 652 --- .../staging/lustre/include/linux/lnet/lib-types.h | 666 ---- .../staging/lustre/include/linux/lnet/socklnd.h | 87 - .../lustre/include/uapi/linux/lnet/libcfs_debug.h | 149 - .../lustre/include/uapi/linux/lnet/libcfs_ioctl.h | 141 - .../lustre/include/uapi/linux/lnet/lnet-dlc.h | 150 - .../lustre/include/uapi/linux/lnet/lnet-types.h | 669 ---- .../lustre/include/uapi/linux/lnet/lnetctl.h | 123 - .../lustre/include/uapi/linux/lnet/lnetst.h | 556 --- .../lustre/include/uapi/linux/lnet/nidstr.h | 119 - .../lustre/include/uapi/linux/lnet/socklnd.h | 44 - .../lustre/include/uapi/linux/lustre/lustre_cfg.h | 261 -- .../lustre/include/uapi/linux/lustre/lustre_fid.h | 293 -- .../include/uapi/linux/lustre/lustre_fiemap.h | 72 - .../lustre/include/uapi/linux/lustre/lustre_idl.h | 2690 ------------- .../include/uapi/linux/lustre/lustre_ioctl.h | 229 -- .../include/uapi/linux/lustre/lustre_kernelcomm.h | 94 - .../include/uapi/linux/lustre/lustre_ostid.h | 236 -- .../include/uapi/linux/lustre/lustre_param.h | 94 - .../lustre/include/uapi/linux/lustre/lustre_user.h | 1327 ------ 
.../lustre/include/uapi/linux/lustre/lustre_ver.h | 27 - drivers/staging/lustre/lnet/Kconfig | 46 - drivers/staging/lustre/lnet/Makefile | 1 - drivers/staging/lustre/lnet/klnds/Makefile | 1 - drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile | 5 - .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 2958 -------------- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 1048 ----- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 3763 ----------------- .../lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c | 296 -- drivers/staging/lustre/lnet/klnds/socklnd/Makefile | 6 - .../staging/lustre/lnet/klnds/socklnd/socklnd.c | 2921 -------------- .../staging/lustre/lnet/klnds/socklnd/socklnd.h | 704 ---- .../staging/lustre/lnet/klnds/socklnd/socklnd_cb.c | 2586 ------------ .../lustre/lnet/klnds/socklnd/socklnd_lib.c | 534 --- .../lustre/lnet/klnds/socklnd/socklnd_modparams.c | 184 - .../lustre/lnet/klnds/socklnd/socklnd_proto.c | 810 ---- drivers/staging/lustre/lnet/libcfs/Makefile | 16 - drivers/staging/lustre/lnet/libcfs/debug.c | 461 --- drivers/staging/lustre/lnet/libcfs/fail.c | 146 - drivers/staging/lustre/lnet/libcfs/hash.c | 2065 ---------- drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 1086 ----- drivers/staging/lustre/lnet/libcfs/libcfs_lock.c | 155 - drivers/staging/lustre/lnet/libcfs/libcfs_mem.c | 171 - drivers/staging/lustre/lnet/libcfs/libcfs_string.c | 562 --- .../lustre/lnet/libcfs/linux-crypto-adler.c | 139 - drivers/staging/lustre/lnet/libcfs/linux-crypto.c | 447 --- drivers/staging/lustre/lnet/libcfs/linux-crypto.h | 30 - drivers/staging/lustre/lnet/libcfs/linux-debug.c | 142 - .../staging/lustre/lnet/libcfs/linux-tracefile.c | 258 -- drivers/staging/lustre/lnet/libcfs/module.c | 758 ---- drivers/staging/lustre/lnet/libcfs/tracefile.c | 1198 ------ drivers/staging/lustre/lnet/libcfs/tracefile.h | 274 -- drivers/staging/lustre/lnet/lnet/Makefile | 10 - drivers/staging/lustre/lnet/lnet/acceptor.c | 501 --- drivers/staging/lustre/lnet/lnet/api-ni.c | 2307 
----------- drivers/staging/lustre/lnet/lnet/config.c | 1235 ------ drivers/staging/lustre/lnet/lnet/lib-eq.c | 426 -- drivers/staging/lustre/lnet/lnet/lib-md.c | 463 --- drivers/staging/lustre/lnet/lnet/lib-me.c | 274 -- drivers/staging/lustre/lnet/lnet/lib-move.c | 2386 ----------- drivers/staging/lustre/lnet/lnet/lib-msg.c | 625 --- drivers/staging/lustre/lnet/lnet/lib-ptl.c | 987 ----- drivers/staging/lustre/lnet/lnet/lib-socket.c | 585 --- drivers/staging/lustre/lnet/lnet/lo.c | 105 - drivers/staging/lustre/lnet/lnet/module.c | 239 -- drivers/staging/lustre/lnet/lnet/net_fault.c | 1023 ----- drivers/staging/lustre/lnet/lnet/nidstrings.c | 1261 ------ drivers/staging/lustre/lnet/lnet/peer.c | 456 --- drivers/staging/lustre/lnet/lnet/router.c | 1799 --------- drivers/staging/lustre/lnet/lnet/router_proc.c | 907 ----- drivers/staging/lustre/lnet/selftest/Makefile | 7 - drivers/staging/lustre/lnet/selftest/brw_test.c | 526 --- drivers/staging/lustre/lnet/selftest/conctl.c | 801 ---- drivers/staging/lustre/lnet/selftest/conrpc.c | 1396 ------- drivers/staging/lustre/lnet/selftest/conrpc.h | 142 - drivers/staging/lustre/lnet/selftest/console.c | 2104 ---------- drivers/staging/lustre/lnet/selftest/console.h | 244 -- drivers/staging/lustre/lnet/selftest/framework.c | 1786 --------- drivers/staging/lustre/lnet/selftest/module.c | 169 - drivers/staging/lustre/lnet/selftest/ping_test.c | 228 -- drivers/staging/lustre/lnet/selftest/rpc.c | 1682 -------- drivers/staging/lustre/lnet/selftest/rpc.h | 295 -- drivers/staging/lustre/lnet/selftest/selftest.h | 622 --- drivers/staging/lustre/lnet/selftest/timer.c | 244 -- drivers/staging/lustre/lnet/selftest/timer.h | 50 - drivers/staging/lustre/lustre/Kconfig | 45 - drivers/staging/lustre/lustre/Makefile | 2 - drivers/staging/lustre/lustre/fid/Makefile | 5 - drivers/staging/lustre/lustre/fid/fid_internal.h | 46 - drivers/staging/lustre/lustre/fid/fid_lib.c | 87 - drivers/staging/lustre/lustre/fid/fid_request.c | 410 -- 
drivers/staging/lustre/lustre/fid/lproc_fid.c | 225 -- drivers/staging/lustre/lustre/fld/Makefile | 5 - drivers/staging/lustre/lustre/fld/fld_cache.c | 516 --- drivers/staging/lustre/lustre/fld/fld_internal.h | 170 - drivers/staging/lustre/lustre/fld/fld_request.c | 446 --- drivers/staging/lustre/lustre/fld/lproc_fld.c | 154 - drivers/staging/lustre/lustre/include/cl_object.h | 2463 ------------ .../staging/lustre/lustre/include/interval_tree.h | 119 - drivers/staging/lustre/lustre/include/llog_swab.h | 67 - .../staging/lustre/lustre/include/lprocfs_status.h | 646 --- drivers/staging/lustre/lustre/include/lu_object.h | 1305 ------ drivers/staging/lustre/lustre/include/lu_ref.h | 178 - drivers/staging/lustre/lustre/include/lustre_acl.h | 51 - .../staging/lustre/lustre/include/lustre_compat.h | 82 - .../staging/lustre/lustre/include/lustre_debug.h | 52 - .../staging/lustre/lustre/include/lustre_disk.h | 152 - drivers/staging/lustre/lustre/include/lustre_dlm.h | 1346 ------- .../lustre/lustre/include/lustre_dlm_flags.h | 402 -- .../staging/lustre/lustre/include/lustre_errno.h | 198 - .../staging/lustre/lustre/include/lustre_export.h | 250 -- drivers/staging/lustre/lustre/include/lustre_fid.h | 676 ---- drivers/staging/lustre/lustre/include/lustre_fld.h | 137 - drivers/staging/lustre/lustre/include/lustre_ha.h | 61 - .../staging/lustre/lustre/include/lustre_handles.h | 91 - .../staging/lustre/lustre/include/lustre_import.h | 369 -- .../staging/lustre/lustre/include/lustre_intent.h | 71 - .../lustre/lustre/include/lustre_kernelcomm.h | 56 - drivers/staging/lustre/lustre/include/lustre_lib.h | 126 - .../staging/lustre/lustre/include/lustre_linkea.h | 93 - drivers/staging/lustre/lustre/include/lustre_lmv.h | 174 - drivers/staging/lustre/lustre/include/lustre_log.h | 382 -- drivers/staging/lustre/lustre/include/lustre_mdc.h | 229 -- drivers/staging/lustre/lustre/include/lustre_mds.h | 62 - drivers/staging/lustre/lustre/include/lustre_net.h | 2360 ----------- 
drivers/staging/lustre/lustre/include/lustre_nrs.h | 718 ---- .../lustre/lustre/include/lustre_nrs_fifo.h | 71 - .../staging/lustre/lustre/include/lustre_obdo.h | 55 - .../lustre/include/lustre_patchless_compat.h | 68 - .../lustre/lustre/include/lustre_req_layout.h | 307 -- drivers/staging/lustre/lustre/include/lustre_sec.h | 1072 ----- .../staging/lustre/lustre/include/lustre_swab.h | 109 - drivers/staging/lustre/lustre/include/obd.h | 1114 ------ drivers/staging/lustre/lustre/include/obd_cksum.h | 153 - drivers/staging/lustre/lustre/include/obd_class.h | 1603 -------- .../staging/lustre/lustre/include/obd_support.h | 517 --- drivers/staging/lustre/lustre/include/seq_range.h | 200 - drivers/staging/lustre/lustre/ldlm/interval_tree.c | 599 --- drivers/staging/lustre/lustre/ldlm/l_lock.c | 73 - drivers/staging/lustre/lustre/ldlm/ldlm_extent.c | 258 -- drivers/staging/lustre/lustre/ldlm/ldlm_flock.c | 486 --- .../staging/lustre/lustre/ldlm/ldlm_inodebits.c | 69 - drivers/staging/lustre/lustre/ldlm/ldlm_internal.h | 342 -- drivers/staging/lustre/lustre/ldlm/ldlm_lib.c | 842 ---- drivers/staging/lustre/lustre/ldlm/ldlm_lock.c | 2135 ---------- drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c | 1163 ------ drivers/staging/lustre/lustre/ldlm/ldlm_plain.c | 68 - drivers/staging/lustre/lustre/ldlm/ldlm_pool.c | 1013 ----- drivers/staging/lustre/lustre/ldlm/ldlm_request.c | 2033 ---------- drivers/staging/lustre/lustre/ldlm/ldlm_resource.c | 1318 ------ drivers/staging/lustre/lustre/llite/Makefile | 13 - drivers/staging/lustre/lustre/llite/acl.c | 108 - drivers/staging/lustre/lustre/llite/dcache.c | 300 -- drivers/staging/lustre/lustre/llite/dir.c | 1708 -------- drivers/staging/lustre/lustre/llite/file.c | 3580 ----------------- drivers/staging/lustre/lustre/llite/glimpse.c | 205 - drivers/staging/lustre/lustre/llite/lcommon_cl.c | 292 -- drivers/staging/lustre/lustre/llite/lcommon_misc.c | 186 - .../staging/lustre/lustre/llite/llite_internal.h | 1344 ------- 
drivers/staging/lustre/lustre/llite/llite_lib.c | 2668 ------------- drivers/staging/lustre/lustre/llite/llite_mmap.c | 480 --- drivers/staging/lustre/lustre/llite/llite_nfs.c | 375 -- drivers/staging/lustre/lustre/llite/lproc_llite.c | 1659 -------- drivers/staging/lustre/lustre/llite/namei.c | 1207 ------ drivers/staging/lustre/lustre/llite/range_lock.c | 241 -- drivers/staging/lustre/lustre/llite/range_lock.h | 83 - drivers/staging/lustre/lustre/llite/rw.c | 1214 ------ drivers/staging/lustre/lustre/llite/rw26.c | 641 --- drivers/staging/lustre/lustre/llite/statahead.c | 1577 -------- drivers/staging/lustre/lustre/llite/super25.c | 189 - drivers/staging/lustre/lustre/llite/symlink.c | 159 - drivers/staging/lustre/lustre/llite/vvp_dev.c | 640 --- drivers/staging/lustre/lustre/llite/vvp_internal.h | 321 -- drivers/staging/lustre/lustre/llite/vvp_io.c | 1374 ------- drivers/staging/lustre/lustre/llite/vvp_lock.c | 87 - drivers/staging/lustre/lustre/llite/vvp_object.c | 303 -- drivers/staging/lustre/lustre/llite/vvp_page.c | 523 --- drivers/staging/lustre/lustre/llite/xattr.c | 665 ---- drivers/staging/lustre/lustre/llite/xattr_cache.c | 504 --- .../staging/lustre/lustre/llite/xattr_security.c | 96 - drivers/staging/lustre/lustre/lmv/Makefile | 5 - drivers/staging/lustre/lustre/lmv/lmv_fld.c | 82 - drivers/staging/lustre/lustre/lmv/lmv_intent.c | 521 --- drivers/staging/lustre/lustre/lmv/lmv_internal.h | 164 - drivers/staging/lustre/lustre/lmv/lmv_obd.c | 3131 --------------- drivers/staging/lustre/lustre/lmv/lproc_lmv.c | 173 - drivers/staging/lustre/lustre/lov/Makefile | 9 - .../staging/lustre/lustre/lov/lov_cl_internal.h | 639 --- drivers/staging/lustre/lustre/lov/lov_dev.c | 384 -- drivers/staging/lustre/lustre/lov/lov_ea.c | 331 -- drivers/staging/lustre/lustre/lov/lov_internal.h | 286 -- drivers/staging/lustre/lustre/lov/lov_io.c | 1023 ----- drivers/staging/lustre/lustre/lov/lov_lock.c | 348 -- drivers/staging/lustre/lustre/lov/lov_merge.c | 105 - 
drivers/staging/lustre/lustre/lov/lov_obd.c | 1444 ------- drivers/staging/lustre/lustre/lov/lov_object.c | 1625 -------- drivers/staging/lustre/lustre/lov/lov_offset.c | 269 -- drivers/staging/lustre/lustre/lov/lov_pack.c | 400 -- drivers/staging/lustre/lustre/lov/lov_page.c | 136 - drivers/staging/lustre/lustre/lov/lov_pool.c | 546 --- drivers/staging/lustre/lustre/lov/lov_request.c | 354 -- drivers/staging/lustre/lustre/lov/lovsub_dev.c | 147 - drivers/staging/lustre/lustre/lov/lovsub_lock.c | 81 - drivers/staging/lustre/lustre/lov/lovsub_object.c | 180 - drivers/staging/lustre/lustre/lov/lovsub_page.c | 68 - drivers/staging/lustre/lustre/lov/lproc_lov.c | 299 -- drivers/staging/lustre/lustre/mdc/Makefile | 5 - drivers/staging/lustre/lustre/mdc/lproc_mdc.c | 231 -- drivers/staging/lustre/lustre/mdc/mdc_internal.h | 144 - drivers/staging/lustre/lustre/mdc/mdc_lib.c | 498 --- drivers/staging/lustre/lustre/mdc/mdc_locks.c | 1239 ------ drivers/staging/lustre/lustre/mdc/mdc_reint.c | 421 -- drivers/staging/lustre/lustre/mdc/mdc_request.c | 2770 ------------- drivers/staging/lustre/lustre/mgc/Makefile | 5 - drivers/staging/lustre/lustre/mgc/lproc_mgc.c | 69 - drivers/staging/lustre/lustre/mgc/mgc_internal.h | 57 - drivers/staging/lustre/lustre/mgc/mgc_request.c | 1851 --------- drivers/staging/lustre/lustre/obdclass/Makefile | 12 - .../staging/lustre/lustre/obdclass/cl_internal.h | 95 - drivers/staging/lustre/lustre/obdclass/cl_io.c | 1151 ------ drivers/staging/lustre/lustre/obdclass/cl_lock.c | 275 -- drivers/staging/lustre/lustre/obdclass/cl_object.c | 1059 ----- drivers/staging/lustre/lustre/obdclass/cl_page.c | 1045 ----- drivers/staging/lustre/lustre/obdclass/class_obd.c | 544 --- drivers/staging/lustre/lustre/obdclass/debug.c | 96 - drivers/staging/lustre/lustre/obdclass/genops.c | 1480 ------- .../staging/lustre/lustre/obdclass/kernelcomm.c | 240 -- drivers/staging/lustre/lustre/obdclass/linkea.c | 249 -- .../lustre/lustre/obdclass/linux/linux-module.c | 514 
--- .../lustre/lustre/obdclass/linux/linux-sysctl.c | 162 - drivers/staging/lustre/lustre/obdclass/llog.c | 524 --- drivers/staging/lustre/lustre/obdclass/llog_cat.c | 236 -- .../staging/lustre/lustre/obdclass/llog_internal.h | 79 - drivers/staging/lustre/lustre/obdclass/llog_obd.c | 225 -- drivers/staging/lustre/lustre/obdclass/llog_swab.c | 412 -- .../lustre/lustre/obdclass/lprocfs_counters.c | 134 - .../lustre/lustre/obdclass/lprocfs_status.c | 1698 -------- drivers/staging/lustre/lustre/obdclass/lu_object.c | 2056 ---------- drivers/staging/lustre/lustre/obdclass/lu_ref.c | 45 - .../lustre/lustre/obdclass/lustre_handles.c | 241 -- .../staging/lustre/lustre/obdclass/lustre_peer.c | 214 - .../staging/lustre/lustre/obdclass/obd_config.c | 1538 ------- drivers/staging/lustre/lustre/obdclass/obd_mount.c | 1245 ------ drivers/staging/lustre/lustre/obdclass/obdo.c | 181 - .../staging/lustre/lustre/obdclass/statfs_pack.c | 58 - drivers/staging/lustre/lustre/obdclass/uuid.c | 45 - drivers/staging/lustre/lustre/obdecho/Makefile | 5 - .../staging/lustre/lustre/obdecho/echo_client.c | 1729 -------- .../staging/lustre/lustre/obdecho/echo_internal.h | 42 - drivers/staging/lustre/lustre/osc/Makefile | 6 - drivers/staging/lustre/lustre/osc/lproc_osc.c | 838 ---- drivers/staging/lustre/lustre/osc/osc_cache.c | 3306 --------------- .../staging/lustre/lustre/osc/osc_cl_internal.h | 681 ---- drivers/staging/lustre/lustre/osc/osc_dev.c | 246 -- drivers/staging/lustre/lustre/osc/osc_internal.h | 237 -- drivers/staging/lustre/lustre/osc/osc_io.c | 918 ----- drivers/staging/lustre/lustre/osc/osc_lock.c | 1230 ------ drivers/staging/lustre/lustre/osc/osc_object.c | 473 --- drivers/staging/lustre/lustre/osc/osc_page.c | 1094 ----- drivers/staging/lustre/lustre/osc/osc_quota.c | 236 -- drivers/staging/lustre/lustre/osc/osc_request.c | 2907 -------------- drivers/staging/lustre/lustre/ptlrpc/Makefile | 23 - drivers/staging/lustre/lustre/ptlrpc/client.c | 3271 --------------- 
drivers/staging/lustre/lustre/ptlrpc/connection.c | 192 - drivers/staging/lustre/lustre/ptlrpc/errno.c | 383 -- drivers/staging/lustre/lustre/ptlrpc/events.c | 585 --- drivers/staging/lustre/lustre/ptlrpc/import.c | 1677 -------- drivers/staging/lustre/lustre/ptlrpc/layout.c | 2232 ----------- drivers/staging/lustre/lustre/ptlrpc/llog_client.c | 338 -- drivers/staging/lustre/lustre/ptlrpc/llog_net.c | 67 - .../staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c | 1316 ------ drivers/staging/lustre/lustre/ptlrpc/niobuf.c | 771 ---- drivers/staging/lustre/lustre/ptlrpc/nrs.c | 1613 -------- drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c | 270 -- .../staging/lustre/lustre/ptlrpc/pack_generic.c | 2311 ----------- drivers/staging/lustre/lustre/ptlrpc/pers.c | 72 - drivers/staging/lustre/lustre/ptlrpc/pinger.c | 474 --- .../staging/lustre/lustre/ptlrpc/ptlrpc_internal.h | 371 -- .../staging/lustre/lustre/ptlrpc/ptlrpc_module.c | 186 - drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c | 914 ----- drivers/staging/lustre/lustre/ptlrpc/recover.c | 374 -- drivers/staging/lustre/lustre/ptlrpc/sec.c | 2379 ----------- drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c | 572 --- drivers/staging/lustre/lustre/ptlrpc/sec_config.c | 850 ---- drivers/staging/lustre/lustre/ptlrpc/sec_gc.c | 190 - drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c | 170 - drivers/staging/lustre/lustre/ptlrpc/sec_null.c | 459 --- drivers/staging/lustre/lustre/ptlrpc/sec_plain.c | 1023 ----- drivers/staging/lustre/lustre/ptlrpc/service.c | 2807 ------------- drivers/staging/lustre/lustre/ptlrpc/wiretest.c | 4210 -------------------- drivers/staging/lustre/sysfs-fs-lustre | 654 --- scripts/selinux/mdp/mdp.c | 1 - 308 files changed, 195272 deletions(-) delete mode 100644 drivers/staging/lustre/Kconfig delete mode 100644 drivers/staging/lustre/Makefile delete mode 100644 drivers/staging/lustre/README.txt delete mode 100644 drivers/staging/lustre/TODO delete mode 100644 drivers/staging/lustre/include/linux/libcfs/libcfs.h 
delete mode 100644 drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h delete mode 100644 drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h delete mode 100644 drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h delete mode 100644 drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h delete mode 100644 drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h delete mode 100644 drivers/staging/lustre/include/linux/libcfs/libcfs_private.h delete mode 100644 drivers/staging/lustre/include/linux/libcfs/libcfs_string.h delete mode 100644 drivers/staging/lustre/include/linux/lnet/api.h delete mode 100644 drivers/staging/lustre/include/linux/lnet/lib-lnet.h delete mode 100644 drivers/staging/lustre/include/linux/lnet/lib-types.h delete mode 100644 drivers/staging/lustre/include/linux/lnet/socklnd.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h delete mode 100644 
drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h delete mode 100644 drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h delete mode 100644 drivers/staging/lustre/lnet/Kconfig delete mode 100644 drivers/staging/lustre/lnet/Makefile delete mode 100644 drivers/staging/lustre/lnet/klnds/Makefile delete mode 100644 drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile delete mode 100644 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c delete mode 100644 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h delete mode 100644 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c delete mode 100644 drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c delete mode 100644 drivers/staging/lustre/lnet/klnds/socklnd/Makefile delete mode 100644 drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c delete mode 100644 drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h delete mode 100644 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c delete mode 100644 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c delete mode 100644 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c delete mode 100644 drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/Makefile delete mode 100644 drivers/staging/lustre/lnet/libcfs/debug.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/fail.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/hash.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/libcfs_lock.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/libcfs_mem.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/libcfs_string.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c delete mode 100644 
drivers/staging/lustre/lnet/libcfs/linux-crypto.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/linux-crypto.h delete mode 100644 drivers/staging/lustre/lnet/libcfs/linux-debug.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/linux-tracefile.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/module.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/tracefile.c delete mode 100644 drivers/staging/lustre/lnet/libcfs/tracefile.h delete mode 100644 drivers/staging/lustre/lnet/lnet/Makefile delete mode 100644 drivers/staging/lustre/lnet/lnet/acceptor.c delete mode 100644 drivers/staging/lustre/lnet/lnet/api-ni.c delete mode 100644 drivers/staging/lustre/lnet/lnet/config.c delete mode 100644 drivers/staging/lustre/lnet/lnet/lib-eq.c delete mode 100644 drivers/staging/lustre/lnet/lnet/lib-md.c delete mode 100644 drivers/staging/lustre/lnet/lnet/lib-me.c delete mode 100644 drivers/staging/lustre/lnet/lnet/lib-move.c delete mode 100644 drivers/staging/lustre/lnet/lnet/lib-msg.c delete mode 100644 drivers/staging/lustre/lnet/lnet/lib-ptl.c delete mode 100644 drivers/staging/lustre/lnet/lnet/lib-socket.c delete mode 100644 drivers/staging/lustre/lnet/lnet/lo.c delete mode 100644 drivers/staging/lustre/lnet/lnet/module.c delete mode 100644 drivers/staging/lustre/lnet/lnet/net_fault.c delete mode 100644 drivers/staging/lustre/lnet/lnet/nidstrings.c delete mode 100644 drivers/staging/lustre/lnet/lnet/peer.c delete mode 100644 drivers/staging/lustre/lnet/lnet/router.c delete mode 100644 drivers/staging/lustre/lnet/lnet/router_proc.c delete mode 100644 drivers/staging/lustre/lnet/selftest/Makefile delete mode 100644 drivers/staging/lustre/lnet/selftest/brw_test.c delete mode 100644 drivers/staging/lustre/lnet/selftest/conctl.c delete mode 100644 drivers/staging/lustre/lnet/selftest/conrpc.c delete mode 100644 drivers/staging/lustre/lnet/selftest/conrpc.h delete mode 100644 drivers/staging/lustre/lnet/selftest/console.c delete mode 100644 
drivers/staging/lustre/lnet/selftest/console.h delete mode 100644 drivers/staging/lustre/lnet/selftest/framework.c delete mode 100644 drivers/staging/lustre/lnet/selftest/module.c delete mode 100644 drivers/staging/lustre/lnet/selftest/ping_test.c delete mode 100644 drivers/staging/lustre/lnet/selftest/rpc.c delete mode 100644 drivers/staging/lustre/lnet/selftest/rpc.h delete mode 100644 drivers/staging/lustre/lnet/selftest/selftest.h delete mode 100644 drivers/staging/lustre/lnet/selftest/timer.c delete mode 100644 drivers/staging/lustre/lnet/selftest/timer.h delete mode 100644 drivers/staging/lustre/lustre/Kconfig delete mode 100644 drivers/staging/lustre/lustre/Makefile delete mode 100644 drivers/staging/lustre/lustre/fid/Makefile delete mode 100644 drivers/staging/lustre/lustre/fid/fid_internal.h delete mode 100644 drivers/staging/lustre/lustre/fid/fid_lib.c delete mode 100644 drivers/staging/lustre/lustre/fid/fid_request.c delete mode 100644 drivers/staging/lustre/lustre/fid/lproc_fid.c delete mode 100644 drivers/staging/lustre/lustre/fld/Makefile delete mode 100644 drivers/staging/lustre/lustre/fld/fld_cache.c delete mode 100644 drivers/staging/lustre/lustre/fld/fld_internal.h delete mode 100644 drivers/staging/lustre/lustre/fld/fld_request.c delete mode 100644 drivers/staging/lustre/lustre/fld/lproc_fld.c delete mode 100644 drivers/staging/lustre/lustre/include/cl_object.h delete mode 100644 drivers/staging/lustre/lustre/include/interval_tree.h delete mode 100644 drivers/staging/lustre/lustre/include/llog_swab.h delete mode 100644 drivers/staging/lustre/lustre/include/lprocfs_status.h delete mode 100644 drivers/staging/lustre/lustre/include/lu_object.h delete mode 100644 drivers/staging/lustre/lustre/include/lu_ref.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_acl.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_compat.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_debug.h delete mode 100644 
drivers/staging/lustre/lustre/include/lustre_disk.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_dlm.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_dlm_flags.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_errno.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_export.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_fid.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_fld.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_ha.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_handles.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_import.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_intent.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_kernelcomm.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_lib.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_linkea.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_lmv.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_log.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_mdc.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_mds.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_net.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_nrs.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_obdo.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_patchless_compat.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_req_layout.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_sec.h delete mode 100644 drivers/staging/lustre/lustre/include/lustre_swab.h delete mode 100644 drivers/staging/lustre/lustre/include/obd.h delete mode 100644 drivers/staging/lustre/lustre/include/obd_cksum.h delete 
mode 100644 drivers/staging/lustre/lustre/include/obd_class.h delete mode 100644 drivers/staging/lustre/lustre/include/obd_support.h delete mode 100644 drivers/staging/lustre/lustre/include/seq_range.h delete mode 100644 drivers/staging/lustre/lustre/ldlm/interval_tree.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/l_lock.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_extent.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_flock.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_internal.h delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_lib.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_lock.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_plain.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_pool.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_request.c delete mode 100644 drivers/staging/lustre/lustre/ldlm/ldlm_resource.c delete mode 100644 drivers/staging/lustre/lustre/llite/Makefile delete mode 100644 drivers/staging/lustre/lustre/llite/acl.c delete mode 100644 drivers/staging/lustre/lustre/llite/dcache.c delete mode 100644 drivers/staging/lustre/lustre/llite/dir.c delete mode 100644 drivers/staging/lustre/lustre/llite/file.c delete mode 100644 drivers/staging/lustre/lustre/llite/glimpse.c delete mode 100644 drivers/staging/lustre/lustre/llite/lcommon_cl.c delete mode 100644 drivers/staging/lustre/lustre/llite/lcommon_misc.c delete mode 100644 drivers/staging/lustre/lustre/llite/llite_internal.h delete mode 100644 drivers/staging/lustre/lustre/llite/llite_lib.c delete mode 100644 drivers/staging/lustre/lustre/llite/llite_mmap.c delete mode 100644 drivers/staging/lustre/lustre/llite/llite_nfs.c delete mode 100644 drivers/staging/lustre/lustre/llite/lproc_llite.c delete mode 100644 
drivers/staging/lustre/lustre/llite/namei.c delete mode 100644 drivers/staging/lustre/lustre/llite/range_lock.c delete mode 100644 drivers/staging/lustre/lustre/llite/range_lock.h delete mode 100644 drivers/staging/lustre/lustre/llite/rw.c delete mode 100644 drivers/staging/lustre/lustre/llite/rw26.c delete mode 100644 drivers/staging/lustre/lustre/llite/statahead.c delete mode 100644 drivers/staging/lustre/lustre/llite/super25.c delete mode 100644 drivers/staging/lustre/lustre/llite/symlink.c delete mode 100644 drivers/staging/lustre/lustre/llite/vvp_dev.c delete mode 100644 drivers/staging/lustre/lustre/llite/vvp_internal.h delete mode 100644 drivers/staging/lustre/lustre/llite/vvp_io.c delete mode 100644 drivers/staging/lustre/lustre/llite/vvp_lock.c delete mode 100644 drivers/staging/lustre/lustre/llite/vvp_object.c delete mode 100644 drivers/staging/lustre/lustre/llite/vvp_page.c delete mode 100644 drivers/staging/lustre/lustre/llite/xattr.c delete mode 100644 drivers/staging/lustre/lustre/llite/xattr_cache.c delete mode 100644 drivers/staging/lustre/lustre/llite/xattr_security.c delete mode 100644 drivers/staging/lustre/lustre/lmv/Makefile delete mode 100644 drivers/staging/lustre/lustre/lmv/lmv_fld.c delete mode 100644 drivers/staging/lustre/lustre/lmv/lmv_intent.c delete mode 100644 drivers/staging/lustre/lustre/lmv/lmv_internal.h delete mode 100644 drivers/staging/lustre/lustre/lmv/lmv_obd.c delete mode 100644 drivers/staging/lustre/lustre/lmv/lproc_lmv.c delete mode 100644 drivers/staging/lustre/lustre/lov/Makefile delete mode 100644 drivers/staging/lustre/lustre/lov/lov_cl_internal.h delete mode 100644 drivers/staging/lustre/lustre/lov/lov_dev.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_ea.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_internal.h delete mode 100644 drivers/staging/lustre/lustre/lov/lov_io.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_lock.c delete mode 100644 
drivers/staging/lustre/lustre/lov/lov_merge.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_obd.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_object.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_offset.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_pack.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_page.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_pool.c delete mode 100644 drivers/staging/lustre/lustre/lov/lov_request.c delete mode 100644 drivers/staging/lustre/lustre/lov/lovsub_dev.c delete mode 100644 drivers/staging/lustre/lustre/lov/lovsub_lock.c delete mode 100644 drivers/staging/lustre/lustre/lov/lovsub_object.c delete mode 100644 drivers/staging/lustre/lustre/lov/lovsub_page.c delete mode 100644 drivers/staging/lustre/lustre/lov/lproc_lov.c delete mode 100644 drivers/staging/lustre/lustre/mdc/Makefile delete mode 100644 drivers/staging/lustre/lustre/mdc/lproc_mdc.c delete mode 100644 drivers/staging/lustre/lustre/mdc/mdc_internal.h delete mode 100644 drivers/staging/lustre/lustre/mdc/mdc_lib.c delete mode 100644 drivers/staging/lustre/lustre/mdc/mdc_locks.c delete mode 100644 drivers/staging/lustre/lustre/mdc/mdc_reint.c delete mode 100644 drivers/staging/lustre/lustre/mdc/mdc_request.c delete mode 100644 drivers/staging/lustre/lustre/mgc/Makefile delete mode 100644 drivers/staging/lustre/lustre/mgc/lproc_mgc.c delete mode 100644 drivers/staging/lustre/lustre/mgc/mgc_internal.h delete mode 100644 drivers/staging/lustre/lustre/mgc/mgc_request.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/Makefile delete mode 100644 drivers/staging/lustre/lustre/obdclass/cl_internal.h delete mode 100644 drivers/staging/lustre/lustre/obdclass/cl_io.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/cl_lock.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/cl_object.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/cl_page.c delete mode 100644 
drivers/staging/lustre/lustre/obdclass/class_obd.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/debug.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/genops.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/kernelcomm.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/linkea.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/linux/linux-module.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/llog.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/llog_cat.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/llog_internal.h delete mode 100644 drivers/staging/lustre/lustre/obdclass/llog_obd.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/llog_swab.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/lprocfs_counters.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/lprocfs_status.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/lu_object.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/lu_ref.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/lustre_handles.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/lustre_peer.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/obd_config.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/obd_mount.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/obdo.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/statfs_pack.c delete mode 100644 drivers/staging/lustre/lustre/obdclass/uuid.c delete mode 100644 drivers/staging/lustre/lustre/obdecho/Makefile delete mode 100644 drivers/staging/lustre/lustre/obdecho/echo_client.c delete mode 100644 drivers/staging/lustre/lustre/obdecho/echo_internal.h delete mode 100644 drivers/staging/lustre/lustre/osc/Makefile delete mode 100644 drivers/staging/lustre/lustre/osc/lproc_osc.c delete mode 100644 
drivers/staging/lustre/lustre/osc/osc_cache.c delete mode 100644 drivers/staging/lustre/lustre/osc/osc_cl_internal.h delete mode 100644 drivers/staging/lustre/lustre/osc/osc_dev.c delete mode 100644 drivers/staging/lustre/lustre/osc/osc_internal.h delete mode 100644 drivers/staging/lustre/lustre/osc/osc_io.c delete mode 100644 drivers/staging/lustre/lustre/osc/osc_lock.c delete mode 100644 drivers/staging/lustre/lustre/osc/osc_object.c delete mode 100644 drivers/staging/lustre/lustre/osc/osc_page.c delete mode 100644 drivers/staging/lustre/lustre/osc/osc_quota.c delete mode 100644 drivers/staging/lustre/lustre/osc/osc_request.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/Makefile delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/client.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/connection.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/errno.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/events.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/import.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/layout.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/llog_client.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/llog_net.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/niobuf.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/nrs.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/pack_generic.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/pers.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/pinger.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/recover.c delete mode 100644 
drivers/staging/lustre/lustre/ptlrpc/sec.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/sec_config.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/sec_gc.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/sec_null.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/sec_plain.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/service.c delete mode 100644 drivers/staging/lustre/lustre/ptlrpc/wiretest.c delete mode 100644 drivers/staging/lustre/sysfs-fs-lustre (limited to 'MAINTAINERS') diff --git a/MAINTAINERS b/MAINTAINERS index 4b65225d443a..db158767de20 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13329,15 +13329,6 @@ S: Odd Fixes F: Documentation/devicetree/bindings/staging/iio/ F: drivers/staging/iio/ -STAGING - LUSTRE PARALLEL FILESYSTEM -M: Oleg Drokin -M: Andreas Dilger -M: James Simmons -L: lustre-devel@lists.lustre.org (moderated for non-subscribers) -W: http://wiki.lustre.org/ -S: Maintained -F: drivers/staging/lustre - STAGING - NVIDIA COMPLIANT EMBEDDED CONTROLLER INTERFACE (nvec) M: Marc Dietrich L: ac100@lists.launchpad.net (moderated for non-subscribers) diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index d5926f0d3f6c..1c357ef669ae 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -84,8 +84,6 @@ source "drivers/staging/netlogic/Kconfig" source "drivers/staging/mt29f_spinand/Kconfig" -source "drivers/staging/lustre/Kconfig" - source "drivers/staging/dgnc/Kconfig" source "drivers/staging/gs_fpgaboot/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 919753c3d3f6..2edb9860931e 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -32,7 +32,6 @@ obj-$(CONFIG_STAGING_BOARD) += board/ obj-$(CONFIG_LTE_GDM724X) += gdm724x/ obj-$(CONFIG_FIREWIRE_SERIAL) += fwserial/ obj-$(CONFIG_GOLDFISH) += goldfish/ 
-obj-$(CONFIG_LNET) += lustre/ obj-$(CONFIG_DGNC) += dgnc/ obj-$(CONFIG_MTD_SPINAND_MT29F) += mt29f_spinand/ obj-$(CONFIG_GS_FPGABOOT) += gs_fpgaboot/ diff --git a/drivers/staging/lustre/Kconfig b/drivers/staging/lustre/Kconfig deleted file mode 100644 index b7d81096eee9..000000000000 --- a/drivers/staging/lustre/Kconfig +++ /dev/null @@ -1,3 +0,0 @@ -source "drivers/staging/lustre/lnet/Kconfig" - -source "drivers/staging/lustre/lustre/Kconfig" diff --git a/drivers/staging/lustre/Makefile b/drivers/staging/lustre/Makefile deleted file mode 100644 index 95ffe337a80a..000000000000 --- a/drivers/staging/lustre/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -obj-$(CONFIG_LNET) += lnet/ -obj-$(CONFIG_LUSTRE_FS) += lustre/ diff --git a/drivers/staging/lustre/README.txt b/drivers/staging/lustre/README.txt deleted file mode 100644 index 783959240490..000000000000 --- a/drivers/staging/lustre/README.txt +++ /dev/null @@ -1,83 +0,0 @@ -Lustre Parallel Filesystem Client -================================= - -The Lustre file system is an open-source, parallel file system -that supports many requirements of leadership class HPC simulation -environments. -Born from a research project at Carnegie Mellon University, -the Lustre file system is a widely-used option in HPC. -The Lustre file system provides a POSIX compliant file system interface, -can scale to thousands of clients, petabytes of storage and -hundreds of gigabytes per second of I/O bandwidth. - -Unlike shared disk storage cluster filesystems (e.g. OCFS2, GFS, GPFS), -Lustre has independent Metadata and Data servers that clients can access -in parallel to maximize performance. - -In order to use Lustre client you will need to download the "lustre-client" -package that contains the userspace tools from http://lustre.org/download/ - -You will need to install and configure your Lustre servers separately. 
- -Mount Syntax -============ -After you installed the lustre-client tools including mount.lustre binary -you can mount your Lustre filesystem with: - -mount -t lustre mgs:/fsname mnt - -where mgs is the host name or ip address of your Lustre MGS(management service) -fsname is the name of the filesystem you would like to mount. - - -Mount Options -============= - - noflock - Disable posix file locking (Applications trying to use - the functionality will get ENOSYS) - - localflock - Enable local flock support, using only client-local flock - (faster, for applications that require flock but do not run - on multiple nodes). - - flock - Enable cluster-global posix file locking coherent across all - client nodes. - - user_xattr, nouser_xattr - Support "user." extended attributes (or not) - - user_fid2path, nouser_fid2path - Enable FID to path translation by regular users (or not) - - checksum, nochecksum - Verify data consistency on the wire and in memory as it passes - between the layers (or not). - - lruresize, nolruresize - Allow lock LRU to be controlled by memory pressure on the server - (or only 100 (default, controlled by lru_size proc parameter) locks - per CPU per server on this client). - - lazystatfs, nolazystatfs - Do not block in statfs() if some of the servers are down. - - 32bitapi - Shrink inode numbers to fit into 32 bits. This is necessary - if you plan to reexport Lustre filesystem from this client via - NFSv4. 
- - verbose, noverbose - Enable mount/umount console messages (or not) - -More Information -================ -You can get more information at the Lustre website: http://wiki.lustre.org/ - -Source for the userspace tools and out-of-tree client and server code -is available at: http://git.hpdd.intel.com/fs/lustre-release.git - -Latest binary packages: -http://lustre.org/download/ diff --git a/drivers/staging/lustre/TODO b/drivers/staging/lustre/TODO deleted file mode 100644 index 5332cdb19bfa..000000000000 --- a/drivers/staging/lustre/TODO +++ /dev/null @@ -1,302 +0,0 @@ -Currently all the work directed toward the lustre upstream client is tracked -at the following link: - -https://jira.hpdd.intel.com/browse/LU-9679 - -Under this ticket you will see the following work items that need to be -addressed: - -****************************************************************************** -* libcfs cleanup -* -* https://jira.hpdd.intel.com/browse/LU-9859 -* -* Track all the cleanups and simplification of the libcfs module. Remove -* functions the kernel provides. Possibly integrate some of the functionality -* into the kernel proper. -* -****************************************************************************** - -https://jira.hpdd.intel.com/browse/LU-100086 - -LNET_MINOR conflicts with USERIO_MINOR - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-8130 - -Fix and simplify libcfs hash handling - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-8703 - -The current way we handle SMP is wrong. Platforms like ARM and KNL can have -core and NUMA setups with things like NUMA nodes with no cores. We need to -handle such cases. This work also greatly simplified the lustre SMP code. 
- ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9019 - -Replace libcfs time API with standard kernel APIs. Also migrate away from -jiffies. We found jiffies can vary on nodes which can lead to corner cases -that can break the file system due to nodes having inconsistent behavior. -So move to time64_t and ktime_t as much as possible. - -****************************************************************************** -* Proper IB support for ko2iblnd -****************************************************************************** -https://jira.hpdd.intel.com/browse/LU-9179 - -Poor performance for the ko2iblnd driver. This is related to many of the -patches below that are missing from the linux client. ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9886 - -Crash in upstream kiblnd_handle_early_rxs() ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-10394 / LU-10526 / LU-10089 - -Default to default to using MEM_REG ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-10459 - -throttle tx based on queue depth ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9943 - -correct WR fast reg accounting ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-10291 - -remove concurrent_sends tunable ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-10213 - -calculate qp max_send_wrs properly ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9810 - -use less CQ entries for each connection 
------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-10129 / LU-9180 - -rework map_on_demand behavior ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-10129 - -query device capabilities ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-10015 - -fix race at kiblnd_connect_peer ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9983 - -allow for discontiguous fragments ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9500 - -Don't Page Align remote_addr with FastReg ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9448 - -handle empty CPTs ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9507 - -Don't Assert On Reconnect with MultiQP ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9472 - -Fix FastReg map/unmap for MLX5 ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9425 - -Turn on 2 sges by default ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-8943 - -Enable Multiple OPA Endpoints between Nodes ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-5718 - -multiple sges for work request ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9094 - -kill timedout txs from ibp_tx_queue 
------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9094 - -reconnect peer for REJ_INVALID_SERVICE_ID ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-8752 - -Stop MLX5 triggering a dump_cqe ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-8874 - -Move ko2iblnd to latest RDMA changes ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-8875 / LU-8874 - -Change to new RDMA done callback mechanism - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9164 / LU-8874 - -Incorporate RDMA map/unamp API's into ko2iblnd - -****************************************************************************** -* sysfs/debugfs fixes -* -* https://jira.hpdd.intel.com/browse/LU-8066 -* -* The original migration to sysfs was done in haste without properly working -* utilities to test the changes. This covers the work to restore the proper -* behavior. Huge project to make this right. -* -****************************************************************************** - -https://jira.hpdd.intel.com/browse/LU-9431 - -The function class_process_proc_param was used for our mass updates of proc -tunables. It didn't work with sysfs and it was just ugly so it was removed. -In the process the ability to mass update thousands of clients was lost. This -work restores this in a sane way. - ------------------------------------------------------------------------------- -https://jira.hpdd.intel.com/browse/LU-9091 - -One the major request of users is the ability to pass in parameters into a -sysfs file in various different units. For example we can set max_pages_per_rpc -but this can vary on platforms due to different platform sizes. 
So you can -set this like max_pages_per_rpc=16MiB. The original code to handle this written -before the string helpers were created so the code doesn't follow that format -but it would be easy to move to. Currently the string helpers does the reverse -of what we need, changing bytes to string. We need to change a string to bytes. - -****************************************************************************** -* Proper user land to kernel space interface for Lustre -* -* https://jira.hpdd.intel.com/browse/LU-9680 -* -****************************************************************************** - -https://jira.hpdd.intel.com/browse/LU-8915 - -Don't use linux list structure as user land arguments for lnet selftest. -This code is pretty poor quality and really needs to be reworked. - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-8834 - -The lustre ioctl LL_IOC_FUTIMES_3 is very generic. Need to either work with -other file systems with similar functionality and make a common syscall -interface or rework our server code to automagically do it for us. - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-6202 - -Cleanup up ioctl handling. We have many obsolete ioctls. Also the way we do -ioctls can be changed over to netlink. This also has the benefit of working -better with HPC systems that do IO forwarding. Such systems don't like ioctls -very well. - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9667 - -More cleanups by making our utilities use sysfs instead of ioctls for LNet. -Also it has been requested to move the remaining ioctls to the netlink API. 
- -****************************************************************************** -* Misc -****************************************************************************** - ------------------------------------------------------------------------------- -https://jira.hpdd.intel.com/browse/LU-9855 - -Clean up obdclass preprocessor code. One of the major eye sores is the various -pointer redirections and macros used by the obdclass. This makes the code very -difficult to understand. It was requested by the Al Viro to clean this up before -we leave staging. - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9633 - -Migrate to sphinx kernel-doc style comments. Add documents in Documentation. - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-6142 - -Possible remaining coding style fix. Remove deadcode. Enforce kernel code -style. Other minor misc cleanups... - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-8837 - -Separate client/server functionality. Functions only used by server can be -removed from client. Most of this has been done but we need a inspect of the -code to make sure. - ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-8964 - -Lustre client readahead/writeback control needs to better suit kernel providings. -Currently its being explored. We could end up replacing the CLIO read ahead -abstract with the kernel proper version. 
- ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9862 - -Patch that landed for LU-7890 leads to static checker errors ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-9868 - -dcache/namei fixes for lustre ------------------------------------------------------------------------------- - -https://jira.hpdd.intel.com/browse/LU-10467 - -use standard linux wait_events macros work by Neil Brown - ------------------------------------------------------------------------------- - -Please send any patches to Greg Kroah-Hartman , Andreas Dilger -, James Simmons and -Oleg Drokin . diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h deleted file mode 100644 index edc7ed0dcb94..000000000000 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LIBCFS_LIBCFS_H__ -#define __LIBCFS_LIBCFS_H__ - -#include -#include -#include - -#include -#include -#include - -#define LIBCFS_VERSION "0.7.0" - -extern struct blocking_notifier_head libcfs_ioctl_list; -static inline int notifier_from_ioctl_errno(int err) -{ - if (err == -EINVAL) - return NOTIFY_OK; - return notifier_from_errno(err) | NOTIFY_STOP_MASK; -} - -int libcfs_setup(void); - -extern struct workqueue_struct *cfs_rehash_wq; - -void lustre_insert_debugfs(struct ctl_table *table); -int lprocfs_call_handler(void *data, int write, loff_t *ppos, - void __user *buffer, size_t *lenp, - int (*handler)(void *data, int write, loff_t pos, - void __user *buffer, int len)); - -/* - * Memory - */ -#if BITS_PER_LONG == 32 -/* limit to lowmem on 32-bit systems */ -#define NUM_CACHEPAGES \ - min(totalram_pages, 1UL << (30 - PAGE_SHIFT) * 3 / 4) -#else -#define NUM_CACHEPAGES totalram_pages -#endif - -#endif /* __LIBCFS_LIBCFS_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h deleted file mode 100644 index 61641c41c492..000000000000 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_cpu.h +++ /dev/null @@ -1,434 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/include/libcfs/libcfs_cpu.h - * - * CPU partition - * . CPU partition is virtual processing unit - * - * . CPU partition can present 1-N cores, or 1-N NUMA nodes, - * in other words, CPU partition is a processors pool. - * - * CPU Partition Table (CPT) - * . a set of CPU partitions - * - * . There are two modes for CPT: CFS_CPU_MODE_NUMA and CFS_CPU_MODE_SMP - * - * . User can specify total number of CPU partitions while creating a - * CPT, ID of CPU partition is always start from 0. - * - * Example: if there are 8 cores on the system, while creating a CPT - * with cpu_npartitions=4: - * core[0, 1] = partition[0], core[2, 3] = partition[1] - * core[4, 5] = partition[2], core[6, 7] = partition[3] - * - * cpu_npartitions=1: - * core[0, 1, ... 7] = partition[0] - * - * . User can also specify CPU partitions by string pattern - * - * Examples: cpu_partitions="0[0,1], 1[2,3]" - * cpu_partitions="N 0[0-3], 1[4-8]" - * - * The first character "N" means following numbers are numa ID - * - * . NUMA allocators, CPU affinity threads are built over CPU partitions, - * instead of HW CPUs or HW nodes. - * - * . By default, Lustre modules should refer to the global cfs_cpt_tab, - * instead of accessing HW CPUs directly, so concurrency of Lustre can be - * configured by cpu_npartitions of the global cfs_cpt_tab - * - * . 
If cpu_npartitions=1(all CPUs in one pool), lustre should work the - * same way as 2.2 or earlier versions - * - * Author: liang@whamcloud.com - */ - -#ifndef __LIBCFS_CPU_H__ -#define __LIBCFS_CPU_H__ - -#include -#include -#include - -/* any CPU partition */ -#define CFS_CPT_ANY (-1) - -#ifdef CONFIG_SMP -/** virtual processing unit */ -struct cfs_cpu_partition { - /* CPUs mask for this partition */ - cpumask_var_t cpt_cpumask; - /* nodes mask for this partition */ - nodemask_t *cpt_nodemask; - /* spread rotor for NUMA allocator */ - unsigned int cpt_spread_rotor; -}; - - -/** descriptor for CPU partitions */ -struct cfs_cpt_table { - /* version, reserved for hotplug */ - unsigned int ctb_version; - /* spread rotor for NUMA allocator */ - unsigned int ctb_spread_rotor; - /* # of CPU partitions */ - unsigned int ctb_nparts; - /* partitions tables */ - struct cfs_cpu_partition *ctb_parts; - /* shadow HW CPU to CPU partition ID */ - int *ctb_cpu2cpt; - /* all cpus in this partition table */ - cpumask_var_t ctb_cpumask; - /* all nodes in this partition table */ - nodemask_t *ctb_nodemask; -}; - -extern struct cfs_cpt_table *cfs_cpt_tab; - -/** - * return cpumask of CPU partition \a cpt - */ -cpumask_var_t *cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt); -/** - * print string information of cpt-table - */ -int cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len); -/** - * return total number of CPU partitions in \a cptab - */ -int -cfs_cpt_number(struct cfs_cpt_table *cptab); -/** - * return number of HW cores or hyper-threadings in a CPU partition \a cpt - */ -int cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt); -/** - * is there any online CPU in CPU partition \a cpt - */ -int cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt); -/** - * return nodemask of CPU partition \a cpt - */ -nodemask_t *cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt); -/** - * shadow current HW processor ID to CPU-partition ID of \a cptab - */ -int 
cfs_cpt_current(struct cfs_cpt_table *cptab, int remap); -/** - * shadow HW processor ID \a CPU to CPU-partition ID by \a cptab - */ -int cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu); -/** - * bind current thread on a CPU-partition \a cpt of \a cptab - */ -int cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt); -/** - * add \a cpu to CPU partition @cpt of \a cptab, return 1 for success, - * otherwise 0 is returned - */ -int cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu); -/** - * remove \a cpu from CPU partition \a cpt of \a cptab - */ -void cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu); -/** - * add all cpus in \a mask to CPU partition \a cpt - * return 1 if successfully set all CPUs, otherwise return 0 - */ -int cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, - int cpt, cpumask_t *mask); -/** - * remove all cpus in \a mask from CPU partition \a cpt - */ -void cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, - int cpt, cpumask_t *mask); -/** - * add all cpus in NUMA node \a node to CPU partition \a cpt - * return 1 if successfully set all CPUs, otherwise return 0 - */ -int cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node); -/** - * remove all cpus in NUMA node \a node from CPU partition \a cpt - */ -void cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node); - -/** - * add all cpus in node mask \a mask to CPU partition \a cpt - * return 1 if successfully set all CPUs, otherwise return 0 - */ -int cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, - int cpt, nodemask_t *mask); -/** - * remove all cpus in node mask \a mask from CPU partition \a cpt - */ -void cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, - int cpt, nodemask_t *mask); -/** - * unset all cpus for CPU partition \a cpt - */ -void cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt); -/** - * convert partition id \a cpt to numa node id, if there are more than one - * nodes in this partition, it might return a different node id 
each time. - */ -int cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt); - -/** - * return number of HTs in the same core of \a cpu - */ -int cfs_cpu_ht_nsiblings(int cpu); - -int cfs_cpu_init(void); -void cfs_cpu_fini(void); - -#else /* !CONFIG_SMP */ -struct cfs_cpt_table; -#define cfs_cpt_tab ((struct cfs_cpt_table *)NULL) - -static inline cpumask_var_t * -cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt) -{ - return NULL; -} - -static inline int -cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) -{ - return 0; -} -static inline int -cfs_cpt_number(struct cfs_cpt_table *cptab) -{ - return 1; -} - -static inline int -cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) -{ - return 1; -} - -static inline int -cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) -{ - return 1; -} - -static inline nodemask_t * -cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) -{ - return NULL; -} - -static inline int -cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ - return 1; -} - -static inline void -cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ -} - -static inline int -cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ - return 1; -} - -static inline void -cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ -} - -static inline int -cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ - return 1; -} - -static inline void -cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ -} - -static inline int -cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ - return 1; -} - -static inline void -cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ -} - -static inline void -cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) -{ -} - -static inline int -cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) -{ - return 0; -} - -static inline int -cfs_cpu_ht_nsiblings(int 
cpu) -{ - return 1; -} - -static inline int -cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) -{ - return 0; -} - -static inline int -cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) -{ - return 0; -} - -static inline int -cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) -{ - return 0; -} - -static inline int -cfs_cpu_init(void) -{ - return 0; -} - -static inline void cfs_cpu_fini(void) -{ -} - -#endif /* CONFIG_SMP */ - -/** - * destroy a CPU partition table - */ -void cfs_cpt_table_free(struct cfs_cpt_table *cptab); -/** - * create a cfs_cpt_table with \a ncpt number of partitions - */ -struct cfs_cpt_table *cfs_cpt_table_alloc(unsigned int ncpt); - -/* - * allocate per-cpu-partition data, returned value is an array of pointers, - * variable can be indexed by CPU ID. - * cptab != NULL: size of array is number of CPU partitions - * cptab == NULL: size of array is number of HW cores - */ -void *cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size); -/* - * destroy per-cpu-partition variable - */ -void cfs_percpt_free(void *vars); -int cfs_percpt_number(void *vars); - -#define cfs_percpt_for_each(var, i, vars) \ - for (i = 0; i < cfs_percpt_number(vars) && \ - ((var) = (vars)[i]) != NULL; i++) - -/* - * percpu partition lock - * - * There are some use-cases like this in Lustre: - * . each CPU partition has it's own private data which is frequently changed, - * and mostly by the local CPU partition. - * . all CPU partitions share some global data, these data are rarely changed. - * - * LNet is typical example. - * CPU partition lock is designed for this kind of use-cases: - * . each CPU partition has it's own private lock - * . change on private data just needs to take the private lock - * . read on shared data just needs to take _any_ of private locks - * . change on shared data needs to take _all_ private locks, - * which is slow and should be really rare. 
- */ -enum { - CFS_PERCPT_LOCK_EX = -1, /* negative */ -}; - -struct cfs_percpt_lock { - /* cpu-partition-table for this lock */ - struct cfs_cpt_table *pcl_cptab; - /* exclusively locked */ - unsigned int pcl_locked; - /* private lock table */ - spinlock_t **pcl_locks; -}; - -/* return number of private locks */ -#define cfs_percpt_lock_num(pcl) cfs_cpt_number(pcl->pcl_cptab) - -/* - * create a cpu-partition lock based on CPU partition table \a cptab, - * each private lock has extra \a psize bytes padding data - */ -struct cfs_percpt_lock *cfs_percpt_lock_create(struct cfs_cpt_table *cptab, - struct lock_class_key *keys); -/* destroy a cpu-partition lock */ -void cfs_percpt_lock_free(struct cfs_percpt_lock *pcl); - -/* lock private lock \a index of \a pcl */ -void cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index); - -/* unlock private lock \a index of \a pcl */ -void cfs_percpt_unlock(struct cfs_percpt_lock *pcl, int index); - -#define CFS_PERCPT_LOCK_KEYS 256 - -/* NB: don't allocate keys dynamically, lockdep needs them to be in ".data" */ -#define cfs_percpt_lock_alloc(cptab) \ -({ \ - static struct lock_class_key ___keys[CFS_PERCPT_LOCK_KEYS]; \ - struct cfs_percpt_lock *___lk; \ - \ - if (cfs_cpt_number(cptab) > CFS_PERCPT_LOCK_KEYS) \ - ___lk = cfs_percpt_lock_create(cptab, NULL); \ - else \ - ___lk = cfs_percpt_lock_create(cptab, ___keys); \ - ___lk; \ -}) - -/** - * iterate over all CPU partitions in \a cptab - */ -#define cfs_cpt_for_each(i, cptab) \ - for (i = 0; i < cfs_cpt_number(cptab); i++) - -#endif /* __LIBCFS_CPU_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h deleted file mode 100644 index 176fae7319e3..000000000000 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_crypto.h +++ /dev/null @@ -1,208 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - */ - -#ifndef _LIBCFS_CRYPTO_H -#define _LIBCFS_CRYPTO_H - -#include -struct page; - -struct cfs_crypto_hash_type { - char *cht_name; /*< hash algorithm name, equal to - * format name for crypto api - */ - unsigned int cht_key; /*< init key by default (valid for - * 4 bytes context like crc32, adler - */ - unsigned int cht_size; /**< hash digest size */ -}; - -enum cfs_crypto_hash_alg { - CFS_HASH_ALG_NULL = 0, - CFS_HASH_ALG_ADLER32, - CFS_HASH_ALG_CRC32, - CFS_HASH_ALG_MD5, - CFS_HASH_ALG_SHA1, - CFS_HASH_ALG_SHA256, - CFS_HASH_ALG_SHA384, - CFS_HASH_ALG_SHA512, - CFS_HASH_ALG_CRC32C, - CFS_HASH_ALG_MAX, - CFS_HASH_ALG_UNKNOWN = 0xff -}; - -static struct cfs_crypto_hash_type hash_types[] = { - [CFS_HASH_ALG_NULL] = { - .cht_name = "null", - .cht_key = 0, - .cht_size = 0 - }, - [CFS_HASH_ALG_ADLER32] = { - .cht_name = "adler32", - .cht_key = 1, - .cht_size = 4 - }, - [CFS_HASH_ALG_CRC32] = { - .cht_name = "crc32", - .cht_key = ~0, - .cht_size = 4 - }, - [CFS_HASH_ALG_CRC32C] = { - .cht_name = "crc32c", - .cht_key = ~0, - .cht_size = 4 - }, - [CFS_HASH_ALG_MD5] = { - .cht_name = "md5", - .cht_key = 
0, - .cht_size = 16 - }, - [CFS_HASH_ALG_SHA1] = { - .cht_name = "sha1", - .cht_key = 0, - .cht_size = 20 - }, - [CFS_HASH_ALG_SHA256] = { - .cht_name = "sha256", - .cht_key = 0, - .cht_size = 32 - }, - [CFS_HASH_ALG_SHA384] = { - .cht_name = "sha384", - .cht_key = 0, - .cht_size = 48 - }, - [CFS_HASH_ALG_SHA512] = { - .cht_name = "sha512", - .cht_key = 0, - .cht_size = 64 - }, - [CFS_HASH_ALG_MAX] = { - .cht_name = NULL, - .cht_key = 0, - .cht_size = 64 - }, -}; - -/* Maximum size of hash_types[].cht_size */ -#define CFS_CRYPTO_HASH_DIGESTSIZE_MAX 64 - -/** - * Return hash algorithm information for the specified algorithm identifier - * - * Hash information includes algorithm name, initial seed, hash size. - * - * \retval cfs_crypto_hash_type for valid ID (CFS_HASH_ALG_*) - * \retval NULL for unknown algorithm identifier - */ -static inline const struct cfs_crypto_hash_type * -cfs_crypto_hash_type(enum cfs_crypto_hash_alg hash_alg) -{ - struct cfs_crypto_hash_type *ht; - - if (hash_alg < CFS_HASH_ALG_MAX) { - ht = &hash_types[hash_alg]; - if (ht->cht_name) - return ht; - } - return NULL; -} - -/** - * Return hash name for hash algorithm identifier - * - * \param[in] hash_alg hash alrgorithm id (CFS_HASH_ALG_*) - * - * \retval string name of known hash algorithm - * \retval "unknown" if hash algorithm is unknown - */ -static inline const char * -cfs_crypto_hash_name(enum cfs_crypto_hash_alg hash_alg) -{ - const struct cfs_crypto_hash_type *ht; - - ht = cfs_crypto_hash_type(hash_alg); - if (ht) - return ht->cht_name; - return "unknown"; -} - -/** - * Return digest size for hash algorithm type - * - * \param[in] hash_alg hash alrgorithm id (CFS_HASH_ALG_*) - * - * \retval hash algorithm digest size in bytes - * \retval 0 if hash algorithm type is unknown - */ -static inline int cfs_crypto_hash_digestsize(enum cfs_crypto_hash_alg hash_alg) -{ - const struct cfs_crypto_hash_type *ht; - - ht = cfs_crypto_hash_type(hash_alg); - if (ht) - return ht->cht_size; - return 0; 
-} - -/** - * Find hash algorithm ID for the specified algorithm name - * - * \retval hash algorithm ID for valid ID (CFS_HASH_ALG_*) - * \retval CFS_HASH_ALG_UNKNOWN for unknown algorithm name - */ -static inline unsigned char cfs_crypto_hash_alg(const char *algname) -{ - enum cfs_crypto_hash_alg hash_alg; - - for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++) - if (!strcmp(hash_types[hash_alg].cht_name, algname)) - return hash_alg; - - return CFS_HASH_ALG_UNKNOWN; -} - -int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg, - const void *buf, unsigned int buf_len, - unsigned char *key, unsigned int key_len, - unsigned char *hash, unsigned int *hash_len); - -struct ahash_request * -cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg, - unsigned char *key, unsigned int key_len); -int cfs_crypto_hash_update_page(struct ahash_request *desc, - struct page *page, unsigned int offset, - unsigned int len); -int cfs_crypto_hash_update(struct ahash_request *desc, const void *buf, - unsigned int buf_len); -int cfs_crypto_hash_final(struct ahash_request *desc, - unsigned char *hash, unsigned int *hash_len); -int cfs_crypto_register(void); -void cfs_crypto_unregister(void); -int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg); -#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h deleted file mode 100644 index 17534a76362a..000000000000 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h +++ /dev/null @@ -1,207 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/include/libcfs/libcfs_debug.h - * - * Debug messages and assertions - * - */ - -#ifndef __LIBCFS_DEBUG_H__ -#define __LIBCFS_DEBUG_H__ - -#include -#include - -/* - * Debugging - */ -extern unsigned int libcfs_subsystem_debug; -extern unsigned int libcfs_stack; -extern unsigned int libcfs_debug; -extern unsigned int libcfs_printk; -extern unsigned int libcfs_console_ratelimit; -extern unsigned int libcfs_console_max_delay; -extern unsigned int libcfs_console_min_delay; -extern unsigned int libcfs_console_backoff; -extern unsigned int libcfs_debug_binary; -extern char libcfs_debug_file_path_arr[PATH_MAX]; - -int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys); -int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys); - -/* Has there been an LBUG? 
*/ -extern unsigned int libcfs_catastrophe; -extern unsigned int libcfs_panic_on_lbug; - -/* Enable debug-checks on stack size - except on x86_64 */ -#if !defined(__x86_64__) -# ifdef __ia64__ -# define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -# else -# define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_frame_address(0) & \ - (THREAD_SIZE - 1))) -# endif /* __ia64__ */ - -#define __CHECK_STACK(msgdata, mask, cdls) \ -do { \ - if (unlikely(CDEBUG_STACK() > libcfs_stack)) { \ - LIBCFS_DEBUG_MSG_DATA_INIT(msgdata, D_WARNING, NULL); \ - libcfs_stack = CDEBUG_STACK(); \ - libcfs_debug_msg(msgdata, \ - "maximum lustre stack %lu\n", \ - CDEBUG_STACK()); \ - (msgdata)->msg_mask = mask; \ - (msgdata)->msg_cdls = cdls; \ - dump_stack(); \ - /*panic("LBUG");*/ \ - } \ -} while (0) -#define CFS_CHECK_STACK(msgdata, mask, cdls) __CHECK_STACK(msgdata, mask, cdls) -#else /* __x86_64__ */ -#define CFS_CHECK_STACK(msgdata, mask, cdls) do {} while (0) -#define CDEBUG_STACK() (0L) -#endif /* __x86_64__ */ - -#ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -#endif - -#define CDEBUG_DEFAULT_MAX_DELAY (600 * HZ) /* jiffies */ -#define CDEBUG_DEFAULT_MIN_DELAY ((HZ + 1) / 2) /* jiffies */ -#define CDEBUG_DEFAULT_BACKOFF 2 -struct cfs_debug_limit_state { - unsigned long cdls_next; - unsigned int cdls_delay; - int cdls_count; -}; - -struct libcfs_debug_msg_data { - const char *msg_file; - const char *msg_fn; - int msg_subsys; - int msg_line; - int msg_mask; - struct cfs_debug_limit_state *msg_cdls; -}; - -#define LIBCFS_DEBUG_MSG_DATA_INIT(data, mask, cdls) \ -do { \ - (data)->msg_subsys = DEBUG_SUBSYSTEM; \ - (data)->msg_file = __FILE__; \ - (data)->msg_fn = __func__; \ - (data)->msg_line = __LINE__; \ - (data)->msg_cdls = (cdls); \ - (data)->msg_mask = (mask); \ -} while (0) - -#define LIBCFS_DEBUG_MSG_DATA_DECL(dataname, mask, cdls) \ - static struct libcfs_debug_msg_data dataname = { \ - .msg_subsys 
= DEBUG_SUBSYSTEM, \ - .msg_file = __FILE__, \ - .msg_fn = __func__, \ - .msg_line = __LINE__, \ - .msg_cdls = (cdls) }; \ - dataname.msg_mask = (mask) - -/** - * Filters out logging messages based on mask and subsystem. - */ -static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem) -{ - return mask & D_CANTMASK || - ((libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem)); -} - -#define __CDEBUG(cdls, mask, format, ...) \ -do { \ - static struct libcfs_debug_msg_data msgdata; \ - \ - CFS_CHECK_STACK(&msgdata, mask, cdls); \ - \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - LIBCFS_DEBUG_MSG_DATA_INIT(&msgdata, mask, cdls); \ - libcfs_debug_msg(&msgdata, format, ## __VA_ARGS__); \ - } \ -} while (0) - -#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__) - -#define CDEBUG_LIMIT(mask, format, ...) \ -do { \ - static struct cfs_debug_limit_state cdls; \ - \ - __CDEBUG(&cdls, mask, format, ## __VA_ARGS__); \ -} while (0) - -/* - * Lustre Error Checksum: calculates checksum - * of Hex number by XORing the nybbles. - */ -#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \ - ((hexnum) >> 8 & 0xf)) - -#define CWARN(format, ...) CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__) -#define CERROR(format, ...) CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__) -#define CNETERR(format, a...) CDEBUG_LIMIT(D_NETERROR, format, ## a) -#define CEMERG(format, ...) CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__) - -#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__) -#define LCONSOLE_INFO(format, ...) CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__) -#define LCONSOLE_WARN(format, ...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__) -#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \ - "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__) -#define LCONSOLE_ERROR(format, ...) 
LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__) - -#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__) - -int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, - const char *format1, ...) - __printf(2, 3); - -int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, - const char *format1, - va_list args, const char *format2, ...) - __printf(4, 5); - -/* other external symbols that tracefile provides: */ -int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, - const char __user *usr_buffer, int usr_buffer_nob); -int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, - const char *knl_buffer, char *append); - -#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log" - -#endif /* __LIBCFS_DEBUG_H__ */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h deleted file mode 100644 index 8074e390b4d1..000000000000 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h +++ /dev/null @@ -1,194 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. 
All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Oracle Corporation, Inc. - */ - -#ifndef _LIBCFS_FAIL_H -#define _LIBCFS_FAIL_H - -#include -#include - -extern unsigned long cfs_fail_loc; -extern unsigned int cfs_fail_val; -extern int cfs_fail_err; - -extern wait_queue_head_t cfs_race_waitq; -extern int cfs_race_state; - -int __cfs_fail_check_set(u32 id, u32 value, int set); -int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set); - -enum { - CFS_FAIL_LOC_NOSET = 0, - CFS_FAIL_LOC_ORSET = 1, - CFS_FAIL_LOC_RESET = 2, - CFS_FAIL_LOC_VALUE = 3 -}; - -/* Failure injection control */ -#define CFS_FAIL_MASK_SYS 0x0000FF00 -#define CFS_FAIL_MASK_LOC (0x000000FF | CFS_FAIL_MASK_SYS) - -#define CFS_FAILED_BIT 30 -/* CFS_FAILED is 0x40000000 */ -#define CFS_FAILED BIT(CFS_FAILED_BIT) - -#define CFS_FAIL_ONCE_BIT 31 -/* CFS_FAIL_ONCE is 0x80000000 */ -#define CFS_FAIL_ONCE BIT(CFS_FAIL_ONCE_BIT) - -/* The following flags aren't made to be combined */ -#define CFS_FAIL_SKIP 0x20000000 /* skip N times then fail */ -#define CFS_FAIL_SOME 0x10000000 /* only fail N times */ -#define CFS_FAIL_RAND 0x08000000 /* fail 1/N of the times */ -#define CFS_FAIL_USR1 0x04000000 /* user flag */ - -#define CFS_FAULT 0x02000000 /* match any CFS_FAULT_CHECK */ - -static inline bool CFS_FAIL_PRECHECK(u32 id) -{ - return cfs_fail_loc && - ((cfs_fail_loc & CFS_FAIL_MASK_LOC) == (id & CFS_FAIL_MASK_LOC) || - (cfs_fail_loc & id & CFS_FAULT)); -} - -static inline int cfs_fail_check_set(u32 id, u32 value, - int set, int quiet) -{ - int ret = 0; - - if (unlikely(CFS_FAIL_PRECHECK(id))) { - ret = __cfs_fail_check_set(id, value, set); - if (ret) { - if (quiet) { - CDEBUG(D_INFO, "*** cfs_fail_loc=%x, val=%u***\n", - id, value); - } else { - LCONSOLE_INFO("*** cfs_fail_loc=%x, val=%u***\n", - id, value); - } - } - } - - return ret; -} - -/* 
If id hit cfs_fail_loc, return 1, otherwise return 0 */ -#define CFS_FAIL_CHECK(id) \ - cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 0) -#define CFS_FAIL_CHECK_QUIET(id) \ - cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET, 1) - -/* - * If id hit cfs_fail_loc and cfs_fail_val == (-1 or value) return 1, - * otherwise return 0 - */ -#define CFS_FAIL_CHECK_VALUE(id, value) \ - cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 0) -#define CFS_FAIL_CHECK_VALUE_QUIET(id, value) \ - cfs_fail_check_set(id, value, CFS_FAIL_LOC_VALUE, 1) - -/* - * If id hit cfs_fail_loc, cfs_fail_loc |= value and return 1, - * otherwise return 0 - */ -#define CFS_FAIL_CHECK_ORSET(id, value) \ - cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 0) -#define CFS_FAIL_CHECK_ORSET_QUIET(id, value) \ - cfs_fail_check_set(id, value, CFS_FAIL_LOC_ORSET, 1) - -/* - * If id hit cfs_fail_loc, cfs_fail_loc = value and return 1, - * otherwise return 0 - */ -#define CFS_FAIL_CHECK_RESET(id, value) \ - cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 0) -#define CFS_FAIL_CHECK_RESET_QUIET(id, value) \ - cfs_fail_check_set(id, value, CFS_FAIL_LOC_RESET, 1) - -static inline int cfs_fail_timeout_set(u32 id, u32 value, int ms, int set) -{ - if (unlikely(CFS_FAIL_PRECHECK(id))) - return __cfs_fail_timeout_set(id, value, ms, set); - return 0; -} - -/* If id hit cfs_fail_loc, sleep for seconds or milliseconds */ -#define CFS_FAIL_TIMEOUT(id, secs) \ - cfs_fail_timeout_set(id, 0, (secs) * 1000, CFS_FAIL_LOC_NOSET) - -#define CFS_FAIL_TIMEOUT_MS(id, ms) \ - cfs_fail_timeout_set(id, 0, ms, CFS_FAIL_LOC_NOSET) - -/* - * If id hit cfs_fail_loc, cfs_fail_loc |= value and - * sleep seconds or milliseconds - */ -#define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \ - cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_ORSET) - -#define CFS_FAIL_TIMEOUT_RESET(id, value, secs) \ - cfs_fail_timeout_set(id, value, (secs) * 1000, CFS_FAIL_LOC_RESET) - -#define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \ - 
cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET) - -#define CFS_FAULT_CHECK(id) \ - CFS_FAIL_CHECK(CFS_FAULT | (id)) - -/* - * The idea here is to synchronise two threads to force a race. The - * first thread that calls this with a matching fail_loc is put to - * sleep. The next thread that calls with the same fail_loc wakes up - * the first and continues. - */ -static inline void cfs_race(u32 id) -{ - if (CFS_FAIL_PRECHECK(id)) { - if (unlikely(__cfs_fail_check_set(id, 0, CFS_FAIL_LOC_NOSET))) { - int rc; - - cfs_race_state = 0; - CERROR("cfs_race id %x sleeping\n", id); - rc = wait_event_interruptible(cfs_race_waitq, - !!cfs_race_state); - CERROR("cfs_fail_race id %x awake, rc=%d\n", id, rc); - } else { - CERROR("cfs_fail_race id %x waking\n", id); - cfs_race_state = 1; - wake_up(&cfs_race_waitq); - } - } -} - -#define CFS_RACE(id) cfs_race(id) - -#endif /* _LIBCFS_FAIL_H */ diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h deleted file mode 100644 index be315958a4b3..000000000000 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_hash.h +++ /dev/null @@ -1,869 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/include/libcfs/libcfs_hash.h - * - * Hashing routines - * - */ - -#ifndef __LIBCFS_HASH_H__ -#define __LIBCFS_HASH_H__ - -#include -#include -#include -#include - -/* - * Knuth recommends primes in approximately golden ratio to the maximum - * integer representable by a machine word for multiplicative hashing. - * Chuck Lever verified the effectiveness of this technique: - * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf - * - * These primes are chosen to be bit-sparse, that is operations on - * them can use shifts and additions instead of multiplications for - * machines where multiplications are slow. - */ -/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ -#define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL -/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ -#define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL - -/** disable debug */ -#define CFS_HASH_DEBUG_NONE 0 -/* - * record hash depth and output to console when it's too deep, - * computing overhead is low but consume more memory - */ -#define CFS_HASH_DEBUG_1 1 -/** expensive, check key validation */ -#define CFS_HASH_DEBUG_2 2 - -#define CFS_HASH_DEBUG_LEVEL CFS_HASH_DEBUG_NONE - -struct cfs_hash_ops; -struct cfs_hash_lock_ops; -struct cfs_hash_hlist_ops; - -union cfs_hash_lock { - rwlock_t rw; /**< rwlock */ - spinlock_t spin; /**< spinlock */ -}; - -/** - * cfs_hash_bucket is a container of: - * - lock, counter ... - * - array of hash-head starting from hsb_head[0], hash-head can be one of - * . 
struct cfs_hash_head - * . struct cfs_hash_head_dep - * . struct cfs_hash_dhead - * . struct cfs_hash_dhead_dep - * which depends on requirement of user - * - some extra bytes (caller can require it while creating hash) - */ -struct cfs_hash_bucket { - union cfs_hash_lock hsb_lock; /**< bucket lock */ - u32 hsb_count; /**< current entries */ - u32 hsb_version; /**< change version */ - unsigned int hsb_index; /**< index of bucket */ - int hsb_depmax; /**< max depth on bucket */ - long hsb_head[0]; /**< hash-head array */ -}; - -/** - * cfs_hash bucket descriptor, it's normally in stack of caller - */ -struct cfs_hash_bd { - /* address of bucket */ - struct cfs_hash_bucket *bd_bucket; - /* offset in bucket */ - unsigned int bd_offset; -}; - -#define CFS_HASH_NAME_LEN 16 /**< default name length */ -#define CFS_HASH_BIGNAME_LEN 64 /**< bigname for param tree */ - -#define CFS_HASH_BKT_BITS 3 /**< default bits of bucket */ -#define CFS_HASH_BITS_MAX 30 /**< max bits of bucket */ -#define CFS_HASH_BITS_MIN CFS_HASH_BKT_BITS - -/** - * common hash attributes. - */ -enum cfs_hash_tag { - /** - * don't need any lock, caller will protect operations with it's - * own lock. With this flag: - * . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK - * will be ignored. - * . 
Some functions will be disabled with this flag, i.e: - * cfs_hash_for_each_empty, cfs_hash_rehash - */ - CFS_HASH_NO_LOCK = BIT(0), - /** no bucket lock, use one spinlock to protect the whole hash */ - CFS_HASH_NO_BKTLOCK = BIT(1), - /** rwlock to protect bucket */ - CFS_HASH_RW_BKTLOCK = BIT(2), - /** spinlock to protect bucket */ - CFS_HASH_SPIN_BKTLOCK = BIT(3), - /** always add new item to tail */ - CFS_HASH_ADD_TAIL = BIT(4), - /** hash-table doesn't have refcount on item */ - CFS_HASH_NO_ITEMREF = BIT(5), - /** big name for param-tree */ - CFS_HASH_BIGNAME = BIT(6), - /** track global count */ - CFS_HASH_COUNTER = BIT(7), - /** rehash item by new key */ - CFS_HASH_REHASH_KEY = BIT(8), - /** Enable dynamic hash resizing */ - CFS_HASH_REHASH = BIT(9), - /** can shrink hash-size */ - CFS_HASH_SHRINK = BIT(10), - /** assert hash is empty on exit */ - CFS_HASH_ASSERT_EMPTY = BIT(11), - /** record hlist depth */ - CFS_HASH_DEPTH = BIT(12), - /** - * rehash is always scheduled in a different thread, so current - * change on hash table is non-blocking - */ - CFS_HASH_NBLK_CHANGE = BIT(13), - /** - * NB, we typed hs_flags as u16, please change it - * if you need to extend >=16 flags - */ -}; - -/** most used attributes */ -#define CFS_HASH_DEFAULT (CFS_HASH_RW_BKTLOCK | \ - CFS_HASH_COUNTER | CFS_HASH_REHASH) - -/** - * cfs_hash is a hash-table implementation for general purpose, it can support: - * . two refcount modes - * hash-table with & without refcount - * . four lock modes - * nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock - * . general operations - * lookup, add(add_tail or add_head), delete - * . rehash - * grows or shrink - * . iteration - * locked iteration and unlocked iteration - * . bigname - * support long name hash - * . 
debug - * trace max searching depth - * - * Rehash: - * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker) - * is spawned to handle the rehash in the background, it's possible that other - * processes can concurrently perform additions, deletions, and lookups - * without being blocked on rehash completion, because rehash will release - * the global wrlock for each bucket. - * - * rehash and iteration can't run at the same time because it's too tricky - * to keep both of them safe and correct. - * As they are relatively rare operations, so: - * . if iteration is in progress while we try to launch rehash, then - * it just giveup, iterator will launch rehash at the end. - * . if rehash is in progress while we try to iterate the hash table, - * then we just wait (shouldn't be very long time), anyway, nobody - * should expect iteration of whole hash-table to be non-blocking. - * - * During rehashing, a (key,object) pair may be in one of two buckets, - * depending on whether the worker task has yet to transfer the object - * to its new location in the table. Lookups and deletions need to search both - * locations; additions must take care to only insert into the new bucket. 
- */ - -struct cfs_hash { - /** - * serialize with rehash, or serialize all operations if - * the hash-table has CFS_HASH_NO_BKTLOCK - */ - union cfs_hash_lock hs_lock; - /** hash operations */ - struct cfs_hash_ops *hs_ops; - /** hash lock operations */ - struct cfs_hash_lock_ops *hs_lops; - /** hash list operations */ - struct cfs_hash_hlist_ops *hs_hops; - /** hash buckets-table */ - struct cfs_hash_bucket **hs_buckets; - /** total number of items on this hash-table */ - atomic_t hs_count; - /** hash flags, see cfs_hash_tag for detail */ - u16 hs_flags; - /** # of extra-bytes for bucket, for user saving extended attributes */ - u16 hs_extra_bytes; - /** wants to iterate */ - u8 hs_iterating; - /** hash-table is dying */ - u8 hs_exiting; - /** current hash bits */ - u8 hs_cur_bits; - /** min hash bits */ - u8 hs_min_bits; - /** max hash bits */ - u8 hs_max_bits; - /** bits for rehash */ - u8 hs_rehash_bits; - /** bits for each bucket */ - u8 hs_bkt_bits; - /** resize min threshold */ - u16 hs_min_theta; - /** resize max threshold */ - u16 hs_max_theta; - /** resize count */ - u32 hs_rehash_count; - /** # of iterators (caller of cfs_hash_for_each_*) */ - u32 hs_iterators; - /** rehash workitem */ - struct work_struct hs_rehash_work; - /** refcount on this hash table */ - atomic_t hs_refcount; - /** rehash buckets-table */ - struct cfs_hash_bucket **hs_rehash_buckets; -#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 - /** serialize debug members */ - spinlock_t hs_dep_lock; - /** max depth */ - unsigned int hs_dep_max; - /** id of the deepest bucket */ - unsigned int hs_dep_bkt; - /** offset in the deepest bucket */ - unsigned int hs_dep_off; - /** bits when we found the max depth */ - unsigned int hs_dep_bits; - /** workitem to output max depth */ - struct work_struct hs_dep_work; -#endif - /** name of htable */ - char hs_name[0]; -}; - -struct cfs_hash_lock_ops { - /** lock the hash table */ - void (*hs_lock)(union cfs_hash_lock *lock, int exclusive); - /** unlock 
the hash table */ - void (*hs_unlock)(union cfs_hash_lock *lock, int exclusive); - /** lock the hash bucket */ - void (*hs_bkt_lock)(union cfs_hash_lock *lock, int exclusive); - /** unlock the hash bucket */ - void (*hs_bkt_unlock)(union cfs_hash_lock *lock, int exclusive); -}; - -struct cfs_hash_hlist_ops { - /** return hlist_head of hash-head of @bd */ - struct hlist_head *(*hop_hhead)(struct cfs_hash *hs, - struct cfs_hash_bd *bd); - /** return hash-head size */ - int (*hop_hhead_size)(struct cfs_hash *hs); - /** add @hnode to hash-head of @bd */ - int (*hop_hnode_add)(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode); - /** remove @hnode from hash-head of @bd */ - int (*hop_hnode_del)(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode); -}; - -struct cfs_hash_ops { - /** return hashed value from @key */ - unsigned int (*hs_hash)(struct cfs_hash *hs, const void *key, - unsigned int mask); - /** return key address of @hnode */ - void * (*hs_key)(struct hlist_node *hnode); - /** copy key from @hnode to @key */ - void (*hs_keycpy)(struct hlist_node *hnode, void *key); - /** - * compare @key with key of @hnode - * returns 1 on a match - */ - int (*hs_keycmp)(const void *key, struct hlist_node *hnode); - /** return object address of @hnode, i.e: container_of(...hnode) */ - void * (*hs_object)(struct hlist_node *hnode); - /** get refcount of item, always called with holding bucket-lock */ - void (*hs_get)(struct cfs_hash *hs, struct hlist_node *hnode); - /** release refcount of item */ - void (*hs_put)(struct cfs_hash *hs, struct hlist_node *hnode); - /** release refcount of item, always called with holding bucket-lock */ - void (*hs_put_locked)(struct cfs_hash *hs, - struct hlist_node *hnode); - /** it's called before removing of @hnode */ - void (*hs_exit)(struct cfs_hash *hs, struct hlist_node *hnode); -}; - -/** total number of buckets in @hs */ -#define CFS_HASH_NBKT(hs) \ - BIT((hs)->hs_cur_bits - (hs)->hs_bkt_bits) - 
-/** total number of buckets in @hs while rehashing */ -#define CFS_HASH_RH_NBKT(hs) \ - BIT((hs)->hs_rehash_bits - (hs)->hs_bkt_bits) - -/** number of hlist for in bucket */ -#define CFS_HASH_BKT_NHLIST(hs) BIT((hs)->hs_bkt_bits) - -/** total number of hlist in @hs */ -#define CFS_HASH_NHLIST(hs) BIT((hs)->hs_cur_bits) - -/** total number of hlist in @hs while rehashing */ -#define CFS_HASH_RH_NHLIST(hs) BIT((hs)->hs_rehash_bits) - -static inline int -cfs_hash_with_no_lock(struct cfs_hash *hs) -{ - /* caller will serialize all operations for this hash-table */ - return hs->hs_flags & CFS_HASH_NO_LOCK; -} - -static inline int -cfs_hash_with_no_bktlock(struct cfs_hash *hs) -{ - /* no bucket lock, one single lock to protect the hash-table */ - return hs->hs_flags & CFS_HASH_NO_BKTLOCK; -} - -static inline int -cfs_hash_with_rw_bktlock(struct cfs_hash *hs) -{ - /* rwlock to protect hash bucket */ - return hs->hs_flags & CFS_HASH_RW_BKTLOCK; -} - -static inline int -cfs_hash_with_spin_bktlock(struct cfs_hash *hs) -{ - /* spinlock to protect hash bucket */ - return hs->hs_flags & CFS_HASH_SPIN_BKTLOCK; -} - -static inline int -cfs_hash_with_add_tail(struct cfs_hash *hs) -{ - return hs->hs_flags & CFS_HASH_ADD_TAIL; -} - -static inline int -cfs_hash_with_no_itemref(struct cfs_hash *hs) -{ - /* - * hash-table doesn't keep refcount on item, - * item can't be removed from hash unless it's - * ZERO refcount - */ - return hs->hs_flags & CFS_HASH_NO_ITEMREF; -} - -static inline int -cfs_hash_with_bigname(struct cfs_hash *hs) -{ - return hs->hs_flags & CFS_HASH_BIGNAME; -} - -static inline int -cfs_hash_with_counter(struct cfs_hash *hs) -{ - return hs->hs_flags & CFS_HASH_COUNTER; -} - -static inline int -cfs_hash_with_rehash(struct cfs_hash *hs) -{ - return hs->hs_flags & CFS_HASH_REHASH; -} - -static inline int -cfs_hash_with_rehash_key(struct cfs_hash *hs) -{ - return hs->hs_flags & CFS_HASH_REHASH_KEY; -} - -static inline int -cfs_hash_with_shrink(struct cfs_hash *hs) -{ - 
return hs->hs_flags & CFS_HASH_SHRINK; -} - -static inline int -cfs_hash_with_assert_empty(struct cfs_hash *hs) -{ - return hs->hs_flags & CFS_HASH_ASSERT_EMPTY; -} - -static inline int -cfs_hash_with_depth(struct cfs_hash *hs) -{ - return hs->hs_flags & CFS_HASH_DEPTH; -} - -static inline int -cfs_hash_with_nblk_change(struct cfs_hash *hs) -{ - return hs->hs_flags & CFS_HASH_NBLK_CHANGE; -} - -static inline int -cfs_hash_is_exiting(struct cfs_hash *hs) -{ - /* cfs_hash_destroy is called */ - return hs->hs_exiting; -} - -static inline int -cfs_hash_is_rehashing(struct cfs_hash *hs) -{ - /* rehash is launched */ - return !!hs->hs_rehash_bits; -} - -static inline int -cfs_hash_is_iterating(struct cfs_hash *hs) -{ - /* someone is calling cfs_hash_for_each_* */ - return hs->hs_iterating || hs->hs_iterators; -} - -static inline int -cfs_hash_bkt_size(struct cfs_hash *hs) -{ - return offsetof(struct cfs_hash_bucket, hsb_head[0]) + - hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) + - hs->hs_extra_bytes; -} - -static inline unsigned -cfs_hash_id(struct cfs_hash *hs, const void *key, unsigned int mask) -{ - return hs->hs_ops->hs_hash(hs, key, mask); -} - -static inline void * -cfs_hash_key(struct cfs_hash *hs, struct hlist_node *hnode) -{ - return hs->hs_ops->hs_key(hnode); -} - -static inline void -cfs_hash_keycpy(struct cfs_hash *hs, struct hlist_node *hnode, void *key) -{ - if (hs->hs_ops->hs_keycpy) - hs->hs_ops->hs_keycpy(hnode, key); -} - -/** - * Returns 1 on a match, - */ -static inline int -cfs_hash_keycmp(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) -{ - return hs->hs_ops->hs_keycmp(key, hnode); -} - -static inline void * -cfs_hash_object(struct cfs_hash *hs, struct hlist_node *hnode) -{ - return hs->hs_ops->hs_object(hnode); -} - -static inline void -cfs_hash_get(struct cfs_hash *hs, struct hlist_node *hnode) -{ - return hs->hs_ops->hs_get(hs, hnode); -} - -static inline void -cfs_hash_put_locked(struct cfs_hash *hs, struct 
hlist_node *hnode) -{ - return hs->hs_ops->hs_put_locked(hs, hnode); -} - -static inline void -cfs_hash_put(struct cfs_hash *hs, struct hlist_node *hnode) -{ - return hs->hs_ops->hs_put(hs, hnode); -} - -static inline void -cfs_hash_exit(struct cfs_hash *hs, struct hlist_node *hnode) -{ - if (hs->hs_ops->hs_exit) - hs->hs_ops->hs_exit(hs, hnode); -} - -static inline void cfs_hash_lock(struct cfs_hash *hs, int excl) -{ - hs->hs_lops->hs_lock(&hs->hs_lock, excl); -} - -static inline void cfs_hash_unlock(struct cfs_hash *hs, int excl) -{ - hs->hs_lops->hs_unlock(&hs->hs_lock, excl); -} - -static inline int cfs_hash_dec_and_lock(struct cfs_hash *hs, - atomic_t *condition) -{ - LASSERT(cfs_hash_with_no_bktlock(hs)); - return atomic_dec_and_lock(condition, &hs->hs_lock.spin); -} - -static inline void cfs_hash_bd_lock(struct cfs_hash *hs, - struct cfs_hash_bd *bd, int excl) -{ - hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl); -} - -static inline void cfs_hash_bd_unlock(struct cfs_hash *hs, - struct cfs_hash_bd *bd, int excl) -{ - hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl); -} - -/** - * operations on cfs_hash bucket (bd: bucket descriptor), - * they are normally for hash-table without rehash - */ -void cfs_hash_bd_get(struct cfs_hash *hs, const void *key, - struct cfs_hash_bd *bd); - -static inline void -cfs_hash_bd_get_and_lock(struct cfs_hash *hs, const void *key, - struct cfs_hash_bd *bd, int excl) -{ - cfs_hash_bd_get(hs, key, bd); - cfs_hash_bd_lock(hs, bd, excl); -} - -static inline unsigned -cfs_hash_bd_index_get(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits); -} - -static inline void -cfs_hash_bd_index_set(struct cfs_hash *hs, unsigned int index, - struct cfs_hash_bd *bd) -{ - bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits]; - bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U); -} - -static inline void * -cfs_hash_bd_extra_get(struct cfs_hash *hs, struct 
cfs_hash_bd *bd) -{ - return (void *)bd->bd_bucket + - cfs_hash_bkt_size(hs) - hs->hs_extra_bytes; -} - -static inline u32 -cfs_hash_bd_version_get(struct cfs_hash_bd *bd) -{ - /* need hold cfs_hash_bd_lock */ - return bd->bd_bucket->hsb_version; -} - -static inline u32 -cfs_hash_bd_count_get(struct cfs_hash_bd *bd) -{ - /* need hold cfs_hash_bd_lock */ - return bd->bd_bucket->hsb_count; -} - -static inline int -cfs_hash_bd_depmax_get(struct cfs_hash_bd *bd) -{ - return bd->bd_bucket->hsb_depmax; -} - -static inline int -cfs_hash_bd_compare(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2) -{ - if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index) - return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index; - - if (bd1->bd_offset != bd2->bd_offset) - return bd1->bd_offset - bd2->bd_offset; - - return 0; -} - -void cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode); -void cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode); -void cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old, - struct cfs_hash_bd *bd_new, - struct hlist_node *hnode); - -static inline int -cfs_hash_bd_dec_and_lock(struct cfs_hash *hs, struct cfs_hash_bd *bd, - atomic_t *condition) -{ - LASSERT(cfs_hash_with_spin_bktlock(hs)); - return atomic_dec_and_lock(condition, &bd->bd_bucket->hsb_lock.spin); -} - -static inline struct hlist_head * -cfs_hash_bd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - return hs->hs_hops->hop_hhead(hs, bd); -} - -struct hlist_node * -cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key); -struct hlist_node * -cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key); - -/** - * operations on cfs_hash bucket (bd: bucket descriptor), - * they are safe for hash-table with rehash - */ -void cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key, - struct cfs_hash_bd *bds); -void 
cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - int excl); -void cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - int excl); - -static inline void -cfs_hash_dual_bd_get_and_lock(struct cfs_hash *hs, const void *key, - struct cfs_hash_bd *bds, int excl) -{ - cfs_hash_dual_bd_get(hs, key, bds); - cfs_hash_dual_bd_lock(hs, bds, excl); -} - -struct hlist_node * -cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key); -struct hlist_node * -cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key, struct hlist_node *hnode, - int insist_add); -struct hlist_node * -cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key, struct hlist_node *hnode); - -/* Hash init/cleanup functions */ -struct cfs_hash * -cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits, - unsigned int bkt_bits, unsigned int extra_bytes, - unsigned int min_theta, unsigned int max_theta, - struct cfs_hash_ops *ops, unsigned int flags); - -struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs); -void cfs_hash_putref(struct cfs_hash *hs); - -/* Hash addition functions */ -void cfs_hash_add(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode); -int cfs_hash_add_unique(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode); -void *cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode); - -/* Hash deletion functions */ -void *cfs_hash_del(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode); -void *cfs_hash_del_key(struct cfs_hash *hs, const void *key); - -/* Hash lookup/for_each functions */ -#define CFS_HASH_LOOP_HOG 1024 - -typedef int (*cfs_hash_for_each_cb_t)(struct cfs_hash *hs, - struct cfs_hash_bd *bd, - struct hlist_node *node, - void *data); -void * -cfs_hash_lookup(struct cfs_hash *hs, const void *key); -void -cfs_hash_for_each(struct cfs_hash *hs, 
cfs_hash_for_each_cb_t cb, void *data); -void -cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, - void *data); -int -cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, - void *data, int start); -int -cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t cb, - void *data); -void -cfs_hash_for_each_key(struct cfs_hash *hs, const void *key, - cfs_hash_for_each_cb_t cb, void *data); -typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data); -void -cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t cb, void *data); - -void -cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex, - cfs_hash_for_each_cb_t cb, void *data); -int cfs_hash_is_empty(struct cfs_hash *hs); -u64 cfs_hash_size_get(struct cfs_hash *hs); - -/* - * Rehash - Theta is calculated to be the average chained - * hash depth assuming a perfectly uniform hash function. - */ -void cfs_hash_rehash_cancel_locked(struct cfs_hash *hs); -void cfs_hash_rehash_cancel(struct cfs_hash *hs); -void cfs_hash_rehash(struct cfs_hash *hs, int do_rehash); -void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key, - void *new_key, struct hlist_node *hnode); - -#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 -/* Validate hnode references the correct key */ -static inline void -cfs_hash_key_validate(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode) -{ - LASSERT(cfs_hash_keycmp(hs, key, hnode)); -} - -/* Validate hnode is in the correct bucket */ -static inline void -cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_bd bds[2]; - - cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds); - LASSERT(bds[0].bd_bucket == bd->bd_bucket || - bds[1].bd_bucket == bd->bd_bucket); -} - -#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */ - -static inline void -cfs_hash_key_validate(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode) {} - -static inline void 
-cfs_hash_bucket_validate(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) {} - -#endif /* CFS_HASH_DEBUG_LEVEL */ - -#define CFS_HASH_THETA_BITS 10 -#define CFS_HASH_MIN_THETA BIT(CFS_HASH_THETA_BITS - 1) -#define CFS_HASH_MAX_THETA BIT(CFS_HASH_THETA_BITS + 1) - -/* Return integer component of theta */ -static inline int __cfs_hash_theta_int(int theta) -{ - return (theta >> CFS_HASH_THETA_BITS); -} - -/* Return a fractional value between 0 and 999 */ -static inline int __cfs_hash_theta_frac(int theta) -{ - return ((theta * 1000) >> CFS_HASH_THETA_BITS) - - (__cfs_hash_theta_int(theta) * 1000); -} - -static inline int __cfs_hash_theta(struct cfs_hash *hs) -{ - return (atomic_read(&hs->hs_count) << - CFS_HASH_THETA_BITS) >> hs->hs_cur_bits; -} - -static inline void -__cfs_hash_set_theta(struct cfs_hash *hs, int min, int max) -{ - LASSERT(min < max); - hs->hs_min_theta = (u16)min; - hs->hs_max_theta = (u16)max; -} - -/* Generic debug formatting routines mainly for proc handler */ -struct seq_file; -void cfs_hash_debug_header(struct seq_file *m); -void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m); - -/* - * Generic djb2 hash algorithm for character arrays. - */ -static inline unsigned -cfs_hash_djb2_hash(const void *key, size_t size, unsigned int mask) -{ - unsigned int i, hash = 5381; - - LASSERT(key); - - for (i = 0; i < size; i++) - hash = hash * 33 + ((char *)key)[i]; - - return (hash & mask); -} - -/* - * Generic u32 hash algorithm. - */ -static inline unsigned -cfs_hash_u32_hash(const u32 key, unsigned int mask) -{ - return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask); -} - -/* - * Generic u64 hash algorithm. 
- */ -static inline unsigned -cfs_hash_u64_hash(const u64 key, unsigned int mask) -{ - return ((unsigned int)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask); -} - -/** iterate over all buckets in @bds (array of struct cfs_hash_bd) */ -#define cfs_hash_for_each_bd(bds, n, i) \ - for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++) - -/** iterate over all buckets of @hs */ -#define cfs_hash_for_each_bucket(hs, bd, pos) \ - for (pos = 0; \ - pos < CFS_HASH_NBKT(hs) && \ - ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++) - -/** iterate over all hlist of bucket @bd */ -#define cfs_hash_bd_for_each_hlist(hs, bd, hlist) \ - for ((bd)->bd_offset = 0; \ - (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) && \ - (hlist = cfs_hash_bd_hhead(hs, bd)) != NULL; \ - (bd)->bd_offset++) - -/* !__LIBCFS__HASH_H__ */ -#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h deleted file mode 100644 index 491d5971d199..000000000000 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h +++ /dev/null @@ -1,200 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/include/libcfs/libcfs_private.h - * - * Various defines for libcfs. - * - */ - -#ifndef __LIBCFS_PRIVATE_H__ -#define __LIBCFS_PRIVATE_H__ - -#ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -#endif - -#define LASSERTF(cond, fmt, ...) \ -do { \ - if (unlikely(!(cond))) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(__msg_data, D_EMERG, NULL); \ - libcfs_debug_msg(&__msg_data, \ - "ASSERTION( %s ) failed: " fmt, #cond, \ - ## __VA_ARGS__); \ - lbug_with_loc(&__msg_data); \ - } \ -} while (0) - -#define LASSERT(cond) LASSERTF(cond, "\n") - -#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK -/** - * This is for more expensive checks that one doesn't want to be enabled all - * the time. LINVRNT() has to be explicitly enabled by - * CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK option. 
- */ -# define LINVRNT(exp) LASSERT(exp) -#else -# define LINVRNT(exp) ((void)sizeof !!(exp)) -#endif - -void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msg); - -#define LBUG() \ -do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \ - lbug_with_loc(&msgdata); \ -} while (0) - -/* - * Use #define rather than inline, as lnet_cpt_table() might - * not be defined yet - */ -#define kmalloc_cpt(size, flags, cpt) \ - kmalloc_node(size, flags, cfs_cpt_spread_node(lnet_cpt_table(), cpt)) - -#define kzalloc_cpt(size, flags, cpt) \ - kmalloc_node(size, flags | __GFP_ZERO, \ - cfs_cpt_spread_node(lnet_cpt_table(), cpt)) - -#define kvmalloc_cpt(size, flags, cpt) \ - kvmalloc_node(size, flags, \ - cfs_cpt_spread_node(lnet_cpt_table(), cpt)) - -#define kvzalloc_cpt(size, flags, cpt) \ - kvmalloc_node(size, flags | __GFP_ZERO, \ - cfs_cpt_spread_node(lnet_cpt_table(), cpt)) - -/******************************************************************************/ - -void libcfs_debug_dumplog(void); -int libcfs_debug_init(unsigned long bufsize); -int libcfs_debug_cleanup(void); -int libcfs_debug_clear_buffer(void); -int libcfs_debug_mark_buffer(const char *text); - -/* - * allocate a variable array, returned value is an array of pointers. - * Caller can specify length of array by count. 
- */ -void *cfs_array_alloc(int count, unsigned int size); -void cfs_array_free(void *vars); - -#define LASSERT_ATOMIC_ENABLED (1) - -#if LASSERT_ATOMIC_ENABLED - -/** assert value of @a is equal to @v */ -#define LASSERT_ATOMIC_EQ(a, v) \ - LASSERTF(atomic_read(a) == v, "value: %d\n", atomic_read((a))) - -/** assert value of @a is unequal to @v */ -#define LASSERT_ATOMIC_NE(a, v) \ - LASSERTF(atomic_read(a) != v, "value: %d\n", atomic_read((a))) - -/** assert value of @a is little than @v */ -#define LASSERT_ATOMIC_LT(a, v) \ - LASSERTF(atomic_read(a) < v, "value: %d\n", atomic_read((a))) - -/** assert value of @a is little/equal to @v */ -#define LASSERT_ATOMIC_LE(a, v) \ - LASSERTF(atomic_read(a) <= v, "value: %d\n", atomic_read((a))) - -/** assert value of @a is great than @v */ -#define LASSERT_ATOMIC_GT(a, v) \ - LASSERTF(atomic_read(a) > v, "value: %d\n", atomic_read((a))) - -/** assert value of @a is great/equal to @v */ -#define LASSERT_ATOMIC_GE(a, v) \ - LASSERTF(atomic_read(a) >= v, "value: %d\n", atomic_read((a))) - -/** assert value of @a is great than @v1 and little than @v2 */ -#define LASSERT_ATOMIC_GT_LT(a, v1, v2) \ -do { \ - int __v = atomic_read(a); \ - LASSERTF(__v > v1 && __v < v2, "value: %d\n", __v); \ -} while (0) - -/** assert value of @a is great than @v1 and little/equal to @v2 */ -#define LASSERT_ATOMIC_GT_LE(a, v1, v2) \ -do { \ - int __v = atomic_read(a); \ - LASSERTF(__v > v1 && __v <= v2, "value: %d\n", __v); \ -} while (0) - -/** assert value of @a is great/equal to @v1 and little than @v2 */ -#define LASSERT_ATOMIC_GE_LT(a, v1, v2) \ -do { \ - int __v = atomic_read(a); \ - LASSERTF(__v >= v1 && __v < v2, "value: %d\n", __v); \ -} while (0) - -/** assert value of @a is great/equal to @v1 and little/equal to @v2 */ -#define LASSERT_ATOMIC_GE_LE(a, v1, v2) \ -do { \ - int __v = atomic_read(a); \ - LASSERTF(__v >= v1 && __v <= v2, "value: %d\n", __v); \ -} while (0) - -#else /* !LASSERT_ATOMIC_ENABLED */ - -#define 
LASSERT_ATOMIC_EQ(a, v) do {} while (0) -#define LASSERT_ATOMIC_NE(a, v) do {} while (0) -#define LASSERT_ATOMIC_LT(a, v) do {} while (0) -#define LASSERT_ATOMIC_LE(a, v) do {} while (0) -#define LASSERT_ATOMIC_GT(a, v) do {} while (0) -#define LASSERT_ATOMIC_GE(a, v) do {} while (0) -#define LASSERT_ATOMIC_GT_LT(a, v1, v2) do {} while (0) -#define LASSERT_ATOMIC_GT_LE(a, v1, v2) do {} while (0) -#define LASSERT_ATOMIC_GE_LT(a, v1, v2) do {} while (0) -#define LASSERT_ATOMIC_GE_LE(a, v1, v2) do {} while (0) - -#endif /* LASSERT_ATOMIC_ENABLED */ - -#define LASSERT_ATOMIC_ZERO(a) LASSERT_ATOMIC_EQ(a, 0) -#define LASSERT_ATOMIC_POS(a) LASSERT_ATOMIC_GT(a, 0) - -/* implication */ -#define ergo(a, b) (!(a) || (b)) -/* logical equivalence */ -#define equi(a, b) (!!(a) == !!(b)) - -#ifndef HAVE_CFS_SIZE_ROUND -static inline size_t cfs_size_round(int val) -{ - return round_up(val, 8); -} - -#define HAVE_CFS_SIZE_ROUND -#endif - -#endif diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h deleted file mode 100644 index cd7c3ccb2dc0..000000000000 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_string.h +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/include/libcfs/libcfs_string.h - * - * Generic string manipulation functions. - * - * Author: Nathan Rutman - */ - -#ifndef __LIBCFS_STRING_H__ -#define __LIBCFS_STRING_H__ - -#include - -/* libcfs_string.c */ -/* Convert a text string to a bitmask */ -int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), - int *oldmask, int minmask, int allmask); -/* trim leading and trailing space characters */ -char *cfs_firststr(char *str, size_t size); - -/** - * Structure to represent NULL-less strings. - */ -struct cfs_lstr { - char *ls_str; - int ls_len; -}; - -/* - * Structure to represent \ token of the syntax. - */ -struct cfs_range_expr { - /* - * Link to cfs_expr_list::el_exprs. 
- */ - struct list_head re_link; - u32 re_lo; - u32 re_hi; - u32 re_stride; -}; - -struct cfs_expr_list { - struct list_head el_link; - struct list_head el_exprs; -}; - -int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res); -int cfs_str2num_check(char *str, int nob, unsigned int *num, - unsigned int min, unsigned int max); -int cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list); -int cfs_expr_list_print(char *buffer, int count, - struct cfs_expr_list *expr_list); -int cfs_expr_list_values(struct cfs_expr_list *expr_list, - int max, u32 **values); -static inline void -cfs_expr_list_values_free(u32 *values, int num) -{ - /* - * This array is allocated by kvalloc(), so it shouldn't be freed - * by OBD_FREE() if it's called by module other than libcfs & LNet, - * otherwise we will see fake memory leak - */ - kvfree(values); -} - -void cfs_expr_list_free(struct cfs_expr_list *expr_list); -int cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max, - struct cfs_expr_list **elpp); -void cfs_expr_list_free_list(struct list_head *list); - -#endif diff --git a/drivers/staging/lustre/include/linux/lnet/api.h b/drivers/staging/lustre/include/linux/lnet/api.h deleted file mode 100644 index dae2e4f0056c..000000000000 --- a/drivers/staging/lustre/include/linux/lnet/api.h +++ /dev/null @@ -1,212 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011 - 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. - */ - -#ifndef __LNET_API_H__ -#define __LNET_API_H__ - -/** \defgroup lnet LNet - * - * The Lustre Networking subsystem. - * - * LNet is an asynchronous message-passing API, which provides an unreliable - * connectionless service that can't guarantee any order. It supports OFA IB, - * TCP/IP, and Cray Interconnects, and routes between heterogeneous networks. - * - * @{ - */ - -#include - -/** \defgroup lnet_init_fini Initialization and cleanup - * The LNet must be properly initialized before any LNet calls can be made. - * @{ - */ -int LNetNIInit(lnet_pid_t requested_pid); -int LNetNIFini(void); -/** @} lnet_init_fini */ - -/** \defgroup lnet_addr LNet addressing and basic types - * - * Addressing scheme and basic data types of LNet. - * - * The LNet API is memory-oriented, so LNet must be able to address not only - * end-points but also memory region within a process address space. - * An ::lnet_nid_t addresses an end-point. An ::lnet_pid_t identifies a process - * in a node. A portal represents an opening in the address space of a - * process. Match bits is criteria to identify a region of memory inside a - * portal, and offset specifies an offset within the memory region. - * - * LNet creates a table of portals for each process during initialization. - * This table has MAX_PORTALS entries and its size can't be dynamically - * changed. 
A portal stays empty until the owning process starts to add - * memory regions to it. A portal is sometimes called an index because - * it's an entry in the portals table of a process. - * - * \see LNetMEAttach - * @{ - */ -int LNetGetId(unsigned int index, struct lnet_process_id *id); -int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, __u32 *order); - -/** @} lnet_addr */ - -/** \defgroup lnet_me Match entries - * - * A match entry (abbreviated as ME) describes a set of criteria to accept - * incoming requests. - * - * A portal is essentially a match list plus a set of attributes. A match - * list is a chain of MEs. Each ME includes a pointer to a memory descriptor - * and a set of match criteria. The match criteria can be used to reject - * incoming requests based on process ID or the match bits provided in the - * request. MEs can be dynamically inserted into a match list by LNetMEAttach() - * and LNetMEInsert(), and removed from its list by LNetMEUnlink(). - * @{ - */ -int LNetMEAttach(unsigned int portal, - struct lnet_process_id match_id_in, - __u64 match_bits_in, - __u64 ignore_bits_in, - enum lnet_unlink unlink_in, - enum lnet_ins_pos pos_in, - struct lnet_handle_me *handle_out); - -int LNetMEInsert(struct lnet_handle_me current_in, - struct lnet_process_id match_id_in, - __u64 match_bits_in, - __u64 ignore_bits_in, - enum lnet_unlink unlink_in, - enum lnet_ins_pos position_in, - struct lnet_handle_me *handle_out); - -int LNetMEUnlink(struct lnet_handle_me current_in); -/** @} lnet_me */ - -/** \defgroup lnet_md Memory descriptors - * - * A memory descriptor contains information about a region of a user's - * memory (either in kernel or user space) and optionally points to an - * event queue where information about the operations performed on the - * memory descriptor are recorded. Memory descriptor is abbreviated as - * MD and can be used interchangeably with the memory region it describes. 
- * - * The LNet API provides two operations to create MDs: LNetMDAttach() - * and LNetMDBind(); one operation to unlink and release the resources - * associated with a MD: LNetMDUnlink(). - * @{ - */ -int LNetMDAttach(struct lnet_handle_me current_in, - struct lnet_md md_in, - enum lnet_unlink unlink_in, - struct lnet_handle_md *md_handle_out); - -int LNetMDBind(struct lnet_md md_in, - enum lnet_unlink unlink_in, - struct lnet_handle_md *md_handle_out); - -int LNetMDUnlink(struct lnet_handle_md md_in); -/** @} lnet_md */ - -/** \defgroup lnet_eq Events and event queues - * - * Event queues (abbreviated as EQ) are used to log operations performed on - * local MDs. In particular, they signal the completion of a data transmission - * into or out of a MD. They can also be used to hold acknowledgments for - * completed PUT operations and indicate when a MD has been unlinked. Multiple - * MDs can share a single EQ. An EQ may have an optional event handler - * associated with it. If an event handler exists, it will be run for each - * event that is deposited into the EQ. - * - * In addition to the lnet_handle_eq, the LNet API defines two types - * associated with events: The ::lnet_event_kind defines the kinds of events - * that can be stored in an EQ. The lnet_event defines a structure that - * holds the information about with an event. - * - * There are five functions for dealing with EQs: LNetEQAlloc() is used to - * create an EQ and allocate the resources needed, while LNetEQFree() - * releases these resources and free the EQ. LNetEQGet() retrieves the next - * event from an EQ, and LNetEQWait() can be used to block a process until - * an EQ has at least one event. LNetEQPoll() can be used to test or wait - * on multiple EQs. 
- * @{ - */ -int LNetEQAlloc(unsigned int count_in, - lnet_eq_handler_t handler, - struct lnet_handle_eq *handle_out); - -int LNetEQFree(struct lnet_handle_eq eventq_in); - -int LNetEQPoll(struct lnet_handle_eq *eventqs_in, - int neq_in, - int timeout_ms, - int interruptible, - struct lnet_event *event_out, - int *which_eq_out); -/** @} lnet_eq */ - -/** \defgroup lnet_data Data movement operations - * - * The LNet API provides two data movement operations: LNetPut() - * and LNetGet(). - * @{ - */ -int LNetPut(lnet_nid_t self, - struct lnet_handle_md md_in, - enum lnet_ack_req ack_req_in, - struct lnet_process_id target_in, - unsigned int portal_in, - __u64 match_bits_in, - unsigned int offset_in, - __u64 hdr_data_in); - -int LNetGet(lnet_nid_t self, - struct lnet_handle_md md_in, - struct lnet_process_id target_in, - unsigned int portal_in, - __u64 match_bits_in, - unsigned int offset_in); -/** @} lnet_data */ - -/** \defgroup lnet_misc Miscellaneous operations. - * Miscellaneous operations. - * @{ - */ -int LNetSetLazyPortal(int portal); -int LNetClearLazyPortal(int portal); -int LNetCtl(unsigned int cmd, void *arg); -void LNetDebugPeer(struct lnet_process_id id); - -/** @} lnet_misc */ - -/** @} lnet */ -#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h deleted file mode 100644 index 973c17a1c4a1..000000000000 --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h +++ /dev/null @@ -1,652 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. - * - * lnet/include/lnet/lib-lnet.h - */ - -#ifndef __LNET_LIB_LNET_H__ -#define __LNET_LIB_LNET_H__ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -extern struct lnet the_lnet; /* THE network */ - -#if (BITS_PER_LONG == 32) -/* 2 CPTs, allowing more CPTs might make us under memory pressure */ -#define LNET_CPT_MAX_BITS 1 - -#else /* 64-bit system */ -/* - * 256 CPTs for thousands of CPUs, allowing more CPTs might make us - * under risk of consuming all lh_cookie. 
- */ -#define LNET_CPT_MAX_BITS 8 -#endif /* BITS_PER_LONG == 32 */ - -/* max allowed CPT number */ -#define LNET_CPT_MAX (1 << LNET_CPT_MAX_BITS) - -#define LNET_CPT_NUMBER (the_lnet.ln_cpt_number) -#define LNET_CPT_BITS (the_lnet.ln_cpt_bits) -#define LNET_CPT_MASK ((1ULL << LNET_CPT_BITS) - 1) - -/** exclusive lock */ -#define LNET_LOCK_EX CFS_PERCPT_LOCK_EX - -/* need both kernel and user-land acceptor */ -#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512 -#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023 - -static inline int lnet_is_route_alive(struct lnet_route *route) -{ - /* gateway is down */ - if (!route->lr_gateway->lp_alive) - return 0; - /* no NI status, assume it's alive */ - if ((route->lr_gateway->lp_ping_feats & - LNET_PING_FEAT_NI_STATUS) == 0) - return 1; - /* has NI status, check # down NIs */ - return route->lr_downis == 0; -} - -static inline int lnet_is_wire_handle_none(struct lnet_handle_wire *wh) -{ - return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE && - wh->wh_object_cookie == LNET_WIRE_HANDLE_COOKIE_NONE); -} - -static inline int lnet_md_exhausted(struct lnet_libmd *md) -{ - return (!md->md_threshold || - ((md->md_options & LNET_MD_MAX_SIZE) && - md->md_offset + md->md_max_size > md->md_length)); -} - -static inline int lnet_md_unlinkable(struct lnet_libmd *md) -{ - /* - * Should unlink md when its refcount is 0 and either: - * - md has been flagged for deletion (by auto unlink or - * LNetM[DE]Unlink, in the latter case md may not be exhausted). - * - auto unlink is on and md is exhausted. 
- */ - if (md->md_refcount) - return 0; - - if (md->md_flags & LNET_MD_FLAG_ZOMBIE) - return 1; - - return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) && - lnet_md_exhausted(md)); -} - -#define lnet_cpt_table() (the_lnet.ln_cpt_table) -#define lnet_cpt_current() cfs_cpt_current(the_lnet.ln_cpt_table, 1) - -static inline int -lnet_cpt_of_cookie(__u64 cookie) -{ - unsigned int cpt = (cookie >> LNET_COOKIE_TYPE_BITS) & LNET_CPT_MASK; - - /* - * LNET_CPT_NUMBER doesn't have to be power2, which means we can - * get illegal cpt from it's invalid cookie - */ - return cpt < LNET_CPT_NUMBER ? cpt : cpt % LNET_CPT_NUMBER; -} - -static inline void -lnet_res_lock(int cpt) -{ - cfs_percpt_lock(the_lnet.ln_res_lock, cpt); -} - -static inline void -lnet_res_unlock(int cpt) -{ - cfs_percpt_unlock(the_lnet.ln_res_lock, cpt); -} - -static inline int -lnet_res_lock_current(void) -{ - int cpt = lnet_cpt_current(); - - lnet_res_lock(cpt); - return cpt; -} - -static inline void -lnet_net_lock(int cpt) -{ - cfs_percpt_lock(the_lnet.ln_net_lock, cpt); -} - -static inline void -lnet_net_unlock(int cpt) -{ - cfs_percpt_unlock(the_lnet.ln_net_lock, cpt); -} - -static inline int -lnet_net_lock_current(void) -{ - int cpt = lnet_cpt_current(); - - lnet_net_lock(cpt); - return cpt; -} - -#define LNET_LOCK() lnet_net_lock(LNET_LOCK_EX) -#define LNET_UNLOCK() lnet_net_unlock(LNET_LOCK_EX) - -#define lnet_ptl_lock(ptl) spin_lock(&(ptl)->ptl_lock) -#define lnet_ptl_unlock(ptl) spin_unlock(&(ptl)->ptl_lock) -#define lnet_eq_wait_lock() spin_lock(&the_lnet.ln_eq_wait_lock) -#define lnet_eq_wait_unlock() spin_unlock(&the_lnet.ln_eq_wait_lock) -#define lnet_ni_lock(ni) spin_lock(&(ni)->ni_lock) -#define lnet_ni_unlock(ni) spin_unlock(&(ni)->ni_lock) - -#define MAX_PORTALS 64 - -static inline struct lnet_libmd * -lnet_md_alloc(struct lnet_md *umd) -{ - struct lnet_libmd *md; - unsigned int size; - unsigned int niov; - - if (umd->options & LNET_MD_KIOV) { - niov = umd->length; - size = offsetof(struct 
lnet_libmd, md_iov.kiov[niov]); - } else { - niov = umd->options & LNET_MD_IOVEC ? umd->length : 1; - size = offsetof(struct lnet_libmd, md_iov.iov[niov]); - } - - md = kzalloc(size, GFP_NOFS); - - if (md) { - /* Set here in case of early free */ - md->md_options = umd->options; - md->md_niov = niov; - INIT_LIST_HEAD(&md->md_list); - } - - return md; -} - -struct lnet_libhandle *lnet_res_lh_lookup(struct lnet_res_container *rec, - __u64 cookie); -void lnet_res_lh_initialize(struct lnet_res_container *rec, - struct lnet_libhandle *lh); -static inline void -lnet_res_lh_invalidate(struct lnet_libhandle *lh) -{ - /* NB: cookie is still useful, don't reset it */ - list_del(&lh->lh_hash_chain); -} - -static inline void -lnet_eq2handle(struct lnet_handle_eq *handle, struct lnet_eq *eq) -{ - if (!eq) { - LNetInvalidateEQHandle(handle); - return; - } - - handle->cookie = eq->eq_lh.lh_cookie; -} - -static inline struct lnet_eq * -lnet_handle2eq(struct lnet_handle_eq *handle) -{ - struct lnet_libhandle *lh; - - lh = lnet_res_lh_lookup(&the_lnet.ln_eq_container, handle->cookie); - if (!lh) - return NULL; - - return lh_entry(lh, struct lnet_eq, eq_lh); -} - -static inline void -lnet_md2handle(struct lnet_handle_md *handle, struct lnet_libmd *md) -{ - handle->cookie = md->md_lh.lh_cookie; -} - -static inline struct lnet_libmd * -lnet_handle2md(struct lnet_handle_md *handle) -{ - /* ALWAYS called with resource lock held */ - struct lnet_libhandle *lh; - int cpt; - - cpt = lnet_cpt_of_cookie(handle->cookie); - lh = lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt], - handle->cookie); - if (!lh) - return NULL; - - return lh_entry(lh, struct lnet_libmd, md_lh); -} - -static inline struct lnet_libmd * -lnet_wire_handle2md(struct lnet_handle_wire *wh) -{ - /* ALWAYS called with resource lock held */ - struct lnet_libhandle *lh; - int cpt; - - if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie) - return NULL; - - cpt = lnet_cpt_of_cookie(wh->wh_object_cookie); - lh = 
lnet_res_lh_lookup(the_lnet.ln_md_containers[cpt], - wh->wh_object_cookie); - if (!lh) - return NULL; - - return lh_entry(lh, struct lnet_libmd, md_lh); -} - -static inline void -lnet_me2handle(struct lnet_handle_me *handle, struct lnet_me *me) -{ - handle->cookie = me->me_lh.lh_cookie; -} - -static inline struct lnet_me * -lnet_handle2me(struct lnet_handle_me *handle) -{ - /* ALWAYS called with resource lock held */ - struct lnet_libhandle *lh; - int cpt; - - cpt = lnet_cpt_of_cookie(handle->cookie); - lh = lnet_res_lh_lookup(the_lnet.ln_me_containers[cpt], - handle->cookie); - if (!lh) - return NULL; - - return lh_entry(lh, struct lnet_me, me_lh); -} - -static inline void -lnet_peer_addref_locked(struct lnet_peer *lp) -{ - LASSERT(lp->lp_refcount > 0); - lp->lp_refcount++; -} - -void lnet_destroy_peer_locked(struct lnet_peer *lp); - -static inline void -lnet_peer_decref_locked(struct lnet_peer *lp) -{ - LASSERT(lp->lp_refcount > 0); - lp->lp_refcount--; - if (!lp->lp_refcount) - lnet_destroy_peer_locked(lp); -} - -static inline int -lnet_isrouter(struct lnet_peer *lp) -{ - return lp->lp_rtr_refcount ? 
1 : 0; -} - -static inline void -lnet_ni_addref_locked(struct lnet_ni *ni, int cpt) -{ - LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER); - LASSERT(*ni->ni_refs[cpt] >= 0); - - (*ni->ni_refs[cpt])++; -} - -static inline void -lnet_ni_addref(struct lnet_ni *ni) -{ - lnet_net_lock(0); - lnet_ni_addref_locked(ni, 0); - lnet_net_unlock(0); -} - -static inline void -lnet_ni_decref_locked(struct lnet_ni *ni, int cpt) -{ - LASSERT(cpt >= 0 && cpt < LNET_CPT_NUMBER); - LASSERT(*ni->ni_refs[cpt] > 0); - - (*ni->ni_refs[cpt])--; -} - -static inline void -lnet_ni_decref(struct lnet_ni *ni) -{ - lnet_net_lock(0); - lnet_ni_decref_locked(ni, 0); - lnet_net_unlock(0); -} - -void lnet_ni_free(struct lnet_ni *ni); -struct lnet_ni * -lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist); - -static inline int -lnet_nid2peerhash(lnet_nid_t nid) -{ - return hash_long(nid, LNET_PEER_HASH_BITS); -} - -static inline struct list_head * -lnet_net2rnethash(__u32 net) -{ - return &the_lnet.ln_remote_nets_hash[(LNET_NETNUM(net) + - LNET_NETTYP(net)) & - ((1U << the_lnet.ln_remote_nets_hbits) - 1)]; -} - -extern struct lnet_lnd the_lolnd; -extern int avoid_asym_router_failure; - -int lnet_cpt_of_nid_locked(lnet_nid_t nid); -int lnet_cpt_of_nid(lnet_nid_t nid); -struct lnet_ni *lnet_nid2ni_locked(lnet_nid_t nid, int cpt); -struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt); -struct lnet_ni *lnet_net2ni(__u32 net); - -extern int portal_rotor; - -int lnet_lib_init(void); -void lnet_lib_exit(void); - -int lnet_notify(struct lnet_ni *ni, lnet_nid_t peer, int alive, - unsigned long when); -void lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive, - unsigned long when); -int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway_nid, - unsigned int priority); -int lnet_check_routes(void); -int lnet_del_route(__u32 net, lnet_nid_t gw_nid); -void lnet_destroy_routes(void); -int lnet_get_route(int idx, __u32 *net, __u32 *hops, - lnet_nid_t *gateway, __u32 *alive, 
__u32 *priority); -int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg); - -void lnet_router_debugfs_init(void); -void lnet_router_debugfs_fini(void); -int lnet_rtrpools_alloc(int im_a_router); -void lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages); -int lnet_rtrpools_adjust(int tiny, int small, int large); -int lnet_rtrpools_enable(void); -void lnet_rtrpools_disable(void); -void lnet_rtrpools_free(int keep_pools); -struct lnet_remotenet *lnet_find_net_locked(__u32 net); -int lnet_dyn_add_ni(lnet_pid_t requested_pid, - struct lnet_ioctl_config_data *conf); -int lnet_dyn_del_ni(__u32 net); -int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason); - -int lnet_islocalnid(lnet_nid_t nid); -int lnet_islocalnet(__u32 net); - -void lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md, - unsigned int offset, unsigned int mlen); -void lnet_msg_detach_md(struct lnet_msg *msg, int status); -void lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev); -void lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type); -void lnet_msg_commit(struct lnet_msg *msg, int cpt); -void lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status); - -void lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev); -void lnet_prep_send(struct lnet_msg *msg, int type, - struct lnet_process_id target, unsigned int offset, - unsigned int len); -int lnet_send(lnet_nid_t nid, struct lnet_msg *msg, lnet_nid_t rtr_nid); -void lnet_return_tx_credits_locked(struct lnet_msg *msg); -void lnet_return_rx_credits_locked(struct lnet_msg *msg); -void lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp); -void lnet_drop_routed_msgs_locked(struct list_head *list, int cpt); - -/* portals functions */ -/* portals attributes */ -static inline int -lnet_ptl_is_lazy(struct lnet_portal *ptl) -{ - return !!(ptl->ptl_options & LNET_PTL_LAZY); -} - -static inline int -lnet_ptl_is_unique(struct lnet_portal *ptl) -{ - return 
!!(ptl->ptl_options & LNET_PTL_MATCH_UNIQUE); -} - -static inline int -lnet_ptl_is_wildcard(struct lnet_portal *ptl) -{ - return !!(ptl->ptl_options & LNET_PTL_MATCH_WILDCARD); -} - -static inline void -lnet_ptl_setopt(struct lnet_portal *ptl, int opt) -{ - ptl->ptl_options |= opt; -} - -static inline void -lnet_ptl_unsetopt(struct lnet_portal *ptl, int opt) -{ - ptl->ptl_options &= ~opt; -} - -/* match-table functions */ -struct list_head *lnet_mt_match_head(struct lnet_match_table *mtable, - struct lnet_process_id id, __u64 mbits); -struct lnet_match_table *lnet_mt_of_attach(unsigned int index, - struct lnet_process_id id, - __u64 mbits, __u64 ignore_bits, - enum lnet_ins_pos pos); -int lnet_mt_match_md(struct lnet_match_table *mtable, - struct lnet_match_info *info, struct lnet_msg *msg); - -/* portals match/attach functions */ -void lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md, - struct list_head *matches, struct list_head *drops); -void lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md); -int lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg); - -/* initialized and finalize portals */ -int lnet_portals_create(void); -void lnet_portals_destroy(void); - -/* message functions */ -int lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, - lnet_nid_t fromnid, void *private, int rdma_req); -int lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg); -int lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg); - -void lnet_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg, - int delayed, unsigned int offset, unsigned int mlen, - unsigned int rlen); -void lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg, - int delayed, unsigned int offset, - unsigned int mlen, unsigned int rlen); - -struct lnet_msg *lnet_create_reply_msg(struct lnet_ni *ni, - struct lnet_msg *get_msg); -void lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *msg, - unsigned int len); - -void 
lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int rc); - -void lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, - unsigned int nob); -void lnet_drop_delayed_msg_list(struct list_head *head, char *reason); -void lnet_recv_delayed_msg_list(struct list_head *head); - -int lnet_msg_container_setup(struct lnet_msg_container *container, int cpt); -void lnet_msg_container_cleanup(struct lnet_msg_container *container); -void lnet_msg_containers_destroy(void); -int lnet_msg_containers_create(void); - -char *lnet_msgtyp2str(int type); -void lnet_print_hdr(struct lnet_hdr *hdr); -int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold); - -/** \addtogroup lnet_fault_simulation @{ */ - -int lnet_fault_ctl(int cmd, struct libcfs_ioctl_data *data); -int lnet_fault_init(void); -void lnet_fault_fini(void); - -bool lnet_drop_rule_match(struct lnet_hdr *hdr); - -int lnet_delay_rule_add(struct lnet_fault_attr *attr); -int lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown); -int lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr, - struct lnet_fault_stat *stat); -void lnet_delay_rule_reset(void); -void lnet_delay_rule_check(void); -bool lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg); - -/** @} lnet_fault_simulation */ - -void lnet_counters_get(struct lnet_counters *counters); -void lnet_counters_reset(void); - -unsigned int lnet_iov_nob(unsigned int niov, struct kvec *iov); -int lnet_extract_iov(int dst_niov, struct kvec *dst, - int src_niov, const struct kvec *src, - unsigned int offset, unsigned int len); - -unsigned int lnet_kiov_nob(unsigned int niov, struct bio_vec *iov); -int lnet_extract_kiov(int dst_niov, struct bio_vec *dst, - int src_niov, const struct bio_vec *src, - unsigned int offset, unsigned int len); - -void lnet_copy_iov2iter(struct iov_iter *to, - unsigned int nsiov, const struct kvec *siov, - unsigned int soffset, unsigned int nob); -void lnet_copy_kiov2iter(struct iov_iter *to, - unsigned 
int nkiov, const struct bio_vec *kiov, - unsigned int kiovoffset, unsigned int nob); - -void lnet_me_unlink(struct lnet_me *me); - -void lnet_md_unlink(struct lnet_libmd *md); -void lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd); - -void lnet_register_lnd(struct lnet_lnd *lnd); -void lnet_unregister_lnd(struct lnet_lnd *lnd); - -int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, - __u32 local_ip, __u32 peer_ip, int peer_port); -void lnet_connect_console_error(int rc, lnet_nid_t peer_nid, - __u32 peer_ip, int port); -int lnet_count_acceptor_nis(void); -int lnet_acceptor_timeout(void); -int lnet_acceptor_port(void); - -int lnet_count_acceptor_nis(void); -int lnet_acceptor_port(void); - -int lnet_acceptor_start(void); -void lnet_acceptor_stop(void); - -int lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask); -int lnet_ipif_enumerate(char ***names); -void lnet_ipif_free_enumeration(char **names, int n); -int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize); -int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize); -int lnet_sock_getaddr(struct socket *socket, bool remote, __u32 *ip, int *port); -int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout); -int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout); - -int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog); -int lnet_sock_accept(struct socket **newsockp, struct socket *sock); -int lnet_sock_connect(struct socket **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port); -void libcfs_sock_release(struct socket *sock); - -int lnet_peers_start_down(void); -int lnet_peer_buffer_credits(struct lnet_ni *ni); - -int lnet_router_checker_start(void); -void lnet_router_checker_stop(void); -void lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net); -void lnet_swap_pinginfo(struct lnet_ping_info *info); - -int lnet_parse_ip2nets(char **networksp, 
char *ip2nets); -int lnet_parse_routes(char *route_str, int *im_a_router); -int lnet_parse_networks(struct list_head *nilist, char *networks); -int lnet_net_unique(__u32 net, struct list_head *nilist); - -int lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt); -struct lnet_peer *lnet_find_peer_locked(struct lnet_peer_table *ptable, - lnet_nid_t nid); -void lnet_peer_tables_cleanup(struct lnet_ni *ni); -void lnet_peer_tables_destroy(void); -int lnet_peer_tables_create(void); -void lnet_debug_peer(lnet_nid_t nid); -int lnet_get_peer_info(__u32 peer_index, __u64 *nid, - char alivness[LNET_MAX_STR_LEN], - __u32 *cpt_iter, __u32 *refcount, - __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits, - __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis, - __u32 *peer_tx_qnob); - -static inline void -lnet_peer_set_alive(struct lnet_peer *lp) -{ - lp->lp_last_query = jiffies; - lp->lp_last_alive = jiffies; - if (!lp->lp_alive) - lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); -} - -#endif diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h deleted file mode 100644 index cfe8ee424e94..000000000000 --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h +++ /dev/null @@ -1,666 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. - * - * lnet/include/lnet/lib-types.h - */ - -#ifndef __LNET_LIB_TYPES_H__ -#define __LNET_LIB_TYPES_H__ - -#include -#include -#include -#include - -#include -#include - -/* Max payload size */ -#define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD -#if (LNET_MAX_PAYLOAD < LNET_MTU) -# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" -#elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) -# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" -#endif - -/* forward refs */ -struct lnet_libmd; - -struct lnet_msg { - struct list_head msg_activelist; - struct list_head msg_list; /* Q for credits/MD */ - - struct lnet_process_id msg_target; - /* where is it from, it's only for building event */ - lnet_nid_t msg_from; - __u32 msg_type; - - /* committed for sending */ - unsigned int msg_tx_committed:1; - /* CPT # this message committed for sending */ - unsigned int msg_tx_cpt:15; - /* committed for receiving */ - unsigned int msg_rx_committed:1; - /* CPT # this message committed for receiving */ - unsigned int msg_rx_cpt:15; - /* queued for tx credit */ - unsigned int msg_tx_delayed:1; - /* queued for RX buffer */ - unsigned int msg_rx_delayed:1; - /* ready for pending on RX delay list */ - unsigned int msg_rx_ready_delay:1; - - unsigned int msg_vmflush:1; /* VM trying to free memory */ - unsigned int msg_target_is_router:1; /* sending to a router */ - unsigned int msg_routing:1; /* being forwarded */ - unsigned int msg_ack:1; /* ack on finalize (PUT) 
*/ - unsigned int msg_sending:1; /* outgoing message */ - unsigned int msg_receiving:1; /* being received */ - unsigned int msg_txcredit:1; /* taken an NI send credit */ - unsigned int msg_peertxcredit:1; /* taken a peer send credit */ - unsigned int msg_rtrcredit:1; /* taken a global router credit */ - unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ - unsigned int msg_onactivelist:1; /* on the activelist */ - unsigned int msg_rdma_get:1; - - struct lnet_peer *msg_txpeer; /* peer I'm sending to */ - struct lnet_peer *msg_rxpeer; /* peer I received from */ - - void *msg_private; - struct lnet_libmd *msg_md; - - unsigned int msg_len; - unsigned int msg_wanted; - unsigned int msg_offset; - unsigned int msg_niov; - struct kvec *msg_iov; - struct bio_vec *msg_kiov; - - struct lnet_event msg_ev; - struct lnet_hdr msg_hdr; -}; - -struct lnet_libhandle { - struct list_head lh_hash_chain; - __u64 lh_cookie; -}; - -#define lh_entry(ptr, type, member) \ - ((type *)((char *)(ptr) - (char *)(&((type *)0)->member))) - -struct lnet_eq { - struct list_head eq_list; - struct lnet_libhandle eq_lh; - unsigned long eq_enq_seq; - unsigned long eq_deq_seq; - unsigned int eq_size; - lnet_eq_handler_t eq_callback; - struct lnet_event *eq_events; - int **eq_refs; /* percpt refcount for EQ */ -}; - -struct lnet_me { - struct list_head me_list; - struct lnet_libhandle me_lh; - struct lnet_process_id me_match_id; - unsigned int me_portal; - unsigned int me_pos; /* hash offset in mt_hash */ - __u64 me_match_bits; - __u64 me_ignore_bits; - enum lnet_unlink me_unlink; - struct lnet_libmd *me_md; -}; - -struct lnet_libmd { - struct list_head md_list; - struct lnet_libhandle md_lh; - struct lnet_me *md_me; - char *md_start; - unsigned int md_offset; - unsigned int md_length; - unsigned int md_max_size; - int md_threshold; - int md_refcount; - unsigned int md_options; - unsigned int md_flags; - void *md_user_ptr; - struct lnet_eq *md_eq; - unsigned int md_niov; /* # frags */ - 
union { - struct kvec iov[LNET_MAX_IOV]; - struct bio_vec kiov[LNET_MAX_IOV]; - } md_iov; -}; - -#define LNET_MD_FLAG_ZOMBIE BIT(0) -#define LNET_MD_FLAG_AUTO_UNLINK BIT(1) -#define LNET_MD_FLAG_ABORTED BIT(2) - -struct lnet_test_peer { - /* info about peers we are trying to fail */ - struct list_head tp_list; /* ln_test_peers */ - lnet_nid_t tp_nid; /* matching nid */ - unsigned int tp_threshold; /* # failures to simulate */ -}; - -#define LNET_COOKIE_TYPE_MD 1 -#define LNET_COOKIE_TYPE_ME 2 -#define LNET_COOKIE_TYPE_EQ 3 -#define LNET_COOKIE_TYPE_BITS 2 -#define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) - -struct lnet_ni; /* forward ref */ - -struct lnet_lnd { - /* fields managed by portals */ - struct list_head lnd_list; /* stash in the LND table */ - int lnd_refcount; /* # active instances */ - - /* fields initialised by the LND */ - __u32 lnd_type; - - int (*lnd_startup)(struct lnet_ni *ni); - void (*lnd_shutdown)(struct lnet_ni *ni); - int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); - - /* - * In data movement APIs below, payload buffers are described as a set - * of 'niov' fragments which are... - * EITHER - * in virtual memory (struct iovec *iov != NULL) - * OR - * in pages (kernel only: plt_kiov_t *kiov != NULL). - * The LND may NOT overwrite these fragment descriptors. - * An 'offset' and may specify a byte offset within the set of - * fragments to start from - */ - - /* - * Start sending a preformatted message. 'private' is NULL for PUT and - * GET messages; otherwise this is a response to an incoming message - * and 'private' is the 'private' passed to lnet_parse(). Return - * non-zero for immediate failure, otherwise complete later with - * lnet_finalize() - */ - int (*lnd_send)(struct lnet_ni *ni, void *private, - struct lnet_msg *msg); - - /* - * Start receiving 'mlen' bytes of payload data, skipping the following - * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to - * lnet_parse(). 
Return non-zero for immediate failure, otherwise - * complete later with lnet_finalize(). This also gives back a receive - * credit if the LND does flow control. - */ - int (*lnd_recv)(struct lnet_ni *ni, void *private, struct lnet_msg *msg, - int delayed, struct iov_iter *to, unsigned int rlen); - - /* - * lnet_parse() has had to delay processing of this message - * (e.g. waiting for a forwarding buffer or send credits). Give the - * LND a chance to free urgently needed resources. If called, return 0 - * for success and do NOT give back a receive credit; that has to wait - * until lnd_recv() gets called. On failure return < 0 and - * release resources; lnd_recv() will not be called. - */ - int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, - struct lnet_msg *msg, void **new_privatep); - - /* notification of peer health */ - void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); - - /* query of peer aliveness */ - void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, - unsigned long *when); - - /* accept a new connection */ - int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock); -}; - -struct lnet_tx_queue { - int tq_credits; /* # tx credits free */ - int tq_credits_min; /* lowest it's been */ - int tq_credits_max; /* total # tx credits */ - struct list_head tq_delayed; /* delayed TXs */ -}; - -struct lnet_ni { - spinlock_t ni_lock; - struct list_head ni_list; /* chain on ln_nis */ - struct list_head ni_cptlist; /* chain on ln_nis_cpt */ - int ni_maxtxcredits; /* # tx credits */ - /* # per-peer send credits */ - int ni_peertxcredits; - /* # per-peer router buffer credits */ - int ni_peerrtrcredits; - /* seconds to consider peer dead */ - int ni_peertimeout; - int ni_ncpts; /* number of CPTs */ - __u32 *ni_cpts; /* bond NI on some CPTs */ - lnet_nid_t ni_nid; /* interface's NID */ - void *ni_data; /* instance-specific data */ - struct lnet_lnd *ni_lnd; /* procedural interface */ - struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */ 
- int **ni_refs; /* percpt reference count */ - time64_t ni_last_alive;/* when I was last alive */ - struct lnet_ni_status *ni_status; /* my health status */ - /* per NI LND tunables */ - struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables; - /* equivalent interfaces to use */ - char *ni_interfaces[LNET_MAX_INTERFACES]; - /* original net namespace */ - struct net *ni_net_ns; -}; - -#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL - -/* - * NB: value of these features equal to LNET_PROTO_PING_VERSION_x - * of old LNet, so there shouldn't be any compatibility issue - */ -#define LNET_PING_FEAT_INVAL (0) /* no feature */ -#define LNET_PING_FEAT_BASE BIT(0) /* just a ping */ -#define LNET_PING_FEAT_NI_STATUS BIT(1) /* return NI status */ -#define LNET_PING_FEAT_RTE_DISABLED BIT(2) /* Routing enabled */ - -#define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \ - LNET_PING_FEAT_NI_STATUS) - -/* router checker data, per router */ -#define LNET_MAX_RTR_NIS 16 -#define LNET_PINGINFO_SIZE offsetof(struct lnet_ping_info, pi_ni[LNET_MAX_RTR_NIS]) -struct lnet_rc_data { - /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ - struct list_head rcd_list; - struct lnet_handle_md rcd_mdh; /* ping buffer MD */ - struct lnet_peer *rcd_gateway; /* reference to gateway */ - struct lnet_ping_info *rcd_pinginfo; /* ping buffer */ -}; - -struct lnet_peer { - struct list_head lp_hashlist; /* chain on peer hash */ - struct list_head lp_txq; /* messages blocking for - * tx credits - */ - struct list_head lp_rtrq; /* messages blocking for - * router credits - */ - struct list_head lp_rtr_list; /* chain on router list */ - int lp_txcredits; /* # tx credits available */ - int lp_mintxcredits; /* low water mark */ - int lp_rtrcredits; /* # router credits */ - int lp_minrtrcredits; /* low water mark */ - unsigned int lp_alive:1; /* alive/dead? */ - unsigned int lp_notify:1; /* notification outstanding? */ - unsigned int lp_notifylnd:1;/* outstanding notification - * for LND? 
- */ - unsigned int lp_notifying:1; /* some thread is handling - * notification - */ - unsigned int lp_ping_notsent;/* SEND event outstanding - * from ping - */ - int lp_alive_count; /* # times router went - * dead<->alive - */ - long lp_txqnob; /* ytes queued for sending */ - unsigned long lp_timestamp; /* time of last aliveness - * news - */ - unsigned long lp_ping_timestamp;/* time of last ping - * attempt - */ - unsigned long lp_ping_deadline; /* != 0 if ping reply - * expected - */ - unsigned long lp_last_alive; /* when I was last alive */ - unsigned long lp_last_query; /* when lp_ni was queried - * last time - */ - struct lnet_ni *lp_ni; /* interface peer is on */ - lnet_nid_t lp_nid; /* peer's NID */ - int lp_refcount; /* # refs */ - int lp_cpt; /* CPT this peer attached on */ - /* # refs from lnet_route::lr_gateway */ - int lp_rtr_refcount; - /* returned RC ping features */ - unsigned int lp_ping_feats; - struct list_head lp_routes; /* routers on this peer */ - struct lnet_rc_data *lp_rcd; /* router checker state */ -}; - -/* peer hash size */ -#define LNET_PEER_HASH_BITS 9 -#define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS) - -/* peer hash table */ -struct lnet_peer_table { - int pt_version; /* /proc validity stamp */ - int pt_number; /* # peers extant */ - /* # zombies to go to deathrow (and not there yet) */ - int pt_zombies; - struct list_head pt_deathrow; /* zombie peers */ - struct list_head *pt_hash; /* NID->peer hash */ -}; - -/* - * peer aliveness is enabled only on routers for peers in a network where the - * lnet_ni::ni_peertimeout has been set to a positive value - */ -#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing && \ - (lp)->lp_ni->ni_peertimeout > 0) - -struct lnet_route { - struct list_head lr_list; /* chain on net */ - struct list_head lr_gwlist; /* chain on gateway */ - struct lnet_peer *lr_gateway; /* router node */ - __u32 lr_net; /* remote network number */ - int lr_seq; /* sequence for round-robin */ - unsigned int 
lr_downis; /* number of down NIs */ - __u32 lr_hops; /* how far I am */ - unsigned int lr_priority; /* route priority */ -}; - -#define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) -#define LNET_REMOTE_NETS_HASH_MAX (1U << 16) -#define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) - -struct lnet_remotenet { - struct list_head lrn_list; /* chain on - * ln_remote_nets_hash - */ - struct list_head lrn_routes; /* routes to me */ - __u32 lrn_net; /* my net number */ -}; - -/** lnet message has credit and can be submitted to lnd for send/receive */ -#define LNET_CREDIT_OK 0 -/** lnet message is waiting for credit */ -#define LNET_CREDIT_WAIT 1 - -struct lnet_rtrbufpool { - struct list_head rbp_bufs; /* my free buffer pool */ - struct list_head rbp_msgs; /* messages blocking - * for a buffer - */ - int rbp_npages; /* # pages in each buffer */ - /* requested number of buffers */ - int rbp_req_nbuffers; - /* # buffers actually allocated */ - int rbp_nbuffers; - int rbp_credits; /* # free buffers - * blocked messages - */ - int rbp_mincredits; /* low water mark */ -}; - -struct lnet_rtrbuf { - struct list_head rb_list; /* chain on rbp_bufs */ - struct lnet_rtrbufpool *rb_pool; /* owning pool */ - struct bio_vec rb_kiov[0]; /* the buffer space */ -}; - -#define LNET_PEER_HASHSIZE 503 /* prime! 
*/ - -#define LNET_TINY_BUF_IDX 0 -#define LNET_SMALL_BUF_IDX 1 -#define LNET_LARGE_BUF_IDX 2 - -/* # different router buffer pools */ -#define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1) - -enum lnet_match_flags { - /* Didn't match anything */ - LNET_MATCHMD_NONE = BIT(0), - /* Matched OK */ - LNET_MATCHMD_OK = BIT(1), - /* Must be discarded */ - LNET_MATCHMD_DROP = BIT(2), - /* match and buffer is exhausted */ - LNET_MATCHMD_EXHAUSTED = BIT(3), - /* match or drop */ - LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), -}; - -/* Options for lnet_portal::ptl_options */ -#define LNET_PTL_LAZY BIT(0) -#define LNET_PTL_MATCH_UNIQUE BIT(1) /* unique match, for RDMA */ -#define LNET_PTL_MATCH_WILDCARD BIT(2) /* wildcard match, request portal */ - -/* parameter for matching operations (GET, PUT) */ -struct lnet_match_info { - __u64 mi_mbits; - struct lnet_process_id mi_id; - unsigned int mi_opc; - unsigned int mi_portal; - unsigned int mi_rlength; - unsigned int mi_roffset; -}; - -/* ME hash of RDMA portal */ -#define LNET_MT_HASH_BITS 8 -#define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS) -#define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1) -/* - * we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_hash, - * the last entry is reserved for MEs with ignore-bits - */ -#define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE -/* - * __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which - * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the - * ME-list with ignore-bits, which is mtable::mt_hash[LNET_MT_HASH_IGNORE] - */ -#define LNET_MT_BITS_U64 6 /* 2^6 bits */ -#define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64) -#define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1) - -/* portal match table */ -struct lnet_match_table { - /* reserved for upcoming patches, CPU partition ID */ - unsigned int mt_cpt; - unsigned int mt_portal; /* portal index */ - /* - * match table is set as "enabled" if 
there's non-exhausted MD - * attached on mt_mhash, it's only valid for wildcard portal - */ - unsigned int mt_enabled; - /* bitmap to flag whether MEs on mt_hash are exhausted or not */ - __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP]; - struct list_head *mt_mhash; /* matching hash */ -}; - -/* these are only useful for wildcard portal */ -/* Turn off message rotor for wildcard portals */ -#define LNET_PTL_ROTOR_OFF 0 -/* round-robin dispatch all PUT messages for wildcard portals */ -#define LNET_PTL_ROTOR_ON 1 -/* round-robin dispatch routed PUT message for wildcard portals */ -#define LNET_PTL_ROTOR_RR_RT 2 -/* dispatch routed PUT message by hashing source NID for wildcard portals */ -#define LNET_PTL_ROTOR_HASH_RT 3 - -struct lnet_portal { - spinlock_t ptl_lock; - unsigned int ptl_index; /* portal ID, reserved */ - /* flags on this portal: lazy, unique... */ - unsigned int ptl_options; - /* list of messages which are stealing buffer */ - struct list_head ptl_msg_stealing; - /* messages blocking for MD */ - struct list_head ptl_msg_delayed; - /* Match table for each CPT */ - struct lnet_match_table **ptl_mtables; - /* spread rotor of incoming "PUT" */ - unsigned int ptl_rotor; - /* # active entries for this portal */ - int ptl_mt_nmaps; - /* array of active entries' cpu-partition-id */ - int ptl_mt_maps[0]; -}; - -#define LNET_LH_HASH_BITS 12 -#define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS) -#define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1) - -/* resource container (ME, MD, EQ) */ -struct lnet_res_container { - unsigned int rec_type; /* container type */ - __u64 rec_lh_cookie; /* cookie generator */ - struct list_head rec_active; /* active resource list */ - struct list_head *rec_lh_hash; /* handle hash */ -}; - -/* message container */ -struct lnet_msg_container { - int msc_init; /* initialized or not */ - /* max # threads finalizing */ - int msc_nfinalizers; - /* msgs waiting to complete finalizing */ - struct list_head msc_finalizing; - struct list_head 
msc_active; /* active message list */ - /* threads doing finalization */ - void **msc_finalizers; -}; - -/* Router Checker states */ -#define LNET_RC_STATE_SHUTDOWN 0 /* not started */ -#define LNET_RC_STATE_RUNNING 1 /* started up OK */ -#define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */ - -struct lnet { - /* CPU partition table of LNet */ - struct cfs_cpt_table *ln_cpt_table; - /* number of CPTs in ln_cpt_table */ - unsigned int ln_cpt_number; - unsigned int ln_cpt_bits; - - /* protect LNet resources (ME/MD/EQ) */ - struct cfs_percpt_lock *ln_res_lock; - /* # portals */ - int ln_nportals; - /* the vector of portals */ - struct lnet_portal **ln_portals; - /* percpt ME containers */ - struct lnet_res_container **ln_me_containers; - /* percpt MD container */ - struct lnet_res_container **ln_md_containers; - - /* Event Queue container */ - struct lnet_res_container ln_eq_container; - wait_queue_head_t ln_eq_waitq; - spinlock_t ln_eq_wait_lock; - unsigned int ln_remote_nets_hbits; - - /* protect NI, peer table, credits, routers, rtrbuf... 
*/ - struct cfs_percpt_lock *ln_net_lock; - /* percpt message containers for active/finalizing/freed message */ - struct lnet_msg_container **ln_msg_containers; - struct lnet_counters **ln_counters; - struct lnet_peer_table **ln_peer_tables; - /* failure simulation */ - struct list_head ln_test_peers; - struct list_head ln_drop_rules; - struct list_head ln_delay_rules; - - struct list_head ln_nis; /* LND instances */ - /* NIs bond on specific CPT(s) */ - struct list_head ln_nis_cpt; - /* dying LND instances */ - struct list_head ln_nis_zombie; - struct lnet_ni *ln_loni; /* the loopback NI */ - - /* remote networks with routes to them */ - struct list_head *ln_remote_nets_hash; - /* validity stamp */ - __u64 ln_remote_nets_version; - /* list of all known routers */ - struct list_head ln_routers; - /* validity stamp */ - __u64 ln_routers_version; - /* percpt router buffer pools */ - struct lnet_rtrbufpool **ln_rtrpools; - - struct lnet_handle_md ln_ping_target_md; - struct lnet_handle_eq ln_ping_target_eq; - struct lnet_ping_info *ln_ping_info; - - /* router checker startup/shutdown state */ - int ln_rc_state; - /* router checker's event queue */ - struct lnet_handle_eq ln_rc_eqh; - /* rcd still pending on net */ - struct list_head ln_rcd_deathrow; - /* rcd ready for free */ - struct list_head ln_rcd_zombie; - /* serialise startup/shutdown */ - struct completion ln_rc_signal; - - struct mutex ln_api_mutex; - struct mutex ln_lnd_mutex; - struct mutex ln_delay_mutex; - /* Have I called LNetNIInit myself? */ - int ln_niinit_self; - /* LNetNIInit/LNetNIFini counter */ - int ln_refcount; - /* shutdown in progress */ - int ln_shutdown; - - int ln_routing; /* am I a router? 
*/ - lnet_pid_t ln_pid; /* requested pid */ - /* uniquely identifies this ni in this epoch */ - __u64 ln_interface_cookie; - /* registered LNDs */ - struct list_head ln_lnds; - - /* test protocol compatibility flags */ - int ln_testprotocompat; - - /* - * 0 - load the NIs from the mod params - * 1 - do not load the NIs from the mod params - * Reverse logic to ensure that other calls to LNetNIInit - * need no change - */ - bool ln_nis_from_mod_params; - - /* - * waitq for router checker. As long as there are no routes in - * the list, the router checker will sleep on this queue. when - * routes are added the thread will wake up - */ - wait_queue_head_t ln_rc_waitq; - -}; - -#endif diff --git a/drivers/staging/lustre/include/linux/lnet/socklnd.h b/drivers/staging/lustre/include/linux/lnet/socklnd.h deleted file mode 100644 index 6bd1bca190a3..000000000000 --- a/drivers/staging/lustre/include/linux/lnet/socklnd.h +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012 - 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. - * - * lnet/include/lnet/socklnd.h - */ -#ifndef __LNET_LNET_SOCKLND_H__ -#define __LNET_LNET_SOCKLND_H__ - -#include -#include - -struct ksock_hello_msg { - __u32 kshm_magic; /* magic number of socklnd message */ - __u32 kshm_version; /* version of socklnd message */ - lnet_nid_t kshm_src_nid; /* sender's nid */ - lnet_nid_t kshm_dst_nid; /* destination nid */ - lnet_pid_t kshm_src_pid; /* sender's pid */ - lnet_pid_t kshm_dst_pid; /* destination pid */ - __u64 kshm_src_incarnation; /* sender's incarnation */ - __u64 kshm_dst_incarnation; /* destination's incarnation */ - __u32 kshm_ctype; /* connection type */ - __u32 kshm_nips; /* # IP addrs */ - __u32 kshm_ips[0]; /* IP addrs */ -} WIRE_ATTR; - -struct ksock_lnet_msg { - struct lnet_hdr ksnm_hdr; /* lnet hdr */ - - /* - * ksnm_payload is removed because of winnt compiler's limitation: - * zero-sized array can only be placed at the tail of [nested] - * structure definitions. 
lnet payload will be stored just after - * the body of structure ksock_lnet_msg_t - */ -} WIRE_ATTR; - -struct ksock_msg { - __u32 ksm_type; /* type of socklnd message */ - __u32 ksm_csum; /* checksum if != 0 */ - __u64 ksm_zc_cookies[2]; /* Zero-Copy request/ACK cookie */ - union { - struct ksock_lnet_msg lnetmsg; /* lnet message, it's empty if - * it's NOOP - */ - } WIRE_ATTR ksm_u; -} WIRE_ATTR; - -#define KSOCK_MSG_NOOP 0xC0 /* ksm_u empty */ -#define KSOCK_MSG_LNET 0xC1 /* lnet msg */ - -/* - * We need to know this number to parse hello msg from ksocklnd in - * other LND (usocklnd, for example) - */ -#define KSOCK_PROTO_V2 2 -#define KSOCK_PROTO_V3 3 - -#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h deleted file mode 100644 index c4d9472b374f..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_debug.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2014, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/include/libcfs/libcfs_debug.h - * - * Debug messages and assertions - * - */ - -#ifndef __UAPI_LIBCFS_DEBUG_H__ -#define __UAPI_LIBCFS_DEBUG_H__ - -/** - * Format for debug message headers - */ -struct ptldebug_header { - __u32 ph_len; - __u32 ph_flags; - __u32 ph_subsys; - __u32 ph_mask; - __u16 ph_cpu_id; - __u16 ph_type; - /* time_t overflow in 2106 */ - __u32 ph_sec; - __u64 ph_usec; - __u32 ph_stack; - __u32 ph_pid; - __u32 ph_extern_pid; - __u32 ph_line_num; -} __attribute__((packed)); - -#define PH_FLAG_FIRST_RECORD 1 - -/* Debugging subsystems (32 bits, non-overlapping) */ -#define S_UNDEFINED 0x00000001 -#define S_MDC 0x00000002 -#define S_MDS 0x00000004 -#define S_OSC 0x00000008 -#define S_OST 0x00000010 -#define S_CLASS 0x00000020 -#define S_LOG 0x00000040 -#define S_LLITE 0x00000080 -#define S_RPC 0x00000100 -#define S_MGMT 0x00000200 -#define S_LNET 0x00000400 -#define S_LND 0x00000800 /* ALL LNDs */ -#define S_PINGER 0x00001000 -#define S_FILTER 0x00002000 -#define S_LIBCFS 0x00004000 -#define S_ECHO 0x00008000 -#define S_LDLM 0x00010000 -#define S_LOV 0x00020000 -#define S_LQUOTA 0x00040000 -#define S_OSD 0x00080000 -#define S_LFSCK 0x00100000 -#define S_SNAPSHOT 0x00200000 -/* unused */ -#define S_LMV 0x00800000 /* b_new_cmd */ -/* unused */ -#define S_SEC 0x02000000 /* upcall cache */ -#define S_GSS 0x04000000 /* b_new_cmd */ -/* unused */ -#define S_MGC 0x10000000 -#define S_MGS 0x20000000 -#define S_FID 0x40000000 /* b_new_cmd */ -#define S_FLD 0x80000000 /* b_new_cmd */ - -#define LIBCFS_DEBUG_SUBSYS_NAMES { \ - "undefined", "mdc", "mds", "osc", "ost", "class", "log", \ - "llite", "rpc", "mgmt", "lnet", "lnd", "pinger", "filter", \ - "libcfs", "echo", "ldlm", "lov", "lquota", "osd", "lfsck", \ - "snapshot", "", "lmv", "", "sec", "gss", "", "mgc", "mgs", \ - "fid", "fld", NULL } - -/* Debugging masks (32 bits, 
non-overlapping) */ -#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ -#define D_INODE 0x00000002 -#define D_SUPER 0x00000004 -#define D_EXT2 0x00000008 /* anything from ext2_debug */ -#define D_MALLOC 0x00000010 /* print malloc, free information */ -#define D_CACHE 0x00000020 /* cache-related items */ -#define D_INFO 0x00000040 /* general information */ -#define D_IOCTL 0x00000080 /* ioctl related information */ -#define D_NETERROR 0x00000100 /* network errors */ -#define D_NET 0x00000200 /* network communications */ -#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */ -#define D_BUFFS 0x00000800 -#define D_OTHER 0x00001000 -#define D_DENTRY 0x00002000 -#define D_NETTRACE 0x00004000 -#define D_PAGE 0x00008000 /* bulk page handling */ -#define D_DLMTRACE 0x00010000 -#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */ -#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */ -#define D_HA 0x00080000 /* recovery and failover */ -#define D_RPCTRACE 0x00100000 /* for distributed debugging */ -#define D_VFSTRACE 0x00200000 -#define D_READA 0x00400000 /* read-ahead */ -#define D_MMAP 0x00800000 -#define D_CONFIG 0x01000000 -#define D_CONSOLE 0x02000000 -#define D_QUOTA 0x04000000 -#define D_SEC 0x08000000 -#define D_LFSCK 0x10000000 /* For both OI scrub and LFSCK */ -#define D_HSM 0x20000000 -#define D_SNAPSHOT 0x40000000 /* snapshot */ -#define D_LAYOUT 0x80000000 - -#define LIBCFS_DEBUG_MASKS_NAMES { \ - "trace", "inode", "super", "ext2", "malloc", "cache", "info", \ - "ioctl", "neterror", "net", "warning", "buffs", "other", \ - "dentry", "nettrace", "page", "dlmtrace", "error", "emerg", \ - "ha", "rpctrace", "vfstrace", "reada", "mmap", "config", \ - "console", "quota", "sec", "lfsck", "hsm", "snapshot", "layout",\ - NULL } - -#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE) - -#define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log" - -#endif /* __UAPI_LIBCFS_DEBUG_H__ */ diff --git 
a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h b/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h deleted file mode 100644 index cce6b58e3682..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lnet/libcfs_ioctl.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/include/libcfs/libcfs_ioctl.h - * - * Low-level ioctl data structures. Kernel ioctl functions declared here, - * and user space functions are in libcfs/util/ioctl.h. 
- * - */ - -#ifndef __LIBCFS_IOCTL_H__ -#define __LIBCFS_IOCTL_H__ - -#include -#include - -#define LIBCFS_IOCTL_VERSION 0x0001000a -#define LIBCFS_IOCTL_VERSION2 0x0001000b - -struct libcfs_ioctl_hdr { - __u32 ioc_len; - __u32 ioc_version; -}; - -/** max size to copy from userspace */ -#define LIBCFS_IOC_DATA_MAX (128 * 1024) - -struct libcfs_ioctl_data { - struct libcfs_ioctl_hdr ioc_hdr; - - __u64 ioc_nid; - __u64 ioc_u64[1]; - - __u32 ioc_flags; - __u32 ioc_count; - __u32 ioc_net; - __u32 ioc_u32[7]; - - __u32 ioc_inllen1; - char *ioc_inlbuf1; - __u32 ioc_inllen2; - char *ioc_inlbuf2; - - __u32 ioc_plen1; /* buffers in userspace */ - void __user *ioc_pbuf1; - __u32 ioc_plen2; /* buffers in userspace */ - void __user *ioc_pbuf2; - - char ioc_bulk[0]; -}; - -struct libcfs_debug_ioctl_data { - struct libcfs_ioctl_hdr hdr; - unsigned int subs; - unsigned int debug; -}; - -/* 'f' ioctls are defined in lustre_ioctl.h and lustre_user.h except for: */ -#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long) -#define IOCTL_LIBCFS_TYPE long - -#define IOC_LIBCFS_TYPE ('e') -#define IOC_LIBCFS_MIN_NR 30 -/* libcfs ioctls */ -/* IOC_LIBCFS_PANIC obsolete in 2.8.0, was _IOWR('e', 30, IOCTL_LIBCFS_TYPE) */ -#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE) -/* IOC_LIBCFS_MEMHOG obsolete in 2.8.0, was _IOWR('e', 36, IOCTL_LIBCFS_TYPE) */ -/* lnet ioctls */ -#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE) -/* IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE) */ -#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PING 
_IOWR('e', 61, IOCTL_LIBCFS_TYPE) -/* IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE) */ -#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LNET_FAULT _IOWR('e', 64, IOCTL_LIBCFS_TYPE) -/* lnd ioctls */ -#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE) -/* ioctl 77 is free for use */ -#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE) - -/* - * DLC Specific IOCTL numbers. - * In order to maintain backward compatibility with any possible external - * tools which might be accessing the IOCTL numbers, a new group of IOCTL - * number have been allocated. 
- */ -#define IOCTL_CONFIG_SIZE struct lnet_ioctl_config_data -#define IOC_LIBCFS_ADD_ROUTE _IOWR(IOC_LIBCFS_TYPE, 81, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_DEL_ROUTE _IOWR(IOC_LIBCFS_TYPE, 82, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_ROUTE _IOWR(IOC_LIBCFS_TYPE, 83, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_ADD_NET _IOWR(IOC_LIBCFS_TYPE, 84, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_DEL_NET _IOWR(IOC_LIBCFS_TYPE, 85, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_NET _IOWR(IOC_LIBCFS_TYPE, 86, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_CONFIG_RTR _IOWR(IOC_LIBCFS_TYPE, 87, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_ADD_BUF _IOWR(IOC_LIBCFS_TYPE, 88, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_BUF _IOWR(IOC_LIBCFS_TYPE, 89, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_PEER_INFO _IOWR(IOC_LIBCFS_TYPE, 90, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_GET_LNET_STATS _IOWR(IOC_LIBCFS_TYPE, 91, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_MAX_NR 91 - -#endif /* __LIBCFS_IOCTL_H__ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h deleted file mode 100644 index c1619f411d81..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-dlc.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * LGPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. 
- * - * LGPL HEADER END - * - */ -/* - * Copyright (c) 2014, Intel Corporation. - */ -/* - * Author: Amir Shehata - */ - -#ifndef LNET_DLC_H -#define LNET_DLC_H - -#include -#include - -#define MAX_NUM_SHOW_ENTRIES 32 -#define LNET_MAX_STR_LEN 128 -#define LNET_MAX_SHOW_NUM_CPT 128 -#define LNET_UNDEFINED_HOPS ((__u32)(-1)) - -struct lnet_ioctl_config_lnd_cmn_tunables { - __u32 lct_version; - __u32 lct_peer_timeout; - __u32 lct_peer_tx_credits; - __u32 lct_peer_rtr_credits; - __u32 lct_max_tx_credits; -}; - -struct lnet_ioctl_config_o2iblnd_tunables { - __u32 lnd_version; - __u32 lnd_peercredits_hiw; - __u32 lnd_map_on_demand; - __u32 lnd_concurrent_sends; - __u32 lnd_fmr_pool_size; - __u32 lnd_fmr_flush_trigger; - __u32 lnd_fmr_cache; - __u16 lnd_conns_per_peer; - __u16 pad; -}; - -struct lnet_ioctl_config_lnd_tunables { - struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn; - union { - struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib; - } lt_tun_u; -}; - -struct lnet_ioctl_net_config { - char ni_interfaces[LNET_MAX_INTERFACES][LNET_MAX_STR_LEN]; - __u32 ni_status; - __u32 ni_cpts[LNET_MAX_SHOW_NUM_CPT]; - char cfg_bulk[0]; -}; - -#define LNET_TINY_BUF_IDX 0 -#define LNET_SMALL_BUF_IDX 1 -#define LNET_LARGE_BUF_IDX 2 - -/* # different router buffer pools */ -#define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1) - -struct lnet_ioctl_pool_cfg { - struct { - __u32 pl_npages; - __u32 pl_nbuffers; - __u32 pl_credits; - __u32 pl_mincredits; - } pl_pools[LNET_NRBPOOLS]; - __u32 pl_routing; -}; - -struct lnet_ioctl_config_data { - struct libcfs_ioctl_hdr cfg_hdr; - - __u32 cfg_net; - __u32 cfg_count; - __u64 cfg_nid; - __u32 cfg_ncpts; - - union { - struct { - __u32 rtr_hop; - __u32 rtr_priority; - __u32 rtr_flags; - } cfg_route; - struct { - char net_intf[LNET_MAX_STR_LEN]; - __s32 net_peer_timeout; - __s32 net_peer_tx_credits; - __s32 net_peer_rtr_credits; - __s32 net_max_tx_credits; - __u32 net_cksum_algo; - __u32 net_interface_count; - } cfg_net; - struct { - __u32 buf_enable; 
- __s32 buf_tiny; - __s32 buf_small; - __s32 buf_large; - } cfg_buffers; - } cfg_config_u; - - char cfg_bulk[0]; -}; - -struct lnet_ioctl_peer { - struct libcfs_ioctl_hdr pr_hdr; - __u32 pr_count; - __u32 pr_pad; - __u64 pr_nid; - - union { - struct { - char cr_aliveness[LNET_MAX_STR_LEN]; - __u32 cr_refcount; - __u32 cr_ni_peer_tx_credits; - __u32 cr_peer_tx_credits; - __u32 cr_peer_rtr_credits; - __u32 cr_peer_min_rtr_credits; - __u32 cr_peer_tx_qnob; - __u32 cr_ncpt; - } pr_peer_credits; - } pr_lnd_u; -}; - -struct lnet_ioctl_lnet_stats { - struct libcfs_ioctl_hdr st_hdr; - struct lnet_counters st_cntrs; -}; - -#endif /* LNET_DLC_H */ diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h deleted file mode 100644 index 1be9b7aa7326..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lnet/lnet-types.h +++ /dev/null @@ -1,669 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012 - 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. - */ - -#ifndef __LNET_TYPES_H__ -#define __LNET_TYPES_H__ - -#include -#include - -/** \addtogroup lnet - * @{ - */ - -#define LNET_VERSION "0.6.0" - -/** \addtogroup lnet_addr - * @{ - */ - -/** Portal reserved for LNet's own use. - * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments. - */ -#define LNET_RESERVED_PORTAL 0 - -/** - * Address of an end-point in an LNet network. - * - * A node can have multiple end-points and hence multiple addresses. - * An LNet network can be a simple network (e.g. tcp0) or a network of - * LNet networks connected by LNet routers. Therefore an end-point address - * has two parts: network ID, and address within a network. - * - * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID. - */ -typedef __u64 lnet_nid_t; -/** - * ID of a process in a node. Shortened as PID to distinguish from - * lnet_process_id, the global process ID. - */ -typedef __u32 lnet_pid_t; - -/** wildcard NID that matches any end-point address */ -#define LNET_NID_ANY ((lnet_nid_t)(-1)) -/** wildcard PID that matches any lnet_pid_t */ -#define LNET_PID_ANY ((lnet_pid_t)(-1)) - -#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */ -#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */ -#define LNET_PID_LUSTRE 12345 - -#define LNET_TIME_FOREVER (-1) - -/* how an LNET NID encodes net:address */ -/** extract the address part of an lnet_nid_t */ - -static inline __u32 LNET_NIDADDR(lnet_nid_t nid) -{ - return nid & 0xffffffff; -} - -static inline __u32 LNET_NIDNET(lnet_nid_t nid) -{ - return (nid >> 32) & 0xffffffff; -} - -static inline lnet_nid_t LNET_MKNID(__u32 net, __u32 addr) -{ - return (((__u64)net) << 32) | addr; -} - -static inline __u32 LNET_NETNUM(__u32 net) -{ - return net & 0xffff; -} - -static inline __u32 LNET_NETTYP(__u32 net) -{ - return (net >> 16) & 0xffff; -} - -static inline __u32 LNET_MKNET(__u32 type, 
__u32 num) -{ - return (type << 16) | num; -} - -#define WIRE_ATTR __packed - -/* Packed version of lnet_process_id to transfer via network */ -struct lnet_process_id_packed { - /* node id / process id */ - lnet_nid_t nid; - lnet_pid_t pid; -} WIRE_ATTR; - -/* - * The wire handle's interface cookie only matches one network interface in - * one epoch (i.e. new cookie when the interface restarts or the node - * reboots). The object cookie only matches one object on that interface - * during that object's lifetime (i.e. no cookie re-use). - */ -struct lnet_handle_wire { - __u64 wh_interface_cookie; - __u64 wh_object_cookie; -} WIRE_ATTR; - -enum lnet_msg_type { - LNET_MSG_ACK = 0, - LNET_MSG_PUT, - LNET_MSG_GET, - LNET_MSG_REPLY, - LNET_MSG_HELLO, -}; - -/* - * The variant fields of the portals message header are aligned on an 8 - * byte boundary in the message header. Note that all types used in these - * wire structs MUST be fixed size and the smaller types are placed at the - * end. - */ -struct lnet_ack { - struct lnet_handle_wire dst_wmd; - __u64 match_bits; - __u32 mlength; -} WIRE_ATTR; - -struct lnet_put { - struct lnet_handle_wire ack_wmd; - __u64 match_bits; - __u64 hdr_data; - __u32 ptl_index; - __u32 offset; -} WIRE_ATTR; - -struct lnet_get { - struct lnet_handle_wire return_wmd; - __u64 match_bits; - __u32 ptl_index; - __u32 src_offset; - __u32 sink_length; -} WIRE_ATTR; - -struct lnet_reply { - struct lnet_handle_wire dst_wmd; -} WIRE_ATTR; - -struct lnet_hello { - __u64 incarnation; - __u32 type; -} WIRE_ATTR; - -struct lnet_hdr { - lnet_nid_t dest_nid; - lnet_nid_t src_nid; - lnet_pid_t dest_pid; - lnet_pid_t src_pid; - __u32 type; /* enum lnet_msg_type */ - __u32 payload_length; /* payload data to follow */ - /*<------__u64 aligned------->*/ - union { - struct lnet_ack ack; - struct lnet_put put; - struct lnet_get get; - struct lnet_reply reply; - struct lnet_hello hello; - } msg; -} WIRE_ATTR; - -/* - * A HELLO message contains a magic number and 
protocol version - * code in the header's dest_nid, the peer's NID in the src_nid, and - * LNET_MSG_HELLO in the type field. All other common fields are zero - * (including payload_size; i.e. no payload). - * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is - * running the same protocol and to find out its NID. These LNDs should - * exchange HELLO messages when a connection is first established. Individual - * LNDs can put whatever else they fancy in struct lnet_hdr::msg. - */ -struct lnet_magicversion { - __u32 magic; /* LNET_PROTO_TCP_MAGIC */ - __u16 version_major; /* increment on incompatible change */ - __u16 version_minor; /* increment on compatible change */ -} WIRE_ATTR; - -/* PROTO MAGIC for LNDs */ -#define LNET_PROTO_IB_MAGIC 0x0be91b91 -#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */ -#define LNET_PROTO_TCP_MAGIC 0xeebc0ded -#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100 -#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */ - -/* Placeholder for a future "unified" protocol across all LNDs */ -/* - * Current LNDs that receive a request with this magic will respond with a - * "stub" reply using their current protocol - */ -#define LNET_PROTO_MAGIC 0x45726963 /* ! 
*/ - -#define LNET_PROTO_TCP_VERSION_MAJOR 1 -#define LNET_PROTO_TCP_VERSION_MINOR 0 - -/* Acceptor connection request */ -struct lnet_acceptor_connreq { - __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */ - __u32 acr_version; /* protocol version */ - __u64 acr_nid; /* target NID */ -} WIRE_ATTR; - -#define LNET_PROTO_ACCEPTOR_VERSION 1 - -struct lnet_ni_status { - lnet_nid_t ns_nid; - __u32 ns_status; - __u32 ns_unused; -} WIRE_ATTR; - -struct lnet_ping_info { - __u32 pi_magic; - __u32 pi_features; - lnet_pid_t pi_pid; - __u32 pi_nnis; - struct lnet_ni_status pi_ni[0]; -} WIRE_ATTR; - -struct lnet_counters { - __u32 msgs_alloc; - __u32 msgs_max; - __u32 errors; - __u32 send_count; - __u32 recv_count; - __u32 route_count; - __u32 drop_count; - __u64 send_length; - __u64 recv_length; - __u64 route_length; - __u64 drop_length; -} WIRE_ATTR; - -#define LNET_NI_STATUS_UP 0x15aac0de -#define LNET_NI_STATUS_DOWN 0xdeadface -#define LNET_NI_STATUS_INVALID 0x00000000 - -#define LNET_MAX_INTERFACES 16 - -/** - * Objects maintained by the LNet are accessed through handles. Handle types - * have names of the form lnet_handle_xx, where xx is one of the two letter - * object type codes ('eq' for event queue, 'md' for memory descriptor, and - * 'me' for match entry). Each type of object is given a unique handle type - * to enhance type checking. - */ -#define LNET_WIRE_HANDLE_COOKIE_NONE (-1) - -struct lnet_handle_eq { - u64 cookie; -}; - -/** - * Invalidate eq handle @h. - */ -static inline void LNetInvalidateEQHandle(struct lnet_handle_eq *h) -{ - h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE; -} - -/** - * Check whether eq handle @h is invalid. - * - * @return 1 if handle is invalid, 0 if valid. - */ -static inline int LNetEQHandleIsInvalid(struct lnet_handle_eq h) -{ - return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie); -} - -struct lnet_handle_md { - u64 cookie; -}; - -/** - * Invalidate md handle @h. 
- */ -static inline void LNetInvalidateMDHandle(struct lnet_handle_md *h) -{ - h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE; -} - -/** - * Check whether eq handle @h is invalid. - * - * @return 1 if handle is invalid, 0 if valid. - */ -static inline int LNetMDHandleIsInvalid(struct lnet_handle_md h) -{ - return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie); -} - -struct lnet_handle_me { - u64 cookie; -}; - -/** - * Global process ID. - */ -struct lnet_process_id { - /** node id */ - lnet_nid_t nid; - /** process id */ - lnet_pid_t pid; -}; -/** @} lnet_addr */ - -/** \addtogroup lnet_me - * @{ - */ - -/** - * Specifies whether the match entry or memory descriptor should be unlinked - * automatically (LNET_UNLINK) or not (LNET_RETAIN). - */ -enum lnet_unlink { - LNET_RETAIN = 0, - LNET_UNLINK -}; - -/** - * Values of the type lnet_ins_pos are used to control where a new match - * entry is inserted. The value LNET_INS_BEFORE is used to insert the new - * entry before the current entry or before the head of the list. The value - * LNET_INS_AFTER is used to insert the new entry after the current entry - * or after the last item in the list. - */ -enum lnet_ins_pos { - /** insert ME before current position or head of the list */ - LNET_INS_BEFORE, - /** insert ME after current position or tail of the list */ - LNET_INS_AFTER, - /** attach ME at tail of local CPU partition ME list */ - LNET_INS_LOCAL -}; - -/** @} lnet_me */ - -/** \addtogroup lnet_md - * @{ - */ - -/** - * Defines the visible parts of a memory descriptor. Values of this type - * are used to initialize memory descriptors. - */ -struct lnet_md { - /** - * Specify the memory region associated with the memory descriptor. - * If the options field has: - * - LNET_MD_KIOV bit set: The start field points to the starting - * address of an array of struct bio_vec and the length field specifies - * the number of entries in the array. The length can't be bigger - * than LNET_MAX_IOV. 
The struct bio_vec is used to describe page-based - * fragments that are not necessarily mapped in virtual memory. - * - LNET_MD_IOVEC bit set: The start field points to the starting - * address of an array of struct iovec and the length field specifies - * the number of entries in the array. The length can't be bigger - * than LNET_MAX_IOV. The struct iovec is used to describe fragments - * that have virtual addresses. - * - Otherwise: The memory region is contiguous. The start field - * specifies the starting address for the memory region and the - * length field specifies its length. - * - * When the memory region is fragmented, all fragments but the first - * one must start on page boundary, and all but the last must end on - * page boundary. - */ - void *start; - unsigned int length; - /** - * Specifies the maximum number of operations that can be performed - * on the memory descriptor. An operation is any action that could - * possibly generate an event. In the usual case, the threshold value - * is decremented for each operation on the MD. When the threshold - * drops to zero, the MD becomes inactive and does not respond to - * operations. A threshold value of LNET_MD_THRESH_INF indicates that - * there is no bound on the number of operations that may be applied - * to a MD. - */ - int threshold; - /** - * Specifies the largest incoming request that the memory descriptor - * should respond to. When the unused portion of a MD (length - - * local offset) falls below this value, the MD becomes inactive and - * does not respond to further operations. This value is only used - * if the LNET_MD_MAX_SIZE option is set. - */ - int max_size; - /** - * Specifies the behavior of the memory descriptor. A bitwise OR - * of the following values can be used: - * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD. - * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD. 
- * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory - * region is provided by the incoming request. By default, the - * offset is maintained locally. When maintained locally, the - * offset is incremented by the length of the request so that - * the next operation (PUT or GET) will access the next part of - * the memory region. Note that only one offset variable exists - * per memory descriptor. If both PUT and GET operations are - * performed on a memory descriptor, the offset is updated each time. - * - LNET_MD_TRUNCATE: The length provided in the incoming request can - * be reduced to match the memory available in the region (determined - * by subtracting the offset from the length of the memory region). - * By default, if the length in the incoming operation is greater - * than the amount of memory available, the operation is rejected. - * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for - * incoming PUT operations, even if requested. By default, - * acknowledgments are sent for PUT operations that request an - * acknowledgment. Acknowledgments are never sent for GET operations. - * The data sent in the REPLY serves as an implicit acknowledgment. - * - LNET_MD_KIOV: The start and length fields specify an array of - * struct bio_vec. - * - LNET_MD_IOVEC: The start and length fields specify an array of - * struct iovec. - * - LNET_MD_MAX_SIZE: The max_size field is valid. - * - * Note: - * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather - * capability for memory descriptors. They can't be both set. - * - When LNET_MD_MAX_SIZE is set, the total length of the memory - * region (i.e. sum of all fragment lengths) must not be less than - * \a max_size. - */ - unsigned int options; - /** - * A user-specified value that is associated with the memory - * descriptor. The value does not need to be a pointer, but must fit - * in the space used by a pointer. 
This value is recorded in events - * associated with operations on this MD. - */ - void *user_ptr; - /** - * A handle for the event queue used to log the operations performed on - * the memory region. If this argument is a NULL handle (i.e. nullified - * by LNetInvalidateHandle()), operations performed on this memory - * descriptor are not logged. - */ - struct lnet_handle_eq eq_handle; -}; - -/* - * Max Transfer Unit (minimum supported everywhere). - * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks) - * these limits are system wide and not interface-local. - */ -#define LNET_MTU_BITS 20 -#define LNET_MTU (1 << LNET_MTU_BITS) - -/** limit on the number of fragments in discontiguous MDs */ -#define LNET_MAX_IOV 256 - -/** - * Options for the MD structure. See lnet_md::options. - */ -#define LNET_MD_OP_PUT (1 << 0) -/** See lnet_md::options. */ -#define LNET_MD_OP_GET (1 << 1) -/** See lnet_md::options. */ -#define LNET_MD_MANAGE_REMOTE (1 << 2) -/* unused (1 << 3) */ -/** See lnet_md::options. */ -#define LNET_MD_TRUNCATE (1 << 4) -/** See lnet_md::options. */ -#define LNET_MD_ACK_DISABLE (1 << 5) -/** See lnet_md::options. */ -#define LNET_MD_IOVEC (1 << 6) -/** See lnet_md::options. */ -#define LNET_MD_MAX_SIZE (1 << 7) -/** See lnet_md::options. */ -#define LNET_MD_KIOV (1 << 8) - -/* For compatibility with Cray Portals */ -#define LNET_MD_PHYS 0 - -/** Infinite threshold on MD operations. See lnet_md::threshold */ -#define LNET_MD_THRESH_INF (-1) - -/** @} lnet_md */ - -/** \addtogroup lnet_eq - * @{ - */ - -/** - * Six types of events can be logged in an event queue. - */ -enum lnet_event_kind { - /** An incoming GET operation has completed on the MD. */ - LNET_EVENT_GET = 1, - /** - * An incoming PUT operation has completed on the MD. The - * underlying layers will not alter the memory (on behalf of this - * operation) once this event has been logged. - */ - LNET_EVENT_PUT, - /** - * A REPLY operation has completed. 
This event is logged after the - * data (if any) from the REPLY has been written into the MD. - */ - LNET_EVENT_REPLY, - /** An acknowledgment has been received. */ - LNET_EVENT_ACK, - /** - * An outgoing send (PUT or GET) operation has completed. This event - * is logged after the entire buffer has been sent and it is safe for - * the caller to reuse the buffer. - * - * Note: - * - The LNET_EVENT_SEND doesn't guarantee message delivery. It can - * happen even when the message has not yet been put out on wire. - * - It's unsafe to assume that in an outgoing GET operation - * the LNET_EVENT_SEND event would happen before the - * LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and - * LNET_EVENT_ACK events in an outgoing PUT operation. - */ - LNET_EVENT_SEND, - /** - * A MD has been unlinked. Note that LNetMDUnlink() does not - * necessarily trigger an LNET_EVENT_UNLINK event. - * \see LNetMDUnlink - */ - LNET_EVENT_UNLINK, -}; - -#define LNET_SEQ_GT(a, b) (((signed long)((a) - (b))) > 0) - -/** - * Information about an event on a MD. - */ -struct lnet_event { - /** The identifier (nid, pid) of the target. */ - struct lnet_process_id target; - /** The identifier (nid, pid) of the initiator. */ - struct lnet_process_id initiator; - /** - * The NID of the immediate sender. If the request has been forwarded - * by routers, this is the NID of the last hop; otherwise it's the - * same as the initiator. - */ - lnet_nid_t sender; - /** Indicates the type of the event. */ - enum lnet_event_kind type; - /** The portal table index specified in the request */ - unsigned int pt_index; - /** A copy of the match bits specified in the request. */ - __u64 match_bits; - /** The length (in bytes) specified in the request. */ - unsigned int rlength; - /** - * The length (in bytes) of the data that was manipulated by the - * operation. 
For truncated operations, the manipulated length will be - * the number of bytes specified by the MD (possibly with an offset, - * see lnet_md). For all other operations, the manipulated length - * will be the length of the requested operation, i.e. rlength. - */ - unsigned int mlength; - /** - * The handle to the MD associated with the event. The handle may be - * invalid if the MD has been unlinked. - */ - struct lnet_handle_md md_handle; - /** - * A snapshot of the state of the MD immediately after the event has - * been processed. In particular, the threshold field in md will - * reflect the value of the threshold after the operation occurred. - */ - struct lnet_md md; - /** - * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT. - * \see LNetPut - */ - __u64 hdr_data; - /** - * Indicates the completion status of the operation. It's 0 for - * successful operations, otherwise it's an error code. - */ - int status; - /** - * Indicates whether the MD has been unlinked. Note that: - * - An event with unlinked set is the last event on the MD. - * - This field is also set for an explicit LNET_EVENT_UNLINK event. - * \see LNetMDUnlink - */ - int unlinked; - /** - * The displacement (in bytes) into the memory region that the - * operation used. The offset can be determined by the operation for - * a remote managed MD or by the local MD. - * \see lnet_md::options - */ - unsigned int offset; - /** - * The sequence number for this event. Sequence numbers are unique - * to each event. - */ - volatile unsigned long sequence; -}; - -/** - * Event queue handler function type. - * - * The EQ handler runs for each event that is deposited into the EQ. The - * handler is supplied with a pointer to the event that triggered the - * handler invocation. - * - * The handler must not block, must be reentrant, and must not call any LNet - * API functions. It should return as quickly as possible. 
- */ -typedef void (*lnet_eq_handler_t)(struct lnet_event *event); -#define LNET_EQ_HANDLER_NONE NULL -/** @} lnet_eq */ - -/** \addtogroup lnet_data - * @{ - */ - -/** - * Specify whether an acknowledgment should be sent by target when the PUT - * operation completes (i.e., when the data has been written to a MD of the - * target process). - * - * \see lnet_md::options for the discussion on LNET_MD_ACK_DISABLE by which - * acknowledgments can be disabled for a MD. - */ -enum lnet_ack_req { - /** Request an acknowledgment */ - LNET_ACK_REQ, - /** Request that no acknowledgment should be generated. */ - LNET_NOACK_REQ -}; -/** @} lnet_data */ - -/** @} lnet */ -#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h deleted file mode 100644 index cccb32dd28f2..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lnet/lnetctl.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * header for lnet ioctl - */ -#ifndef _LNETCTL_H_ -#define _LNETCTL_H_ - -#include - -/** \addtogroup lnet_fault_simulation - * @{ - */ - -enum { - LNET_CTL_DROP_ADD, - LNET_CTL_DROP_DEL, - LNET_CTL_DROP_RESET, - LNET_CTL_DROP_LIST, - LNET_CTL_DELAY_ADD, - LNET_CTL_DELAY_DEL, - LNET_CTL_DELAY_RESET, - LNET_CTL_DELAY_LIST, -}; - -#define LNET_ACK_BIT (1 << 0) -#define LNET_PUT_BIT (1 << 1) -#define LNET_GET_BIT (1 << 2) -#define LNET_REPLY_BIT (1 << 3) - -/** ioctl parameter for LNet fault simulation */ -struct lnet_fault_attr { - /** - * source NID of drop rule - * LNET_NID_ANY is wildcard for all sources - * 255.255.255.255@net is wildcard for all addresses from @net - */ - lnet_nid_t fa_src; - /** destination NID of drop rule, see \a dr_src for details */ - lnet_nid_t fa_dst; - /** - * Portal mask to drop, -1 means all portals, for example: - * fa_ptl_mask = (1 << _LDLM_CB_REQUEST_PORTAL ) | - * (1 << LDLM_CANCEL_REQUEST_PORTAL) - * - * If it is non-zero then only PUT and GET will be filtered, otherwise - * there is no portal filter, all matched messages will be checked. - */ - __u64 fa_ptl_mask; - /** - * message types to drop, for example: - * dra_type = LNET_DROP_ACK_BIT | LNET_DROP_PUT_BIT - * - * If it is non-zero then only specified message types are filtered, - * otherwise all message types will be checked. 
- */ - __u32 fa_msg_mask; - union { - /** message drop simulation */ - struct { - /** drop rate of this rule */ - __u32 da_rate; - /** - * time interval of message drop, it is exclusive - * with da_rate - */ - __u32 da_interval; - } drop; - /** message latency simulation */ - struct { - __u32 la_rate; - /** - * time interval of message delay, it is exclusive - * with la_rate - */ - __u32 la_interval; - /** latency to delay */ - __u32 la_latency; - } delay; - __u64 space[8]; - } u; -}; - -/** fault simluation stats */ -struct lnet_fault_stat { - /** total # matched messages */ - __u64 fs_count; - /** # dropped LNET_MSG_PUT by this rule */ - __u64 fs_put; - /** # dropped LNET_MSG_ACK by this rule */ - __u64 fs_ack; - /** # dropped LNET_MSG_GET by this rule */ - __u64 fs_get; - /** # dropped LNET_MSG_REPLY by this rule */ - __u64 fs_reply; - union { - struct { - /** total # dropped messages */ - __u64 ds_dropped; - } drop; - struct { - /** total # delayed messages */ - __u64 ls_delayed; - } delay; - __u64 space[8]; - } u; -}; - -/** @} lnet_fault_simulation */ - -#define LNET_DEV_ID 0 -#define LNET_DEV_PATH "/dev/lnet" - -#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h b/drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h deleted file mode 100644 index a4f9ff01d458..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lnet/lnetst.h +++ /dev/null @@ -1,556 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011 - 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. - * - * lnet/include/lnet/lnetst.h - * - * Author: Liang Zhen - */ - -#ifndef __LNET_ST_H__ -#define __LNET_ST_H__ - -#include - -#define LST_FEAT_NONE (0) -#define LST_FEAT_BULK_LEN (1 << 0) /* enable variable page size */ - -#define LST_FEATS_EMPTY (LST_FEAT_NONE) -#define LST_FEATS_MASK (LST_FEAT_NONE | LST_FEAT_BULK_LEN) - -#define LST_NAME_SIZE 32 /* max name buffer length */ - -#define LSTIO_DEBUG 0xC00 /* debug */ -#define LSTIO_SESSION_NEW 0xC01 /* create session */ -#define LSTIO_SESSION_END 0xC02 /* end session */ -#define LSTIO_SESSION_INFO 0xC03 /* query session */ -#define LSTIO_GROUP_ADD 0xC10 /* add group */ -#define LSTIO_GROUP_LIST 0xC11 /* list all groups in session */ -#define LSTIO_GROUP_INFO 0xC12 /* query default information of - * specified group - */ -#define LSTIO_GROUP_DEL 0xC13 /* delete group */ -#define LSTIO_NODES_ADD 0xC14 /* add nodes to specified group */ -#define LSTIO_GROUP_UPDATE 0xC15 /* update group */ -#define LSTIO_BATCH_ADD 0xC20 /* add batch */ -#define LSTIO_BATCH_START 0xC21 /* start batch */ -#define LSTIO_BATCH_STOP 0xC22 /* stop batch */ -#define LSTIO_BATCH_DEL 0xC23 /* delete batch */ -#define LSTIO_BATCH_LIST 0xC24 /* show all batches in the session */ -#define LSTIO_BATCH_INFO 0xC25 /* show defail of specified batch */ -#define LSTIO_TEST_ADD 0xC26 /* add test (to batch) */ -#define LSTIO_BATCH_QUERY 0xC27 
/* query batch status */ -#define LSTIO_STAT_QUERY 0xC30 /* get stats */ - -struct lst_sid { - lnet_nid_t ses_nid; /* nid of console node */ - __u64 ses_stamp; /* time stamp */ -}; /*** session id */ - -extern struct lst_sid LST_INVALID_SID; - -struct lst_bid { - __u64 bat_id; /* unique id in session */ -}; /*** batch id (group of tests) */ - -/* Status of test node */ -#define LST_NODE_ACTIVE 0x1 /* node in this session */ -#define LST_NODE_BUSY 0x2 /* node is taken by other session */ -#define LST_NODE_DOWN 0x4 /* node is down */ -#define LST_NODE_UNKNOWN 0x8 /* node not in session */ - -struct lstcon_node_ent { - struct lnet_process_id nde_id; /* id of node */ - int nde_state; /* state of node */ -}; /*** node entry, for list_group command */ - -struct lstcon_ndlist_ent { - int nle_nnode; /* # of nodes */ - int nle_nactive; /* # of active nodes */ - int nle_nbusy; /* # of busy nodes */ - int nle_ndown; /* # of down nodes */ - int nle_nunknown; /* # of unknown nodes */ -}; /*** node_list entry, for list_batch command */ - -struct lstcon_test_ent { - int tse_type; /* test type */ - int tse_loop; /* loop count */ - int tse_concur; /* concurrency of test */ -}; /* test summary entry, for - * list_batch command - */ - -struct lstcon_batch_ent { - int bae_state; /* batch status */ - int bae_timeout; /* batch timeout */ - int bae_ntest; /* # of tests in the batch */ -}; /* batch summary entry, for - * list_batch command - */ - -struct lstcon_test_batch_ent { - struct lstcon_ndlist_ent tbe_cli_nle; /* client (group) node_list - * entry - */ - struct lstcon_ndlist_ent tbe_srv_nle; /* server (group) node_list - * entry - */ - union { - struct lstcon_test_ent tbe_test; /* test entry */ - struct lstcon_batch_ent tbe_batch;/* batch entry */ - } u; -}; /* test/batch verbose information entry, - * for list_batch command - */ - -struct lstcon_rpc_ent { - struct list_head rpe_link; /* link chain */ - struct lnet_process_id rpe_peer; /* peer's id */ - struct timeval rpe_stamp; /* 
time stamp of RPC */ - int rpe_state; /* peer's state */ - int rpe_rpc_errno; /* RPC errno */ - - struct lst_sid rpe_sid; /* peer's session id */ - int rpe_fwk_errno; /* framework errno */ - int rpe_priv[4]; /* private data */ - char rpe_payload[0]; /* private reply payload */ -}; - -struct lstcon_trans_stat { - int trs_rpc_stat[4]; /* RPCs stat (0: total 1: failed - * 2: finished - * 4: reserved - */ - int trs_rpc_errno; /* RPC errno */ - int trs_fwk_stat[8]; /* framework stat */ - int trs_fwk_errno; /* errno of the first remote error */ - void *trs_fwk_private; /* private framework stat */ -}; - -static inline int -lstcon_rpc_stat_total(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_rpc_stat[0] : stat->trs_rpc_stat[0]; -} - -static inline int -lstcon_rpc_stat_success(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_rpc_stat[1] : stat->trs_rpc_stat[1]; -} - -static inline int -lstcon_rpc_stat_failure(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_rpc_stat[2] : stat->trs_rpc_stat[2]; -} - -static inline int -lstcon_sesop_stat_success(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_sesop_stat_failure(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -static inline int -lstcon_sesqry_stat_active(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_sesqry_stat_busy(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -static inline int -lstcon_sesqry_stat_unknown(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; -} - -static inline int -lstcon_tsbop_stat_success(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? 
++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_tsbop_stat_failure(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -static inline int -lstcon_tsbqry_stat_idle(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_tsbqry_stat_run(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -static inline int -lstcon_tsbqry_stat_failure(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[2] : stat->trs_fwk_stat[2]; -} - -static inline int -lstcon_statqry_stat_success(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[0] : stat->trs_fwk_stat[0]; -} - -static inline int -lstcon_statqry_stat_failure(struct lstcon_trans_stat *stat, int inc) -{ - return inc ? ++stat->trs_fwk_stat[1] : stat->trs_fwk_stat[1]; -} - -/* create a session */ -struct lstio_session_new_args { - int lstio_ses_key; /* IN: local key */ - int lstio_ses_timeout; /* IN: session timeout */ - int lstio_ses_force; /* IN: force create ? 
*/ - /** IN: session features */ - unsigned int lstio_ses_feats; - struct lst_sid __user *lstio_ses_idp; /* OUT: session id */ - int lstio_ses_nmlen; /* IN: name length */ - char __user *lstio_ses_namep; /* IN: session name */ -}; - -/* query current session */ -struct lstio_session_info_args { - struct lst_sid __user *lstio_ses_idp; /* OUT: session id */ - int __user *lstio_ses_keyp; /* OUT: local key */ - /** OUT: session features */ - unsigned int __user *lstio_ses_featp; - struct lstcon_ndlist_ent __user *lstio_ses_ndinfo;/* OUT: */ - int lstio_ses_nmlen; /* IN: name length */ - char __user *lstio_ses_namep; /* OUT: session name */ -}; - -/* delete a session */ -struct lstio_session_end_args { - int lstio_ses_key; /* IN: session key */ -}; - -#define LST_OPC_SESSION 1 -#define LST_OPC_GROUP 2 -#define LST_OPC_NODES 3 -#define LST_OPC_BATCHCLI 4 -#define LST_OPC_BATCHSRV 5 - -struct lstio_debug_args { - int lstio_dbg_key; /* IN: session key */ - int lstio_dbg_type; /* IN: debug - * session|batch| - * group|nodes list - */ - int lstio_dbg_flags; /* IN: reserved debug - * flags - */ - int lstio_dbg_timeout; /* IN: timeout of - * debug - */ - int lstio_dbg_nmlen; /* IN: len of name */ - char __user *lstio_dbg_namep; /* IN: name of - * group|batch - */ - int lstio_dbg_count; /* IN: # of test nodes - * to debug - */ - struct lnet_process_id __user *lstio_dbg_idsp; /* IN: id of test - * nodes - */ - struct list_head __user *lstio_dbg_resultp; /* OUT: list head of - * result buffer - */ -}; - -struct lstio_group_add_args { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_nmlen; /* IN: name length */ - char __user *lstio_grp_namep; /* IN: group name */ -}; - -struct lstio_group_del_args { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_nmlen; /* IN: name length */ - char __user *lstio_grp_namep; /* IN: group name */ -}; - -#define LST_GROUP_CLEAN 1 /* remove inactive nodes in the group */ -#define LST_GROUP_REFRESH 2 /* refresh inactive nodes - * in 
the group - */ -#define LST_GROUP_RMND 3 /* delete nodes from the group */ - -struct lstio_group_update_args { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_opc; /* IN: OPC */ - int lstio_grp_args; /* IN: arguments */ - int lstio_grp_nmlen; /* IN: name length */ - char __user *lstio_grp_namep; /* IN: group name */ - int lstio_grp_count; /* IN: # of nodes id */ - struct lnet_process_id __user *lstio_grp_idsp; /* IN: array of nodes */ - struct list_head __user *lstio_grp_resultp; /* OUT: list head of - * result buffer - */ -}; - -struct lstio_group_nodes_args { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_nmlen; /* IN: name length */ - char __user *lstio_grp_namep; /* IN: group name */ - int lstio_grp_count; /* IN: # of nodes */ - /** OUT: session features */ - unsigned int __user *lstio_grp_featp; - struct lnet_process_id __user *lstio_grp_idsp; /* IN: nodes */ - struct list_head __user *lstio_grp_resultp; /* OUT: list head of - * result buffer - */ -}; - -struct lstio_group_list_args { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_idx; /* IN: group idx */ - int lstio_grp_nmlen; /* IN: name len */ - char __user *lstio_grp_namep; /* OUT: name */ -}; - -struct lstio_group_info_args { - int lstio_grp_key; /* IN: session key */ - int lstio_grp_nmlen; /* IN: name len */ - char __user *lstio_grp_namep; /* IN: name */ - struct lstcon_ndlist_ent __user *lstio_grp_entp;/* OUT: description - * of group - */ - int __user *lstio_grp_idxp; /* IN/OUT: node index */ - int __user *lstio_grp_ndentp; /* IN/OUT: # of nodent */ - struct lstcon_node_ent __user *lstio_grp_dentsp;/* OUT: nodent array */ -}; - -#define LST_DEFAULT_BATCH "batch" /* default batch name */ - -struct lstio_batch_add_args { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_nmlen; /* IN: name length */ - char __user *lstio_bat_namep; /* IN: batch name */ -}; - -struct lstio_batch_del_args { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_nmlen; /* IN: 
name length */ - char __user *lstio_bat_namep; /* IN: batch name */ -}; - -struct lstio_batch_run_args { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_timeout; /* IN: timeout for - * the batch - */ - int lstio_bat_nmlen; /* IN: name length */ - char __user *lstio_bat_namep; /* IN: batch name */ - struct list_head __user *lstio_bat_resultp; /* OUT: list head of - * result buffer - */ -}; - -struct lstio_batch_stop_args { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_force; /* IN: abort unfinished - * test RPC - */ - int lstio_bat_nmlen; /* IN: name length */ - char __user *lstio_bat_namep; /* IN: batch name */ - struct list_head __user *lstio_bat_resultp; /* OUT: list head of - * result buffer - */ -}; - -struct lstio_batch_query_args { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_testidx; /* IN: test index */ - int lstio_bat_client; /* IN: we testing - * client? - */ - int lstio_bat_timeout; /* IN: timeout for - * waiting - */ - int lstio_bat_nmlen; /* IN: name length */ - char __user *lstio_bat_namep; /* IN: batch name */ - struct list_head __user *lstio_bat_resultp; /* OUT: list head of - * result buffer - */ -}; - -struct lstio_batch_list_args { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_idx; /* IN: index */ - int lstio_bat_nmlen; /* IN: name length */ - char __user *lstio_bat_namep; /* IN: batch name */ -}; - -struct lstio_batch_info_args { - int lstio_bat_key; /* IN: session key */ - int lstio_bat_nmlen; /* IN: name length */ - char __user *lstio_bat_namep; /* IN: name */ - int lstio_bat_server; /* IN: query server - * or not - */ - int lstio_bat_testidx; /* IN: test index */ - struct lstcon_test_batch_ent __user *lstio_bat_entp;/* OUT: batch ent */ - - int __user *lstio_bat_idxp; /* IN/OUT: index of node */ - int __user *lstio_bat_ndentp; /* IN/OUT: # of nodent */ - struct lstcon_node_ent __user *lstio_bat_dentsp;/* array of nodent */ -}; - -/* add stat in session */ -struct lstio_stat_args { - int 
lstio_sta_key; /* IN: session key */ - int lstio_sta_timeout; /* IN: timeout for - * stat request - */ - int lstio_sta_nmlen; /* IN: group name - * length - */ - char __user *lstio_sta_namep; /* IN: group name */ - int lstio_sta_count; /* IN: # of pid */ - struct lnet_process_id __user *lstio_sta_idsp; /* IN: pid */ - struct list_head __user *lstio_sta_resultp; /* OUT: list head of - * result buffer - */ -}; - -enum lst_test_type { - LST_TEST_BULK = 1, - LST_TEST_PING = 2 -}; - -/* create a test in a batch */ -#define LST_MAX_CONCUR 1024 /* Max concurrency of test */ - -struct lstio_test_args { - int lstio_tes_key; /* IN: session key */ - int lstio_tes_bat_nmlen; /* IN: batch name len */ - char __user *lstio_tes_bat_name; /* IN: batch name */ - int lstio_tes_type; /* IN: test type */ - int lstio_tes_oneside; /* IN: one sided test */ - int lstio_tes_loop; /* IN: loop count */ - int lstio_tes_concur; /* IN: concurrency */ - - int lstio_tes_dist; /* IN: node distribution in - * destination groups - */ - int lstio_tes_span; /* IN: node span in - * destination groups - */ - int lstio_tes_sgrp_nmlen; /* IN: source group - * name length - */ - char __user *lstio_tes_sgrp_name; /* IN: group name */ - int lstio_tes_dgrp_nmlen; /* IN: destination group - * name length - */ - char __user *lstio_tes_dgrp_name; /* IN: group name */ - - int lstio_tes_param_len; /* IN: param buffer len */ - void __user *lstio_tes_param; /* IN: parameter for specified - * test: lstio_bulk_param_t, - * lstio_ping_param_t, - * ... 
more - */ - int __user *lstio_tes_retp; /* OUT: private returned - * value - */ - struct list_head __user *lstio_tes_resultp;/* OUT: list head of - * result buffer - */ -}; - -enum lst_brw_type { - LST_BRW_READ = 1, - LST_BRW_WRITE = 2 -}; - -enum lst_brw_flags { - LST_BRW_CHECK_NONE = 1, - LST_BRW_CHECK_SIMPLE = 2, - LST_BRW_CHECK_FULL = 3 -}; - -struct lst_test_bulk_param { - int blk_opc; /* bulk operation code */ - int blk_size; /* size (bytes) */ - int blk_time; /* time of running the test*/ - int blk_flags; /* reserved flags */ - int blk_cli_off; /* bulk offset on client */ - int blk_srv_off; /* reserved: bulk offset on server */ -}; - -struct lst_test_ping_param { - int png_size; /* size of ping message */ - int png_time; /* time */ - int png_loop; /* loop */ - int png_flags; /* reserved flags */ -}; - -struct srpc_counters { - __u32 errors; - __u32 rpcs_sent; - __u32 rpcs_rcvd; - __u32 rpcs_dropped; - __u32 rpcs_expired; - __u64 bulk_get; - __u64 bulk_put; -} WIRE_ATTR; - -struct sfw_counters { - /** milliseconds since current session started */ - __u32 running_ms; - __u32 active_batches; - __u32 zombie_sessions; - __u32 brw_errors; - __u32 ping_errors; -} WIRE_ATTR; - -#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h b/drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h deleted file mode 100644 index 882074ed6021..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lnet/nidstr.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -#ifndef _LNET_NIDSTRINGS_H -#define _LNET_NIDSTRINGS_H - -#include - -/** - * Lustre Network Driver types. - */ -enum { - /* - * Only add to these values (i.e. don't ever change or redefine them): - * network addresses depend on them... - */ - QSWLND = 1, - SOCKLND = 2, - GMLND = 3, - PTLLND = 4, - O2IBLND = 5, - CIBLND = 6, - OPENIBLND = 7, - IIBLND = 8, - LOLND = 9, - RALND = 10, - VIBLND = 11, - MXLND = 12, - GNILND = 13, - GNIIPLND = 14, -}; - -struct list_head; - -#define LNET_NIDSTR_COUNT 1024 /* # of nidstrings */ -#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ - -/* support decl needed by both kernel and user space */ -char *libcfs_next_nidstring(void); -int libcfs_isknown_lnd(__u32 lnd); -char *libcfs_lnd2modname(__u32 lnd); -char *libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size); -static inline char *libcfs_lnd2str(__u32 lnd) -{ - return libcfs_lnd2str_r(lnd, libcfs_next_nidstring(), - LNET_NIDSTR_SIZE); -} - -int libcfs_str2lnd(const char *str); -char *libcfs_net2str_r(__u32 net, char *buf, size_t buf_size); -static inline char *libcfs_net2str(__u32 net) -{ - return libcfs_net2str_r(net, libcfs_next_nidstring(), - LNET_NIDSTR_SIZE); -} - -char *libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size); -static inline char *libcfs_nid2str(lnet_nid_t nid) -{ - return libcfs_nid2str_r(nid, libcfs_next_nidstring(), - LNET_NIDSTR_SIZE); -} - -__u32 libcfs_str2net(const char *str); -lnet_nid_t libcfs_str2nid(const 
char *str); -int libcfs_str2anynid(lnet_nid_t *nid, const char *str); -char *libcfs_id2str(struct lnet_process_id id); -void cfs_free_nidlist(struct list_head *list); -int cfs_parse_nidlist(char *str, int len, struct list_head *list); -int cfs_print_nidlist(char *buffer, int count, struct list_head *list); -int cfs_match_nid(lnet_nid_t nid, struct list_head *list); - -int cfs_ip_addr_parse(char *str, int len, struct list_head *list); -int cfs_ip_addr_match(__u32 addr, struct list_head *list); -bool cfs_nidrange_is_contiguous(struct list_head *nidlist); -void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid, - char *max_nid, size_t nidstr_length); - -struct netstrfns { - __u32 nf_type; - char *nf_name; - char *nf_modname; - void (*nf_addr2str)(__u32 addr, char *str, size_t size); - int (*nf_str2addr)(const char *str, int nob, __u32 *addr); - int (*nf_parse_addrlist)(char *str, int len, - struct list_head *list); - int (*nf_print_addrlist)(char *buffer, int count, - struct list_head *list); - int (*nf_match_addr)(__u32 addr, struct list_head *list); - bool (*nf_is_contiguous)(struct list_head *nidlist); - void (*nf_min_max)(struct list_head *nidlist, __u32 *min_nid, - __u32 *max_nid); -}; - -#endif /* _LNET_NIDSTRINGS_H */ diff --git a/drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h b/drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h deleted file mode 100644 index 6453e053fa99..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lnet/socklnd.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * #defines shared between socknal implementation and utilities - */ -#ifndef __UAPI_LNET_SOCKLND_H__ -#define __UAPI_LNET_SOCKLND_H__ - -#define SOCKLND_CONN_NONE (-1) -#define SOCKLND_CONN_ANY 0 -#define SOCKLND_CONN_CONTROL 1 -#define SOCKLND_CONN_BULK_IN 2 -#define SOCKLND_CONN_BULK_OUT 3 -#define SOCKLND_CONN_NTYPES 4 - -#define SOCKLND_CONN_ACK SOCKLND_CONN_BULK_IN - -#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h deleted file mode 100644 index 11b51d93f64c..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_cfg.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef _UAPI_LUSTRE_CFG_H_ -#define _UAPI_LUSTRE_CFG_H_ - -#include -#include -#include - -/** \defgroup cfg cfg - * - * @{ - */ - -/* - * 1cf6 - * lcfG - */ -#define LUSTRE_CFG_VERSION 0x1cf60001 -#define LUSTRE_CFG_MAX_BUFCOUNT 8 - -#define LCFG_HDR_SIZE(count) \ - __ALIGN_KERNEL(offsetof(struct lustre_cfg, lcfg_buflens[(count)]), 8) - -/** If the LCFG_REQUIRED bit is set in a configuration command, - * then the client is required to understand this parameter - * in order to mount the filesystem. If it does not understand - * a REQUIRED command the client mount will fail. 
- */ -#define LCFG_REQUIRED 0x0001000 - -enum lcfg_command_type { - LCFG_ATTACH = 0x00cf001, /**< create a new obd instance */ - LCFG_DETACH = 0x00cf002, /**< destroy obd instance */ - LCFG_SETUP = 0x00cf003, /**< call type-specific setup */ - LCFG_CLEANUP = 0x00cf004, /**< call type-specific cleanup - */ - LCFG_ADD_UUID = 0x00cf005, /**< add a nid to a niduuid */ - LCFG_DEL_UUID = 0x00cf006, /**< remove a nid from - * a niduuid - */ - LCFG_MOUNTOPT = 0x00cf007, /**< create a profile - * (mdc, osc) - */ - LCFG_DEL_MOUNTOPT = 0x00cf008, /**< destroy a profile */ - LCFG_SET_TIMEOUT = 0x00cf009, /**< set obd_timeout */ - LCFG_SET_UPCALL = 0x00cf00a, /**< deprecated */ - LCFG_ADD_CONN = 0x00cf00b, /**< add a failover niduuid to - * an obd - */ - LCFG_DEL_CONN = 0x00cf00c, /**< remove a failover niduuid */ - LCFG_LOV_ADD_OBD = 0x00cf00d, /**< add an osc to a lov */ - LCFG_LOV_DEL_OBD = 0x00cf00e, /**< remove an osc from a lov */ - LCFG_PARAM = 0x00cf00f, /**< set a proc parameter */ - LCFG_MARKER = 0x00cf010, /**< metadata about next - * cfg rec - */ - LCFG_LOG_START = 0x00ce011, /**< mgc only, process a - * cfg log - */ - LCFG_LOG_END = 0x00ce012, /**< stop processing updates */ - LCFG_LOV_ADD_INA = 0x00ce013, /**< like LOV_ADD_OBD, - * inactive - */ - LCFG_ADD_MDC = 0x00cf014, /**< add an mdc to a lmv */ - LCFG_DEL_MDC = 0x00cf015, /**< remove an mdc from a lmv */ - LCFG_SPTLRPC_CONF = 0x00ce016, /**< security */ - LCFG_POOL_NEW = 0x00ce020, /**< create an ost pool name */ - LCFG_POOL_ADD = 0x00ce021, /**< add an ost to a pool */ - LCFG_POOL_REM = 0x00ce022, /**< remove an ost from a pool */ - LCFG_POOL_DEL = 0x00ce023, /**< destroy an ost pool name */ - LCFG_SET_LDLM_TIMEOUT = 0x00ce030, /**< set ldlm_timeout */ - LCFG_PRE_CLEANUP = 0x00cf031, /**< call type-specific pre - * cleanup cleanup - */ - LCFG_SET_PARAM = 0x00ce032, /**< use set_param syntax to set - * a proc parameters - */ -}; - -struct lustre_cfg_bufs { - void *lcfg_buf[LUSTRE_CFG_MAX_BUFCOUNT]; - __u32 
lcfg_buflen[LUSTRE_CFG_MAX_BUFCOUNT]; - __u32 lcfg_bufcount; -}; - -struct lustre_cfg { - __u32 lcfg_version; - __u32 lcfg_command; - - __u32 lcfg_num; - __u32 lcfg_flags; - __u64 lcfg_nid; - __u32 lcfg_nal; /* not used any more */ - - __u32 lcfg_bufcount; - __u32 lcfg_buflens[0]; -}; - -enum cfg_record_type { - PORTALS_CFG_TYPE = 1, - LUSTRE_CFG_TYPE = 123, -}; - -#define LUSTRE_CFG_BUFLEN(lcfg, idx) \ - ((lcfg)->lcfg_bufcount <= (idx) ? 0 : (lcfg)->lcfg_buflens[(idx)]) - -static inline void lustre_cfg_bufs_set(struct lustre_cfg_bufs *bufs, - __u32 index, void *buf, __u32 buflen) -{ - if (index >= LUSTRE_CFG_MAX_BUFCOUNT) - return; - - if (!bufs) - return; - - if (bufs->lcfg_bufcount <= index) - bufs->lcfg_bufcount = index + 1; - - bufs->lcfg_buf[index] = buf; - bufs->lcfg_buflen[index] = buflen; -} - -static inline void lustre_cfg_bufs_set_string(struct lustre_cfg_bufs *bufs, - __u32 index, char *str) -{ - lustre_cfg_bufs_set(bufs, index, str, str ? strlen(str) + 1 : 0); -} - -static inline void lustre_cfg_bufs_reset(struct lustre_cfg_bufs *bufs, - char *name) -{ - memset((bufs), 0, sizeof(*bufs)); - if (name) - lustre_cfg_bufs_set_string(bufs, 0, name); -} - -static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, __u32 index) -{ - __u32 i; - size_t offset; - __u32 bufcount; - - if (!lcfg) - return NULL; - - bufcount = lcfg->lcfg_bufcount; - if (index >= bufcount) - return NULL; - - offset = LCFG_HDR_SIZE(lcfg->lcfg_bufcount); - for (i = 0; i < index; i++) - offset += __ALIGN_KERNEL(lcfg->lcfg_buflens[i], 8); - return (char *)lcfg + offset; -} - -static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs, - struct lustre_cfg *lcfg) -{ - __u32 i; - - bufs->lcfg_bufcount = lcfg->lcfg_bufcount; - for (i = 0; i < bufs->lcfg_bufcount; i++) { - bufs->lcfg_buflen[i] = lcfg->lcfg_buflens[i]; - bufs->lcfg_buf[i] = lustre_cfg_buf(lcfg, i); - } -} - -static inline __u32 lustre_cfg_len(__u32 bufcount, __u32 *buflens) -{ - __u32 i; - __u32 len; - - len = 
LCFG_HDR_SIZE(bufcount); - for (i = 0; i < bufcount; i++) - len += __ALIGN_KERNEL(buflens[i], 8); - - return __ALIGN_KERNEL(len, 8); -} - -static inline void lustre_cfg_init(struct lustre_cfg *lcfg, int cmd, - struct lustre_cfg_bufs *bufs) -{ - char *ptr; - __u32 i; - - lcfg->lcfg_version = LUSTRE_CFG_VERSION; - lcfg->lcfg_command = cmd; - lcfg->lcfg_bufcount = bufs->lcfg_bufcount; - - ptr = (char *)lcfg + LCFG_HDR_SIZE(lcfg->lcfg_bufcount); - for (i = 0; i < lcfg->lcfg_bufcount; i++) { - lcfg->lcfg_buflens[i] = bufs->lcfg_buflen[i]; - if (bufs->lcfg_buf[i]) { - memcpy(ptr, bufs->lcfg_buf[i], bufs->lcfg_buflen[i]); - ptr += __ALIGN_KERNEL(bufs->lcfg_buflen[i], 8); - } - } -} - -static inline int lustre_cfg_sanity_check(void *buf, size_t len) -{ - struct lustre_cfg *lcfg = (struct lustre_cfg *)buf; - - if (!lcfg) - return -EINVAL; - - /* check that the first bits of the struct are valid */ - if (len < LCFG_HDR_SIZE(0)) - return -EINVAL; - - if (lcfg->lcfg_version != LUSTRE_CFG_VERSION) - return -EINVAL; - - if (lcfg->lcfg_bufcount >= LUSTRE_CFG_MAX_BUFCOUNT) - return -EINVAL; - - /* check that the buflens are valid */ - if (len < LCFG_HDR_SIZE(lcfg->lcfg_bufcount)) - return -EINVAL; - - /* make sure all the pointers point inside the data */ - if (len < lustre_cfg_len(lcfg->lcfg_bufcount, lcfg->lcfg_buflens)) - return -EINVAL; - - return 0; -} - -/** @} cfg */ - -#endif /* _UAPI_LUSTRE_CFG_H_ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h deleted file mode 100644 index 2e7a8d103777..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fid.h +++ /dev/null @@ -1,293 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - * - * Copyright 2016 Cray Inc, all rights reserved. - * Author: Ben Evans. - * - * all fid manipulation functions go here - * - * FIDS are globally unique within a Lustre filessytem, and are made up - * of three parts: sequence, Object ID, and version. 
- * - */ -#ifndef _UAPI_LUSTRE_FID_H_ -#define _UAPI_LUSTRE_FID_H_ - -#include - -/** returns fid object sequence */ -static inline __u64 fid_seq(const struct lu_fid *fid) -{ - return fid->f_seq; -} - -/** returns fid object id */ -static inline __u32 fid_oid(const struct lu_fid *fid) -{ - return fid->f_oid; -} - -/** returns fid object version */ -static inline __u32 fid_ver(const struct lu_fid *fid) -{ - return fid->f_ver; -} - -static inline void fid_zero(struct lu_fid *fid) -{ - memset(fid, 0, sizeof(*fid)); -} - -static inline __u64 fid_ver_oid(const struct lu_fid *fid) -{ - return (__u64)fid_ver(fid) << 32 | fid_oid(fid); -} - -static inline bool fid_seq_is_mdt0(__u64 seq) -{ - return seq == FID_SEQ_OST_MDT0; -} - -static inline bool fid_seq_is_mdt(__u64 seq) -{ - return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL; -}; - -static inline bool fid_seq_is_echo(__u64 seq) -{ - return seq == FID_SEQ_ECHO; -} - -static inline bool fid_is_echo(const struct lu_fid *fid) -{ - return fid_seq_is_echo(fid_seq(fid)); -} - -static inline bool fid_seq_is_llog(__u64 seq) -{ - return seq == FID_SEQ_LLOG; -} - -static inline bool fid_is_llog(const struct lu_fid *fid) -{ - /* file with OID == 0 is not llog but contains last oid */ - return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0; -} - -static inline bool fid_seq_is_rsvd(__u64 seq) -{ - return seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD; -}; - -static inline bool fid_seq_is_special(__u64 seq) -{ - return seq == FID_SEQ_SPECIAL; -}; - -static inline bool fid_seq_is_local_file(__u64 seq) -{ - return seq == FID_SEQ_LOCAL_FILE || - seq == FID_SEQ_LOCAL_NAME; -}; - -static inline bool fid_seq_is_root(__u64 seq) -{ - return seq == FID_SEQ_ROOT; -} - -static inline bool fid_seq_is_dot(__u64 seq) -{ - return seq == FID_SEQ_DOT_LUSTRE; -} - -static inline bool fid_seq_is_default(__u64 seq) -{ - return seq == FID_SEQ_LOV_DEFAULT; -} - -static inline bool fid_is_mdt0(const struct lu_fid *fid) -{ - return 
fid_seq_is_mdt0(fid_seq(fid)); -} - -/** - * Check if a fid is igif or not. - * \param fid the fid to be tested. - * \return true if the fid is an igif; otherwise false. - */ -static inline bool fid_seq_is_igif(__u64 seq) -{ - return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX; -} - -static inline bool fid_is_igif(const struct lu_fid *fid) -{ - return fid_seq_is_igif(fid_seq(fid)); -} - -/** - * Check if a fid is idif or not. - * \param fid the fid to be tested. - * \return true if the fid is an idif; otherwise false. - */ -static inline bool fid_seq_is_idif(__u64 seq) -{ - return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX; -} - -static inline bool fid_is_idif(const struct lu_fid *fid) -{ - return fid_seq_is_idif(fid_seq(fid)); -} - -static inline bool fid_is_local_file(const struct lu_fid *fid) -{ - return fid_seq_is_local_file(fid_seq(fid)); -} - -static inline bool fid_seq_is_norm(__u64 seq) -{ - return (seq >= FID_SEQ_NORMAL); -} - -static inline bool fid_is_norm(const struct lu_fid *fid) -{ - return fid_seq_is_norm(fid_seq(fid)); -} - -/* convert an OST objid into an IDIF FID SEQ number */ -static inline __u64 fid_idif_seq(__u64 id, __u32 ost_idx) -{ - return FID_SEQ_IDIF | (ost_idx << 16) | ((id >> 32) & 0xffff); -} - -/* convert a packed IDIF FID into an OST objid */ -static inline __u64 fid_idif_id(__u64 seq, __u32 oid, __u32 ver) -{ - return ((__u64)ver << 48) | ((seq & 0xffff) << 32) | oid; -} - -static inline __u32 idif_ost_idx(__u64 seq) -{ - return (seq >> 16) & 0xffff; -} - -/* extract ost index from IDIF FID */ -static inline __u32 fid_idif_ost_idx(const struct lu_fid *fid) -{ - return idif_ost_idx(fid_seq(fid)); -} - -/** - * Get inode number from an igif. - * \param fid an igif to get inode number from. - * \return inode number for the igif. - */ -static inline ino_t lu_igif_ino(const struct lu_fid *fid) -{ - return fid_seq(fid); -} - -/** - * Get inode generation from an igif. - * \param fid an igif to get inode generation from. 
- * \return inode generation for the igif. - */ -static inline __u32 lu_igif_gen(const struct lu_fid *fid) -{ - return fid_oid(fid); -} - -/** - * Build igif from the inode number/generation. - */ -static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen) -{ - fid->f_seq = ino; - fid->f_oid = gen; - fid->f_ver = 0; -} - -/* - * Fids are transmitted across network (in the sender byte-ordering), - * and stored on disk in big-endian order. - */ -static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src) -{ - dst->f_seq = __cpu_to_le64(fid_seq(src)); - dst->f_oid = __cpu_to_le32(fid_oid(src)); - dst->f_ver = __cpu_to_le32(fid_ver(src)); -} - -static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src) -{ - dst->f_seq = __le64_to_cpu(fid_seq(src)); - dst->f_oid = __le32_to_cpu(fid_oid(src)); - dst->f_ver = __le32_to_cpu(fid_ver(src)); -} - -static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src) -{ - dst->f_seq = __cpu_to_be64(fid_seq(src)); - dst->f_oid = __cpu_to_be32(fid_oid(src)); - dst->f_ver = __cpu_to_be32(fid_ver(src)); -} - -static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src) -{ - dst->f_seq = __be64_to_cpu(fid_seq(src)); - dst->f_oid = __be32_to_cpu(fid_oid(src)); - dst->f_ver = __be32_to_cpu(fid_ver(src)); -} - -static inline bool fid_is_sane(const struct lu_fid *fid) -{ - return fid && ((fid_seq(fid) >= FID_SEQ_START && !fid_ver(fid)) || - fid_is_igif(fid) || fid_is_idif(fid) || - fid_seq_is_rsvd(fid_seq(fid))); -} - -static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1) -{ - return !memcmp(f0, f1, sizeof(*f0)); -} - -static inline int lu_fid_cmp(const struct lu_fid *f0, - const struct lu_fid *f1) -{ - if (fid_seq(f0) != fid_seq(f1)) - return fid_seq(f0) > fid_seq(f1) ? 1 : -1; - - if (fid_oid(f0) != fid_oid(f1)) - return fid_oid(f0) > fid_oid(f1) ? 1 : -1; - - if (fid_ver(f0) != fid_ver(f1)) - return fid_ver(f0) > fid_ver(f1) ? 
1 : -1; - - return 0; -} -#endif diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h deleted file mode 100644 index d375a476f5ea..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_fiemap.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2014, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * FIEMAP data structures and flags. This header file will be used until - * fiemap.h is available in the upstream kernel. 
- * - * Author: Kalpak Shah - * Author: Andreas Dilger - */ - -#ifndef _LUSTRE_FIEMAP_H -#define _LUSTRE_FIEMAP_H - -#include -#include - -/* XXX: We use fiemap_extent::fe_reserved[0] */ -#define fe_device fe_reserved[0] - -static inline size_t fiemap_count_to_size(size_t extent_count) -{ - return sizeof(struct fiemap) + extent_count * - sizeof(struct fiemap_extent); -} - -static inline unsigned int fiemap_size_to_count(size_t array_size) -{ - return (array_size - sizeof(struct fiemap)) / - sizeof(struct fiemap_extent); -} - -#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */ - -#ifdef FIEMAP_FLAGS_COMPAT -#undef FIEMAP_FLAGS_COMPAT -#endif - -/* Lustre specific flags - use a high bit, don't conflict with upstream flag */ -#define FIEMAP_EXTENT_NO_DIRECT 0x40000000 /* Data mapping undefined */ -#define FIEMAP_EXTENT_NET 0x80000000 /* Data stored remotely. - * Sets NO_DIRECT flag - */ - -#endif /* _LUSTRE_FIEMAP_H */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h deleted file mode 100644 index 6c7e3992d646..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_idl.h +++ /dev/null @@ -1,2690 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Lustre wire protocol definitions. - */ - -/** \defgroup lustreidl lustreidl - * - * Lustre wire protocol definitions. - * - * ALL structs passing over the wire should be declared here. Structs - * that are used in interfaces with userspace should go in lustre_user.h. - * - * All structs being declared here should be built from simple fixed-size - * types (__u8, __u16, __u32, __u64) or be built from other types or - * structs also declared in this file. Similarly, all flags and magic - * values in those structs should also be declared here. This ensures - * that the Lustre wire protocol is not influenced by external dependencies. - * - * The only other acceptable items in this file are VERY SIMPLE accessor - * functions to avoid callers grubbing inside the structures. Nothing that - * depends on external functions or definitions should be in here. - * - * Structs must be properly aligned to put 64-bit values on an 8-byte - * boundary. Any structs being added here must also be added to - * utils/wirecheck.c and "make newwiretest" run to regenerate the - * utils/wiretest.c sources. This allows us to verify that wire structs - * have the proper alignment/size on all architectures. - * - * DO NOT CHANGE any of the structs, flags, values declared here and used - * in released Lustre versions. Some structs may have padding fields that - * can be used. 
Some structs might allow addition at the end (verify this - * in the code to ensure that new/old clients that see this larger struct - * do not fail, otherwise you need to implement protocol compatibility). - * - * @{ - */ - -#ifndef _LUSTRE_IDL_H_ -#define _LUSTRE_IDL_H_ - -#include -#include - -#include -/* Defn's shared with user-space. */ -#include -#include - -/* - * GENERAL STUFF - */ -/* FOO_REQUEST_PORTAL is for incoming requests on the FOO - * FOO_REPLY_PORTAL is for incoming replies on the FOO - * FOO_BULK_PORTAL is for incoming bulk on the FOO - */ - -/* Lustre service names are following the format - * service name + MDT + seq name - */ -#define LUSTRE_MDT_MAXNAMELEN 80 - -#define CONNMGR_REQUEST_PORTAL 1 -#define CONNMGR_REPLY_PORTAL 2 -/*#define OSC_REQUEST_PORTAL 3 */ -#define OSC_REPLY_PORTAL 4 -/*#define OSC_BULK_PORTAL 5 */ -#define OST_IO_PORTAL 6 -#define OST_CREATE_PORTAL 7 -#define OST_BULK_PORTAL 8 -/*#define MDC_REQUEST_PORTAL 9 */ -#define MDC_REPLY_PORTAL 10 -/*#define MDC_BULK_PORTAL 11 */ -#define MDS_REQUEST_PORTAL 12 -/*#define MDS_REPLY_PORTAL 13 */ -#define MDS_BULK_PORTAL 14 -#define LDLM_CB_REQUEST_PORTAL 15 -#define LDLM_CB_REPLY_PORTAL 16 -#define LDLM_CANCEL_REQUEST_PORTAL 17 -#define LDLM_CANCEL_REPLY_PORTAL 18 -/*#define PTLBD_REQUEST_PORTAL 19 */ -/*#define PTLBD_REPLY_PORTAL 20 */ -/*#define PTLBD_BULK_PORTAL 21 */ -#define MDS_SETATTR_PORTAL 22 -#define MDS_READPAGE_PORTAL 23 -#define OUT_PORTAL 24 - -#define MGC_REPLY_PORTAL 25 -#define MGS_REQUEST_PORTAL 26 -#define MGS_REPLY_PORTAL 27 -#define OST_REQUEST_PORTAL 28 -#define FLD_REQUEST_PORTAL 29 -#define SEQ_METADATA_PORTAL 30 -#define SEQ_DATA_PORTAL 31 -#define SEQ_CONTROLLER_PORTAL 32 -#define MGS_BULK_PORTAL 33 - -/* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com, - * n8851@cray.com - */ - -/* packet types */ -#define PTL_RPC_MSG_REQUEST 4711 -#define PTL_RPC_MSG_ERR 4712 -#define PTL_RPC_MSG_REPLY 4713 - -/* DON'T use swabbed values of MAGIC 
as magic! */ -#define LUSTRE_MSG_MAGIC_V2 0x0BD00BD3 -#define LUSTRE_MSG_MAGIC_V2_SWABBED 0xD30BD00B - -#define LUSTRE_MSG_MAGIC LUSTRE_MSG_MAGIC_V2 - -#define PTLRPC_MSG_VERSION 0x00000003 -#define LUSTRE_VERSION_MASK 0xffff0000 -#define LUSTRE_OBD_VERSION 0x00010000 -#define LUSTRE_MDS_VERSION 0x00020000 -#define LUSTRE_OST_VERSION 0x00030000 -#define LUSTRE_DLM_VERSION 0x00040000 -#define LUSTRE_LOG_VERSION 0x00050000 -#define LUSTRE_MGS_VERSION 0x00060000 - -/** - * Describes a range of sequence, lsr_start is included but lsr_end is - * not in the range. - * Same structure is used in fld module where lsr_index field holds mdt id - * of the home mdt. - */ -struct lu_seq_range { - __u64 lsr_start; - __u64 lsr_end; - __u32 lsr_index; - __u32 lsr_flags; -}; - -struct lu_seq_range_array { - __u32 lsra_count; - __u32 lsra_padding; - struct lu_seq_range lsra_lsr[0]; -}; - -#define LU_SEQ_RANGE_MDT 0x0 -#define LU_SEQ_RANGE_OST 0x1 -#define LU_SEQ_RANGE_ANY 0x3 - -#define LU_SEQ_RANGE_MASK 0x3 - -/** \defgroup lu_fid lu_fid - * @{ - */ - -/** - * Flags for lustre_mdt_attrs::lma_compat and lustre_mdt_attrs::lma_incompat. - * Deprecated since HSM and SOM attributes are now stored in separate on-disk - * xattr. - */ -enum lma_compat { - LMAC_HSM = 0x00000001, -/* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */ - LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */ - LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is - * under /O//d. - */ -}; - -/** - * Masks for all features that should be supported by a Lustre version to - * access a specific file. - * This information is stored in lustre_mdt_attrs::lma_incompat. 
- */ -enum lma_incompat { - LMAI_RELEASED = 0x00000001, /* file is released */ - LMAI_AGENT = 0x00000002, /* agent inode */ - LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object - * is on the remote MDT - */ -}; - -#define LMA_INCOMPAT_SUPP (LMAI_AGENT | LMAI_REMOTE_PARENT) - -/** - * fid constants - */ -enum { - /** LASTID file has zero OID */ - LUSTRE_FID_LASTID_OID = 0UL, - /** initial fid id value */ - LUSTRE_FID_INIT_OID = 1UL -}; - -/* copytool uses a 32b bitmask field to encode archive-Ids during register - * with MDT thru kuc. - * archive num = 0 => all - * archive num from 1 to 32 - */ -#define LL_HSM_MAX_ARCHIVE (sizeof(__u32) * 8) - -/** - * Note that reserved SEQ numbers below 12 will conflict with ldiskfs - * inodes in the IGIF namespace, so these reserved SEQ numbers can be - * used for other purposes and not risk collisions with existing inodes. - * - * Different FID Format - * http://wiki.old.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs - */ -enum fid_seq { - FID_SEQ_OST_MDT0 = 0, - FID_SEQ_LLOG = 1, /* unnamed llogs */ - FID_SEQ_ECHO = 2, - FID_SEQ_OST_MDT1 = 3, - FID_SEQ_OST_MAX = 9, /* Max MDT count before OST_on_FID */ - FID_SEQ_LLOG_NAME = 10, /* named llogs */ - FID_SEQ_RSVD = 11, - FID_SEQ_IGIF = 12, - FID_SEQ_IGIF_MAX = 0x0ffffffffULL, - FID_SEQ_IDIF = 0x100000000ULL, - FID_SEQ_IDIF_MAX = 0x1ffffffffULL, - /* Normal FID sequence starts from this value, i.e. 1<<33 */ - FID_SEQ_START = 0x200000000ULL, - /* sequence for local pre-defined FIDs listed in local_oid */ - FID_SEQ_LOCAL_FILE = 0x200000001ULL, - FID_SEQ_DOT_LUSTRE = 0x200000002ULL, - /* sequence is used for local named objects FIDs generated - * by local_object_storage library - */ - FID_SEQ_LOCAL_NAME = 0x200000003ULL, - /* Because current FLD will only cache the fid sequence, instead - * of oid on the client side, if the FID needs to be exposed to - * clients sides, it needs to make sure all of fids under one - * sequence will be located in one MDT. 
- */ - FID_SEQ_SPECIAL = 0x200000004ULL, - FID_SEQ_QUOTA = 0x200000005ULL, - FID_SEQ_QUOTA_GLB = 0x200000006ULL, - FID_SEQ_ROOT = 0x200000007ULL, /* Located on MDT0 */ - FID_SEQ_NORMAL = 0x200000400ULL, - FID_SEQ_LOV_DEFAULT = 0xffffffffffffffffULL -}; - -#define OBIF_OID_MAX_BITS 32 -#define OBIF_MAX_OID (1ULL << OBIF_OID_MAX_BITS) -#define OBIF_OID_MASK ((1ULL << OBIF_OID_MAX_BITS) - 1) -#define IDIF_OID_MAX_BITS 48 -#define IDIF_MAX_OID (1ULL << IDIF_OID_MAX_BITS) -#define IDIF_OID_MASK ((1ULL << IDIF_OID_MAX_BITS) - 1) - -/** OID for FID_SEQ_SPECIAL */ -enum special_oid { - /* Big Filesystem Lock to serialize rename operations */ - FID_OID_SPECIAL_BFL = 1UL, -}; - -/** OID for FID_SEQ_DOT_LUSTRE */ -enum dot_lustre_oid { - FID_OID_DOT_LUSTRE = 1UL, - FID_OID_DOT_LUSTRE_OBF = 2UL, -}; - -/** OID for FID_SEQ_ROOT */ -enum root_oid { - FID_OID_ROOT = 1UL, - FID_OID_ECHO_ROOT = 2UL, -}; - -/** @} lu_fid */ - -/** \defgroup lu_dir lu_dir - * @{ - */ - -/** - * Enumeration of possible directory entry attributes. - * - * Attributes follow directory entry header in the order they appear in this - * enumeration. - */ -enum lu_dirent_attrs { - LUDA_FID = 0x0001, - LUDA_TYPE = 0x0002, - LUDA_64BITHASH = 0x0004, -}; - -/** - * Layout of readdir pages, as transmitted on wire. - */ -struct lu_dirent { - /** valid if LUDA_FID is set. */ - struct lu_fid lde_fid; - /** a unique entry identifier: a hash or an offset. */ - __u64 lde_hash; - /** total record length, including all attributes. */ - __u16 lde_reclen; - /** name length */ - __u16 lde_namelen; - /** optional variable size attributes following this entry. - * taken from enum lu_dirent_attrs. - */ - __u32 lde_attrs; - /** name is followed by the attributes indicated in ->ldp_attrs, in - * their natural order. After the last attribute, padding bytes are - * added to make ->lde_reclen a multiple of 8. - */ - char lde_name[0]; -}; - -/* - * Definitions of optional directory entry attributes formats. 
- * - * Individual attributes do not have their length encoded in a generic way. It - * is assumed that consumer of an attribute knows its format. This means that - * it is impossible to skip over an unknown attribute, except by skipping over all - * remaining attributes (by using ->lde_reclen), which is not too - * constraining, because new server versions will append new attributes at - * the end of an entry. - */ - -/** - * Fid directory attribute: a fid of an object referenced by the entry. This - * will be almost always requested by the client and supplied by the server. - * - * Aligned to 8 bytes. - */ -/* To have compatibility with 1.8, lets have fid in lu_dirent struct. */ - -/** - * File type. - * - * Aligned to 2 bytes. - */ -struct luda_type { - __u16 lt_type; -}; - -#ifndef IFSHIFT -#define IFSHIFT 12 -#endif - -#ifndef IFTODT -#define IFTODT(type) (((type) & S_IFMT) >> IFSHIFT) -#endif -#ifndef DTTOIF -#define DTTOIF(dirtype) ((dirtype) << IFSHIFT) -#endif - -struct lu_dirpage { - __le64 ldp_hash_start; - __le64 ldp_hash_end; - __le32 ldp_flags; - __le32 ldp_pad0; - struct lu_dirent ldp_entries[0]; -}; - -enum lu_dirpage_flags { - /** - * dirpage contains no entry. - */ - LDF_EMPTY = 1 << 0, - /** - * last entry's lde_hash equals ldp_hash_end. 
- */ - LDF_COLLIDE = 1 << 1 -}; - -static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp) -{ - if (__le32_to_cpu(dp->ldp_flags) & LDF_EMPTY) - return NULL; - else - return dp->ldp_entries; -} - -static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent) -{ - struct lu_dirent *next; - - if (__le16_to_cpu(ent->lde_reclen) != 0) - next = ((void *)ent) + __le16_to_cpu(ent->lde_reclen); - else - next = NULL; - - return next; -} - -static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr) -{ - size_t size; - - if (attr & LUDA_TYPE) { - const size_t align = sizeof(struct luda_type) - 1; - - size = (sizeof(struct lu_dirent) + namelen + align) & ~align; - size += sizeof(struct luda_type); - } else { - size = sizeof(struct lu_dirent) + namelen; - } - - return (size + 7) & ~7; -} - -#define MDS_DIR_END_OFF 0xfffffffffffffffeULL - -/** - * MDS_READPAGE page size - * - * This is the directory page size packed in MDS_READPAGE RPC. - * It's different than PAGE_SIZE because the client needs to - * access the struct lu_dirpage header packed at the beginning of - * the "page" and without this there isn't any way to know find the - * lu_dirpage header is if client and server PAGE_SIZE differ. 
- */ -#define LU_PAGE_SHIFT 12 -#define LU_PAGE_SIZE (1UL << LU_PAGE_SHIFT) -#define LU_PAGE_MASK (~(LU_PAGE_SIZE - 1)) - -#define LU_PAGE_COUNT (1 << (PAGE_SHIFT - LU_PAGE_SHIFT)) - -/** @} lu_dir */ - -struct lustre_handle { - __u64 cookie; -}; - -#define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL - -static inline bool lustre_handle_is_used(const struct lustre_handle *lh) -{ - return lh->cookie != 0ull; -} - -static inline bool lustre_handle_equal(const struct lustre_handle *lh1, - const struct lustre_handle *lh2) -{ - return lh1->cookie == lh2->cookie; -} - -static inline void lustre_handle_copy(struct lustre_handle *tgt, - const struct lustre_handle *src) -{ - tgt->cookie = src->cookie; -} - -/* flags for lm_flags */ -#define MSGHDR_AT_SUPPORT 0x1 -#define MSGHDR_CKSUM_INCOMPAT18 0x2 - -#define lustre_msg lustre_msg_v2 -/* we depend on this structure to be 8-byte aligned */ -/* this type is only endian-adjusted in lustre_unpack_msg() */ -struct lustre_msg_v2 { - __u32 lm_bufcount; - __u32 lm_secflvr; - __u32 lm_magic; - __u32 lm_repsize; - __u32 lm_cksum; - __u32 lm_flags; - __u32 lm_padding_2; - __u32 lm_padding_3; - __u32 lm_buflens[0]; -}; - -/* without gss, ptlrpc_body is put at the first buffer. 
*/ -#define PTLRPC_NUM_VERSIONS 4 - -struct ptlrpc_body_v3 { - struct lustre_handle pb_handle; - __u32 pb_type; - __u32 pb_version; - __u32 pb_opc; - __u32 pb_status; - __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */ - __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */ - __u16 pb_padding0; - __u32 pb_padding1; - __u64 pb_last_committed; - __u64 pb_transno; - __u32 pb_flags; - __u32 pb_op_flags; - __u32 pb_conn_cnt; - __u32 pb_timeout; /* for req, the deadline, for rep, the service est */ - __u32 pb_service_time; /* for rep, actual service time */ - __u32 pb_limit; - __u64 pb_slv; - /* VBR: pre-versions */ - __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS]; - __u64 pb_mbits; /**< match bits for bulk request */ - /* padding for future needs */ - __u64 pb_padding64_0; - __u64 pb_padding64_1; - __u64 pb_padding64_2; - char pb_jobid[LUSTRE_JOBID_SIZE]; -}; - -#define ptlrpc_body ptlrpc_body_v3 - -struct ptlrpc_body_v2 { - struct lustre_handle pb_handle; - __u32 pb_type; - __u32 pb_version; - __u32 pb_opc; - __u32 pb_status; - __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */ - __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */ - __u16 pb_padding0; - __u32 pb_padding1; - __u64 pb_last_committed; - __u64 pb_transno; - __u32 pb_flags; - __u32 pb_op_flags; - __u32 pb_conn_cnt; - __u32 pb_timeout; /* for req, the deadline, for rep, the service est */ - __u32 pb_service_time; /* for rep, actual service time, also used for - * net_latency of req - */ - __u32 pb_limit; - __u64 pb_slv; - /* VBR: pre-versions */ - __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS]; - __u64 pb_mbits; /**< unused in V2 */ - /* padding for future needs */ - __u64 pb_padding64_0; - __u64 pb_padding64_1; - __u64 pb_padding64_2; -}; - -/* message body offset for lustre_msg_v2 */ -/* ptlrpc body offset in all request/reply messages */ -#define MSG_PTLRPC_BODY_OFF 0 - -/* normal request/reply message record offset */ -#define REQ_REC_OFF 1 
-#define REPLY_REC_OFF 1 - -/* ldlm request message body offset */ -#define DLM_LOCKREQ_OFF 1 /* lockreq offset */ -#define DLM_REQ_REC_OFF 2 /* normal dlm request record offset */ - -/* ldlm intent lock message body offset */ -#define DLM_INTENT_IT_OFF 2 /* intent lock it offset */ -#define DLM_INTENT_REC_OFF 3 /* intent lock record offset */ - -/* ldlm reply message body offset */ -#define DLM_LOCKREPLY_OFF 1 /* lockrep offset */ -#define DLM_REPLY_REC_OFF 2 /* reply record offset */ - -/** only use in req->rq_{req,rep}_swab_mask */ -#define MSG_PTLRPC_HEADER_OFF 31 - -/* Flags that are operation-specific go in the top 16 bits. */ -#define MSG_OP_FLAG_MASK 0xffff0000 -#define MSG_OP_FLAG_SHIFT 16 - -/* Flags that apply to all requests are in the bottom 16 bits */ -#define MSG_GEN_FLAG_MASK 0x0000ffff -#define MSG_LAST_REPLAY 0x0001 -#define MSG_RESENT 0x0002 -#define MSG_REPLAY 0x0004 -/* #define MSG_AT_SUPPORT 0x0008 - * This was used in early prototypes of adaptive timeouts, and while there - * shouldn't be any users of that code there also isn't a need for using this - * bits. Defer usage until at least 1.10 to avoid potential conflict. 
- */ -#define MSG_DELAY_REPLAY 0x0010 -#define MSG_VERSION_REPLAY 0x0020 -#define MSG_REQ_REPLAY_DONE 0x0040 -#define MSG_LOCK_REPLAY_DONE 0x0080 - -/* - * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT) - */ - -#define MSG_CONNECT_RECOVERING 0x00000001 -#define MSG_CONNECT_RECONNECT 0x00000002 -#define MSG_CONNECT_REPLAYABLE 0x00000004 -/*#define MSG_CONNECT_PEER 0x8 */ -#define MSG_CONNECT_LIBCLIENT 0x00000010 -#define MSG_CONNECT_INITIAL 0x00000020 -#define MSG_CONNECT_ASYNC 0x00000040 -#define MSG_CONNECT_NEXT_VER 0x00000080 /* use next version of lustre_msg */ -#define MSG_CONNECT_TRANSNO 0x00000100 /* report transno */ - -/* Connect flags */ -#define OBD_CONNECT_RDONLY 0x1ULL /*client has read-only access*/ -#define OBD_CONNECT_INDEX 0x2ULL /*connect specific LOV idx */ -#define OBD_CONNECT_MDS 0x4ULL /*connect from MDT to OST */ -#define OBD_CONNECT_GRANT 0x8ULL /*OSC gets grant at connect */ -#define OBD_CONNECT_SRVLOCK 0x10ULL /*server takes locks for cli */ -#define OBD_CONNECT_VERSION 0x20ULL /*Lustre versions in ocd */ -#define OBD_CONNECT_REQPORTAL 0x40ULL /*Separate non-IO req portal */ -#define OBD_CONNECT_ACL 0x80ULL /*access control lists */ -#define OBD_CONNECT_XATTR 0x100ULL /*client use extended attr */ -#define OBD_CONNECT_LARGE_ACL 0x200ULL /* more than 32 ACL entries */ -#define OBD_CONNECT_TRUNCLOCK 0x400ULL /*locks on server for punch */ -#define OBD_CONNECT_TRANSNO 0x800ULL /*replay sends init transno */ -#define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks*/ -#define OBD_CONNECT_JOIN 0x2000ULL /*files can be concatenated. - *We do not support JOIN FILE - *anymore, reserve this flags - *just for preventing such bit - *to be reused. - */ -#define OBD_CONNECT_ATTRFID 0x4000ULL /*Server can GetAttr By Fid*/ -#define OBD_CONNECT_NODEVOH 0x8000ULL /*No open hndl on specl nodes*/ -#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /* Remote client, never used - * in production. Removed in - * 2.9. Keep this flag to - * avoid reuse. 
- */ -#define OBD_CONNECT_RMT_CLIENT_FORCE 0x20000ULL /* Remote client by force, - * never used in production. - * Removed in 2.9. Keep this - * flag to avoid reuse - */ -#define OBD_CONNECT_BRW_SIZE 0x40000ULL /*Max bytes per rpc */ -#define OBD_CONNECT_QUOTA64 0x80000ULL /*Not used since 2.4 */ -#define OBD_CONNECT_MDS_CAPA 0x100000ULL /*MDS capability */ -#define OBD_CONNECT_OSS_CAPA 0x200000ULL /*OSS capability */ -#define OBD_CONNECT_CANCELSET 0x400000ULL /*Early batched cancels. */ -#define OBD_CONNECT_SOM 0x800000ULL /*Size on MDS */ -#define OBD_CONNECT_AT 0x1000000ULL /*client uses AT */ -#define OBD_CONNECT_LRU_RESIZE 0x2000000ULL /*LRU resize feature. */ -#define OBD_CONNECT_MDS_MDS 0x4000000ULL /*MDS-MDS connection */ -#define OBD_CONNECT_REAL 0x8000000ULL /* obsolete since 2.8 */ -#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /*Not used since 2.4 */ -#define OBD_CONNECT_CKSUM 0x20000000ULL /*support several cksum algos*/ -#define OBD_CONNECT_FID 0x40000000ULL /*FID is supported by server */ -#define OBD_CONNECT_VBR 0x80000000ULL /*version based recovery */ -#define OBD_CONNECT_LOV_V3 0x100000000ULL /*client supports LOV v3 EA */ -#define OBD_CONNECT_GRANT_SHRINK 0x200000000ULL /* support grant shrink */ -#define OBD_CONNECT_SKIP_ORPHAN 0x400000000ULL /* don't reuse orphan objids */ -#define OBD_CONNECT_MAX_EASIZE 0x800000000ULL /* preserved for large EA */ -#define OBD_CONNECT_FULL20 0x1000000000ULL /* it is 2.0 client */ -#define OBD_CONNECT_LAYOUTLOCK 0x2000000000ULL /* client uses layout lock */ -#define OBD_CONNECT_64BITHASH 0x4000000000ULL /* client supports 64-bits - * directory hash - */ -#define OBD_CONNECT_MAXBYTES 0x8000000000ULL /* max stripe size */ -#define OBD_CONNECT_IMP_RECOV 0x10000000000ULL /* imp recovery support */ -#define OBD_CONNECT_JOBSTATS 0x20000000000ULL /* jobid in ptlrpc_body */ -#define OBD_CONNECT_UMASK 0x40000000000ULL /* create uses client umask */ -#define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles 
-EINPROGRESS - * RPC error properly - */ -#define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for - * finer space reservation - */ -#define OBD_CONNECT_FLOCK_OWNER 0x200000000000ULL /* for the fixed 1.8 - * policy and 2.x server - */ -#define OBD_CONNECT_LVB_TYPE 0x400000000000ULL /* variable type of LVB */ -#define OBD_CONNECT_NANOSEC_TIME 0x800000000000ULL /* nanosecond timestamps */ -#define OBD_CONNECT_LIGHTWEIGHT 0x1000000000000ULL/* lightweight connection */ -#define OBD_CONNECT_SHORTIO 0x2000000000000ULL/* short io */ -#define OBD_CONNECT_PINGLESS 0x4000000000000ULL/* pings not required */ -#define OBD_CONNECT_FLOCK_DEAD 0x8000000000000ULL/* flock deadlock detection */ -#define OBD_CONNECT_DISP_STRIPE 0x10000000000000ULL/*create stripe disposition*/ -#define OBD_CONNECT_OPEN_BY_FID 0x20000000000000ULL /* open by fid won't pack - * name in request - */ -#define OBD_CONNECT_LFSCK 0x40000000000000ULL/* support online LFSCK */ -#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */ -#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify - * RPCs in parallel - */ -#define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL/* striped DNE dir */ -#define OBD_CONNECT_SUBTREE 0x800000000000000ULL /* fileset mount */ -#define OBD_CONNECT_LOCK_AHEAD 0x1000000000000000ULL /* lock ahead */ -/** bulk matchbits is sent within ptlrpc_body */ -#define OBD_CONNECT_BULK_MBITS 0x2000000000000000ULL -#define OBD_CONNECT_OBDOPACK 0x4000000000000000ULL /* compact OUT obdo */ -#define OBD_CONNECT_FLAGS2 0x8000000000000000ULL /* second flags word */ - -/* XXX README XXX: - * Please DO NOT add flag values here before first ensuring that this same - * flag value is not in use on some other branch. Please clear any such - * changes with senior engineers before starting to use a new flag. 
Then, - * submit a small patch against EVERY branch that ONLY adds the new flag, - * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the - * flag to check_obd_connect_data(), and updates wiretests accordingly, so it - * can be approved and landed easily to reserve the flag for future use. - */ - -/* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS - * connection. It is a temporary bug fix for Imperative Recovery interop - * between 2.2 and 2.3 x86/ppc nodes, and can be removed when interop for - * 2.2 clients/servers is no longer needed. LU-1252/LU-1644. - */ -#define OBD_CONNECT_MNE_SWAB OBD_CONNECT_MDS_MDS - -#define OCD_HAS_FLAG(ocd, flg) \ - (!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg)) - -/* Features required for this version of the client to work with server */ -#define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \ - OBD_CONNECT_FULL20) - -/* This structure is used for both request and reply. - * - * If we eventually have separate connect data for different types, which we - * almost certainly will, then perhaps we stick a union in here. 
- */ -struct obd_connect_data { - __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ - __u32 ocd_version; /* lustre release version number */ - __u32 ocd_grant; /* initial cache grant amount (bytes) */ - __u32 ocd_index; /* LOV index to connect to */ - __u32 ocd_brw_size; /* Maximum BRW size in bytes */ - __u64 ocd_ibits_known; /* inode bits this client understands */ - __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */ - __u8 ocd_inodespace; /* log2 of the per-inode space consumption */ - __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */ - __u32 ocd_unused; /* also fix lustre_swab_connect */ - __u64 ocd_transno; /* first transno from client to be replayed */ - __u32 ocd_group; /* MDS group on OST */ - __u32 ocd_cksum_types; /* supported checksum algorithms */ - __u32 ocd_max_easize; /* How big LOV EA can be on MDS */ - __u32 ocd_instance; /* instance # of this target */ - __u64 ocd_maxbytes; /* Maximum stripe size in bytes */ - /* Fields after ocd_maxbytes are only accessible by the receiver - * if the corresponding flag in ocd_connect_flags is set. Accessing - * any field after ocd_maxbytes on the receiver without a valid flag - * may result in out-of-bound memory access and kernel oops. - */ - __u16 ocd_maxmodrpcs; /* Maximum modify RPCs in parallel */ - __u16 padding0; /* added 2.1.0. also fix lustre_swab_connect */ - __u32 padding1; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 ocd_connect_flags2; - __u64 padding3; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 padding4; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 padding5; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 padding6; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 padding7; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 padding8; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 padding9; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 paddingA; /* added 2.1.0. 
also fix lustre_swab_connect */ - __u64 paddingB; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 paddingC; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 paddingD; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 paddingE; /* added 2.1.0. also fix lustre_swab_connect */ - __u64 paddingF; /* added 2.1.0. also fix lustre_swab_connect */ -}; - -/* XXX README XXX: - * Please DO NOT use any fields here before first ensuring that this same - * field is not in use on some other branch. Please clear any such changes - * with senior engineers before starting to use a new field. Then, submit - * a small patch against EVERY branch that ONLY adds the new field along with - * the matching OBD_CONNECT flag, so that can be approved and landed easily to - * reserve the flag for future use. - */ - -/* - * Supported checksum algorithms. Up to 32 checksum types are supported. - * (32-bit mask stored in obd_connect_data::ocd_cksum_types) - * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new - * algorithm and also the OBD_FL_CKSUM* flags. - */ -enum cksum_type { - OBD_CKSUM_CRC32 = 0x00000001, - OBD_CKSUM_ADLER = 0x00000002, - OBD_CKSUM_CRC32C = 0x00000004, -}; - -/* - * OST requests: OBDO & OBD request records - */ - -/* opcodes */ -enum ost_cmd { - OST_REPLY = 0, /* reply ? 
*/ - OST_GETATTR = 1, - OST_SETATTR = 2, - OST_READ = 3, - OST_WRITE = 4, - OST_CREATE = 5, - OST_DESTROY = 6, - OST_GET_INFO = 7, - OST_CONNECT = 8, - OST_DISCONNECT = 9, - OST_PUNCH = 10, - OST_OPEN = 11, - OST_CLOSE = 12, - OST_STATFS = 13, - OST_SYNC = 16, - OST_SET_INFO = 17, - OST_QUOTACHECK = 18, /* not used since 2.4 */ - OST_QUOTACTL = 19, - OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */ - OST_LAST_OPC -}; -#define OST_FIRST_OPC OST_REPLY - -enum obdo_flags { - OBD_FL_INLINEDATA = 0x00000001, - OBD_FL_OBDMDEXISTS = 0x00000002, - OBD_FL_DELORPHAN = 0x00000004, /* if set in o_flags delete orphans */ - OBD_FL_NORPC = 0x00000008, /* set in o_flags do in OSC not OST */ - OBD_FL_IDONLY = 0x00000010, /* set in o_flags only adjust obj id*/ - OBD_FL_RECREATE_OBJS = 0x00000020, /* recreate missing obj */ - OBD_FL_DEBUG_CHECK = 0x00000040, /* echo client/server debug check */ - OBD_FL_NO_USRQUOTA = 0x00000100, /* the object's owner is over quota */ - OBD_FL_NO_GRPQUOTA = 0x00000200, /* the object's group is over quota */ - OBD_FL_CREATE_CROW = 0x00000400, /* object should be create on write */ - OBD_FL_SRVLOCK = 0x00000800, /* delegate DLM locking to server */ - OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */ - OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum type */ - OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */ - OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */ - OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */ - OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */ - OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client. 
- * XXX: obsoleted - reserved for old - * clients prior than 2.2 - */ - OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ - OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */ - OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */ - OBD_FL_SHORT_IO = 0x00400000, /* short io request */ - - /* Note that while these checksum values are currently separate bits, - * in 2.x we can actually allow all values from 1-31 if we wanted. - */ - OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER | - OBD_FL_CKSUM_CRC32C, - - /* mask for local-only flag, which won't be sent over network */ - OBD_FL_LOCAL_MASK = 0xF0000000, -}; - -/* - * All LOV EA magics should have the same postfix, if some new version - * Lustre instroduces new LOV EA magic, then when down-grade to an old - * Lustre, even though the old version system does not recognizes such - * new magic, it still can distinguish the corrupted cases by checking - * the magic's postfix. - */ -#define LOV_MAGIC_MAGIC 0x0BD0 -#define LOV_MAGIC_MASK 0xFFFF - -#define LOV_MAGIC_V1 (0x0BD10000 | LOV_MAGIC_MAGIC) -#define LOV_MAGIC_JOIN_V1 (0x0BD20000 | LOV_MAGIC_MAGIC) -#define LOV_MAGIC_V3 (0x0BD30000 | LOV_MAGIC_MAGIC) -#define LOV_MAGIC_MIGRATE (0x0BD40000 | LOV_MAGIC_MAGIC) -/* reserved for specifying OSTs */ -#define LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC) -#define LOV_MAGIC LOV_MAGIC_V1 - -/* - * magic for fully defined striping - * the idea is that we should have different magics for striping "hints" - * (struct lov_user_md_v[13]) and defined ready-to-use striping (struct - * lov_mds_md_v[13]). at the moment the magics are used in wire protocol, - * we can't just change it w/o long way preparation, but we still need a - * mechanism to allow LOD to differentiate hint versus ready striping. 
- * so, at the moment we do a trick: MDT knows what to expect from request - * depending on the case (replay uses ready striping, non-replay req uses - * hints), so MDT replaces magic with appropriate one and now LOD can - * easily understand what's inside -bzzz - */ -#define LOV_MAGIC_V1_DEF 0x0CD10BD0 -#define LOV_MAGIC_V3_DEF 0x0CD30BD0 - -#define lov_pattern(pattern) (pattern & ~LOV_PATTERN_F_MASK) -#define lov_pattern_flags(pattern) (pattern & LOV_PATTERN_F_MASK) - -#define lov_ost_data lov_ost_data_v1 -struct lov_ost_data_v1 { /* per-stripe data structure (little-endian)*/ - struct ost_id l_ost_oi; /* OST object ID */ - __u32 l_ost_gen; /* generation of this l_ost_idx */ - __u32 l_ost_idx; /* OST index in LOV (lov_tgt_desc->tgts) */ -}; - -#define lov_mds_md lov_mds_md_v1 -struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ - __u32 lmm_magic; /* magic number = LOV_MAGIC_V1 */ - __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ - struct ost_id lmm_oi; /* LOV object ID */ - __u32 lmm_stripe_size; /* size of stripe in bytes */ - /* lmm_stripe_count used to be __u32 */ - __u16 lmm_stripe_count; /* num stripes in use for this object */ - __u16 lmm_layout_gen; /* layout generation number */ - struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ -}; - -#define MAX_MD_SIZE \ - (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) -#define MIN_MD_SIZE \ - (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data)) - -#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" -#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default" -#define XATTR_USER_PREFIX "user." -#define XATTR_TRUSTED_PREFIX "trusted." -#define XATTR_SECURITY_PREFIX "security." -#define XATTR_LUSTRE_PREFIX "lustre." 
- -#define XATTR_NAME_LOV "trusted.lov" -#define XATTR_NAME_LMA "trusted.lma" -#define XATTR_NAME_LMV "trusted.lmv" -#define XATTR_NAME_DEFAULT_LMV "trusted.dmv" -#define XATTR_NAME_LINK "trusted.link" -#define XATTR_NAME_FID "trusted.fid" -#define XATTR_NAME_VERSION "trusted.version" -#define XATTR_NAME_SOM "trusted.som" -#define XATTR_NAME_HSM "trusted.hsm" -#define XATTR_NAME_LFSCK_NAMESPACE "trusted.lfsck_namespace" - -struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */ - __u32 lmm_magic; /* magic number = LOV_MAGIC_V3 */ - __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ - struct ost_id lmm_oi; /* LOV object ID */ - __u32 lmm_stripe_size; /* size of stripe in bytes */ - /* lmm_stripe_count used to be __u32 */ - __u16 lmm_stripe_count; /* num stripes in use for this object */ - __u16 lmm_layout_gen; /* layout generation number */ - char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* must be 32bit aligned */ - struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ -}; - -static inline __u32 lov_mds_md_size(__u16 stripes, __u32 lmm_magic) -{ - if (lmm_magic == LOV_MAGIC_V3) - return sizeof(struct lov_mds_md_v3) + - stripes * sizeof(struct lov_ost_data_v1); - else - return sizeof(struct lov_mds_md_v1) + - stripes * sizeof(struct lov_ost_data_v1); -} - -static inline __u32 -lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) -{ - switch (lmm_magic) { - case LOV_MAGIC_V1: { - struct lov_mds_md_v1 lmm; - - if (buf_size < sizeof(lmm)) - return 0; - - return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]); - } - case LOV_MAGIC_V3: { - struct lov_mds_md_v3 lmm; - - if (buf_size < sizeof(lmm)) - return 0; - - return (buf_size - sizeof(lmm)) / sizeof(lmm.lmm_objects[0]); - } - default: - return 0; - } -} - -#define OBD_MD_FLID (0x00000001ULL) /* object ID */ -#define OBD_MD_FLATIME (0x00000002ULL) /* access time */ -#define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */ -#define OBD_MD_FLCTIME (0x00000008ULL) /* change 
time */ -#define OBD_MD_FLSIZE (0x00000010ULL) /* size */ -#define OBD_MD_FLBLOCKS (0x00000020ULL) /* allocated blocks count */ -#define OBD_MD_FLBLKSZ (0x00000040ULL) /* block size */ -#define OBD_MD_FLMODE (0x00000080ULL) /* access bits (mode & ~S_IFMT) */ -#define OBD_MD_FLTYPE (0x00000100ULL) /* object type (mode & S_IFMT) */ -#define OBD_MD_FLUID (0x00000200ULL) /* user ID */ -#define OBD_MD_FLGID (0x00000400ULL) /* group ID */ -#define OBD_MD_FLFLAGS (0x00000800ULL) /* flags word */ -#define OBD_MD_FLNLINK (0x00002000ULL) /* link count */ -#define OBD_MD_FLGENER (0x00004000ULL) /* generation number */ -/*#define OBD_MD_FLINLINE (0x00008000ULL) inline data. used until 1.6.5 */ -#define OBD_MD_FLRDEV (0x00010000ULL) /* device number */ -#define OBD_MD_FLEASIZE (0x00020000ULL) /* extended attribute data */ -#define OBD_MD_LINKNAME (0x00040000ULL) /* symbolic link target */ -#define OBD_MD_FLHANDLE (0x00080000ULL) /* file/lock handle */ -#define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */ -#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */ -/*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */ -/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */ -#define OBD_MD_FLGROUP (0x01000000ULL) /* group */ -#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */ -#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */ - /* ->mds if epoch opens or closes - */ -#define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */ -#define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */ -#define OBD_MD_FLUSRQUOTA (0x20000000ULL) /* over quota flags sent from ost */ -#define OBD_MD_FLGRPQUOTA (0x40000000ULL) /* over quota flags sent from ost */ -#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */ - -#define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */ -#define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */ -#define OBD_MD_MEA 
(0x0000000400000000ULL) /* CMD split EA */ -#define OBD_MD_TSTATE (0x0000000800000000ULL) /* transient state field */ - -#define OBD_MD_FLXATTR (0x0000001000000000ULL) /* xattr */ -#define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */ -#define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */ -#define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */ -/* OBD_MD_FLRMTPERM (0x0000010000000000ULL) remote perm, obsolete */ -#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */ -#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */ -#define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */ -#define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */ -#define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes - * under lock; for xattr - * requests means the - * client holds the lock - */ -#define OBD_MD_FLOBJCOUNT (0x0000400000000000ULL) /* for multiple destroy */ - -/* OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) lfs lsetfacl, obsolete */ -/* OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) lfs lgetfacl, obsolete */ -/* OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) lfs rsetfacl, obsolete */ -/* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */ - -#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */ -#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent - * executed - */ - -#define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */ - -#define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \ - OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \ - OBD_MD_FLMODE | OBD_MD_FLTYPE | OBD_MD_FLUID | \ - OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLNLINK | \ - OBD_MD_FLGENER | OBD_MD_FLRDEV | OBD_MD_FLGROUP) - -#define OBD_MD_FLXATTRALL (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS) - -/* don't forget obdo_fid which is way down at the bottom so it can - * come after the definition of llog_cookie - */ - -enum 
hss_valid { - HSS_SETMASK = 0x01, - HSS_CLEARMASK = 0x02, - HSS_ARCHIVE_ID = 0x04, -}; - -struct hsm_state_set { - __u32 hss_valid; - __u32 hss_archive_id; - __u64 hss_setmask; - __u64 hss_clearmask; -}; - -/* ost_body.data values for OST_BRW */ - -#define OBD_BRW_READ 0x01 -#define OBD_BRW_WRITE 0x02 -#define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE) -#define OBD_BRW_SYNC 0x08 /* this page is a part of synchronous - * transfer and is not accounted in - * the grant. - */ -#define OBD_BRW_CHECK 0x10 -#define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */ -#define OBD_BRW_GRANTED 0x40 /* the ost manages this */ -#define OBD_BRW_NOCACHE 0x80 /* this page is a part of non-cached IO */ -#define OBD_BRW_NOQUOTA 0x100 -#define OBD_BRW_SRVLOCK 0x200 /* Client holds no lock over this page */ -#define OBD_BRW_ASYNC 0x400 /* Server may delay commit to disk */ -#define OBD_BRW_MEMALLOC 0x800 /* Client runs in the "kswapd" context */ -#define OBD_BRW_OVER_USRQUOTA 0x1000 /* Running out of user quota */ -#define OBD_BRW_OVER_GRPQUOTA 0x2000 /* Running out of group quota */ -#define OBD_BRW_SOFT_SYNC 0x4000 /* This flag notifies the server - * that the client is running low on - * space for unstable pages; asking - * it to sync quickly - */ - -#define OBD_OBJECT_EOF LUSTRE_EOF - -#define OST_MIN_PRECREATE 32 -#define OST_MAX_PRECREATE 20000 - -struct obd_ioobj { - struct ost_id ioo_oid; /* object ID, if multi-obj BRW */ - __u32 ioo_max_brw; /* low 16 bits were o_mode before 2.4, - * now (PTLRPC_BULK_OPS_COUNT - 1) in - * high 16 bits in 2.4 and later - */ - __u32 ioo_bufcnt; /* number of niobufs for this object */ -}; - -/* - * NOTE: IOOBJ_MAX_BRW_BITS defines the _offset_ of the max_brw field in - * ioo_max_brw, NOT the maximum number of bits in PTLRPC_BULK_OPS_BITS. - * That said, ioo_max_brw is a 32-bit field so the limit is also 16 bits. 
- */ -#define IOOBJ_MAX_BRW_BITS 16 -#define ioobj_max_brw_get(ioo) (((ioo)->ioo_max_brw >> IOOBJ_MAX_BRW_BITS) + 1) -#define ioobj_max_brw_set(ioo, num) \ -do { (ioo)->ioo_max_brw = ((num) - 1) << IOOBJ_MAX_BRW_BITS; } while (0) - -/* multiple of 8 bytes => can array */ -struct niobuf_remote { - __u64 rnb_offset; - __u32 rnb_len; - __u32 rnb_flags; -}; - -/* lock value block communicated between the filter and llite */ - -/* OST_LVB_ERR_INIT is needed because the return code in rc is - * negative, i.e. because ((MASK + rc) & MASK) != MASK. - */ -#define OST_LVB_ERR_INIT 0xffbadbad80000000ULL -#define OST_LVB_ERR_MASK 0xffbadbad00000000ULL -#define OST_LVB_IS_ERR(blocks) \ - ((blocks & OST_LVB_ERR_MASK) == OST_LVB_ERR_MASK) -#define OST_LVB_SET_ERR(blocks, rc) \ - do { blocks = OST_LVB_ERR_INIT + rc; } while (0) -#define OST_LVB_GET_ERR(blocks) (int)(blocks - OST_LVB_ERR_INIT) - -struct ost_lvb_v1 { - __u64 lvb_size; - __s64 lvb_mtime; - __s64 lvb_atime; - __s64 lvb_ctime; - __u64 lvb_blocks; -}; - -struct ost_lvb { - __u64 lvb_size; - __s64 lvb_mtime; - __s64 lvb_atime; - __s64 lvb_ctime; - __u64 lvb_blocks; - __u32 lvb_mtime_ns; - __u32 lvb_atime_ns; - __u32 lvb_ctime_ns; - __u32 lvb_padding; -}; - -/* - * lquota data structures - */ - -/* The lquota_id structure is a union of all the possible identifier types that - * can be used with quota, this includes: - * - 64-bit user ID - * - 64-bit group ID - * - a FID which can be used for per-directory quota in the future - */ -union lquota_id { - struct lu_fid qid_fid; /* FID for per-directory quota */ - __u64 qid_uid; /* user identifier */ - __u64 qid_gid; /* group identifier */ -}; - -/* quotactl management */ -struct obd_quotactl { - __u32 qc_cmd; - __u32 qc_type; /* see Q_* flag below */ - __u32 qc_id; - __u32 qc_stat; - struct obd_dqinfo qc_dqinfo; - struct obd_dqblk qc_dqblk; -}; - -#define Q_COPY(out, in, member) (out)->member = (in)->member - -#define QCTL_COPY(out, in) \ -do { \ - Q_COPY(out, in, qc_cmd); \ - 
Q_COPY(out, in, qc_type); \ - Q_COPY(out, in, qc_id); \ - Q_COPY(out, in, qc_stat); \ - Q_COPY(out, in, qc_dqinfo); \ - Q_COPY(out, in, qc_dqblk); \ -} while (0) - -/* Data structures associated with the quota locks */ - -/* Glimpse descriptor used for the index & per-ID quota locks */ -struct ldlm_gl_lquota_desc { - union lquota_id gl_id; /* quota ID subject to the glimpse */ - __u64 gl_flags; /* see LQUOTA_FL* below */ - __u64 gl_ver; /* new index version */ - __u64 gl_hardlimit; /* new hardlimit or qunit value */ - __u64 gl_softlimit; /* new softlimit */ - __u64 gl_time; - __u64 gl_pad2; -}; - -/* quota glimpse flags */ -#define LQUOTA_FL_EDQUOT 0x1 /* user/group out of quota space on QMT */ - -/* LVB used with quota (global and per-ID) locks */ -struct lquota_lvb { - __u64 lvb_flags; /* see LQUOTA_FL* above */ - __u64 lvb_id_may_rel; /* space that might be released later */ - __u64 lvb_id_rel; /* space released by the slave for this ID */ - __u64 lvb_id_qunit; /* current qunit value */ - __u64 lvb_pad1; -}; - -/* op codes */ -enum quota_cmd { - QUOTA_DQACQ = 601, - QUOTA_DQREL = 602, - QUOTA_LAST_OPC -}; -#define QUOTA_FIRST_OPC QUOTA_DQACQ - -/* - * MDS REQ RECORDS - */ - -/* opcodes */ -enum mds_cmd { - MDS_GETATTR = 33, - MDS_GETATTR_NAME = 34, - MDS_CLOSE = 35, - MDS_REINT = 36, - MDS_READPAGE = 37, - MDS_CONNECT = 38, - MDS_DISCONNECT = 39, - MDS_GETSTATUS = 40, - MDS_STATFS = 41, - MDS_PIN = 42, /* obsolete, never used in a release */ - MDS_UNPIN = 43, /* obsolete, never used in a release */ - MDS_SYNC = 44, - MDS_DONE_WRITING = 45, /* obsolete since 2.8.0 */ - MDS_SET_INFO = 46, - MDS_QUOTACHECK = 47, /* not used since 2.4 */ - MDS_QUOTACTL = 48, - MDS_GETXATTR = 49, - MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */ - MDS_WRITEPAGE = 51, - MDS_IS_SUBDIR = 52, /* obsolete, never used in a release */ - MDS_GET_INFO = 53, - MDS_HSM_STATE_GET = 54, - MDS_HSM_STATE_SET = 55, - MDS_HSM_ACTION = 56, - MDS_HSM_PROGRESS = 57, - MDS_HSM_REQUEST = 58, - 
MDS_HSM_CT_REGISTER = 59, - MDS_HSM_CT_UNREGISTER = 60, - MDS_SWAP_LAYOUTS = 61, - MDS_LAST_OPC -}; - -#define MDS_FIRST_OPC MDS_GETATTR - -/* - * Do not exceed 63 - */ - -enum mdt_reint_cmd { - REINT_SETATTR = 1, - REINT_CREATE = 2, - REINT_LINK = 3, - REINT_UNLINK = 4, - REINT_RENAME = 5, - REINT_OPEN = 6, - REINT_SETXATTR = 7, - REINT_RMENTRY = 8, - REINT_MIGRATE = 9, - REINT_MAX -}; - -/* the disposition of the intent outlines what was executed */ -#define DISP_IT_EXECD 0x00000001 -#define DISP_LOOKUP_EXECD 0x00000002 -#define DISP_LOOKUP_NEG 0x00000004 -#define DISP_LOOKUP_POS 0x00000008 -#define DISP_OPEN_CREATE 0x00000010 -#define DISP_OPEN_OPEN 0x00000020 -#define DISP_ENQ_COMPLETE 0x00400000 /* obsolete and unused */ -#define DISP_ENQ_OPEN_REF 0x00800000 -#define DISP_ENQ_CREATE_REF 0x01000000 -#define DISP_OPEN_LOCK 0x02000000 -#define DISP_OPEN_LEASE 0x04000000 -#define DISP_OPEN_STRIPE 0x08000000 -#define DISP_OPEN_DENY 0x10000000 - -/* INODE LOCK PARTS */ -#define MDS_INODELOCK_LOOKUP 0x000001 /* For namespace, dentry etc, and also - * was used to protect permission (mode, - * owner, group etc) before 2.4. - */ -#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */ -#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */ -#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */ - -/* The PERM bit is added int 2.4, and it is used to protect permission(mode, - * owner, group, acl etc), so to separate the permission from LOOKUP lock. - * Because for remote directories(in DNE), these locks will be granted by - * different MDTs(different ldlm namespace). - * - * For local directory, MDT will always grant UPDATE_LOCK|PERM_LOCK together. - * For Remote directory, the master MDT, where the remote directory is, will - * grant UPDATE_LOCK|PERM_LOCK, and the remote MDT, where the name entry is, - * will grant LOOKUP_LOCK. 
- */ -#define MDS_INODELOCK_PERM 0x000010 -#define MDS_INODELOCK_XATTR 0x000020 /* extended attributes */ - -#define MDS_INODELOCK_MAXSHIFT 5 -/* This FULL lock is useful to take on unlink sort of operations */ -#define MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1) - -/* NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2], - * but was moved into name[1] along with the OID to avoid consuming the - * name[2,3] fields that need to be used for the quota id (also a FID). - */ -enum { - LUSTRE_RES_ID_SEQ_OFF = 0, - LUSTRE_RES_ID_VER_OID_OFF = 1, - LUSTRE_RES_ID_WAS_VER_OFF = 2, /* see note above */ - LUSTRE_RES_ID_QUOTA_SEQ_OFF = 2, - LUSTRE_RES_ID_QUOTA_VER_OID_OFF = 3, - LUSTRE_RES_ID_HSH_OFF = 3 -}; - -#define MDS_STATUS_CONN 1 -#define MDS_STATUS_LOV 2 - -/* these should be identical to their EXT4_*_FL counterparts, they are - * redefined here only to avoid dragging in fs/ext4/ext4.h - */ -#define LUSTRE_SYNC_FL 0x00000008 /* Synchronous updates */ -#define LUSTRE_IMMUTABLE_FL 0x00000010 /* Immutable file */ -#define LUSTRE_APPEND_FL 0x00000020 /* writes to file may only append */ -#define LUSTRE_NODUMP_FL 0x00000040 /* do not dump file */ -#define LUSTRE_NOATIME_FL 0x00000080 /* do not update atime */ -#define LUSTRE_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define LUSTRE_DIRSYNC_FL 0x00010000 /* dirsync behaviour (dir only) */ -#define LUSTRE_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define LUSTRE_DIRECTIO_FL 0x00100000 /* Use direct i/o */ -#define LUSTRE_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ - -/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values - * for the client inode i_flags. The LUSTRE_*_FL are the Lustre wire - * protocol equivalents of LDISKFS_*_FL values stored on disk, while - * the S_* flags are kernel-internal values that change between kernel - * versions. These flags are set/cleared via FSFILT_IOC_{GET,SET}_FLAGS. - * See b=16526 for a full history. 
- */ -static inline int ll_ext_to_inode_flags(int flags) -{ - return (((flags & LUSTRE_SYNC_FL) ? S_SYNC : 0) | - ((flags & LUSTRE_NOATIME_FL) ? S_NOATIME : 0) | - ((flags & LUSTRE_APPEND_FL) ? S_APPEND : 0) | - ((flags & LUSTRE_DIRSYNC_FL) ? S_DIRSYNC : 0) | - ((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0)); -} - -static inline int ll_inode_to_ext_flags(int iflags) -{ - return (((iflags & S_SYNC) ? LUSTRE_SYNC_FL : 0) | - ((iflags & S_NOATIME) ? LUSTRE_NOATIME_FL : 0) | - ((iflags & S_APPEND) ? LUSTRE_APPEND_FL : 0) | - ((iflags & S_DIRSYNC) ? LUSTRE_DIRSYNC_FL : 0) | - ((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0)); -} - -/* 64 possible states */ -enum md_transient_state { - MS_RESTORE = (1 << 0), /* restore is running */ -}; - -struct mdt_body { - struct lu_fid mbo_fid1; - struct lu_fid mbo_fid2; - struct lustre_handle mbo_handle; - __u64 mbo_valid; - __u64 mbo_size; /* Offset, in the case of MDS_READPAGE */ - __s64 mbo_mtime; - __s64 mbo_atime; - __s64 mbo_ctime; - __u64 mbo_blocks; /* XID, in the case of MDS_READPAGE */ - __u64 mbo_ioepoch; - __u64 mbo_t_state; /* transient file state defined in - * enum md_transient_state - * was "ino" until 2.4.0 - */ - __u32 mbo_fsuid; - __u32 mbo_fsgid; - __u32 mbo_capability; - __u32 mbo_mode; - __u32 mbo_uid; - __u32 mbo_gid; - __u32 mbo_flags; /* LUSTRE_*_FL file attributes */ - __u32 mbo_rdev; - __u32 mbo_nlink; /* #bytes to read in the case of MDS_READPAGE */ - __u32 mbo_unused2; /* was "generation" until 2.4.0 */ - __u32 mbo_suppgid; - __u32 mbo_eadatasize; - __u32 mbo_aclsize; - __u32 mbo_max_mdsize; - __u32 mbo_unused3; /* was max_cookiesize until 2.8 */ - __u32 mbo_uid_h; /* high 32-bits of uid, for FUID */ - __u32 mbo_gid_h; /* high 32-bits of gid, for FUID */ - __u32 mbo_padding_5; /* also fix lustre_swab_mdt_body */ - __u64 mbo_padding_6; - __u64 mbo_padding_7; - __u64 mbo_padding_8; - __u64 mbo_padding_9; - __u64 mbo_padding_10; -}; /* 216 */ - -struct mdt_ioepoch { - struct lustre_handle mio_handle; - 
__u64 mio_unused1; /* was ioepoch */ - __u32 mio_unused2; /* was flags */ - __u32 mio_padding; -}; - -/* permissions for md_perm.mp_perm */ -enum { - CFS_SETUID_PERM = 0x01, - CFS_SETGID_PERM = 0x02, - CFS_SETGRP_PERM = 0x04, -}; - -struct mdt_rec_setattr { - __u32 sa_opcode; - __u32 sa_cap; - __u32 sa_fsuid; - __u32 sa_fsuid_h; - __u32 sa_fsgid; - __u32 sa_fsgid_h; - __u32 sa_suppgid; - __u32 sa_suppgid_h; - __u32 sa_padding_1; - __u32 sa_padding_1_h; - struct lu_fid sa_fid; - __u64 sa_valid; - __u32 sa_uid; - __u32 sa_gid; - __u64 sa_size; - __u64 sa_blocks; - __s64 sa_mtime; - __s64 sa_atime; - __s64 sa_ctime; - __u32 sa_attr_flags; - __u32 sa_mode; - __u32 sa_bias; /* some operation flags */ - __u32 sa_padding_3; - __u32 sa_padding_4; - __u32 sa_padding_5; -}; - -/* - * Attribute flags used in mdt_rec_setattr::sa_valid. - * The kernel's #defines for ATTR_* should not be used over the network - * since the client and MDS may run different kernels (see bug 13828) - * Therefore, we should only use MDS_ATTR_* attributes for sa_valid. 
- */ -#define MDS_ATTR_MODE 0x1ULL /* = 1 */ -#define MDS_ATTR_UID 0x2ULL /* = 2 */ -#define MDS_ATTR_GID 0x4ULL /* = 4 */ -#define MDS_ATTR_SIZE 0x8ULL /* = 8 */ -#define MDS_ATTR_ATIME 0x10ULL /* = 16 */ -#define MDS_ATTR_MTIME 0x20ULL /* = 32 */ -#define MDS_ATTR_CTIME 0x40ULL /* = 64 */ -#define MDS_ATTR_ATIME_SET 0x80ULL /* = 128 */ -#define MDS_ATTR_MTIME_SET 0x100ULL /* = 256 */ -#define MDS_ATTR_FORCE 0x200ULL /* = 512, Not a change, but a change it */ -#define MDS_ATTR_ATTR_FLAG 0x400ULL /* = 1024 */ -#define MDS_ATTR_KILL_SUID 0x800ULL /* = 2048 */ -#define MDS_ATTR_KILL_SGID 0x1000ULL /* = 4096 */ -#define MDS_ATTR_CTIME_SET 0x2000ULL /* = 8192 */ -#define MDS_ATTR_FROM_OPEN 0x4000ULL /* = 16384, called from open path, - * ie O_TRUNC - */ -#define MDS_ATTR_BLOCKS 0x8000ULL /* = 32768 */ - -#define MDS_FMODE_CLOSED 00000000 -#define MDS_FMODE_EXEC 00000004 -/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */ -/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */ -/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */ - -#define MDS_OPEN_CREATED 00000010 -#define MDS_OPEN_CROSS 00000020 - -#define MDS_OPEN_CREAT 00000100 -#define MDS_OPEN_EXCL 00000200 -#define MDS_OPEN_TRUNC 00001000 -#define MDS_OPEN_APPEND 00002000 -#define MDS_OPEN_SYNC 00010000 -#define MDS_OPEN_DIRECTORY 00200000 - -#define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */ -#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */ -#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */ -#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file. - * We do not support JOIN FILE - * anymore, reserve this flags - * just for preventing such bit - * to be reused. 
- */ - -#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */ -#define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */ -#define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */ -#define MDS_OPEN_NORESTORE 0100000000000ULL /* Do not restore file at open */ -#define MDS_OPEN_NEWSTRIPE 0200000000000ULL /* New stripe needed (restripe or - * hsm restore) - */ -#define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created - * unlinked - */ -#define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease - * delegation, succeed if it's not - * being opened with conflict mode. - */ -#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */ - -#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \ - MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \ - MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \ - MDS_OPEN_RELEASE) - -enum mds_op_bias { - MDS_CHECK_SPLIT = 1 << 0, - MDS_CROSS_REF = 1 << 1, - MDS_VTX_BYPASS = 1 << 2, - MDS_PERM_BYPASS = 1 << 3, -/* MDS_SOM = 1 << 4, obsolete since 2.8.0 */ - MDS_QUOTA_IGNORE = 1 << 5, - MDS_CLOSE_CLEANUP = 1 << 6, - MDS_KEEP_ORPHAN = 1 << 7, - MDS_RECOV_OPEN = 1 << 8, - MDS_DATA_MODIFIED = 1 << 9, - MDS_CREATE_VOLATILE = 1 << 10, - MDS_OWNEROVERRIDE = 1 << 11, - MDS_HSM_RELEASE = 1 << 12, - MDS_RENAME_MIGRATE = 1 << 13, - MDS_CLOSE_LAYOUT_SWAP = 1 << 14, -}; - -/* instance of mdt_reint_rec */ -struct mdt_rec_create { - __u32 cr_opcode; - __u32 cr_cap; - __u32 cr_fsuid; - __u32 cr_fsuid_h; - __u32 cr_fsgid; - __u32 cr_fsgid_h; - __u32 cr_suppgid1; - __u32 cr_suppgid1_h; - __u32 cr_suppgid2; - __u32 cr_suppgid2_h; - struct lu_fid cr_fid1; - struct lu_fid cr_fid2; - struct lustre_handle cr_old_handle; /* handle in case of open replay */ - __s64 cr_time; - __u64 cr_rdev; - __u64 cr_ioepoch; - __u64 cr_padding_1; /* rr_blocks */ - __u32 cr_mode; - __u32 cr_bias; - /* use of helpers set/get_mrc_cr_flags() is needed to access - * 64 bits cr_flags [cr_flags_l, 
cr_flags_h], this is done to - * extend cr_flags size without breaking 1.8 compat - */ - __u32 cr_flags_l; /* for use with open, low 32 bits */ - __u32 cr_flags_h; /* for use with open, high 32 bits */ - __u32 cr_umask; /* umask for create */ - __u32 cr_padding_4; /* rr_padding_4 */ -}; - -/* instance of mdt_reint_rec */ -struct mdt_rec_link { - __u32 lk_opcode; - __u32 lk_cap; - __u32 lk_fsuid; - __u32 lk_fsuid_h; - __u32 lk_fsgid; - __u32 lk_fsgid_h; - __u32 lk_suppgid1; - __u32 lk_suppgid1_h; - __u32 lk_suppgid2; - __u32 lk_suppgid2_h; - struct lu_fid lk_fid1; - struct lu_fid lk_fid2; - __s64 lk_time; - __u64 lk_padding_1; /* rr_atime */ - __u64 lk_padding_2; /* rr_ctime */ - __u64 lk_padding_3; /* rr_size */ - __u64 lk_padding_4; /* rr_blocks */ - __u32 lk_bias; - __u32 lk_padding_5; /* rr_mode */ - __u32 lk_padding_6; /* rr_flags */ - __u32 lk_padding_7; /* rr_padding_2 */ - __u32 lk_padding_8; /* rr_padding_3 */ - __u32 lk_padding_9; /* rr_padding_4 */ -}; - -/* instance of mdt_reint_rec */ -struct mdt_rec_unlink { - __u32 ul_opcode; - __u32 ul_cap; - __u32 ul_fsuid; - __u32 ul_fsuid_h; - __u32 ul_fsgid; - __u32 ul_fsgid_h; - __u32 ul_suppgid1; - __u32 ul_suppgid1_h; - __u32 ul_suppgid2; - __u32 ul_suppgid2_h; - struct lu_fid ul_fid1; - struct lu_fid ul_fid2; - __s64 ul_time; - __u64 ul_padding_2; /* rr_atime */ - __u64 ul_padding_3; /* rr_ctime */ - __u64 ul_padding_4; /* rr_size */ - __u64 ul_padding_5; /* rr_blocks */ - __u32 ul_bias; - __u32 ul_mode; - __u32 ul_padding_6; /* rr_flags */ - __u32 ul_padding_7; /* rr_padding_2 */ - __u32 ul_padding_8; /* rr_padding_3 */ - __u32 ul_padding_9; /* rr_padding_4 */ -}; - -/* instance of mdt_reint_rec */ -struct mdt_rec_rename { - __u32 rn_opcode; - __u32 rn_cap; - __u32 rn_fsuid; - __u32 rn_fsuid_h; - __u32 rn_fsgid; - __u32 rn_fsgid_h; - __u32 rn_suppgid1; - __u32 rn_suppgid1_h; - __u32 rn_suppgid2; - __u32 rn_suppgid2_h; - struct lu_fid rn_fid1; - struct lu_fid rn_fid2; - __s64 rn_time; - __u64 rn_padding_1; /* 
rr_atime */ - __u64 rn_padding_2; /* rr_ctime */ - __u64 rn_padding_3; /* rr_size */ - __u64 rn_padding_4; /* rr_blocks */ - __u32 rn_bias; /* some operation flags */ - __u32 rn_mode; /* cross-ref rename has mode */ - __u32 rn_padding_5; /* rr_flags */ - __u32 rn_padding_6; /* rr_padding_2 */ - __u32 rn_padding_7; /* rr_padding_3 */ - __u32 rn_padding_8; /* rr_padding_4 */ -}; - -/* instance of mdt_reint_rec */ -struct mdt_rec_setxattr { - __u32 sx_opcode; - __u32 sx_cap; - __u32 sx_fsuid; - __u32 sx_fsuid_h; - __u32 sx_fsgid; - __u32 sx_fsgid_h; - __u32 sx_suppgid1; - __u32 sx_suppgid1_h; - __u32 sx_suppgid2; - __u32 sx_suppgid2_h; - struct lu_fid sx_fid; - __u64 sx_padding_1; /* These three are rr_fid2 */ - __u32 sx_padding_2; - __u32 sx_padding_3; - __u64 sx_valid; - __s64 sx_time; - __u64 sx_padding_5; /* rr_ctime */ - __u64 sx_padding_6; /* rr_size */ - __u64 sx_padding_7; /* rr_blocks */ - __u32 sx_size; - __u32 sx_flags; - __u32 sx_padding_8; /* rr_flags */ - __u32 sx_padding_9; /* rr_padding_2 */ - __u32 sx_padding_10; /* rr_padding_3 */ - __u32 sx_padding_11; /* rr_padding_4 */ -}; - -/* - * mdt_rec_reint is the template for all mdt_reint_xxx structures. - * Do NOT change the size of various members, otherwise the value - * will be broken in lustre_swab_mdt_rec_reint(). - * - * If you add new members in other mdt_reint_xxx structures and need to use the - * rr_padding_x fields, then update lustre_swab_mdt_rec_reint() also. 
- */ -struct mdt_rec_reint { - __u32 rr_opcode; - __u32 rr_cap; - __u32 rr_fsuid; - __u32 rr_fsuid_h; - __u32 rr_fsgid; - __u32 rr_fsgid_h; - __u32 rr_suppgid1; - __u32 rr_suppgid1_h; - __u32 rr_suppgid2; - __u32 rr_suppgid2_h; - struct lu_fid rr_fid1; - struct lu_fid rr_fid2; - __s64 rr_mtime; - __s64 rr_atime; - __s64 rr_ctime; - __u64 rr_size; - __u64 rr_blocks; - __u32 rr_bias; - __u32 rr_mode; - __u32 rr_flags; - __u32 rr_flags_h; - __u32 rr_umask; - __u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */ -}; - -/* lmv structures */ -struct lmv_desc { - __u32 ld_tgt_count; /* how many MDS's */ - __u32 ld_active_tgt_count; /* how many active */ - __u32 ld_default_stripe_count; /* how many objects are used */ - __u32 ld_pattern; /* default hash pattern */ - __u64 ld_default_hash_size; - __u64 ld_padding_1; /* also fix lustre_swab_lmv_desc */ - __u32 ld_padding_2; /* also fix lustre_swab_lmv_desc */ - __u32 ld_qos_maxage; /* in second */ - __u32 ld_padding_3; /* also fix lustre_swab_lmv_desc */ - __u32 ld_padding_4; /* also fix lustre_swab_lmv_desc */ - struct obd_uuid ld_uuid; -}; - -/* LMV layout EA, and it will be stored both in master and slave object */ -struct lmv_mds_md_v1 { - __u32 lmv_magic; - __u32 lmv_stripe_count; - __u32 lmv_master_mdt_index; /* On master object, it is master - * MDT index, on slave object, it - * is stripe index of the slave obj - */ - __u32 lmv_hash_type; /* dir stripe policy, i.e. indicate - * which hash function to be used, - * Note: only lower 16 bits is being - * used for now. Higher 16 bits will - * be used to mark the object status, - * for example migrating or dead. 
- */ - __u32 lmv_layout_version; /* Used for directory restriping */ - __u32 lmv_padding1; - __u64 lmv_padding2; - __u64 lmv_padding3; - char lmv_pool_name[LOV_MAXPOOLNAME + 1];/* pool name */ - struct lu_fid lmv_stripe_fids[0]; /* FIDs for each stripe */ -}; - -#define LMV_MAGIC_V1 0x0CD20CD0 /* normal stripe lmv magic */ -#define LMV_MAGIC LMV_MAGIC_V1 - -/* #define LMV_USER_MAGIC 0x0CD30CD0 */ -#define LMV_MAGIC_STRIPE 0x0CD40CD0 /* magic for dir sub_stripe */ - -/* - *Right now only the lower part(0-16bits) of lmv_hash_type is being used, - * and the higher part will be the flag to indicate the status of object, - * for example the object is being migrated. And the hash function - * might be interpreted differently with different flags. - */ -#define LMV_HASH_TYPE_MASK 0x0000ffff - -#define LMV_HASH_FLAG_MIGRATION 0x80000000 -#define LMV_HASH_FLAG_DEAD 0x40000000 - -/** - * The FNV-1a hash algorithm is as follows: - * hash = FNV_offset_basis - * for each octet_of_data to be hashed - * hash = hash XOR octet_of_data - * hash = hash × FNV_prime - * return hash - * http://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function#FNV-1a_hash - * - * http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source - * FNV_prime is 2^40 + 2^8 + 0xb3 = 0x100000001b3ULL - **/ -#define LUSTRE_FNV_1A_64_PRIME 0x100000001b3ULL -#define LUSTRE_FNV_1A_64_OFFSET_BIAS 0xcbf29ce484222325ULL -static inline __u64 lustre_hash_fnv_1a_64(const void *buf, size_t size) -{ - __u64 hash = LUSTRE_FNV_1A_64_OFFSET_BIAS; - const unsigned char *p = buf; - size_t i; - - for (i = 0; i < size; i++) { - hash ^= p[i]; - hash *= LUSTRE_FNV_1A_64_PRIME; - } - - return hash; -} - -union lmv_mds_md { - __u32 lmv_magic; - struct lmv_mds_md_v1 lmv_md_v1; - struct lmv_user_md lmv_user_md; -}; - -static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic) -{ - ssize_t len = -EINVAL; - - switch (lmm_magic) { - case LMV_MAGIC_V1: { - struct lmv_mds_md_v1 *lmm1; - - len = 
sizeof(*lmm1); - len += stripe_count * sizeof(lmm1->lmv_stripe_fids[0]); - break; } - default: - break; - } - return len; -} - -static inline int lmv_mds_md_stripe_count_get(const union lmv_mds_md *lmm) -{ - switch (__le32_to_cpu(lmm->lmv_magic)) { - case LMV_MAGIC_V1: - return __le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count); - case LMV_USER_MAGIC: - return __le32_to_cpu(lmm->lmv_user_md.lum_stripe_count); - default: - return -EINVAL; - } -} - -enum fld_rpc_opc { - FLD_QUERY = 900, - FLD_READ = 901, - FLD_LAST_OPC, - FLD_FIRST_OPC = FLD_QUERY -}; - -enum seq_rpc_opc { - SEQ_QUERY = 700, - SEQ_LAST_OPC, - SEQ_FIRST_OPC = SEQ_QUERY -}; - -enum seq_op { - SEQ_ALLOC_SUPER = 0, - SEQ_ALLOC_META = 1 -}; - -enum fld_op { - FLD_CREATE = 0, - FLD_DELETE = 1, - FLD_LOOKUP = 2, -}; - -/* - * LOV data structures - */ - -#define LOV_MAX_UUID_BUFFER_SIZE 8192 -/* The size of the buffer the lov/mdc reserves for the - * array of UUIDs returned by the MDS. With the current - * protocol, this will limit the max number of OSTs per LOV - */ - -#define LOV_DESC_MAGIC 0xB0CCDE5C -#define LOV_DESC_QOS_MAXAGE_DEFAULT 5 /* Seconds */ -#define LOV_DESC_STRIPE_SIZE_DEFAULT (1 << LNET_MTU_BITS) - -/* LOV settings descriptor (should only contain static info) */ -struct lov_desc { - __u32 ld_tgt_count; /* how many OBD's */ - __u32 ld_active_tgt_count; /* how many active */ - __u32 ld_default_stripe_count; /* how many objects are used */ - __u32 ld_pattern; /* default PATTERN_RAID0 */ - __u64 ld_default_stripe_size; /* in bytes */ - __u64 ld_default_stripe_offset; /* in bytes */ - __u32 ld_padding_0; /* unused */ - __u32 ld_qos_maxage; /* in second */ - __u32 ld_padding_1; /* also fix lustre_swab_lov_desc */ - __u32 ld_padding_2; /* also fix lustre_swab_lov_desc */ - struct obd_uuid ld_uuid; -}; - -#define ld_magic ld_active_tgt_count /* for swabbing from llogs */ - -/* - * LDLM requests: - */ -/* opcodes -- MUST be distinct from OST/MDS opcodes */ -enum ldlm_cmd { - LDLM_ENQUEUE = 101, - 
LDLM_CONVERT = 102, - LDLM_CANCEL = 103, - LDLM_BL_CALLBACK = 104, - LDLM_CP_CALLBACK = 105, - LDLM_GL_CALLBACK = 106, - LDLM_SET_INFO = 107, - LDLM_LAST_OPC -}; -#define LDLM_FIRST_OPC LDLM_ENQUEUE - -#define RES_NAME_SIZE 4 -struct ldlm_res_id { - __u64 name[RES_NAME_SIZE]; -}; - -#define DLDLMRES "[%#llx:%#llx:%#llx].%llx" -#define PLDLMRES(res) (res)->lr_name.name[0], (res)->lr_name.name[1], \ - (res)->lr_name.name[2], (res)->lr_name.name[3] - -/* lock types */ -enum ldlm_mode { - LCK_MINMODE = 0, - LCK_EX = 1, - LCK_PW = 2, - LCK_PR = 4, - LCK_CW = 8, - LCK_CR = 16, - LCK_NL = 32, - LCK_GROUP = 64, - LCK_COS = 128, - LCK_MAXMODE -}; - -#define LCK_MODE_NUM 8 - -enum ldlm_type { - LDLM_PLAIN = 10, - LDLM_EXTENT = 11, - LDLM_FLOCK = 12, - LDLM_IBITS = 13, - LDLM_MAX_TYPE -}; - -#define LDLM_MIN_TYPE LDLM_PLAIN - -struct ldlm_extent { - __u64 start; - __u64 end; - __u64 gid; -}; - -struct ldlm_inodebits { - __u64 bits; -}; - -struct ldlm_flock_wire { - __u64 lfw_start; - __u64 lfw_end; - __u64 lfw_owner; - __u32 lfw_padding; - __u32 lfw_pid; -}; - -/* it's important that the fields of the ldlm_extent structure match - * the first fields of the ldlm_flock structure because there is only - * one ldlm_swab routine to process the ldlm_policy_data_t union. if - * this ever changes we will need to swab the union differently based - * on the resource type. 
- */ - -union ldlm_wire_policy_data { - struct ldlm_extent l_extent; - struct ldlm_flock_wire l_flock; - struct ldlm_inodebits l_inodebits; -}; - -union ldlm_gl_desc { - struct ldlm_gl_lquota_desc lquota_desc; -}; - -enum ldlm_intent_flags { - IT_OPEN = 0x00000001, - IT_CREAT = 0x00000002, - IT_OPEN_CREAT = 0x00000003, - IT_READDIR = 0x00000004, - IT_GETATTR = 0x00000008, - IT_LOOKUP = 0x00000010, - IT_UNLINK = 0x00000020, - IT_TRUNC = 0x00000040, - IT_GETXATTR = 0x00000080, - IT_EXEC = 0x00000100, - IT_PIN = 0x00000200, - IT_LAYOUT = 0x00000400, - IT_QUOTA_DQACQ = 0x00000800, - IT_QUOTA_CONN = 0x00001000, - IT_SETXATTR = 0x00002000, -}; - -struct ldlm_intent { - __u64 opc; -}; - -struct ldlm_resource_desc { - enum ldlm_type lr_type; - __u32 lr_padding; /* also fix lustre_swab_ldlm_resource_desc */ - struct ldlm_res_id lr_name; -}; - -struct ldlm_lock_desc { - struct ldlm_resource_desc l_resource; - enum ldlm_mode l_req_mode; - enum ldlm_mode l_granted_mode; - union ldlm_wire_policy_data l_policy_data; -}; - -#define LDLM_LOCKREQ_HANDLES 2 -#define LDLM_ENQUEUE_CANCEL_OFF 1 - -struct ldlm_request { - __u32 lock_flags; - __u32 lock_count; - struct ldlm_lock_desc lock_desc; - struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES]; -}; - -struct ldlm_reply { - __u32 lock_flags; - __u32 lock_padding; /* also fix lustre_swab_ldlm_reply */ - struct ldlm_lock_desc lock_desc; - struct lustre_handle lock_handle; - __u64 lock_policy_res1; - __u64 lock_policy_res2; -}; - -#define ldlm_flags_to_wire(flags) ((__u32)(flags)) -#define ldlm_flags_from_wire(flags) ((__u64)(flags)) - -/* - * Opcodes for mountconf (mgs and mgc) - */ -enum mgs_cmd { - MGS_CONNECT = 250, - MGS_DISCONNECT, - MGS_EXCEPTION, /* node died, etc. 
*/ - MGS_TARGET_REG, /* whenever target starts up */ - MGS_TARGET_DEL, - MGS_SET_INFO, - MGS_CONFIG_READ, - MGS_LAST_OPC -}; -#define MGS_FIRST_OPC MGS_CONNECT - -#define MGS_PARAM_MAXLEN 1024 -#define KEY_SET_INFO "set_info" - -struct mgs_send_param { - char mgs_param[MGS_PARAM_MAXLEN]; -}; - -/* We pass this info to the MGS so it can write config logs */ -#define MTI_NAME_MAXLEN 64 -#define MTI_PARAM_MAXLEN 4096 -#define MTI_NIDS_MAX 32 -struct mgs_target_info { - __u32 mti_lustre_ver; - __u32 mti_stripe_index; - __u32 mti_config_ver; - __u32 mti_flags; - __u32 mti_nid_count; - __u32 mti_instance; /* Running instance of target */ - char mti_fsname[MTI_NAME_MAXLEN]; - char mti_svname[MTI_NAME_MAXLEN]; - char mti_uuid[sizeof(struct obd_uuid)]; - __u64 mti_nids[MTI_NIDS_MAX]; /* host nids (lnet_nid_t)*/ - char mti_params[MTI_PARAM_MAXLEN]; -}; - -struct mgs_nidtbl_entry { - __u64 mne_version; /* table version of this entry */ - __u32 mne_instance; /* target instance # */ - __u32 mne_index; /* target index */ - __u32 mne_length; /* length of this entry - by bytes */ - __u8 mne_type; /* target type LDD_F_SV_TYPE_OST/MDT */ - __u8 mne_nid_type; /* type of nid(mbz). for ipv6. */ - __u8 mne_nid_size; /* size of each NID, by bytes */ - __u8 mne_nid_count; /* # of NIDs in buffer */ - union { - lnet_nid_t nids[0]; /* variable size buffer for NIDs. 
*/ - } u; -}; - -struct mgs_config_body { - char mcb_name[MTI_NAME_MAXLEN]; /* logname */ - __u64 mcb_offset; /* next index of config log to request */ - __u16 mcb_type; /* type of log: CONFIG_T_[CONFIG|RECOVER] */ - __u8 mcb_reserved; - __u8 mcb_bits; /* bits unit size of config log */ - __u32 mcb_units; /* # of units for bulk transfer */ -}; - -struct mgs_config_res { - __u64 mcr_offset; /* index of last config log */ - __u64 mcr_size; /* size of the log */ -}; - -/* Config marker flags (in config log) */ -#define CM_START 0x01 -#define CM_END 0x02 -#define CM_SKIP 0x04 -#define CM_UPGRADE146 0x08 -#define CM_EXCLUDE 0x10 -#define CM_START_SKIP (CM_START | CM_SKIP) - -struct cfg_marker { - __u32 cm_step; /* aka config version */ - __u32 cm_flags; - __u32 cm_vers; /* lustre release version number */ - __u32 cm_padding; /* 64 bit align */ - __s64 cm_createtime; /*when this record was first created */ - __s64 cm_canceltime; /*when this record is no longer valid*/ - char cm_tgtname[MTI_NAME_MAXLEN]; - char cm_comment[MTI_NAME_MAXLEN]; -}; - -/* - * Opcodes for multiple servers. - */ - -enum obd_cmd { - OBD_PING = 400, - OBD_LOG_CANCEL, - OBD_QC_CALLBACK, /* not used since 2.4 */ - OBD_IDX_READ, - OBD_LAST_OPC -}; -#define OBD_FIRST_OPC OBD_PING - -/** - * llog contexts indices. - * - * There is compatibility problem with indexes below, they are not - * continuous and must keep their numbers for compatibility needs. - * See LU-5218 for details. 
- */ -enum llog_ctxt_id { - LLOG_CONFIG_ORIG_CTXT = 0, - LLOG_CONFIG_REPL_CTXT = 1, - LLOG_MDS_OST_ORIG_CTXT = 2, - LLOG_MDS_OST_REPL_CTXT = 3, /* kept just to avoid re-assignment */ - LLOG_SIZE_ORIG_CTXT = 4, - LLOG_SIZE_REPL_CTXT = 5, - LLOG_TEST_ORIG_CTXT = 8, - LLOG_TEST_REPL_CTXT = 9, /* kept just to avoid re-assignment */ - LLOG_CHANGELOG_ORIG_CTXT = 12, /**< changelog generation on mdd */ - LLOG_CHANGELOG_REPL_CTXT = 13, /**< changelog access on clients */ - /* for multiple changelog consumers */ - LLOG_CHANGELOG_USER_ORIG_CTXT = 14, - LLOG_AGENT_ORIG_CTXT = 15, /**< agent requests generation on cdt */ - LLOG_MAX_CTXTS -}; - -/** Identifier for a single log object */ -struct llog_logid { - struct ost_id lgl_oi; - __u32 lgl_ogen; -} __packed; - -/** Records written to the CATALOGS list */ -#define CATLIST "CATALOGS" -struct llog_catid { - struct llog_logid lci_logid; - __u32 lci_padding1; - __u32 lci_padding2; - __u32 lci_padding3; -} __packed; - -/* Log data record types - there is no specific reason that these need to - * be related to the RPC opcodes, but no reason not to (may be handy later?) 
- */ -#define LLOG_OP_MAGIC 0x10600000 -#define LLOG_OP_MASK 0xfff00000 - -enum llog_op_type { - LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0x00000, - OST_SZ_REC = LLOG_OP_MAGIC | 0x00f00, - /* OST_RAID1_REC = LLOG_OP_MAGIC | 0x01000, never used */ - MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | - REINT_UNLINK, /* obsolete after 2.5.0 */ - MDS_UNLINK64_REC = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) | - REINT_UNLINK, - /* MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x12401, obsolete 1.8.0 */ - MDS_SETATTR64_REC = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) | - REINT_SETATTR, - OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000, - /* PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, obsolete 1.4.0 */ - LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000, - /* LLOG_JOIN_REC = LLOG_OP_MAGIC | 0x50000, obsolete 1.8.0 */ - CHANGELOG_REC = LLOG_OP_MAGIC | 0x60000, - CHANGELOG_USER_REC = LLOG_OP_MAGIC | 0x70000, - HSM_AGENT_REC = LLOG_OP_MAGIC | 0x80000, - LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539, - LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b, -}; - -#define LLOG_REC_HDR_NEEDS_SWABBING(r) \ - (((r)->lrh_type & __swab32(LLOG_OP_MASK)) == __swab32(LLOG_OP_MAGIC)) - -/** Log record header - stored in little endian order. - * Each record must start with this struct, end with a llog_rec_tail, - * and be a multiple of 256 bits in size. 
- */ -struct llog_rec_hdr { - __u32 lrh_len; - __u32 lrh_index; - __u32 lrh_type; - __u32 lrh_id; -}; - -struct llog_rec_tail { - __u32 lrt_len; - __u32 lrt_index; -}; - -/* Where data follow just after header */ -#define REC_DATA(ptr) \ - ((void *)((char *)ptr + sizeof(struct llog_rec_hdr))) - -#define REC_DATA_LEN(rec) \ - (rec->lrh_len - sizeof(struct llog_rec_hdr) - \ - sizeof(struct llog_rec_tail)) - -struct llog_logid_rec { - struct llog_rec_hdr lid_hdr; - struct llog_logid lid_id; - __u32 lid_padding1; - __u64 lid_padding2; - __u64 lid_padding3; - struct llog_rec_tail lid_tail; -} __packed; - -struct llog_unlink_rec { - struct llog_rec_hdr lur_hdr; - __u64 lur_oid; - __u32 lur_oseq; - __u32 lur_count; - struct llog_rec_tail lur_tail; -} __packed; - -struct llog_unlink64_rec { - struct llog_rec_hdr lur_hdr; - struct lu_fid lur_fid; - __u32 lur_count; /* to destroy the lost precreated */ - __u32 lur_padding1; - __u64 lur_padding2; - __u64 lur_padding3; - struct llog_rec_tail lur_tail; -} __packed; - -struct llog_setattr64_rec { - struct llog_rec_hdr lsr_hdr; - struct ost_id lsr_oi; - __u32 lsr_uid; - __u32 lsr_uid_h; - __u32 lsr_gid; - __u32 lsr_gid_h; - __u64 lsr_valid; - struct llog_rec_tail lsr_tail; -} __packed; - -struct llog_size_change_rec { - struct llog_rec_hdr lsc_hdr; - struct ll_fid lsc_fid; - __u32 lsc_ioepoch; - __u32 lsc_padding1; - __u64 lsc_padding2; - __u64 lsc_padding3; - struct llog_rec_tail lsc_tail; -} __packed; - -/* changelog llog name, needed by client replicators */ -#define CHANGELOG_CATALOG "changelog_catalog" - -struct changelog_setinfo { - __u64 cs_recno; - __u32 cs_id; -} __packed; - -/** changelog record */ -struct llog_changelog_rec { - struct llog_rec_hdr cr_hdr; - struct changelog_rec cr; /**< Variable length field */ - struct llog_rec_tail cr_do_not_use; /**< for_sizezof_only */ -} __packed; - -struct llog_changelog_user_rec { - struct llog_rec_hdr cur_hdr; - __u32 cur_id; - __u32 cur_padding; - __u64 cur_endrec; - struct 
llog_rec_tail cur_tail; -} __packed; - -enum agent_req_status { - ARS_WAITING, - ARS_STARTED, - ARS_FAILED, - ARS_CANCELED, - ARS_SUCCEED, -}; - -static inline const char *agent_req_status2name(const enum agent_req_status ars) -{ - switch (ars) { - case ARS_WAITING: - return "WAITING"; - case ARS_STARTED: - return "STARTED"; - case ARS_FAILED: - return "FAILED"; - case ARS_CANCELED: - return "CANCELED"; - case ARS_SUCCEED: - return "SUCCEED"; - default: - return "UNKNOWN"; - } -} - -struct llog_agent_req_rec { - struct llog_rec_hdr arr_hdr; /**< record header */ - __u32 arr_status; /**< status of the request */ - /* must match enum - * agent_req_status - */ - __u32 arr_archive_id; /**< backend archive number */ - __u64 arr_flags; /**< req flags */ - __u64 arr_compound_id;/**< compound cookie */ - __u64 arr_req_create; /**< req. creation time */ - __u64 arr_req_change; /**< req. status change time */ - struct hsm_action_item arr_hai; /**< req. to the agent */ - struct llog_rec_tail arr_tail; /**< record tail for_sizezof_only */ -} __packed; - -/* Old llog gen for compatibility */ -struct llog_gen { - __u64 mnt_cnt; - __u64 conn_cnt; -} __packed; - -struct llog_gen_rec { - struct llog_rec_hdr lgr_hdr; - struct llog_gen lgr_gen; - __u64 padding1; - __u64 padding2; - __u64 padding3; - struct llog_rec_tail lgr_tail; -}; - -/* flags for the logs */ -enum llog_flag { - LLOG_F_ZAP_WHEN_EMPTY = 0x1, - LLOG_F_IS_CAT = 0x2, - LLOG_F_IS_PLAIN = 0x4, - LLOG_F_EXT_JOBID = 0x8, - LLOG_F_IS_FIXSIZE = 0x10, - - /* - * Note: Flags covered by LLOG_F_EXT_MASK will be inherited from - * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here, - * because the catlog record is usually fixed size, but its plain - * log record can be variable - */ - LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID, -}; - -/* On-disk header structure of each log object, stored in little endian order */ -#define LLOG_MIN_CHUNK_SIZE 8192 -#define LLOG_HEADER_SIZE (96) /* sizeof (llog_log_hdr) + - * sizeof(llh_tail) - 
sizeof(llh_bitmap) - */ -#define LLOG_BITMAP_BYTES (LLOG_MIN_CHUNK_SIZE - LLOG_HEADER_SIZE) -#define LLOG_MIN_REC_SIZE (24) /* round(llog_rec_hdr + llog_rec_tail) */ - -/* flags for the logs */ -struct llog_log_hdr { - struct llog_rec_hdr llh_hdr; - __s64 llh_timestamp; - __u32 llh_count; - __u32 llh_bitmap_offset; - __u32 llh_size; - __u32 llh_flags; - __u32 llh_cat_idx; - /* for a catalog the first plain slot is next to it */ - struct obd_uuid llh_tgtuuid; - __u32 llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23]; - /* These fields must always be at the end of the llog_log_hdr. - * Note: llh_bitmap size is variable because llog chunk size could be - * bigger than LLOG_MIN_CHUNK_SIZE, i.e. sizeof(llog_log_hdr) > 8192 - * bytes, and the real size is stored in llh_hdr.lrh_len, which means - * llh_tail should only be referred by LLOG_HDR_TAIL(). - * But this structure is also used by client/server llog interface - * (see llog_client.c), it will be kept in its original way to avoid - * compatibility issue. 
- */ - __u32 llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)]; - struct llog_rec_tail llh_tail; -} __packed; - -#undef LLOG_HEADER_SIZE -#undef LLOG_BITMAP_BYTES - -#define LLOG_HDR_BITMAP_SIZE(llh) (__u32)((llh->llh_hdr.lrh_len - \ - llh->llh_bitmap_offset - \ - sizeof(llh->llh_tail)) * 8) -#define LLOG_HDR_BITMAP(llh) (__u32 *)((char *)(llh) + \ - (llh)->llh_bitmap_offset) -#define LLOG_HDR_TAIL(llh) ((struct llog_rec_tail *)((char *)llh + \ - llh->llh_hdr.lrh_len - \ - sizeof(llh->llh_tail))) - -/** log cookies are used to reference a specific log file and a record - * therein - */ -struct llog_cookie { - struct llog_logid lgc_lgl; - __u32 lgc_subsys; - __u32 lgc_index; - __u32 lgc_padding; -} __packed; - -/** llog protocol */ -enum llogd_rpc_ops { - LLOG_ORIGIN_HANDLE_CREATE = 501, - LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502, - LLOG_ORIGIN_HANDLE_READ_HEADER = 503, - LLOG_ORIGIN_HANDLE_WRITE_REC = 504, - LLOG_ORIGIN_HANDLE_CLOSE = 505, - LLOG_ORIGIN_CONNECT = 506, - LLOG_CATINFO = 507, /* deprecated */ - LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, - LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/ - LLOG_LAST_OPC, - LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE -}; - -struct llogd_body { - struct llog_logid lgd_logid; - __u32 lgd_ctxt_idx; - __u32 lgd_llh_flags; - __u32 lgd_index; - __u32 lgd_saved_index; - __u32 lgd_len; - __u64 lgd_cur_offset; -} __packed; - -struct llogd_conn_body { - struct llog_gen lgdc_gen; - struct llog_logid lgdc_logid; - __u32 lgdc_ctxt_idx; -} __packed; - -/* Note: 64-bit types are 64-bit aligned in structure */ -struct obdo { - __u64 o_valid; /* hot fields in this obdo */ - struct ost_id o_oi; - __u64 o_parent_seq; - __u64 o_size; /* o_size-o_blocks == ost_lvb */ - __s64 o_mtime; - __s64 o_atime; - __s64 o_ctime; - __u64 o_blocks; /* brw: cli sent cached bytes */ - __u64 o_grant; - - /* 32-bit fields start here: keep an even number of them via padding */ - __u32 o_blksize; /* optimal IO blocksize */ - __u32 o_mode; /* brw: cli sent cache 
remain */ - __u32 o_uid; - __u32 o_gid; - __u32 o_flags; - __u32 o_nlink; /* brw: checksum */ - __u32 o_parent_oid; - __u32 o_misc; /* brw: o_dropped */ - - __u64 o_ioepoch; /* epoch in ost writes */ - __u32 o_stripe_idx; /* holds stripe idx */ - __u32 o_parent_ver; - struct lustre_handle o_handle; /* brw: lock handle to prolong locks - */ - struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS, - * obsolete in 2.8, reused in OSP - */ - __u32 o_uid_h; - __u32 o_gid_h; - - __u64 o_data_version; /* getattr: sum of iversion for - * each stripe. - * brw: grant space consumed on - * the client for the write - */ - __u64 o_padding_4; - __u64 o_padding_5; - __u64 o_padding_6; -}; - -#define o_dirty o_blocks -#define o_undirty o_mode -#define o_dropped o_misc -#define o_cksum o_nlink -#define o_grant_used o_data_version - -/* request structure for OST's */ -struct ost_body { - struct obdo oa; -}; - -/* Key for FIEMAP to be used in get_info calls */ -struct ll_fiemap_info_key { - char lfik_name[8]; - struct obdo lfik_oa; - struct fiemap lfik_fiemap; -}; - -/* security opcodes */ -enum sec_cmd { - SEC_CTX_INIT = 801, - SEC_CTX_INIT_CONT = 802, - SEC_CTX_FINI = 803, - SEC_LAST_OPC, - SEC_FIRST_OPC = SEC_CTX_INIT -}; - -/* - * capa related definitions - */ -#define CAPA_HMAC_MAX_LEN 64 -#define CAPA_HMAC_KEY_MAX_LEN 56 - -/* NB take care when changing the sequence of elements this struct, - * because the offset info is used in find_capa() - */ -struct lustre_capa { - struct lu_fid lc_fid; /** fid */ - __u64 lc_opc; /** operations allowed */ - __u64 lc_uid; /** file owner */ - __u64 lc_gid; /** file group */ - __u32 lc_flags; /** HMAC algorithm & flags */ - __u32 lc_keyid; /** key# used for the capability */ - __u32 lc_timeout; /** capa timeout value (sec) */ -/* FIXME: y2038 time_t overflow: */ - __u32 lc_expiry; /** expiry time (sec) */ - __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */ -} __packed; - -/** lustre_capa::lc_opc */ -enum { - CAPA_OPC_BODY_WRITE = 1 << 0, 
/**< write object data */ - CAPA_OPC_BODY_READ = 1 << 1, /**< read object data */ - CAPA_OPC_INDEX_LOOKUP = 1 << 2, /**< lookup object fid */ - CAPA_OPC_INDEX_INSERT = 1 << 3, /**< insert object fid */ - CAPA_OPC_INDEX_DELETE = 1 << 4, /**< delete object fid */ - CAPA_OPC_OSS_WRITE = 1 << 5, /**< write oss object data */ - CAPA_OPC_OSS_READ = 1 << 6, /**< read oss object data */ - CAPA_OPC_OSS_TRUNC = 1 << 7, /**< truncate oss object */ - CAPA_OPC_OSS_DESTROY = 1 << 8, /**< destroy oss object */ - CAPA_OPC_META_WRITE = 1 << 9, /**< write object meta data */ - CAPA_OPC_META_READ = 1 << 10, /**< read object meta data */ -}; - -#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE) -#define CAPA_OPC_MDS_ONLY \ - (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \ - CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE) -#define CAPA_OPC_OSS_ONLY \ - (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC | \ - CAPA_OPC_OSS_DESTROY) -#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY -#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY) - -struct lustre_capa_key { - __u64 lk_seq; /**< mds# */ - __u32 lk_keyid; /**< key# */ - __u32 lk_padding; - __u8 lk_key[CAPA_HMAC_KEY_MAX_LEN]; /**< key */ -} __packed; - -/** The link ea holds 1 \a link_ea_entry for each hardlink */ -#define LINK_EA_MAGIC 0x11EAF1DFUL -struct link_ea_header { - __u32 leh_magic; - __u32 leh_reccount; - __u64 leh_len; /* total size */ - __u32 leh_overflow_time; - __u32 leh_padding; -}; - -/** Hardlink data is name and parent fid. 
- * Stored in this crazy struct for maximum packing and endian-neutrality - */ -struct link_ea_entry { - /** __u16 stored big-endian, unaligned */ - unsigned char lee_reclen[2]; - unsigned char lee_parent_fid[sizeof(struct lu_fid)]; - char lee_name[0]; -} __packed; - -/** fid2path request/reply structure */ -struct getinfo_fid2path { - struct lu_fid gf_fid; - __u64 gf_recno; - __u32 gf_linkno; - __u32 gf_pathlen; - char gf_path[0]; -} __packed; - -/** path2parent request/reply structures */ -struct getparent { - struct lu_fid gp_fid; /**< parent FID */ - __u32 gp_linkno; /**< hardlink number */ - __u32 gp_name_size; /**< size of the name field */ - char gp_name[0]; /**< zero-terminated link name */ -} __packed; - -enum { - LAYOUT_INTENT_ACCESS = 0, - LAYOUT_INTENT_READ = 1, - LAYOUT_INTENT_WRITE = 2, - LAYOUT_INTENT_GLIMPSE = 3, - LAYOUT_INTENT_TRUNC = 4, - LAYOUT_INTENT_RELEASE = 5, - LAYOUT_INTENT_RESTORE = 6 -}; - -/* enqueue layout lock with intent */ -struct layout_intent { - __u32 li_opc; /* intent operation for enqueue, read, write etc */ - __u32 li_flags; - __u64 li_start; - __u64 li_end; -}; - -/** - * On the wire version of hsm_progress structure. - * - * Contains the userspace hsm_progress and some internal fields. 
- */ -struct hsm_progress_kernel { - /* Field taken from struct hsm_progress */ - struct lu_fid hpk_fid; - __u64 hpk_cookie; - struct hsm_extent hpk_extent; - __u16 hpk_flags; - __u16 hpk_errval; /* positive val */ - __u32 hpk_padding1; - /* Additional fields */ - __u64 hpk_data_version; - __u64 hpk_padding2; -} __packed; - -/** layout swap request structure - * fid1 and fid2 are in mdt_body - */ -struct mdc_swap_layouts { - __u64 msl_flags; -} __packed; - -struct close_data { - struct lustre_handle cd_handle; - struct lu_fid cd_fid; - __u64 cd_data_version; - __u64 cd_reserved[8]; -}; - -#endif -/** @} lustreidl */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h deleted file mode 100644 index 6e4e109fb874..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ioctl.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -#ifndef _UAPI_LUSTRE_IOCTL_H_ -#define _UAPI_LUSTRE_IOCTL_H_ - -#include -#include -#include -#include - -#if !defined(__KERNEL__) && !defined(LUSTRE_UTILS) -# error This file is for Lustre internal use only. -#endif - -enum md_echo_cmd { - ECHO_MD_CREATE = 1, /* Open/Create file on MDT */ - ECHO_MD_MKDIR = 2, /* Mkdir on MDT */ - ECHO_MD_DESTROY = 3, /* Unlink file on MDT */ - ECHO_MD_RMDIR = 4, /* Rmdir on MDT */ - ECHO_MD_LOOKUP = 5, /* Lookup on MDT */ - ECHO_MD_GETATTR = 6, /* Getattr on MDT */ - ECHO_MD_SETATTR = 7, /* Setattr on MDT */ - ECHO_MD_ALLOC_FID = 8, /* Get FIDs from MDT */ -}; - -#define OBD_DEV_ID 1 -#define OBD_DEV_NAME "obd" -#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME - -#define OBD_IOCTL_VERSION 0x00010004 -#define OBD_DEV_BY_DEVNAME 0xffffd0de - -struct obd_ioctl_data { - __u32 ioc_len; - __u32 ioc_version; - - union { - __u64 ioc_cookie; - __u64 ioc_u64_1; - }; - union { - __u32 ioc_conn1; - __u32 ioc_u32_1; - }; - union { - __u32 ioc_conn2; - __u32 ioc_u32_2; - }; - - struct obdo ioc_obdo1; - struct obdo ioc_obdo2; - - __u64 ioc_count; - __u64 ioc_offset; - __u32 ioc_dev; - __u32 ioc_command; - - __u64 ioc_nid; - __u32 ioc_nal; - __u32 ioc_type; - - /* buffers the kernel will treat as user pointers */ - __u32 ioc_plen1; - char __user *ioc_pbuf1; - __u32 ioc_plen2; - char __user *ioc_pbuf2; - - /* inline buffers for various arguments */ - __u32 ioc_inllen1; - char *ioc_inlbuf1; - __u32 ioc_inllen2; - char *ioc_inlbuf2; - __u32 ioc_inllen3; - char *ioc_inlbuf3; - __u32 ioc_inllen4; - char *ioc_inlbuf4; - - char ioc_bulk[0]; -}; - -struct obd_ioctl_hdr { - __u32 ioc_len; - __u32 ioc_version; -}; - -static inline __u32 obd_ioctl_packlen(struct obd_ioctl_data *data) -{ - __u32 len = __ALIGN_KERNEL(sizeof(*data), 8); - - len += __ALIGN_KERNEL(data->ioc_inllen1, 8); - len += __ALIGN_KERNEL(data->ioc_inllen2, 8); - len += __ALIGN_KERNEL(data->ioc_inllen3, 8); - len += __ALIGN_KERNEL(data->ioc_inllen4, 8); - - return len; -} - -/* - * 
OBD_IOC_DATA_TYPE is only for compatibility reasons with older - * Linux Lustre user tools. New ioctls should NOT use this macro as - * the ioctl "size". Instead the ioctl should get a "size" argument - * which is the actual data type used by the ioctl, to ensure the - * ioctl interface is versioned correctly. - */ -#define OBD_IOC_DATA_TYPE long - -/* IOC_LDLM_TEST _IOWR('f', 40, long) */ -/* IOC_LDLM_DUMP _IOWR('f', 41, long) */ -/* IOC_LDLM_REGRESS_START _IOWR('f', 42, long) */ -/* IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long) */ - -#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DESTROY _IOW('f', 104, OBD_IOC_DATA_TYPE) -/* OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) */ - -#define OBD_IOC_SETATTR _IOW('f', 107, OBD_IOC_DATA_TYPE) -#define OBD_IOC_GETATTR _IOWR('f', 108, OBD_IOC_DATA_TYPE) -#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE) -#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SYNC _IOW('f', 114, OBD_IOC_DATA_TYPE) -/* OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_COPY _IOWR('f', 120, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_MIGR _IOWR('f', 121, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_PUNCH _IOWR('f', 122, OBD_IOC_DATA_TYPE) */ - -/* OBD_IOC_MODULE_DEBUG _IOWR('f', 124, OBD_IOC_DATA_TYPE) */ -#define OBD_IOC_BRW_READ _IOWR('f', 125, OBD_IOC_DATA_TYPE) -#define OBD_IOC_BRW_WRITE _IOWR('f', 126, OBD_IOC_DATA_TYPE) -#define OBD_IOC_NAME2DEV _IOWR('f', 127, OBD_IOC_DATA_TYPE) -#define OBD_IOC_UUID2DEV _IOWR('f', 130, OBD_IOC_DATA_TYPE) -#define OBD_IOC_GETNAME _IOWR('f', 131, OBD_IOC_DATA_TYPE) -#define OBD_IOC_GETMDNAME _IOR('f', 131, char[MAX_OBD_NAME]) -#define OBD_IOC_GETDTNAME OBD_IOC_GETNAME -#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CLIENT_RECOVER _IOW('f', 133, 
OBD_IOC_DATA_TYPE) -#define OBD_IOC_PING_TARGET _IOW('f', 136, OBD_IOC_DATA_TYPE) - -/* OBD_IOC_DEC_FS_USE_COUNT _IO('f', 139) */ -#define OBD_IOC_NO_TRANSNO _IOW('f', 140, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SET_READONLY _IOW('f', 141, OBD_IOC_DATA_TYPE) -#define OBD_IOC_ABORT_RECOVERY _IOR('f', 142, OBD_IOC_DATA_TYPE) -/* OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) */ -#define OBD_GET_VERSION _IOWR('f', 144, OBD_IOC_DATA_TYPE) -/* OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_CLOSE_UUID _IOWR('f', 147, OBD_IOC_DATA_TYPE) */ -#define OBD_IOC_CHANGELOG_SEND _IOW('f', 148, OBD_IOC_DATA_TYPE) -#define OBD_IOC_GETDEVICE _IOWR('f', 149, OBD_IOC_DATA_TYPE) -#define OBD_IOC_FID2PATH _IOWR('f', 150, OBD_IOC_DATA_TYPE) -/* lustre/lustre_user.h 151-153 */ -/* OBD_IOC_LOV_SETSTRIPE 154 LL_IOC_LOV_SETSTRIPE */ -/* OBD_IOC_LOV_GETSTRIPE 155 LL_IOC_LOV_GETSTRIPE */ -/* OBD_IOC_LOV_SETEA 156 LL_IOC_LOV_SETEA */ -/* lustre/lustre_user.h 157-159 */ -/* OBD_IOC_QUOTACHECK _IOW('f', 160, int) */ -/* OBD_IOC_POLL_QUOTACHECK _IOR('f', 161, struct if_quotacheck *) */ -#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl) -/* lustre/lustre_user.h 163-176 */ -#define OBD_IOC_CHANGELOG_REG _IOW('f', 177, struct obd_ioctl_data) -#define OBD_IOC_CHANGELOG_DEREG _IOW('f', 178, struct obd_ioctl_data) -#define OBD_IOC_CHANGELOG_CLEAR _IOW('f', 179, struct obd_ioctl_data) -/* OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_DORECORD _IOWR('f', 183, OBD_IOC_DATA_TYPE) */ -#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE) -/* OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) */ -/* OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) */ -#define OBD_IOC_PARAM _IOW('f', 187, OBD_IOC_DATA_TYPE) -#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE) -#define OBD_IOC_REPLACE_NIDS _IOWR('f', 189, 
OBD_IOC_DATA_TYPE) - -#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, OBD_IOC_DATA_TYPE) -/* OBD_IOC_LLOG_CATINFO _IOWR('f', 196, OBD_IOC_DATA_TYPE) */ -#define OBD_IOC_NODEMAP _IOWR('f', 197, OBD_IOC_DATA_TYPE) - -/* ECHO_IOC_GET_STRIPE _IOWR('f', 200, OBD_IOC_DATA_TYPE) */ -/* ECHO_IOC_SET_STRIPE _IOWR('f', 201, OBD_IOC_DATA_TYPE) */ -/* ECHO_IOC_ENQUEUE _IOWR('f', 202, OBD_IOC_DATA_TYPE) */ -/* ECHO_IOC_CANCEL _IOWR('f', 203, OBD_IOC_DATA_TYPE) */ - -#define OBD_IOC_GET_OBJ_VERSION _IOR('f', 210, OBD_IOC_DATA_TYPE) - -/* lustre/lustre_user.h 212-217 */ -#define OBD_IOC_GET_MNTOPT _IOW('f', 220, mntopt_t) -#define OBD_IOC_ECHO_MD _IOR('f', 221, struct obd_ioctl_data) -#define OBD_IOC_ECHO_ALLOC_SEQ _IOWR('f', 222, struct obd_ioctl_data) -#define OBD_IOC_START_LFSCK _IOWR('f', 230, OBD_IOC_DATA_TYPE) -#define OBD_IOC_STOP_LFSCK _IOW('f', 231, OBD_IOC_DATA_TYPE) -#define OBD_IOC_QUERY_LFSCK _IOR('f', 232, struct obd_ioctl_data) -/* lustre/lustre_user.h 240-249 */ -/* LIBCFS_IOC_DEBUG_MASK 250 */ - -#define IOC_OSC_SET_ACTIVE _IOWR('h', 21, void *) - -#endif /* _UAPI_LUSTRE_IOCTL_H_ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h deleted file mode 100644 index 94dadbe8e069..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_kernelcomm.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2013, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * - * Author: Nathan Rutman - * - * Kernel <-> userspace communication routines. - * The definitions below are used in the kernel and userspace. - */ - -#ifndef __UAPI_LUSTRE_KERNELCOMM_H__ -#define __UAPI_LUSTRE_KERNELCOMM_H__ - -#include - -/* KUC message header. - * All current and future KUC messages should use this header. - * To avoid having to include Lustre headers from libcfs, define this here. 
- */ -struct kuc_hdr { - __u16 kuc_magic; - /* Each new Lustre feature should use a different transport */ - __u8 kuc_transport; - __u8 kuc_flags; - /* Message type or opcode, transport-specific */ - __u16 kuc_msgtype; - /* Including header */ - __u16 kuc_msglen; -} __aligned(sizeof(__u64)); - -#define KUC_CHANGELOG_MSG_MAXSIZE (sizeof(struct kuc_hdr) + CR_MAXSIZE) - -#define KUC_MAGIC 0x191C /*Lustre9etLinC */ - -/* kuc_msgtype values are defined in each transport */ -enum kuc_transport_type { - KUC_TRANSPORT_GENERIC = 1, - KUC_TRANSPORT_HSM = 2, - KUC_TRANSPORT_CHANGELOG = 3, -}; - -enum kuc_generic_message_type { - KUC_MSG_SHUTDOWN = 1, -}; - -/* KUC Broadcast Groups. This determines which userspace process hears which - * messages. Mutliple transports may be used within a group, or multiple - * groups may use the same transport. Broadcast - * groups need not be used if e.g. a UID is specified instead; - * use group 0 to signify unicast. - */ -#define KUC_GRP_HSM 0x02 -#define KUC_GRP_MAX KUC_GRP_HSM - -#define LK_FLG_STOP 0x01 -#define LK_NOFD -1U - -/* kernelcomm control structure, passed from userspace to kernel */ -struct lustre_kernelcomm { - __u32 lk_wfd; - __u32 lk_rfd; - __u32 lk_uid; - __u32 lk_group; - __u32 lk_data; - __u32 lk_flags; -} __packed; - -#endif /* __UAPI_LUSTRE_KERNELCOMM_H__ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h deleted file mode 100644 index 3343b602219b..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ostid.h +++ /dev/null @@ -1,236 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - * - * Copyright 2015 Cray Inc, all rights reserved. - * Author: Ben Evans. - * - * Define ost_id associated functions - */ - -#ifndef _UAPI_LUSTRE_OSTID_H_ -#define _UAPI_LUSTRE_OSTID_H_ - -#include -#include - -static inline __u64 lmm_oi_id(const struct ost_id *oi) -{ - return oi->oi.oi_id; -} - -static inline __u64 lmm_oi_seq(const struct ost_id *oi) -{ - return oi->oi.oi_seq; -} - -static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq) -{ - oi->oi.oi_seq = seq; -} - -static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid) -{ - oi->oi.oi_id = oid; -} - -static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi, - const struct ost_id *src_oi) -{ - dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id); - dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq); -} - -static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi, - const struct ost_id *src_oi) -{ - dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id); - dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq); -} - -/* extract OST sequence (group) from a wire ost_id (id/seq) pair */ -static inline __u64 ostid_seq(const struct ost_id *ostid) -{ - if (fid_seq_is_mdt0(ostid->oi.oi_seq)) - return FID_SEQ_OST_MDT0; - - if (fid_seq_is_default(ostid->oi.oi_seq)) - return FID_SEQ_LOV_DEFAULT; - - if 
(fid_is_idif(&ostid->oi_fid)) - return FID_SEQ_OST_MDT0; - - return fid_seq(&ostid->oi_fid); -} - -/* extract OST objid from a wire ost_id (id/seq) pair */ -static inline __u64 ostid_id(const struct ost_id *ostid) -{ - if (fid_seq_is_mdt0(ostid->oi.oi_seq)) - return ostid->oi.oi_id & IDIF_OID_MASK; - - if (fid_seq_is_default(ostid->oi.oi_seq)) - return ostid->oi.oi_id; - - if (fid_is_idif(&ostid->oi_fid)) - return fid_idif_id(fid_seq(&ostid->oi_fid), - fid_oid(&ostid->oi_fid), 0); - - return fid_oid(&ostid->oi_fid); -} - -static inline void ostid_set_seq(struct ost_id *oi, __u64 seq) -{ - if (fid_seq_is_mdt0(seq) || fid_seq_is_default(seq)) { - oi->oi.oi_seq = seq; - } else { - oi->oi_fid.f_seq = seq; - /* - * Note: if f_oid + f_ver is zero, we need init it - * to be 1, otherwise, ostid_seq will treat this - * as old ostid (oi_seq == 0) - */ - if (!oi->oi_fid.f_oid && !oi->oi_fid.f_ver) - oi->oi_fid.f_oid = LUSTRE_FID_INIT_OID; - } -} - -static inline void ostid_set_seq_mdt0(struct ost_id *oi) -{ - ostid_set_seq(oi, FID_SEQ_OST_MDT0); -} - -static inline void ostid_set_seq_echo(struct ost_id *oi) -{ - ostid_set_seq(oi, FID_SEQ_ECHO); -} - -static inline void ostid_set_seq_llog(struct ost_id *oi) -{ - ostid_set_seq(oi, FID_SEQ_LLOG); -} - -static inline void ostid_cpu_to_le(const struct ost_id *src_oi, - struct ost_id *dst_oi) -{ - if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) { - dst_oi->oi.oi_id = __cpu_to_le64(src_oi->oi.oi_id); - dst_oi->oi.oi_seq = __cpu_to_le64(src_oi->oi.oi_seq); - } else { - fid_cpu_to_le(&dst_oi->oi_fid, &src_oi->oi_fid); - } -} - -static inline void ostid_le_to_cpu(const struct ost_id *src_oi, - struct ost_id *dst_oi) -{ - if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) { - dst_oi->oi.oi_id = __le64_to_cpu(src_oi->oi.oi_id); - dst_oi->oi.oi_seq = __le64_to_cpu(src_oi->oi.oi_seq); - } else { - fid_le_to_cpu(&dst_oi->oi_fid, &src_oi->oi_fid); - } -} - -/** - * Sigh, because pre-2.4 uses - * struct lov_mds_md_v1 { - * ........ 
- * __u64 lmm_object_id; - * __u64 lmm_object_seq; - * ...... - * } - * to identify the LOV(MDT) object, and lmm_object_seq will - * be normal_fid, which make it hard to combine these conversion - * to ostid_to FID. so we will do lmm_oi/fid conversion separately - * - * We can tell the lmm_oi by this way, - * 1.8: lmm_object_id = {inode}, lmm_object_gr = 0 - * 2.1: lmm_object_id = {oid < 128k}, lmm_object_seq = FID_SEQ_NORMAL - * 2.4: lmm_oi.f_seq = FID_SEQ_NORMAL, lmm_oi.f_oid = {oid < 128k}, - * lmm_oi.f_ver = 0 - * - * But currently lmm_oi/lsm_oi does not have any "real" usages, - * except for printing some information, and the user can always - * get the real FID from LMA, besides this multiple case check might - * make swab more complicate. So we will keep using id/seq for lmm_oi. - */ - -static inline void fid_to_lmm_oi(const struct lu_fid *fid, - struct ost_id *oi) -{ - oi->oi.oi_id = fid_oid(fid); - oi->oi.oi_seq = fid_seq(fid); -} - -/** - * Unpack an OST object id/seq (group) into a FID. This is needed for - * converting all obdo, lmm, lsm, etc. 64-bit id/seq pairs into proper - * FIDs. Note that if an id/seq is already in FID/IDIF format it will - * be passed through unchanged. Only legacy OST objects in "group 0" - * will be mapped into the IDIF namespace so that they can fit into the - * struct lu_fid fields without loss. - */ -static inline int ostid_to_fid(struct lu_fid *fid, const struct ost_id *ostid, - __u32 ost_idx) -{ - __u64 seq = ostid_seq(ostid); - - if (ost_idx > 0xffff) - return -EBADF; - - if (fid_seq_is_mdt0(seq)) { - __u64 oid = ostid_id(ostid); - - /* This is a "legacy" (old 1.x/2.early) OST object in "group 0" - * that we map into the IDIF namespace. It allows up to 2^48 - * objects per OST, as this is the object namespace that has - * been in production for years. This can handle create rates - * of 1M objects/s/OST for 9 years, or combinations thereof. 
- */ - if (oid >= IDIF_MAX_OID) - return -EBADF; - - fid->f_seq = fid_idif_seq(oid, ost_idx); - /* truncate to 32 bits by assignment */ - fid->f_oid = oid; - /* in theory, not currently used */ - fid->f_ver = oid >> 48; - } else if (!fid_seq_is_default(seq)) { - /* This is either an IDIF object, which identifies objects - * across all OSTs, or a regular FID. The IDIF namespace - * maps legacy OST objects into the FID namespace. In both - * cases, we just pass the FID through, no conversion needed. - */ - if (ostid->oi_fid.f_ver) - return -EBADF; - - *fid = ostid->oi_fid; - } - - return 0; -} -#endif /* _UAPI_LUSTRE_OSTID_H_ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h deleted file mode 100644 index 1eab2ceca338..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_param.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * User-settable parameter keys - * - * Author: Nathan Rutman - */ - -#ifndef _UAPI_LUSTRE_PARAM_H_ -#define _UAPI_LUSTRE_PARAM_H_ - -/** \defgroup param param - * - * @{ - */ - -/****************** User-settable parameter keys *********************/ -/* e.g. - * tunefs.lustre --param="failover.node=192.168.0.13@tcp0" /dev/sda - * lctl conf_param testfs-OST0000 failover.node=3@elan,192.168.0.3@tcp0 - * ... testfs-MDT0000.lov.stripesize=4M - * ... testfs-OST0000.ost.client_cache_seconds=15 - * ... testfs.sys.timeout= - * ... testfs.llite.max_read_ahead_mb=16 - */ - -/* System global or special params not handled in obd's proc - * See mgs_write_log_sys() - */ -#define PARAM_TIMEOUT "timeout=" /* global */ -#define PARAM_LDLM_TIMEOUT "ldlm_timeout=" /* global */ -#define PARAM_AT_MIN "at_min=" /* global */ -#define PARAM_AT_MAX "at_max=" /* global */ -#define PARAM_AT_EXTRA "at_extra=" /* global */ -#define PARAM_AT_EARLY_MARGIN "at_early_margin=" /* global */ -#define PARAM_AT_HISTORY "at_history=" /* global */ -#define PARAM_JOBID_VAR "jobid_var=" /* global */ -#define PARAM_MGSNODE "mgsnode=" /* only at mounttime */ -#define PARAM_FAILNODE "failover.node=" /* add failover nid */ -#define PARAM_FAILMODE "failover.mode=" /* initial mount only */ -#define PARAM_ACTIVE "active=" /* activate/deactivate */ -#define PARAM_NETWORK "network=" /* bind on nid */ -#define PARAM_ID_UPCALL "identity_upcall=" /* identity upcall */ - -/* Prefixes for parameters handled by obd's proc methods (XXX_process_config) */ -#define PARAM_OST "ost." -#define PARAM_OSD "osd." -#define PARAM_OSC "osc." -#define PARAM_MDT "mdt." -#define PARAM_HSM "mdt.hsm." -#define PARAM_MDD "mdd." -#define PARAM_MDC "mdc." -#define PARAM_LLITE "llite." -#define PARAM_LOV "lov." -#define PARAM_LOD "lod." -#define PARAM_OSP "osp." -#define PARAM_SYS "sys." 
/* global */ -#define PARAM_SRPC "srpc." -#define PARAM_SRPC_FLVR "srpc.flavor." -#define PARAM_SRPC_UDESC "srpc.udesc.cli2mdt" -#define PARAM_SEC "security." -#define PARAM_QUOTA "quota." /* global */ - -/** @} param */ - -#endif /* _UAPI_LUSTRE_PARAM_H_ */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h deleted file mode 100644 index 69387f36d1f1..000000000000 --- a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_user.h +++ /dev/null @@ -1,1327 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre/lustre_user.h - * - * Lustre public user-space interface definitions. 
- */ - -#ifndef _LUSTRE_USER_H -#define _LUSTRE_USER_H - -/** \defgroup lustreuser lustreuser - * - * @{ - */ - -#ifdef __KERNEL__ -# include -# include -# include -# include /* snprintf() */ -# include -#else /* !__KERNEL__ */ -# define NEED_QUOTA_DEFS -# include /* snprintf() */ -# include -# include -# include -#endif /* __KERNEL__ */ -#include - -/* - * We need to always use 64bit version because the structure - * is shared across entire cluster where 32bit and 64bit machines - * are co-existing. - */ -#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64) -typedef struct stat64 lstat_t; -#define lstat_f lstat64 -#define fstat_f fstat64 -#define fstatat_f fstatat64 -#else -typedef struct stat lstat_t; -#define lstat_f lstat -#define fstat_f fstat -#define fstatat_f fstatat -#endif - -#define HAVE_LOV_USER_MDS_DATA - -#define LUSTRE_EOF 0xffffffffffffffffULL - -/* for statfs() */ -#define LL_SUPER_MAGIC 0x0BD00BD0 - -#ifndef FSFILT_IOC_GETFLAGS -#define FSFILT_IOC_GETFLAGS _IOR('f', 1, long) -#define FSFILT_IOC_SETFLAGS _IOW('f', 2, long) -#define FSFILT_IOC_GETVERSION _IOR('f', 3, long) -#define FSFILT_IOC_SETVERSION _IOW('f', 4, long) -#define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long) -#define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long) -#endif - -/* FIEMAP flags supported by Lustre */ -#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER) - -enum obd_statfs_state { - OS_STATE_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */ - OS_STATE_READONLY = 0x00000002, /**< filesystem is read-only */ - OS_STATE_RDONLY_1 = 0x00000004, /**< obsolete 1.6, was EROFS=30 */ - OS_STATE_RDONLY_2 = 0x00000008, /**< obsolete 1.6, was EROFS=30 */ - OS_STATE_RDONLY_3 = 0x00000010, /**< obsolete 1.6, was EROFS=30 */ -}; - -struct obd_statfs { - __u64 os_type; - __u64 os_blocks; - __u64 os_bfree; - __u64 os_bavail; - __u64 os_files; - __u64 os_ffree; - __u8 os_fsid[40]; - __u32 os_bsize; - __u32 os_namelen; - __u64 os_maxbytes; - __u32 os_state; 
/**< obd_statfs_state OS_STATE_* flag */ - __u32 os_fprecreated; /* objs available now to the caller */ - /* used in QoS code to find preferred OSTs */ - __u32 os_spare2; - __u32 os_spare3; - __u32 os_spare4; - __u32 os_spare5; - __u32 os_spare6; - __u32 os_spare7; - __u32 os_spare8; - __u32 os_spare9; -}; - -/** - * File IDentifier. - * - * FID is a cluster-wide unique identifier of a file or an object (stripe). - * FIDs are never reused. - **/ -struct lu_fid { - /** - * FID sequence. Sequence is a unit of migration: all files (objects) - * with FIDs from a given sequence are stored on the same server. - * Lustre should support 2^64 objects, so even if each sequence - * has only a single object we can still enumerate 2^64 objects. - **/ - __u64 f_seq; - /* FID number within sequence. */ - __u32 f_oid; - /** - * FID version, used to distinguish different versions (in the sense - * of snapshots, etc.) of the same file system object. Not currently - * used. - **/ - __u32 f_ver; -}; - -static inline bool fid_is_zero(const struct lu_fid *fid) -{ - return !fid->f_seq && !fid->f_oid; -} - -struct filter_fid { - struct lu_fid ff_parent; /* ff_parent.f_ver == file stripe number */ -}; - -/* keep this one for compatibility */ -struct filter_fid_old { - struct lu_fid ff_parent; - __u64 ff_objid; - __u64 ff_seq; -}; - -/* Userspace should treat lu_fid as opaque, and only use the following methods - * to print or parse them. Other functions (e.g. compare, swab) could be moved - * here from lustre_idl.h if needed. - */ -struct lu_fid; - -/** - * Following struct for object attributes, that will be kept inode's EA. - * Introduced in 2.0 release (please see b15993, for details) - * Added to all objects since Lustre 2.4 as contains self FID - */ -struct lustre_mdt_attrs { - /** - * Bitfield for supported data in this structure. From enum lma_compat. - * lma_self_fid and lma_flags are always available. - */ - __u32 lma_compat; - /** - * Per-file incompat feature list. 
Lustre version should support all - * flags set in this field. The supported feature mask is available in - * LMA_INCOMPAT_SUPP. - */ - __u32 lma_incompat; - /** FID of this inode */ - struct lu_fid lma_self_fid; -}; - -/** - * Prior to 2.4, the LMA structure also included SOM attributes which has since - * been moved to a dedicated xattr - * lma_flags was also removed because of lma_compat/incompat fields. - */ -#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64)) - -/** - * OST object IDentifier. - */ -struct ost_id { - union { - struct { - __u64 oi_id; - __u64 oi_seq; - } oi; - struct lu_fid oi_fid; - }; -}; - -#define DOSTID "%#llx:%llu" -#define POSTID(oi) ostid_seq(oi), ostid_id(oi) - -/* - * The ioctl naming rules: - * LL_* - works on the currently opened filehandle instead of parent dir - * *_OBD_* - gets data for both OSC or MDC (LOV, LMV indirectly) - * *_MDC_* - gets/sets data related to MDC - * *_LOV_* - gets/sets data related to OSC/LOV - * *FILE* - called on parent dir and passes in a filename - * *STRIPE* - set/get lov_user_md - * *INFO - set/get lov_user_mds_data - */ -/* lustre_ioctl.h 101-150 */ -#define LL_IOC_GETFLAGS _IOR('f', 151, long) -#define LL_IOC_SETFLAGS _IOW('f', 152, long) -#define LL_IOC_CLRFLAGS _IOW('f', 153, long) -#define LL_IOC_LOV_SETSTRIPE _IOW('f', 154, long) -#define LL_IOC_LOV_GETSTRIPE _IOW('f', 155, long) -#define LL_IOC_LOV_SETEA _IOW('f', 156, long) -/* LL_IOC_RECREATE_OBJ 157 obsolete */ -/* LL_IOC_RECREATE_FID 158 obsolete */ -#define LL_IOC_GROUP_LOCK _IOW('f', 158, long) -#define LL_IOC_GROUP_UNLOCK _IOW('f', 159, long) -/* #define LL_IOC_QUOTACHECK 160 OBD_IOC_QUOTACHECK */ -/* #define LL_IOC_POLL_QUOTACHECK 161 OBD_IOC_POLL_QUOTACHECK */ -/* #define LL_IOC_QUOTACTL 162 OBD_IOC_QUOTACTL */ -#define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *) -/* IOC_LOV_GETINFO 165 obsolete */ -#define LL_IOC_FLUSHCTX _IOW('f', 166, long) -/* LL_IOC_RMTACL 167 obsolete */ -#define LL_IOC_GETOBDCOUNT 
_IOR('f', 168, long) -#define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long) -#define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long) -#define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid) -#define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long) -#define LL_IOC_PATH2FID _IOR('f', 173, long) -#define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *) -#define LL_IOC_GET_MDTIDX _IOR('f', 175, int) - -/* lustre_ioctl.h 177-210 */ -#define LL_IOC_HSM_STATE_GET _IOR('f', 211, struct hsm_user_state) -#define LL_IOC_HSM_STATE_SET _IOW('f', 212, struct hsm_state_set) -#define LL_IOC_HSM_CT_START _IOW('f', 213, struct lustre_kernelcomm) -#define LL_IOC_HSM_COPY_START _IOW('f', 214, struct hsm_copy *) -#define LL_IOC_HSM_COPY_END _IOW('f', 215, struct hsm_copy *) -#define LL_IOC_HSM_PROGRESS _IOW('f', 216, struct hsm_user_request) -#define LL_IOC_HSM_REQUEST _IOW('f', 217, struct hsm_user_request) -#define LL_IOC_DATA_VERSION _IOR('f', 218, struct ioc_data_version) -#define LL_IOC_LOV_SWAP_LAYOUTS _IOW('f', 219, \ - struct lustre_swap_layouts) -#define LL_IOC_HSM_ACTION _IOR('f', 220, \ - struct hsm_current_action) -/* see for ioctl numbers 221-232 */ - -#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md) -#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md) -#define LL_IOC_SET_LEASE _IOWR('f', 243, long) -#define LL_IOC_GET_LEASE _IO('f', 244) -#define LL_IOC_HSM_IMPORT _IOWR('f', 245, struct hsm_user_import) -#define LL_IOC_LMV_SET_DEFAULT_STRIPE _IOWR('f', 246, struct lmv_user_md) -#define LL_IOC_MIGRATE _IOR('f', 247, int) -#define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid) -#define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent) - -/* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. 
*/ -enum ll_lease_type { - LL_LEASE_RDLCK = 0x1, - LL_LEASE_WRLCK = 0x2, - LL_LEASE_UNLCK = 0x4, -}; - -#define LL_STATFS_LMV 1 -#define LL_STATFS_LOV 2 -#define LL_STATFS_NODELAY 4 - -#define IOC_MDC_TYPE 'i' -#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) -#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *) -#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *) -#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *) - -#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */ - -/* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular - * files, but are unlikely to be used in practice and are not harmful if - * used incorrectly. O_NOCTTY and FASYNC are only meaningful for character - * devices and are safe for use on new files (See LU-812, LU-4209). - */ -#define O_LOV_DELAY_CREATE (O_NOCTTY | FASYNC) - -#define LL_FILE_IGNORE_LOCK 0x00000001 -#define LL_FILE_GROUP_LOCKED 0x00000002 -#define LL_FILE_READAHEA 0x00000004 -#define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */ -#define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */ -#define LL_FILE_RMTACL 0x00000020 - -#define LOV_USER_MAGIC_V1 0x0BD10BD0 -#define LOV_USER_MAGIC LOV_USER_MAGIC_V1 -#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0 -#define LOV_USER_MAGIC_V3 0x0BD30BD0 -/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */ -#define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */ - -#define LMV_USER_MAGIC 0x0CD30CD0 /*default lmv magic*/ - -#define LOV_PATTERN_RAID0 0x001 -#define LOV_PATTERN_RAID1 0x002 -#define LOV_PATTERN_FIRST 0x100 -#define LOV_PATTERN_CMOBD 0x200 - -#define LOV_PATTERN_F_MASK 0xffff0000 -#define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */ -#define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */ - -#define LOV_MAXPOOLNAME 15 -#define LOV_POOLNAMEF "%.15s" - -#define LOV_MIN_STRIPE_BITS 16 /* 
maximum PAGE_SIZE (ia64), power of 2 */ -#define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS) -#define LOV_MAX_STRIPE_COUNT_OLD 160 -/* This calculation is crafted so that input of 4096 will result in 160 - * which in turn is equal to old maximal stripe count. - * XXX: In fact this is too simplified for now, what it also need is to get - * ea_type argument to clearly know how much space each stripe consumes. - * - * The limit of 12 pages is somewhat arbitrary, but is a reasonably large - * allocation that is sufficient for the current generation of systems. - * - * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) - */ -#define LOV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */ -#define LOV_ALL_STRIPES 0xffff /* only valid for directories */ -#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */ - -#define XATTR_LUSTRE_PREFIX "lustre." -#define XATTR_LUSTRE_LOV "lustre.lov" - -#define lov_user_ost_data lov_user_ost_data_v1 -struct lov_user_ost_data_v1 { /* per-stripe data structure */ - struct ost_id l_ost_oi; /* OST object ID */ - __u32 l_ost_gen; /* generation of this OST index */ - __u32 l_ost_idx; /* OST index in LOV */ -} __packed; - -#define lov_user_md lov_user_md_v1 -struct lov_user_md_v1 { /* LOV EA user data (host-endian) */ - __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V1 */ - __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ - struct ost_id lmm_oi; /* LOV object ID */ - __u32 lmm_stripe_size; /* size of stripe in bytes */ - __u16 lmm_stripe_count; /* num stripes in use for this object */ - union { - __u16 lmm_stripe_offset; /* starting stripe offset in - * lmm_objects, use when writing - */ - __u16 lmm_layout_gen; /* layout generation number - * used when reading - */ - }; - struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ -} __attribute__((packed, __may_alias__)); - -struct lov_user_md_v3 { /* LOV EA user data (host-endian) */ - __u32 lmm_magic; /* magic number = 
LOV_USER_MAGIC_V3 */ - __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ - struct ost_id lmm_oi; /* LOV object ID */ - __u32 lmm_stripe_size; /* size of stripe in bytes */ - __u16 lmm_stripe_count; /* num stripes in use for this object */ - union { - __u16 lmm_stripe_offset; /* starting stripe offset in - * lmm_objects, use when writing - */ - __u16 lmm_layout_gen; /* layout generation number - * used when reading - */ - }; - char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */ - struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ -} __packed; - -static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic) -{ - if (lmm_magic == LOV_USER_MAGIC_V1) - return sizeof(struct lov_user_md_v1) + - stripes * sizeof(struct lov_user_ost_data_v1); - return sizeof(struct lov_user_md_v3) + - stripes * sizeof(struct lov_user_ost_data_v1); -} - -/* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to - * use this. It is unsafe to #define those values in this header as it - * is possible the application has already #included . 
- */ -#ifdef HAVE_LOV_USER_MDS_DATA -#define lov_user_mds_data lov_user_mds_data_v1 -struct lov_user_mds_data_v1 { - lstat_t lmd_st; /* MDS stat struct */ - struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */ -} __packed; - -struct lov_user_mds_data_v3 { - lstat_t lmd_st; /* MDS stat struct */ - struct lov_user_md_v3 lmd_lmm; /* LOV EA V3 user data */ -} __packed; -#endif - -struct lmv_user_mds_data { - struct lu_fid lum_fid; - __u32 lum_padding; - __u32 lum_mds; -}; - -enum lmv_hash_type { - LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */ - LMV_HASH_TYPE_ALL_CHARS = 1, - LMV_HASH_TYPE_FNV_1A_64 = 2, -}; - -#define LMV_HASH_NAME_ALL_CHARS "all_char" -#define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64" - -/* - * Got this according to how get LOV_MAX_STRIPE_COUNT, see above, - * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data) - */ -#define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */ -#define lmv_user_md lmv_user_md_v1 -struct lmv_user_md_v1 { - __u32 lum_magic; /* must be the first field */ - __u32 lum_stripe_count; /* dirstripe count */ - __u32 lum_stripe_offset; /* MDT idx for default dirstripe */ - __u32 lum_hash_type; /* Dir stripe policy */ - __u32 lum_type; /* LMV type: default or normal */ - __u32 lum_padding1; - __u32 lum_padding2; - __u32 lum_padding3; - char lum_pool_name[LOV_MAXPOOLNAME + 1]; - struct lmv_user_mds_data lum_objects[0]; -} __packed; - -static inline int lmv_user_md_size(int stripes, int lmm_magic) -{ - return sizeof(struct lmv_user_md) + - stripes * sizeof(struct lmv_user_mds_data); -} - -struct ll_recreate_obj { - __u64 lrc_id; - __u32 lrc_ost_idx; -}; - -struct ll_fid { - __u64 id; /* holds object id */ - __u32 generation; /* holds object generation */ - __u32 f_type; /* holds object type or stripe idx when passing it to - * OST for saving into EA. 
- */ -}; - -#define UUID_MAX 40 -struct obd_uuid { - char uuid[UUID_MAX]; -}; - -static inline bool obd_uuid_equals(const struct obd_uuid *u1, - const struct obd_uuid *u2) -{ - return strcmp((char *)u1->uuid, (char *)u2->uuid) == 0; -} - -static inline int obd_uuid_empty(struct obd_uuid *uuid) -{ - return uuid->uuid[0] == '\0'; -} - -static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp) -{ - strncpy((char *)uuid->uuid, tmp, sizeof(*uuid)); - uuid->uuid[sizeof(*uuid) - 1] = '\0'; -} - -/* For printf's only, make sure uuid is terminated */ -static inline char *obd_uuid2str(const struct obd_uuid *uuid) -{ - if (!uuid) - return NULL; - - if (uuid->uuid[sizeof(*uuid) - 1] != '\0') { - /* Obviously not safe, but for printfs, no real harm done... - * we're always null-terminated, even in a race. - */ - static char temp[sizeof(*uuid)]; - - memcpy(temp, uuid->uuid, sizeof(*uuid) - 1); - temp[sizeof(*uuid) - 1] = '\0'; - return temp; - } - return (char *)(uuid->uuid); -} - -/* Extract fsname from uuid (or target name) of a target - * e.g. (myfs-OST0007_UUID -> myfs) - * see also deuuidify. - */ -static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen) -{ - char *p; - - strncpy(buf, uuid, buflen - 1); - buf[buflen - 1] = '\0'; - p = strrchr(buf, '-'); - if (p) - *p = '\0'; -} - -/* printf display format - * * usage: printf("file FID is "DFID"\n", PFID(fid)); - */ -#define FID_NOBRACE_LEN 40 -#define FID_LEN (FID_NOBRACE_LEN + 2) -#define DFID_NOBRACE "%#llx:0x%x:0x%x" -#define DFID "[" DFID_NOBRACE "]" -#define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver - -/* scanf input parse format for fids in DFID_NOBRACE format - * Need to strip '[' from DFID format first or use "["SFID"]" at caller. 
- * usage: sscanf(fidstr, SFID, RFID(&fid)); - */ -#define SFID "0x%llx:0x%x:0x%x" -#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver) - -/********* Quotas **********/ - -#define Q_QUOTACHECK 0x800100 /* deprecated as of 2.4 */ -#define Q_INITQUOTA 0x800101 /* deprecated as of 2.4 */ -#define Q_GETOINFO 0x800102 /* get obd quota info */ -#define Q_GETOQUOTA 0x800103 /* get obd quotas */ -#define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */ - -/* these must be explicitly translated into linux Q_* in ll_dir_ioctl */ -#define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */ -#define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */ -#define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */ -#define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */ -#define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */ -#define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */ -/* lustre-specific control commands */ -#define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */ -#define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */ - -#define UGQUOTA 2 /* set both USRQUOTA and GRPQUOTA */ - -#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629 - -/* permission */ -#define N_PERMS_MAX 64 - -struct perm_downcall_data { - __u64 pdd_nid; - __u32 pdd_perm; - __u32 pdd_padding; -}; - -struct identity_downcall_data { - __u32 idd_magic; - __u32 idd_err; - __u32 idd_uid; - __u32 idd_gid; - __u32 idd_nperms; - __u32 idd_ngroups; - struct perm_downcall_data idd_perms[N_PERMS_MAX]; - __u32 idd_groups[0]; -}; - -/* lustre volatile file support - * file name header: .^L^S^T^R:volatile" - */ -#define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE" -#define LUSTRE_VOLATILE_HDR_LEN 14 -/* hdr + MDT index */ -#define LUSTRE_VOLATILE_IDX LUSTRE_VOLATILE_HDR":%.4X:" - -enum lustre_quota_version { - LUSTRE_QUOTA_V2 = 1 -}; - -/* XXX: same as if_dqinfo struct in kernel */ -struct obd_dqinfo { - __u64 dqi_bgrace; - __u64 
dqi_igrace; - __u32 dqi_flags; - __u32 dqi_valid; -}; - -/* XXX: same as if_dqblk struct in kernel, plus one padding */ -struct obd_dqblk { - __u64 dqb_bhardlimit; - __u64 dqb_bsoftlimit; - __u64 dqb_curspace; - __u64 dqb_ihardlimit; - __u64 dqb_isoftlimit; - __u64 dqb_curinodes; - __u64 dqb_btime; - __u64 dqb_itime; - __u32 dqb_valid; - __u32 dqb_padding; -}; - -enum { - QC_GENERAL = 0, - QC_MDTIDX = 1, - QC_OSTIDX = 2, - QC_UUID = 3 -}; - -struct if_quotactl { - __u32 qc_cmd; - __u32 qc_type; - __u32 qc_id; - __u32 qc_stat; - __u32 qc_valid; - __u32 qc_idx; - struct obd_dqinfo qc_dqinfo; - struct obd_dqblk qc_dqblk; - char obd_type[16]; - struct obd_uuid obd_uuid; -}; - -/* swap layout flags */ -#define SWAP_LAYOUTS_CHECK_DV1 (1 << 0) -#define SWAP_LAYOUTS_CHECK_DV2 (1 << 1) -#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2) -#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3) -#define SWAP_LAYOUTS_CLOSE (1 << 4) - -/* Swap XATTR_NAME_HSM as well, only on the MDT so far */ -#define SWAP_LAYOUTS_MDS_HSM (1 << 31) -struct lustre_swap_layouts { - __u64 sl_flags; - __u32 sl_fd; - __u32 sl_gid; - __u64 sl_dv1; - __u64 sl_dv2; -}; - -/********* Changelogs **********/ -/** Changelog record types */ -enum changelog_rec_type { - CL_MARK = 0, - CL_CREATE = 1, /* namespace */ - CL_MKDIR = 2, /* namespace */ - CL_HARDLINK = 3, /* namespace */ - CL_SOFTLINK = 4, /* namespace */ - CL_MKNOD = 5, /* namespace */ - CL_UNLINK = 6, /* namespace */ - CL_RMDIR = 7, /* namespace */ - CL_RENAME = 8, /* namespace */ - CL_EXT = 9, /* namespace extended record (2nd half of rename) */ - CL_OPEN = 10, /* not currently used */ - CL_CLOSE = 11, /* may be written to log only with mtime change */ - CL_LAYOUT = 12, /* file layout/striping modified */ - CL_TRUNC = 13, - CL_SETATTR = 14, - CL_XATTR = 15, - CL_HSM = 16, /* HSM specific events, see flags */ - CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */ - CL_CTIME = 18, - CL_ATIME = 19, - CL_LAST -}; - -static inline const char 
*changelog_type2str(int type) -{ - static const char *changelog_str[] = { - "MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK", - "RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC", - "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", - }; - - if (type >= 0 && type < CL_LAST) - return changelog_str[type]; - return NULL; -} - -/* per-record flags */ -#define CLF_FLAGSHIFT 12 -#define CLF_FLAGMASK ((1U << CLF_FLAGSHIFT) - 1) -#define CLF_VERMASK (~CLF_FLAGMASK) -enum changelog_rec_flags { - CLF_VERSION = 0x1000, - CLF_RENAME = 0x2000, - CLF_JOBID = 0x4000, - CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID -}; - -/* Anything under the flagmask may be per-type (if desired) */ -/* Flags for unlink */ -#define CLF_UNLINK_LAST 0x0001 /* Unlink of last hardlink */ -#define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */ - /* HSM cleaning needed */ -/* Flags for rename */ -#define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink of - * target - */ -#define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target - * has an archive in backend - */ - -/* Flags for HSM */ -/* 12b used (from high weight to low weight): - * 2b for flags - * 3b for event - * 7b for error code - */ -#define CLF_HSM_ERR_L 0 /* HSM return code, 7 bits */ -#define CLF_HSM_ERR_H 6 -#define CLF_HSM_EVENT_L 7 /* HSM event, 3 bits, see enum hsm_event */ -#define CLF_HSM_EVENT_H 9 -#define CLF_HSM_FLAG_L 10 /* HSM flags, 2 bits, 1 used, 1 spare */ -#define CLF_HSM_FLAG_H 11 -#define CLF_HSM_SPARE_L 12 /* 4 spare bits */ -#define CLF_HSM_SPARE_H 15 -#define CLF_HSM_LAST 15 - -/* Remove bits higher than _h, then extract the value - * between _h and _l by shifting lower weigth to bit 0. 
- */ -#define CLF_GET_BITS(_b, _h, _l) (((_b << (CLF_HSM_LAST - _h)) & 0xFFFF) \ - >> (CLF_HSM_LAST - _h + _l)) - -#define CLF_HSM_SUCCESS 0x00 -#define CLF_HSM_MAXERROR 0x7E -#define CLF_HSM_ERROVERFLOW 0x7F - -#define CLF_HSM_DIRTY 1 /* file is dirty after HSM request end */ - -/* 3 bits field => 8 values allowed */ -enum hsm_event { - HE_ARCHIVE = 0, - HE_RESTORE = 1, - HE_CANCEL = 2, - HE_RELEASE = 3, - HE_REMOVE = 4, - HE_STATE = 5, - HE_SPARE1 = 6, - HE_SPARE2 = 7, -}; - -static inline enum hsm_event hsm_get_cl_event(__u16 flags) -{ - return CLF_GET_BITS(flags, CLF_HSM_EVENT_H, CLF_HSM_EVENT_L); -} - -static inline void hsm_set_cl_event(int *flags, enum hsm_event he) -{ - *flags |= (he << CLF_HSM_EVENT_L); -} - -static inline __u16 hsm_get_cl_flags(int flags) -{ - return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L); -} - -static inline void hsm_set_cl_flags(int *flags, int bits) -{ - *flags |= (bits << CLF_HSM_FLAG_L); -} - -static inline int hsm_get_cl_error(int flags) -{ - return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L); -} - -static inline void hsm_set_cl_error(int *flags, int error) -{ - *flags |= (error << CLF_HSM_ERR_L); -} - -enum changelog_send_flag { - /* Not yet implemented */ - CHANGELOG_FLAG_FOLLOW = 0x01, - /* - * Blocking IO makes sense in case of slow user parsing of the records, - * but it also prevents us from cleaning up if the records are not - * consumed. - */ - CHANGELOG_FLAG_BLOCK = 0x02, - /* Pack jobid into the changelog records if available. */ - CHANGELOG_FLAG_JOBID = 0x04, -}; - -#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \ - changelog_rec_offset(CLF_SUPPORTED)) - -/* 31 usable bytes string + null terminator. */ -#define LUSTRE_JOBID_SIZE 32 - -/* - * This is the minimal changelog record. It can contain extensions - * such as rename fields or process jobid. Its exact content is described - * by the cr_flags. - * - * Extensions are packed in the same order as their corresponding flags. 
- */ -struct changelog_rec { - __u16 cr_namelen; - __u16 cr_flags; /**< \a changelog_rec_flags */ - __u32 cr_type; /**< \a changelog_rec_type */ - __u64 cr_index; /**< changelog record number */ - __u64 cr_prev; /**< last index for this target fid */ - __u64 cr_time; - union { - struct lu_fid cr_tfid; /**< target fid */ - __u32 cr_markerflags; /**< CL_MARK flags */ - }; - struct lu_fid cr_pfid; /**< parent fid */ -} __packed; - -/* Changelog extension for RENAME. */ -struct changelog_ext_rename { - struct lu_fid cr_sfid; /**< source fid, or zero */ - struct lu_fid cr_spfid; /**< source parent fid, or zero */ -}; - -/* Changelog extension to include JOBID. */ -struct changelog_ext_jobid { - char cr_jobid[LUSTRE_JOBID_SIZE]; /**< zero-terminated string. */ -}; - -static inline size_t changelog_rec_offset(enum changelog_rec_flags crf) -{ - size_t size = sizeof(struct changelog_rec); - - if (crf & CLF_RENAME) - size += sizeof(struct changelog_ext_rename); - - if (crf & CLF_JOBID) - size += sizeof(struct changelog_ext_jobid); - - return size; -} - -static inline size_t changelog_rec_size(struct changelog_rec *rec) -{ - return changelog_rec_offset(rec->cr_flags); -} - -static inline size_t changelog_rec_varsize(struct changelog_rec *rec) -{ - return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen; -} - -static inline -struct changelog_ext_rename *changelog_rec_rename(struct changelog_rec *rec) -{ - enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION; - - return (struct changelog_ext_rename *)((char *)rec + - changelog_rec_offset(crf)); -} - -/* The jobid follows the rename extension, if present */ -static inline -struct changelog_ext_jobid *changelog_rec_jobid(struct changelog_rec *rec) -{ - enum changelog_rec_flags crf = rec->cr_flags & - (CLF_VERSION | CLF_RENAME); - - return (struct changelog_ext_jobid *)((char *)rec + - changelog_rec_offset(crf)); -} - -/* The name follows the rename and jobid extensions, if present */ -static inline char 
*changelog_rec_name(struct changelog_rec *rec) -{ - return (char *)rec + changelog_rec_offset(rec->cr_flags & - CLF_SUPPORTED); -} - -static inline size_t changelog_rec_snamelen(struct changelog_rec *rec) -{ - return rec->cr_namelen - strlen(changelog_rec_name(rec)) - 1; -} - -static inline char *changelog_rec_sname(struct changelog_rec *rec) -{ - char *cr_name = changelog_rec_name(rec); - - return cr_name + strlen(cr_name) + 1; -} - -/** - * Remap a record to the desired format as specified by the crf flags. - * The record must be big enough to contain the final remapped version. - * Superfluous extension fields are removed and missing ones are added - * and zeroed. The flags of the record are updated accordingly. - * - * The jobid and rename extensions can be added to a record, to match the - * format an application expects, typically. In this case, the newly added - * fields will be zeroed. - * The Jobid field can be removed, to guarantee compatibility with older - * clients that don't expect this field in the records they process. - * - * The following assumptions are being made: - * - CLF_RENAME will not be removed - * - CLF_JOBID will not be added without CLF_RENAME being added too - * - * @param[in,out] rec The record to remap. - * @param[in] crf_wanted Flags describing the desired extensions. 
- */ -static inline void changelog_remap_rec(struct changelog_rec *rec, - enum changelog_rec_flags crf_wanted) -{ - char *jid_mov, *rnm_mov; - - crf_wanted &= CLF_SUPPORTED; - - if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted) - return; - - /* First move the variable-length name field */ - memmove((char *)rec + changelog_rec_offset(crf_wanted), - changelog_rec_name(rec), rec->cr_namelen); - - /* Locations of jobid and rename extensions in the remapped record */ - jid_mov = (char *)rec + - changelog_rec_offset(crf_wanted & ~CLF_JOBID); - rnm_mov = (char *)rec + - changelog_rec_offset(crf_wanted & ~(CLF_JOBID | CLF_RENAME)); - - /* Move the extension fields to the desired positions */ - if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID)) - memmove(jid_mov, changelog_rec_jobid(rec), - sizeof(struct changelog_ext_jobid)); - - if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME)) - memmove(rnm_mov, changelog_rec_rename(rec), - sizeof(struct changelog_ext_rename)); - - /* Clear newly added fields */ - if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID)) - memset(jid_mov, 0, sizeof(struct changelog_ext_jobid)); - - if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME)) - memset(rnm_mov, 0, sizeof(struct changelog_ext_rename)); - - /* Update the record's flags accordingly */ - rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted; -} - -struct ioc_changelog { - __u64 icc_recno; - __u32 icc_mdtindex; - __u32 icc_id; - __u32 icc_flags; -}; - -enum changelog_message_type { - CL_RECORD = 10, /* message is a changelog_rec */ - CL_EOF = 11, /* at end of current changelog */ -}; - -/********* Misc **********/ - -struct ioc_data_version { - __u64 idv_version; - __u64 idv_flags; /* See LL_DV_xxx */ -}; - -#define LL_DV_RD_FLUSH (1 << 0) /* Flush dirty pages from clients */ -#define LL_DV_WR_FLUSH (1 << 1) /* Flush all caching pages from clients */ - -#ifndef offsetof -# define offsetof(typ, memb) ((unsigned long)((char *)&(((typ 
*)0)->memb))) -#endif - -#define dot_lustre_name ".lustre" - -/********* HSM **********/ - -/** HSM per-file state - * See HSM_FLAGS below. - */ -enum hsm_states { - HS_NONE = 0x00000000, - HS_EXISTS = 0x00000001, - HS_DIRTY = 0x00000002, - HS_RELEASED = 0x00000004, - HS_ARCHIVED = 0x00000008, - HS_NORELEASE = 0x00000010, - HS_NOARCHIVE = 0x00000020, - HS_LOST = 0x00000040, -}; - -/* HSM user-setable flags. */ -#define HSM_USER_MASK (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY) - -/* Other HSM flags. */ -#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED) - -/* - * All HSM-related possible flags that could be applied to a file. - * This should be kept in sync with hsm_states. - */ -#define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK) - -/** - * HSM request progress state - */ -enum hsm_progress_states { - HPS_WAITING = 1, - HPS_RUNNING = 2, - HPS_DONE = 3, -}; - -#define HPS_NONE 0 - -static inline char *hsm_progress_state2name(enum hsm_progress_states s) -{ - switch (s) { - case HPS_WAITING: return "waiting"; - case HPS_RUNNING: return "running"; - case HPS_DONE: return "done"; - default: return "unknown"; - } -} - -struct hsm_extent { - __u64 offset; - __u64 length; -} __packed; - -/** - * Current HSM states of a Lustre file. - * - * This structure purpose is to be sent to user-space mainly. It describes the - * current HSM flags and in-progress action. - */ -struct hsm_user_state { - /** Current HSM states, from enum hsm_states. */ - __u32 hus_states; - __u32 hus_archive_id; - /** The current undergoing action, if there is one */ - __u32 hus_in_progress_state; - __u32 hus_in_progress_action; - struct hsm_extent hus_in_progress_location; - char hus_extended_info[]; -}; - -struct hsm_state_set_ioc { - struct lu_fid hssi_fid; - __u64 hssi_setmask; - __u64 hssi_clearmask; -}; - -/* - * This structure describes the current in-progress action for a file. 
- * it is returned to user space and send over the wire - */ -struct hsm_current_action { - /** The current undergoing action, if there is one */ - /* state is one of hsm_progress_states */ - __u32 hca_state; - /* action is one of hsm_user_action */ - __u32 hca_action; - struct hsm_extent hca_location; -}; - -/***** HSM user requests ******/ -/* User-generated (lfs/ioctl) request types */ -enum hsm_user_action { - HUA_NONE = 1, /* no action (noop) */ - HUA_ARCHIVE = 10, /* copy to hsm */ - HUA_RESTORE = 11, /* prestage */ - HUA_RELEASE = 12, /* drop ost objects */ - HUA_REMOVE = 13, /* remove from archive */ - HUA_CANCEL = 14 /* cancel a request */ -}; - -static inline char *hsm_user_action2name(enum hsm_user_action a) -{ - switch (a) { - case HUA_NONE: return "NOOP"; - case HUA_ARCHIVE: return "ARCHIVE"; - case HUA_RESTORE: return "RESTORE"; - case HUA_RELEASE: return "RELEASE"; - case HUA_REMOVE: return "REMOVE"; - case HUA_CANCEL: return "CANCEL"; - default: return "UNKNOWN"; - } -} - -/* - * List of hr_flags (bit field) - */ -#define HSM_FORCE_ACTION 0x0001 -/* used by CT, connot be set by user */ -#define HSM_GHOST_COPY 0x0002 - -/** - * Contains all the fixed part of struct hsm_user_request. 
- * - */ -struct hsm_request { - __u32 hr_action; /* enum hsm_user_action */ - __u32 hr_archive_id; /* archive id, used only with HUA_ARCHIVE */ - __u64 hr_flags; /* request flags */ - __u32 hr_itemcount; /* item count in hur_user_item vector */ - __u32 hr_data_len; -}; - -struct hsm_user_item { - struct lu_fid hui_fid; - struct hsm_extent hui_extent; -} __packed; - -struct hsm_user_request { - struct hsm_request hur_request; - struct hsm_user_item hur_user_item[0]; - /* extra data blob at end of struct (after all - * hur_user_items), only use helpers to access it - */ -} __packed; - -/** Return pointer to data field in a hsm user request */ -static inline void *hur_data(struct hsm_user_request *hur) -{ - return &hur->hur_user_item[hur->hur_request.hr_itemcount]; -} - -/** - * Compute the current length of the provided hsm_user_request. This returns -1 - * instead of an errno because ssize_t is defined to be only [ -1, SSIZE_MAX ] - * - * return -1 on bounds check error. - */ -static inline ssize_t hur_len(struct hsm_user_request *hur) -{ - __u64 size; - - /* can't overflow a __u64 since hr_itemcount is only __u32 */ - size = offsetof(struct hsm_user_request, hur_user_item[0]) + - (__u64)hur->hur_request.hr_itemcount * - sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len; - - if (size != (ssize_t)size) - return -1; - - return size; -} - -/****** HSM RPCs to copytool *****/ -/* Message types the copytool may receive */ -enum hsm_message_type { - HMT_ACTION_LIST = 100, /* message is a hsm_action_list */ -}; - -/* Actions the copytool may be instructed to take for a given action_item */ -enum hsm_copytool_action { - HSMA_NONE = 10, /* no action */ - HSMA_ARCHIVE = 20, /* arbitrary offset */ - HSMA_RESTORE = 21, - HSMA_REMOVE = 22, - HSMA_CANCEL = 23 -}; - -static inline char *hsm_copytool_action2name(enum hsm_copytool_action a) -{ - switch (a) { - case HSMA_NONE: return "NOOP"; - case HSMA_ARCHIVE: return "ARCHIVE"; - case HSMA_RESTORE: return "RESTORE"; - 
case HSMA_REMOVE: return "REMOVE"; - case HSMA_CANCEL: return "CANCEL"; - default: return "UNKNOWN"; - } -} - -/* Copytool item action description */ -struct hsm_action_item { - __u32 hai_len; /* valid size of this struct */ - __u32 hai_action; /* hsm_copytool_action, but use known size */ - struct lu_fid hai_fid; /* Lustre FID to operated on */ - struct lu_fid hai_dfid; /* fid used for data access */ - struct hsm_extent hai_extent; /* byte range to operate on */ - __u64 hai_cookie; /* action cookie from coordinator */ - __u64 hai_gid; /* grouplock id */ - char hai_data[0]; /* variable length */ -} __packed; - -/* - * helper function which print in hexa the first bytes of - * hai opaque field - * \param hai [IN] record to print - * \param buffer [OUT] output buffer - * \param len [IN] max buffer len - * \retval buffer - */ -static inline char *hai_dump_data_field(struct hsm_action_item *hai, - char *buffer, size_t len) -{ - int i, data_len; - char *ptr; - - ptr = buffer; - data_len = hai->hai_len - sizeof(*hai); - for (i = 0; (i < data_len) && (len > 2); i++) { - snprintf(ptr, 3, "%02X", (unsigned char)hai->hai_data[i]); - ptr += 2; - len -= 2; - } - - *ptr = '\0'; - - return buffer; -} - -/* Copytool action list */ -#define HAL_VERSION 1 -#define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */ -struct hsm_action_list { - __u32 hal_version; - __u32 hal_count; /* number of hai's to follow */ - __u64 hal_compound_id; /* returned by coordinator */ - __u64 hal_flags; - __u32 hal_archive_id; /* which archive backend */ - __u32 padding1; - char hal_fsname[0]; /* null-terminated */ - /* struct hsm_action_item[hal_count] follows, aligned on 8-byte - * boundaries. 
See hai_first - */ -} __packed; - -#ifndef HAVE_CFS_SIZE_ROUND -static inline int cfs_size_round(int val) -{ - return (val + 7) & (~0x7); -} - -#define HAVE_CFS_SIZE_ROUND -#endif - -/* Return pointer to first hai in action list */ -static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal) -{ - return (struct hsm_action_item *)(hal->hal_fsname + - cfs_size_round(strlen(hal-> \ - hal_fsname) - + 1)); -} - -/* Return pointer to next hai */ -static inline struct hsm_action_item *hai_next(struct hsm_action_item *hai) -{ - return (struct hsm_action_item *)((char *)hai + - cfs_size_round(hai->hai_len)); -} - -/* Return size of an hsm_action_list */ -static inline int hal_size(struct hsm_action_list *hal) -{ - int i, sz; - struct hsm_action_item *hai; - - sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1); - hai = hai_first(hal); - for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) - sz += cfs_size_round(hai->hai_len); - - return sz; -} - -/* HSM file import - * describe the attributes to be set on imported file - */ -struct hsm_user_import { - __u64 hui_size; - __u64 hui_atime; - __u64 hui_mtime; - __u32 hui_atime_ns; - __u32 hui_mtime_ns; - __u32 hui_uid; - __u32 hui_gid; - __u32 hui_mode; - __u32 hui_archive_id; -}; - -/* Copytool progress reporting */ -#define HP_FLAG_COMPLETED 0x01 -#define HP_FLAG_RETRY 0x02 - -struct hsm_progress { - struct lu_fid hp_fid; - __u64 hp_cookie; - struct hsm_extent hp_extent; - __u16 hp_flags; - __u16 hp_errval; /* positive val */ - __u32 padding; -}; - -struct hsm_copy { - __u64 hc_data_version; - __u16 hc_flags; - __u16 hc_errval; /* positive val */ - __u32 padding; - struct hsm_action_item hc_hai; -}; - -/** @} lustreuser */ - -#endif /* _LUSTRE_USER_H */ diff --git a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h b/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h deleted file mode 100644 index 19c9135e2273..000000000000 --- 
a/drivers/staging/lustre/include/uapi/linux/lustre/lustre_ver.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _LUSTRE_VER_H_ -#define _LUSTRE_VER_H_ - -#define LUSTRE_MAJOR 2 -#define LUSTRE_MINOR 6 -#define LUSTRE_PATCH 99 -#define LUSTRE_FIX 0 -#define LUSTRE_VERSION_STRING "2.6.99" - -#define OBD_OCD_VERSION(major, minor, patch, fix) \ - (((major) << 24) + ((minor) << 16) + ((patch) << 8) + (fix)) - -#define OBD_OCD_VERSION_MAJOR(version) ((int)((version) >> 24) & 255) -#define OBD_OCD_VERSION_MINOR(version) ((int)((version) >> 16) & 255) -#define OBD_OCD_VERSION_PATCH(version) ((int)((version) >> 8) & 255) -#define OBD_OCD_VERSION_FIX(version) ((int)((version) >> 0) & 255) - -#define LUSTRE_VERSION_CODE \ - OBD_OCD_VERSION(LUSTRE_MAJOR, LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX) - -/* - * If lustre version of client and servers it connects to differs by more - * than this amount, client would issue a warning. - */ -#define LUSTRE_VERSION_OFFSET_WARN OBD_OCD_VERSION(0, 4, 0, 0) - -#endif diff --git a/drivers/staging/lustre/lnet/Kconfig b/drivers/staging/lustre/lnet/Kconfig deleted file mode 100644 index ad049e6f24e4..000000000000 --- a/drivers/staging/lustre/lnet/Kconfig +++ /dev/null @@ -1,46 +0,0 @@ -config LNET - tristate "Lustre networking subsystem (LNet)" - depends on INET - help - The Lustre network layer, also known as LNet, is a networking abstaction - level API that was initially created to allow Lustre Filesystem to utilize - very different networks like tcp and ib verbs in a uniform way. In the - case of Lustre routers only the LNet layer is required. Lately other - projects are also looking into using LNet as their networking API as well. - -config LNET_MAX_PAYLOAD - int "Lustre lnet max transfer payload (default 1MB)" - depends on LNET - default "1048576" - help - This option defines the maximum size of payload in bytes that lnet - can put into its transport. - - If unsure, use default. 
- -config LNET_SELFTEST - tristate "Lustre networking self testing" - depends on LNET - help - Choose Y here if you want to do lnet self testing. To compile this - as a module, choose M here: the module will be called lnet_selftest. - - To compile this as a kernel modules, choose M here and it will be - called lnet_selftest. - - If unsure, say N. - - See also http://wiki.lustre.org/ - -config LNET_XPRT_IB - tristate "LNET infiniband support" - depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS - default LNET && INFINIBAND - help - This option allows the LNET users to use infiniband as an - RDMA-enabled transport. - - To compile this as a kernel module, choose M here and it will be - called ko2iblnd. - - If unsure, say N. diff --git a/drivers/staging/lustre/lnet/Makefile b/drivers/staging/lustre/lnet/Makefile deleted file mode 100644 index 0a380fe88ce8..000000000000 --- a/drivers/staging/lustre/lnet/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_LNET) += libcfs/ lnet/ klnds/ selftest/ diff --git a/drivers/staging/lustre/lnet/klnds/Makefile b/drivers/staging/lustre/lnet/klnds/Makefile deleted file mode 100644 index c23e4f67f837..000000000000 --- a/drivers/staging/lustre/lnet/klnds/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_LNET) += o2iblnd/ socklnd/ diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile b/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile deleted file mode 100644 index 4affe1d79948..000000000000 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET_XPRT_IB) += ko2iblnd.o -ko2iblnd-y := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c deleted file mode 100644 index f0b4eb42bc1d..000000000000 --- 
a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ /dev/null @@ -1,2958 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/o2iblnd/o2iblnd.c - * - * Author: Eric Barton - */ - -#include -#include -#include "o2iblnd.h" - -static struct lnet_lnd the_o2iblnd; - -struct kib_data kiblnd_data; - -static __u32 kiblnd_cksum(void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return !sum ? 
1 : sum; -} - -static char *kiblnd_msgtype2str(int type) -{ - switch (type) { - case IBLND_MSG_CONNREQ: - return "CONNREQ"; - - case IBLND_MSG_CONNACK: - return "CONNACK"; - - case IBLND_MSG_NOOP: - return "NOOP"; - - case IBLND_MSG_IMMEDIATE: - return "IMMEDIATE"; - - case IBLND_MSG_PUT_REQ: - return "PUT_REQ"; - - case IBLND_MSG_PUT_NAK: - return "PUT_NAK"; - - case IBLND_MSG_PUT_ACK: - return "PUT_ACK"; - - case IBLND_MSG_PUT_DONE: - return "PUT_DONE"; - - case IBLND_MSG_GET_REQ: - return "GET_REQ"; - - case IBLND_MSG_GET_DONE: - return "GET_DONE"; - - default: - return "???"; - } -} - -static int kiblnd_msgtype2size(int type) -{ - const int hdr_size = offsetof(struct kib_msg, ibm_u); - - switch (type) { - case IBLND_MSG_CONNREQ: - case IBLND_MSG_CONNACK: - return hdr_size + sizeof(struct kib_connparams); - - case IBLND_MSG_NOOP: - return hdr_size; - - case IBLND_MSG_IMMEDIATE: - return offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[0]); - - case IBLND_MSG_PUT_REQ: - return hdr_size + sizeof(struct kib_putreq_msg); - - case IBLND_MSG_PUT_ACK: - return hdr_size + sizeof(struct kib_putack_msg); - - case IBLND_MSG_GET_REQ: - return hdr_size + sizeof(struct kib_get_msg); - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - return hdr_size + sizeof(struct kib_completion_msg); - default: - return -1; - } -} - -static int kiblnd_unpack_rd(struct kib_msg *msg, int flip) -{ - struct kib_rdma_desc *rd; - int msg_size; - int nob; - int n; - int i; - - LASSERT(msg->ibm_type == IBLND_MSG_GET_REQ || - msg->ibm_type == IBLND_MSG_PUT_ACK); - - rd = msg->ibm_type == IBLND_MSG_GET_REQ ? 
- &msg->ibm_u.get.ibgm_rd : - &msg->ibm_u.putack.ibpam_rd; - - if (flip) { - __swab32s(&rd->rd_key); - __swab32s(&rd->rd_nfrags); - } - - n = rd->rd_nfrags; - - nob = offsetof(struct kib_msg, ibm_u) + - kiblnd_rd_msg_size(rd, msg->ibm_type, n); - - if (msg->ibm_nob < nob) { - CERROR("Short %s: %d(%d)\n", - kiblnd_msgtype2str(msg->ibm_type), msg->ibm_nob, nob); - return 1; - } - - msg_size = kiblnd_rd_size(rd); - if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) { - CERROR("Bad msg_size: %d, should be 0 < n <= %d\n", - msg_size, LNET_MAX_PAYLOAD); - return 1; - } - - if (!flip) - return 0; - - for (i = 0; i < n; i++) { - __swab32s(&rd->rd_frags[i].rf_nob); - __swab64s(&rd->rd_frags[i].rf_addr); - } - - return 0; -} - -void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version, - int credits, lnet_nid_t dstnid, __u64 dststamp) -{ - struct kib_net *net = ni->ni_data; - - /* - * CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. - */ - msg->ibm_magic = IBLND_MSG_MAGIC; - msg->ibm_version = version; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = ni->ni_nid; - msg->ibm_srcstamp = net->ibn_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - - if (*kiblnd_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob); - } -} - -int kiblnd_unpack_msg(struct kib_msg *msg, int nob) -{ - const int hdr_size = offsetof(struct kib_msg, ibm_u); - __u32 msg_cksum; - __u16 version; - int msg_nob; - int flip; - - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - if (msg->ibm_magic == IBLND_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) { - flip = 1; - } else { - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - version = flip ? 
__swab16(msg->ibm_version) : msg->ibm_version; - if (version != IBLND_MSG_VERSION && - version != IBLND_MSG_VERSION_1) { - CERROR("Bad version: %x\n", version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* - * checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped - */ - msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum && - msg_cksum != kiblnd_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - msg->ibm_version = version; - BUILD_BUG_ON(sizeof(msg->ibm_type) != 1); - BUILD_BUG_ON(sizeof(msg->ibm_credits) != 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - if (msg_nob < kiblnd_msgtype2size(msg->ibm_type)) { - CERROR("Short %s: %d(%d)\n", kiblnd_msgtype2str(msg->ibm_type), - msg_nob, kiblnd_msgtype2size(msg->ibm_type)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBLND_MSG_NOOP: - case IBLND_MSG_IMMEDIATE: - case IBLND_MSG_PUT_REQ: - break; - - case IBLND_MSG_PUT_ACK: - case IBLND_MSG_GET_REQ: - if (kiblnd_unpack_rd(msg, flip)) - return -EPROTO; - break; - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; - - case IBLND_MSG_CONNREQ: - case IBLND_MSG_CONNACK: - if (flip) { - 
__swab16s(&msg->ibm_u.connparams.ibcp_queue_depth); - __swab16s(&msg->ibm_u.connparams.ibcp_max_frags); - __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size); - } - break; - } - return 0; -} - -int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp, - lnet_nid_t nid) -{ - struct kib_peer *peer; - struct kib_net *net = ni->ni_data; - int cpt = lnet_cpt_of_nid(nid); - unsigned long flags; - - LASSERT(net); - LASSERT(nid != LNET_NID_ANY); - - peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt); - if (!peer) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; - } - - peer->ibp_ni = ni; - peer->ibp_nid = nid; - peer->ibp_error = 0; - peer->ibp_last_alive = 0; - peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni); - peer->ibp_queue_depth = ni->ni_peertxcredits; - atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD(&peer->ibp_conns); - INIT_LIST_HEAD(&peer->ibp_tx_queue); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - /* always called with a ref on ni, which prevents ni being shutdown */ - LASSERT(!net->ibn_shutdown); - - /* npeers only grows with the global lock held */ - atomic_inc(&net->ibn_npeers); - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - *peerp = peer; - return 0; -} - -void kiblnd_destroy_peer(struct kib_peer *peer) -{ - struct kib_net *net = peer->ibp_ni->ni_data; - - LASSERT(net); - LASSERT(!atomic_read(&peer->ibp_refcount)); - LASSERT(!kiblnd_peer_active(peer)); - LASSERT(kiblnd_peer_idle(peer)); - LASSERT(list_empty(&peer->ibp_tx_queue)); - - kfree(peer); - - /* - * NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. 
- */ - atomic_dec(&net->ibn_npeers); -} - -struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid) -{ - /* - * the caller is responsible for accounting the additional reference - * that this creates - */ - struct list_head *peer_list = kiblnd_nid2peerlist(nid); - struct list_head *tmp; - struct kib_peer *peer; - - list_for_each(tmp, peer_list) { - peer = list_entry(tmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_nid != nid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n", - peer, libcfs_nid2str(nid), - atomic_read(&peer->ibp_refcount), - peer->ibp_version); - return peer; - } - return NULL; -} - -void kiblnd_unlink_peer_locked(struct kib_peer *peer) -{ - LASSERT(list_empty(&peer->ibp_conns)); - - LASSERT(kiblnd_peer_active(peer)); - list_del_init(&peer->ibp_list); - /* lose peerlist's ref */ - kiblnd_peer_decref(peer); -} - -static int kiblnd_get_peer_info(struct lnet_ni *ni, int index, - lnet_nid_t *nidp, int *count) -{ - struct kib_peer *peer; - struct list_head *ptmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_ni != ni) - continue; - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *count = atomic_read(&peer->ibp_refcount); - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return 0; - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return -ENOENT; -} - -static void kiblnd_del_peer_locked(struct kib_peer *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - struct kib_conn *conn; - - if (list_empty(&peer->ibp_conns)) { - kiblnd_unlink_peer_locked(peer); - } else { - list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, struct kib_conn, ibc_list); - - 
kiblnd_close_conn_locked(conn, 0); - } - /* NB closing peer's last conn unlinked it. */ - } - /* - * NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. - */ -} - -static int kiblnd_del_peer(struct lnet_ni *ni, lnet_nid_t nid) -{ - LIST_HEAD(zombies); - struct list_head *ptmp; - struct list_head *pnxt; - struct kib_peer *peer; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) { - lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - } else { - lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_ni != ni) - continue; - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT(list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, - &zombies); - } - - kiblnd_del_peer_locked(peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_txlist_done(ni, &zombies, -EIO); - - return rc; -} - -static struct kib_conn *kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index) -{ - struct kib_peer *peer; - struct list_head *ptmp; - struct kib_conn *conn; - struct list_head *ctmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_ni != ni) - continue; - - list_for_each(ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, struct kib_conn, - 
ibc_list); - kiblnd_conn_addref(conn); - read_unlock_irqrestore( - &kiblnd_data.kib_global_lock, - flags); - return conn; - } - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return NULL; -} - -int kiblnd_translate_mtu(int value) -{ - switch (value) { - default: - return -1; - case 0: - return 0; - case 256: - return IB_MTU_256; - case 512: - return IB_MTU_512; - case 1024: - return IB_MTU_1024; - case 2048: - return IB_MTU_2048; - case 4096: - return IB_MTU_4096; - } -} - -static void kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid) -{ - int mtu; - - /* XXX There is no path record for iWARP, set by netdev->change_mtu? */ - if (!cmid->route.path_rec) - return; - - mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu); - LASSERT(mtu >= 0); - if (mtu) - cmid->route.path_rec->mtu = mtu; -} - -static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt) -{ - cpumask_var_t *mask; - int vectors; - int off; - int i; - lnet_nid_t nid = conn->ibc_peer->ibp_nid; - - vectors = conn->ibc_cmid->device->num_comp_vectors; - if (vectors <= 1) - return 0; - - mask = cfs_cpt_cpumask(lnet_cpt_table(), cpt); - if (!mask) - return 0; - - /* hash NID to CPU id in this partition... */ - off = do_div(nid, cpumask_weight(*mask)); - for_each_cpu(i, *mask) { - if (!off--) - return i % vectors; - } - - LBUG(); - return 1; -} - -struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid, - int state, int version) -{ - /* - * CAVEAT EMPTOR: - * If the new conn is created successfully it takes over the caller's - * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself - * is destroyed. On failure, the caller's ref on 'peer' remains and - * she must dispose of 'cmid'. (Actually I'd block forever if I tried - * to destroy 'cmid' here since I'm called from the CM which still has - * its ref on 'cmid'). 
- */ - rwlock_t *glock = &kiblnd_data.kib_global_lock; - struct kib_net *net = peer->ibp_ni->ni_data; - struct kib_dev *dev; - struct ib_qp_init_attr *init_qp_attr; - struct kib_sched_info *sched; - struct ib_cq_init_attr cq_attr = {}; - struct kib_conn *conn; - struct ib_cq *cq; - unsigned long flags; - int cpt; - int rc; - int i; - - LASSERT(net); - LASSERT(!in_interrupt()); - - dev = net->ibn_dev; - - cpt = lnet_cpt_of_nid(peer->ibp_nid); - sched = kiblnd_data.kib_scheds[cpt]; - - LASSERT(sched->ibs_nthreads > 0); - - init_qp_attr = kzalloc_cpt(sizeof(*init_qp_attr), GFP_NOFS, cpt); - if (!init_qp_attr) { - CERROR("Can't allocate qp_attr for %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed_0; - } - - conn = kzalloc_cpt(sizeof(*conn), GFP_NOFS, cpt); - if (!conn) { - CERROR("Can't allocate connection for %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed_1; - } - - conn->ibc_state = IBLND_CONN_INIT; - conn->ibc_version = version; - conn->ibc_peer = peer; /* I take the caller's ref */ - cmid->context = conn; /* for future CM callbacks */ - conn->ibc_cmid = cmid; - conn->ibc_max_frags = peer->ibp_max_frags; - conn->ibc_queue_depth = peer->ibp_queue_depth; - - INIT_LIST_HEAD(&conn->ibc_early_rxs); - INIT_LIST_HEAD(&conn->ibc_tx_noops); - INIT_LIST_HEAD(&conn->ibc_tx_queue); - INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD(&conn->ibc_active_txs); - spin_lock_init(&conn->ibc_lock); - - conn->ibc_connvars = kzalloc_cpt(sizeof(*conn->ibc_connvars), GFP_NOFS, cpt); - if (!conn->ibc_connvars) { - CERROR("Can't allocate in-progress connection state\n"); - goto failed_2; - } - - write_lock_irqsave(glock, flags); - if (dev->ibd_failover) { - write_unlock_irqrestore(glock, flags); - CERROR("%s: failover in progress\n", dev->ibd_ifname); - goto failed_2; - } - - if (dev->ibd_hdev->ibh_ibdev != cmid->device) { - /* wakeup failover thread and teardown connection */ - if (kiblnd_dev_can_failover(dev)) { - 
list_add_tail(&dev->ibd_fail_list, - &kiblnd_data.kib_failed_devs); - wake_up(&kiblnd_data.kib_failover_waitq); - } - - write_unlock_irqrestore(glock, flags); - CERROR("cmid HCA(%s), kib_dev(%s) need failover\n", - cmid->device->name, dev->ibd_ifname); - goto failed_2; - } - - kiblnd_hdev_addref_locked(dev->ibd_hdev); - conn->ibc_hdev = dev->ibd_hdev; - - kiblnd_setup_mtu_locked(cmid); - - write_unlock_irqrestore(glock, flags); - - conn->ibc_rxs = kzalloc_cpt(IBLND_RX_MSGS(conn) * sizeof(struct kib_rx), - GFP_NOFS, cpt); - if (!conn->ibc_rxs) { - CERROR("Cannot allocate RX buffers\n"); - goto failed_2; - } - - rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt, - IBLND_RX_MSG_PAGES(conn)); - if (rc) - goto failed_2; - - kiblnd_map_rx_descs(conn); - - cq_attr.cqe = IBLND_CQ_ENTRIES(conn); - cq_attr.comp_vector = kiblnd_get_completion_vector(conn, cpt); - cq = ib_create_cq(cmid->device, - kiblnd_cq_completion, kiblnd_cq_event, conn, - &cq_attr); - if (IS_ERR(cq)) { - CERROR("Failed to create CQ with %d CQEs: %ld\n", - IBLND_CQ_ENTRIES(conn), PTR_ERR(cq)); - goto failed_2; - } - - conn->ibc_cq = cq; - - rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - if (rc) { - CERROR("Can't request completion notification: %d\n", rc); - goto failed_2; - } - - init_qp_attr->event_handler = kiblnd_qp_event; - init_qp_attr->qp_context = conn; - init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn); - init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn); - init_qp_attr->cap.max_send_sge = 1; - init_qp_attr->cap.max_recv_sge = 1; - init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR; - init_qp_attr->qp_type = IB_QPT_RC; - init_qp_attr->send_cq = cq; - init_qp_attr->recv_cq = cq; - - conn->ibc_sched = sched; - - rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr); - if (rc) { - CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n", - rc, init_qp_attr->cap.max_send_wr, - init_qp_attr->cap.max_recv_wr); - goto failed_2; - } - - kfree(init_qp_attr); - - /* 1 ref for caller and each rxmsg 
*/ - atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn)); - conn->ibc_nrx = IBLND_RX_MSGS(conn); - - /* post receives */ - for (i = 0; i < IBLND_RX_MSGS(conn); i++) { - rc = kiblnd_post_rx(&conn->ibc_rxs[i], - IBLND_POSTRX_NO_CREDIT); - if (rc) { - CERROR("Can't post rxmsg: %d\n", rc); - - /* Make posted receives complete */ - kiblnd_abort_receives(conn); - - /* - * correct # of posted buffers - * NB locking needed now I'm racing with completion - */ - spin_lock_irqsave(&sched->ibs_lock, flags); - conn->ibc_nrx -= IBLND_RX_MSGS(conn) - i; - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - /* - * cmid will be destroyed by CM(ofed) after cm_callback - * returned, so we can't refer it anymore - * (by kiblnd_connd()->kiblnd_destroy_conn) - */ - rdma_destroy_qp(conn->ibc_cmid); - conn->ibc_cmid = NULL; - - /* Drop my own and unused rxbuffer refcounts */ - while (i++ <= IBLND_RX_MSGS(conn)) - kiblnd_conn_decref(conn); - - return NULL; - } - } - - /* Init successful! */ - LASSERT(state == IBLND_CONN_ACTIVE_CONNECT || - state == IBLND_CONN_PASSIVE_WAIT); - conn->ibc_state = state; - - /* 1 more conn */ - atomic_inc(&net->ibn_nconns); - return conn; - - failed_2: - kiblnd_destroy_conn(conn); - kfree(conn); - failed_1: - kfree(init_qp_attr); - failed_0: - return NULL; -} - -void kiblnd_destroy_conn(struct kib_conn *conn) -{ - struct rdma_cm_id *cmid = conn->ibc_cmid; - struct kib_peer *peer = conn->ibc_peer; - int rc; - - LASSERT(!in_interrupt()); - LASSERT(!atomic_read(&conn->ibc_refcount)); - LASSERT(list_empty(&conn->ibc_early_rxs)); - LASSERT(list_empty(&conn->ibc_tx_noops)); - LASSERT(list_empty(&conn->ibc_tx_queue)); - LASSERT(list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT(list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT(list_empty(&conn->ibc_active_txs)); - LASSERT(!conn->ibc_noops_posted); - LASSERT(!conn->ibc_nsends_posted); - - switch (conn->ibc_state) { - default: - /* conn must be completely disengaged from the network */ - LBUG(); - - case 
IBLND_CONN_DISCONNECTED: - /* connvars should have been freed already */ - LASSERT(!conn->ibc_connvars); - break; - - case IBLND_CONN_INIT: - break; - } - - /* conn->ibc_cmid might be destroyed by CM already */ - if (cmid && cmid->qp) - rdma_destroy_qp(cmid); - - if (conn->ibc_cq) { - rc = ib_destroy_cq(conn->ibc_cq); - if (rc) - CWARN("Error destroying CQ: %d\n", rc); - } - - if (conn->ibc_rx_pages) - kiblnd_unmap_rx_descs(conn); - - kfree(conn->ibc_rxs); - kfree(conn->ibc_connvars); - - if (conn->ibc_hdev) - kiblnd_hdev_decref(conn->ibc_hdev); - - /* See CAVEAT EMPTOR above in kiblnd_create_conn */ - if (conn->ibc_state != IBLND_CONN_INIT) { - struct kib_net *net = peer->ibp_ni->ni_data; - - kiblnd_peer_decref(peer); - rdma_destroy_id(cmid); - atomic_dec(&net->ibn_nconns); - } -} - -int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why) -{ - struct kib_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, struct kib_conn, ibc_list); - - CDEBUG(D_NET, "Closing conn -> %s, version: %x, reason: %d\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_version, why); - - kiblnd_close_conn_locked(conn, why); - count++; - } - - return count; -} - -int kiblnd_close_stale_conns_locked(struct kib_peer *peer, - int version, __u64 incarnation) -{ - struct kib_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, struct kib_conn, ibc_list); - - if (conn->ibc_version == version && - conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, - "Closing stale conn -> %s version: %x, incarnation:%#llx(%x, %#llx)\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_version, conn->ibc_incarnation, - version, incarnation); - - kiblnd_close_conn_locked(conn, -ESTALE); - count++; - } - - return count; -} - -static int kiblnd_close_matching_conns(struct lnet_ni 
*ni, lnet_nid_t nid) -{ - struct kib_peer *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - unsigned long flags; - int count = 0; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) { - lo = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - } else { - lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); - - if (peer->ibp_ni != ni) - continue; - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kiblnd_close_peer_conns_locked(peer, 0); - } - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return 0; - - return !count ? -ENOENT : 0; -} - -static int kiblnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - switch (cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - int count = 0; - - rc = kiblnd_get_peer_info(ni, data->ioc_count, - &nid, &count); - data->ioc_nid = nid; - data->ioc_count = count; - break; - } - - case IOC_LIBCFS_DEL_PEER: { - rc = kiblnd_del_peer(ni, data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - struct kib_conn *conn; - - rc = 0; - conn = kiblnd_get_conn_by_idx(ni, data->ioc_count); - if (!conn) { - rc = -ENOENT; - break; - } - - LASSERT(conn->ibc_cmid); - data->ioc_nid = conn->ibc_peer->ibp_nid; - if (!conn->ibc_cmid->route.path_rec) - data->ioc_u32[0] = 0; /* iWarp has no path MTU */ - else - data->ioc_u32[0] = - ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu); - kiblnd_conn_decref(conn); - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kiblnd_close_matching_conns(ni, data->ioc_nid); - break; - } - - default: - 
break; - } - - return rc; -} - -static void kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid, - unsigned long *when) -{ - unsigned long last_alive = 0; - unsigned long now = jiffies; - rwlock_t *glock = &kiblnd_data.kib_global_lock; - struct kib_peer *peer; - unsigned long flags; - - read_lock_irqsave(glock, flags); - - peer = kiblnd_find_peer_locked(nid); - if (peer) - last_alive = peer->ibp_last_alive; - - read_unlock_irqrestore(glock, flags); - - if (last_alive) - *when = last_alive; - - /* - * peer is not persistent in hash, trigger peer creation - * and connection establishment with a NULL tx - */ - if (!peer) - kiblnd_launch_tx(ni, NULL, nid); - - CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n", - libcfs_nid2str(nid), peer, - last_alive ? (now - last_alive) / HZ : -1); -} - -static void kiblnd_free_pages(struct kib_pages *p) -{ - int npages = p->ibp_npages; - int i; - - for (i = 0; i < npages; i++) { - if (p->ibp_pages[i]) - __free_page(p->ibp_pages[i]); - } - - kfree(p); -} - -int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages) -{ - struct kib_pages *p; - int i; - - p = kzalloc_cpt(offsetof(struct kib_pages, ibp_pages[npages]), - GFP_NOFS, cpt); - if (!p) { - CERROR("Can't allocate descriptor for %d pages\n", npages); - return -ENOMEM; - } - - p->ibp_npages = npages; - - for (i = 0; i < npages; i++) { - p->ibp_pages[i] = alloc_pages_node( - cfs_cpt_spread_node(lnet_cpt_table(), cpt), - GFP_NOFS, 0); - if (!p->ibp_pages[i]) { - CERROR("Can't allocate page %d of %d\n", i, npages); - kiblnd_free_pages(p); - return -ENOMEM; - } - } - - *pp = p; - return 0; -} - -void kiblnd_unmap_rx_descs(struct kib_conn *conn) -{ - struct kib_rx *rx; - int i; - - LASSERT(conn->ibc_rxs); - LASSERT(conn->ibc_hdev); - - for (i = 0; i < IBLND_RX_MSGS(conn); i++) { - rx = &conn->ibc_rxs[i]; - - LASSERT(rx->rx_nob >= 0); /* not posted */ - - kiblnd_dma_unmap_single(conn->ibc_hdev->ibh_ibdev, - KIBLND_UNMAP_ADDR(rx, rx_msgunmap, - rx->rx_msgaddr), - IBLND_MSG_SIZE, 
DMA_FROM_DEVICE); - } - - kiblnd_free_pages(conn->ibc_rx_pages); - - conn->ibc_rx_pages = NULL; -} - -void kiblnd_map_rx_descs(struct kib_conn *conn) -{ - struct kib_rx *rx; - struct page *pg; - int pg_off; - int ipg; - int i; - - for (pg_off = ipg = i = 0; i < IBLND_RX_MSGS(conn); i++) { - pg = conn->ibc_rx_pages->ibp_pages[ipg]; - rx = &conn->ibc_rxs[i]; - - rx->rx_conn = conn; - rx->rx_msg = (struct kib_msg *)(((char *)page_address(pg)) + pg_off); - - rx->rx_msgaddr = kiblnd_dma_map_single(conn->ibc_hdev->ibh_ibdev, - rx->rx_msg, - IBLND_MSG_SIZE, - DMA_FROM_DEVICE); - LASSERT(!kiblnd_dma_mapping_error(conn->ibc_hdev->ibh_ibdev, - rx->rx_msgaddr)); - KIBLND_UNMAP_ADDR_SET(rx, rx_msgunmap, rx->rx_msgaddr); - - CDEBUG(D_NET, "rx %d: %p %#llx(%#llx)\n", - i, rx->rx_msg, rx->rx_msgaddr, - (__u64)(page_to_phys(pg) + pg_off)); - - pg_off += IBLND_MSG_SIZE; - LASSERT(pg_off <= PAGE_SIZE); - - if (pg_off == PAGE_SIZE) { - pg_off = 0; - ipg++; - LASSERT(ipg <= IBLND_RX_MSG_PAGES(conn)); - } - } -} - -static void kiblnd_unmap_tx_pool(struct kib_tx_pool *tpo) -{ - struct kib_hca_dev *hdev = tpo->tpo_hdev; - struct kib_tx *tx; - int i; - - LASSERT(!tpo->tpo_pool.po_allocated); - - if (!hdev) - return; - - for (i = 0; i < tpo->tpo_pool.po_size; i++) { - tx = &tpo->tpo_tx_descs[i]; - kiblnd_dma_unmap_single(hdev->ibh_ibdev, - KIBLND_UNMAP_ADDR(tx, tx_msgunmap, - tx->tx_msgaddr), - IBLND_MSG_SIZE, DMA_TO_DEVICE); - } - - kiblnd_hdev_decref(hdev); - tpo->tpo_hdev = NULL; -} - -static struct kib_hca_dev *kiblnd_current_hdev(struct kib_dev *dev) -{ - struct kib_hca_dev *hdev; - unsigned long flags; - int i = 0; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - while (dev->ibd_failover) { - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (!(i++ % 50)) - CDEBUG(D_NET, "%s: Wait for failover\n", - dev->ibd_ifname); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 100); - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - } - - 
kiblnd_hdev_addref_locked(dev->ibd_hdev); - hdev = dev->ibd_hdev; - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - return hdev; -} - -static void kiblnd_map_tx_pool(struct kib_tx_pool *tpo) -{ - struct kib_pages *txpgs = tpo->tpo_tx_pages; - struct kib_pool *pool = &tpo->tpo_pool; - struct kib_net *net = pool->po_owner->ps_net; - struct kib_dev *dev; - struct page *page; - struct kib_tx *tx; - int page_offset; - int ipage; - int i; - - LASSERT(net); - - dev = net->ibn_dev; - - /* pre-mapped messages are not bigger than 1 page */ - BUILD_BUG_ON(IBLND_MSG_SIZE > PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer calculations */ - BUILD_BUG_ON(PAGE_SIZE % IBLND_MSG_SIZE); - - tpo->tpo_hdev = kiblnd_current_hdev(dev); - - for (ipage = page_offset = i = 0; i < pool->po_size; i++) { - page = txpgs->ibp_pages[ipage]; - tx = &tpo->tpo_tx_descs[i]; - - tx->tx_msg = (struct kib_msg *)(((char *)page_address(page)) + - page_offset); - - tx->tx_msgaddr = kiblnd_dma_map_single( - tpo->tpo_hdev->ibh_ibdev, tx->tx_msg, - IBLND_MSG_SIZE, DMA_TO_DEVICE); - LASSERT(!kiblnd_dma_mapping_error(tpo->tpo_hdev->ibh_ibdev, - tx->tx_msgaddr)); - KIBLND_UNMAP_ADDR_SET(tx, tx_msgunmap, tx->tx_msgaddr); - - list_add(&tx->tx_list, &pool->po_free_list); - - page_offset += IBLND_MSG_SIZE; - LASSERT(page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT(ipage <= txpgs->ibp_npages); - } - } -} - -static void kiblnd_destroy_fmr_pool(struct kib_fmr_pool *fpo) -{ - LASSERT(!fpo->fpo_map_count); - - if (fpo->fpo_is_fmr) { - if (fpo->fmr.fpo_fmr_pool) - ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool); - } else { - struct kib_fast_reg_descriptor *frd, *tmp; - int i = 0; - - list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, - frd_list) { - list_del(&frd->frd_list); - ib_dereg_mr(frd->frd_mr); - kfree(frd); - i++; - } - if (i < fpo->fast_reg.fpo_pool_size) - CERROR("FastReg pool still has %d regions registered\n", - 
fpo->fast_reg.fpo_pool_size - i); - } - - if (fpo->fpo_hdev) - kiblnd_hdev_decref(fpo->fpo_hdev); - - kfree(fpo); -} - -static void kiblnd_destroy_fmr_pool_list(struct list_head *head) -{ - struct kib_fmr_pool *fpo, *tmp; - - list_for_each_entry_safe(fpo, tmp, head, fpo_list) { - list_del(&fpo->fpo_list); - kiblnd_destroy_fmr_pool(fpo); - } -} - -static int -kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables, - int ncpts) -{ - int size = tunables->lnd_fmr_pool_size / ncpts; - - return max(IBLND_FMR_POOL, size); -} - -static int -kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables, - int ncpts) -{ - int size = tunables->lnd_fmr_flush_trigger / ncpts; - - return max(IBLND_FMR_POOL_FLUSH, size); -} - -static int kiblnd_alloc_fmr_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo) -{ - struct ib_fmr_pool_param param = { - .max_pages_per_fmr = LNET_MAX_PAYLOAD / PAGE_SIZE, - .page_shift = PAGE_SHIFT, - .access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE), - .pool_size = fps->fps_pool_size, - .dirty_watermark = fps->fps_flush_trigger, - .flush_function = NULL, - .flush_arg = NULL, - .cache = !!fps->fps_cache }; - int rc = 0; - - fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, - ¶m); - if (IS_ERR(fpo->fmr.fpo_fmr_pool)) { - rc = PTR_ERR(fpo->fmr.fpo_fmr_pool); - if (rc != -ENOSYS) - CERROR("Failed to create FMR pool: %d\n", rc); - else - CERROR("FMRs are not supported\n"); - } - - return rc; -} - -static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps, struct kib_fmr_pool *fpo) -{ - struct kib_fast_reg_descriptor *frd, *tmp; - int i, rc; - - INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list); - fpo->fast_reg.fpo_pool_size = 0; - for (i = 0; i < fps->fps_pool_size; i++) { - frd = kzalloc_cpt(sizeof(*frd), GFP_NOFS, fps->fps_cpt); - if (!frd) { - CERROR("Failed to allocate a new fast_reg descriptor\n"); - rc = -ENOMEM; - goto out; - } - - frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd, - 
IB_MR_TYPE_MEM_REG, - LNET_MAX_PAYLOAD / PAGE_SIZE); - if (IS_ERR(frd->frd_mr)) { - rc = PTR_ERR(frd->frd_mr); - CERROR("Failed to allocate ib_alloc_mr: %d\n", rc); - frd->frd_mr = NULL; - goto out_middle; - } - - frd->frd_valid = true; - - list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list); - fpo->fast_reg.fpo_pool_size++; - } - - return 0; - -out_middle: - if (frd->frd_mr) - ib_dereg_mr(frd->frd_mr); - kfree(frd); - -out: - list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, - frd_list) { - list_del(&frd->frd_list); - ib_dereg_mr(frd->frd_mr); - kfree(frd); - } - - return rc; -} - -static int kiblnd_create_fmr_pool(struct kib_fmr_poolset *fps, - struct kib_fmr_pool **pp_fpo) -{ - struct kib_dev *dev = fps->fps_net->ibn_dev; - struct ib_device_attr *dev_attr; - struct kib_fmr_pool *fpo; - int rc; - - fpo = kzalloc_cpt(sizeof(*fpo), GFP_NOFS, fps->fps_cpt); - if (!fpo) - return -ENOMEM; - - fpo->fpo_hdev = kiblnd_current_hdev(dev); - dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs; - - /* Check for FMR or FastReg support */ - fpo->fpo_is_fmr = 0; - if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr && - fpo->fpo_hdev->ibh_ibdev->dealloc_fmr && - fpo->fpo_hdev->ibh_ibdev->map_phys_fmr && - fpo->fpo_hdev->ibh_ibdev->unmap_fmr) { - LCONSOLE_INFO("Using FMR for registration\n"); - fpo->fpo_is_fmr = 1; - } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { - LCONSOLE_INFO("Using FastReg for registration\n"); - } else { - rc = -ENOSYS; - LCONSOLE_ERROR_MSG(rc, "IB device does not support FMRs nor FastRegs, can't register memory\n"); - goto out_fpo; - } - - if (fpo->fpo_is_fmr) - rc = kiblnd_alloc_fmr_pool(fps, fpo); - else - rc = kiblnd_alloc_freg_pool(fps, fpo); - if (rc) - goto out_fpo; - - fpo->fpo_deadline = jiffies + IBLND_POOL_DEADLINE * HZ; - fpo->fpo_owner = fps; - *pp_fpo = fpo; - - return 0; - -out_fpo: - kiblnd_hdev_decref(fpo->fpo_hdev); - kfree(fpo); - return rc; -} - -static void kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps, 
- struct list_head *zombies) -{ - if (!fps->fps_net) /* initialized? */ - return; - - spin_lock(&fps->fps_lock); - - while (!list_empty(&fps->fps_pool_list)) { - struct kib_fmr_pool *fpo = list_entry(fps->fps_pool_list.next, - struct kib_fmr_pool, fpo_list); - fpo->fpo_failed = 1; - list_del(&fpo->fpo_list); - if (!fpo->fpo_map_count) - list_add(&fpo->fpo_list, zombies); - else - list_add(&fpo->fpo_list, &fps->fps_failed_pool_list); - } - - spin_unlock(&fps->fps_lock); -} - -static void kiblnd_fini_fmr_poolset(struct kib_fmr_poolset *fps) -{ - if (fps->fps_net) { /* initialized? */ - kiblnd_destroy_fmr_pool_list(&fps->fps_failed_pool_list); - kiblnd_destroy_fmr_pool_list(&fps->fps_pool_list); - } -} - -static int -kiblnd_init_fmr_poolset(struct kib_fmr_poolset *fps, int cpt, int ncpts, - struct kib_net *net, - struct lnet_ioctl_config_o2iblnd_tunables *tunables) -{ - struct kib_fmr_pool *fpo; - int rc; - - memset(fps, 0, sizeof(*fps)); - - fps->fps_net = net; - fps->fps_cpt = cpt; - - fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts); - fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts); - fps->fps_cache = tunables->lnd_fmr_cache; - - spin_lock_init(&fps->fps_lock); - INIT_LIST_HEAD(&fps->fps_pool_list); - INIT_LIST_HEAD(&fps->fps_failed_pool_list); - - rc = kiblnd_create_fmr_pool(fps, &fpo); - if (!rc) - list_add_tail(&fpo->fpo_list, &fps->fps_pool_list); - - return rc; -} - -static int kiblnd_fmr_pool_is_idle(struct kib_fmr_pool *fpo, unsigned long now) -{ - if (fpo->fpo_map_count) /* still in use */ - return 0; - if (fpo->fpo_failed) - return 1; - return time_after_eq(now, fpo->fpo_deadline); -} - -static int -kiblnd_map_tx_pages(struct kib_tx *tx, struct kib_rdma_desc *rd) -{ - __u64 *pages = tx->tx_pages; - struct kib_hca_dev *hdev; - int npages; - int size; - int i; - - hdev = tx->tx_pool->tpo_hdev; - - for (i = 0, npages = 0; i < rd->rd_nfrags; i++) { - for (size = 0; size < rd->rd_frags[i].rf_nob; - size += hdev->ibh_page_size) { - 
pages[npages++] = (rd->rd_frags[i].rf_addr & - hdev->ibh_page_mask) + size; - } - } - - return npages; -} - -void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status) -{ - LIST_HEAD(zombies); - struct kib_fmr_pool *fpo = fmr->fmr_pool; - struct kib_fmr_poolset *fps; - unsigned long now = jiffies; - struct kib_fmr_pool *tmp; - int rc; - - if (!fpo) - return; - - fps = fpo->fpo_owner; - if (fpo->fpo_is_fmr) { - if (fmr->fmr_pfmr) { - rc = ib_fmr_pool_unmap(fmr->fmr_pfmr); - LASSERT(!rc); - fmr->fmr_pfmr = NULL; - } - - if (status) { - rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool); - LASSERT(!rc); - } - } else { - struct kib_fast_reg_descriptor *frd = fmr->fmr_frd; - - if (frd) { - frd->frd_valid = false; - spin_lock(&fps->fps_lock); - list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list); - spin_unlock(&fps->fps_lock); - fmr->fmr_frd = NULL; - } - } - fmr->fmr_pool = NULL; - - spin_lock(&fps->fps_lock); - fpo->fpo_map_count--; /* decref the pool */ - - list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) { - /* the first pool is persistent */ - if (fps->fps_pool_list.next == &fpo->fpo_list) - continue; - - if (kiblnd_fmr_pool_is_idle(fpo, now)) { - list_move(&fpo->fpo_list, &zombies); - fps->fps_version++; - } - } - spin_unlock(&fps->fps_lock); - - if (!list_empty(&zombies)) - kiblnd_destroy_fmr_pool_list(&zombies); -} - -int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx, - struct kib_rdma_desc *rd, __u32 nob, __u64 iov, - struct kib_fmr *fmr) -{ - __u64 *pages = tx->tx_pages; - bool is_rx = (rd != tx->tx_rd); - bool tx_pages_mapped = false; - struct kib_fmr_pool *fpo; - int npages = 0; - __u64 version; - int rc; - - again: - spin_lock(&fps->fps_lock); - version = fps->fps_version; - list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) { - fpo->fpo_deadline = jiffies + IBLND_POOL_DEADLINE * HZ; - fpo->fpo_map_count++; - - if (fpo->fpo_is_fmr) { - struct ib_pool_fmr *pfmr; - - spin_unlock(&fps->fps_lock); - - if 
(!tx_pages_mapped) { - npages = kiblnd_map_tx_pages(tx, rd); - tx_pages_mapped = 1; - } - - pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool, - pages, npages, iov); - if (likely(!IS_ERR(pfmr))) { - fmr->fmr_key = is_rx ? pfmr->fmr->rkey : - pfmr->fmr->lkey; - fmr->fmr_frd = NULL; - fmr->fmr_pfmr = pfmr; - fmr->fmr_pool = fpo; - return 0; - } - rc = PTR_ERR(pfmr); - } else { - if (!list_empty(&fpo->fast_reg.fpo_pool_list)) { - struct kib_fast_reg_descriptor *frd; - struct ib_reg_wr *wr; - struct ib_mr *mr; - int n; - - frd = list_first_entry(&fpo->fast_reg.fpo_pool_list, - struct kib_fast_reg_descriptor, - frd_list); - list_del(&frd->frd_list); - spin_unlock(&fps->fps_lock); - - mr = frd->frd_mr; - - if (!frd->frd_valid) { - __u32 key = is_rx ? mr->rkey : mr->lkey; - struct ib_send_wr *inv_wr; - - inv_wr = &frd->frd_inv_wr; - memset(inv_wr, 0, sizeof(*inv_wr)); - inv_wr->opcode = IB_WR_LOCAL_INV; - inv_wr->wr_id = IBLND_WID_MR; - inv_wr->ex.invalidate_rkey = key; - - /* Bump the key */ - key = ib_inc_rkey(key); - ib_update_fast_reg_key(mr, key); - } - - n = ib_map_mr_sg(mr, tx->tx_frags, - tx->tx_nfrags, NULL, PAGE_SIZE); - if (unlikely(n != tx->tx_nfrags)) { - CERROR("Failed to map mr %d/%d elements\n", - n, tx->tx_nfrags); - return n < 0 ? n : -EINVAL; - } - - mr->iova = iov; - - /* Prepare FastReg WR */ - wr = &frd->frd_fastreg_wr; - memset(wr, 0, sizeof(*wr)); - wr->wr.opcode = IB_WR_REG_MR; - wr->wr.wr_id = IBLND_WID_MR; - wr->wr.num_sge = 0; - wr->wr.send_flags = 0; - wr->mr = mr; - wr->key = is_rx ? mr->rkey : mr->lkey; - wr->access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE); - - fmr->fmr_key = is_rx ? mr->rkey : mr->lkey; - fmr->fmr_frd = frd; - fmr->fmr_pfmr = NULL; - fmr->fmr_pool = fpo; - return 0; - } - spin_unlock(&fps->fps_lock); - rc = -EAGAIN; - } - - spin_lock(&fps->fps_lock); - fpo->fpo_map_count--; - if (rc != -EAGAIN) { - spin_unlock(&fps->fps_lock); - return rc; - } - - /* EAGAIN and ... 
*/ - if (version != fps->fps_version) { - spin_unlock(&fps->fps_lock); - goto again; - } - } - - if (fps->fps_increasing) { - spin_unlock(&fps->fps_lock); - CDEBUG(D_NET, "Another thread is allocating new FMR pool, waiting for her to complete\n"); - schedule(); - goto again; - } - - if (time_before(jiffies, fps->fps_next_retry)) { - /* someone failed recently */ - spin_unlock(&fps->fps_lock); - return -EAGAIN; - } - - fps->fps_increasing = 1; - spin_unlock(&fps->fps_lock); - - CDEBUG(D_NET, "Allocate new FMR pool\n"); - rc = kiblnd_create_fmr_pool(fps, &fpo); - spin_lock(&fps->fps_lock); - fps->fps_increasing = 0; - if (!rc) { - fps->fps_version++; - list_add_tail(&fpo->fpo_list, &fps->fps_pool_list); - } else { - fps->fps_next_retry = jiffies + IBLND_POOL_RETRY * HZ; - } - spin_unlock(&fps->fps_lock); - - goto again; -} - -static void kiblnd_fini_pool(struct kib_pool *pool) -{ - LASSERT(list_empty(&pool->po_free_list)); - LASSERT(!pool->po_allocated); - - CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name); -} - -static void kiblnd_init_pool(struct kib_poolset *ps, struct kib_pool *pool, int size) -{ - CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name); - - memset(pool, 0, sizeof(*pool)); - INIT_LIST_HEAD(&pool->po_free_list); - pool->po_deadline = jiffies + IBLND_POOL_DEADLINE * HZ; - pool->po_owner = ps; - pool->po_size = size; -} - -static void kiblnd_destroy_pool_list(struct list_head *head) -{ - struct kib_pool *pool; - - while (!list_empty(head)) { - pool = list_entry(head->next, struct kib_pool, po_list); - list_del(&pool->po_list); - - LASSERT(pool->po_owner); - pool->po_owner->ps_pool_destroy(pool); - } -} - -static void kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies) -{ - if (!ps->ps_net) /* initialized? 
*/ - return; - - spin_lock(&ps->ps_lock); - while (!list_empty(&ps->ps_pool_list)) { - struct kib_pool *po = list_entry(ps->ps_pool_list.next, - struct kib_pool, po_list); - po->po_failed = 1; - list_del(&po->po_list); - if (!po->po_allocated) - list_add(&po->po_list, zombies); - else - list_add(&po->po_list, &ps->ps_failed_pool_list); - } - spin_unlock(&ps->ps_lock); -} - -static void kiblnd_fini_poolset(struct kib_poolset *ps) -{ - if (ps->ps_net) { /* initialized? */ - kiblnd_destroy_pool_list(&ps->ps_failed_pool_list); - kiblnd_destroy_pool_list(&ps->ps_pool_list); - } -} - -static int kiblnd_init_poolset(struct kib_poolset *ps, int cpt, - struct kib_net *net, char *name, int size, - kib_ps_pool_create_t po_create, - kib_ps_pool_destroy_t po_destroy, - kib_ps_node_init_t nd_init, - kib_ps_node_fini_t nd_fini) -{ - struct kib_pool *pool; - int rc; - - memset(ps, 0, sizeof(*ps)); - - ps->ps_cpt = cpt; - ps->ps_net = net; - ps->ps_pool_create = po_create; - ps->ps_pool_destroy = po_destroy; - ps->ps_node_init = nd_init; - ps->ps_node_fini = nd_fini; - ps->ps_pool_size = size; - if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name)) - >= sizeof(ps->ps_name)) - return -E2BIG; - spin_lock_init(&ps->ps_lock); - INIT_LIST_HEAD(&ps->ps_pool_list); - INIT_LIST_HEAD(&ps->ps_failed_pool_list); - - rc = ps->ps_pool_create(ps, size, &pool); - if (!rc) - list_add(&pool->po_list, &ps->ps_pool_list); - else - CERROR("Failed to create the first pool for %s\n", ps->ps_name); - - return rc; -} - -static int kiblnd_pool_is_idle(struct kib_pool *pool, unsigned long now) -{ - if (pool->po_allocated) /* still in use */ - return 0; - if (pool->po_failed) - return 1; - return time_after_eq(now, pool->po_deadline); -} - -void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node) -{ - LIST_HEAD(zombies); - struct kib_poolset *ps = pool->po_owner; - struct kib_pool *tmp; - unsigned long now = jiffies; - - spin_lock(&ps->ps_lock); - - if (ps->ps_node_fini) - 
ps->ps_node_fini(pool, node); - - LASSERT(pool->po_allocated > 0); - list_add(node, &pool->po_free_list); - pool->po_allocated--; - - list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) { - /* the first pool is persistent */ - if (ps->ps_pool_list.next == &pool->po_list) - continue; - - if (kiblnd_pool_is_idle(pool, now)) - list_move(&pool->po_list, &zombies); - } - spin_unlock(&ps->ps_lock); - - if (!list_empty(&zombies)) - kiblnd_destroy_pool_list(&zombies); -} - -struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps) -{ - struct list_head *node; - struct kib_pool *pool; - unsigned int interval = 1; - unsigned long time_before; - unsigned int trips = 0; - int rc; - - again: - spin_lock(&ps->ps_lock); - list_for_each_entry(pool, &ps->ps_pool_list, po_list) { - if (list_empty(&pool->po_free_list)) - continue; - - pool->po_allocated++; - pool->po_deadline = jiffies + IBLND_POOL_DEADLINE * HZ; - node = pool->po_free_list.next; - list_del(node); - - if (ps->ps_node_init) { - /* still hold the lock */ - ps->ps_node_init(pool, node); - } - spin_unlock(&ps->ps_lock); - return node; - } - - /* no available tx pool and ... */ - if (ps->ps_increasing) { - /* another thread is allocating a new pool */ - spin_unlock(&ps->ps_lock); - trips++; - CDEBUG(D_NET, "Another thread is allocating new %s pool, waiting %d HZs for her to complete. 
trips = %d\n", - ps->ps_name, interval, trips); - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(interval); - if (interval < HZ) - interval *= 2; - - goto again; - } - - if (time_before(jiffies, ps->ps_next_retry)) { - /* someone failed recently */ - spin_unlock(&ps->ps_lock); - return NULL; - } - - ps->ps_increasing = 1; - spin_unlock(&ps->ps_lock); - - CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name); - time_before = jiffies; - rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool); - CDEBUG(D_NET, "ps_pool_create took %lu HZ to complete", - jiffies - time_before); - - spin_lock(&ps->ps_lock); - ps->ps_increasing = 0; - if (!rc) { - list_add_tail(&pool->po_list, &ps->ps_pool_list); - } else { - ps->ps_next_retry = jiffies + IBLND_POOL_RETRY * HZ; - CERROR("Can't allocate new %s pool because out of memory\n", - ps->ps_name); - } - spin_unlock(&ps->ps_lock); - - goto again; -} - -static void kiblnd_destroy_tx_pool(struct kib_pool *pool) -{ - struct kib_tx_pool *tpo = container_of(pool, struct kib_tx_pool, tpo_pool); - int i; - - LASSERT(!pool->po_allocated); - - if (tpo->tpo_tx_pages) { - kiblnd_unmap_tx_pool(tpo); - kiblnd_free_pages(tpo->tpo_tx_pages); - } - - if (!tpo->tpo_tx_descs) - goto out; - - for (i = 0; i < pool->po_size; i++) { - struct kib_tx *tx = &tpo->tpo_tx_descs[i]; - - list_del(&tx->tx_list); - kfree(tx->tx_pages); - kfree(tx->tx_frags); - kfree(tx->tx_wrq); - kfree(tx->tx_sge); - kfree(tx->tx_rd); - } - - kfree(tpo->tpo_tx_descs); -out: - kiblnd_fini_pool(pool); - kfree(tpo); -} - -static int kiblnd_tx_pool_size(int ncpts) -{ - int ntx = *kiblnd_tunables.kib_ntx / ncpts; - - return max(IBLND_TX_POOL, ntx); -} - -static int kiblnd_create_tx_pool(struct kib_poolset *ps, int size, - struct kib_pool **pp_po) -{ - int i; - int npg; - struct kib_pool *pool; - struct kib_tx_pool *tpo; - - tpo = kzalloc_cpt(sizeof(*tpo), GFP_NOFS, ps->ps_cpt); - if (!tpo) { - CERROR("Failed to allocate TX pool\n"); - return -ENOMEM; - } - - 
pool = &tpo->tpo_pool; - kiblnd_init_pool(ps, pool, size); - tpo->tpo_tx_descs = NULL; - tpo->tpo_tx_pages = NULL; - - npg = DIV_ROUND_UP(size * IBLND_MSG_SIZE, PAGE_SIZE); - if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg)) { - CERROR("Can't allocate tx pages: %d\n", npg); - kfree(tpo); - return -ENOMEM; - } - - tpo->tpo_tx_descs = kzalloc_cpt(size * sizeof(struct kib_tx), - GFP_NOFS, ps->ps_cpt); - if (!tpo->tpo_tx_descs) { - CERROR("Can't allocate %d tx descriptors\n", size); - ps->ps_pool_destroy(pool); - return -ENOMEM; - } - - memset(tpo->tpo_tx_descs, 0, size * sizeof(struct kib_tx)); - - for (i = 0; i < size; i++) { - struct kib_tx *tx = &tpo->tpo_tx_descs[i]; - - tx->tx_pool = tpo; - if (ps->ps_net->ibn_fmr_ps) { - tx->tx_pages = kzalloc_cpt(LNET_MAX_IOV * sizeof(*tx->tx_pages), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_pages) - break; - } - - tx->tx_frags = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_frags), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_frags) - break; - - sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS + 1); - - tx->tx_wrq = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_wrq) - break; - - tx->tx_sge = kzalloc_cpt((1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_sge), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_sge) - break; - - tx->tx_rd = kzalloc_cpt(offsetof(struct kib_rdma_desc, - rd_frags[IBLND_MAX_RDMA_FRAGS]), - GFP_NOFS, ps->ps_cpt); - if (!tx->tx_rd) - break; - } - - if (i == size) { - kiblnd_map_tx_pool(tpo); - *pp_po = pool; - return 0; - } - - ps->ps_pool_destroy(pool); - return -ENOMEM; -} - -static void kiblnd_tx_init(struct kib_pool *pool, struct list_head *node) -{ - struct kib_tx_poolset *tps = container_of(pool->po_owner, - struct kib_tx_poolset, - tps_poolset); - struct kib_tx *tx = list_entry(node, struct kib_tx, tx_list); - - tx->tx_cookie = tps->tps_next_tx_cookie++; -} - -static void kiblnd_net_fini_pools(struct kib_net *net) -{ - int i; - - 
cfs_cpt_for_each(i, lnet_cpt_table()) { - struct kib_tx_poolset *tps; - struct kib_fmr_poolset *fps; - - if (net->ibn_tx_ps) { - tps = net->ibn_tx_ps[i]; - kiblnd_fini_poolset(&tps->tps_poolset); - } - - if (net->ibn_fmr_ps) { - fps = net->ibn_fmr_ps[i]; - kiblnd_fini_fmr_poolset(fps); - } - } - - if (net->ibn_tx_ps) { - cfs_percpt_free(net->ibn_tx_ps); - net->ibn_tx_ps = NULL; - } - - if (net->ibn_fmr_ps) { - cfs_percpt_free(net->ibn_fmr_ps); - net->ibn_fmr_ps = NULL; - } -} - -static int kiblnd_net_init_pools(struct kib_net *net, struct lnet_ni *ni, - __u32 *cpts, int ncpts) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - int cpt; - int rc; - int i; - - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - - if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) { - CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n", - tunables->lnd_fmr_pool_size, - *kiblnd_tunables.kib_ntx / 4); - rc = -EINVAL; - goto failed; - } - - /* - * TX pool must be created later than FMR, see LU-2268 - * for details - */ - LASSERT(!net->ibn_tx_ps); - - /* - * premapping can fail if ibd_nmr > 1, so we always create - * FMR pool and map-on-demand if premapping failed - * - * cfs_precpt_alloc is creating an array of struct kib_fmr_poolset - * The number of struct kib_fmr_poolsets create is equal to the - * number of CPTs that exist, i.e net->ibn_fmr_ps[cpt]. - */ - net->ibn_fmr_ps = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(struct kib_fmr_poolset)); - if (!net->ibn_fmr_ps) { - CERROR("Failed to allocate FMR pool array\n"); - rc = -ENOMEM; - goto failed; - } - - for (i = 0; i < ncpts; i++) { - cpt = !cpts ? 
i : cpts[i]; - rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts, - net, tunables); - if (rc) { - CERROR("Can't initialize FMR pool for CPT %d: %d\n", - cpt, rc); - goto failed; - } - } - - if (i > 0) - LASSERT(i == ncpts); - - /* - * cfs_precpt_alloc is creating an array of struct kib_tx_poolset - * The number of struct kib_tx_poolsets create is equal to the - * number of CPTs that exist, i.e net->ibn_tx_ps[cpt]. - */ - net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(struct kib_tx_poolset)); - if (!net->ibn_tx_ps) { - CERROR("Failed to allocate tx pool array\n"); - rc = -ENOMEM; - goto failed; - } - - for (i = 0; i < ncpts; i++) { - cpt = !cpts ? i : cpts[i]; - rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset, - cpt, net, "TX", - kiblnd_tx_pool_size(ncpts), - kiblnd_create_tx_pool, - kiblnd_destroy_tx_pool, - kiblnd_tx_init, NULL); - if (rc) { - CERROR("Can't initialize TX pool for CPT %d: %d\n", - cpt, rc); - goto failed; - } - } - - return 0; - failed: - kiblnd_net_fini_pools(net); - LASSERT(rc); - return rc; -} - -static int kiblnd_hdev_get_attr(struct kib_hca_dev *hdev) -{ - /* - * It's safe to assume a HCA can handle a page size - * matching that of the native system - */ - hdev->ibh_page_shift = PAGE_SHIFT; - hdev->ibh_page_size = 1 << PAGE_SHIFT; - hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1); - - hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size; - if (hdev->ibh_mr_size == ~0ULL) { - hdev->ibh_mr_shift = 64; - return 0; - } - - CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size); - return -EINVAL; -} - -void kiblnd_hdev_destroy(struct kib_hca_dev *hdev) -{ - if (hdev->ibh_pd) - ib_dealloc_pd(hdev->ibh_pd); - - if (hdev->ibh_cmid) - rdma_destroy_id(hdev->ibh_cmid); - - kfree(hdev); -} - -/* DUMMY */ -static int kiblnd_dummy_callback(struct rdma_cm_id *cmid, - struct rdma_cm_event *event) -{ - return 0; -} - -static int kiblnd_dev_need_failover(struct kib_dev *dev) -{ - struct rdma_cm_id *cmid; - struct 
sockaddr_in srcaddr; - struct sockaddr_in dstaddr; - int rc; - - if (!dev->ibd_hdev || /* initializing */ - !dev->ibd_hdev->ibh_cmid || /* listener is dead */ - *kiblnd_tunables.kib_dev_failover > 1) /* debugging */ - return 1; - - /* - * XXX: it's UGLY, but I don't have better way to find - * ib-bonding HCA failover because: - * - * a. no reliable CM event for HCA failover... - * b. no OFED API to get ib_device for current net_device... - * - * We have only two choices at this point: - * - * a. rdma_bind_addr(), it will conflict with listener cmid - * b. rdma_resolve_addr() to zero addr - */ - cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP, - IB_QPT_RC); - if (IS_ERR(cmid)) { - rc = PTR_ERR(cmid); - CERROR("Failed to create cmid for failover: %d\n", rc); - return rc; - } - - memset(&srcaddr, 0, sizeof(srcaddr)); - srcaddr.sin_family = AF_INET; - srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip); - - memset(&dstaddr, 0, sizeof(dstaddr)); - dstaddr.sin_family = AF_INET; - rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr, - (struct sockaddr *)&dstaddr, 1); - if (rc || !cmid->device) { - CERROR("Failed to bind %s:%pI4h to device(%p): %d\n", - dev->ibd_ifname, &dev->ibd_ifip, - cmid->device, rc); - rdma_destroy_id(cmid); - return rc; - } - - rc = dev->ibd_hdev->ibh_ibdev != cmid->device; /* true for failover */ - rdma_destroy_id(cmid); - - return rc; -} - -int kiblnd_dev_failover(struct kib_dev *dev) -{ - LIST_HEAD(zombie_tpo); - LIST_HEAD(zombie_ppo); - LIST_HEAD(zombie_fpo); - struct rdma_cm_id *cmid = NULL; - struct kib_hca_dev *hdev = NULL; - struct ib_pd *pd; - struct kib_net *net; - struct sockaddr_in addr; - unsigned long flags; - int rc = 0; - int i; - - LASSERT(*kiblnd_tunables.kib_dev_failover > 1 || - dev->ibd_can_failover || !dev->ibd_hdev); - - rc = kiblnd_dev_need_failover(dev); - if (rc <= 0) - goto out; - - if (dev->ibd_hdev && - dev->ibd_hdev->ibh_cmid) { - /* - * XXX it's not good to close old listener at here, - * because 
we can fail to create new listener. - * But we have to close it now, otherwise rdma_bind_addr - * will return EADDRINUSE... How crap! - */ - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - cmid = dev->ibd_hdev->ibh_cmid; - /* - * make next schedule of kiblnd_dev_need_failover() - * return 1 for me - */ - dev->ibd_hdev->ibh_cmid = NULL; - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - rdma_destroy_id(cmid); - } - - cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP, - IB_QPT_RC); - if (IS_ERR(cmid)) { - rc = PTR_ERR(cmid); - CERROR("Failed to create cmid for failover: %d\n", rc); - goto out; - } - - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(dev->ibd_ifip); - addr.sin_port = htons(*kiblnd_tunables.kib_service); - - /* Bind to failover device or port */ - rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr); - if (rc || !cmid->device) { - CERROR("Failed to bind %s:%pI4h to device(%p): %d\n", - dev->ibd_ifname, &dev->ibd_ifip, - cmid->device, rc); - rdma_destroy_id(cmid); - goto out; - } - - hdev = kzalloc(sizeof(*hdev), GFP_NOFS); - if (!hdev) { - CERROR("Failed to allocate kib_hca_dev\n"); - rdma_destroy_id(cmid); - rc = -ENOMEM; - goto out; - } - - atomic_set(&hdev->ibh_ref, 1); - hdev->ibh_dev = dev; - hdev->ibh_cmid = cmid; - hdev->ibh_ibdev = cmid->device; - - pd = ib_alloc_pd(cmid->device, 0); - if (IS_ERR(pd)) { - rc = PTR_ERR(pd); - CERROR("Can't allocate PD: %d\n", rc); - goto out; - } - - hdev->ibh_pd = pd; - - rc = rdma_listen(cmid, 0); - if (rc) { - CERROR("Can't start new listener: %d\n", rc); - goto out; - } - - rc = kiblnd_hdev_get_attr(hdev); - if (rc) { - CERROR("Can't get device attributes: %d\n", rc); - goto out; - } - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - swap(dev->ibd_hdev, hdev); /* take over the refcount */ - - list_for_each_entry(net, &dev->ibd_nets, ibn_list) { - cfs_cpt_for_each(i, lnet_cpt_table()) { - 
kiblnd_fail_poolset(&net->ibn_tx_ps[i]->tps_poolset, - &zombie_tpo); - - if (net->ibn_fmr_ps) - kiblnd_fail_fmr_poolset(net->ibn_fmr_ps[i], - &zombie_fpo); - } - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - out: - if (!list_empty(&zombie_tpo)) - kiblnd_destroy_pool_list(&zombie_tpo); - if (!list_empty(&zombie_ppo)) - kiblnd_destroy_pool_list(&zombie_ppo); - if (!list_empty(&zombie_fpo)) - kiblnd_destroy_fmr_pool_list(&zombie_fpo); - if (hdev) - kiblnd_hdev_decref(hdev); - - if (rc) - dev->ibd_failed_failover++; - else - dev->ibd_failed_failover = 0; - - return rc; -} - -void kiblnd_destroy_dev(struct kib_dev *dev) -{ - LASSERT(!dev->ibd_nnets); - LASSERT(list_empty(&dev->ibd_nets)); - - list_del(&dev->ibd_fail_list); - list_del(&dev->ibd_list); - - if (dev->ibd_hdev) - kiblnd_hdev_decref(dev->ibd_hdev); - - kfree(dev); -} - -static struct kib_dev *kiblnd_create_dev(char *ifname) -{ - struct net_device *netdev; - struct kib_dev *dev; - __u32 netmask; - __u32 ip; - int up; - int rc; - - rc = lnet_ipif_query(ifname, &up, &ip, &netmask); - if (rc) { - CERROR("Can't query IPoIB interface %s: %d\n", - ifname, rc); - return NULL; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", ifname); - return NULL; - } - - dev = kzalloc(sizeof(*dev), GFP_NOFS); - if (!dev) - return NULL; - - netdev = dev_get_by_name(&init_net, ifname); - if (!netdev) { - dev->ibd_can_failover = 0; - } else { - dev->ibd_can_failover = !!(netdev->flags & IFF_MASTER); - dev_put(netdev); - } - - INIT_LIST_HEAD(&dev->ibd_nets); - INIT_LIST_HEAD(&dev->ibd_list); /* not yet in kib_devs */ - INIT_LIST_HEAD(&dev->ibd_fail_list); - dev->ibd_ifip = ip; - strcpy(&dev->ibd_ifname[0], ifname); - - /* initialize the device */ - rc = kiblnd_dev_failover(dev); - if (rc) { - CERROR("Can't initialize device: %d\n", rc); - kfree(dev); - return NULL; - } - - list_add_tail(&dev->ibd_list, &kiblnd_data.kib_devs); - return dev; -} - -static void kiblnd_base_shutdown(void) -{ 
- struct kib_sched_info *sched; - int i; - - LASSERT(list_empty(&kiblnd_data.kib_devs)); - - switch (kiblnd_data.kib_init) { - default: - LBUG(); - - case IBLND_INIT_ALL: - case IBLND_INIT_DATA: - LASSERT(kiblnd_data.kib_peers); - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) - LASSERT(list_empty(&kiblnd_data.kib_peers[i])); - LASSERT(list_empty(&kiblnd_data.kib_connd_zombies)); - LASSERT(list_empty(&kiblnd_data.kib_connd_conns)); - LASSERT(list_empty(&kiblnd_data.kib_reconn_list)); - LASSERT(list_empty(&kiblnd_data.kib_reconn_wait)); - - /* flag threads to terminate; wake and wait for them to die */ - kiblnd_data.kib_shutdown = 1; - - /* - * NB: we really want to stop scheduler threads net by net - * instead of the whole module, this should be improved - * with dynamic configuration LNet - */ - cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) - wake_up_all(&sched->ibs_waitq); - - wake_up_all(&kiblnd_data.kib_connd_waitq); - wake_up_all(&kiblnd_data.kib_failover_waitq); - - i = 2; - while (atomic_read(&kiblnd_data.kib_nthreads)) { - i++; - /* power of 2 ? */ - CDEBUG(((i & (-i)) == i) ? 
D_WARNING : D_NET, - "Waiting for %d threads to terminate\n", - atomic_read(&kiblnd_data.kib_nthreads)); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - - /* fall through */ - - case IBLND_INIT_NOTHING: - break; - } - - kvfree(kiblnd_data.kib_peers); - - if (kiblnd_data.kib_scheds) - cfs_percpt_free(kiblnd_data.kib_scheds); - - kiblnd_data.kib_init = IBLND_INIT_NOTHING; - module_put(THIS_MODULE); -} - -static void kiblnd_shutdown(struct lnet_ni *ni) -{ - struct kib_net *net = ni->ni_data; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - int i; - unsigned long flags; - - LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL); - - if (!net) - goto out; - - write_lock_irqsave(g_lock, flags); - net->ibn_shutdown = 1; - write_unlock_irqrestore(g_lock, flags); - - switch (net->ibn_init) { - default: - LBUG(); - - case IBLND_INIT_ALL: - /* nuke all existing peers within this net */ - kiblnd_del_peer(ni, LNET_NID_ANY); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read(&net->ibn_npeers)) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? 
*/ - "%s: waiting for %d peers to disconnect\n", - libcfs_nid2str(ni->ni_nid), - atomic_read(&net->ibn_npeers)); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - - kiblnd_net_fini_pools(net); - - write_lock_irqsave(g_lock, flags); - LASSERT(net->ibn_dev->ibd_nnets > 0); - net->ibn_dev->ibd_nnets--; - list_del(&net->ibn_list); - write_unlock_irqrestore(g_lock, flags); - - /* fall through */ - - case IBLND_INIT_NOTHING: - LASSERT(!atomic_read(&net->ibn_nconns)); - - if (net->ibn_dev && !net->ibn_dev->ibd_nnets) - kiblnd_destroy_dev(net->ibn_dev); - - break; - } - - net->ibn_init = IBLND_INIT_NOTHING; - ni->ni_data = NULL; - - kfree(net); - -out: - if (list_empty(&kiblnd_data.kib_devs)) - kiblnd_base_shutdown(); -} - -static int kiblnd_base_startup(void) -{ - struct kib_sched_info *sched; - int rc; - int i; - - LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING); - - try_module_get(THIS_MODULE); - /* zero pointers, flags etc */ - memset(&kiblnd_data, 0, sizeof(kiblnd_data)); - - rwlock_init(&kiblnd_data.kib_global_lock); - - INIT_LIST_HEAD(&kiblnd_data.kib_devs); - INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs); - - kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE; - kiblnd_data.kib_peers = kvmalloc_array(kiblnd_data.kib_peer_hash_size, - sizeof(struct list_head), - GFP_KERNEL); - if (!kiblnd_data.kib_peers) - goto failed; - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]); - - spin_lock_init(&kiblnd_data.kib_connd_lock); - INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); - INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); - INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list); - INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait); - - init_waitqueue_head(&kiblnd_data.kib_connd_waitq); - init_waitqueue_head(&kiblnd_data.kib_failover_waitq); - - kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*sched)); - if (!kiblnd_data.kib_scheds) - goto failed; - - cfs_percpt_for_each(sched, i, 
kiblnd_data.kib_scheds) { - int nthrs; - - spin_lock_init(&sched->ibs_lock); - INIT_LIST_HEAD(&sched->ibs_conns); - init_waitqueue_head(&sched->ibs_waitq); - - nthrs = cfs_cpt_weight(lnet_cpt_table(), i); - if (*kiblnd_tunables.kib_nscheds > 0) { - nthrs = min(nthrs, *kiblnd_tunables.kib_nscheds); - } else { - /* - * max to half of CPUs, another half is reserved for - * upper layer modules - */ - nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs); - } - - sched->ibs_nthreads_max = nthrs; - sched->ibs_cpt = i; - } - - kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR; - - /* lists/ptrs/locks initialised */ - kiblnd_data.kib_init = IBLND_INIT_DATA; - /*****************************************************/ - - rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd"); - if (rc) { - CERROR("Can't spawn o2iblnd connd: %d\n", rc); - goto failed; - } - - if (*kiblnd_tunables.kib_dev_failover) - rc = kiblnd_thread_start(kiblnd_failover_thread, NULL, - "kiblnd_failover"); - - if (rc) { - CERROR("Can't spawn o2iblnd failover thread: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - kiblnd_data.kib_init = IBLND_INIT_ALL; - /*****************************************************/ - - return 0; - - failed: - kiblnd_base_shutdown(); - return -ENETDOWN; -} - -static int kiblnd_start_schedulers(struct kib_sched_info *sched) -{ - int rc = 0; - int nthrs; - int i; - - if (!sched->ibs_nthreads) { - if (*kiblnd_tunables.kib_nscheds > 0) { - nthrs = sched->ibs_nthreads_max; - } else { - nthrs = cfs_cpt_weight(lnet_cpt_table(), - sched->ibs_cpt); - nthrs = min(max(IBLND_N_SCHED, nthrs >> 1), nthrs); - nthrs = min(IBLND_N_SCHED_HIGH, nthrs); - } - } else { - LASSERT(sched->ibs_nthreads <= sched->ibs_nthreads_max); - /* increase one thread if there is new interface */ - nthrs = sched->ibs_nthreads < sched->ibs_nthreads_max; - } - - for (i = 0; i < nthrs; i++) { - long id; - char name[20]; - - id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i); - 
snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld", - KIB_THREAD_CPT(id), KIB_THREAD_TID(id)); - rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name); - if (!rc) - continue; - - CERROR("Can't spawn thread %d for scheduler[%d]: %d\n", - sched->ibs_cpt, sched->ibs_nthreads + i, rc); - break; - } - - sched->ibs_nthreads += i; - return rc; -} - -static int kiblnd_dev_start_threads(struct kib_dev *dev, int newdev, __u32 *cpts, - int ncpts) -{ - int cpt; - int rc; - int i; - - for (i = 0; i < ncpts; i++) { - struct kib_sched_info *sched; - - cpt = !cpts ? i : cpts[i]; - sched = kiblnd_data.kib_scheds[cpt]; - - if (!newdev && sched->ibs_nthreads > 0) - continue; - - rc = kiblnd_start_schedulers(kiblnd_data.kib_scheds[cpt]); - if (rc) { - CERROR("Failed to start scheduler threads for %s\n", - dev->ibd_ifname); - return rc; - } - } - return 0; -} - -static struct kib_dev *kiblnd_dev_search(char *ifname) -{ - struct kib_dev *alias = NULL; - struct kib_dev *dev; - char *colon; - char *colon2; - - colon = strchr(ifname, ':'); - list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) { - if (!strcmp(&dev->ibd_ifname[0], ifname)) - return dev; - - if (alias) - continue; - - colon2 = strchr(dev->ibd_ifname, ':'); - if (colon) - *colon = 0; - if (colon2) - *colon2 = 0; - - if (!strcmp(&dev->ibd_ifname[0], ifname)) - alias = dev; - - if (colon) - *colon = ':'; - if (colon2) - *colon2 = ':'; - } - return alias; -} - -static int kiblnd_startup(struct lnet_ni *ni) -{ - char *ifname; - struct kib_dev *ibdev = NULL; - struct kib_net *net; - struct timespec64 tv; - unsigned long flags; - int rc; - int newdev; - - LASSERT(ni->ni_lnd == &the_o2iblnd); - - if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) { - rc = kiblnd_base_startup(); - if (rc) - return rc; - } - - net = kzalloc(sizeof(*net), GFP_NOFS); - ni->ni_data = net; - if (!net) - goto net_failed; - - ktime_get_real_ts64(&tv); - net->ibn_incarnation = tv.tv_sec * USEC_PER_SEC + - tv.tv_nsec / NSEC_PER_USEC; - - rc = 
kiblnd_tunables_setup(ni); - if (rc) - goto net_failed; - - if (ni->ni_interfaces[0]) { - /* Use the IPoIB interface specified in 'networks=' */ - - BUILD_BUG_ON(LNET_MAX_INTERFACES <= 1); - if (ni->ni_interfaces[1]) { - CERROR("Multiple interfaces not supported\n"); - goto failed; - } - - ifname = ni->ni_interfaces[0]; - } else { - ifname = *kiblnd_tunables.kib_default_ipif; - } - - if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) { - CERROR("IPoIB interface name too long: %s\n", ifname); - goto failed; - } - - ibdev = kiblnd_dev_search(ifname); - - newdev = !ibdev; - /* hmm...create kib_dev even for alias */ - if (!ibdev || strcmp(&ibdev->ibd_ifname[0], ifname)) - ibdev = kiblnd_create_dev(ifname); - - if (!ibdev) - goto failed; - - net->ibn_dev = ibdev; - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip); - - rc = kiblnd_dev_start_threads(ibdev, newdev, - ni->ni_cpts, ni->ni_ncpts); - if (rc) - goto failed; - - rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts); - if (rc) { - CERROR("Failed to initialize NI pools: %d\n", rc); - goto failed; - } - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - ibdev->ibd_nnets++; - list_add_tail(&net->ibn_list, &ibdev->ibd_nets); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - net->ibn_init = IBLND_INIT_ALL; - - return 0; - -failed: - if (!net->ibn_dev && ibdev) - kiblnd_destroy_dev(ibdev); - -net_failed: - kiblnd_shutdown(ni); - - CDEBUG(D_NET, "%s failed\n", __func__); - return -ENETDOWN; -} - -static struct lnet_lnd the_o2iblnd = { - .lnd_type = O2IBLND, - .lnd_startup = kiblnd_startup, - .lnd_shutdown = kiblnd_shutdown, - .lnd_ctl = kiblnd_ctl, - .lnd_query = kiblnd_query, - .lnd_send = kiblnd_send, - .lnd_recv = kiblnd_recv, -}; - -static void __exit ko2iblnd_exit(void) -{ - lnet_unregister_lnd(&the_o2iblnd); -} - -static int __init ko2iblnd_init(void) -{ - int rc; - - BUILD_BUG_ON(sizeof(struct kib_msg) > IBLND_MSG_SIZE); - BUILD_BUG_ON(offsetof(struct kib_msg, - 
ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - > IBLND_MSG_SIZE); - BUILD_BUG_ON(offsetof(struct kib_msg, - ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - > IBLND_MSG_SIZE); - - kiblnd_tunables_init(); - - rc = libcfs_setup(); - if (rc) - return rc; - - lnet_register_lnd(&the_o2iblnd); - - return 0; -} - -MODULE_AUTHOR("OpenSFS, Inc. "); -MODULE_DESCRIPTION("OpenIB gen2 LNet Network Driver"); -MODULE_VERSION("2.7.0"); -MODULE_LICENSE("GPL"); - -module_init(ko2iblnd_init); -module_exit(ko2iblnd_exit); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h deleted file mode 100644 index 217503f125bc..000000000000 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ /dev/null @@ -1,1048 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/klnds/o2iblnd/o2iblnd.h - * - * Author: Eric Barton - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_LND - -#include - -#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */ -/* # scheduler loops before reschedule */ -#define IBLND_RESCHED 100 - -#define IBLND_N_SCHED 2 -#define IBLND_N_SCHED_HIGH 4 - -struct kib_tunables { - int *kib_dev_failover; /* HCA failover */ - unsigned int *kib_service; /* IB service number */ - int *kib_min_reconnect_interval; /* first failed connection retry... */ - int *kib_max_reconnect_interval; /* exponentially increasing to this */ - int *kib_cksum; /* checksum struct kib_msg? */ - int *kib_timeout; /* comms timeout (seconds) */ - int *kib_keepalive; /* keepalive timeout (seconds) */ - int *kib_ntx; /* # tx descs */ - char **kib_default_ipif; /* default IPoIB interface */ - int *kib_retry_count; - int *kib_rnr_retry_count; - int *kib_ib_mtu; /* IB MTU */ - int *kib_require_priv_port; /* accept only privileged ports */ - int *kib_use_priv_port; /* use privileged port for active connect */ - int *kib_nscheds; /* # threads on each CPT */ -}; - -extern struct kib_tunables kiblnd_tunables; - -#define IBLND_MSG_QUEUE_SIZE_V1 8 /* V1 only : # messages/RDMAs in-flight */ -#define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when eagerly to return credits */ - -#define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */ -#define IBLND_CREDITS_MAX ((typeof(((struct kib_msg *)0)->ibm_credits)) - 1) /* Max # of peer credits */ - -/* when eagerly to return credits */ -#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? 
\ - IBLND_CREDIT_HIGHWATER_V1 : \ - t->lnd_peercredits_hiw) - -#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \ - cb, dev, \ - ps, qpt) - -/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */ -#define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1) -#define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0) - -#define IBLND_FRAG_SHIFT (PAGE_SHIFT - 12) /* frag size on wire is in 4K units */ -#define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */ -#define IBLND_MAX_RDMA_FRAGS (LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */ - -/************************/ -/* derived constants... */ -/* Pools (shared by connections on each CPT) */ -/* These pools can grow at runtime, so don't need give a very large value */ -#define IBLND_TX_POOL 256 -#define IBLND_FMR_POOL 256 -#define IBLND_FMR_POOL_FLUSH 192 - -#define IBLND_RX_MSGS(c) \ - ((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version)) -#define IBLND_RX_MSG_BYTES(c) (IBLND_RX_MSGS(c) * IBLND_MSG_SIZE) -#define IBLND_RX_MSG_PAGES(c) \ - ((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE) - -/* WRs and CQEs (per connection) */ -#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c) -#define IBLND_SEND_WRS(c) \ - (((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \ - kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni)) -#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c)) - -struct kib_hca_dev; - -/* o2iblnd can run over aliased interface */ -#ifdef IFALIASZ -#define KIB_IFNAME_SIZE IFALIASZ -#else -#define KIB_IFNAME_SIZE 256 -#endif - -struct kib_dev { - struct list_head ibd_list; /* chain on kib_devs */ - struct list_head ibd_fail_list; /* chain on kib_failed_devs */ - __u32 ibd_ifip; /* IPoIB interface IP */ - - /* IPoIB interface name */ - char ibd_ifname[KIB_IFNAME_SIZE]; - int ibd_nnets; /* # nets extant */ - - unsigned long ibd_next_failover; - int ibd_failed_failover; /* # failover failures */ - unsigned 
int ibd_failover; /* failover in progress */ - unsigned int ibd_can_failover; /* IPoIB interface is a bonding master */ - struct list_head ibd_nets; - struct kib_hca_dev *ibd_hdev; -}; - -struct kib_hca_dev { - struct rdma_cm_id *ibh_cmid; /* listener cmid */ - struct ib_device *ibh_ibdev; /* IB device */ - int ibh_page_shift; /* page shift of current HCA */ - int ibh_page_size; /* page size of current HCA */ - __u64 ibh_page_mask; /* page mask of current HCA */ - int ibh_mr_shift; /* bits shift of max MR size */ - __u64 ibh_mr_size; /* size of MR */ - struct ib_pd *ibh_pd; /* PD */ - struct kib_dev *ibh_dev; /* owner */ - atomic_t ibh_ref; /* refcount */ -}; - -/** # of seconds to keep pool alive */ -#define IBLND_POOL_DEADLINE 300 -/** # of seconds to retry if allocation failed */ -#define IBLND_POOL_RETRY 1 - -struct kib_pages { - int ibp_npages; /* # pages */ - struct page *ibp_pages[0]; /* page array */ -}; - -struct kib_pool; -struct kib_poolset; - -typedef int (*kib_ps_pool_create_t)(struct kib_poolset *ps, - int inc, struct kib_pool **pp_po); -typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po); -typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node); -typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node); - -struct kib_net; - -#define IBLND_POOL_NAME_LEN 32 - -struct kib_poolset { - spinlock_t ps_lock; /* serialize */ - struct kib_net *ps_net; /* network it belongs to */ - char ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */ - struct list_head ps_pool_list; /* list of pools */ - struct list_head ps_failed_pool_list;/* failed pool list */ - unsigned long ps_next_retry; /* time stamp for retry if */ - /* failed to allocate */ - int ps_increasing; /* is allocating new pool */ - int ps_pool_size; /* new pool size */ - int ps_cpt; /* CPT id */ - - kib_ps_pool_create_t ps_pool_create; /* create a new pool */ - kib_ps_pool_destroy_t ps_pool_destroy; /* destroy a pool */ - kib_ps_node_init_t ps_node_init; 
/* initialize new allocated node */ - kib_ps_node_fini_t ps_node_fini; /* finalize node */ -}; - -struct kib_pool { - struct list_head po_list; /* chain on pool list */ - struct list_head po_free_list; /* pre-allocated node */ - struct kib_poolset *po_owner; /* pool_set of this pool */ - unsigned long po_deadline; /* deadline of this pool */ - int po_allocated; /* # of elements in use */ - int po_failed; /* pool is created on failed HCA */ - int po_size; /* # of pre-allocated elements */ -}; - -struct kib_tx_poolset { - struct kib_poolset tps_poolset; /* pool-set */ - __u64 tps_next_tx_cookie; /* cookie of TX */ -}; - -struct kib_tx_pool { - struct kib_pool tpo_pool; /* pool */ - struct kib_hca_dev *tpo_hdev; /* device for this pool */ - struct kib_tx *tpo_tx_descs; /* all the tx descriptors */ - struct kib_pages *tpo_tx_pages; /* premapped tx msg pages */ -}; - -struct kib_fmr_poolset { - spinlock_t fps_lock; /* serialize */ - struct kib_net *fps_net; /* IB network */ - struct list_head fps_pool_list; /* FMR pool list */ - struct list_head fps_failed_pool_list;/* FMR pool list */ - __u64 fps_version; /* validity stamp */ - int fps_cpt; /* CPT id */ - int fps_pool_size; - int fps_flush_trigger; - int fps_cache; - int fps_increasing; /* is allocating new pool */ - unsigned long fps_next_retry; /* time stamp for retry if*/ - /* failed to allocate */ -}; - -struct kib_fast_reg_descriptor { /* For fast registration */ - struct list_head frd_list; - struct ib_send_wr frd_inv_wr; - struct ib_reg_wr frd_fastreg_wr; - struct ib_mr *frd_mr; - bool frd_valid; -}; - -struct kib_fmr_pool { - struct list_head fpo_list; /* chain on pool list */ - struct kib_hca_dev *fpo_hdev; /* device for this pool */ - struct kib_fmr_poolset *fpo_owner; /* owner of this pool */ - union { - struct { - struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */ - } fmr; - struct { /* For fast registration */ - struct list_head fpo_pool_list; - int fpo_pool_size; - } fast_reg; - }; - unsigned long 
fpo_deadline; /* deadline of this pool */ - int fpo_failed; /* fmr pool is failed */ - int fpo_map_count; /* # of mapped FMR */ - int fpo_is_fmr; -}; - -struct kib_fmr { - struct kib_fmr_pool *fmr_pool; /* pool of FMR */ - struct ib_pool_fmr *fmr_pfmr; /* IB pool fmr */ - struct kib_fast_reg_descriptor *fmr_frd; - u32 fmr_key; -}; - -struct kib_net { - struct list_head ibn_list; /* chain on struct kib_dev::ibd_nets */ - __u64 ibn_incarnation;/* my epoch */ - int ibn_init; /* initialisation state */ - int ibn_shutdown; /* shutting down? */ - - atomic_t ibn_npeers; /* # peers extant */ - atomic_t ibn_nconns; /* # connections extant */ - - struct kib_tx_poolset **ibn_tx_ps; /* tx pool-set */ - struct kib_fmr_poolset **ibn_fmr_ps; /* fmr pool-set */ - - struct kib_dev *ibn_dev; /* underlying IB device */ -}; - -#define KIB_THREAD_SHIFT 16 -#define KIB_THREAD_ID(cpt, tid) ((cpt) << KIB_THREAD_SHIFT | (tid)) -#define KIB_THREAD_CPT(id) ((id) >> KIB_THREAD_SHIFT) -#define KIB_THREAD_TID(id) ((id) & ((1UL << KIB_THREAD_SHIFT) - 1)) - -struct kib_sched_info { - spinlock_t ibs_lock; /* serialise */ - wait_queue_head_t ibs_waitq; /* schedulers sleep here */ - struct list_head ibs_conns; /* conns to check for rx completions */ - int ibs_nthreads; /* number of scheduler threads */ - int ibs_nthreads_max; /* max allowed scheduler threads */ - int ibs_cpt; /* CPT id */ -}; - -struct kib_data { - int kib_init; /* initialisation state */ - int kib_shutdown; /* shut down? 
*/ - struct list_head kib_devs; /* IB devices extant */ - struct list_head kib_failed_devs; /* list head of failed devices */ - wait_queue_head_t kib_failover_waitq; /* schedulers sleep here */ - atomic_t kib_nthreads; /* # live threads */ - rwlock_t kib_global_lock; /* stabilize net/dev/peer/conn ops */ - struct list_head *kib_peers; /* hash table of all my known peers */ - int kib_peer_hash_size; /* size of kib_peers */ - void *kib_connd; /* the connd task (serialisation assertions) */ - struct list_head kib_connd_conns; /* connections to setup/teardown */ - struct list_head kib_connd_zombies; /* connections with zero refcount */ - /* connections to reconnect */ - struct list_head kib_reconn_list; - /* peers wait for reconnection */ - struct list_head kib_reconn_wait; - /** - * The second that peers are pulled out from \a kib_reconn_wait - * for reconnection. - */ - time64_t kib_reconn_sec; - - wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */ - spinlock_t kib_connd_lock; /* serialise */ - struct ib_qp_attr kib_error_qpa; /* QP->ERROR */ - struct kib_sched_info **kib_scheds; /* percpt data for schedulers */ -}; - -#define IBLND_INIT_NOTHING 0 -#define IBLND_INIT_DATA 1 -#define IBLND_INIT_ALL 2 - -/************************************************************************ - * IB Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). - */ - -struct kib_connparams { - __u16 ibcp_queue_depth; - __u16 ibcp_max_frags; - __u32 ibcp_max_msg_size; -} WIRE_ATTR; - -struct kib_immediate_msg { - struct lnet_hdr ibim_hdr; /* portals header */ - char ibim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR; - -struct kib_rdma_frag { - __u32 rf_nob; /* # bytes this frag */ - __u64 rf_addr; /* CAVEAT EMPTOR: misaligned!! 
*/ -} WIRE_ATTR; - -struct kib_rdma_desc { - __u32 rd_key; /* local/remote key */ - __u32 rd_nfrags; /* # fragments */ - struct kib_rdma_frag rd_frags[0]; /* buffer frags */ -} WIRE_ATTR; - -struct kib_putreq_msg { - struct lnet_hdr ibprm_hdr; /* portals header */ - __u64 ibprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR; - -struct kib_putack_msg { - __u64 ibpam_src_cookie; /* reflected completion cookie */ - __u64 ibpam_dst_cookie; /* opaque completion cookie */ - struct kib_rdma_desc ibpam_rd; /* sender's sink buffer */ -} WIRE_ATTR; - -struct kib_get_msg { - struct lnet_hdr ibgm_hdr; /* portals header */ - __u64 ibgm_cookie; /* opaque completion cookie */ - struct kib_rdma_desc ibgm_rd; /* rdma descriptor */ -} WIRE_ATTR; - -struct kib_completion_msg { - __u64 ibcm_cookie; /* opaque completion cookie */ - __s32 ibcm_status; /* < 0 failure: >= 0 length */ -} WIRE_ATTR; - -struct kib_msg { - /* First 2 fields fixed FOR ALL TIME */ - __u32 ibm_magic; /* I'm an ibnal message */ - __u16 ibm_version; /* this is my version number */ - - __u8 ibm_type; /* msg type */ - __u8 ibm_credits; /* returned credits */ - __u32 ibm_nob; /* # bytes in whole message */ - __u32 ibm_cksum; /* checksum (0 == no checksum) */ - __u64 ibm_srcnid; /* sender's NID */ - __u64 ibm_srcstamp; /* sender's incarnation */ - __u64 ibm_dstnid; /* destination's NID */ - __u64 ibm_dststamp; /* destination's incarnation */ - - union { - struct kib_connparams connparams; - struct kib_immediate_msg immediate; - struct kib_putreq_msg putreq; - struct kib_putack_msg putack; - struct kib_get_msg get; - struct kib_completion_msg completion; - } WIRE_ATTR ibm_u; -} WIRE_ATTR; - -#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC /* unique magic */ - -#define IBLND_MSG_VERSION_1 0x11 -#define IBLND_MSG_VERSION_2 0x12 -#define IBLND_MSG_VERSION IBLND_MSG_VERSION_2 - -#define IBLND_MSG_CONNREQ 0xc0 /* connection request */ -#define IBLND_MSG_CONNACK 0xc1 /* connection acknowledge */ -#define IBLND_MSG_NOOP 0xd0 
/* nothing (just credits) */ -#define IBLND_MSG_IMMEDIATE 0xd1 /* immediate */ -#define IBLND_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */ -#define IBLND_MSG_PUT_NAK 0xd3 /* completion (sink->src) */ -#define IBLND_MSG_PUT_ACK 0xd4 /* putack (sink->src) */ -#define IBLND_MSG_PUT_DONE 0xd5 /* completion (src->sink) */ -#define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */ -#define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */ - -struct kib_rej { - __u32 ibr_magic; /* sender's magic */ - __u16 ibr_version; /* sender's version */ - __u8 ibr_why; /* reject reason */ - __u8 ibr_padding; /* padding */ - __u64 ibr_incarnation; /* incarnation of peer */ - struct kib_connparams ibr_cp; /* connection parameters */ -} WIRE_ATTR; - -/* connection rejection reasons */ -#define IBLND_REJECT_CONN_RACE 1 /* You lost connection race */ -#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */ -#define IBLND_REJECT_FATAL 3 /* Anything else */ -#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */ -#define IBLND_REJECT_CONN_STALE 5 /* stale peer */ -/* peer's rdma frags doesn't match mine */ -#define IBLND_REJECT_RDMA_FRAGS 6 -/* peer's msg queue size doesn't match mine */ -#define IBLND_REJECT_MSG_QUEUE_SIZE 7 - -/***********************************************************************/ - -struct kib_rx { /* receive message */ - struct list_head rx_list; /* queue for attention */ - struct kib_conn *rx_conn; /* owning conn */ - int rx_nob; /* # bytes received (-1 while posted) */ - enum ib_wc_status rx_status; /* completion status */ - struct kib_msg *rx_msg; /* message buffer (host vaddr) */ - __u64 rx_msgaddr; /* message buffer (I/O addr) */ - DECLARE_PCI_UNMAP_ADDR(rx_msgunmap); /* for dma_unmap_single() */ - struct ib_recv_wr rx_wrq; /* receive work item... 
*/ - struct ib_sge rx_sge; /* ...and its memory */ -}; - -#define IBLND_POSTRX_DONT_POST 0 /* don't post */ -#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */ -#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */ -#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give self back 1 reserved credit */ - -struct kib_tx { /* transmit message */ - struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */ - struct kib_tx_pool *tx_pool; /* pool I'm from */ - struct kib_conn *tx_conn; /* owning conn */ - short tx_sending; /* # tx callbacks outstanding */ - short tx_queued; /* queued for sending */ - short tx_waiting; /* waiting for peer */ - int tx_status; /* LNET completion status */ - unsigned long tx_deadline; /* completion deadline */ - __u64 tx_cookie; /* completion cookie */ - struct lnet_msg *tx_lntmsg[2]; /* lnet msgs to finalize on completion */ - struct kib_msg *tx_msg; /* message buffer (host vaddr) */ - __u64 tx_msgaddr; /* message buffer (I/O addr) */ - DECLARE_PCI_UNMAP_ADDR(tx_msgunmap); /* for dma_unmap_single() */ - int tx_nwrq; /* # send work items */ - struct ib_rdma_wr *tx_wrq; /* send work items... */ - struct ib_sge *tx_sge; /* ...and their memory */ - struct kib_rdma_desc *tx_rd; /* rdma descriptor */ - int tx_nfrags; /* # entries in... 
*/ - struct scatterlist *tx_frags; /* dma_map_sg descriptor */ - __u64 *tx_pages; /* rdma phys page addrs */ - struct kib_fmr fmr; /* FMR */ - int tx_dmadir; /* dma direction */ -}; - -struct kib_connvars { - struct kib_msg cv_msg; /* connection-in-progress variables */ -}; - -struct kib_conn { - struct kib_sched_info *ibc_sched; /* scheduler information */ - struct kib_peer *ibc_peer; /* owning peer */ - struct kib_hca_dev *ibc_hdev; /* HCA bound on */ - struct list_head ibc_list; /* stash on peer's conn list */ - struct list_head ibc_sched_list; /* schedule for attention */ - __u16 ibc_version; /* version of connection */ - /* reconnect later */ - __u16 ibc_reconnect:1; - __u64 ibc_incarnation; /* which instance of the peer */ - atomic_t ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_noops_posted; /* # uncompleted NOOPs */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits; /* # ACK/DONE msg credits */ - int ibc_comms_error; /* set on comms error */ - /* connections queue depth */ - __u16 ibc_queue_depth; - /* connections max frags */ - __u16 ibc_max_frags; - unsigned int ibc_nrx:16; /* receive buffers owned */ - unsigned int ibc_scheduled:1; /* scheduled for attention */ - unsigned int ibc_ready:1; /* CQ callback fired */ - unsigned long ibc_last_send; /* time of last send */ - struct list_head ibc_connd_list; /* link chain for */ - /* kiblnd_check_conns only */ - struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */ - struct list_head ibc_tx_noops; /* IBLND_MSG_NOOPs for */ - /* IBLND_MSG_VERSION_1 */ - struct list_head ibc_tx_queue; /* sends that need a credit */ - struct list_head ibc_tx_queue_nocred; /* sends that don't need a */ - /* credit */ - struct list_head ibc_tx_queue_rsrvd; /* sends that need to */ - /* reserve an ACK/DONE msg */ - struct list_head ibc_active_txs; /* active tx 
awaiting completion */ - spinlock_t ibc_lock; /* serialise */ - struct kib_rx *ibc_rxs; /* the rx descs */ - struct kib_pages *ibc_rx_pages; /* premapped rx msg pages */ - - struct rdma_cm_id *ibc_cmid; /* CM id */ - struct ib_cq *ibc_cq; /* completion queue */ - - struct kib_connvars *ibc_connvars; /* in-progress connection state */ -}; - -#define IBLND_CONN_INIT 0 /* being initialised */ -#define IBLND_CONN_ACTIVE_CONNECT 1 /* active sending req */ -#define IBLND_CONN_PASSIVE_WAIT 2 /* passive waiting for rtu */ -#define IBLND_CONN_ESTABLISHED 3 /* connection established */ -#define IBLND_CONN_CLOSING 4 /* being closed */ -#define IBLND_CONN_DISCONNECTED 5 /* disconnected */ - -struct kib_peer { - struct list_head ibp_list; /* stash on global peer list */ - lnet_nid_t ibp_nid; /* who's on the other end(s) */ - struct lnet_ni *ibp_ni; /* LNet interface */ - struct list_head ibp_conns; /* all active connections */ - struct kib_conn *ibp_next_conn; /* next connection to send on for - * round robin */ - struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - __u64 ibp_incarnation; /* incarnation of peer */ - /* when (in jiffies) I was last alive */ - unsigned long ibp_last_alive; - /* # users */ - atomic_t ibp_refcount; - /* version of peer */ - __u16 ibp_version; - /* current passive connection attempts */ - unsigned short ibp_accepting; - /* current active connection attempts */ - unsigned short ibp_connecting; - /* reconnect this peer later */ - unsigned char ibp_reconnecting; - /* counter of how many times we triggered a conn race */ - unsigned char ibp_races; - /* # consecutive reconnection attempts to this peer */ - unsigned int ibp_reconnected; - /* errno on closing this peer */ - int ibp_error; - /* max map_on_demand */ - __u16 ibp_max_frags; - /* max_peer_credits */ - __u16 ibp_queue_depth; -}; - -extern struct kib_data kiblnd_data; - -void kiblnd_hdev_destroy(struct kib_hca_dev *hdev); - -int kiblnd_msg_queue_size(int version, struct lnet_ni *ni); - 
-/* max # of fragments configured by user */ -static inline int -kiblnd_cfg_rdma_frags(struct lnet_ni *ni) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - int mod; - - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - mod = tunables->lnd_map_on_demand; - return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT; -} - -static inline int -kiblnd_rdma_frags(int version, struct lnet_ni *ni) -{ - return version == IBLND_MSG_VERSION_1 ? - (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) : - kiblnd_cfg_rdma_frags(ni); -} - -static inline int -kiblnd_concurrent_sends(int version, struct lnet_ni *ni) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - int concurrent_sends; - - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - concurrent_sends = tunables->lnd_concurrent_sends; - - if (version == IBLND_MSG_VERSION_1) { - if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2) - return IBLND_MSG_QUEUE_SIZE_V1 * 2; - - if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2) - return IBLND_MSG_QUEUE_SIZE_V1 / 2; - } - - return concurrent_sends; -} - -static inline void -kiblnd_hdev_addref_locked(struct kib_hca_dev *hdev) -{ - LASSERT(atomic_read(&hdev->ibh_ref) > 0); - atomic_inc(&hdev->ibh_ref); -} - -static inline void -kiblnd_hdev_decref(struct kib_hca_dev *hdev) -{ - LASSERT(atomic_read(&hdev->ibh_ref) > 0); - if (atomic_dec_and_test(&hdev->ibh_ref)) - kiblnd_hdev_destroy(hdev); -} - -static inline int -kiblnd_dev_can_failover(struct kib_dev *dev) -{ - if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */ - return 0; - - if (!*kiblnd_tunables.kib_dev_failover) /* disabled */ - return 0; - - if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */ - return 1; - - return dev->ibd_can_failover; -} - -#define kiblnd_conn_addref(conn) \ -do { \ - CDEBUG(D_NET, "conn[%p] (%d)++\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - atomic_inc(&(conn)->ibc_refcount); \ -} while (0) - -#define kiblnd_conn_decref(conn) \ -do { \ - unsigned long flags; \ 
- \ - CDEBUG(D_NET, "conn[%p] (%d)--\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT_ATOMIC_POS(&(conn)->ibc_refcount); \ - if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \ - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); \ - list_add_tail(&(conn)->ibc_list, \ - &kiblnd_data.kib_connd_zombies); \ - wake_up(&kiblnd_data.kib_connd_waitq); \ - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\ - } \ -} while (0) - -#define kiblnd_peer_addref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read(&(peer)->ibp_refcount)); \ - atomic_inc(&(peer)->ibp_refcount); \ -} while (0) - -#define kiblnd_peer_decref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read(&(peer)->ibp_refcount)); \ - LASSERT_ATOMIC_POS(&(peer)->ibp_refcount); \ - if (atomic_dec_and_test(&(peer)->ibp_refcount)) \ - kiblnd_destroy_peer(peer); \ -} while (0) - -static inline bool -kiblnd_peer_connecting(struct kib_peer *peer) -{ - return peer->ibp_connecting || - peer->ibp_reconnecting || - peer->ibp_accepting; -} - -static inline bool -kiblnd_peer_idle(struct kib_peer *peer) -{ - return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns); -} - -static inline struct list_head * -kiblnd_nid2peerlist(lnet_nid_t nid) -{ - unsigned int hash = - ((unsigned int)nid) % kiblnd_data.kib_peer_hash_size; - - return &kiblnd_data.kib_peers[hash]; -} - -static inline int -kiblnd_peer_active(struct kib_peer *peer) -{ - /* Am I in the peer hash table? 
*/ - return !list_empty(&peer->ibp_list); -} - -static inline struct kib_conn * -kiblnd_get_conn_locked(struct kib_peer *peer) -{ - struct list_head *next; - - LASSERT(!list_empty(&peer->ibp_conns)); - - /* Advance to next connection, be sure to skip the head node */ - if (!peer->ibp_next_conn || - peer->ibp_next_conn->ibc_list.next == &peer->ibp_conns) - next = peer->ibp_conns.next; - else - next = peer->ibp_next_conn->ibc_list.next; - peer->ibp_next_conn = list_entry(next, struct kib_conn, ibc_list); - - return peer->ibp_next_conn; -} - -static inline int -kiblnd_send_keepalive(struct kib_conn *conn) -{ - return (*kiblnd_tunables.kib_keepalive > 0) && - time_after(jiffies, conn->ibc_last_send + - msecs_to_jiffies(*kiblnd_tunables.kib_keepalive * - MSEC_PER_SEC)); -} - -static inline int -kiblnd_need_noop(struct kib_conn *conn) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - - LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - - if (conn->ibc_outstanding_credits < - IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) && - !kiblnd_send_keepalive(conn)) - return 0; /* No need to send NOOP */ - - if (IBLND_OOB_CAPABLE(conn->ibc_version)) { - if (!list_empty(&conn->ibc_tx_queue_nocred)) - return 0; /* NOOP can be piggybacked */ - - /* No tx to piggyback NOOP onto or no credit to send a tx */ - return (list_empty(&conn->ibc_tx_queue) || - !conn->ibc_credits); - } - - if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */ - !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */ - !conn->ibc_credits) /* no credit */ - return 0; - - if (conn->ibc_credits == 1 && /* last credit reserved for */ - !conn->ibc_outstanding_credits) /* giving back credits */ - return 0; - - /* No tx to piggyback NOOP onto or no credit to send a tx */ - return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1); -} - -static inline void 
-kiblnd_abort_receives(struct kib_conn *conn) -{ - ib_modify_qp(conn->ibc_cmid->qp, - &kiblnd_data.kib_error_qpa, IB_QP_STATE); -} - -static inline const char * -kiblnd_queue2str(struct kib_conn *conn, struct list_head *q) -{ - if (q == &conn->ibc_tx_queue) - return "tx_queue"; - - if (q == &conn->ibc_tx_queue_rsrvd) - return "tx_queue_rsrvd"; - - if (q == &conn->ibc_tx_queue_nocred) - return "tx_queue_nocred"; - - if (q == &conn->ibc_active_txs) - return "active_txs"; - - LBUG(); - return NULL; -} - -/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */ -/* lowest bits of the work request id to stash the work item type. */ - -#define IBLND_WID_INVAL 0 -#define IBLND_WID_TX 1 -#define IBLND_WID_RX 2 -#define IBLND_WID_RDMA 3 -#define IBLND_WID_MR 4 -#define IBLND_WID_MASK 7UL - -static inline __u64 -kiblnd_ptr2wreqid(void *ptr, int type) -{ - unsigned long lptr = (unsigned long)ptr; - - LASSERT(!(lptr & IBLND_WID_MASK)); - LASSERT(!(type & ~IBLND_WID_MASK)); - return (__u64)(lptr | type); -} - -static inline void * -kiblnd_wreqid2ptr(__u64 wreqid) -{ - return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK); -} - -static inline int -kiblnd_wreqid2type(__u64 wreqid) -{ - return wreqid & IBLND_WID_MASK; -} - -static inline void -kiblnd_set_conn_state(struct kib_conn *conn, int state) -{ - conn->ibc_state = state; - mb(); -} - -static inline void -kiblnd_init_msg(struct kib_msg *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(struct kib_msg, ibm_u) + body_nob; -} - -static inline int -kiblnd_rd_size(struct kib_rdma_desc *rd) -{ - int i; - int size; - - for (i = size = 0; i < rd->rd_nfrags; i++) - size += rd->rd_frags[i].rf_nob; - - return size; -} - -static inline __u64 -kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index) -{ - return rd->rd_frags[index].rf_addr; -} - -static inline __u32 -kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index) -{ - return rd->rd_frags[index].rf_nob; -} - -static inline 
__u32 -kiblnd_rd_frag_key(struct kib_rdma_desc *rd, int index) -{ - return rd->rd_key; -} - -static inline int -kiblnd_rd_consume_frag(struct kib_rdma_desc *rd, int index, __u32 nob) -{ - if (nob < rd->rd_frags[index].rf_nob) { - rd->rd_frags[index].rf_addr += nob; - rd->rd_frags[index].rf_nob -= nob; - } else { - index++; - } - - return index; -} - -static inline int -kiblnd_rd_msg_size(struct kib_rdma_desc *rd, int msgtype, int n) -{ - LASSERT(msgtype == IBLND_MSG_GET_REQ || - msgtype == IBLND_MSG_PUT_ACK); - - return msgtype == IBLND_MSG_GET_REQ ? - offsetof(struct kib_get_msg, ibgm_rd.rd_frags[n]) : - offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[n]); -} - -static inline __u64 -kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr) -{ - return ib_dma_mapping_error(dev, dma_addr); -} - -static inline __u64 kiblnd_dma_map_single(struct ib_device *dev, - void *msg, size_t size, - enum dma_data_direction direction) -{ - return ib_dma_map_single(dev, msg, size, direction); -} - -static inline void kiblnd_dma_unmap_single(struct ib_device *dev, - __u64 addr, size_t size, - enum dma_data_direction direction) -{ - ib_dma_unmap_single(dev, addr, size, direction); -} - -#define KIBLND_UNMAP_ADDR_SET(p, m, a) do {} while (0) -#define KIBLND_UNMAP_ADDR(p, m, a) (a) - -static inline int kiblnd_dma_map_sg(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - return ib_dma_map_sg(dev, sg, nents, direction); -} - -static inline void kiblnd_dma_unmap_sg(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - ib_dma_unmap_sg(dev, sg, nents, direction); -} - -static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev, - struct scatterlist *sg) -{ - return ib_sg_dma_address(dev, sg); -} - -static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return ib_sg_dma_len(dev, sg); -} - -/* XXX We use KIBLND_CONN_PARAM(e) as writable 
buffer, it's not strictly */ -/* right because OFED1.2 defines it as const, to use it we have to add */ -/* (void *) cast to overcome "const" */ - -#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data) -#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len) - -void kiblnd_map_rx_descs(struct kib_conn *conn); -void kiblnd_unmap_rx_descs(struct kib_conn *conn); -void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node); -struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps); - -int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx, - struct kib_rdma_desc *rd, __u32 nob, __u64 iov, - struct kib_fmr *fmr); -void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status); - -int kiblnd_tunables_setup(struct lnet_ni *ni); -void kiblnd_tunables_init(void); - -int kiblnd_connd(void *arg); -int kiblnd_scheduler(void *arg); -int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name); -int kiblnd_failover_thread(void *arg); - -int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages); - -int kiblnd_cm_callback(struct rdma_cm_id *cmid, - struct rdma_cm_event *event); -int kiblnd_translate_mtu(int value); - -int kiblnd_dev_failover(struct kib_dev *dev); -int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer **peerp, - lnet_nid_t nid); -void kiblnd_destroy_peer(struct kib_peer *peer); -bool kiblnd_reconnect_peer(struct kib_peer *peer); -void kiblnd_destroy_dev(struct kib_dev *dev); -void kiblnd_unlink_peer_locked(struct kib_peer *peer); -struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid); -int kiblnd_close_stale_conns_locked(struct kib_peer *peer, - int version, __u64 incarnation); -int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why); - -struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, - struct rdma_cm_id *cmid, - int state, int version); -void kiblnd_destroy_conn(struct kib_conn *conn); -void kiblnd_close_conn(struct kib_conn *conn, int error); -void 
kiblnd_close_conn_locked(struct kib_conn *conn, int error); - -void kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid); -void kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist, - int status); - -void kiblnd_qp_event(struct ib_event *event, void *arg); -void kiblnd_cq_event(struct ib_event *event, void *arg); -void kiblnd_cq_completion(struct ib_cq *cq, void *arg); - -void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version, - int credits, lnet_nid_t dstnid, __u64 dststamp); -int kiblnd_unpack_msg(struct kib_msg *msg, int nob); -int kiblnd_post_rx(struct kib_rx *rx, int credit); - -int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg); -int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, - int delayed, struct iov_iter *to, unsigned int rlen); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c deleted file mode 100644 index 65b7a62943ad..000000000000 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ /dev/null @@ -1,3763 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/o2iblnd/o2iblnd_cb.c - * - * Author: Eric Barton - */ - -#include -#include "o2iblnd.h" - -#define MAX_CONN_RACES_BEFORE_ABORT 20 - -static void kiblnd_peer_alive(struct kib_peer *peer); -static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error); -static void kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, - int type, int body_nob); -static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type, - int resid, struct kib_rdma_desc *dstrd, - __u64 dstcookie); -static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn); -static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn); -static void kiblnd_unmap_tx(struct kib_tx *tx); -static void kiblnd_check_sends_locked(struct kib_conn *conn); - -static void -kiblnd_tx_done(struct lnet_ni *ni, struct kib_tx *tx) -{ - struct lnet_msg *lntmsg[2]; - struct kib_net *net = ni->ni_data; - int rc; - int i; - - LASSERT(net); - LASSERT(!in_interrupt()); - LASSERT(!tx->tx_queued); /* mustn't be queued for sending */ - LASSERT(!tx->tx_sending); /* mustn't be awaiting sent callback */ - LASSERT(!tx->tx_waiting); /* mustn't be awaiting peer response */ - LASSERT(tx->tx_pool); - - kiblnd_unmap_tx(tx); - - /* tx may have up to 2 lnet msgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - rc = tx->tx_status; - - if (tx->tx_conn) { - LASSERT(ni == tx->tx_conn->ibc_peer->ibp_ni); 
- - kiblnd_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nwrq = 0; - tx->tx_status = 0; - - kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if (!lntmsg[i]) - continue; - - lnet_finalize(ni, lntmsg[i], rc); - } -} - -void -kiblnd_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int status) -{ - struct kib_tx *tx; - - while (!list_empty(txlist)) { - tx = list_entry(txlist->next, struct kib_tx, tx_list); - - list_del(&tx->tx_list); - /* complete now */ - tx->tx_waiting = 0; - tx->tx_status = status; - kiblnd_tx_done(ni, tx); - } -} - -static struct kib_tx * -kiblnd_get_idle_tx(struct lnet_ni *ni, lnet_nid_t target) -{ - struct kib_net *net = (struct kib_net *)ni->ni_data; - struct list_head *node; - struct kib_tx *tx; - struct kib_tx_poolset *tps; - - tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)]; - node = kiblnd_pool_alloc_node(&tps->tps_poolset); - if (!node) - return NULL; - tx = list_entry(node, struct kib_tx, tx_list); - - LASSERT(!tx->tx_nwrq); - LASSERT(!tx->tx_queued); - LASSERT(!tx->tx_sending); - LASSERT(!tx->tx_waiting); - LASSERT(!tx->tx_status); - LASSERT(!tx->tx_conn); - LASSERT(!tx->tx_lntmsg[0]); - LASSERT(!tx->tx_lntmsg[1]); - LASSERT(!tx->tx_nfrags); - - return tx; -} - -static void -kiblnd_drop_rx(struct kib_rx *rx) -{ - struct kib_conn *conn = rx->rx_conn; - struct kib_sched_info *sched = conn->ibc_sched; - unsigned long flags; - - spin_lock_irqsave(&sched->ibs_lock, flags); - LASSERT(conn->ibc_nrx > 0); - conn->ibc_nrx--; - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - kiblnd_conn_decref(conn); -} - -int -kiblnd_post_rx(struct kib_rx *rx, int credit) -{ - struct kib_conn *conn = rx->rx_conn; - struct kib_net *net = conn->ibc_peer->ibp_ni->ni_data; - struct ib_recv_wr *bad_wrq = NULL; - int rc; - - LASSERT(net); - LASSERT(!in_interrupt()); - LASSERT(credit == IBLND_POSTRX_NO_CREDIT || - credit == 
IBLND_POSTRX_PEER_CREDIT || - credit == IBLND_POSTRX_RSRVD_CREDIT); - - rx->rx_sge.lkey = conn->ibc_hdev->ibh_pd->local_dma_lkey; - rx->rx_sge.addr = rx->rx_msgaddr; - rx->rx_sge.length = IBLND_MSG_SIZE; - - rx->rx_wrq.next = NULL; - rx->rx_wrq.sg_list = &rx->rx_sge; - rx->rx_wrq.num_sge = 1; - rx->rx_wrq.wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX); - - LASSERT(conn->ibc_state >= IBLND_CONN_INIT); - LASSERT(rx->rx_nob >= 0); /* not posted */ - - if (conn->ibc_state > IBLND_CONN_ESTABLISHED) { - kiblnd_drop_rx(rx); /* No more posts for this rx */ - return 0; - } - - rx->rx_nob = -1; /* flag posted */ - - /* NB: need an extra reference after ib_post_recv because we don't - * own this rx (and rx::rx_conn) anymore, LU-5678. - */ - kiblnd_conn_addref(conn); - rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq); - if (unlikely(rc)) { - CERROR("Can't post rx for %s: %d, bad_wrq: %p\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc, bad_wrq); - rx->rx_nob = 0; - } - - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */ - goto out; - - if (unlikely(rc)) { - kiblnd_close_conn(conn, rc); - kiblnd_drop_rx(rx); /* No more posts for this rx */ - goto out; - } - - if (credit == IBLND_POSTRX_NO_CREDIT) - goto out; - - spin_lock(&conn->ibc_lock); - if (credit == IBLND_POSTRX_PEER_CREDIT) - conn->ibc_outstanding_credits++; - else - conn->ibc_reserved_credits++; - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - -out: - kiblnd_conn_decref(conn); - return rc; -} - -static struct kib_tx * -kiblnd_find_waiting_tx_locked(struct kib_conn *conn, int txtype, __u64 cookie) -{ - struct list_head *tmp; - - list_for_each(tmp, &conn->ibc_active_txs) { - struct kib_tx *tx = list_entry(tmp, struct kib_tx, tx_list); - - LASSERT(!tx->tx_queued); - LASSERT(tx->tx_sending || tx->tx_waiting); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_waiting && - tx->tx_msg->ibm_type == txtype) - return tx; - - CWARN("Bad completion: %swaiting, type %x (wanted 
%x)\n", - tx->tx_waiting ? "" : "NOT ", - tx->tx_msg->ibm_type, txtype); - } - return NULL; -} - -static void -kiblnd_handle_completion(struct kib_conn *conn, int txtype, int status, __u64 cookie) -{ - struct kib_tx *tx; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - int idle; - - spin_lock(&conn->ibc_lock); - - tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie); - if (!tx) { - spin_unlock(&conn->ibc_lock); - - CWARN("Unmatched completion type %x cookie %#llx from %s\n", - txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_close_conn(conn, -EPROTO); - return; - } - - if (!tx->tx_status) { /* success so far */ - if (status < 0) /* failed? */ - tx->tx_status = status; - else if (txtype == IBLND_MSG_GET_REQ) - lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status); - } - - tx->tx_waiting = 0; - - idle = !tx->tx_queued && !tx->tx_sending; - if (idle) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (idle) - kiblnd_tx_done(ni, tx); -} - -static void -kiblnd_send_completion(struct kib_conn *conn, int type, int status, __u64 cookie) -{ - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - struct kib_tx *tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid); - - if (!tx) { - CERROR("Can't get tx for completion %x for %s\n", - type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - tx->tx_msg->ibm_u.completion.ibcm_status = status; - tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie; - kiblnd_init_tx_msg(ni, tx, type, sizeof(struct kib_completion_msg)); - - kiblnd_queue_tx(tx, conn); -} - -static void -kiblnd_handle_rx(struct kib_rx *rx) -{ - struct kib_msg *msg = rx->rx_msg; - struct kib_conn *conn = rx->rx_conn; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - int credits = msg->ibm_credits; - struct kib_tx *tx; - int rc = 0; - int rc2; - int post_credit; - - LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - CDEBUG(D_NET, "Received %x[%d] from %s\n", - msg->ibm_type, credits, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - 
if (credits) { - /* Have I received credits that will let me send? */ - spin_lock(&conn->ibc_lock); - - if (conn->ibc_credits + credits > - conn->ibc_queue_depth) { - rc2 = conn->ibc_credits; - spin_unlock(&conn->ibc_lock); - - CERROR("Bad credits from %s: %d + %d > %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - rc2, credits, conn->ibc_queue_depth); - - kiblnd_close_conn(conn, -EPROTO); - kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT); - return; - } - - conn->ibc_credits += credits; - - /* This ensures the credit taken by NOOP can be returned */ - if (msg->ibm_type == IBLND_MSG_NOOP && - !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */ - conn->ibc_outstanding_credits++; - - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - } - - switch (msg->ibm_type) { - default: - CERROR("Bad IBLND message type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - post_credit = IBLND_POSTRX_NO_CREDIT; - rc = -EPROTO; - break; - - case IBLND_MSG_NOOP: - if (IBLND_OOB_CAPABLE(conn->ibc_version)) { - post_credit = IBLND_POSTRX_NO_CREDIT; - break; - } - - if (credits) /* credit already posted */ - post_credit = IBLND_POSTRX_NO_CREDIT; - else /* a keepalive NOOP */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_IMMEDIATE: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_PUT_REQ: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr, - msg->ibm_srcnid, rx, 1); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_PUT_NAK: - CWARN("PUT_NACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ, - msg->ibm_u.completion.ibcm_status, - 
msg->ibm_u.completion.ibcm_cookie); - break; - - case IBLND_MSG_PUT_ACK: - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - - spin_lock(&conn->ibc_lock); - tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ, - msg->ibm_u.putack.ibpam_src_cookie); - if (tx) - list_del(&tx->tx_list); - spin_unlock(&conn->ibc_lock); - - if (!tx) { - CERROR("Unmatched PUT_ACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - } - - LASSERT(tx->tx_waiting); - /* - * CAVEAT EMPTOR: I could be racing with tx_complete, but... - * (a) I can overwrite tx_msg since my peer has received it! - * (b) tx_waiting set tells tx_complete() it's not done. - */ - tx->tx_nwrq = 0; /* overwrite PUT_REQ */ - - rc2 = kiblnd_init_rdma(conn, tx, IBLND_MSG_PUT_DONE, - kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd), - &msg->ibm_u.putack.ibpam_rd, - msg->ibm_u.putack.ibpam_dst_cookie); - if (rc2 < 0) - CERROR("Can't setup rdma for PUT to %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2); - - spin_lock(&conn->ibc_lock); - tx->tx_waiting = 0; /* clear waiting and queue atomically */ - kiblnd_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - break; - - case IBLND_MSG_PUT_DONE: - post_credit = IBLND_POSTRX_PEER_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBLND_MSG_GET_REQ: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr, - msg->ibm_srcnid, rx, 1); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_GET_DONE: - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - } - - if (rc < 0) /* protocol error */ - kiblnd_close_conn(conn, rc); - - if (post_credit != IBLND_POSTRX_DONT_POST) - kiblnd_post_rx(rx, post_credit); -} - -static void 
-kiblnd_rx_complete(struct kib_rx *rx, int status, int nob) -{ - struct kib_msg *msg = rx->rx_msg; - struct kib_conn *conn = rx->rx_conn; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - struct kib_net *net = ni->ni_data; - int rc; - int err = -EIO; - - LASSERT(net); - LASSERT(rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - - if (conn->ibc_state > IBLND_CONN_ESTABLISHED) - goto ignore; - - if (status != IB_WC_SUCCESS) { - CNETERR("Rx from %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), status); - goto failed; - } - - LASSERT(nob >= 0); - rx->rx_nob = nob; - - rc = kiblnd_unpack_msg(msg, rx->rx_nob); - if (rc) { - CERROR("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || - msg->ibm_dstnid != ni->ni_nid || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != net->ibn_incarnation) { - CERROR("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - err = -ESTALE; - goto failed; - } - - /* set time last known alive */ - kiblnd_peer_alive(conn->ibc_peer); - - /* racing with connection establishment/teardown! */ - - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - unsigned long flags; - - write_lock_irqsave(g_lock, flags); - /* must check holding global lock to eliminate race */ - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - list_add_tail(&rx->rx_list, &conn->ibc_early_rxs); - write_unlock_irqrestore(g_lock, flags); - return; - } - write_unlock_irqrestore(g_lock, flags); - } - kiblnd_handle_rx(rx); - return; - - failed: - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - kiblnd_close_conn(conn, err); - ignore: - kiblnd_drop_rx(rx); /* Don't re-post rx. 
*/ -} - -static struct page * -kiblnd_kvaddr_to_page(unsigned long vaddr) -{ - struct page *page; - - if (is_vmalloc_addr((void *)vaddr)) { - page = vmalloc_to_page((void *)vaddr); - LASSERT(page); - return page; - } -#ifdef CONFIG_HIGHMEM - if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) { - /* No highmem pages only used for bulk (kiov) I/O */ - CERROR("find page for address in highmem\n"); - LBUG(); - } -#endif - page = virt_to_page(vaddr); - LASSERT(page); - return page; -} - -static int -kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx, struct kib_rdma_desc *rd, __u32 nob) -{ - struct kib_hca_dev *hdev; - struct kib_fmr_poolset *fps; - int cpt; - int rc; - - LASSERT(tx->tx_pool); - LASSERT(tx->tx_pool->tpo_pool.po_owner); - - hdev = tx->tx_pool->tpo_hdev; - cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt; - - fps = net->ibn_fmr_ps[cpt]; - rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr); - if (rc) { - CERROR("Can't map %u bytes: %d\n", nob, rc); - return rc; - } - - /* - * If rd is not tx_rd, it's going to get sent to a peer, who will need - * the rkey - */ - rd->rd_key = tx->fmr.fmr_key; - rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask; - rd->rd_frags[0].rf_nob = nob; - rd->rd_nfrags = 1; - - return 0; -} - -static void kiblnd_unmap_tx(struct kib_tx *tx) -{ - if (tx->fmr.fmr_pfmr || tx->fmr.fmr_frd) - kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status); - - if (tx->tx_nfrags) { - kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev, - tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir); - tx->tx_nfrags = 0; - } -} - -static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx, - struct kib_rdma_desc *rd, int nfrags) -{ - struct kib_net *net = ni->ni_data; - struct kib_hca_dev *hdev = net->ibn_dev->ibd_hdev; - __u32 nob; - int i; - - /* - * If rd is not tx_rd, it's going to get sent to a peer and I'm the - * RDMA sink - */ - tx->tx_dmadir = (rd != tx->tx_rd) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; - tx->tx_nfrags = nfrags; - - rd->rd_nfrags = kiblnd_dma_map_sg(hdev->ibh_ibdev, tx->tx_frags, - tx->tx_nfrags, tx->tx_dmadir); - - for (i = 0, nob = 0; i < rd->rd_nfrags; i++) { - rd->rd_frags[i].rf_nob = kiblnd_sg_dma_len( - hdev->ibh_ibdev, &tx->tx_frags[i]); - rd->rd_frags[i].rf_addr = kiblnd_sg_dma_address( - hdev->ibh_ibdev, &tx->tx_frags[i]); - nob += rd->rd_frags[i].rf_nob; - } - - if (net->ibn_fmr_ps) - return kiblnd_fmr_map_tx(net, tx, rd, nob); - - return -EINVAL; -} - -static int -kiblnd_setup_rd_iov(struct lnet_ni *ni, struct kib_tx *tx, - struct kib_rdma_desc *rd, unsigned int niov, - const struct kvec *iov, int offset, int nob) -{ - struct kib_net *net = ni->ni_data; - struct page *page; - struct scatterlist *sg; - unsigned long vaddr; - int fragnob; - int page_offset; - - LASSERT(nob > 0); - LASSERT(niov > 0); - LASSERT(net); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT(niov > 0); - } - - sg = tx->tx_frags; - do { - LASSERT(niov > 0); - - vaddr = ((unsigned long)iov->iov_base) + offset; - page_offset = vaddr & (PAGE_SIZE - 1); - page = kiblnd_kvaddr_to_page(vaddr); - if (!page) { - CERROR("Can't find page\n"); - return -EFAULT; - } - - fragnob = min((int)(iov->iov_len - offset), nob); - fragnob = min(fragnob, (int)PAGE_SIZE - page_offset); - - sg_set_page(sg, page, fragnob, page_offset); - sg = sg_next(sg); - if (!sg) { - CERROR("lacking enough sg entries to map tx\n"); - return -EFAULT; - } - - if (offset + fragnob < iov->iov_len) { - offset += fragnob; - } else { - offset = 0; - iov++; - niov--; - } - nob -= fragnob; - } while (nob > 0); - - return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags); -} - -static int -kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx, - struct kib_rdma_desc *rd, int nkiov, - const struct bio_vec *kiov, int offset, int nob) -{ - struct kib_net *net = ni->ni_data; - struct scatterlist *sg; - int fragnob; - - CDEBUG(D_NET, "niov %d 
offset %d nob %d\n", nkiov, offset, nob); - - LASSERT(nob > 0); - LASSERT(nkiov > 0); - LASSERT(net); - - while (offset >= kiov->bv_len) { - offset -= kiov->bv_len; - nkiov--; - kiov++; - LASSERT(nkiov > 0); - } - - sg = tx->tx_frags; - do { - LASSERT(nkiov > 0); - - fragnob = min((int)(kiov->bv_len - offset), nob); - - sg_set_page(sg, kiov->bv_page, fragnob, - kiov->bv_offset + offset); - sg = sg_next(sg); - if (!sg) { - CERROR("lacking enough sg entries to map tx\n"); - return -EFAULT; - } - - offset = 0; - kiov++; - nkiov--; - nob -= fragnob; - } while (nob > 0); - - return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags); -} - -static int -kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit) - __must_hold(&conn->ibc_lock) -{ - struct kib_msg *msg = tx->tx_msg; - struct kib_peer *peer = conn->ibc_peer; - struct lnet_ni *ni = peer->ibp_ni; - int ver = conn->ibc_version; - int rc; - int done; - - LASSERT(tx->tx_queued); - /* We rely on this for QP sizing */ - LASSERT(tx->tx_nwrq > 0); - - LASSERT(!credit || credit == 1); - LASSERT(conn->ibc_outstanding_credits >= 0); - LASSERT(conn->ibc_outstanding_credits <= conn->ibc_queue_depth); - LASSERT(conn->ibc_credits >= 0); - LASSERT(conn->ibc_credits <= conn->ibc_queue_depth); - - if (conn->ibc_nsends_posted == kiblnd_concurrent_sends(ver, ni)) { - /* tx completions outstanding... 
*/ - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(peer->ibp_nid)); - return -EAGAIN; - } - - if (credit && !conn->ibc_credits) { /* no credits */ - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(peer->ibp_nid)); - return -EAGAIN; - } - - if (credit && !IBLND_OOB_CAPABLE(ver) && - conn->ibc_credits == 1 && /* last credit reserved */ - msg->ibm_type != IBLND_MSG_NOOP) { /* for NOOP */ - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(peer->ibp_nid)); - return -EAGAIN; - } - - /* NB don't drop ibc_lock before bumping tx_sending */ - list_del(&tx->tx_list); - tx->tx_queued = 0; - - if (msg->ibm_type == IBLND_MSG_NOOP && - (!kiblnd_need_noop(conn) || /* redundant NOOP */ - (IBLND_OOB_CAPABLE(ver) && /* posted enough NOOP */ - conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) { - /* - * OK to drop when posted enough NOOPs, since - * kiblnd_check_sends_locked will queue NOOP again when - * posted NOOPs complete - */ - spin_unlock(&conn->ibc_lock); - kiblnd_tx_done(peer->ibp_ni, tx); - spin_lock(&conn->ibc_lock); - CDEBUG(D_NET, "%s(%d): redundant or enough NOOP\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_noops_posted); - return 0; - } - - kiblnd_pack_msg(peer->ibp_ni, msg, ver, conn->ibc_outstanding_credits, - peer->ibp_nid, conn->ibc_incarnation); - - conn->ibc_credits -= credit; - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (msg->ibm_type == IBLND_MSG_NOOP) - conn->ibc_noops_posted++; - - /* - * CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA - * PUT. If so, it was first queued here as a PUT_REQ, sent and - * stashed on ibc_active_txs, matched by an incoming PUT_ACK, - * and then re-queued here. It's (just) possible that - * tx_sending is non-zero if we've not done the tx_complete() - * from the first send; hence the ++ rather than = below. - */ - tx->tx_sending++; - list_add(&tx->tx_list, &conn->ibc_active_txs); - - /* I'm still holding ibc_lock! 
*/ - if (conn->ibc_state != IBLND_CONN_ESTABLISHED) { - rc = -ECONNABORTED; - } else if (tx->tx_pool->tpo_pool.po_failed || - conn->ibc_hdev != tx->tx_pool->tpo_hdev) { - /* close_conn will launch failover */ - rc = -ENETDOWN; - } else { - struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd; - struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr; - struct ib_send_wr *wrq = &tx->tx_wrq[0].wr; - - if (frd) { - if (!frd->frd_valid) { - wrq = &frd->frd_inv_wr; - wrq->next = &frd->frd_fastreg_wr.wr; - } else { - wrq = &frd->frd_fastreg_wr.wr; - } - frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr; - } - - LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX), - "bad wr_id %llx, opc %d, flags %d, peer: %s\n", - bad->wr_id, bad->opcode, bad->send_flags, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - bad = NULL; - rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad); - } - - conn->ibc_last_send = jiffies; - - if (!rc) - return 0; - - /* - * NB credits are transferred in the actual - * message, which can only be the last work item - */ - conn->ibc_credits += credit; - conn->ibc_outstanding_credits += msg->ibm_credits; - conn->ibc_nsends_posted--; - if (msg->ibm_type == IBLND_MSG_NOOP) - conn->ibc_noops_posted--; - - tx->tx_status = rc; - tx->tx_waiting = 0; - tx->tx_sending--; - - done = !tx->tx_sending; - if (done) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (conn->ibc_state == IBLND_CONN_ESTABLISHED) - CERROR("Error %d posting transmit to %s\n", - rc, libcfs_nid2str(peer->ibp_nid)); - else - CDEBUG(D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(peer->ibp_nid)); - - kiblnd_close_conn(conn, rc); - - if (done) - kiblnd_tx_done(peer->ibp_ni, tx); - - spin_lock(&conn->ibc_lock); - - return -EIO; -} - -static void -kiblnd_check_sends_locked(struct kib_conn *conn) -{ - int ver = conn->ibc_version; - struct lnet_ni *ni = conn->ibc_peer->ibp_ni; - struct kib_tx *tx; - - /* Don't send anything until after the connection is established 
*/ - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - CDEBUG(D_NET, "%s too soon\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni)); - LASSERT(!IBLND_OOB_CAPABLE(ver) || - conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver)); - LASSERT(conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - struct kib_tx, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (kiblnd_need_noop(conn)) { - spin_unlock(&conn->ibc_lock); - - tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid); - if (tx) - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0); - - spin_lock(&conn->ibc_lock); - if (tx) - kiblnd_queue_tx_locked(tx, conn); - } - - for (;;) { - int credit; - - if (!list_empty(&conn->ibc_tx_queue_nocred)) { - credit = 0; - tx = list_entry(conn->ibc_tx_queue_nocred.next, - struct kib_tx, tx_list); - } else if (!list_empty(&conn->ibc_tx_noops)) { - LASSERT(!IBLND_OOB_CAPABLE(ver)); - credit = 1; - tx = list_entry(conn->ibc_tx_noops.next, - struct kib_tx, tx_list); - } else if (!list_empty(&conn->ibc_tx_queue)) { - credit = 1; - tx = list_entry(conn->ibc_tx_queue.next, - struct kib_tx, tx_list); - } else { - break; - } - - if (kiblnd_post_tx_locked(conn, tx, credit)) - break; - } -} - -static void -kiblnd_tx_complete(struct kib_tx *tx, int status) -{ - int failed = (status != IB_WC_SUCCESS); - struct kib_conn *conn = tx->tx_conn; - int idle; - - LASSERT(tx->tx_sending > 0); - - if (failed) { - if (conn->ibc_state == IBLND_CONN_ESTABLISHED) - CNETERR("Tx -> %s cookie %#llx sending %d waiting %d: failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_cookie, tx->tx_sending, tx->tx_waiting, - status); - - kiblnd_close_conn(conn, -EIO); - } else { - kiblnd_peer_alive(conn->ibc_peer); - } - - 
spin_lock(&conn->ibc_lock); - - /* - * I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. - */ - tx->tx_sending--; - conn->ibc_nsends_posted--; - if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP) - conn->ibc_noops_posted--; - - if (failed) { - tx->tx_waiting = 0; /* don't wait for peer */ - tx->tx_status = -EIO; - } - - idle = !tx->tx_sending && /* This is the final callback */ - !tx->tx_waiting && /* Not waiting for peer */ - !tx->tx_queued; /* Not re-queued (PUT_DONE) */ - if (idle) - list_del(&tx->tx_list); - - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - - if (idle) - kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx); -} - -static void -kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type, - int body_nob) -{ - struct kib_hca_dev *hdev = tx->tx_pool->tpo_hdev; - struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq]; - struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq]; - int nob = offsetof(struct kib_msg, ibm_u) + body_nob; - - LASSERT(tx->tx_nwrq >= 0); - LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1); - LASSERT(nob <= IBLND_MSG_SIZE); - - kiblnd_init_msg(tx->tx_msg, type, body_nob); - - sge->lkey = hdev->ibh_pd->local_dma_lkey; - sge->addr = tx->tx_msgaddr; - sge->length = nob; - - memset(wrq, 0, sizeof(*wrq)); - - wrq->wr.next = NULL; - wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX); - wrq->wr.sg_list = sge; - wrq->wr.num_sge = 1; - wrq->wr.opcode = IB_WR_SEND; - wrq->wr.send_flags = IB_SEND_SIGNALED; - - tx->tx_nwrq++; -} - -static int -kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type, - int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie) -{ - struct kib_msg *ibmsg = tx->tx_msg; - struct kib_rdma_desc *srcrd = tx->tx_rd; - struct ib_sge *sge = &tx->tx_sge[0]; - struct ib_rdma_wr *wrq, *next; - int rc = resid; - int srcidx = 0; - int dstidx = 0; - int wrknob; - - LASSERT(!in_interrupt()); - LASSERT(!tx->tx_nwrq); - LASSERT(type == 
IBLND_MSG_GET_DONE || - type == IBLND_MSG_PUT_DONE); - - if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) { - CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - conn->ibc_max_frags << PAGE_SHIFT, - kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd)); - rc = -EMSGSIZE; - goto too_big; - } - - while (resid > 0) { - if (srcidx >= srcrd->rd_nfrags) { - CERROR("Src buffer exhausted: %d frags\n", srcidx); - rc = -EPROTO; - break; - } - - if (dstidx == dstrd->rd_nfrags) { - CERROR("Dst buffer exhausted: %d frags\n", dstidx); - rc = -EPROTO; - break; - } - - if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) { - CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - IBLND_MAX_RDMA_FRAGS, - srcidx, srcrd->rd_nfrags, - dstidx, dstrd->rd_nfrags); - rc = -EMSGSIZE; - break; - } - - wrknob = min3(kiblnd_rd_frag_size(srcrd, srcidx), - kiblnd_rd_frag_size(dstrd, dstidx), - (__u32)resid); - - sge = &tx->tx_sge[tx->tx_nwrq]; - sge->addr = kiblnd_rd_frag_addr(srcrd, srcidx); - sge->lkey = kiblnd_rd_frag_key(srcrd, srcidx); - sge->length = wrknob; - - wrq = &tx->tx_wrq[tx->tx_nwrq]; - next = wrq + 1; - - wrq->wr.next = &next->wr; - wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); - wrq->wr.sg_list = sge; - wrq->wr.num_sge = 1; - wrq->wr.opcode = IB_WR_RDMA_WRITE; - wrq->wr.send_flags = 0; - - wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx); - wrq->rkey = kiblnd_rd_frag_key(dstrd, dstidx); - - srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob); - dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob); - - resid -= wrknob; - - tx->tx_nwrq++; - wrq++; - sge++; - } -too_big: - if (rc < 0) /* no RDMA if completing with failure */ - tx->tx_nwrq = 0; - - ibmsg->ibm_u.completion.ibcm_status = rc; - ibmsg->ibm_u.completion.ibcm_cookie = dstcookie; - kiblnd_init_tx_msg(conn->ibc_peer->ibp_ni, tx, - type, sizeof(struct 
kib_completion_msg)); - - return rc; -} - -static void -kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn) -{ - struct list_head *q; - - LASSERT(tx->tx_nwrq > 0); /* work items set up */ - LASSERT(!tx->tx_queued); /* not queued for sending already */ - LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - tx->tx_queued = 1; - tx->tx_deadline = jiffies + - msecs_to_jiffies(*kiblnd_tunables.kib_timeout * - MSEC_PER_SEC); - - if (!tx->tx_conn) { - kiblnd_conn_addref(conn); - tx->tx_conn = conn; - LASSERT(tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE); - } else { - /* PUT_DONE first attached to conn as a PUT_REQ */ - LASSERT(tx->tx_conn == conn); - LASSERT(tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE); - } - - switch (tx->tx_msg->ibm_type) { - default: - LBUG(); - - case IBLND_MSG_PUT_REQ: - case IBLND_MSG_GET_REQ: - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_ACK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - q = &conn->ibc_tx_queue_nocred; - break; - - case IBLND_MSG_NOOP: - if (IBLND_OOB_CAPABLE(conn->ibc_version)) - q = &conn->ibc_tx_queue_nocred; - else - q = &conn->ibc_tx_noops; - break; - - case IBLND_MSG_IMMEDIATE: - q = &conn->ibc_tx_queue; - break; - } - - list_add_tail(&tx->tx_list, q); -} - -static void -kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn) -{ - spin_lock(&conn->ibc_lock); - kiblnd_queue_tx_locked(tx, conn); - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); -} - -static int kiblnd_resolve_addr(struct rdma_cm_id *cmid, - struct sockaddr_in *srcaddr, - struct sockaddr_in *dstaddr, - int timeout_ms) -{ - unsigned short port; - int rc; - - /* allow the port to be reused */ - rc = rdma_set_reuseaddr(cmid, 1); - if (rc) { - CERROR("Unable to set reuse on cmid: %d\n", rc); - return rc; - } - - /* look for a free privileged port */ - for (port = PROT_SOCK - 1; port > 0; port--) { - srcaddr->sin_port = htons(port); - rc = rdma_resolve_addr(cmid, - (struct sockaddr 
*)srcaddr, - (struct sockaddr *)dstaddr, - timeout_ms); - if (!rc) { - CDEBUG(D_NET, "bound to port %hu\n", port); - return 0; - } else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) { - CDEBUG(D_NET, "bind to port %hu failed: %d\n", - port, rc); - } else { - return rc; - } - } - - CERROR("Failed to bind to a free privileged port\n"); - return rc; -} - -static void -kiblnd_connect_peer(struct kib_peer *peer) -{ - struct rdma_cm_id *cmid; - struct kib_dev *dev; - struct kib_net *net = peer->ibp_ni->ni_data; - struct sockaddr_in srcaddr; - struct sockaddr_in dstaddr; - int rc; - - LASSERT(net); - LASSERT(peer->ibp_connecting > 0); - - cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP, - IB_QPT_RC); - - if (IS_ERR(cmid)) { - CERROR("Can't create CMID for %s: %ld\n", - libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid)); - rc = PTR_ERR(cmid); - goto failed; - } - - dev = net->ibn_dev; - memset(&srcaddr, 0, sizeof(srcaddr)); - srcaddr.sin_family = AF_INET; - srcaddr.sin_addr.s_addr = htonl(dev->ibd_ifip); - - memset(&dstaddr, 0, sizeof(dstaddr)); - dstaddr.sin_family = AF_INET; - dstaddr.sin_port = htons(*kiblnd_tunables.kib_service); - dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid)); - - kiblnd_peer_addref(peer); /* cmid's ref */ - - if (*kiblnd_tunables.kib_use_priv_port) { - rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr, - *kiblnd_tunables.kib_timeout * 1000); - } else { - rc = rdma_resolve_addr(cmid, - (struct sockaddr *)&srcaddr, - (struct sockaddr *)&dstaddr, - *kiblnd_tunables.kib_timeout * 1000); - } - if (rc) { - /* Can't initiate address resolution: */ - CERROR("Can't resolve addr for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - goto failed2; - } - - return; - - failed2: - kiblnd_peer_connect_failed(peer, 1, rc); - kiblnd_peer_decref(peer); /* cmid's ref */ - rdma_destroy_id(cmid); - return; - failed: - kiblnd_peer_connect_failed(peer, 1, rc); -} - -bool -kiblnd_reconnect_peer(struct kib_peer *peer) -{ - rwlock_t *glock = 
&kiblnd_data.kib_global_lock; - char *reason = NULL; - struct list_head txs; - unsigned long flags; - - INIT_LIST_HEAD(&txs); - - write_lock_irqsave(glock, flags); - if (!peer->ibp_reconnecting) { - if (peer->ibp_accepting) - reason = "accepting"; - else if (peer->ibp_connecting) - reason = "connecting"; - else if (!list_empty(&peer->ibp_conns)) - reason = "connected"; - else /* connected then closed */ - reason = "closed"; - - goto no_reconnect; - } - - LASSERT(!peer->ibp_accepting && !peer->ibp_connecting && - list_empty(&peer->ibp_conns)); - peer->ibp_reconnecting--; - - if (!kiblnd_peer_active(peer)) { - list_splice_init(&peer->ibp_tx_queue, &txs); - reason = "unlinked"; - goto no_reconnect; - } - - peer->ibp_connecting++; - peer->ibp_reconnected++; - write_unlock_irqrestore(glock, flags); - - kiblnd_connect_peer(peer); - return true; - -no_reconnect: - write_unlock_irqrestore(glock, flags); - - CWARN("Abort reconnection of %s: %s\n", - libcfs_nid2str(peer->ibp_nid), reason); - kiblnd_txlist_done(peer->ibp_ni, &txs, -ECONNABORTED); - return false; -} - -void -kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) -{ - struct kib_peer *peer; - struct kib_peer *peer2; - struct kib_conn *conn; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - unsigned long flags; - int rc; - int i; - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - - /* - * If I get here, I've committed to send, so I complete the tx with - * failure on any problems - */ - LASSERT(!tx || !tx->tx_conn); /* only set when assigned a conn */ - LASSERT(!tx || tx->tx_nwrq > 0); /* work items have been set up */ - - /* - * First time, just use a read lock since I expect to find my peer - * connected - */ - read_lock_irqsave(g_lock, flags); - - peer = kiblnd_find_peer_locked(nid); - if (peer && !list_empty(&peer->ibp_conns)) { - /* Found a peer with an established connection */ - conn = kiblnd_get_conn_locked(peer); - kiblnd_conn_addref(conn); /* 1 ref for me... 
*/ - - read_unlock_irqrestore(g_lock, flags); - - if (tx) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - return; - } - - read_unlock(g_lock); - /* Re-try with a write lock */ - write_lock(g_lock); - - peer = kiblnd_find_peer_locked(nid); - if (peer) { - if (list_empty(&peer->ibp_conns)) { - /* found a peer, but it's still connecting... */ - LASSERT(kiblnd_peer_connecting(peer)); - if (tx) - list_add_tail(&tx->tx_list, - &peer->ibp_tx_queue); - write_unlock_irqrestore(g_lock, flags); - } else { - conn = kiblnd_get_conn_locked(peer); - kiblnd_conn_addref(conn); /* 1 ref for me... */ - - write_unlock_irqrestore(g_lock, flags); - - if (tx) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - return; - } - - write_unlock_irqrestore(g_lock, flags); - - /* Allocate a peer ready to add to the peer table and retry */ - rc = kiblnd_create_peer(ni, &peer, nid); - if (rc) { - CERROR("Can't create peer %s\n", libcfs_nid2str(nid)); - if (tx) { - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kiblnd_tx_done(ni, tx); - } - return; - } - - write_lock_irqsave(g_lock, flags); - - peer2 = kiblnd_find_peer_locked(nid); - if (peer2) { - if (list_empty(&peer2->ibp_conns)) { - /* found a peer, but it's still connecting... */ - LASSERT(kiblnd_peer_connecting(peer2)); - if (tx) - list_add_tail(&tx->tx_list, - &peer2->ibp_tx_queue); - write_unlock_irqrestore(g_lock, flags); - } else { - conn = kiblnd_get_conn_locked(peer2); - kiblnd_conn_addref(conn); /* 1 ref for me... 
*/ - - write_unlock_irqrestore(g_lock, flags); - - if (tx) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - - kiblnd_peer_decref(peer); - return; - } - - /* Brand new peer */ - LASSERT(!peer->ibp_connecting); - tunables = &peer->ibp_ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - peer->ibp_connecting = tunables->lnd_conns_per_peer; - - /* always called with a ref on ni, which prevents ni being shutdown */ - LASSERT(!((struct kib_net *)ni->ni_data)->ibn_shutdown); - - if (tx) - list_add_tail(&tx->tx_list, &peer->ibp_tx_queue); - - kiblnd_peer_addref(peer); - list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid)); - - write_unlock_irqrestore(g_lock, flags); - - for (i = 0; i < tunables->lnd_conns_per_peer; i++) - kiblnd_connect_peer(peer); - kiblnd_peer_decref(peer); -} - -int -kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) -{ - struct lnet_hdr *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - struct lnet_process_id target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct kvec *payload_iov = lntmsg->msg_iov; - struct bio_vec *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - struct iov_iter from; - struct kib_msg *ibmsg; - struct kib_rdma_desc *rd; - struct kib_tx *tx; - int nob; - int rc; - - /* NB 'private' is different depending on what we're sending.... 
*/ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT(!payload_nob || payload_niov > 0); - LASSERT(payload_niov <= LNET_MAX_IOV); - - /* Thread context */ - LASSERT(!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT(!(payload_kiov && payload_iov)); - - if (payload_kiov) - iov_iter_bvec(&from, ITER_BVEC | WRITE, - payload_kiov, payload_niov, - payload_nob + payload_offset); - else - iov_iter_kvec(&from, ITER_KVEC | WRITE, - payload_iov, payload_niov, - payload_nob + payload_offset); - - iov_iter_advance(&from, payload_offset); - - switch (type) { - default: - LBUG(); - return -EIO; - - case LNET_MSG_ACK: - LASSERT(!payload_nob); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? */ - nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBLND_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kiblnd_get_idle_tx(ni, target.nid); - if (!tx) { - CERROR("Can't allocate txd for GET to %s\n", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - rd = &ibmsg->ibm_u.get.ibgm_rd; - if (!(lntmsg->msg_md->md_options & LNET_MD_KIOV)) - rc = kiblnd_setup_rd_iov(ni, tx, rd, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kiblnd_setup_rd_kiov(ni, tx, rd, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc) { - CERROR("Can't setup GET sink for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - nob = offsetof(struct kib_get_msg, ibgm_rd.rd_frags[rd->rd_nfrags]); - ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie; - ibmsg->ibm_u.get.ibgm_hdr = *hdr; - - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob); - - tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg); - if 
(!tx->tx_lntmsg[1]) { - CERROR("Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */ - tx->tx_waiting = 1; /* waiting for GET_DONE */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBLND_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kiblnd_get_idle_tx(ni, target.nid); - if (!tx) { - CERROR("Can't allocate %s txd for %s\n", - type == LNET_MSG_PUT ? "PUT" : "REPLY", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - if (!payload_kiov) - rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, - payload_niov, payload_iov, - payload_offset, payload_nob); - else - rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, - payload_niov, payload_kiov, - payload_offset, payload_nob); - if (rc) { - CERROR("Can't setup PUT src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.putreq.ibprm_hdr = *hdr; - ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie; - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(struct kib_putreq_msg)); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT(offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]) - <= IBLND_MSG_SIZE); - - tx = kiblnd_get_idle_tx(ni, target.nid); - if (!tx) { - CERROR("Can't send %d to %s: tx descs exhausted\n", - type, libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - rc = copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, payload_nob, - &from); - if (rc != payload_nob) { - 
kiblnd_pool_free_node(&tx->tx_pool->tpo_pool, &tx->tx_list); - return -EFAULT; - } - - nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]); - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; -} - -static void -kiblnd_reply(struct lnet_ni *ni, struct kib_rx *rx, struct lnet_msg *lntmsg) -{ - struct lnet_process_id target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct kvec *iov = lntmsg->msg_iov; - struct bio_vec *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - struct kib_tx *tx; - int rc; - - tx = kiblnd_get_idle_tx(ni, rx->rx_conn->ibc_peer->ibp_nid); - if (!tx) { - CERROR("Can't get tx for REPLY to %s\n", - libcfs_nid2str(target.nid)); - goto failed_0; - } - - if (!nob) - rc = 0; - else if (!kiov) - rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, - niov, iov, offset, nob); - else - rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, - niov, kiov, offset, nob); - - if (rc) { - CERROR("Can't setup GET src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - rc = kiblnd_init_rdma(rx->rx_conn, tx, - IBLND_MSG_GET_DONE, nob, - &rx->rx_msg->ibm_u.get.ibgm_rd, - rx->rx_msg->ibm_u.get.ibgm_cookie); - if (rc < 0) { - CERROR("Can't setup rdma for GET from %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - if (!nob) { - /* No RDMA: local completion may happen now! 
*/ - lnet_finalize(ni, lntmsg, 0); - } else { - /* RDMA: lnet_finalize(lntmsg) when it completes */ - tx->tx_lntmsg[0] = lntmsg; - } - - kiblnd_queue_tx(tx, rx->rx_conn); - return; - - failed_1: - kiblnd_tx_done(ni, tx); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, - int delayed, struct iov_iter *to, unsigned int rlen) -{ - struct kib_rx *rx = private; - struct kib_msg *rxmsg = rx->rx_msg; - struct kib_conn *conn = rx->rx_conn; - struct kib_tx *tx; - int nob; - int post_credit = IBLND_POSTRX_PEER_CREDIT; - int rc = 0; - - LASSERT(iov_iter_count(to) <= rlen); - LASSERT(!in_interrupt()); - /* Either all pages or all vaddrs */ - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBLND_MSG_IMMEDIATE: - nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - rc = copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload, rlen, - to); - if (rc != rlen) { - rc = -EFAULT; - break; - } - - rc = 0; - lnet_finalize(ni, lntmsg, 0); - break; - - case IBLND_MSG_PUT_REQ: { - struct kib_msg *txmsg; - struct kib_rdma_desc *rd; - - if (!iov_iter_count(to)) { - lnet_finalize(ni, lntmsg, 0); - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - tx = kiblnd_get_idle_tx(ni, conn->ibc_peer->ibp_nid); - if (!tx) { - CERROR("Can't allocate tx for %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* Not replying will break the connection */ - rc = -ENOMEM; - break; - } - - txmsg = tx->tx_msg; - rd = &txmsg->ibm_u.putack.ibpam_rd; - if (!(to->type & ITER_BVEC)) - rc = kiblnd_setup_rd_iov(ni, tx, rd, - to->nr_segs, to->kvec, - to->iov_offset, - iov_iter_count(to)); - else - rc = kiblnd_setup_rd_kiov(ni, tx, rd, - to->nr_segs, to->bvec, - 
to->iov_offset, - iov_iter_count(to)); - if (rc) { - CERROR("Can't setup PUT sink for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kiblnd_tx_done(ni, tx); - /* tell peer it's over */ - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - nob = offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[rd->rd_nfrags]); - txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie; - txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie; - - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_DONE */ - kiblnd_queue_tx(tx, conn); - - /* reposted buffer reserved for PUT_DONE */ - post_credit = IBLND_POSTRX_NO_CREDIT; - break; - } - - case IBLND_MSG_GET_REQ: - if (lntmsg) { - /* Optimized GET; RDMA lntmsg's payload */ - kiblnd_reply(ni, rx, lntmsg); - } else { - /* GET didn't match anything */ - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE, - -ENODATA, - rxmsg->ibm_u.get.ibgm_cookie); - } - break; - } - - kiblnd_post_rx(rx, post_credit); - return rc; -} - -int -kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name) -{ - struct task_struct *task = kthread_run(fn, arg, "%s", name); - - if (IS_ERR(task)) - return PTR_ERR(task); - - atomic_inc(&kiblnd_data.kib_nthreads); - return 0; -} - -static void -kiblnd_thread_fini(void) -{ - atomic_dec(&kiblnd_data.kib_nthreads); -} - -static void -kiblnd_peer_alive(struct kib_peer *peer) -{ - /* This is racy, but everyone's only writing jiffies */ - peer->ibp_last_alive = jiffies; - mb(); -} - -static void -kiblnd_peer_notify(struct kib_peer *peer) -{ - int error = 0; - unsigned long last_alive = 0; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (kiblnd_peer_idle(peer) && peer->ibp_error) { - error = peer->ibp_error; - peer->ibp_error = 0; - - last_alive = peer->ibp_last_alive; - } - - 
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (error) - lnet_notify(peer->ibp_ni, - peer->ibp_nid, 0, last_alive); -} - -void -kiblnd_close_conn_locked(struct kib_conn *conn, int error) -{ - /* - * This just does the immediate housekeeping. 'error' is zero for a - * normal shutdown which can happen only after the connection has been - * established. If the connection is established, schedule the - * connection to be finished off by the connd. Otherwise the connd is - * already dealing with it (either to set it up or tear it down). - * Caller holds kib_global_lock exclusively in irq context - */ - struct kib_peer *peer = conn->ibc_peer; - struct kib_dev *dev; - unsigned long flags; - - LASSERT(error || conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - if (error && !conn->ibc_comms_error) - conn->ibc_comms_error = error; - - if (conn->ibc_state != IBLND_CONN_ESTABLISHED) - return; /* already being handled */ - - if (!error && - list_empty(&conn->ibc_tx_noops) && - list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_rsrvd) && - list_empty(&conn->ibc_tx_queue_nocred) && - list_empty(&conn->ibc_active_txs)) { - CDEBUG(D_NET, "closing conn to %s\n", - libcfs_nid2str(peer->ibp_nid)); - } else { - CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? 
"" : "(waiting)"); - } - - dev = ((struct kib_net *)peer->ibp_ni->ni_data)->ibn_dev; - if (peer->ibp_next_conn == conn) - /* clear next_conn so it won't be used */ - peer->ibp_next_conn = NULL; - list_del(&conn->ibc_list); - /* connd (see below) takes over ibc_list's ref */ - - if (list_empty(&peer->ibp_conns) && /* no more conns */ - kiblnd_peer_active(peer)) { /* still in peer table */ - kiblnd_unlink_peer_locked(peer); - - /* set/clear error on last conn */ - peer->ibp_error = conn->ibc_comms_error; - } - - kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING); - - if (error && - kiblnd_dev_can_failover(dev)) { - list_add_tail(&dev->ibd_fail_list, - &kiblnd_data.kib_failed_devs); - wake_up(&kiblnd_data.kib_failover_waitq); - } - - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); - - list_add_tail(&conn->ibc_list, &kiblnd_data.kib_connd_conns); - wake_up(&kiblnd_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); -} - -void -kiblnd_close_conn(struct kib_conn *conn, int error) -{ - unsigned long flags; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - kiblnd_close_conn_locked(conn, error); - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -static void -kiblnd_handle_early_rxs(struct kib_conn *conn) -{ - unsigned long flags; - struct kib_rx *rx; - - LASSERT(!in_interrupt()); - LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - while (!list_empty(&conn->ibc_early_rxs)) { - rx = list_entry(conn->ibc_early_rxs.next, - struct kib_rx, rx_list); - list_del(&rx->rx_list); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_handle_rx(rx); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - } - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -static void -kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs) -{ - LIST_HEAD(zombies); - struct list_head *tmp; - struct list_head 
*nxt; - struct kib_tx *tx; - - spin_lock(&conn->ibc_lock); - - list_for_each_safe(tmp, nxt, txs) { - tx = list_entry(tmp, struct kib_tx, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT(!tx->tx_queued); - LASSERT(tx->tx_waiting || tx->tx_sending); - } else { - LASSERT(tx->tx_queued); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_waiting = 0; - - if (!tx->tx_sending) { - tx->tx_queued = 0; - list_del(&tx->tx_list); - list_add(&tx->tx_list, &zombies); - } - } - - spin_unlock(&conn->ibc_lock); - - kiblnd_txlist_done(conn->ibc_peer->ibp_ni, &zombies, -ECONNABORTED); -} - -static void -kiblnd_finalise_conn(struct kib_conn *conn) -{ - LASSERT(!in_interrupt()); - LASSERT(conn->ibc_state > IBLND_CONN_INIT); - - kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED); - - /* - * abort_receives moves QP state to IB_QPS_ERR. This is only required - * for connections that didn't get as far as being connected, because - * rdma_disconnect() does this for free. - */ - kiblnd_abort_receives(conn); - - /* - * Complete all tx descs not waiting for sends to complete. - * NB we should be safe from RDMA now that the QP has changed state - */ - kiblnd_abort_txs(conn, &conn->ibc_tx_noops); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred); - kiblnd_abort_txs(conn, &conn->ibc_active_txs); - - kiblnd_handle_early_rxs(conn); -} - -static void -kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error) -{ - LIST_HEAD(zombies); - unsigned long flags; - - LASSERT(error); - LASSERT(!in_interrupt()); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (active) { - LASSERT(peer->ibp_connecting > 0); - peer->ibp_connecting--; - } else { - LASSERT(peer->ibp_accepting > 0); - peer->ibp_accepting--; - } - - if (kiblnd_peer_connecting(peer)) { - /* another connection attempt under way... 
*/ - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return; - } - - peer->ibp_reconnected = 0; - if (list_empty(&peer->ibp_conns)) { - /* Take peer's blocked transmits to complete with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kiblnd_peer_active(peer)) - kiblnd_unlink_peer_locked(peer); - - peer->ibp_error = error; - } else { - /* Can't have blocked transmits if there are connections */ - LASSERT(list_empty(&peer->ibp_tx_queue)); - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_peer_notify(peer); - - if (list_empty(&zombies)) - return; - - CNETERR("Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH); -} - -static void -kiblnd_connreq_done(struct kib_conn *conn, int status) -{ - struct kib_peer *peer = conn->ibc_peer; - struct kib_tx *tx; - struct kib_tx *tmp; - struct list_head txs; - unsigned long flags; - int active; - - active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - - CDEBUG(D_NET, "%s: active(%d), version(%x), status(%d)\n", - libcfs_nid2str(peer->ibp_nid), active, - conn->ibc_version, status); - - LASSERT(!in_interrupt()); - LASSERT((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT && - peer->ibp_connecting > 0) || - (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT && - peer->ibp_accepting > 0)); - - kfree(conn->ibc_connvars); - conn->ibc_connvars = NULL; - - if (status) { - /* failed to establish connection */ - kiblnd_peer_connect_failed(peer, active, status); - kiblnd_finalise_conn(conn); - return; - } - - /* connection established */ - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - conn->ibc_last_send = jiffies; - kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED); - kiblnd_peer_alive(peer); - - /* - * Add conn to peer's list and nuke any dangling conns from a different - * peer instance... 
- */ - kiblnd_conn_addref(conn); /* +1 ref for ibc_list */ - list_add(&conn->ibc_list, &peer->ibp_conns); - peer->ibp_reconnected = 0; - if (active) - peer->ibp_connecting--; - else - peer->ibp_accepting--; - - if (!peer->ibp_version) { - peer->ibp_version = conn->ibc_version; - peer->ibp_incarnation = conn->ibc_incarnation; - } - - if (peer->ibp_version != conn->ibc_version || - peer->ibp_incarnation != conn->ibc_incarnation) { - kiblnd_close_stale_conns_locked(peer, conn->ibc_version, - conn->ibc_incarnation); - peer->ibp_version = conn->ibc_version; - peer->ibp_incarnation = conn->ibc_incarnation; - } - - /* grab pending txs while I have the lock */ - list_add(&txs, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (!kiblnd_peer_active(peer) || /* peer has been deleted */ - conn->ibc_comms_error) { /* error has happened already */ - struct lnet_ni *ni = peer->ibp_ni; - - /* start to shut down connection */ - kiblnd_close_conn_locked(conn, -ECONNABORTED); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_txlist_done(ni, &txs, -ECONNABORTED); - - return; - } - - /* - * +1 ref for myself, this connection is visible to other threads - * now, refcount of peer:ibp_conns can be released by connection - * close from either a different thread, or the calling of - * kiblnd_check_sends_locked() below. See bz21911 for details. - */ - kiblnd_conn_addref(conn); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - /* Schedule blocked txs - * Note: if we are running with conns_per_peer > 1, these blocked - * txs will all get scheduled to the first connection which gets - * scheduled. We won't be using round robin on this first batch. 
- */ - spin_lock(&conn->ibc_lock); - list_for_each_entry_safe(tx, tmp, &txs, tx_list) { - list_del(&tx->tx_list); - - kiblnd_queue_tx_locked(tx, conn); - } - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - - /* schedule blocked rxs */ - kiblnd_handle_early_rxs(conn); - - kiblnd_conn_decref(conn); -} - -static void -kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej) -{ - int rc; - - rc = rdma_reject(cmid, rej, sizeof(*rej)); - - if (rc) - CWARN("Error %d sending reject\n", rc); -} - -static int -kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) -{ - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - struct kib_msg *reqmsg = priv; - struct kib_msg *ackmsg; - struct kib_dev *ibdev; - struct kib_peer *peer; - struct kib_peer *peer2; - struct kib_conn *conn; - struct lnet_ni *ni = NULL; - struct kib_net *net = NULL; - lnet_nid_t nid; - struct rdma_conn_param cp; - struct kib_rej rej; - int version = IBLND_MSG_VERSION; - unsigned long flags; - int max_frags; - int rc; - struct sockaddr_in *peer_addr; - - LASSERT(!in_interrupt()); - - /* cmid inherits 'context' from the corresponding listener id */ - ibdev = (struct kib_dev *)cmid->context; - LASSERT(ibdev); - - memset(&rej, 0, sizeof(rej)); - rej.ibr_magic = IBLND_MSG_MAGIC; - rej.ibr_why = IBLND_REJECT_FATAL; - rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE; - - peer_addr = (struct sockaddr_in *)&cmid->route.addr.dst_addr; - if (*kiblnd_tunables.kib_require_priv_port && - ntohs(peer_addr->sin_port) >= PROT_SOCK) { - __u32 ip = ntohl(peer_addr->sin_addr.s_addr); - - CERROR("Peer's port (%pI4h:%hu) is not privileged\n", - &ip, ntohs(peer_addr->sin_port)); - goto failed; - } - - if (priv_nob < offsetof(struct kib_msg, ibm_type)) { - CERROR("Short connection request\n"); - goto failed; - } - - /* - * Future protocol version compatibility support! 
If the - * o2iblnd-specific protocol changes, or when LNET unifies - * protocols over all LNDs, the initial connection will - * negotiate a protocol version. I trap this here to avoid - * console errors; the reject tells the peer which protocol I - * speak. - */ - if (reqmsg->ibm_magic == LNET_PROTO_MAGIC || - reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) - goto failed; - if (reqmsg->ibm_magic == IBLND_MSG_MAGIC && - reqmsg->ibm_version != IBLND_MSG_VERSION && - reqmsg->ibm_version != IBLND_MSG_VERSION_1) - goto failed; - if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) && - reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) && - reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1)) - goto failed; - - rc = kiblnd_unpack_msg(reqmsg, priv_nob); - if (rc) { - CERROR("Can't parse connection request: %d\n", rc); - goto failed; - } - - nid = reqmsg->ibm_srcnid; - ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid)); - - if (ni) { - net = (struct kib_net *)ni->ni_data; - rej.ibr_incarnation = net->ibn_incarnation; - } - - if (!ni || /* no matching net */ - ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */ - net->ibn_dev != ibdev) { /* wrong device */ - CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n", - libcfs_nid2str(nid), - !ni ? 
"NA" : libcfs_nid2str(ni->ni_nid), - ibdev->ibd_ifname, ibdev->ibd_nnets, - &ibdev->ibd_ifip, - libcfs_nid2str(reqmsg->ibm_dstnid)); - - goto failed; - } - - /* check time stamp as soon as possible */ - if (reqmsg->ibm_dststamp && - reqmsg->ibm_dststamp != net->ibn_incarnation) { - CWARN("Stale connection request\n"); - rej.ibr_why = IBLND_REJECT_CONN_STALE; - goto failed; - } - - /* I can accept peer's version */ - version = reqmsg->ibm_version; - - if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) { - CERROR("Unexpected connreq msg type: %x from %s\n", - reqmsg->ibm_type, libcfs_nid2str(nid)); - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_queue_depth > - kiblnd_msg_queue_size(version, ni)) { - CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_queue_depth, - kiblnd_msg_queue_size(version, ni)); - - if (version == IBLND_MSG_VERSION) - rej.ibr_why = IBLND_REJECT_MSG_QUEUE_SIZE; - - goto failed; - } - - max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT; - if (max_frags > kiblnd_rdma_frags(version, ni)) { - CWARN("Can't accept conn from %s (version %x): max message size %d is too large (%d wanted)\n", - libcfs_nid2str(nid), version, max_frags, - kiblnd_rdma_frags(version, ni)); - - if (version >= IBLND_MSG_VERSION) - rej.ibr_why = IBLND_REJECT_RDMA_FRAGS; - - goto failed; - } else if (max_frags < kiblnd_rdma_frags(version, ni) && - !net->ibn_fmr_ps) { - CWARN("Can't accept conn from %s (version %x): max message size %d incompatible without FMR pool (%d wanted)\n", - libcfs_nid2str(nid), version, max_frags, - kiblnd_rdma_frags(version, ni)); - - if (version == IBLND_MSG_VERSION) - rej.ibr_why = IBLND_REJECT_RDMA_FRAGS; - - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("Can't accept %s: message size %d too big (%d max)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - 
goto failed; - } - - /* assume 'nid' is a new peer; create */ - rc = kiblnd_create_peer(ni, &peer, nid); - if (rc) { - CERROR("Can't create peer for %s\n", libcfs_nid2str(nid)); - rej.ibr_why = IBLND_REJECT_NO_RESOURCES; - goto failed; - } - - /* We have validated the peer's parameters so use those */ - peer->ibp_max_frags = max_frags; - peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth; - - write_lock_irqsave(g_lock, flags); - - peer2 = kiblnd_find_peer_locked(nid); - if (peer2) { - if (!peer2->ibp_version) { - peer2->ibp_version = version; - peer2->ibp_incarnation = reqmsg->ibm_srcstamp; - } - - /* not the guy I've talked with */ - if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp || - peer2->ibp_version != version) { - kiblnd_close_peer_conns_locked(peer2, -ESTALE); - - if (kiblnd_peer_active(peer2)) { - peer2->ibp_incarnation = reqmsg->ibm_srcstamp; - peer2->ibp_version = version; - } - write_unlock_irqrestore(g_lock, flags); - - CWARN("Conn stale %s version %x/%x incarnation %llu/%llu\n", - libcfs_nid2str(nid), peer2->ibp_version, version, - peer2->ibp_incarnation, reqmsg->ibm_srcstamp); - - kiblnd_peer_decref(peer); - rej.ibr_why = IBLND_REJECT_CONN_STALE; - goto failed; - } - - /* - * Tie-break connection race in favour of the higher NID. - * If we keep running into a race condition multiple times, - * we have to assume that the connection attempt with the - * higher NID is stuck in a connecting state and will never - * recover. As such, we pass through this if-block and let - * the lower NID connection win so we can move forward. 
- */ - if (peer2->ibp_connecting && - nid < ni->ni_nid && peer2->ibp_races < - MAX_CONN_RACES_BEFORE_ABORT) { - peer2->ibp_races++; - write_unlock_irqrestore(g_lock, flags); - - CDEBUG(D_NET, "Conn race %s\n", - libcfs_nid2str(peer2->ibp_nid)); - - kiblnd_peer_decref(peer); - rej.ibr_why = IBLND_REJECT_CONN_RACE; - goto failed; - } - if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT) - CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n", - libcfs_nid2str(peer2->ibp_nid), - MAX_CONN_RACES_BEFORE_ABORT); - /** - * passive connection is allowed even this peer is waiting for - * reconnection. - */ - peer2->ibp_reconnecting = 0; - peer2->ibp_races = 0; - peer2->ibp_accepting++; - kiblnd_peer_addref(peer2); - - /** - * Race with kiblnd_launch_tx (active connect) to create peer - * so copy validated parameters since we now know what the - * peer's limits are - */ - peer2->ibp_max_frags = peer->ibp_max_frags; - peer2->ibp_queue_depth = peer->ibp_queue_depth; - - write_unlock_irqrestore(g_lock, flags); - kiblnd_peer_decref(peer); - peer = peer2; - } else { - /* Brand new peer */ - LASSERT(!peer->ibp_accepting); - LASSERT(!peer->ibp_version && - !peer->ibp_incarnation); - - peer->ibp_accepting = 1; - peer->ibp_version = version; - peer->ibp_incarnation = reqmsg->ibm_srcstamp; - - /* I have a ref on ni that prevents it being shutdown */ - LASSERT(!net->ibn_shutdown); - - kiblnd_peer_addref(peer); - list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid)); - - write_unlock_irqrestore(g_lock, flags); - } - - conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT, - version); - if (!conn) { - kiblnd_peer_connect_failed(peer, 0, -ENOMEM); - kiblnd_peer_decref(peer); - rej.ibr_why = IBLND_REJECT_NO_RESOURCES; - goto failed; - } - - /* - * conn now "owns" cmid, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. 
- */ - conn->ibc_incarnation = reqmsg->ibm_srcstamp; - conn->ibc_credits = conn->ibc_queue_depth; - conn->ibc_reserved_credits = conn->ibc_queue_depth; - LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + - IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn)); - - ackmsg = &conn->ibc_connvars->cv_msg; - memset(ackmsg, 0, sizeof(*ackmsg)); - - kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, - sizeof(ackmsg->ibm_u.connparams)); - ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; - ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT; - ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - - kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp); - - memset(&cp, 0, sizeof(cp)); - cp.private_data = ackmsg; - cp.private_data_len = ackmsg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - - CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid)); - - rc = rdma_accept(cmid, &cp); - if (rc) { - CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc); - rej.ibr_version = version; - rej.ibr_why = IBLND_REJECT_FATAL; - - kiblnd_reject(cmid, &rej); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } - - lnet_ni_decref(ni); - return 0; - - failed: - if (ni) { - rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni); - rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni); - lnet_ni_decref(ni); - } - - rej.ibr_version = version; - kiblnd_reject(cmid, &rej); - - return -ECONNREFUSED; -} - -static void -kiblnd_check_reconnect(struct kib_conn *conn, int version, - __u64 incarnation, int why, struct kib_connparams *cp) -{ - rwlock_t *glock = &kiblnd_data.kib_global_lock; - struct kib_peer *peer = conn->ibc_peer; - char *reason; - int msg_size = IBLND_MSG_SIZE; - int frag_num = -1; - int queue_dep = -1; - bool 
reconnect; - unsigned long flags; - - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - LASSERT(peer->ibp_connecting > 0); /* 'conn' at least */ - - if (cp) { - msg_size = cp->ibcp_max_msg_size; - frag_num = cp->ibcp_max_frags << IBLND_FRAG_SHIFT; - queue_dep = cp->ibcp_queue_depth; - } - - write_lock_irqsave(glock, flags); - /** - * retry connection if it's still needed and no other connection - * attempts (active or passive) are in progress - * NB: reconnect is still needed even when ibp_tx_queue is - * empty if ibp_version != version because reconnect may be - * initiated by kiblnd_query() - */ - reconnect = (!list_empty(&peer->ibp_tx_queue) || - peer->ibp_version != version) && - peer->ibp_connecting && - !peer->ibp_accepting; - if (!reconnect) { - reason = "no need"; - goto out; - } - - switch (why) { - default: - reason = "Unknown"; - break; - - case IBLND_REJECT_RDMA_FRAGS: { - struct lnet_ioctl_config_lnd_tunables *tunables; - - if (!cp) { - reason = "can't negotiate max frags"; - goto out; - } - tunables = peer->ibp_ni->ni_lnd_tunables; - if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) { - reason = "map_on_demand must be enabled"; - goto out; - } - if (conn->ibc_max_frags <= frag_num) { - reason = "unsupported max frags"; - goto out; - } - - peer->ibp_max_frags = frag_num; - reason = "rdma fragments"; - break; - } - case IBLND_REJECT_MSG_QUEUE_SIZE: - if (!cp) { - reason = "can't negotiate queue depth"; - goto out; - } - if (conn->ibc_queue_depth <= queue_dep) { - reason = "unsupported queue depth"; - goto out; - } - - peer->ibp_queue_depth = queue_dep; - reason = "queue depth"; - break; - - case IBLND_REJECT_CONN_STALE: - reason = "stale"; - break; - - case IBLND_REJECT_CONN_RACE: - reason = "conn race"; - break; - - case IBLND_REJECT_CONN_UNCOMPAT: - reason = "version negotiation"; - break; - } - - conn->ibc_reconnect = 1; - peer->ibp_reconnecting++; - peer->ibp_version = version; - if (incarnation) - peer->ibp_incarnation = incarnation; -out: 
- write_unlock_irqrestore(glock, flags); - - CNETERR("%s: %s (%s), %x, %x, msg_size: %d, queue_depth: %d/%d, max_frags: %d/%d\n", - libcfs_nid2str(peer->ibp_nid), - reconnect ? "reconnect" : "don't reconnect", - reason, IBLND_MSG_VERSION, version, msg_size, - conn->ibc_queue_depth, queue_dep, - conn->ibc_max_frags, frag_num); - /** - * if conn::ibc_reconnect is TRUE, connd will reconnect to the peer - * while destroying the zombie - */ -} - -static void -kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob) -{ - struct kib_peer *peer = conn->ibc_peer; - - LASSERT(!in_interrupt()); - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - - switch (reason) { - case IB_CM_REJ_STALE_CONN: - kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0, - IBLND_REJECT_CONN_STALE, NULL); - break; - - case IB_CM_REJ_INVALID_SERVICE_ID: - CNETERR("%s rejected: no listener at %d\n", - libcfs_nid2str(peer->ibp_nid), - *kiblnd_tunables.kib_service); - break; - - case IB_CM_REJ_CONSUMER_DEFINED: - if (priv_nob >= offsetof(struct kib_rej, ibr_padding)) { - struct kib_rej *rej = priv; - struct kib_connparams *cp = NULL; - int flip = 0; - __u64 incarnation = -1; - - /* NB. default incarnation is -1 because: - * a) V1 will ignore dst incarnation in connreq. - * b) V2 will provide incarnation while rejecting me, - * -1 will be overwrote. - * - * if I try to connect to a V1 peer with V2 protocol, - * it rejected me then upgrade to V2, I have no idea - * about the upgrading and try to reconnect with V1, - * in this case upgraded V2 can find out I'm trying to - * talk to the old guy and reject me(incarnation is -1). 
- */ - - if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) || - rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) { - __swab32s(&rej->ibr_magic); - __swab16s(&rej->ibr_version); - flip = 1; - } - - if (priv_nob >= sizeof(struct kib_rej) && - rej->ibr_version > IBLND_MSG_VERSION_1) { - /* - * priv_nob is always 148 in current version - * of OFED, so we still need to check version. - * (define of IB_CM_REJ_PRIVATE_DATA_SIZE) - */ - cp = &rej->ibr_cp; - - if (flip) { - __swab64s(&rej->ibr_incarnation); - __swab16s(&cp->ibcp_queue_depth); - __swab16s(&cp->ibcp_max_frags); - __swab32s(&cp->ibcp_max_msg_size); - } - - incarnation = rej->ibr_incarnation; - } - - if (rej->ibr_magic != IBLND_MSG_MAGIC && - rej->ibr_magic != LNET_PROTO_MAGIC) { - CERROR("%s rejected: consumer defined fatal error\n", - libcfs_nid2str(peer->ibp_nid)); - break; - } - - if (rej->ibr_version != IBLND_MSG_VERSION && - rej->ibr_version != IBLND_MSG_VERSION_1) { - CERROR("%s rejected: o2iblnd version %x error\n", - libcfs_nid2str(peer->ibp_nid), - rej->ibr_version); - break; - } - - if (rej->ibr_why == IBLND_REJECT_FATAL && - rej->ibr_version == IBLND_MSG_VERSION_1) { - CDEBUG(D_NET, "rejected by old version peer %s: %x\n", - libcfs_nid2str(peer->ibp_nid), rej->ibr_version); - - if (conn->ibc_version != IBLND_MSG_VERSION_1) - rej->ibr_why = IBLND_REJECT_CONN_UNCOMPAT; - } - - switch (rej->ibr_why) { - case IBLND_REJECT_CONN_RACE: - case IBLND_REJECT_CONN_STALE: - case IBLND_REJECT_CONN_UNCOMPAT: - case IBLND_REJECT_MSG_QUEUE_SIZE: - case IBLND_REJECT_RDMA_FRAGS: - kiblnd_check_reconnect(conn, rej->ibr_version, - incarnation, - rej->ibr_why, cp); - break; - - case IBLND_REJECT_NO_RESOURCES: - CERROR("%s rejected: o2iblnd no resources\n", - libcfs_nid2str(peer->ibp_nid)); - break; - - case IBLND_REJECT_FATAL: - CERROR("%s rejected: o2iblnd fatal error\n", - libcfs_nid2str(peer->ibp_nid)); - break; - - default: - CERROR("%s rejected: o2iblnd reason %d\n", - libcfs_nid2str(peer->ibp_nid), - rej->ibr_why); - 
break; - } - break; - } - /* fall through */ - default: - CNETERR("%s rejected: reason %d, size %d\n", - libcfs_nid2str(peer->ibp_nid), reason, priv_nob); - break; - } - - kiblnd_connreq_done(conn, -ECONNREFUSED); -} - -static void -kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob) -{ - struct kib_peer *peer = conn->ibc_peer; - struct lnet_ni *ni = peer->ibp_ni; - struct kib_net *net = ni->ni_data; - struct kib_msg *msg = priv; - int ver = conn->ibc_version; - int rc = kiblnd_unpack_msg(msg, priv_nob); - unsigned long flags; - - LASSERT(net); - - if (rc) { - CERROR("Can't unpack connack from %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - goto failed; - } - - if (msg->ibm_type != IBLND_MSG_CONNACK) { - CERROR("Unexpected message %d from %s\n", - msg->ibm_type, libcfs_nid2str(peer->ibp_nid)); - rc = -EPROTO; - goto failed; - } - - if (ver != msg->ibm_version) { - CERROR("%s replied version %x is different with requested version %x\n", - libcfs_nid2str(peer->ibp_nid), msg->ibm_version, ver); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth > - conn->ibc_queue_depth) { - CERROR("%s has incompatible queue depth %d (<=%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_queue_depth, - conn->ibc_queue_depth); - rc = -EPROTO; - goto failed; - } - - if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) > - conn->ibc_max_frags) { - CERROR("%s has incompatible max_frags %d (<=%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT, - conn->ibc_max_frags); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("%s max message size %d too big (%d max)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - rc = -EPROTO; - goto failed; - } - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (msg->ibm_dstnid == ni->ni_nid && - 
msg->ibm_dststamp == net->ibn_incarnation) - rc = 0; - else - rc = -ESTALE; - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (rc) { - CERROR("Bad connection reply from %s, rc = %d, version: %x max_frags: %d\n", - libcfs_nid2str(peer->ibp_nid), rc, - msg->ibm_version, msg->ibm_u.connparams.ibcp_max_frags); - goto failed; - } - - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth; - conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth; - conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth; - conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT; - LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + - IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn)); - - kiblnd_connreq_done(conn, 0); - return; - - failed: - /* - * NB My QP has already established itself, so I handle anything going - * wrong here by setting ibc_comms_error. - * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then - * immediately tears it down. - */ - LASSERT(rc); - conn->ibc_comms_error = rc; - kiblnd_connreq_done(conn, 0); -} - -static int -kiblnd_active_connect(struct rdma_cm_id *cmid) -{ - struct kib_peer *peer = (struct kib_peer *)cmid->context; - struct kib_conn *conn; - struct kib_msg *msg; - struct rdma_conn_param cp; - int version; - __u64 incarnation; - unsigned long flags; - int rc; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - incarnation = peer->ibp_incarnation; - version = !peer->ibp_version ? IBLND_MSG_VERSION : - peer->ibp_version; - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT, - version); - if (!conn) { - kiblnd_peer_connect_failed(peer, 1, -ENOMEM); - kiblnd_peer_decref(peer); /* lose cmid's ref */ - return -ENOMEM; - } - - /* - * conn "owns" cmid now, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. 
conn also takes over cmid's ref - * on peer - */ - msg = &conn->ibc_connvars->cv_msg; - - memset(msg, 0, sizeof(*msg)); - kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams)); - msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; - msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT; - msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - - kiblnd_pack_msg(peer->ibp_ni, msg, version, - 0, peer->ibp_nid, incarnation); - - memset(&cp, 0, sizeof(cp)); - cp.private_data = msg; - cp.private_data_len = msg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - - LASSERT(cmid->context == (void *)conn); - LASSERT(conn->ibc_cmid == cmid); - - rc = rdma_connect(cmid, &cp); - if (rc) { - CERROR("Can't connect to %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } - - return 0; -} - -int -kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) -{ - struct kib_peer *peer; - struct kib_conn *conn; - int rc; - - switch (event->event) { - default: - CERROR("Unexpected event: %d, status: %d\n", - event->event, event->status); - LBUG(); - - case RDMA_CM_EVENT_CONNECT_REQUEST: - /* destroy cmid on failure */ - rc = kiblnd_passive_connect(cmid, - (void *)KIBLND_CONN_PARAM(event), - KIBLND_CONN_PARAM_LEN(event)); - CDEBUG(D_NET, "connreq: %d\n", rc); - return rc; - - case RDMA_CM_EVENT_ADDR_ERROR: - peer = (struct kib_peer *)cmid->context; - CNETERR("%s: ADDR ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); - kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc destroys cmid */ - - case RDMA_CM_EVENT_ADDR_RESOLVED: - peer = (struct kib_peer *)cmid->context; - - CDEBUG(D_NET, "%s Addr resolved: 
%d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - - if (event->status) { - CNETERR("Can't resolve address for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - rc = event->status; - } else { - rc = rdma_resolve_route( - cmid, *kiblnd_tunables.kib_timeout * 1000); - if (!rc) { - struct kib_net *net = peer->ibp_ni->ni_data; - struct kib_dev *dev = net->ibn_dev; - - CDEBUG(D_NET, "%s: connection bound to "\ - "%s:%pI4h:%s\n", - libcfs_nid2str(peer->ibp_nid), - dev->ibd_ifname, - &dev->ibd_ifip, cmid->device->name); - - return 0; - } - - /* Can't initiate route resolution */ - CERROR("Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - } - kiblnd_peer_connect_failed(peer, 1, rc); - kiblnd_peer_decref(peer); - return rc; /* rc destroys cmid */ - - case RDMA_CM_EVENT_ROUTE_ERROR: - peer = (struct kib_peer *)cmid->context; - CNETERR("%s: ROUTE ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); - kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc destroys cmid */ - - case RDMA_CM_EVENT_ROUTE_RESOLVED: - peer = (struct kib_peer *)cmid->context; - CDEBUG(D_NET, "%s Route resolved: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - - if (!event->status) - return kiblnd_active_connect(cmid); - - CNETERR("Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, event->status); - kiblnd_peer_decref(peer); - return event->status; /* rc destroys cmid */ - - case RDMA_CM_EVENT_UNREACHABLE: - conn = (struct kib_conn *)cmid->context; - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || - conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); - CNETERR("%s: UNREACHABLE %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); - kiblnd_connreq_done(conn, -ENETDOWN); - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_CONNECT_ERROR: - conn = (struct kib_conn *)cmid->context; - 
LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || - conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); - CNETERR("%s: CONNECT ERROR %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); - kiblnd_connreq_done(conn, -ENOTCONN); - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_REJECTED: - conn = (struct kib_conn *)cmid->context; - switch (conn->ibc_state) { - default: - LBUG(); - - case IBLND_CONN_PASSIVE_WAIT: - CERROR("%s: REJECTED %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - event->status); - kiblnd_connreq_done(conn, -ECONNRESET); - break; - - case IBLND_CONN_ACTIVE_CONNECT: - kiblnd_rejected(conn, event->status, - (void *)KIBLND_CONN_PARAM(event), - KIBLND_CONN_PARAM_LEN(event)); - break; - } - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_ESTABLISHED: - conn = (struct kib_conn *)cmid->context; - switch (conn->ibc_state) { - default: - LBUG(); - - case IBLND_CONN_PASSIVE_WAIT: - CDEBUG(D_NET, "ESTABLISHED (passive): %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_connreq_done(conn, 0); - break; - - case IBLND_CONN_ACTIVE_CONNECT: - CDEBUG(D_NET, "ESTABLISHED(active): %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_check_connreply(conn, - (void *)KIBLND_CONN_PARAM(event), - KIBLND_CONN_PARAM_LEN(event)); - break; - } - /* net keeps its ref on conn! 
*/ - return 0; - - case RDMA_CM_EVENT_TIMEWAIT_EXIT: - CDEBUG(D_NET, "Ignore TIMEWAIT_EXIT event\n"); - return 0; - case RDMA_CM_EVENT_DISCONNECTED: - conn = (struct kib_conn *)cmid->context; - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - CERROR("%s DISCONNECTED\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_connreq_done(conn, -ECONNRESET); - } else { - kiblnd_close_conn(conn, 0); - } - kiblnd_conn_decref(conn); - cmid->context = NULL; - return 0; - - case RDMA_CM_EVENT_DEVICE_REMOVAL: - LCONSOLE_ERROR_MSG(0x131, - "Received notification of device removal\n" - "Please shutdown LNET to allow this to proceed\n"); - /* - * Can't remove network from underneath LNET for now, so I have - * to ignore this - */ - return 0; - - case RDMA_CM_EVENT_ADDR_CHANGE: - LCONSOLE_INFO("Physical link changed (eg hca/port)\n"); - return 0; - } -} - -static int -kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs) -{ - struct kib_tx *tx; - struct list_head *ttmp; - - list_for_each(ttmp, txs) { - tx = list_entry(ttmp, struct kib_tx, tx_list); - - if (txs != &conn->ibc_active_txs) { - LASSERT(tx->tx_queued); - } else { - LASSERT(!tx->tx_queued); - LASSERT(tx->tx_waiting || tx->tx_sending); - } - - if (time_after_eq(jiffies, tx->tx_deadline)) { - CERROR("Timed out tx: %s, %lu seconds\n", - kiblnd_queue2str(conn, txs), - (jiffies - tx->tx_deadline) / HZ); - return 1; - } - } - - return 0; -} - -static int -kiblnd_conn_timed_out_locked(struct kib_conn *conn) -{ - return kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue) || - kiblnd_check_txs_locked(conn, &conn->ibc_tx_noops) || - kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_rsrvd) || - kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_nocred) || - kiblnd_check_txs_locked(conn, &conn->ibc_active_txs); -} - -static void -kiblnd_check_conns(int idx) -{ - LIST_HEAD(closes); - LIST_HEAD(checksends); - struct list_head *peers = &kiblnd_data.kib_peers[idx]; - struct list_head *ptmp; - struct kib_peer *peer; - 
struct kib_conn *conn; - struct kib_conn *temp; - struct kib_conn *tmp; - struct list_head *ctmp; - unsigned long flags; - - /* - * NB. We expect to have a look at all the peers and not find any - * RDMAs to time out, so we just use a shared lock while we - * take a look... - */ - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - list_for_each(ptmp, peers) { - peer = list_entry(ptmp, struct kib_peer, ibp_list); - - list_for_each(ctmp, &peer->ibp_conns) { - int timedout; - int sendnoop; - - conn = list_entry(ctmp, struct kib_conn, ibc_list); - - LASSERT(conn->ibc_state == IBLND_CONN_ESTABLISHED); - - spin_lock(&conn->ibc_lock); - - sendnoop = kiblnd_need_noop(conn); - timedout = kiblnd_conn_timed_out_locked(conn); - if (!sendnoop && !timedout) { - spin_unlock(&conn->ibc_lock); - continue; - } - - if (timedout) { - CERROR("Timed out RDMA with %s (%lu): c: %u, oc: %u, rc: %u\n", - libcfs_nid2str(peer->ibp_nid), - (jiffies - peer->ibp_last_alive) / HZ, - conn->ibc_credits, - conn->ibc_outstanding_credits, - conn->ibc_reserved_credits); - list_add(&conn->ibc_connd_list, &closes); - } else { - list_add(&conn->ibc_connd_list, &checksends); - } - /* +ref for 'closes' or 'checksends' */ - kiblnd_conn_addref(conn); - - spin_unlock(&conn->ibc_lock); - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - /* - * Handle timeout by closing the whole - * connection. We can only be sure RDMA activity - * has ceased once the QP has been modified. - */ - list_for_each_entry_safe(conn, tmp, &closes, ibc_connd_list) { - list_del(&conn->ibc_connd_list); - kiblnd_close_conn(conn, -ETIMEDOUT); - kiblnd_conn_decref(conn); - } - - /* - * In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... 
- */ - list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) { - list_del(&conn->ibc_connd_list); - - spin_lock(&conn->ibc_lock); - kiblnd_check_sends_locked(conn); - spin_unlock(&conn->ibc_lock); - - kiblnd_conn_decref(conn); - } -} - -static void -kiblnd_disconnect_conn(struct kib_conn *conn) -{ - LASSERT(!in_interrupt()); - LASSERT(current == kiblnd_data.kib_connd); - LASSERT(conn->ibc_state == IBLND_CONN_CLOSING); - - rdma_disconnect(conn->ibc_cmid); - kiblnd_finalise_conn(conn); - - kiblnd_peer_notify(conn->ibc_peer); -} - -/** - * High-water for reconnection to the same peer, reconnection attempt should - * be delayed after trying more than KIB_RECONN_HIGH_RACE. - */ -#define KIB_RECONN_HIGH_RACE 10 -/** - * Allow connd to take a break and handle other things after consecutive - * reconnection attempts. - */ -#define KIB_RECONN_BREAK 100 - -int -kiblnd_connd(void *arg) -{ - spinlock_t *lock = &kiblnd_data.kib_connd_lock; - wait_queue_entry_t wait; - unsigned long flags; - struct kib_conn *conn; - int timeout; - int i; - int dropped_lock; - int peer_index = 0; - unsigned long deadline = jiffies; - - init_waitqueue_entry(&wait, current); - kiblnd_data.kib_connd = current; - - spin_lock_irqsave(lock, flags); - - while (!kiblnd_data.kib_shutdown) { - int reconn = 0; - - dropped_lock = 0; - - if (!list_empty(&kiblnd_data.kib_connd_zombies)) { - struct kib_peer *peer = NULL; - - conn = list_entry(kiblnd_data.kib_connd_zombies.next, - struct kib_conn, ibc_list); - list_del(&conn->ibc_list); - if (conn->ibc_reconnect) { - peer = conn->ibc_peer; - kiblnd_peer_addref(peer); - } - - spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; - - kiblnd_destroy_conn(conn); - - spin_lock_irqsave(lock, flags); - if (!peer) { - kfree(conn); - continue; - } - - conn->ibc_peer = peer; - if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) - list_add_tail(&conn->ibc_list, - &kiblnd_data.kib_reconn_list); - else - list_add_tail(&conn->ibc_list, - 
&kiblnd_data.kib_reconn_wait); - } - - if (!list_empty(&kiblnd_data.kib_connd_conns)) { - conn = list_entry(kiblnd_data.kib_connd_conns.next, - struct kib_conn, ibc_list); - list_del(&conn->ibc_list); - - spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; - - kiblnd_disconnect_conn(conn); - kiblnd_conn_decref(conn); - - spin_lock_irqsave(lock, flags); - } - - while (reconn < KIB_RECONN_BREAK) { - if (kiblnd_data.kib_reconn_sec != - ktime_get_real_seconds()) { - kiblnd_data.kib_reconn_sec = ktime_get_real_seconds(); - list_splice_init(&kiblnd_data.kib_reconn_wait, - &kiblnd_data.kib_reconn_list); - } - - if (list_empty(&kiblnd_data.kib_reconn_list)) - break; - - conn = list_entry(kiblnd_data.kib_reconn_list.next, - struct kib_conn, ibc_list); - list_del(&conn->ibc_list); - - spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; - - reconn += kiblnd_reconnect_peer(conn->ibc_peer); - kiblnd_peer_decref(conn->ibc_peer); - kfree(conn); - - spin_lock_irqsave(lock, flags); - } - - /* careful with the jiffy wrap... */ - timeout = (int)(deadline - jiffies); - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kiblnd_data.kib_peer_hash_size; - - spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; - - /* - * Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. 
- */ - if (*kiblnd_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kiblnd_tunables.kib_timeout; - if (!chunk) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kiblnd_check_conns(peer_index); - peer_index = (peer_index + 1) % - kiblnd_data.kib_peer_hash_size; - } - - deadline += msecs_to_jiffies(p * MSEC_PER_SEC); - spin_lock_irqsave(lock, flags); - } - - if (dropped_lock) - continue; - - /* Nothing to do for 'timeout' */ - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&kiblnd_data.kib_connd_waitq, &wait); - spin_unlock_irqrestore(lock, flags); - - schedule_timeout(timeout); - - remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait); - spin_lock_irqsave(lock, flags); - } - - spin_unlock_irqrestore(lock, flags); - - kiblnd_thread_fini(); - return 0; -} - -void -kiblnd_qp_event(struct ib_event *event, void *arg) -{ - struct kib_conn *conn = arg; - - switch (event->event) { - case IB_EVENT_COMM_EST: - CDEBUG(D_NET, "%s established\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* - * We received a packet but connection isn't established - * probably handshake packet was lost, so free to - * force make connection established - */ - rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST); - return; - - default: - CERROR("%s: Async QP event type %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event); - return; - } -} - -static void -kiblnd_complete(struct ib_wc *wc) -{ - switch (kiblnd_wreqid2type(wc->wr_id)) { - default: - LBUG(); - - case IBLND_WID_MR: - if (wc->status != IB_WC_SUCCESS && - wc->status != IB_WC_WR_FLUSH_ERR) - CNETERR("FastReg failed: %d\n", wc->status); - break; - - case IBLND_WID_RDMA: - /* - * We only get RDMA completion notification if it fails. All - * subsequent work items, including the final SEND will fail - * too. 
However we can't print out any more info about the - * failing RDMA because 'tx' might be back on the idle list or - * even reused already if we didn't manage to post all our work - * items - */ - CNETERR("RDMA (tx: %p) failed: %d\n", - kiblnd_wreqid2ptr(wc->wr_id), wc->status); - return; - - case IBLND_WID_TX: - kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status); - return; - - case IBLND_WID_RX: - kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status, - wc->byte_len); - return; - } -} - -void -kiblnd_cq_completion(struct ib_cq *cq, void *arg) -{ - /* - * NB I'm not allowed to schedule this conn once its refcount has - * reached 0. Since fundamentally I'm racing with scheduler threads - * consuming my CQ I could be called after all completions have - * occurred. But in this case, !ibc_nrx && !ibc_nsends_posted - * and this CQ is about to be destroyed so I NOOP. - */ - struct kib_conn *conn = arg; - struct kib_sched_info *sched = conn->ibc_sched; - unsigned long flags; - - LASSERT(cq == conn->ibc_cq); - - spin_lock_irqsave(&sched->ibs_lock, flags); - - conn->ibc_ready = 1; - - if (!conn->ibc_scheduled && - (conn->ibc_nrx > 0 || - conn->ibc_nsends_posted > 0)) { - kiblnd_conn_addref(conn); /* +1 ref for sched_conns */ - conn->ibc_scheduled = 1; - list_add_tail(&conn->ibc_sched_list, &sched->ibs_conns); - - if (waitqueue_active(&sched->ibs_waitq)) - wake_up(&sched->ibs_waitq); - } - - spin_unlock_irqrestore(&sched->ibs_lock, flags); -} - -void -kiblnd_cq_event(struct ib_event *event, void *arg) -{ - struct kib_conn *conn = arg; - - CERROR("%s: async CQ event type %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event); -} - -int -kiblnd_scheduler(void *arg) -{ - long id = (long)arg; - struct kib_sched_info *sched; - struct kib_conn *conn; - wait_queue_entry_t wait; - unsigned long flags; - struct ib_wc wc; - int did_something; - int busy_loops = 0; - int rc; - - init_waitqueue_entry(&wait, current); - - sched = 
kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)]; - - rc = cfs_cpt_bind(lnet_cpt_table(), sched->ibs_cpt); - if (rc) { - CWARN("Unable to bind on CPU partition %d, please verify whether all CPUs are healthy and reload modules if necessary, otherwise your system might under risk of low performance\n", - sched->ibs_cpt); - } - - spin_lock_irqsave(&sched->ibs_lock, flags); - - while (!kiblnd_data.kib_shutdown) { - if (busy_loops++ >= IBLND_RESCHED) { - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&sched->ibs_lock, flags); - } - - did_something = 0; - - if (!list_empty(&sched->ibs_conns)) { - conn = list_entry(sched->ibs_conns.next, struct kib_conn, - ibc_sched_list); - /* take over kib_sched_conns' ref on conn... */ - LASSERT(conn->ibc_scheduled); - list_del(&conn->ibc_sched_list); - conn->ibc_ready = 0; - - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - wc.wr_id = IBLND_WID_INVAL; - - rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - if (!rc) { - rc = ib_req_notify_cq(conn->ibc_cq, - IB_CQ_NEXT_COMP); - if (rc < 0) { - CWARN("%s: ib_req_notify_cq failed: %d, closing connection\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kiblnd_close_conn(conn, -EIO); - kiblnd_conn_decref(conn); - spin_lock_irqsave(&sched->ibs_lock, - flags); - continue; - } - - rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - } - - if (unlikely(rc > 0 && wc.wr_id == IBLND_WID_INVAL)) { - LCONSOLE_ERROR("ib_poll_cq (rc: %d) returned invalid wr_id, opcode %d, status: %d, vendor_err: %d, conn: %s status: %d\nplease upgrade firmware and OFED or contact vendor.\n", - rc, wc.opcode, wc.status, - wc.vendor_err, - libcfs_nid2str(conn->ibc_peer->ibp_nid), - conn->ibc_state); - rc = -EINVAL; - } - - if (rc < 0) { - CWARN("%s: ib_poll_cq failed: %d, closing connection\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - rc); - kiblnd_close_conn(conn, -EIO); - kiblnd_conn_decref(conn); - spin_lock_irqsave(&sched->ibs_lock, flags); - continue; - } - - 
spin_lock_irqsave(&sched->ibs_lock, flags); - - if (rc || conn->ibc_ready) { - /* - * There may be another completion waiting; get - * another scheduler to check while I handle - * this one... - */ - /* +1 ref for sched_conns */ - kiblnd_conn_addref(conn); - list_add_tail(&conn->ibc_sched_list, - &sched->ibs_conns); - if (waitqueue_active(&sched->ibs_waitq)) - wake_up(&sched->ibs_waitq); - } else { - conn->ibc_scheduled = 0; - } - - if (rc) { - spin_unlock_irqrestore(&sched->ibs_lock, flags); - kiblnd_complete(&wc); - - spin_lock_irqsave(&sched->ibs_lock, flags); - } - - kiblnd_conn_decref(conn); /* ...drop my ref from above */ - did_something = 1; - } - - if (did_something) - continue; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&sched->ibs_waitq, &wait); - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - schedule(); - busy_loops = 0; - - remove_wait_queue(&sched->ibs_waitq, &wait); - spin_lock_irqsave(&sched->ibs_lock, flags); - } - - spin_unlock_irqrestore(&sched->ibs_lock, flags); - - kiblnd_thread_fini(); - return 0; -} - -int -kiblnd_failover_thread(void *arg) -{ - rwlock_t *glock = &kiblnd_data.kib_global_lock; - struct kib_dev *dev; - wait_queue_entry_t wait; - unsigned long flags; - int rc; - - LASSERT(*kiblnd_tunables.kib_dev_failover); - - init_waitqueue_entry(&wait, current); - write_lock_irqsave(glock, flags); - - while (!kiblnd_data.kib_shutdown) { - int do_failover = 0; - int long_sleep; - - list_for_each_entry(dev, &kiblnd_data.kib_failed_devs, - ibd_fail_list) { - if (time_before(jiffies, - dev->ibd_next_failover)) - continue; - do_failover = 1; - break; - } - - if (do_failover) { - list_del_init(&dev->ibd_fail_list); - dev->ibd_failover = 1; - write_unlock_irqrestore(glock, flags); - - rc = kiblnd_dev_failover(dev); - - write_lock_irqsave(glock, flags); - - LASSERT(dev->ibd_failover); - dev->ibd_failover = 0; - if (rc >= 0) { /* Device is OK or failover succeed */ - dev->ibd_next_failover = jiffies + 3 * HZ; - 
continue; - } - - /* failed to failover, retry later */ - dev->ibd_next_failover = - jiffies + min(dev->ibd_failed_failover, 10) * HZ; - if (kiblnd_dev_can_failover(dev)) { - list_add_tail(&dev->ibd_fail_list, - &kiblnd_data.kib_failed_devs); - } - - continue; - } - - /* long sleep if no more pending failover */ - long_sleep = list_empty(&kiblnd_data.kib_failed_devs); - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&kiblnd_data.kib_failover_waitq, &wait); - write_unlock_irqrestore(glock, flags); - - rc = schedule_timeout(long_sleep ? 10 * HZ : - HZ); - remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait); - write_lock_irqsave(glock, flags); - - if (!long_sleep || rc) - continue; - - /* - * have a long sleep, routine check all active devices, - * we need checking like this because if there is not active - * connection on the dev and no SEND from local, we may listen - * on wrong HCA for ever while there is a bonding failover - */ - list_for_each_entry(dev, &kiblnd_data.kib_devs, ibd_list) { - if (kiblnd_dev_can_failover(dev)) { - list_add_tail(&dev->ibd_fail_list, - &kiblnd_data.kib_failed_devs); - } - } - } - - write_unlock_irqrestore(glock, flags); - - kiblnd_thread_fini(); - return 0; -} diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c deleted file mode 100644 index 39d07926d603..000000000000 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ /dev/null @@ -1,296 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/o2iblnd/o2iblnd_modparams.c - * - * Author: Eric Barton - */ - -#include "o2iblnd.h" - -static int service = 987; -module_param(service, int, 0444); -MODULE_PARM_DESC(service, "service number (within RDMA_PS_TCP)"); - -static int cksum; -module_param(cksum, int, 0644); -MODULE_PARM_DESC(cksum, "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -module_param(timeout, int, 0644); -MODULE_PARM_DESC(timeout, "timeout (seconds)"); - -/* - * Number of threads in each scheduler pool which is percpt, - * we will estimate reasonable value based on CPUs if it's set to zero. 
- */ -static int nscheds; -module_param(nscheds, int, 0444); -MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool"); - -static unsigned int conns_per_peer = 1; -module_param(conns_per_peer, uint, 0444); -MODULE_PARM_DESC(conns_per_peer, "number of connections per peer"); - -/* NB: this value is shared by all CPTs, it can grow at runtime */ -static int ntx = 512; -module_param(ntx, int, 0444); -MODULE_PARM_DESC(ntx, "# of message descriptors allocated for each pool"); - -/* NB: this value is shared by all CPTs */ -static int credits = 256; -module_param(credits, int, 0444); -MODULE_PARM_DESC(credits, "# concurrent sends"); - -static int peer_credits = 8; -module_param(peer_credits, int, 0444); -MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer"); - -static int peer_credits_hiw; -module_param(peer_credits_hiw, int, 0444); -MODULE_PARM_DESC(peer_credits_hiw, "when eagerly to return credits"); - -static int peer_buffer_credits; -module_param(peer_buffer_credits, int, 0444); -MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits"); - -static int peer_timeout = 180; -module_param(peer_timeout, int, 0444); -MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)"); - -static char *ipif_name = "ib0"; -module_param(ipif_name, charp, 0444); -MODULE_PARM_DESC(ipif_name, "IPoIB interface name"); - -static int retry_count = 5; -module_param(retry_count, int, 0644); -MODULE_PARM_DESC(retry_count, "Retransmissions when no ACK received"); - -static int rnr_retry_count = 6; -module_param(rnr_retry_count, int, 0644); -MODULE_PARM_DESC(rnr_retry_count, "RNR retransmissions"); - -static int keepalive = 100; -module_param(keepalive, int, 0644); -MODULE_PARM_DESC(keepalive, "Idle time in seconds before sending a keepalive"); - -static int ib_mtu; -module_param(ib_mtu, int, 0444); -MODULE_PARM_DESC(ib_mtu, "IB MTU 256/512/1024/2048/4096"); - -static int concurrent_sends; 
-module_param(concurrent_sends, int, 0444); -MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing"); - -#define IBLND_DEFAULT_MAP_ON_DEMAND IBLND_MAX_RDMA_FRAGS -static int map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND; -module_param(map_on_demand, int, 0444); -MODULE_PARM_DESC(map_on_demand, "map on demand"); - -/* NB: this value is shared by all CPTs, it can grow at runtime */ -static int fmr_pool_size = 512; -module_param(fmr_pool_size, int, 0444); -MODULE_PARM_DESC(fmr_pool_size, "size of fmr pool on each CPT (>= ntx / 4)"); - -/* NB: this value is shared by all CPTs, it can grow at runtime */ -static int fmr_flush_trigger = 384; -module_param(fmr_flush_trigger, int, 0444); -MODULE_PARM_DESC(fmr_flush_trigger, "# dirty FMRs that triggers pool flush"); - -static int fmr_cache = 1; -module_param(fmr_cache, int, 0444); -MODULE_PARM_DESC(fmr_cache, "non-zero to enable FMR caching"); - -/* - * 0: disable failover - * 1: enable failover if necessary - * 2: force to failover (for debug) - */ -static int dev_failover; -module_param(dev_failover, int, 0444); -MODULE_PARM_DESC(dev_failover, "HCA failover for bonding (0 off, 1 on, other values reserved)"); - -static int require_privileged_port; -module_param(require_privileged_port, int, 0644); -MODULE_PARM_DESC(require_privileged_port, "require privileged port when accepting connection"); - -static int use_privileged_port = 1; -module_param(use_privileged_port, int, 0644); -MODULE_PARM_DESC(use_privileged_port, "use privileged port when initiating connection"); - -struct kib_tunables kiblnd_tunables = { - .kib_dev_failover = &dev_failover, - .kib_service = &service, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_keepalive = &keepalive, - .kib_ntx = &ntx, - .kib_default_ipif = &ipif_name, - .kib_retry_count = &retry_count, - .kib_rnr_retry_count = &rnr_retry_count, - .kib_ib_mtu = &ib_mtu, - .kib_require_priv_port = &require_privileged_port, - .kib_use_priv_port = &use_privileged_port, - .kib_nscheds = 
&nscheds -}; - -static struct lnet_ioctl_config_o2iblnd_tunables default_tunables; - -/* # messages/RDMAs in-flight */ -int kiblnd_msg_queue_size(int version, struct lnet_ni *ni) -{ - if (version == IBLND_MSG_VERSION_1) - return IBLND_MSG_QUEUE_SIZE_V1; - else if (ni) - return ni->ni_peertxcredits; - else - return peer_credits; -} - -int kiblnd_tunables_setup(struct lnet_ni *ni) -{ - struct lnet_ioctl_config_o2iblnd_tunables *tunables; - - /* - * if there was no tunables specified, setup the tunables to be - * defaulted - */ - if (!ni->ni_lnd_tunables) { - ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables), - GFP_NOFS); - if (!ni->ni_lnd_tunables) - return -ENOMEM; - - memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib, - &default_tunables, sizeof(*tunables)); - } - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; - - /* Current API version */ - tunables->lnd_version = 0; - - if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) { - CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n", - *kiblnd_tunables.kib_ib_mtu); - return -EINVAL; - } - - if (!ni->ni_peertimeout) - ni->ni_peertimeout = peer_timeout; - - if (!ni->ni_maxtxcredits) - ni->ni_maxtxcredits = credits; - - if (!ni->ni_peertxcredits) - ni->ni_peertxcredits = peer_credits; - - if (!ni->ni_peerrtrcredits) - ni->ni_peerrtrcredits = peer_buffer_credits; - - if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT) - ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT; - - if (ni->ni_peertxcredits > IBLND_CREDITS_MAX) - ni->ni_peertxcredits = IBLND_CREDITS_MAX; - - if (ni->ni_peertxcredits > credits) - ni->ni_peertxcredits = credits; - - if (!tunables->lnd_peercredits_hiw) - tunables->lnd_peercredits_hiw = peer_credits_hiw; - - if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2) - tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2; - - if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits) - tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1; - - if (tunables->lnd_map_on_demand 
<= 0 || - tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) { - /* Use the default */ - CWARN("Invalid map_on_demand (%d), expects 1 - %d. Using default of %d\n", - tunables->lnd_map_on_demand, - IBLND_MAX_RDMA_FRAGS, IBLND_DEFAULT_MAP_ON_DEMAND); - tunables->lnd_map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND; - } - - if (tunables->lnd_map_on_demand == 1) { - /* don't make sense to create map if only one fragment */ - tunables->lnd_map_on_demand = 2; - } - - if (!tunables->lnd_concurrent_sends) { - if (tunables->lnd_map_on_demand > 0 && - tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) { - tunables->lnd_concurrent_sends = - ni->ni_peertxcredits * 2; - } else { - tunables->lnd_concurrent_sends = ni->ni_peertxcredits; - } - } - - if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2) - tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2; - - if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2) - tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2; - - if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) { - CWARN("Concurrent sends %d is lower than message queue size: %d, performance may drop slightly.\n", - tunables->lnd_concurrent_sends, ni->ni_peertxcredits); - } - - if (!tunables->lnd_fmr_pool_size) - tunables->lnd_fmr_pool_size = fmr_pool_size; - if (!tunables->lnd_fmr_flush_trigger) - tunables->lnd_fmr_flush_trigger = fmr_flush_trigger; - if (!tunables->lnd_fmr_cache) - tunables->lnd_fmr_cache = fmr_cache; - if (!tunables->lnd_conns_per_peer) { - tunables->lnd_conns_per_peer = (conns_per_peer) ? 
- conns_per_peer : 1; - } - - return 0; -} - -void kiblnd_tunables_init(void) -{ - default_tunables.lnd_version = 0; - default_tunables.lnd_peercredits_hiw = peer_credits_hiw, - default_tunables.lnd_map_on_demand = map_on_demand; - default_tunables.lnd_concurrent_sends = concurrent_sends; - default_tunables.lnd_fmr_pool_size = fmr_pool_size; - default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger; - default_tunables.lnd_fmr_cache = fmr_cache; - default_tunables.lnd_conns_per_peer = conns_per_peer; -} diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/Makefile b/drivers/staging/lustre/lnet/klnds/socklnd/Makefile deleted file mode 100644 index a7da1abfc804..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET) += ksocklnd.o - -ksocklnd-y := socklnd.o socklnd_cb.o socklnd_proto.o socklnd_modparams.o socklnd_lib.o diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c deleted file mode 100644 index f01b34ac1a53..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ /dev/null @@ -1,2921 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/socklnd/socklnd.c - * - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - */ - -#include "socklnd.h" - -static struct lnet_lnd the_ksocklnd; -struct ksock_nal_data ksocknal_data; - -static struct ksock_interface * -ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip) -{ - struct ksock_net *net = ni->ni_data; - int i; - struct ksock_interface *iface; - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT(i < LNET_MAX_INTERFACES); - iface = &net->ksnn_interfaces[i]; - - if (iface->ksni_ipaddr == ip) - return iface; - } - - return NULL; -} - -static struct ksock_route * -ksocknal_create_route(__u32 ipaddr, int port) -{ - struct ksock_route *route; - - route = kzalloc(sizeof(*route), GFP_NOFS); - if (!route) - return NULL; - - atomic_set(&route->ksnr_refcount, 1); - route->ksnr_peer = NULL; - route->ksnr_retry_interval = 0; /* OK to connect at any time */ - route->ksnr_ipaddr = ipaddr; - route->ksnr_port = port; - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - route->ksnr_connected = 0; - route->ksnr_deleted = 0; - route->ksnr_conn_count = 0; - route->ksnr_share_count = 0; - - return route; -} - -void -ksocknal_destroy_route(struct ksock_route *route) -{ - LASSERT(!atomic_read(&route->ksnr_refcount)); - - if (route->ksnr_peer) - ksocknal_peer_decref(route->ksnr_peer); - - kfree(route); -} - -static int -ksocknal_create_peer(struct ksock_peer **peerp, struct lnet_ni *ni, - struct lnet_process_id id) -{ - int cpt 
= lnet_cpt_of_nid(id.nid); - struct ksock_net *net = ni->ni_data; - struct ksock_peer *peer; - - LASSERT(id.nid != LNET_NID_ANY); - LASSERT(id.pid != LNET_PID_ANY); - LASSERT(!in_interrupt()); - - peer = kzalloc_cpt(sizeof(*peer), GFP_NOFS, cpt); - if (!peer) - return -ENOMEM; - - peer->ksnp_ni = ni; - peer->ksnp_id = id; - atomic_set(&peer->ksnp_refcount, 1); /* 1 ref for caller */ - peer->ksnp_closing = 0; - peer->ksnp_accepting = 0; - peer->ksnp_proto = NULL; - peer->ksnp_last_alive = 0; - peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1; - - INIT_LIST_HEAD(&peer->ksnp_conns); - INIT_LIST_HEAD(&peer->ksnp_routes); - INIT_LIST_HEAD(&peer->ksnp_tx_queue); - INIT_LIST_HEAD(&peer->ksnp_zc_req_list); - spin_lock_init(&peer->ksnp_lock); - - spin_lock_bh(&net->ksnn_lock); - - if (net->ksnn_shutdown) { - spin_unlock_bh(&net->ksnn_lock); - - kfree(peer); - CERROR("Can't create peer: network shutdown\n"); - return -ESHUTDOWN; - } - - net->ksnn_npeers++; - - spin_unlock_bh(&net->ksnn_lock); - - *peerp = peer; - return 0; -} - -void -ksocknal_destroy_peer(struct ksock_peer *peer) -{ - struct ksock_net *net = peer->ksnp_ni->ni_data; - - CDEBUG(D_NET, "peer %s %p deleted\n", - libcfs_id2str(peer->ksnp_id), peer); - - LASSERT(!atomic_read(&peer->ksnp_refcount)); - LASSERT(!peer->ksnp_accepting); - LASSERT(list_empty(&peer->ksnp_conns)); - LASSERT(list_empty(&peer->ksnp_routes)); - LASSERT(list_empty(&peer->ksnp_tx_queue)); - LASSERT(list_empty(&peer->ksnp_zc_req_list)); - - kfree(peer); - - /* - * NB a peer's connections and routes keep a reference on their peer - * until they are destroyed, so we can be assured that _all_ state to - * do with this peer has been cleaned up when its refcount drops to - * zero. 
- */ - spin_lock_bh(&net->ksnn_lock); - net->ksnn_npeers--; - spin_unlock_bh(&net->ksnn_lock); -} - -struct ksock_peer * -ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) -{ - struct list_head *peer_list = ksocknal_nid2peerlist(id.nid); - struct ksock_peer *peer; - - list_for_each_entry(peer, peer_list, ksnp_list) { - LASSERT(!peer->ksnp_closing); - - if (peer->ksnp_ni != ni) - continue; - - if (peer->ksnp_id.nid != id.nid || - peer->ksnp_id.pid != id.pid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_id2str(id), - atomic_read(&peer->ksnp_refcount)); - return peer; - } - return NULL; -} - -struct ksock_peer * -ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) -{ - struct ksock_peer *peer; - - read_lock(&ksocknal_data.ksnd_global_lock); - peer = ksocknal_find_peer_locked(ni, id); - if (peer) /* +1 ref for caller? */ - ksocknal_peer_addref(peer); - read_unlock(&ksocknal_data.ksnd_global_lock); - - return peer; -} - -static void -ksocknal_unlink_peer_locked(struct ksock_peer *peer) -{ - int i; - __u32 ip; - struct ksock_interface *iface; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) { - LASSERT(i < LNET_MAX_INTERFACES); - ip = peer->ksnp_passive_ips[i]; - - iface = ksocknal_ip2iface(peer->ksnp_ni, ip); - /* - * All IPs in peer->ksnp_passive_ips[] come from the - * interface list, therefore the call must succeed. 
- */ - LASSERT(iface); - - CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n", - peer, iface, iface->ksni_nroutes); - iface->ksni_npeers--; - } - - LASSERT(list_empty(&peer->ksnp_conns)); - LASSERT(list_empty(&peer->ksnp_routes)); - LASSERT(!peer->ksnp_closing); - peer->ksnp_closing = 1; - list_del(&peer->ksnp_list); - /* lose peerlist's ref */ - ksocknal_peer_decref(peer); -} - -static int -ksocknal_get_peer_info(struct lnet_ni *ni, int index, - struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip, - int *port, int *conn_count, int *share_count) -{ - struct ksock_peer *peer; - struct list_head *ptmp; - struct ksock_route *route; - struct list_head *rtmp; - int i; - int j; - int rc = -ENOENT; - - read_lock(&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - if (!peer->ksnp_n_passive_ips && - list_empty(&peer->ksnp_routes)) { - if (index-- > 0) - continue; - - *id = peer->ksnp_id; - *myip = 0; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } - - for (j = 0; j < peer->ksnp_n_passive_ips; j++) { - if (index-- > 0) - continue; - - *id = peer->ksnp_id; - *myip = peer->ksnp_passive_ips[j]; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } - - list_for_each(rtmp, &peer->ksnp_routes) { - if (index-- > 0) - continue; - - route = list_entry(rtmp, struct ksock_route, - ksnr_list); - - *id = peer->ksnp_id; - *myip = route->ksnr_myipaddr; - *peer_ip = route->ksnr_ipaddr; - *port = route->ksnr_port; - *conn_count = route->ksnr_conn_count; - *share_count = route->ksnr_share_count; - rc = 0; - goto out; - } - } - } - out: - read_unlock(&ksocknal_data.ksnd_global_lock); - return rc; -} - -static void -ksocknal_associate_route_conn_locked(struct ksock_route *route, - struct ksock_conn *conn) -{ - 
struct ksock_peer *peer = route->ksnr_peer; - int type = conn->ksnc_type; - struct ksock_interface *iface; - - conn->ksnc_route = route; - ksocknal_route_addref(route); - - if (route->ksnr_myipaddr != conn->ksnc_myipaddr) { - if (!route->ksnr_myipaddr) { - /* route wasn't bound locally yet (the initial route) */ - CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n", - libcfs_id2str(peer->ksnp_id), - &route->ksnr_ipaddr, - &conn->ksnc_myipaddr); - } else { - CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h to %pI4h\n", - libcfs_id2str(peer->ksnp_id), - &route->ksnr_ipaddr, - &route->ksnr_myipaddr, - &conn->ksnc_myipaddr); - - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface) - iface->ksni_nroutes--; - } - route->ksnr_myipaddr = conn->ksnc_myipaddr; - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface) - iface->ksni_nroutes++; - } - - route->ksnr_connected |= (1 << type); - route->ksnr_conn_count++; - - /* - * Successful connection => further attempts can - * proceed immediately - */ - route->ksnr_retry_interval = 0; -} - -static void -ksocknal_add_route_locked(struct ksock_peer *peer, struct ksock_route *route) -{ - struct list_head *tmp; - struct ksock_conn *conn; - struct ksock_route *route2; - - LASSERT(!peer->ksnp_closing); - LASSERT(!route->ksnr_peer); - LASSERT(!route->ksnr_scheduled); - LASSERT(!route->ksnr_connecting); - LASSERT(!route->ksnr_connected); - - /* LASSERT(unique) */ - list_for_each(tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, struct ksock_route, ksnr_list); - - if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { - CERROR("Duplicate route %s %pI4h\n", - libcfs_id2str(peer->ksnp_id), - &route->ksnr_ipaddr); - LBUG(); - } - } - - route->ksnr_peer = peer; - ksocknal_peer_addref(peer); - /* peer's routelist takes over my ref on 'route' */ - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - - list_for_each(tmp, &peer->ksnp_conns) { - conn = list_entry(tmp, struct 
ksock_conn, ksnc_list); - - if (conn->ksnc_ipaddr != route->ksnr_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - /* keep going (typed routes) */ - } -} - -static void -ksocknal_del_route_locked(struct ksock_route *route) -{ - struct ksock_peer *peer = route->ksnr_peer; - struct ksock_interface *iface; - struct ksock_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; - - LASSERT(!route->ksnr_deleted); - - /* Close associated conns */ - list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry(ctmp, struct ksock_conn, ksnc_list); - - if (conn->ksnc_route != route) - continue; - - ksocknal_close_conn_locked(conn, 0); - } - - if (route->ksnr_myipaddr) { - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface) - iface->ksni_nroutes--; - } - - route->ksnr_deleted = 1; - list_del(&route->ksnr_list); - ksocknal_route_decref(route); /* drop peer's ref */ - - if (list_empty(&peer->ksnp_routes) && - list_empty(&peer->ksnp_conns)) { - /* - * I've just removed the last route to a peer with no active - * connections - */ - ksocknal_unlink_peer_locked(peer); - } -} - -int -ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, - int port) -{ - struct ksock_peer *peer; - struct ksock_peer *peer2; - struct ksock_route *route; - struct ksock_route *route2; - int rc; - - if (id.nid == LNET_NID_ANY || - id.pid == LNET_PID_ANY) - return -EINVAL; - - /* Have a brand new peer ready... 
*/ - rc = ksocknal_create_peer(&peer, ni, id); - if (rc) - return rc; - - route = ksocknal_create_route(ipaddr, port); - if (!route) { - ksocknal_peer_decref(peer); - return -ENOMEM; - } - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - /* always called with a ref on ni, so shutdown can't have started */ - LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown); - - peer2 = ksocknal_find_peer_locked(ni, id); - if (peer2) { - ksocknal_peer_decref(peer); - peer = peer2; - } else { - /* peer table takes my ref on peer */ - list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(id.nid)); - } - - list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) { - if (route2->ksnr_ipaddr == ipaddr) { - /* Route already exists, use the old one */ - ksocknal_route_decref(route); - route2->ksnr_share_count++; - goto out; - } - } - /* Route doesn't already exist, add the new one */ - ksocknal_add_route_locked(peer, route); - route->ksnr_share_count++; -out: - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - return 0; -} - -static void -ksocknal_del_peer_locked(struct ksock_peer *peer, __u32 ip) -{ - struct ksock_conn *conn; - struct ksock_route *route; - struct list_head *tmp; - struct list_head *nxt; - int nshared; - - LASSERT(!peer->ksnp_closing); - - /* Extra ref prevents peer disappearing until I'm done with it */ - ksocknal_peer_addref(peer); - - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - /* no match */ - if (!(!ip || route->ksnr_ipaddr == ip)) - continue; - - route->ksnr_share_count = 0; - /* This deletes associated conns too */ - ksocknal_del_route_locked(route); - } - - nshared = 0; - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - nshared += route->ksnr_share_count; - } - - if (!nshared) { - /* - * remove everything else if there are no explicit entries - * left - */ - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - 
route = list_entry(tmp, struct ksock_route, ksnr_list); - - /* we should only be removing auto-entries */ - LASSERT(!route->ksnr_share_count); - ksocknal_del_route_locked(route); - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); - - ksocknal_close_conn_locked(conn, 0); - } - } - - ksocknal_peer_decref(peer); - /* NB peer unlinks itself when last conn/route is removed */ -} - -static int -ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) -{ - LIST_HEAD(zombies); - struct list_head *ptmp; - struct list_head *pnxt; - struct ksock_peer *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - if (id.nid != LNET_NID_ANY) { - lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - } else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) && - (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid))) - continue; - - ksocknal_peer_addref(peer); /* a ref for me... */ - - ksocknal_del_peer_locked(peer, ip); - - if (peer->ksnp_closing && - !list_empty(&peer->ksnp_tx_queue)) { - LASSERT(list_empty(&peer->ksnp_conns)); - LASSERT(list_empty(&peer->ksnp_routes)); - - list_splice_init(&peer->ksnp_tx_queue, - &zombies); - } - - ksocknal_peer_decref(peer); /* ...till here */ - - rc = 0; /* matched! 
*/ - } - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - ksocknal_txlist_done(ni, &zombies, 1); - - return rc; -} - -static struct ksock_conn * -ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index) -{ - struct ksock_peer *peer; - struct list_head *ptmp; - struct ksock_conn *conn; - struct list_head *ctmp; - int i; - - read_lock(&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, ksnp_list); - - LASSERT(!peer->ksnp_closing); - - if (peer->ksnp_ni != ni) - continue; - - list_for_each(ctmp, &peer->ksnp_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, struct ksock_conn, - ksnc_list); - ksocknal_conn_addref(conn); - read_unlock(&ksocknal_data.ksnd_global_lock); - return conn; - } - } - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - return NULL; -} - -static struct ksock_sched * -ksocknal_choose_scheduler_locked(unsigned int cpt) -{ - struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt]; - struct ksock_sched *sched; - int i; - - LASSERT(info->ksi_nthreads > 0); - - sched = &info->ksi_scheds[0]; - /* - * NB: it's safe so far, but info->ksi_nthreads could be changed - * at runtime when we have dynamic LNet configuration, then we - * need to take care of this. - */ - for (i = 1; i < info->ksi_nthreads; i++) { - if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns) - sched = &info->ksi_scheds[i]; - } - - return sched; -} - -static int -ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs) -{ - struct ksock_net *net = ni->ni_data; - int i; - int nip; - - read_lock(&ksocknal_data.ksnd_global_lock); - - nip = net->ksnn_ninterfaces; - LASSERT(nip <= LNET_MAX_INTERFACES); - - /* - * Only offer interfaces for additional connections if I have - * more than one. 
- */ - if (nip < 2) { - read_unlock(&ksocknal_data.ksnd_global_lock); - return 0; - } - - for (i = 0; i < nip; i++) { - ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; - LASSERT(ipaddrs[i]); - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - return nip; -} - -static int -ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips) -{ - int best_netmatch = 0; - int best_xor = 0; - int best = -1; - int this_xor; - int this_netmatch; - int i; - - for (i = 0; i < nips; i++) { - if (!ips[i]) - continue; - - this_xor = ips[i] ^ iface->ksni_ipaddr; - this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0; - - if (!(best < 0 || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_xor > this_xor))) - continue; - - best = i; - best_netmatch = this_netmatch; - best_xor = this_xor; - } - - LASSERT(best >= 0); - return best; -} - -static int -ksocknal_select_ips(struct ksock_peer *peer, __u32 *peerips, int n_peerips) -{ - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - struct ksock_net *net = peer->ksnp_ni->ni_data; - struct ksock_interface *iface; - struct ksock_interface *best_iface; - int n_ips; - int i; - int j; - int k; - __u32 ip; - __u32 xor; - int this_netmatch; - int best_netmatch; - int best_npeers; - - /* - * CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness shouldn't matter - */ - /* - * Also note that I'm not going to return more than n_peerips - * interfaces, even if I have more myself - */ - write_lock_bh(global_lock); - - LASSERT(n_peerips <= LNET_MAX_INTERFACES); - LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); - - /* - * Only match interfaces for additional connections - * if I have > 1 interface - */ - n_ips = (net->ksnn_ninterfaces < 2) ? 
0 : - min(n_peerips, net->ksnn_ninterfaces); - - for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) { - /* ^ yes really... */ - - /* - * If we have any new interfaces, first tick off all the - * peer IPs that match old interfaces, then choose new - * interfaces to match the remaining peer IPS. - * We don't forget interfaces we've stopped using; we might - * start using them again... - */ - if (i < peer->ksnp_n_passive_ips) { - /* Old interface. */ - ip = peer->ksnp_passive_ips[i]; - best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip); - - /* peer passive ips are kept up to date */ - LASSERT(best_iface); - } else { - /* choose a new interface */ - LASSERT(i == peer->ksnp_n_passive_ips); - - best_iface = NULL; - best_netmatch = 0; - best_npeers = 0; - - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - ip = iface->ksni_ipaddr; - - for (k = 0; k < peer->ksnp_n_passive_ips; k++) - if (peer->ksnp_passive_ips[k] == ip) - break; - - if (k < peer->ksnp_n_passive_ips) /* using it already */ - continue; - - k = ksocknal_match_peerip(iface, peerips, - n_peerips); - xor = ip ^ peerips[k]; - this_netmatch = !(xor & iface->ksni_netmask) ? 
1 : 0; - - if (!(!best_iface || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_npeers > iface->ksni_npeers))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_npeers = iface->ksni_npeers; - } - - LASSERT(best_iface); - - best_iface->ksni_npeers++; - ip = best_iface->ksni_ipaddr; - peer->ksnp_passive_ips[i] = ip; - peer->ksnp_n_passive_ips = i + 1; - } - - /* mark the best matching peer IP used */ - j = ksocknal_match_peerip(best_iface, peerips, n_peerips); - peerips[j] = 0; - } - - /* Overwrite input peer IP addresses */ - memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips)); - - write_unlock_bh(global_lock); - - return n_ips; -} - -static void -ksocknal_create_routes(struct ksock_peer *peer, int port, - __u32 *peer_ipaddrs, int npeer_ipaddrs) -{ - struct ksock_route *newroute = NULL; - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - struct lnet_ni *ni = peer->ksnp_ni; - struct ksock_net *net = ni->ni_data; - struct list_head *rtmp; - struct ksock_route *route; - struct ksock_interface *iface; - struct ksock_interface *best_iface; - int best_netmatch; - int this_netmatch; - int best_nroutes; - int i; - int j; - - /* - * CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. 
We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness here shouldn't matter - */ - write_lock_bh(global_lock); - - if (net->ksnn_ninterfaces < 2) { - /* - * Only create additional connections - * if I have > 1 interface - */ - write_unlock_bh(global_lock); - return; - } - - LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES); - - for (i = 0; i < npeer_ipaddrs; i++) { - if (newroute) { - newroute->ksnr_ipaddr = peer_ipaddrs[i]; - } else { - write_unlock_bh(global_lock); - - newroute = ksocknal_create_route(peer_ipaddrs[i], port); - if (!newroute) - return; - - write_lock_bh(global_lock); - } - - if (peer->ksnp_closing) { - /* peer got closed under me */ - break; - } - - /* Already got a route? */ - route = NULL; - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, struct ksock_route, ksnr_list); - - if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) - break; - - route = NULL; - } - if (route) - continue; - - best_iface = NULL; - best_nroutes = 0; - best_netmatch = 0; - - LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); - - /* Select interface to connect from */ - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - - /* Using this interface already? */ - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, struct ksock_route, - ksnr_list); - - if (route->ksnr_myipaddr == iface->ksni_ipaddr) - break; - - route = NULL; - } - if (route) - continue; - - this_netmatch = (!((iface->ksni_ipaddr ^ - newroute->ksnr_ipaddr) & - iface->ksni_netmask)) ? 
1 : 0; - - if (!(!best_iface || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_nroutes > iface->ksni_nroutes))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_nroutes = iface->ksni_nroutes; - } - - if (!best_iface) - continue; - - newroute->ksnr_myipaddr = best_iface->ksni_ipaddr; - best_iface->ksni_nroutes++; - - ksocknal_add_route_locked(peer, newroute); - newroute = NULL; - } - - write_unlock_bh(global_lock); - if (newroute) - ksocknal_route_decref(newroute); -} - -int -ksocknal_accept(struct lnet_ni *ni, struct socket *sock) -{ - struct ksock_connreq *cr; - int rc; - __u32 peer_ip; - int peer_port; - - rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT(!rc); /* we succeeded before */ - - cr = kzalloc(sizeof(*cr), GFP_NOFS); - if (!cr) { - LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n", - &peer_ip); - return -ENOMEM; - } - - lnet_ni_addref(ni); - cr->ksncr_ni = ni; - cr->ksncr_sock = sock; - - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - - list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); - wake_up(&ksocknal_data.ksnd_connd_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - return 0; -} - -static int -ksocknal_connecting(struct ksock_peer *peer, __u32 ipaddr) -{ - struct ksock_route *route; - - list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) { - if (route->ksnr_ipaddr == ipaddr) - return route->ksnr_connecting; - } - return 0; -} - -int -ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, - struct socket *sock, int type) -{ - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - LIST_HEAD(zombies); - struct lnet_process_id peerid; - struct list_head *tmp; - __u64 incarnation; - struct ksock_conn *conn; - struct ksock_conn *conn2; - struct ksock_peer *peer = NULL; - struct ksock_peer *peer2; - struct ksock_sched *sched; - struct ksock_hello_msg *hello; - int cpt; - struct ksock_tx *tx; - 
struct ksock_tx *txtmp; - int rc; - int active; - char *warn = NULL; - - active = !!route; - - LASSERT(active == (type != SOCKLND_CONN_NONE)); - - conn = kzalloc(sizeof(*conn), GFP_NOFS); - if (!conn) { - rc = -ENOMEM; - goto failed_0; - } - - conn->ksnc_peer = NULL; - conn->ksnc_route = NULL; - conn->ksnc_sock = sock; - /* - * 2 ref, 1 for conn, another extra ref prevents socket - * being closed before establishment of connection - */ - atomic_set(&conn->ksnc_sock_refcount, 2); - conn->ksnc_type = type; - ksocknal_lib_save_callback(sock, conn); - atomic_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ - - conn->ksnc_rx_ready = 0; - conn->ksnc_rx_scheduled = 0; - - INIT_LIST_HEAD(&conn->ksnc_tx_queue); - conn->ksnc_tx_ready = 0; - conn->ksnc_tx_scheduled = 0; - conn->ksnc_tx_carrier = NULL; - atomic_set(&conn->ksnc_tx_nob, 0); - - hello = kvzalloc(offsetof(struct ksock_hello_msg, - kshm_ips[LNET_MAX_INTERFACES]), - GFP_KERNEL); - if (!hello) { - rc = -ENOMEM; - goto failed_1; - } - - /* stash conn's local and remote addrs */ - rc = ksocknal_lib_get_conn_addrs(conn); - if (rc) - goto failed_1; - - /* - * Find out/confirm peer's NID and connection type and get the - * vector of interfaces she's willing to let me connect to. 
- * Passive connections use the listener timeout since the peer sends - * eagerly - */ - if (active) { - peer = route->ksnr_peer; - LASSERT(ni == peer->ksnp_ni); - - /* Active connection sends HELLO eagerly */ - hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips); - peerid = peer->ksnp_id; - - write_lock_bh(global_lock); - conn->ksnc_proto = peer->ksnp_proto; - write_unlock_bh(global_lock); - - if (!conn->ksnc_proto) { - conn->ksnc_proto = &ksocknal_protocol_v3x; -#if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol == 2) - conn->ksnc_proto = &ksocknal_protocol_v2x; - else if (*ksocknal_tunables.ksnd_protocol == 1) - conn->ksnc_proto = &ksocknal_protocol_v1x; -#endif - } - - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - if (rc) - goto failed_1; - } else { - peerid.nid = LNET_NID_ANY; - peerid.pid = LNET_PID_ANY; - - /* Passive, get protocol from peer */ - conn->ksnc_proto = NULL; - } - - rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation); - if (rc < 0) - goto failed_1; - - LASSERT(!rc || active); - LASSERT(conn->ksnc_proto); - LASSERT(peerid.nid != LNET_NID_ANY); - - cpt = lnet_cpt_of_nid(peerid.nid); - - if (active) { - ksocknal_peer_addref(peer); - write_lock_bh(global_lock); - } else { - rc = ksocknal_create_peer(&peer, ni, peerid); - if (rc) - goto failed_1; - - write_lock_bh(global_lock); - - /* called with a ref on ni, so shutdown can't have started */ - LASSERT(!((struct ksock_net *)ni->ni_data)->ksnn_shutdown); - - peer2 = ksocknal_find_peer_locked(ni, peerid); - if (!peer2) { - /* - * NB this puts an "empty" peer in the peer - * table (which takes my ref) - */ - list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(peerid.nid)); - } else { - ksocknal_peer_decref(peer); - peer = peer2; - } - - /* +1 ref for me */ - ksocknal_peer_addref(peer); - peer->ksnp_accepting++; - - /* - * Am I already connecting to this guy? Resolve in - * favour of higher NID... 
- */ - if (peerid.nid < ni->ni_nid && - ksocknal_connecting(peer, conn->ksnc_ipaddr)) { - rc = EALREADY; - warn = "connection race resolution"; - goto failed_2; - } - } - - if (peer->ksnp_closing || - (active && route->ksnr_deleted)) { - /* peer/route got closed under me */ - rc = -ESTALE; - warn = "peer/route removed"; - goto failed_2; - } - - if (!peer->ksnp_proto) { - /* - * Never connected before. - * NB recv_hello may have returned EPROTO to signal my peer - * wants a different protocol than the one I asked for. - */ - LASSERT(list_empty(&peer->ksnp_conns)); - - peer->ksnp_proto = conn->ksnc_proto; - peer->ksnp_incarnation = incarnation; - } - - if (peer->ksnp_proto != conn->ksnc_proto || - peer->ksnp_incarnation != incarnation) { - /* Peer rebooted or I've got the wrong protocol version */ - ksocknal_close_peer_conns_locked(peer, 0, 0); - - peer->ksnp_proto = NULL; - rc = ESTALE; - warn = peer->ksnp_incarnation != incarnation ? - "peer rebooted" : - "wrong proto version"; - goto failed_2; - } - - switch (rc) { - default: - LBUG(); - case 0: - break; - case EALREADY: - warn = "lost conn race"; - goto failed_2; - case EPROTO: - warn = "retry with different protocol version"; - goto failed_2; - } - - /* - * Refuse to duplicate an existing connection, unless this is a - * loopback connection - */ - if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || - conn2->ksnc_myipaddr != conn->ksnc_myipaddr || - conn2->ksnc_type != conn->ksnc_type) - continue; - - /* - * Reply on a passive connection attempt so the peer - * realises we're connected. - */ - LASSERT(!rc); - if (!active) - rc = EALREADY; - - warn = "duplicate"; - goto failed_2; - } - } - - /* - * If the connection created by this route didn't bind to the IP - * address the route connected to, the connection/route matching - * code below probably isn't going to work. 
- */ - if (active && - route->ksnr_ipaddr != conn->ksnc_ipaddr) { - CERROR("Route %s %pI4h connected to %pI4h\n", - libcfs_id2str(peer->ksnp_id), - &route->ksnr_ipaddr, - &conn->ksnc_ipaddr); - } - - /* - * Search for a route corresponding to the new connection and - * create an association. This allows incoming connections created - * by routes in my peer to match my own route entries so I don't - * continually create duplicate routes. - */ - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - if (route->ksnr_ipaddr != conn->ksnc_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - break; - } - - conn->ksnc_peer = peer; /* conn takes my ref on peer */ - peer->ksnp_last_alive = jiffies; - peer->ksnp_send_keepalive = 0; - peer->ksnp_error = 0; - - sched = ksocknal_choose_scheduler_locked(cpt); - sched->kss_nconns++; - conn->ksnc_scheduler = sched; - - conn->ksnc_tx_last_post = jiffies; - /* Set the deadline for the outgoing HELLO to drain */ - conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued; - conn->ksnc_tx_deadline = jiffies + *ksocknal_tunables.ksnd_timeout * HZ; - mb(); /* order with adding to peer's conn list */ - - list_add(&conn->ksnc_list, &peer->ksnp_conns); - ksocknal_conn_addref(conn); - - ksocknal_new_packet(conn, 0); - - conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn); - - /* Take packets blocking for this connection. */ - list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) { - int match = conn->ksnc_proto->pro_match_tx(conn, tx, - tx->tx_nonblk); - - if (match == SOCKNAL_MATCH_NO) - continue; - - list_del(&tx->tx_list); - ksocknal_queue_tx_locked(tx, conn); - } - - write_unlock_bh(global_lock); - - /* - * We've now got a new connection. Any errors from here on are just - * like "normal" comms errors and we close the connection normally. 
- * NB (a) we still have to send the reply HELLO for passive - * connections, - * (b) normal I/O on the conn is blocked until I setup and call the - * socket callbacks. - */ - CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n", - libcfs_id2str(peerid), conn->ksnc_proto->pro_version, - &conn->ksnc_myipaddr, &conn->ksnc_ipaddr, - conn->ksnc_port, incarnation, cpt, - (int)(sched - &sched->kss_info->ksi_scheds[0])); - - if (active) { - /* additional routes after interface exchange? */ - ksocknal_create_routes(peer, conn->ksnc_port, - hello->kshm_ips, hello->kshm_nips); - } else { - hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips, - hello->kshm_nips); - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - } - - kvfree(hello); - - /* - * setup the socket AFTER I've received hello (it disables - * SO_LINGER). I might call back to the acceptor who may want - * to send a protocol version response and then close the - * socket; this ensures the socket only tears down after the - * response has been sent. - */ - if (!rc) - rc = ksocknal_lib_setup_sock(sock); - - write_lock_bh(global_lock); - - /* NB my callbacks block while I hold ksnd_global_lock */ - ksocknal_lib_set_callback(sock, conn); - - if (!active) - peer->ksnp_accepting--; - - write_unlock_bh(global_lock); - - if (rc) { - write_lock_bh(global_lock); - if (!conn->ksnc_closing) { - /* could be closed by another thread */ - ksocknal_close_conn_locked(conn, rc); - } - write_unlock_bh(global_lock); - } else if (!ksocknal_connsock_addref(conn)) { - /* Allow I/O to proceed. 
*/ - ksocknal_read_callback(conn); - ksocknal_write_callback(conn); - ksocknal_connsock_decref(conn); - } - - ksocknal_connsock_decref(conn); - ksocknal_conn_decref(conn); - return rc; - - failed_2: - if (!peer->ksnp_closing && - list_empty(&peer->ksnp_conns) && - list_empty(&peer->ksnp_routes)) { - list_add(&zombies, &peer->ksnp_tx_queue); - list_del_init(&peer->ksnp_tx_queue); - ksocknal_unlink_peer_locked(peer); - } - - write_unlock_bh(global_lock); - - if (warn) { - if (rc < 0) - CERROR("Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - else - CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - } - - if (!active) { - if (rc > 0) { - /* - * Request retry by replying with CONN_NONE - * ksnc_proto has been set already - */ - conn->ksnc_type = SOCKLND_CONN_NONE; - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, peerid.nid, hello); - } - - write_lock_bh(global_lock); - peer->ksnp_accepting--; - write_unlock_bh(global_lock); - } - - ksocknal_txlist_done(ni, &zombies, 1); - ksocknal_peer_decref(peer); - -failed_1: - kvfree(hello); - - kfree(conn); - -failed_0: - sock_release(sock); - return rc; -} - -void -ksocknal_close_conn_locked(struct ksock_conn *conn, int error) -{ - /* - * This just does the immmediate housekeeping, and queues the - * connection for the reaper to terminate. - * Caller holds ksnd_global_lock exclusively in irq context - */ - struct ksock_peer *peer = conn->ksnc_peer; - struct ksock_route *route; - struct ksock_conn *conn2; - struct list_head *tmp; - - LASSERT(!peer->ksnp_error); - LASSERT(!conn->ksnc_closing); - conn->ksnc_closing = 1; - - /* ksnd_deathrow_conns takes over peer's ref */ - list_del(&conn->ksnc_list); - - route = conn->ksnc_route; - if (route) { - /* dissociate conn from route... 
*/ - LASSERT(!route->ksnr_deleted); - LASSERT(route->ksnr_connected & (1 << conn->ksnc_type)); - - conn2 = NULL; - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn2->ksnc_route == route && - conn2->ksnc_type == conn->ksnc_type) - break; - - conn2 = NULL; - } - if (!conn2) - route->ksnr_connected &= ~(1 << conn->ksnc_type); - - conn->ksnc_route = NULL; - - ksocknal_route_decref(route); /* drop conn's ref on route */ - } - - if (list_empty(&peer->ksnp_conns)) { - /* No more connections to this peer */ - - if (!list_empty(&peer->ksnp_tx_queue)) { - struct ksock_tx *tx; - - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x); - - /* - * throw them to the last connection..., - * these TXs will be send to /dev/null by scheduler - */ - list_for_each_entry(tx, &peer->ksnp_tx_queue, - tx_list) - ksocknal_tx_prep(conn, tx); - - spin_lock_bh(&conn->ksnc_scheduler->kss_lock); - list_splice_init(&peer->ksnp_tx_queue, - &conn->ksnc_tx_queue); - spin_unlock_bh(&conn->ksnc_scheduler->kss_lock); - } - - peer->ksnp_proto = NULL; /* renegotiate protocol version */ - peer->ksnp_error = error; /* stash last conn close reason */ - - if (list_empty(&peer->ksnp_routes)) { - /* - * I've just closed last conn belonging to a - * peer with no routes to it - */ - ksocknal_unlink_peer_locked(peer); - } - } - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - - list_add_tail(&conn->ksnc_list, - &ksocknal_data.ksnd_deathrow_conns); - wake_up(&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); -} - -void -ksocknal_peer_failed(struct ksock_peer *peer) -{ - int notify = 0; - unsigned long last_alive = 0; - - /* - * There has been a connection failure or comms error; but I'll only - * tell LNET I think the peer is dead if it's to another kernel and - * there are no connections or connection attempts in existence. 
- */ - read_lock(&ksocknal_data.ksnd_global_lock); - - if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) && - list_empty(&peer->ksnp_conns) && - !peer->ksnp_accepting && - !ksocknal_find_connecting_route_locked(peer)) { - notify = 1; - last_alive = peer->ksnp_last_alive; - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - - if (notify) - lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0, - last_alive); -} - -void -ksocknal_finalize_zcreq(struct ksock_conn *conn) -{ - struct ksock_peer *peer = conn->ksnc_peer; - struct ksock_tx *tx; - struct ksock_tx *temp; - struct ksock_tx *tmp; - LIST_HEAD(zlist); - - /* - * NB safe to finalize TXs because closing of socket will - * abort all buffered data - */ - LASSERT(!conn->ksnc_sock); - - spin_lock(&peer->ksnp_lock); - - list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) { - if (tx->tx_conn != conn) - continue; - - LASSERT(tx->tx_msg.ksm_zc_cookies[0]); - - tx->tx_msg.ksm_zc_cookies[0] = 0; - tx->tx_zc_aborted = 1; /* mark it as not-acked */ - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); - } - - spin_unlock(&peer->ksnp_lock); - - list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) { - list_del(&tx->tx_zc_list); - ksocknal_tx_decref(tx); - } -} - -void -ksocknal_terminate_conn(struct ksock_conn *conn) -{ - /* - * This gets called by the reaper (guaranteed thread context) to - * disengage the socket from its callbacks and close it. - * ksnc_refcount will eventually hit zero, and then the reaper will - * destroy it. 
- */ - struct ksock_peer *peer = conn->ksnc_peer; - struct ksock_sched *sched = conn->ksnc_scheduler; - int failed = 0; - - LASSERT(conn->ksnc_closing); - - /* wake up the scheduler to "send" all remaining packets to /dev/null */ - spin_lock_bh(&sched->kss_lock); - - /* a closing conn is always ready to tx */ - conn->ksnc_tx_ready = 1; - - if (!conn->ksnc_tx_scheduled && - !list_empty(&conn->ksnc_tx_queue)) { - list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - wake_up(&sched->kss_waitq); - } - - spin_unlock_bh(&sched->kss_lock); - - /* serialise with callbacks */ - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - ksocknal_lib_reset_callback(conn->ksnc_sock, conn); - - /* - * OK, so this conn may not be completely disengaged from its - * scheduler yet, but it _has_ committed to terminate... - */ - conn->ksnc_scheduler->kss_nconns--; - - if (peer->ksnp_error) { - /* peer's last conn closed in error */ - LASSERT(list_empty(&peer->ksnp_conns)); - failed = 1; - peer->ksnp_error = 0; /* avoid multiple notifications */ - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - if (failed) - ksocknal_peer_failed(peer); - - /* - * The socket is closed on the final put; either here, or in - * ksocknal_{send,recv}msg(). Since we set up the linger2 option - * when the connection was established, this will close the socket - * immediately, aborting anything buffered in it. Any hung - * zero-copy transmits will therefore complete in finite time. 
- */ - ksocknal_connsock_decref(conn); -} - -void -ksocknal_queue_zombie_conn(struct ksock_conn *conn) -{ - /* Queue the conn for the reaper to destroy */ - - LASSERT(!atomic_read(&conn->ksnc_conn_refcount)); - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - - list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); - wake_up(&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); -} - -void -ksocknal_destroy_conn(struct ksock_conn *conn) -{ - unsigned long last_rcv; - - /* Final coup-de-grace of the reaper */ - CDEBUG(D_NET, "connection %p\n", conn); - - LASSERT(!atomic_read(&conn->ksnc_conn_refcount)); - LASSERT(!atomic_read(&conn->ksnc_sock_refcount)); - LASSERT(!conn->ksnc_sock); - LASSERT(!conn->ksnc_route); - LASSERT(!conn->ksnc_tx_scheduled); - LASSERT(!conn->ksnc_rx_scheduled); - LASSERT(list_empty(&conn->ksnc_tx_queue)); - - /* complete current receive if any */ - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_LNET_PAYLOAD: - last_rcv = conn->ksnc_rx_deadline - - *ksocknal_tunables.ksnd_timeout * HZ; - CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, - &conn->ksnc_ipaddr, conn->ksnc_port, - iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left, - (jiffies - last_rcv) / HZ); - lnet_finalize(conn->ksnc_peer->ksnp_ni, - conn->ksnc_cookie, -EIO); - break; - case SOCKNAL_RX_LNET_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port, - 
conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_SLOP: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port); - break; - default: - LBUG(); - break; - } - - ksocknal_peer_decref(conn->ksnc_peer); - - kfree(conn); -} - -int -ksocknal_close_peer_conns_locked(struct ksock_peer *peer, __u32 ipaddr, int why) -{ - struct ksock_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry(ctmp, struct ksock_conn, ksnc_list); - - if (!ipaddr || conn->ksnc_ipaddr == ipaddr) { - count++; - ksocknal_close_conn_locked(conn, why); - } - } - - return count; -} - -int -ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why) -{ - struct ksock_peer *peer = conn->ksnc_peer; - __u32 ipaddr = conn->ksnc_ipaddr; - int count; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - count = ksocknal_close_peer_conns_locked(peer, ipaddr, why); - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - return count; -} - -int -ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) -{ - struct ksock_peer *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - if (id.nid != LNET_NID_ANY) { - lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - } else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, - &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, ksnp_list); - - if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid))) - continue; - - count += 
ksocknal_close_peer_conns_locked(peer, ipaddr, - 0); - } - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - /* wildcards always succeed */ - if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr) - return 0; - - if (!count) - return -ENOENT; - else - return 0; -} - -void -ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive) -{ - /* - * The router is telling me she's been notified of a change in - * gateway state.... - */ - struct lnet_process_id id = {0}; - - id.nid = gw_nid; - id.pid = LNET_PID_ANY; - - CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), - alive ? "up" : "down"); - - if (!alive) { - /* If the gateway crashed, close all open connections... */ - ksocknal_close_matching_conns(id, 0); - return; - } - - /* - * ...otherwise do nothing. We can only establish new connections - * if we have autroutes, and these connect on demand. - */ -} - -void -ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when) -{ - int connect = 1; - unsigned long last_alive = 0; - unsigned long now = jiffies; - struct ksock_peer *peer = NULL; - rwlock_t *glock = &ksocknal_data.ksnd_global_lock; - struct lnet_process_id id = { - .nid = nid, - .pid = LNET_PID_LUSTRE, - }; - - read_lock(glock); - - peer = ksocknal_find_peer_locked(ni, id); - if (peer) { - struct ksock_conn *conn; - int bufnob; - - list_for_each_entry(conn, &peer->ksnp_conns, ksnc_list) { - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; - - if (bufnob < conn->ksnc_tx_bufnob) { - /* something got ACKed */ - conn->ksnc_tx_deadline = - jiffies + *ksocknal_tunables.ksnd_timeout * HZ; - peer->ksnp_last_alive = now; - conn->ksnc_tx_bufnob = bufnob; - } - } - - last_alive = peer->ksnp_last_alive; - if (!ksocknal_find_connectable_route_locked(peer)) - connect = 0; - } - - read_unlock(glock); - - if (last_alive) - *when = last_alive; - - CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n", - libcfs_nid2str(nid), peer, - last_alive ? 
(now - last_alive) / HZ : -1, - connect); - - if (!connect) - return; - - ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port()); - - write_lock_bh(glock); - - peer = ksocknal_find_peer_locked(ni, id); - if (peer) - ksocknal_launch_all_connections_locked(peer); - - write_unlock_bh(glock); -} - -static void -ksocknal_push_peer(struct ksock_peer *peer) -{ - int index; - int i; - struct list_head *tmp; - struct ksock_conn *conn; - - for (index = 0; ; index++) { - read_lock(&ksocknal_data.ksnd_global_lock); - - i = 0; - conn = NULL; - - list_for_each(tmp, &peer->ksnp_conns) { - if (i++ == index) { - conn = list_entry(tmp, struct ksock_conn, - ksnc_list); - ksocknal_conn_addref(conn); - break; - } - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - - if (!conn) - break; - - ksocknal_lib_push_conn(conn); - ksocknal_conn_decref(conn); - } -} - -static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) -{ - struct list_head *start; - struct list_head *end; - struct list_head *tmp; - int rc = -ENOENT; - unsigned int hsize = ksocknal_data.ksnd_peer_hash_size; - - if (id.nid == LNET_NID_ANY) { - start = &ksocknal_data.ksnd_peers[0]; - end = &ksocknal_data.ksnd_peers[hsize - 1]; - } else { - start = ksocknal_nid2peerlist(id.nid); - end = ksocknal_nid2peerlist(id.nid); - } - - for (tmp = start; tmp <= end; tmp++) { - int peer_off; /* searching offset in peer hash table */ - - for (peer_off = 0; ; peer_off++) { - struct ksock_peer *peer; - int i = 0; - - read_lock(&ksocknal_data.ksnd_global_lock); - list_for_each_entry(peer, tmp, ksnp_list) { - if (!((id.nid == LNET_NID_ANY || - id.nid == peer->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || - id.pid == peer->ksnp_id.pid))) - continue; - - if (i++ == peer_off) { - ksocknal_peer_addref(peer); - break; - } - } - read_unlock(&ksocknal_data.ksnd_global_lock); - - if (!i) /* no match */ - break; - - rc = 0; - ksocknal_push_peer(peer); - ksocknal_peer_decref(peer); - } - } - return rc; -} - -static int 
-ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask) -{ - struct ksock_net *net = ni->ni_data; - struct ksock_interface *iface; - int rc; - int i; - int j; - struct list_head *ptmp; - struct ksock_peer *peer; - struct list_head *rtmp; - struct ksock_route *route; - - if (!ipaddress || !netmask) - return -EINVAL; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - iface = ksocknal_ip2iface(ni, ipaddress); - if (iface) { - /* silently ignore dups */ - rc = 0; - } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) { - rc = -ENOSPC; - } else { - iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++]; - - iface->ksni_ipaddr = ipaddress; - iface->ksni_netmask = netmask; - iface->ksni_nroutes = 0; - iface->ksni_npeers = 0; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, struct ksock_peer, - ksnp_list); - - for (j = 0; j < peer->ksnp_n_passive_ips; j++) - if (peer->ksnp_passive_ips[j] == ipaddress) - iface->ksni_npeers++; - - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, struct ksock_route, - ksnr_list); - - if (route->ksnr_myipaddr == ipaddress) - iface->ksni_nroutes++; - } - } - } - - rc = 0; - /* - * NB only new connections will pay attention to the - * new interface! 
- */ - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - return rc; -} - -static void -ksocknal_peer_del_interface_locked(struct ksock_peer *peer, __u32 ipaddr) -{ - struct list_head *tmp; - struct list_head *nxt; - struct ksock_route *route; - struct ksock_conn *conn; - int i; - int j; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) - if (peer->ksnp_passive_ips[i] == ipaddr) { - for (j = i + 1; j < peer->ksnp_n_passive_ips; j++) - peer->ksnp_passive_ips[j - 1] = - peer->ksnp_passive_ips[j]; - peer->ksnp_n_passive_ips--; - break; - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - if (route->ksnr_myipaddr != ipaddr) - continue; - - if (route->ksnr_share_count) { - /* Manually created; keep, but unbind */ - route->ksnr_myipaddr = 0; - } else { - ksocknal_del_route_locked(route); - } - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn->ksnc_myipaddr == ipaddr) - ksocknal_close_conn_locked(conn, 0); - } -} - -static int -ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress) -{ - struct ksock_net *net = ni->ni_data; - int rc = -ENOENT; - struct list_head *tmp; - struct list_head *nxt; - struct ksock_peer *peer; - __u32 this_ip; - int i; - int j; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - this_ip = net->ksnn_interfaces[i].ksni_ipaddr; - - if (!(!ipaddress || ipaddress == this_ip)) - continue; - - rc = 0; - - for (j = i + 1; j < net->ksnn_ninterfaces; j++) - net->ksnn_interfaces[j - 1] = - net->ksnn_interfaces[j]; - - net->ksnn_ninterfaces--; - - for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { - list_for_each_safe(tmp, nxt, - &ksocknal_data.ksnd_peers[j]) { - peer = list_entry(tmp, struct ksock_peer, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - ksocknal_peer_del_interface_locked(peer, this_ip); - } - } - } - - 
write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - return rc; -} - -int -ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) -{ - struct lnet_process_id id = {0}; - struct libcfs_ioctl_data *data = arg; - int rc; - - switch (cmd) { - case IOC_LIBCFS_GET_INTERFACE: { - struct ksock_net *net = ni->ni_data; - struct ksock_interface *iface; - - read_lock(&ksocknal_data.ksnd_global_lock); - - if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) { - rc = -ENOENT; - } else { - rc = 0; - iface = &net->ksnn_interfaces[data->ioc_count]; - - data->ioc_u32[0] = iface->ksni_ipaddr; - data->ioc_u32[1] = iface->ksni_netmask; - data->ioc_u32[2] = iface->ksni_npeers; - data->ioc_u32[3] = iface->ksni_nroutes; - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - return rc; - } - - case IOC_LIBCFS_ADD_INTERFACE: - return ksocknal_add_interface(ni, - data->ioc_u32[0], /* IP address */ - data->ioc_u32[1]); /* net mask */ - - case IOC_LIBCFS_DEL_INTERFACE: - return ksocknal_del_interface(ni, - data->ioc_u32[0]); /* IP address */ - - case IOC_LIBCFS_GET_PEER: { - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - int conn_count = 0; - int share_count = 0; - - rc = ksocknal_get_peer_info(ni, data->ioc_count, - &id, &myip, &ip, &port, - &conn_count, &share_count); - if (rc) - return rc; - - data->ioc_nid = id.nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - data->ioc_u32[2] = myip; - data->ioc_u32[3] = conn_count; - data->ioc_u32[4] = id.pid; - return 0; - } - - case IOC_LIBCFS_ADD_PEER: - id.nid = data->ioc_nid; - id.pid = LNET_PID_LUSTRE; - return ksocknal_add_peer(ni, id, - data->ioc_u32[0], /* IP */ - data->ioc_u32[1]); /* port */ - - case IOC_LIBCFS_DEL_PEER: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_del_peer(ni, id, - data->ioc_u32[0]); /* IP */ - - case IOC_LIBCFS_GET_CONN: { - int txmem; - int rxmem; - int nagle; - struct ksock_conn *conn; - - conn = ksocknal_get_conn_by_idx(ni, data->ioc_count); - 
if (!conn) - return -ENOENT; - - ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); - - data->ioc_count = txmem; - data->ioc_nid = conn->ksnc_peer->ksnp_id.nid; - data->ioc_flags = nagle; - data->ioc_u32[0] = conn->ksnc_ipaddr; - data->ioc_u32[1] = conn->ksnc_port; - data->ioc_u32[2] = conn->ksnc_myipaddr; - data->ioc_u32[3] = conn->ksnc_type; - data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt; - data->ioc_u32[5] = rxmem; - data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid; - ksocknal_conn_decref(conn); - return 0; - } - - case IOC_LIBCFS_CLOSE_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_close_matching_conns(id, - data->ioc_u32[0]); - - case IOC_LIBCFS_REGISTER_MYNID: - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) - return 0; - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - - case IOC_LIBCFS_PUSH_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_push(ni, id); - - default: - return -EINVAL; - } - /* not reached */ -} - -static void -ksocknal_free_buffers(void) -{ - LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs)); - - if (ksocknal_data.ksnd_sched_info) { - struct ksock_sched_info *info; - int i; - - cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) - kfree(info->ksi_scheds); - cfs_percpt_free(ksocknal_data.ksnd_sched_info); - } - - kvfree(ksocknal_data.ksnd_peers); - - spin_lock(&ksocknal_data.ksnd_tx_lock); - - if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - struct list_head zlist; - struct ksock_tx *tx; - struct ksock_tx *temp; - - list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs); - list_del_init(&ksocknal_data.ksnd_idle_noop_txs); - spin_unlock(&ksocknal_data.ksnd_tx_lock); - - list_for_each_entry_safe(tx, temp, &zlist, tx_list) { - list_del(&tx->tx_list); - kfree(tx); - } - } else { - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } -} - 
-static void -ksocknal_base_shutdown(void) -{ - struct ksock_sched_info *info; - struct ksock_sched *sched; - int i; - int j; - - LASSERT(!ksocknal_data.ksnd_nnets); - - switch (ksocknal_data.ksnd_init) { - default: - LASSERT(0); - /* fall through */ - case SOCKNAL_INIT_ALL: - case SOCKNAL_INIT_DATA: - LASSERT(ksocknal_data.ksnd_peers); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) - LASSERT(list_empty(&ksocknal_data.ksnd_peers[i])); - - LASSERT(list_empty(&ksocknal_data.ksnd_nets)); - LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns)); - LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns)); - LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs)); - LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes)); - - if (ksocknal_data.ksnd_sched_info) { - cfs_percpt_for_each(info, i, - ksocknal_data.ksnd_sched_info) { - if (!info->ksi_scheds) - continue; - - for (j = 0; j < info->ksi_nthreads_max; j++) { - sched = &info->ksi_scheds[j]; - LASSERT(list_empty( - &sched->kss_tx_conns)); - LASSERT(list_empty( - &sched->kss_rx_conns)); - LASSERT(list_empty( - &sched->kss_zombie_noop_txs)); - LASSERT(!sched->kss_nconns); - } - } - } - - /* flag threads to terminate; wake and wait for them to die */ - ksocknal_data.ksnd_shuttingdown = 1; - wake_up_all(&ksocknal_data.ksnd_connd_waitq); - wake_up_all(&ksocknal_data.ksnd_reaper_waitq); - - if (ksocknal_data.ksnd_sched_info) { - cfs_percpt_for_each(info, i, - ksocknal_data.ksnd_sched_info) { - if (!info->ksi_scheds) - continue; - - for (j = 0; j < info->ksi_nthreads_max; j++) { - sched = &info->ksi_scheds[j]; - wake_up_all(&sched->kss_waitq); - } - } - } - - i = 4; - read_lock(&ksocknal_data.ksnd_global_lock); - while (ksocknal_data.ksnd_nthreads) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "waiting for %d threads to terminate\n", - ksocknal_data.ksnd_nthreads); - read_unlock(&ksocknal_data.ksnd_global_lock); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - read_lock(&ksocknal_data.ksnd_global_lock); - } - read_unlock(&ksocknal_data.ksnd_global_lock); - - ksocknal_free_buffers(); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; - break; - } - - module_put(THIS_MODULE); -} - -static __u64 -ksocknal_new_incarnation(void) -{ - /* The incarnation number is the time this module loaded and it - * identifies this particular instance of the socknal. - */ - return ktime_get_ns(); -} - -static int -ksocknal_base_startup(void) -{ - struct ksock_sched_info *info; - int rc; - int i; - - LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - LASSERT(!ksocknal_data.ksnd_nnets); - - memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */ - - ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; - ksocknal_data.ksnd_peers = kvmalloc_array(ksocknal_data.ksnd_peer_hash_size, - sizeof(struct list_head), - GFP_KERNEL); - if (!ksocknal_data.ksnd_peers) - return -ENOMEM; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) - INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); - - rwlock_init(&ksocknal_data.ksnd_global_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_nets); - - spin_lock_init(&ksocknal_data.ksnd_reaper_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns); - INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns); - INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns); - init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq); - - spin_lock_init(&ksocknal_data.ksnd_connd_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs); - INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes); - init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq); - - spin_lock_init(&ksocknal_data.ksnd_tx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs); - - /* NB memset above zeros whole of ksocknal_data */ - - /* flag 
lists/ptrs/locks initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; - try_module_get(THIS_MODULE); - - ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*info)); - if (!ksocknal_data.ksnd_sched_info) - goto failed; - - cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { - struct ksock_sched *sched; - int nthrs; - - nthrs = cfs_cpt_weight(lnet_cpt_table(), i); - if (*ksocknal_tunables.ksnd_nscheds > 0) { - nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds); - } else { - /* - * max to half of CPUs, assume another half should be - * reserved for upper layer modules - */ - nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs); - } - - info->ksi_nthreads_max = nthrs; - info->ksi_cpt = i; - - info->ksi_scheds = kzalloc_cpt(info->ksi_nthreads_max * sizeof(*sched), - GFP_NOFS, i); - if (!info->ksi_scheds) - goto failed; - - for (; nthrs > 0; nthrs--) { - sched = &info->ksi_scheds[nthrs - 1]; - - sched->kss_info = info; - spin_lock_init(&sched->kss_lock); - INIT_LIST_HEAD(&sched->kss_rx_conns); - INIT_LIST_HEAD(&sched->kss_tx_conns); - INIT_LIST_HEAD(&sched->kss_zombie_noop_txs); - init_waitqueue_head(&sched->kss_waitq); - } - } - - ksocknal_data.ksnd_connd_starting = 0; - ksocknal_data.ksnd_connd_failed_stamp = 0; - ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds(); - /* - * must have at least 2 connds to remain responsive to accepts while - * connecting - */ - if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1) - *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1; - - if (*ksocknal_tunables.ksnd_nconnds_max < - *ksocknal_tunables.ksnd_nconnds) { - ksocknal_tunables.ksnd_nconnds_max = - ksocknal_tunables.ksnd_nconnds; - } - - for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) { - char name[16]; - - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - ksocknal_data.ksnd_connd_starting++; - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - - snprintf(name, sizeof(name), "socknal_cd%02d", i); - rc = 
ksocknal_thread_start(ksocknal_connd, - (void *)((uintptr_t)i), name); - if (rc) { - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - ksocknal_data.ksnd_connd_starting--; - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - CERROR("Can't spawn socknal connd: %d\n", rc); - goto failed; - } - } - - rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper"); - if (rc) { - CERROR("Can't spawn socknal reaper: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; - - return 0; - - failed: - ksocknal_base_shutdown(); - return -ENETDOWN; -} - -static void -ksocknal_debug_peerhash(struct lnet_ni *ni) -{ - struct ksock_peer *peer = NULL; - struct list_head *tmp; - int i; - - read_lock(&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(tmp, struct ksock_peer, ksnp_list); - - if (peer->ksnp_ni == ni) - break; - - peer = NULL; - } - } - - if (peer) { - struct ksock_route *route; - struct ksock_conn *conn; - - CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n", - libcfs_id2str(peer->ksnp_id), - atomic_read(&peer->ksnp_refcount), - peer->ksnp_sharecount, peer->ksnp_closing, - peer->ksnp_accepting, peer->ksnp_error, - peer->ksnp_zc_next_cookie, - !list_empty(&peer->ksnp_tx_queue), - !list_empty(&peer->ksnp_zc_req_list)); - - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n", - atomic_read(&route->ksnr_refcount), - route->ksnr_scheduled, route->ksnr_connecting, - route->ksnr_connected, route->ksnr_deleted); - } - - list_for_each(tmp, &peer->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); - CWARN("Conn: ref %d, sref %d, t %d, c %d\n", - atomic_read(&conn->ksnc_conn_refcount), - 
atomic_read(&conn->ksnc_sock_refcount), - conn->ksnc_type, conn->ksnc_closing); - } - } - - read_unlock(&ksocknal_data.ksnd_global_lock); -} - -void -ksocknal_shutdown(struct lnet_ni *ni) -{ - struct ksock_net *net = ni->ni_data; - int i; - struct lnet_process_id anyid = {0}; - - anyid.nid = LNET_NID_ANY; - anyid.pid = LNET_PID_ANY; - - LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); - LASSERT(ksocknal_data.ksnd_nnets > 0); - - spin_lock_bh(&net->ksnn_lock); - net->ksnn_shutdown = 1; /* prevent new peers */ - spin_unlock_bh(&net->ksnn_lock); - - /* Delete all peers */ - ksocknal_del_peer(ni, anyid, 0); - - /* Wait for all peer state to clean up */ - i = 2; - spin_lock_bh(&net->ksnn_lock); - while (net->ksnn_npeers) { - spin_unlock_bh(&net->ksnn_lock); - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "waiting for %d peers to disconnect\n", - net->ksnn_npeers); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - - ksocknal_debug_peerhash(ni); - - spin_lock_bh(&net->ksnn_lock); - } - spin_unlock_bh(&net->ksnn_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT(!net->ksnn_interfaces[i].ksni_npeers); - LASSERT(!net->ksnn_interfaces[i].ksni_nroutes); - } - - list_del(&net->ksnn_list); - kfree(net); - - ksocknal_data.ksnd_nnets--; - if (!ksocknal_data.ksnd_nnets) - ksocknal_base_shutdown(); -} - -static int -ksocknal_enumerate_interfaces(struct ksock_net *net) -{ - char **names; - int i; - int j; - int rc; - int n; - - n = lnet_ipif_enumerate(&names); - if (n <= 0) { - CERROR("Can't enumerate interfaces: %d\n", n); - return n; - } - - for (i = j = 0; i < n; i++) { - int up; - __u32 ip; - __u32 mask; - - if (!strcmp(names[i], "lo")) /* skip the loopback IF */ - continue; - - rc = lnet_ipif_query(names[i], &up, &ip, &mask); - if (rc) { - CWARN("Can't get interface %s info: %d\n", - names[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s (down)\n", - names[i]); - continue; - } - - if (j == 
LNET_MAX_INTERFACES) { - CWARN("Ignoring interface %s (too many interfaces)\n", - names[i]); - continue; - } - - net->ksnn_interfaces[j].ksni_ipaddr = ip; - net->ksnn_interfaces[j].ksni_netmask = mask; - strlcpy(net->ksnn_interfaces[j].ksni_name, - names[i], sizeof(net->ksnn_interfaces[j].ksni_name)); - j++; - } - - lnet_ipif_free_enumeration(names, n); - - if (!j) - CERROR("Can't find any usable interfaces\n"); - - return j; -} - -static int -ksocknal_search_new_ipif(struct ksock_net *net) -{ - int new_ipif = 0; - int i; - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - char *ifnam = &net->ksnn_interfaces[i].ksni_name[0]; - char *colon = strchr(ifnam, ':'); - int found = 0; - struct ksock_net *tmp; - int j; - - if (colon) /* ignore alias device */ - *colon = 0; - - list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) { - for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) { - char *ifnam2 = - &tmp->ksnn_interfaces[j].ksni_name[0]; - char *colon2 = strchr(ifnam2, ':'); - - if (colon2) - *colon2 = 0; - - found = !strcmp(ifnam, ifnam2); - if (colon2) - *colon2 = ':'; - } - if (found) - break; - } - - new_ipif += !found; - if (colon) - *colon = ':'; - } - - return new_ipif; -} - -static int -ksocknal_start_schedulers(struct ksock_sched_info *info) -{ - int nthrs; - int rc = 0; - int i; - - if (!info->ksi_nthreads) { - if (*ksocknal_tunables.ksnd_nscheds > 0) { - nthrs = info->ksi_nthreads_max; - } else { - nthrs = cfs_cpt_weight(lnet_cpt_table(), - info->ksi_cpt); - nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs); - nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs); - } - nthrs = min(nthrs, info->ksi_nthreads_max); - } else { - LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max); - /* increase two threads if there is new interface */ - nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads); - } - - for (i = 0; i < nthrs; i++) { - long id; - char name[20]; - struct ksock_sched *sched; - - id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i); - 
sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)]; - snprintf(name, sizeof(name), "socknal_sd%02d_%02d", - info->ksi_cpt, (int)(sched - &info->ksi_scheds[0])); - - rc = ksocknal_thread_start(ksocknal_scheduler, - (void *)id, name); - if (!rc) - continue; - - CERROR("Can't spawn thread %d for scheduler[%d]: %d\n", - info->ksi_cpt, info->ksi_nthreads + i, rc); - break; - } - - info->ksi_nthreads += i; - return rc; -} - -static int -ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts) -{ - int newif = ksocknal_search_new_ipif(net); - int rc; - int i; - - LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table())); - - for (i = 0; i < ncpts; i++) { - struct ksock_sched_info *info; - int cpt = !cpts ? i : cpts[i]; - - LASSERT(cpt < cfs_cpt_number(lnet_cpt_table())); - info = ksocknal_data.ksnd_sched_info[cpt]; - - if (!newif && info->ksi_nthreads > 0) - continue; - - rc = ksocknal_start_schedulers(info); - if (rc) - return rc; - } - return 0; -} - -int -ksocknal_startup(struct lnet_ni *ni) -{ - struct ksock_net *net; - int rc; - int i; - - LASSERT(ni->ni_lnd == &the_ksocklnd); - - if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { - rc = ksocknal_base_startup(); - if (rc) - return rc; - } - - net = kzalloc(sizeof(*net), GFP_NOFS); - if (!net) - goto fail_0; - - spin_lock_init(&net->ksnn_lock); - net->ksnn_incarnation = ksocknal_new_incarnation(); - ni->ni_data = net; - ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout; - ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits; - ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits; - ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits; - - if (!ni->ni_interfaces[0]) { - rc = ksocknal_enumerate_interfaces(net); - if (rc <= 0) - goto fail_1; - - net->ksnn_ninterfaces = 1; - } else { - for (i = 0; i < LNET_MAX_INTERFACES; i++) { - int up; - - if (!ni->ni_interfaces[i]) - break; - - rc = lnet_ipif_query(ni->ni_interfaces[i], &up, - &net->ksnn_interfaces[i].ksni_ipaddr, - 
&net->ksnn_interfaces[i].ksni_netmask); - - if (rc) { - CERROR("Can't get interface %s info: %d\n", - ni->ni_interfaces[i], rc); - goto fail_1; - } - - if (!up) { - CERROR("Interface %s is down\n", - ni->ni_interfaces[i]); - goto fail_1; - } - - strlcpy(net->ksnn_interfaces[i].ksni_name, - ni->ni_interfaces[i], - sizeof(net->ksnn_interfaces[i].ksni_name)); - } - net->ksnn_ninterfaces = i; - } - - /* call it before add it to ksocknal_data.ksnd_nets */ - rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts); - if (rc) - goto fail_1; - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - net->ksnn_interfaces[0].ksni_ipaddr); - list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets); - - ksocknal_data.ksnd_nnets++; - - return 0; - - fail_1: - kfree(net); - fail_0: - if (!ksocknal_data.ksnd_nnets) - ksocknal_base_shutdown(); - - return -ENETDOWN; -} - -static void __exit ksocklnd_exit(void) -{ - lnet_unregister_lnd(&the_ksocklnd); -} - -static int __init ksocklnd_init(void) -{ - int rc; - - /* check ksnr_connected/connecting field large enough */ - BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4); - BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN); - - /* initialize the_ksocklnd */ - the_ksocklnd.lnd_type = SOCKLND; - the_ksocklnd.lnd_startup = ksocknal_startup; - the_ksocklnd.lnd_shutdown = ksocknal_shutdown; - the_ksocklnd.lnd_ctl = ksocknal_ctl; - the_ksocklnd.lnd_send = ksocknal_send; - the_ksocklnd.lnd_recv = ksocknal_recv; - the_ksocklnd.lnd_notify = ksocknal_notify; - the_ksocklnd.lnd_query = ksocknal_query; - the_ksocklnd.lnd_accept = ksocknal_accept; - - rc = ksocknal_tunables_init(); - if (rc) - return rc; - - rc = libcfs_setup(); - if (rc) - return rc; - - lnet_register_lnd(&the_ksocklnd); - - return 0; -} - -MODULE_AUTHOR("OpenSFS, Inc. 
"); -MODULE_DESCRIPTION("TCP Socket LNet Network Driver"); -MODULE_VERSION("2.7.0"); -MODULE_LICENSE("GPL"); - -module_init(ksocklnd_init); -module_exit(ksocklnd_exit); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h deleted file mode 100644 index 4e5c89a692a3..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h +++ /dev/null @@ -1,704 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2012, Intel Corporation. - * - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - * - * This file is part of Lustre, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#ifndef _SOCKLND_SOCKLND_H_ -#define _SOCKLND_SOCKLND_H_ - -#define DEBUG_PORTAL_ALLOC -#define DEBUG_SUBSYSTEM S_LND - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/* assume one thread for each connection type */ -#define SOCKNAL_NSCHEDS 3 -#define SOCKNAL_NSCHEDS_HIGH (SOCKNAL_NSCHEDS << 1) - -#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define SOCKNAL_INSANITY_RECONN 5000 /* connd is trying on reconn infinitely */ -#define SOCKNAL_ENOMEM_RETRY 1 /* jiffies between retries */ - -#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */ -#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */ - -#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */ - -/* - * risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled). - * no risk if we're not running on a CONFIG_HIGHMEM platform. 
- */ -#ifdef CONFIG_HIGHMEM -# define SOCKNAL_RISK_KMAP_DEADLOCK 0 -#else -# define SOCKNAL_RISK_KMAP_DEADLOCK 1 -#endif - -struct ksock_sched_info; - -struct ksock_sched { /* per scheduler state */ - spinlock_t kss_lock; /* serialise */ - struct list_head kss_rx_conns; /* conn waiting to be read */ - struct list_head kss_tx_conns; /* conn waiting to be written */ - struct list_head kss_zombie_noop_txs; /* zombie noop tx list */ - wait_queue_head_t kss_waitq; /* where scheduler sleeps */ - int kss_nconns; /* # connections assigned to - * this scheduler - */ - struct ksock_sched_info *kss_info; /* owner of it */ -}; - -struct ksock_sched_info { - int ksi_nthreads_max; /* max allowed threads */ - int ksi_nthreads; /* number of threads */ - int ksi_cpt; /* CPT id */ - struct ksock_sched *ksi_scheds; /* array of schedulers */ -}; - -#define KSOCK_CPT_SHIFT 16 -#define KSOCK_THREAD_ID(cpt, sid) (((cpt) << KSOCK_CPT_SHIFT) | (sid)) -#define KSOCK_THREAD_CPT(id) ((id) >> KSOCK_CPT_SHIFT) -#define KSOCK_THREAD_SID(id) ((id) & ((1UL << KSOCK_CPT_SHIFT) - 1)) - -struct ksock_interface { /* in-use interface */ - __u32 ksni_ipaddr; /* interface's IP address */ - __u32 ksni_netmask; /* interface's network mask */ - int ksni_nroutes; /* # routes using (active) */ - int ksni_npeers; /* # peers using (passive) */ - char ksni_name[IFNAMSIZ]; /* interface name */ -}; - -struct ksock_tunables { - int *ksnd_timeout; /* "stuck" socket timeout - * (seconds) - */ - int *ksnd_nscheds; /* # scheduler threads in each - * pool while starting - */ - int *ksnd_nconnds; /* # connection daemons */ - int *ksnd_nconnds_max; /* max # connection daemons */ - int *ksnd_min_reconnectms; /* first connection retry after - * (ms)... - */ - int *ksnd_max_reconnectms; /* ...exponentially increasing to - * this - */ - int *ksnd_eager_ack; /* make TCP ack eagerly? */ - int *ksnd_typed_conns; /* drive sockets by type? 
*/ - int *ksnd_min_bulk; /* smallest "large" message */ - int *ksnd_tx_buffer_size; /* socket tx buffer size */ - int *ksnd_rx_buffer_size; /* socket rx buffer size */ - int *ksnd_nagle; /* enable NAGLE? */ - int *ksnd_round_robin; /* round robin for multiple - * interfaces - */ - int *ksnd_keepalive; /* # secs for sending keepalive - * NOOP - */ - int *ksnd_keepalive_idle; /* # idle secs before 1st probe - */ - int *ksnd_keepalive_count; /* # probes */ - int *ksnd_keepalive_intvl; /* time between probes */ - int *ksnd_credits; /* # concurrent sends */ - int *ksnd_peertxcredits; /* # concurrent sends to 1 peer - */ - int *ksnd_peerrtrcredits; /* # per-peer router buffer - * credits - */ - int *ksnd_peertimeout; /* seconds to consider peer dead - */ - int *ksnd_enable_csum; /* enable check sum */ - int *ksnd_inject_csum_error; /* set non-zero to inject - * checksum error - */ - int *ksnd_nonblk_zcack; /* always send zc-ack on - * non-blocking connection - */ - unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload - * size - */ - int *ksnd_zc_recv; /* enable ZC receive (for - * Chelsio TOE) - */ - int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to - * enable ZC receive - */ -}; - -struct ksock_net { - __u64 ksnn_incarnation; /* my epoch */ - spinlock_t ksnn_lock; /* serialise */ - struct list_head ksnn_list; /* chain on global list */ - int ksnn_npeers; /* # peers */ - int ksnn_shutdown; /* shutting down? 
*/ - int ksnn_ninterfaces; /* IP interfaces */ - struct ksock_interface ksnn_interfaces[LNET_MAX_INTERFACES]; -}; - -/** connd timeout */ -#define SOCKNAL_CONND_TIMEOUT 120 -/** reserved thread for accepting & creating new connd */ -#define SOCKNAL_CONND_RESV 1 - -struct ksock_nal_data { - int ksnd_init; /* initialisation state - */ - int ksnd_nnets; /* # networks set up */ - struct list_head ksnd_nets; /* list of nets */ - rwlock_t ksnd_global_lock; /* stabilize peer/conn - * ops - */ - struct list_head *ksnd_peers; /* hash table of all my - * known peers - */ - int ksnd_peer_hash_size; /* size of ksnd_peers */ - - int ksnd_nthreads; /* # live threads */ - int ksnd_shuttingdown; /* tell threads to exit - */ - struct ksock_sched_info **ksnd_sched_info; /* schedulers info */ - - atomic_t ksnd_nactive_txs; /* #active txs */ - - struct list_head ksnd_deathrow_conns; /* conns to close: - * reaper_lock - */ - struct list_head ksnd_zombie_conns; /* conns to free: - * reaper_lock - */ - struct list_head ksnd_enomem_conns; /* conns to retry: - * reaper_lock - */ - wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ - unsigned long ksnd_reaper_waketime; /* when reaper will wake - */ - spinlock_t ksnd_reaper_lock; /* serialise */ - - int ksnd_enomem_tx; /* test ENOMEM sender */ - int ksnd_stall_tx; /* test sluggish sender - */ - int ksnd_stall_rx; /* test sluggish - * receiver - */ - struct list_head ksnd_connd_connreqs; /* incoming connection - * requests - */ - struct list_head ksnd_connd_routes; /* routes waiting to be - * connected - */ - wait_queue_head_t ksnd_connd_waitq; /* connds sleep here */ - int ksnd_connd_connecting; /* # connds connecting - */ - time64_t ksnd_connd_failed_stamp;/* time stamp of the - * last failed - * connecting attempt - */ - time64_t ksnd_connd_starting_stamp;/* time stamp of the - * last starting connd - */ - unsigned int ksnd_connd_starting; /* # starting connd */ - unsigned int ksnd_connd_running; /* # running connd */ - 
spinlock_t ksnd_connd_lock; /* serialise */ - - struct list_head ksnd_idle_noop_txs; /* list head for freed - * noop tx - */ - spinlock_t ksnd_tx_lock; /* serialise, g_lock - * unsafe - */ -}; - -#define SOCKNAL_INIT_NOTHING 0 -#define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_ALL 2 - -/* - * A packet just assembled for transmission is represented by 1 or more - * struct iovec fragments (the first frag contains the portals header), - * followed by 0 or more struct bio_vec fragments. - * - * On the receive side, initially 1 struct iovec fragment is posted for - * receive (the header). Once the header has been received, the payload is - * received into either struct iovec or struct bio_vec fragments, depending on - * what the header matched or whether the message needs forwarding. - */ -struct ksock_conn; /* forward ref */ -struct ksock_peer; /* forward ref */ -struct ksock_route; /* forward ref */ -struct ksock_proto; /* forward ref */ - -struct ksock_tx { /* transmit packet */ - struct list_head tx_list; /* queue on conn for transmission etc - */ - struct list_head tx_zc_list; /* queue on peer for ZC request */ - atomic_t tx_refcount; /* tx reference count */ - int tx_nob; /* # packet bytes */ - int tx_resid; /* residual bytes */ - int tx_niov; /* # packet iovec frags */ - struct kvec *tx_iov; /* packet iovec frags */ - int tx_nkiov; /* # packet page frags */ - unsigned short tx_zc_aborted; /* aborted ZC request */ - unsigned short tx_zc_capable:1; /* payload is large enough for ZC */ - unsigned short tx_zc_checked:1; /* Have I checked if I should ZC? 
*/ - unsigned short tx_nonblk:1; /* it's a non-blocking ACK */ - struct bio_vec *tx_kiov; /* packet page frags */ - struct ksock_conn *tx_conn; /* owning conn */ - struct lnet_msg *tx_lnetmsg; /* lnet message for lnet_finalize() - */ - unsigned long tx_deadline; /* when (in jiffies) tx times out */ - struct ksock_msg tx_msg; /* socklnd message buffer */ - int tx_desc_size; /* size of this descriptor */ - union { - struct { - struct kvec iov; /* virt hdr */ - struct bio_vec kiov[0]; /* paged payload */ - } paged; - struct { - struct kvec iov[1]; /* virt hdr + payload */ - } virt; - } tx_frags; -}; - -#define KSOCK_NOOP_TX_SIZE (offsetof(struct ksock_tx, tx_frags.paged.kiov[0])) - -/* network zero copy callback descriptor embedded in struct ksock_tx */ - -#define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */ -#define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */ -#define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */ -#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */ -#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */ -#define SOCKNAL_RX_SLOP 6 /* skipping body */ - -struct ksock_conn { - struct ksock_peer *ksnc_peer; /* owning peer */ - struct ksock_route *ksnc_route; /* owning route */ - struct list_head ksnc_list; /* stash on peer's conn list */ - struct socket *ksnc_sock; /* actual socket */ - void *ksnc_saved_data_ready; /* socket's original - * data_ready() callback - */ - void *ksnc_saved_write_space; /* socket's original - * write_space() callback - */ - atomic_t ksnc_conn_refcount;/* conn refcount */ - atomic_t ksnc_sock_refcount;/* sock refcount */ - struct ksock_sched *ksnc_scheduler; /* who schedules this connection - */ - __u32 ksnc_myipaddr; /* my IP */ - __u32 ksnc_ipaddr; /* peer's IP */ - int ksnc_port; /* peer's port */ - signed int ksnc_type:3; /* type of connection, should be - * signed value - */ - unsigned int ksnc_closing:1; /* being shut down */ - unsigned int 
ksnc_flip:1; /* flip or not, only for V2.x */ - unsigned int ksnc_zc_capable:1; /* enable to ZC */ - struct ksock_proto *ksnc_proto; /* protocol for the connection */ - - /* reader */ - struct list_head ksnc_rx_list; /* where I enq waiting input or a - * forwarding descriptor - */ - unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times - * out - */ - __u8 ksnc_rx_started; /* started receiving a message */ - __u8 ksnc_rx_ready; /* data ready to read */ - __u8 ksnc_rx_scheduled; /* being progressed */ - __u8 ksnc_rx_state; /* what is being read */ - int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - struct iov_iter ksnc_rx_to; /* copy destination */ - struct kvec ksnc_rx_iov_space[LNET_MAX_IOV]; /* space for frag descriptors */ - __u32 ksnc_rx_csum; /* partial checksum for incoming - * data - */ - void *ksnc_cookie; /* rx lnet_finalize passthru arg - */ - struct ksock_msg ksnc_msg; /* incoming message buffer: - * V2.x message takes the - * whole struct - * V1.x message is a bare - * struct lnet_hdr, it's stored in - * ksnc_msg.ksm_u.lnetmsg - */ - /* WRITER */ - struct list_head ksnc_tx_list; /* where I enq waiting for output - * space - */ - struct list_head ksnc_tx_queue; /* packets waiting to be sent */ - struct ksock_tx *ksnc_tx_carrier; /* next TX that can carry a LNet - * message or ZC-ACK - */ - unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out - */ - int ksnc_tx_bufnob; /* send buffer marker */ - atomic_t ksnc_tx_nob; /* # bytes queued */ - int ksnc_tx_ready; /* write space */ - int ksnc_tx_scheduled; /* being progressed */ - unsigned long ksnc_tx_last_post; /* time stamp of the last posted - * TX - */ -}; - -struct ksock_route { - struct list_head ksnr_list; /* chain on peer route list */ - struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */ - struct ksock_peer *ksnr_peer; /* owning peer */ - atomic_t ksnr_refcount; /* # users */ - unsigned long ksnr_timeout; /* when (in jiffies) reconnection - * can happen 
next - */ - long ksnr_retry_interval; /* how long between retries */ - __u32 ksnr_myipaddr; /* my IP */ - __u32 ksnr_ipaddr; /* IP address to connect to */ - int ksnr_port; /* port to connect to */ - unsigned int ksnr_scheduled:1; /* scheduled for attention */ - unsigned int ksnr_connecting:1; /* connection establishment in - * progress - */ - unsigned int ksnr_connected:4; /* connections established by - * type - */ - unsigned int ksnr_deleted:1; /* been removed from peer? */ - unsigned int ksnr_share_count; /* created explicitly? */ - int ksnr_conn_count; /* # conns established by this - * route - */ -}; - -#define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */ - -struct ksock_peer { - struct list_head ksnp_list; /* stash on global peer list */ - unsigned long ksnp_last_alive; /* when (in jiffies) I was last - * alive - */ - struct lnet_process_id ksnp_id; /* who's on the other end(s) */ - atomic_t ksnp_refcount; /* # users */ - int ksnp_sharecount; /* lconf usage counter */ - int ksnp_closing; /* being closed */ - int ksnp_accepting; /* # passive connections pending - */ - int ksnp_error; /* errno on closing last conn */ - __u64 ksnp_zc_next_cookie; /* ZC completion cookie */ - __u64 ksnp_incarnation; /* latest known peer incarnation - */ - struct ksock_proto *ksnp_proto; /* latest known peer protocol */ - struct list_head ksnp_conns; /* all active connections */ - struct list_head ksnp_routes; /* routes */ - struct list_head ksnp_tx_queue; /* waiting packets */ - spinlock_t ksnp_lock; /* serialize, g_lock unsafe */ - struct list_head ksnp_zc_req_list; /* zero copy requests wait for - * ACK - */ - unsigned long ksnp_send_keepalive; /* time to send keepalive */ - struct lnet_ni *ksnp_ni; /* which network */ - int ksnp_n_passive_ips; /* # of... 
*/ - - /* preferred local interfaces */ - __u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; -}; - -struct ksock_connreq { - struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */ - struct lnet_ni *ksncr_ni; /* chosen NI */ - struct socket *ksncr_sock; /* accepted socket */ -}; - -extern struct ksock_nal_data ksocknal_data; -extern struct ksock_tunables ksocknal_tunables; - -#define SOCKNAL_MATCH_NO 0 /* TX can't match type of connection */ -#define SOCKNAL_MATCH_YES 1 /* TX matches type of connection */ -#define SOCKNAL_MATCH_MAY 2 /* TX can be sent on the connection, but not - * preferred - */ - -struct ksock_proto { - /* version number of protocol */ - int pro_version; - - /* handshake function */ - int (*pro_send_hello)(struct ksock_conn *, struct ksock_hello_msg *); - - /* handshake function */ - int (*pro_recv_hello)(struct ksock_conn *, struct ksock_hello_msg *, int); - - /* message pack */ - void (*pro_pack)(struct ksock_tx *); - - /* message unpack */ - void (*pro_unpack)(struct ksock_msg *); - - /* queue tx on the connection */ - struct ksock_tx *(*pro_queue_tx_msg)(struct ksock_conn *, struct ksock_tx *); - - /* queue ZC ack on the connection */ - int (*pro_queue_tx_zcack)(struct ksock_conn *, struct ksock_tx *, __u64); - - /* handle ZC request */ - int (*pro_handle_zcreq)(struct ksock_conn *, __u64, int); - - /* handle ZC ACK */ - int (*pro_handle_zcack)(struct ksock_conn *, __u64, __u64); - - /* - * msg type matches the connection type: - * return value: - * return MATCH_NO : no - * return MATCH_YES : matching type - * return MATCH_MAY : can be backup - */ - int (*pro_match_tx)(struct ksock_conn *, struct ksock_tx *, int); -}; - -extern struct ksock_proto ksocknal_protocol_v1x; -extern struct ksock_proto ksocknal_protocol_v2x; -extern struct ksock_proto ksocknal_protocol_v3x; - -#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR -#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR -#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR - -#ifndef 
CPU_MASK_NONE -#define CPU_MASK_NONE 0UL -#endif - -static inline int -ksocknal_route_mask(void) -{ - if (!*ksocknal_tunables.ksnd_typed_conns) - return (1 << SOCKLND_CONN_ANY); - - return ((1 << SOCKLND_CONN_CONTROL) | - (1 << SOCKLND_CONN_BULK_IN) | - (1 << SOCKLND_CONN_BULK_OUT)); -} - -static inline struct list_head * -ksocknal_nid2peerlist(lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size; - - return &ksocknal_data.ksnd_peers[hash]; -} - -static inline void -ksocknal_conn_addref(struct ksock_conn *conn) -{ - LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0); - atomic_inc(&conn->ksnc_conn_refcount); -} - -void ksocknal_queue_zombie_conn(struct ksock_conn *conn); -void ksocknal_finalize_zcreq(struct ksock_conn *conn); - -static inline void -ksocknal_conn_decref(struct ksock_conn *conn) -{ - LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0); - if (atomic_dec_and_test(&conn->ksnc_conn_refcount)) - ksocknal_queue_zombie_conn(conn); -} - -static inline int -ksocknal_connsock_addref(struct ksock_conn *conn) -{ - int rc = -ESHUTDOWN; - - read_lock(&ksocknal_data.ksnd_global_lock); - if (!conn->ksnc_closing) { - LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0); - atomic_inc(&conn->ksnc_sock_refcount); - rc = 0; - } - read_unlock(&ksocknal_data.ksnd_global_lock); - - return rc; -} - -static inline void -ksocknal_connsock_decref(struct ksock_conn *conn) -{ - LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0); - if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) { - LASSERT(conn->ksnc_closing); - sock_release(conn->ksnc_sock); - conn->ksnc_sock = NULL; - ksocknal_finalize_zcreq(conn); - } -} - -static inline void -ksocknal_tx_addref(struct ksock_tx *tx) -{ - LASSERT(atomic_read(&tx->tx_refcount) > 0); - atomic_inc(&tx->tx_refcount); -} - -void ksocknal_tx_prep(struct ksock_conn *, struct ksock_tx *tx); -void ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx); - -static inline void -ksocknal_tx_decref(struct 
ksock_tx *tx) -{ - LASSERT(atomic_read(&tx->tx_refcount) > 0); - if (atomic_dec_and_test(&tx->tx_refcount)) - ksocknal_tx_done(NULL, tx); -} - -static inline void -ksocknal_route_addref(struct ksock_route *route) -{ - LASSERT(atomic_read(&route->ksnr_refcount) > 0); - atomic_inc(&route->ksnr_refcount); -} - -void ksocknal_destroy_route(struct ksock_route *route); - -static inline void -ksocknal_route_decref(struct ksock_route *route) -{ - LASSERT(atomic_read(&route->ksnr_refcount) > 0); - if (atomic_dec_and_test(&route->ksnr_refcount)) - ksocknal_destroy_route(route); -} - -static inline void -ksocknal_peer_addref(struct ksock_peer *peer) -{ - LASSERT(atomic_read(&peer->ksnp_refcount) > 0); - atomic_inc(&peer->ksnp_refcount); -} - -void ksocknal_destroy_peer(struct ksock_peer *peer); - -static inline void -ksocknal_peer_decref(struct ksock_peer *peer) -{ - LASSERT(atomic_read(&peer->ksnp_refcount) > 0); - if (atomic_dec_and_test(&peer->ksnp_refcount)) - ksocknal_destroy_peer(peer); -} - -int ksocknal_startup(struct lnet_ni *ni); -void ksocknal_shutdown(struct lnet_ni *ni); -int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg); -int ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg); -int ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, - int delayed, struct iov_iter *to, unsigned int rlen); -int ksocknal_accept(struct lnet_ni *ni, struct socket *sock); - -int ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip, - int port); -struct ksock_peer *ksocknal_find_peer_locked(struct lnet_ni *ni, - struct lnet_process_id id); -struct ksock_peer *ksocknal_find_peer(struct lnet_ni *ni, - struct lnet_process_id id); -void ksocknal_peer_failed(struct ksock_peer *peer); -int ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, - struct socket *sock, int type); -void ksocknal_close_conn_locked(struct ksock_conn *conn, int why); -void ksocknal_terminate_conn(struct ksock_conn *conn); 
-void ksocknal_destroy_conn(struct ksock_conn *conn); -int ksocknal_close_peer_conns_locked(struct ksock_peer *peer, - __u32 ipaddr, int why); -int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why); -int ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr); -struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer, - struct ksock_tx *tx, int nonblk); - -int ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx, - struct lnet_process_id id); -struct ksock_tx *ksocknal_alloc_tx(int type, int size); -void ksocknal_free_tx(struct ksock_tx *tx); -struct ksock_tx *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk); -void ksocknal_next_tx_carrier(struct ksock_conn *conn); -void ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn); -void ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error); -void ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive); -void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when); -int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name); -void ksocknal_thread_fini(void); -void ksocknal_launch_all_connections_locked(struct ksock_peer *peer); -struct ksock_route *ksocknal_find_connectable_route_locked(struct ksock_peer *peer); -struct ksock_route *ksocknal_find_connecting_route_locked(struct ksock_peer *peer); -int ksocknal_new_packet(struct ksock_conn *conn, int skip); -int ksocknal_scheduler(void *arg); -int ksocknal_connd(void *arg); -int ksocknal_reaper(void *arg); -int ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn, - lnet_nid_t peer_nid, struct ksock_hello_msg *hello); -int ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn, - struct ksock_hello_msg *hello, - struct lnet_process_id *id, - __u64 *incarnation); -void ksocknal_read_callback(struct ksock_conn *conn); -void ksocknal_write_callback(struct ksock_conn *conn); - -int ksocknal_lib_zc_capable(struct ksock_conn 
*conn); -void ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn); -void ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn); -void ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn); -void ksocknal_lib_push_conn(struct ksock_conn *conn); -int ksocknal_lib_get_conn_addrs(struct ksock_conn *conn); -int ksocknal_lib_setup_sock(struct socket *so); -int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx); -int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx); -void ksocknal_lib_eager_ack(struct ksock_conn *conn); -int ksocknal_lib_recv(struct ksock_conn *conn); -int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, - int *rxmem, int *nagle); - -void ksocknal_read_callback(struct ksock_conn *conn); -void ksocknal_write_callback(struct ksock_conn *conn); - -int ksocknal_tunables_init(void); - -void ksocknal_lib_csum_tx(struct ksock_tx *tx); - -int ksocknal_lib_memory_pressure(struct ksock_conn *conn); -int ksocknal_lib_bind_thread_to_cpu(int id); - -#endif /* _SOCKLND_SOCKLND_H_ */ diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c deleted file mode 100644 index 01b31a6bb588..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ /dev/null @@ -1,2586 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2012, Intel Corporation. - * - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include -#include "socklnd.h" - -struct ksock_tx * -ksocknal_alloc_tx(int type, int size) -{ - struct ksock_tx *tx = NULL; - - if (type == KSOCK_MSG_NOOP) { - LASSERT(size == KSOCK_NOOP_TX_SIZE); - - /* searching for a noop tx in free list */ - spin_lock(&ksocknal_data.ksnd_tx_lock); - - if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next, - struct ksock_tx, tx_list); - LASSERT(tx->tx_desc_size == size); - list_del(&tx->tx_list); - } - - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } - - if (!tx) - tx = kzalloc(size, GFP_NOFS); - - if (!tx) - return NULL; - - atomic_set(&tx->tx_refcount, 1); - tx->tx_zc_aborted = 0; - tx->tx_zc_capable = 0; - tx->tx_zc_checked = 0; - tx->tx_desc_size = size; - - atomic_inc(&ksocknal_data.ksnd_nactive_txs); - - return tx; -} - -struct ksock_tx * -ksocknal_alloc_tx_noop(__u64 cookie, int nonblk) -{ - struct ksock_tx *tx; - - tx = ksocknal_alloc_tx(KSOCK_MSG_NOOP, KSOCK_NOOP_TX_SIZE); - if (!tx) { - CERROR("Can't allocate noop tx desc\n"); - return NULL; - } - - tx->tx_conn = NULL; - tx->tx_lnetmsg = NULL; - tx->tx_kiov = NULL; - tx->tx_nkiov = 0; - tx->tx_iov = tx->tx_frags.virt.iov; - tx->tx_niov = 1; - tx->tx_nonblk = nonblk; - - tx->tx_msg.ksm_csum = 0; - tx->tx_msg.ksm_type = KSOCK_MSG_NOOP; - tx->tx_msg.ksm_zc_cookies[0] = 0; - tx->tx_msg.ksm_zc_cookies[1] = cookie; - - return tx; -} - -void -ksocknal_free_tx(struct ksock_tx *tx) -{ - atomic_dec(&ksocknal_data.ksnd_nactive_txs); - - if (!tx->tx_lnetmsg && tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) { - /* it's a noop tx */ - spin_lock(&ksocknal_data.ksnd_tx_lock); - - list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs); - - spin_unlock(&ksocknal_data.ksnd_tx_lock); 
- } else { - kfree(tx); - } -} - -static int -ksocknal_send_iov(struct ksock_conn *conn, struct ksock_tx *tx) -{ - struct kvec *iov = tx->tx_iov; - int nob; - int rc; - - LASSERT(tx->tx_niov > 0); - - /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */ - rc = ksocknal_lib_send_iov(conn, tx); - - if (rc <= 0) /* sent nothing? */ - return rc; - - nob = rc; - LASSERT(nob <= tx->tx_resid); - tx->tx_resid -= nob; - - /* "consume" iov */ - do { - LASSERT(tx->tx_niov > 0); - - if (nob < (int)iov->iov_len) { - iov->iov_base = (void *)((char *)iov->iov_base + nob); - iov->iov_len -= nob; - return rc; - } - - nob -= iov->iov_len; - tx->tx_iov = ++iov; - tx->tx_niov--; - } while (nob); - - return rc; -} - -static int -ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx) -{ - struct bio_vec *kiov = tx->tx_kiov; - int nob; - int rc; - - LASSERT(!tx->tx_niov); - LASSERT(tx->tx_nkiov > 0); - - /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */ - rc = ksocknal_lib_send_kiov(conn, tx); - - if (rc <= 0) /* sent nothing? */ - return rc; - - nob = rc; - LASSERT(nob <= tx->tx_resid); - tx->tx_resid -= nob; - - /* "consume" kiov */ - do { - LASSERT(tx->tx_nkiov > 0); - - if (nob < (int)kiov->bv_len) { - kiov->bv_offset += nob; - kiov->bv_len -= nob; - return rc; - } - - nob -= (int)kiov->bv_len; - tx->tx_kiov = ++kiov; - tx->tx_nkiov--; - } while (nob); - - return rc; -} - -static int -ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx) -{ - int rc; - int bufnob; - - if (ksocknal_data.ksnd_stall_tx) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(ksocknal_data.ksnd_stall_tx * HZ); - } - - LASSERT(tx->tx_resid); - - rc = ksocknal_connsock_addref(conn); - if (rc) { - LASSERT(conn->ksnc_closing); - return -ESHUTDOWN; - } - - do { - if (ksocknal_data.ksnd_enomem_tx > 0) { - /* testing... 
*/ - ksocknal_data.ksnd_enomem_tx--; - rc = -EAGAIN; - } else if (tx->tx_niov) { - rc = ksocknal_send_iov(conn, tx); - } else { - rc = ksocknal_send_kiov(conn, tx); - } - - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; - if (rc > 0) /* sent something? */ - conn->ksnc_tx_bufnob += rc; /* account it */ - - if (bufnob < conn->ksnc_tx_bufnob) { - /* - * allocated send buffer bytes < computed; infer - * something got ACKed - */ - conn->ksnc_tx_deadline = - jiffies + *ksocknal_tunables.ksnd_timeout * HZ; - conn->ksnc_peer->ksnp_last_alive = jiffies; - conn->ksnc_tx_bufnob = bufnob; - mb(); - } - - if (rc <= 0) { /* Didn't write anything? */ - - if (!rc) /* some stacks return 0 instead of -EAGAIN */ - rc = -EAGAIN; - - /* Check if EAGAIN is due to memory pressure */ - if (rc == -EAGAIN && ksocknal_lib_memory_pressure(conn)) - rc = -ENOMEM; - - break; - } - - /* socket's wmem_queued now includes 'rc' bytes */ - atomic_sub(rc, &conn->ksnc_tx_nob); - rc = 0; - - } while (tx->tx_resid); - - ksocknal_connsock_decref(conn); - return rc; -} - -static int -ksocknal_recv_iter(struct ksock_conn *conn) -{ - int nob; - int rc; - - /* - * Never touch conn->ksnc_rx_to or change connection - * status inside ksocknal_lib_recv - */ - rc = ksocknal_lib_recv(conn); - - if (rc <= 0) - return rc; - - /* received something... */ - nob = rc; - - conn->ksnc_peer->ksnp_last_alive = jiffies; - conn->ksnc_rx_deadline = - jiffies + *ksocknal_tunables.ksnd_timeout * HZ; - mb(); /* order with setting rx_started */ - conn->ksnc_rx_started = 1; - - conn->ksnc_rx_nob_left -= nob; - - iov_iter_advance(&conn->ksnc_rx_to, nob); - if (iov_iter_count(&conn->ksnc_rx_to)) - return -EAGAIN; - - return 1; -} - -static int -ksocknal_receive(struct ksock_conn *conn) -{ - /* - * Return 1 on success, 0 on EOF, < 0 on error. - * Caller checks ksnc_rx_to to determine - * progress/completion. 
- */ - int rc; - - if (ksocknal_data.ksnd_stall_rx) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(ksocknal_data.ksnd_stall_rx * HZ); - } - - rc = ksocknal_connsock_addref(conn); - if (rc) { - LASSERT(conn->ksnc_closing); - return -ESHUTDOWN; - } - - for (;;) { - rc = ksocknal_recv_iter(conn); - if (rc <= 0) { - /* error/EOF or partial receive */ - if (rc == -EAGAIN) { - rc = 1; - } else if (!rc && conn->ksnc_rx_started) { - /* EOF in the middle of a message */ - rc = -EPROTO; - } - break; - } - - /* Completed a fragment */ - - if (!iov_iter_count(&conn->ksnc_rx_to)) { - rc = 1; - break; - } - } - - ksocknal_connsock_decref(conn); - return rc; -} - -void -ksocknal_tx_done(struct lnet_ni *ni, struct ksock_tx *tx) -{ - struct lnet_msg *lnetmsg = tx->tx_lnetmsg; - int rc = (!tx->tx_resid && !tx->tx_zc_aborted) ? 0 : -EIO; - - LASSERT(ni || tx->tx_conn); - - if (tx->tx_conn) - ksocknal_conn_decref(tx->tx_conn); - - if (!ni && tx->tx_conn) - ni = tx->tx_conn->ksnc_peer->ksnp_ni; - - ksocknal_free_tx(tx); - if (lnetmsg) /* KSOCK_MSG_NOOP go without lnetmsg */ - lnet_finalize(ni, lnetmsg, rc); -} - -void -ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error) -{ - struct ksock_tx *tx; - - while (!list_empty(txlist)) { - tx = list_entry(txlist->next, struct ksock_tx, tx_list); - - if (error && tx->tx_lnetmsg) { - CNETERR("Deleting packet type %d len %d %s->%s\n", - le32_to_cpu(tx->tx_lnetmsg->msg_hdr.type), - le32_to_cpu(tx->tx_lnetmsg->msg_hdr.payload_length), - libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)), - libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid))); - } else if (error) { - CNETERR("Deleting noop packet\n"); - } - - list_del(&tx->tx_list); - - LASSERT(atomic_read(&tx->tx_refcount) == 1); - ksocknal_tx_done(ni, tx); - } -} - -static void -ksocknal_check_zc_req(struct ksock_tx *tx) -{ - struct ksock_conn *conn = tx->tx_conn; - struct ksock_peer *peer = conn->ksnc_peer; - - /* - * Set 
tx_msg.ksm_zc_cookies[0] to a unique non-zero cookie and add tx - * to ksnp_zc_req_list if some fragment of this message should be sent - * zero-copy. Our peer will send an ACK containing this cookie when - * she has received this message to tell us we can signal completion. - * tx_msg.ksm_zc_cookies[0] remains non-zero while tx is on - * ksnp_zc_req_list. - */ - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - LASSERT(tx->tx_zc_capable); - - tx->tx_zc_checked = 1; - - if (conn->ksnc_proto == &ksocknal_protocol_v1x || - !conn->ksnc_zc_capable) - return; - - /* - * assign cookie and queue tx to pending list, it will be released when - * a matching ack is received. See ksocknal_handle_zcack() - */ - ksocknal_tx_addref(tx); - - spin_lock(&peer->ksnp_lock); - - /* ZC_REQ is going to be pinned to the peer */ - tx->tx_deadline = - jiffies + *ksocknal_tunables.ksnd_timeout * HZ; - - LASSERT(!tx->tx_msg.ksm_zc_cookies[0]); - - tx->tx_msg.ksm_zc_cookies[0] = peer->ksnp_zc_next_cookie++; - - if (!peer->ksnp_zc_next_cookie) - peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1; - - list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list); - - spin_unlock(&peer->ksnp_lock); -} - -static void -ksocknal_uncheck_zc_req(struct ksock_tx *tx) -{ - struct ksock_peer *peer = tx->tx_conn->ksnc_peer; - - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - LASSERT(tx->tx_zc_capable); - - tx->tx_zc_checked = 0; - - spin_lock(&peer->ksnp_lock); - - if (!tx->tx_msg.ksm_zc_cookies[0]) { - /* Not waiting for an ACK */ - spin_unlock(&peer->ksnp_lock); - return; - } - - tx->tx_msg.ksm_zc_cookies[0] = 0; - list_del(&tx->tx_zc_list); - - spin_unlock(&peer->ksnp_lock); - - ksocknal_tx_decref(tx); -} - -static int -ksocknal_process_transmit(struct ksock_conn *conn, struct ksock_tx *tx) -{ - int rc; - - if (tx->tx_zc_capable && !tx->tx_zc_checked) - ksocknal_check_zc_req(tx); - - rc = ksocknal_transmit(conn, tx); - - CDEBUG(D_NET, "send(%d) %d\n", tx->tx_resid, rc); - - if (!tx->tx_resid) { - /* 
Sent everything OK */ - LASSERT(!rc); - - return 0; - } - - if (rc == -EAGAIN) - return rc; - - if (rc == -ENOMEM) { - static int counter; - - counter++; /* exponential backoff warnings */ - if ((counter & (-counter)) == counter) - CWARN("%u ENOMEM tx %p\n", counter, conn); - - /* Queue on ksnd_enomem_conns for retry after a timeout */ - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - - /* enomem list takes over scheduler's ref... */ - LASSERT(conn->ksnc_tx_scheduled); - list_add_tail(&conn->ksnc_tx_list, - &ksocknal_data.ksnd_enomem_conns); - if (!time_after_eq(jiffies + SOCKNAL_ENOMEM_RETRY, - ksocknal_data.ksnd_reaper_waketime)) - wake_up(&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - return rc; - } - - /* Actual error */ - LASSERT(rc < 0); - - if (!conn->ksnc_closing) { - switch (rc) { - case -ECONNRESET: - LCONSOLE_WARN("Host %pI4h reset our connection while we were sending data; it may have rebooted.\n", - &conn->ksnc_ipaddr); - break; - default: - LCONSOLE_WARN("There was an unexpected network error while writing to %pI4h: %d.\n", - &conn->ksnc_ipaddr, rc); - break; - } - CDEBUG(D_NET, "[%p] Error %d on write to %s ip %pI4h:%d\n", - conn, rc, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - } - - if (tx->tx_zc_checked) - ksocknal_uncheck_zc_req(tx); - - /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings(conn, (conn->ksnc_closing) ? 
0 : rc); - - return rc; -} - -static void -ksocknal_launch_connection_locked(struct ksock_route *route) -{ - /* called holding write lock on ksnd_global_lock */ - - LASSERT(!route->ksnr_scheduled); - LASSERT(!route->ksnr_connecting); - LASSERT(ksocknal_route_mask() & ~route->ksnr_connected); - - route->ksnr_scheduled = 1; /* scheduling conn for connd */ - ksocknal_route_addref(route); /* extra ref for connd */ - - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - - list_add_tail(&route->ksnr_connd_list, - &ksocknal_data.ksnd_connd_routes); - wake_up(&ksocknal_data.ksnd_connd_waitq); - - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); -} - -void -ksocknal_launch_all_connections_locked(struct ksock_peer *peer) -{ - struct ksock_route *route; - - /* called holding write lock on ksnd_global_lock */ - for (;;) { - /* launch any/all connections that need it */ - route = ksocknal_find_connectable_route_locked(peer); - if (!route) - return; - - ksocknal_launch_connection_locked(route); - } -} - -struct ksock_conn * -ksocknal_find_conn_locked(struct ksock_peer *peer, struct ksock_tx *tx, - int nonblk) -{ - struct list_head *tmp; - struct ksock_conn *conn; - struct ksock_conn *typed = NULL; - struct ksock_conn *fallback = NULL; - int tnob = 0; - int fnob = 0; - - list_for_each(tmp, &peer->ksnp_conns) { - struct ksock_conn *c; - int nob, rc; - - c = list_entry(tmp, struct ksock_conn, ksnc_list); - nob = atomic_read(&c->ksnc_tx_nob) + - c->ksnc_sock->sk->sk_wmem_queued; - - LASSERT(!c->ksnc_closing); - LASSERT(c->ksnc_proto && - c->ksnc_proto->pro_match_tx); - - rc = c->ksnc_proto->pro_match_tx(c, tx, nonblk); - - switch (rc) { - default: - LBUG(); - case SOCKNAL_MATCH_NO: /* protocol rejected the tx */ - continue; - - case SOCKNAL_MATCH_YES: /* typed connection */ - if (!typed || tnob > nob || - (tnob == nob && *ksocknal_tunables.ksnd_round_robin && - time_after(typed->ksnc_tx_last_post, c->ksnc_tx_last_post))) { - typed = c; - tnob = nob; - } - break; - - case 
SOCKNAL_MATCH_MAY: /* fallback connection */ - if (!fallback || fnob > nob || - (fnob == nob && *ksocknal_tunables.ksnd_round_robin && - time_after(fallback->ksnc_tx_last_post, c->ksnc_tx_last_post))) { - fallback = c; - fnob = nob; - } - break; - } - } - - /* prefer the typed selection */ - conn = (typed) ? typed : fallback; - - if (conn) - conn->ksnc_tx_last_post = jiffies; - - return conn; -} - -void -ksocknal_tx_prep(struct ksock_conn *conn, struct ksock_tx *tx) -{ - conn->ksnc_proto->pro_pack(tx); - - atomic_add(tx->tx_nob, &conn->ksnc_tx_nob); - ksocknal_conn_addref(conn); /* +1 ref for tx */ - tx->tx_conn = conn; -} - -void -ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn) -{ - struct ksock_sched *sched = conn->ksnc_scheduler; - struct ksock_msg *msg = &tx->tx_msg; - struct ksock_tx *ztx = NULL; - int bufnob = 0; - - /* - * called holding global lock (read or irq-write) and caller may - * not have dropped this lock between finding conn and calling me, - * so we don't need the {get,put}connsock dance to deref - * ksnc_sock... - */ - LASSERT(!conn->ksnc_closing); - - CDEBUG(D_NET, "Sending to %s ip %pI4h:%d\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port); - - ksocknal_tx_prep(conn, tx); - - /* - * Ensure the frags we've been given EXACTLY match the number of - * bytes we want to send. Many TCP/IP stacks disregard any total - * size parameters passed to them and just look at the frags. - * - * We always expect at least 1 mapped fragment containing the - * complete ksocknal message header. - */ - LASSERT(lnet_iov_nob(tx->tx_niov, tx->tx_iov) + - lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) == - (unsigned int)tx->tx_nob); - LASSERT(tx->tx_niov >= 1); - LASSERT(tx->tx_resid == tx->tx_nob); - - CDEBUG(D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n", - tx, (tx->tx_lnetmsg) ? 
tx->tx_lnetmsg->msg_hdr.type : - KSOCK_MSG_NOOP, - tx->tx_nob, tx->tx_niov, tx->tx_nkiov); - - /* - * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__ - * but they're used inside spinlocks a lot. - */ - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; - spin_lock_bh(&sched->kss_lock); - - if (list_empty(&conn->ksnc_tx_queue) && !bufnob) { - /* First packet starts the timeout */ - conn->ksnc_tx_deadline = - jiffies + *ksocknal_tunables.ksnd_timeout * HZ; - if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */ - conn->ksnc_peer->ksnp_last_alive = jiffies; - conn->ksnc_tx_bufnob = 0; - mb(); /* order with adding to tx_queue */ - } - - if (msg->ksm_type == KSOCK_MSG_NOOP) { - /* - * The packet is noop ZC ACK, try to piggyback the ack_cookie - * on a normal packet so I don't need to send it - */ - LASSERT(msg->ksm_zc_cookies[1]); - LASSERT(conn->ksnc_proto->pro_queue_tx_zcack); - - /* ZC ACK piggybacked on ztx release tx later */ - if (conn->ksnc_proto->pro_queue_tx_zcack(conn, tx, 0)) - ztx = tx; - } else { - /* - * It's a normal packet - can it piggback a noop zc-ack that - * has been queued already? 
- */ - LASSERT(!msg->ksm_zc_cookies[1]); - LASSERT(conn->ksnc_proto->pro_queue_tx_msg); - - ztx = conn->ksnc_proto->pro_queue_tx_msg(conn, tx); - /* ztx will be released later */ - } - - if (ztx) { - atomic_sub(ztx->tx_nob, &conn->ksnc_tx_nob); - list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs); - } - - if (conn->ksnc_tx_ready && /* able to send */ - !conn->ksnc_tx_scheduled) { /* not scheduled to send */ - /* +1 ref for scheduler */ - ksocknal_conn_addref(conn); - list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - wake_up(&sched->kss_waitq); - } - - spin_unlock_bh(&sched->kss_lock); -} - -struct ksock_route * -ksocknal_find_connectable_route_locked(struct ksock_peer *peer) -{ - unsigned long now = jiffies; - struct list_head *tmp; - struct ksock_route *route; - - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - LASSERT(!route->ksnr_connecting || route->ksnr_scheduled); - - /* connections being established */ - if (route->ksnr_scheduled) - continue; - - /* all route types connected ? 
*/ - if (!(ksocknal_route_mask() & ~route->ksnr_connected)) - continue; - - if (!(!route->ksnr_retry_interval || /* first attempt */ - time_after_eq(now, route->ksnr_timeout))) { - CDEBUG(D_NET, - "Too soon to retry route %pI4h (cnted %d, interval %ld, %ld secs later)\n", - &route->ksnr_ipaddr, - route->ksnr_connected, - route->ksnr_retry_interval, - (route->ksnr_timeout - now) / HZ); - continue; - } - - return route; - } - - return NULL; -} - -struct ksock_route * -ksocknal_find_connecting_route_locked(struct ksock_peer *peer) -{ - struct list_head *tmp; - struct ksock_route *route; - - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - LASSERT(!route->ksnr_connecting || route->ksnr_scheduled); - - if (route->ksnr_scheduled) - return route; - } - - return NULL; -} - -int -ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx, - struct lnet_process_id id) -{ - struct ksock_peer *peer; - struct ksock_conn *conn; - rwlock_t *g_lock; - int retry; - int rc; - - LASSERT(!tx->tx_conn); - - g_lock = &ksocknal_data.ksnd_global_lock; - - for (retry = 0;; retry = 1) { - read_lock(g_lock); - peer = ksocknal_find_peer_locked(ni, id); - if (peer) { - if (!ksocknal_find_connectable_route_locked(peer)) { - conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk); - if (conn) { - /* - * I've got no routes that need to be - * connecting and I do have an actual - * connection... - */ - ksocknal_queue_tx_locked(tx, conn); - read_unlock(g_lock); - return 0; - } - } - } - - /* I'll need a write lock... 
*/ - read_unlock(g_lock); - - write_lock_bh(g_lock); - - peer = ksocknal_find_peer_locked(ni, id); - if (peer) - break; - - write_unlock_bh(g_lock); - - if (id.pid & LNET_PID_USERFLAG) { - CERROR("Refusing to create a connection to userspace process %s\n", - libcfs_id2str(id)); - return -EHOSTUNREACH; - } - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; - } - - rc = ksocknal_add_peer(ni, id, - LNET_NIDADDR(id.nid), - lnet_acceptor_port()); - if (rc) { - CERROR("Can't add peer %s: %d\n", - libcfs_id2str(id), rc); - return rc; - } - } - - ksocknal_launch_all_connections_locked(peer); - - conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk); - if (conn) { - /* Connection exists; queue message on it */ - ksocknal_queue_tx_locked(tx, conn); - write_unlock_bh(g_lock); - return 0; - } - - if (peer->ksnp_accepting > 0 || - ksocknal_find_connecting_route_locked(peer)) { - /* the message is going to be pinned to the peer */ - tx->tx_deadline = - jiffies + *ksocknal_tunables.ksnd_timeout * HZ; - - /* Queue the message until a connection is established */ - list_add_tail(&tx->tx_list, &peer->ksnp_tx_queue); - write_unlock_bh(g_lock); - return 0; - } - - write_unlock_bh(g_lock); - - /* NB Routes may be ignored if connections to them failed recently */ - CNETERR("No usable routes to %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; -} - -int -ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) -{ - unsigned int mpflag = 0; - int type = lntmsg->msg_type; - struct lnet_process_id target = lntmsg->msg_target; - unsigned int payload_niov = lntmsg->msg_niov; - struct kvec *payload_iov = lntmsg->msg_iov; - struct bio_vec *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - struct ksock_tx *tx; - int desc_size; - int rc; - - /* - * NB 'private' is different depending on what we're sending. - * Just ignore it... 
- */ - CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT(!payload_nob || payload_niov > 0); - LASSERT(payload_niov <= LNET_MAX_IOV); - /* payload is either all vaddrs or all pages */ - LASSERT(!(payload_kiov && payload_iov)); - LASSERT(!in_interrupt()); - - if (payload_iov) - desc_size = offsetof(struct ksock_tx, - tx_frags.virt.iov[1 + payload_niov]); - else - desc_size = offsetof(struct ksock_tx, - tx_frags.paged.kiov[payload_niov]); - - if (lntmsg->msg_vmflush) - mpflag = memalloc_noreclaim_save(); - tx = ksocknal_alloc_tx(KSOCK_MSG_LNET, desc_size); - if (!tx) { - CERROR("Can't allocate tx desc type %d size %d\n", - type, desc_size); - if (lntmsg->msg_vmflush) - memalloc_noreclaim_restore(mpflag); - return -ENOMEM; - } - - tx->tx_conn = NULL; /* set when assigned a conn */ - tx->tx_lnetmsg = lntmsg; - - if (payload_iov) { - tx->tx_kiov = NULL; - tx->tx_nkiov = 0; - tx->tx_iov = tx->tx_frags.virt.iov; - tx->tx_niov = 1 + - lnet_extract_iov(payload_niov, &tx->tx_iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); - } else { - tx->tx_niov = 1; - tx->tx_iov = &tx->tx_frags.paged.iov; - tx->tx_kiov = tx->tx_frags.paged.kiov; - tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov, - payload_niov, payload_kiov, - payload_offset, payload_nob); - - if (payload_nob >= *ksocknal_tunables.ksnd_zc_min_payload) - tx->tx_zc_capable = 1; - } - - tx->tx_msg.ksm_csum = 0; - tx->tx_msg.ksm_type = KSOCK_MSG_LNET; - tx->tx_msg.ksm_zc_cookies[0] = 0; - tx->tx_msg.ksm_zc_cookies[1] = 0; - - /* The first fragment will be set later in pro_pack */ - rc = ksocknal_launch_packet(ni, tx, target); - if (mpflag) - memalloc_noreclaim_restore(mpflag); - - if (!rc) - return 0; - - ksocknal_free_tx(tx); - return -EIO; -} - -int -ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name) -{ - struct task_struct *task = kthread_run(fn, arg, "%s", name); - - if (IS_ERR(task)) - return PTR_ERR(task); 
- - write_lock_bh(&ksocknal_data.ksnd_global_lock); - ksocknal_data.ksnd_nthreads++; - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - return 0; -} - -void -ksocknal_thread_fini(void) -{ - write_lock_bh(&ksocknal_data.ksnd_global_lock); - ksocknal_data.ksnd_nthreads--; - write_unlock_bh(&ksocknal_data.ksnd_global_lock); -} - -int -ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) -{ - static char ksocknal_slop_buffer[4096]; - struct kvec *kvec = conn->ksnc_rx_iov_space; - - int nob; - unsigned int niov; - int skipped; - - LASSERT(conn->ksnc_proto); - - if (*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) { - /* Remind the socket to ack eagerly... */ - ksocknal_lib_eager_ack(conn); - } - - if (!nob_to_skip) { /* right at next packet boundary now */ - conn->ksnc_rx_started = 0; - mb(); /* racing with timeout thread */ - - switch (conn->ksnc_proto->pro_version) { - case KSOCK_PROTO_V2: - case KSOCK_PROTO_V3: - conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER; - kvec->iov_base = &conn->ksnc_msg; - kvec->iov_len = offsetof(struct ksock_msg, ksm_u); - conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u); - iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, - 1, offsetof(struct ksock_msg, ksm_u)); - break; - - case KSOCK_PROTO_V1: - /* Receiving bare struct lnet_hdr */ - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; - kvec->iov_len = sizeof(struct lnet_hdr); - conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr); - iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, - 1, sizeof(struct lnet_hdr)); - break; - - default: - LBUG(); - } - conn->ksnc_rx_csum = ~0; - return 1; - } - - /* - * Set up to skip as much as possible now. 
If there's more left - * (ran out of iov entries) we'll get called again - */ - conn->ksnc_rx_state = SOCKNAL_RX_SLOP; - conn->ksnc_rx_nob_left = nob_to_skip; - skipped = 0; - niov = 0; - - do { - nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer)); - - kvec[niov].iov_base = ksocknal_slop_buffer; - kvec[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -= nob; - - } while (nob_to_skip && /* mustn't overflow conn's rx iov */ - niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec)); - - iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped); - return 0; -} - -static int -ksocknal_process_receive(struct ksock_conn *conn) -{ - struct kvec *kvec = conn->ksnc_rx_iov_space; - struct lnet_hdr *lhdr; - struct lnet_process_id *id; - int rc; - - LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0); - - /* NB: sched lock NOT held */ - /* SOCKNAL_RX_LNET_HEADER is here for backward compatibility */ - LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_SLOP); - again: - if (iov_iter_count(&conn->ksnc_rx_to)) { - rc = ksocknal_receive(conn); - - if (rc <= 0) { - LASSERT(rc != -EAGAIN); - - if (!rc) - CDEBUG(D_NET, "[%p] EOF from %s ip %pI4h:%d\n", - conn, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - else if (!conn->ksnc_closing) - CERROR("[%p] Error %d on read from %s ip %pI4h:%d\n", - conn, rc, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - - /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings(conn, - (conn->ksnc_closing) ? 0 : rc); - return (!rc ? 
-ESHUTDOWN : rc); - } - - if (iov_iter_count(&conn->ksnc_rx_to)) { - /* short read */ - return -EAGAIN; - } - } - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_flip) { - __swab32s(&conn->ksnc_msg.ksm_type); - __swab32s(&conn->ksnc_msg.ksm_csum); - __swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]); - __swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]); - } - - if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP && - conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) { - CERROR("%s: Unknown message type: %x\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_type); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return -EPROTO; - } - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP && - conn->ksnc_msg.ksm_csum && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - /* NOOP Checksum error */ - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return -EIO; - } - - if (conn->ksnc_msg.ksm_zc_cookies[1]) { - __u64 cookie = 0; - - LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x); - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) - cookie = conn->ksnc_msg.ksm_zc_cookies[0]; - - rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie, - conn->ksnc_msg.ksm_zc_cookies[1]); - - if (rc) { - CERROR("%s: Unknown ZC-ACK cookie: %llu, %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - cookie, conn->ksnc_msg.ksm_zc_cookies[1]); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return rc; - } - } - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) { - ksocknal_new_packet(conn, 0); - return 0; /* NOOP is done and just return */ - } - - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg); - - kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; - 
kvec->iov_len = sizeof(struct ksock_lnet_msg); - - iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, - 1, sizeof(struct ksock_lnet_msg)); - - goto again; /* read lnet header now */ - - case SOCKNAL_RX_LNET_HEADER: - /* unpack message header */ - conn->ksnc_proto->pro_unpack(&conn->ksnc_msg); - - if (conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) { - /* Userspace peer */ - lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; - id = &conn->ksnc_peer->ksnp_id; - - /* Substitute process ID assigned at connection time */ - lhdr->src_pid = cpu_to_le32(id->pid); - lhdr->src_nid = cpu_to_le64(id->nid); - } - - conn->ksnc_rx_state = SOCKNAL_RX_PARSE; - ksocknal_conn_addref(conn); /* ++ref while parsing */ - - rc = lnet_parse(conn->ksnc_peer->ksnp_ni, - &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr, - conn->ksnc_peer->ksnp_id.nid, conn, 0); - if (rc < 0) { - /* I just received garbage: give up on this conn */ - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, rc); - ksocknal_conn_decref(conn); - return -EPROTO; - } - - /* I'm racing with ksocknal_recv() */ - LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_PARSE || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD); - - if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD) - return 0; - - /* ksocknal_recv() got called */ - goto again; - - case SOCKNAL_RX_LNET_PAYLOAD: - /* payload all received */ - rc = 0; - - if (!conn->ksnc_rx_nob_left && /* not truncating */ - conn->ksnc_msg.ksm_csum && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - rc = -EIO; - } - - if (!rc && conn->ksnc_msg.ksm_zc_cookies[0]) { - LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x); - - lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; - id = &conn->ksnc_peer->ksnp_id; - - rc = conn->ksnc_proto->pro_handle_zcreq(conn, - conn->ksnc_msg.ksm_zc_cookies[0], - 
*ksocknal_tunables.ksnd_nonblk_zcack || - le64_to_cpu(lhdr->src_nid) != id->nid); - } - - lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc); - - if (rc) { - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, rc); - return -EPROTO; - } - /* Fall through */ - - case SOCKNAL_RX_SLOP: - /* starting new packet? */ - if (ksocknal_new_packet(conn, conn->ksnc_rx_nob_left)) - return 0; /* come back later */ - goto again; /* try to finish reading slop now */ - - default: - break; - } - - /* Not Reached */ - LBUG(); - return -EINVAL; /* keep gcc happy */ -} - -int -ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg, - int delayed, struct iov_iter *to, unsigned int rlen) -{ - struct ksock_conn *conn = private; - struct ksock_sched *sched = conn->ksnc_scheduler; - - LASSERT(iov_iter_count(to) <= rlen); - LASSERT(to->nr_segs <= LNET_MAX_IOV); - - conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_left = rlen; - - conn->ksnc_rx_to = *to; - - LASSERT(conn->ksnc_rx_scheduled); - - spin_lock_bh(&sched->kss_lock); - - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_PARSE_WAIT: - list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns); - wake_up(&sched->kss_waitq); - LASSERT(conn->ksnc_rx_ready); - break; - - case SOCKNAL_RX_PARSE: - /* scheduler hasn't noticed I'm parsing yet */ - break; - } - - conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD; - - spin_unlock_bh(&sched->kss_lock); - ksocknal_conn_decref(conn); - return 0; -} - -static inline int -ksocknal_sched_cansleep(struct ksock_sched *sched) -{ - int rc; - - spin_lock_bh(&sched->kss_lock); - - rc = !ksocknal_data.ksnd_shuttingdown && - list_empty(&sched->kss_rx_conns) && - list_empty(&sched->kss_tx_conns); - - spin_unlock_bh(&sched->kss_lock); - return rc; -} - -int ksocknal_scheduler(void *arg) -{ - struct ksock_sched_info *info; - struct ksock_sched *sched; - struct ksock_conn *conn; - struct ksock_tx *tx; - int rc; - int nloops = 0; - long id = (long)arg; - - info = 
ksocknal_data.ksnd_sched_info[KSOCK_THREAD_CPT(id)]; - sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)]; - - rc = cfs_cpt_bind(lnet_cpt_table(), info->ksi_cpt); - if (rc) { - CWARN("Can't set CPU partition affinity to %d: %d\n", - info->ksi_cpt, rc); - } - - spin_lock_bh(&sched->kss_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - int did_something = 0; - - /* Ensure I progress everything semi-fairly */ - - if (!list_empty(&sched->kss_rx_conns)) { - conn = list_entry(sched->kss_rx_conns.next, - struct ksock_conn, ksnc_rx_list); - list_del(&conn->ksnc_rx_list); - - LASSERT(conn->ksnc_rx_scheduled); - LASSERT(conn->ksnc_rx_ready); - - /* - * clear rx_ready in case receive isn't complete. - * Do it BEFORE we call process_recv, since - * data_ready can set it any time after we release - * kss_lock. - */ - conn->ksnc_rx_ready = 0; - spin_unlock_bh(&sched->kss_lock); - - rc = ksocknal_process_receive(conn); - - spin_lock_bh(&sched->kss_lock); - - /* I'm the only one that can clear this flag */ - LASSERT(conn->ksnc_rx_scheduled); - - /* Did process_receive get everything it wanted? 
*/ - if (!rc) - conn->ksnc_rx_ready = 1; - - if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) { - /* - * Conn blocked waiting for ksocknal_recv() - * I change its state (under lock) to signal - * it can be rescheduled - */ - conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT; - } else if (conn->ksnc_rx_ready) { - /* reschedule for rx */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); - } else { - conn->ksnc_rx_scheduled = 0; - /* drop my ref */ - ksocknal_conn_decref(conn); - } - - did_something = 1; - } - - if (!list_empty(&sched->kss_tx_conns)) { - LIST_HEAD(zlist); - - if (!list_empty(&sched->kss_zombie_noop_txs)) { - list_add(&zlist, &sched->kss_zombie_noop_txs); - list_del_init(&sched->kss_zombie_noop_txs); - } - - conn = list_entry(sched->kss_tx_conns.next, - struct ksock_conn, ksnc_tx_list); - list_del(&conn->ksnc_tx_list); - - LASSERT(conn->ksnc_tx_scheduled); - LASSERT(conn->ksnc_tx_ready); - LASSERT(!list_empty(&conn->ksnc_tx_queue)); - - tx = list_entry(conn->ksnc_tx_queue.next, - struct ksock_tx, tx_list); - - if (conn->ksnc_tx_carrier == tx) - ksocknal_next_tx_carrier(conn); - - /* dequeue now so empty list => more to send */ - list_del(&tx->tx_list); - - /* - * Clear tx_ready in case send isn't complete. Do - * it BEFORE we call process_transmit, since - * write_space can set it any time after we release - * kss_lock. 
- */ - conn->ksnc_tx_ready = 0; - spin_unlock_bh(&sched->kss_lock); - - if (!list_empty(&zlist)) { - /* - * free zombie noop txs, it's fast because - * noop txs are just put in freelist - */ - ksocknal_txlist_done(NULL, &zlist, 0); - } - - rc = ksocknal_process_transmit(conn, tx); - - if (rc == -ENOMEM || rc == -EAGAIN) { - /* - * Incomplete send: replace tx on HEAD of - * tx_queue - */ - spin_lock_bh(&sched->kss_lock); - list_add(&tx->tx_list, &conn->ksnc_tx_queue); - } else { - /* Complete send; tx -ref */ - ksocknal_tx_decref(tx); - - spin_lock_bh(&sched->kss_lock); - /* assume space for more */ - conn->ksnc_tx_ready = 1; - } - - if (rc == -ENOMEM) { - /* - * Do nothing; after a short timeout, this - * conn will be reposted on kss_tx_conns. - */ - } else if (conn->ksnc_tx_ready && - !list_empty(&conn->ksnc_tx_queue)) { - /* reschedule for tx */ - list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); - } else { - conn->ksnc_tx_scheduled = 0; - /* drop my ref */ - ksocknal_conn_decref(conn); - } - - did_something = 1; - } - if (!did_something || /* nothing to do */ - ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */ - spin_unlock_bh(&sched->kss_lock); - - nloops = 0; - - if (!did_something) { /* wait for something to do */ - rc = wait_event_interruptible_exclusive( - sched->kss_waitq, - !ksocknal_sched_cansleep(sched)); - LASSERT(!rc); - } else { - cond_resched(); - } - - spin_lock_bh(&sched->kss_lock); - } - } - - spin_unlock_bh(&sched->kss_lock); - ksocknal_thread_fini(); - return 0; -} - -/* - * Add connection to kss_rx_conns of scheduler - * and wakeup the scheduler. 
- */ -void ksocknal_read_callback(struct ksock_conn *conn) -{ - struct ksock_sched *sched; - - sched = conn->ksnc_scheduler; - - spin_lock_bh(&sched->kss_lock); - - conn->ksnc_rx_ready = 1; - - if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - wake_up(&sched->kss_waitq); - } - spin_unlock_bh(&sched->kss_lock); -} - -/* - * Add connection to kss_tx_conns of scheduler - * and wakeup the scheduler. - */ -void ksocknal_write_callback(struct ksock_conn *conn) -{ - struct ksock_sched *sched; - - sched = conn->ksnc_scheduler; - - spin_lock_bh(&sched->kss_lock); - - conn->ksnc_tx_ready = 1; - - if (!conn->ksnc_tx_scheduled && /* not being progressed */ - !list_empty(&conn->ksnc_tx_queue)) { /* packets to send */ - list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - wake_up(&sched->kss_waitq); - } - - spin_unlock_bh(&sched->kss_lock); -} - -static struct ksock_proto * -ksocknal_parse_proto_version(struct ksock_hello_msg *hello) -{ - __u32 version = 0; - - if (hello->kshm_magic == LNET_PROTO_MAGIC) - version = hello->kshm_version; - else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC)) - version = __swab32(hello->kshm_version); - - if (version) { -#if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol == 1) - return NULL; - - if (*ksocknal_tunables.ksnd_protocol == 2 && - version == KSOCK_PROTO_V3) - return NULL; -#endif - if (version == KSOCK_PROTO_V2) - return &ksocknal_protocol_v2x; - - if (version == KSOCK_PROTO_V3) - return &ksocknal_protocol_v3x; - - return NULL; - } - - if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { - struct lnet_magicversion *hmv = (struct lnet_magicversion *)hello; - - BUILD_BUG_ON(sizeof(struct lnet_magicversion) != - offsetof(struct ksock_hello_msg, 
kshm_src_nid)); - - if (hmv->version_major == cpu_to_le16(KSOCK_PROTO_V1_MAJOR) && - hmv->version_minor == cpu_to_le16(KSOCK_PROTO_V1_MINOR)) - return &ksocknal_protocol_v1x; - } - - return NULL; -} - -int -ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn, - lnet_nid_t peer_nid, struct ksock_hello_msg *hello) -{ - /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */ - struct ksock_net *net = (struct ksock_net *)ni->ni_data; - - LASSERT(hello->kshm_nips <= LNET_MAX_INTERFACES); - - /* rely on caller to hold a ref on socket so it wouldn't disappear */ - LASSERT(conn->ksnc_proto); - - hello->kshm_src_nid = ni->ni_nid; - hello->kshm_dst_nid = peer_nid; - hello->kshm_src_pid = the_lnet.ln_pid; - - hello->kshm_src_incarnation = net->ksnn_incarnation; - hello->kshm_ctype = conn->ksnc_type; - - return conn->ksnc_proto->pro_send_hello(conn, hello); -} - -static int -ksocknal_invert_type(int type) -{ - switch (type) { - case SOCKLND_CONN_ANY: - case SOCKLND_CONN_CONTROL: - return type; - case SOCKLND_CONN_BULK_IN: - return SOCKLND_CONN_BULK_OUT; - case SOCKLND_CONN_BULK_OUT: - return SOCKLND_CONN_BULK_IN; - default: - return SOCKLND_CONN_NONE; - } -} - -int -ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn, - struct ksock_hello_msg *hello, - struct lnet_process_id *peerid, - __u64 *incarnation) -{ - /* Return < 0 fatal error - * 0 success - * EALREADY lost connection race - * EPROTO protocol version mismatch - */ - struct socket *sock = conn->ksnc_sock; - int active = !!conn->ksnc_proto; - int timeout; - int proto_match; - int rc; - struct ksock_proto *proto; - struct lnet_process_id recv_id; - - /* socket type set on active connections - not set on passive */ - LASSERT(!active == !(conn->ksnc_type != SOCKLND_CONN_NONE)); - - timeout = active ? 
*ksocknal_tunables.ksnd_timeout : - lnet_acceptor_timeout(); - - rc = lnet_sock_read(sock, &hello->kshm_magic, - sizeof(hello->kshm_magic), timeout); - if (rc) { - CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0); - return rc; - } - - if (hello->kshm_magic != LNET_PROTO_MAGIC && - hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) && - hello->kshm_magic != le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { - /* Unexpected magic! */ - CERROR("Bad magic(1) %#08x (%#08x expected) from %pI4h\n", - __cpu_to_le32(hello->kshm_magic), - LNET_PROTO_TCP_MAGIC, - &conn->ksnc_ipaddr); - return -EPROTO; - } - - rc = lnet_sock_read(sock, &hello->kshm_version, - sizeof(hello->kshm_version), timeout); - if (rc) { - CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0); - return rc; - } - - proto = ksocknal_parse_proto_version(hello); - if (!proto) { - if (!active) { - /* unknown protocol from peer, tell peer my protocol */ - conn->ksnc_proto = &ksocknal_protocol_v3x; -#if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol == 2) - conn->ksnc_proto = &ksocknal_protocol_v2x; - else if (*ksocknal_tunables.ksnd_protocol == 1) - conn->ksnc_proto = &ksocknal_protocol_v1x; -#endif - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, ni->ni_nid, hello); - } - - CERROR("Unknown protocol version (%d.x expected) from %pI4h\n", - conn->ksnc_proto->pro_version, - &conn->ksnc_ipaddr); - - return -EPROTO; - } - - proto_match = (conn->ksnc_proto == proto); - conn->ksnc_proto = proto; - - /* receive the rest of hello message anyway */ - rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout); - if (rc) { - CERROR("Error %d reading or checking hello from from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0); - return rc; - } - - *incarnation = hello->kshm_src_incarnation; - - if (hello->kshm_src_nid == LNET_NID_ANY) { - CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY from %pI4h\n", - 
&conn->ksnc_ipaddr); - return -EPROTO; - } - - if (!active && - conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - /* Userspace NAL assigns peer process ID from socket */ - recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG; - recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - conn->ksnc_ipaddr); - } else { - recv_id.nid = hello->kshm_src_nid; - recv_id.pid = hello->kshm_src_pid; - } - - if (!active) { - *peerid = recv_id; - - /* peer determines type */ - conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype); - if (conn->ksnc_type == SOCKLND_CONN_NONE) { - CERROR("Unexpected type %d from %s ip %pI4h\n", - hello->kshm_ctype, libcfs_id2str(*peerid), - &conn->ksnc_ipaddr); - return -EPROTO; - } - - return 0; - } - - if (peerid->pid != recv_id.pid || - peerid->nid != recv_id.nid) { - LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host %pI4h, but they claimed they were %s; please check your Lustre configuration.\n", - libcfs_id2str(*peerid), - &conn->ksnc_ipaddr, - libcfs_id2str(recv_id)); - return -EPROTO; - } - - if (hello->kshm_ctype == SOCKLND_CONN_NONE) { - /* Possible protocol mismatch or I lost the connection race */ - return proto_match ? 
EALREADY : EPROTO; - } - - if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) { - CERROR("Mismatched types: me %d, %s ip %pI4h %d\n", - conn->ksnc_type, libcfs_id2str(*peerid), - &conn->ksnc_ipaddr, hello->kshm_ctype); - return -EPROTO; - } - - return 0; -} - -static int -ksocknal_connect(struct ksock_route *route) -{ - LIST_HEAD(zombies); - struct ksock_peer *peer = route->ksnr_peer; - int type; - int wanted; - struct socket *sock; - unsigned long deadline; - int retry_later = 0; - int rc = 0; - - deadline = jiffies + *ksocknal_tunables.ksnd_timeout * HZ; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - LASSERT(route->ksnr_scheduled); - LASSERT(!route->ksnr_connecting); - - route->ksnr_connecting = 1; - - for (;;) { - wanted = ksocknal_route_mask() & ~route->ksnr_connected; - - /* - * stop connecting if peer/route got closed under me, or - * route got connected while queued - */ - if (peer->ksnp_closing || route->ksnr_deleted || - !wanted) { - retry_later = 0; - break; - } - - /* reschedule if peer is connecting to me */ - if (peer->ksnp_accepting > 0) { - CDEBUG(D_NET, - "peer %s(%d) already connecting to me, retry later.\n", - libcfs_nid2str(peer->ksnp_id.nid), - peer->ksnp_accepting); - retry_later = 1; - } - - if (retry_later) /* needs reschedule */ - break; - - if (wanted & BIT(SOCKLND_CONN_ANY)) { - type = SOCKLND_CONN_ANY; - } else if (wanted & BIT(SOCKLND_CONN_CONTROL)) { - type = SOCKLND_CONN_CONTROL; - } else if (wanted & BIT(SOCKLND_CONN_BULK_IN)) { - type = SOCKLND_CONN_BULK_IN; - } else { - LASSERT(wanted & BIT(SOCKLND_CONN_BULK_OUT)); - type = SOCKLND_CONN_BULK_OUT; - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - if (time_after_eq(jiffies, deadline)) { - rc = -ETIMEDOUT; - lnet_connect_console_error(rc, peer->ksnp_id.nid, - route->ksnr_ipaddr, - route->ksnr_port); - goto failed; - } - - rc = lnet_connect(&sock, peer->ksnp_id.nid, - route->ksnr_myipaddr, - route->ksnr_ipaddr, route->ksnr_port); - if (rc) - goto 
failed; - - rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type); - if (rc < 0) { - lnet_connect_console_error(rc, peer->ksnp_id.nid, - route->ksnr_ipaddr, - route->ksnr_port); - goto failed; - } - - /* - * A +ve RC means I have to retry because I lost the connection - * race or I have to renegotiate protocol version - */ - retry_later = (rc); - if (retry_later) - CDEBUG(D_NET, "peer %s: conn race, retry later.\n", - libcfs_nid2str(peer->ksnp_id.nid)); - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - } - - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - - if (retry_later) { - /* - * re-queue for attention; this frees me up to handle - * the peer's incoming connection request - */ - if (rc == EALREADY || - (!rc && peer->ksnp_accepting > 0)) { - /* - * We want to introduce a delay before next - * attempt to connect if we lost conn race, - * but the race is resolved quickly usually, - * so min_reconnectms should be good heuristic - */ - route->ksnr_retry_interval = - *ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000; - route->ksnr_timeout = jiffies + route->ksnr_retry_interval; - } - - ksocknal_launch_connection_locked(route); - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - return retry_later; - - failed: - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - - /* This is a retry rather than a new connection */ - route->ksnr_retry_interval *= 2; - route->ksnr_retry_interval = - max(route->ksnr_retry_interval, - (long)*ksocknal_tunables.ksnd_min_reconnectms * HZ / 1000); - route->ksnr_retry_interval = - min(route->ksnr_retry_interval, - (long)*ksocknal_tunables.ksnd_max_reconnectms * HZ / 1000); - - LASSERT(route->ksnr_retry_interval); - route->ksnr_timeout = jiffies + route->ksnr_retry_interval; - - if (!list_empty(&peer->ksnp_tx_queue) && - !peer->ksnp_accepting && - !ksocknal_find_connecting_route_locked(peer)) { - struct ksock_conn *conn; - - /* - * ksnp_tx_queue is 
queued on a conn on successful - * connection for V1.x and V2.x - */ - if (!list_empty(&peer->ksnp_conns)) { - conn = list_entry(peer->ksnp_conns.next, - struct ksock_conn, ksnc_list); - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x); - } - - /* - * take all the blocked packets while I've got the lock and - * complete below... - */ - list_splice_init(&peer->ksnp_tx_queue, &zombies); - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - ksocknal_peer_failed(peer); - ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1); - return 0; -} - -/* - * check whether we need to create more connds. - * It will try to create new thread if it's necessary, @timeout can - * be updated if failed to create, so caller wouldn't keep try while - * running out of resource. - */ -static int -ksocknal_connd_check_start(time64_t sec, long *timeout) -{ - char name[16]; - int rc; - int total = ksocknal_data.ksnd_connd_starting + - ksocknal_data.ksnd_connd_running; - - if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) { - /* still in initializing */ - return 0; - } - - if (total >= *ksocknal_tunables.ksnd_nconnds_max || - total > ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV) { - /* - * can't create more connd, or still have enough - * threads to handle more connecting - */ - return 0; - } - - if (list_empty(&ksocknal_data.ksnd_connd_routes)) { - /* no pending connecting request */ - return 0; - } - - if (sec - ksocknal_data.ksnd_connd_failed_stamp <= 1) { - /* may run out of resource, retry later */ - *timeout = HZ; - return 0; - } - - if (ksocknal_data.ksnd_connd_starting > 0) { - /* serialize starting to avoid flood */ - return 0; - } - - ksocknal_data.ksnd_connd_starting_stamp = sec; - ksocknal_data.ksnd_connd_starting++; - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - - /* NB: total is the next id */ - snprintf(name, sizeof(name), "socknal_cd%02d", total); - rc = ksocknal_thread_start(ksocknal_connd, NULL, name); - - 
spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - if (!rc) - return 1; - - /* we tried ... */ - LASSERT(ksocknal_data.ksnd_connd_starting > 0); - ksocknal_data.ksnd_connd_starting--; - ksocknal_data.ksnd_connd_failed_stamp = ktime_get_real_seconds(); - - return 1; -} - -/* - * check whether current thread can exit, it will return 1 if there are too - * many threads and no creating in past 120 seconds. - * Also, this function may update @timeout to make caller come back - * again to recheck these conditions. - */ -static int -ksocknal_connd_check_stop(time64_t sec, long *timeout) -{ - int val; - - if (unlikely(ksocknal_data.ksnd_init < SOCKNAL_INIT_ALL)) { - /* still in initializing */ - return 0; - } - - if (ksocknal_data.ksnd_connd_starting > 0) { - /* in progress of starting new thread */ - return 0; - } - - if (ksocknal_data.ksnd_connd_running <= - *ksocknal_tunables.ksnd_nconnds) { /* can't shrink */ - return 0; - } - - /* created thread in past 120 seconds? */ - val = (int)(ksocknal_data.ksnd_connd_starting_stamp + - SOCKNAL_CONND_TIMEOUT - sec); - - *timeout = (val > 0) ? 
val * HZ : - SOCKNAL_CONND_TIMEOUT * HZ; - if (val > 0) - return 0; - - /* no creating in past 120 seconds */ - - return ksocknal_data.ksnd_connd_running > - ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV; -} - -/* - * Go through connd_routes queue looking for a route that we can process - * right now, @timeout_p can be updated if we need to come back later - */ -static struct ksock_route * -ksocknal_connd_get_route_locked(signed long *timeout_p) -{ - struct ksock_route *route; - unsigned long now; - - now = jiffies; - - /* connd_routes can contain both pending and ordinary routes */ - list_for_each_entry(route, &ksocknal_data.ksnd_connd_routes, - ksnr_connd_list) { - if (!route->ksnr_retry_interval || - time_after_eq(now, route->ksnr_timeout)) - return route; - - if (*timeout_p == MAX_SCHEDULE_TIMEOUT || - (int)*timeout_p > (int)(route->ksnr_timeout - now)) - *timeout_p = (int)(route->ksnr_timeout - now); - } - - return NULL; -} - -int -ksocknal_connd(void *arg) -{ - spinlock_t *connd_lock = &ksocknal_data.ksnd_connd_lock; - struct ksock_connreq *cr; - wait_queue_entry_t wait; - int nloops = 0; - int cons_retry = 0; - - init_waitqueue_entry(&wait, current); - - spin_lock_bh(connd_lock); - - LASSERT(ksocknal_data.ksnd_connd_starting > 0); - ksocknal_data.ksnd_connd_starting--; - ksocknal_data.ksnd_connd_running++; - - while (!ksocknal_data.ksnd_shuttingdown) { - struct ksock_route *route = NULL; - time64_t sec = ktime_get_real_seconds(); - long timeout = MAX_SCHEDULE_TIMEOUT; - int dropped_lock = 0; - - if (ksocknal_connd_check_stop(sec, &timeout)) { - /* wakeup another one to check stop */ - wake_up(&ksocknal_data.ksnd_connd_waitq); - break; - } - - if (ksocknal_connd_check_start(sec, &timeout)) { - /* created new thread */ - dropped_lock = 1; - } - - if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) { - /* Connection accepted by the listener */ - cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next, - struct ksock_connreq, ksncr_list); - - 
list_del(&cr->ksncr_list); - spin_unlock_bh(connd_lock); - dropped_lock = 1; - - ksocknal_create_conn(cr->ksncr_ni, NULL, - cr->ksncr_sock, SOCKLND_CONN_NONE); - lnet_ni_decref(cr->ksncr_ni); - kfree(cr); - - spin_lock_bh(connd_lock); - } - - /* - * Only handle an outgoing connection request if there - * is a thread left to handle incoming connections and - * create new connd - */ - if (ksocknal_data.ksnd_connd_connecting + SOCKNAL_CONND_RESV < - ksocknal_data.ksnd_connd_running) { - route = ksocknal_connd_get_route_locked(&timeout); - } - if (route) { - list_del(&route->ksnr_connd_list); - ksocknal_data.ksnd_connd_connecting++; - spin_unlock_bh(connd_lock); - dropped_lock = 1; - - if (ksocknal_connect(route)) { - /* consecutive retry */ - if (cons_retry++ > SOCKNAL_INSANITY_RECONN) { - CWARN("massive consecutive re-connecting to %pI4h\n", - &route->ksnr_ipaddr); - cons_retry = 0; - } - } else { - cons_retry = 0; - } - - ksocknal_route_decref(route); - - spin_lock_bh(connd_lock); - ksocknal_data.ksnd_connd_connecting--; - } - - if (dropped_lock) { - if (++nloops < SOCKNAL_RESCHED) - continue; - spin_unlock_bh(connd_lock); - nloops = 0; - cond_resched(); - spin_lock_bh(connd_lock); - continue; - } - - /* Nothing to do for 'timeout' */ - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&ksocknal_data.ksnd_connd_waitq, - &wait); - spin_unlock_bh(connd_lock); - - nloops = 0; - schedule_timeout(timeout); - - remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait); - spin_lock_bh(connd_lock); - } - ksocknal_data.ksnd_connd_running--; - spin_unlock_bh(connd_lock); - - ksocknal_thread_fini(); - return 0; -} - -static struct ksock_conn * -ksocknal_find_timed_out_conn(struct ksock_peer *peer) -{ - /* We're called with a shared lock on ksnd_global_lock */ - struct ksock_conn *conn; - struct list_head *ctmp; - - list_for_each(ctmp, &peer->ksnp_conns) { - int error; - - conn = list_entry(ctmp, struct ksock_conn, ksnc_list); - - /* Don't need the 
{get,put}connsock dance to deref ksnc_sock */ - LASSERT(!conn->ksnc_closing); - - /* - * SOCK_ERROR will reset error code of socket in - * some platform (like Darwin8.x) - */ - error = conn->ksnc_sock->sk->sk_err; - if (error) { - ksocknal_conn_addref(conn); - - switch (error) { - case ECONNRESET: - CNETERR("A connection with %s (%pI4h:%d) was reset; it may have rebooted.\n", - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - break; - case ETIMEDOUT: - CNETERR("A connection with %s (%pI4h:%d) timed out; the network or node may be down.\n", - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - break; - default: - CNETERR("An unexpected network error %d occurred with %s (%pI4h:%d\n", - error, - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - break; - } - - return conn; - } - - if (conn->ksnc_rx_started && - time_after_eq(jiffies, - conn->ksnc_rx_deadline)) { - /* Timed out incomplete incoming message */ - ksocknal_conn_addref(conn); - CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n", - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port, - conn->ksnc_rx_state, - iov_iter_count(&conn->ksnc_rx_to), - conn->ksnc_rx_nob_left); - return conn; - } - - if ((!list_empty(&conn->ksnc_tx_queue) || - conn->ksnc_sock->sk->sk_wmem_queued) && - time_after_eq(jiffies, - conn->ksnc_tx_deadline)) { - /* - * Timed out messages queued for sending or - * buffered in the socket's send buffer - */ - ksocknal_conn_addref(conn); - CNETERR("Timeout sending data to %s (%pI4h:%d) the network or that node may be down.\n", - libcfs_id2str(peer->ksnp_id), - &conn->ksnc_ipaddr, - conn->ksnc_port); - return conn; - } - } - - return NULL; -} - -static inline void -ksocknal_flush_stale_txs(struct ksock_peer *peer) -{ - struct ksock_tx *tx; - struct ksock_tx *tmp; - LIST_HEAD(stale_txs); - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - list_for_each_entry_safe(tx, tmp, 
&peer->ksnp_tx_queue, tx_list) { - if (!time_after_eq(jiffies, - tx->tx_deadline)) - break; - - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &stale_txs); - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); - - ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1); -} - -static int -ksocknal_send_keepalive_locked(struct ksock_peer *peer) - __must_hold(&ksocknal_data.ksnd_global_lock) -{ - struct ksock_sched *sched; - struct ksock_conn *conn; - struct ksock_tx *tx; - - /* last_alive will be updated by create_conn */ - if (list_empty(&peer->ksnp_conns)) - return 0; - - if (peer->ksnp_proto != &ksocknal_protocol_v3x) - return 0; - - if (*ksocknal_tunables.ksnd_keepalive <= 0 || - time_before(jiffies, - peer->ksnp_last_alive + *ksocknal_tunables.ksnd_keepalive * HZ)) - return 0; - - if (time_before(jiffies, peer->ksnp_send_keepalive)) - return 0; - - /* - * retry 10 secs later, so we wouldn't put pressure - * on this peer if we failed to send keepalive this time - */ - peer->ksnp_send_keepalive = jiffies + 10 * HZ; - - conn = ksocknal_find_conn_locked(peer, NULL, 1); - if (conn) { - sched = conn->ksnc_scheduler; - - spin_lock_bh(&sched->kss_lock); - if (!list_empty(&conn->ksnc_tx_queue)) { - spin_unlock_bh(&sched->kss_lock); - /* there is an queued ACK, don't need keepalive */ - return 0; - } - - spin_unlock_bh(&sched->kss_lock); - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - - /* cookie = 1 is reserved for keepalive PING */ - tx = ksocknal_alloc_tx_noop(1, 1); - if (!tx) { - read_lock(&ksocknal_data.ksnd_global_lock); - return -ENOMEM; - } - - if (!ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) { - read_lock(&ksocknal_data.ksnd_global_lock); - return 1; - } - - ksocknal_free_tx(tx); - read_lock(&ksocknal_data.ksnd_global_lock); - - return -EIO; -} - -static void -ksocknal_check_peer_timeouts(int idx) -{ - struct list_head *peers = &ksocknal_data.ksnd_peers[idx]; - struct ksock_peer *peer; - struct ksock_conn *conn; - struct ksock_tx *tx; 
- - again: - /* - * NB. We expect to have a look at all the peers and not find any - * connections to time out, so we just use a shared lock while we - * take a look... - */ - read_lock(&ksocknal_data.ksnd_global_lock); - - list_for_each_entry(peer, peers, ksnp_list) { - unsigned long deadline = 0; - struct ksock_tx *tx_stale; - int resid = 0; - int n = 0; - - if (ksocknal_send_keepalive_locked(peer)) { - read_unlock(&ksocknal_data.ksnd_global_lock); - goto again; - } - - conn = ksocknal_find_timed_out_conn(peer); - - if (conn) { - read_unlock(&ksocknal_data.ksnd_global_lock); - - ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT); - - /* - * NB we won't find this one again, but we can't - * just proceed with the next peer, since we dropped - * ksnd_global_lock and it might be dead already! - */ - ksocknal_conn_decref(conn); - goto again; - } - - /* - * we can't process stale txs right here because we're - * holding only shared lock - */ - if (!list_empty(&peer->ksnp_tx_queue)) { - tx = list_entry(peer->ksnp_tx_queue.next, - struct ksock_tx, tx_list); - - if (time_after_eq(jiffies, - tx->tx_deadline)) { - ksocknal_peer_addref(peer); - read_unlock(&ksocknal_data.ksnd_global_lock); - - ksocknal_flush_stale_txs(peer); - - ksocknal_peer_decref(peer); - goto again; - } - } - - if (list_empty(&peer->ksnp_zc_req_list)) - continue; - - tx_stale = NULL; - spin_lock(&peer->ksnp_lock); - list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) { - if (!time_after_eq(jiffies, - tx->tx_deadline)) - break; - /* ignore the TX if connection is being closed */ - if (tx->tx_conn->ksnc_closing) - continue; - if (!tx_stale) - tx_stale = tx; - n++; - } - - if (!tx_stale) { - spin_unlock(&peer->ksnp_lock); - continue; - } - - deadline = tx_stale->tx_deadline; - resid = tx_stale->tx_resid; - conn = tx_stale->tx_conn; - ksocknal_conn_addref(conn); - - spin_unlock(&peer->ksnp_lock); - read_unlock(&ksocknal_data.ksnd_global_lock); - - CERROR("Total %d stale ZC_REQs for peer %s detected; 
the oldest(%p) timed out %ld secs ago, resid: %d, wmem: %d\n", - n, libcfs_nid2str(peer->ksnp_id.nid), tx_stale, - (jiffies - deadline) / HZ, - resid, conn->ksnc_sock->sk->sk_wmem_queued); - - ksocknal_close_conn_and_siblings(conn, -ETIMEDOUT); - ksocknal_conn_decref(conn); - goto again; - } - - read_unlock(&ksocknal_data.ksnd_global_lock); -} - -int -ksocknal_reaper(void *arg) -{ - wait_queue_entry_t wait; - struct ksock_conn *conn; - struct ksock_sched *sched; - struct list_head enomem_conns; - int nenomem_conns; - long timeout; - int i; - int peer_index = 0; - unsigned long deadline = jiffies; - - INIT_LIST_HEAD(&enomem_conns); - init_waitqueue_entry(&wait, current); - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - if (!list_empty(&ksocknal_data.ksnd_deathrow_conns)) { - conn = list_entry(ksocknal_data.ksnd_deathrow_conns.next, - struct ksock_conn, ksnc_list); - list_del(&conn->ksnc_list); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - - ksocknal_terminate_conn(conn); - ksocknal_conn_decref(conn); - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - continue; - } - - if (!list_empty(&ksocknal_data.ksnd_zombie_conns)) { - conn = list_entry(ksocknal_data.ksnd_zombie_conns.next, - struct ksock_conn, ksnc_list); - list_del(&conn->ksnc_list); - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - - ksocknal_destroy_conn(conn); - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - continue; - } - - if (!list_empty(&ksocknal_data.ksnd_enomem_conns)) { - list_add(&enomem_conns, - &ksocknal_data.ksnd_enomem_conns); - list_del_init(&ksocknal_data.ksnd_enomem_conns); - } - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - - /* reschedule all the connections that stalled with ENOMEM... 
*/ - nenomem_conns = 0; - while (!list_empty(&enomem_conns)) { - conn = list_entry(enomem_conns.next, struct ksock_conn, - ksnc_tx_list); - list_del(&conn->ksnc_tx_list); - - sched = conn->ksnc_scheduler; - - spin_lock_bh(&sched->kss_lock); - - LASSERT(conn->ksnc_tx_scheduled); - conn->ksnc_tx_ready = 1; - list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); - wake_up(&sched->kss_waitq); - - spin_unlock_bh(&sched->kss_lock); - nenomem_conns++; - } - - /* careful with the jiffy wrap... */ - while ((timeout = deadline - jiffies) <= 0) { - const int n = 4; - const int p = 1; - int chunk = ksocknal_data.ksnd_peer_hash_size; - - /* - * Time to check for timeouts on a few more peers: I do - * checks every 'p' seconds on a proportion of the peer - * table and I need to check every connection 'n' times - * within a timeout interval, to ensure I detect a - * timeout on any connection within (n+1)/n times the - * timeout interval. - */ - if (*ksocknal_tunables.ksnd_timeout > n * p) - chunk = (chunk * n * p) / - *ksocknal_tunables.ksnd_timeout; - if (!chunk) - chunk = 1; - - for (i = 0; i < chunk; i++) { - ksocknal_check_peer_timeouts(peer_index); - peer_index = (peer_index + 1) % - ksocknal_data.ksnd_peer_hash_size; - } - - deadline = deadline + p * HZ; - } - - if (nenomem_conns) { - /* - * Reduce my timeout if I rescheduled ENOMEM conns. - * This also prevents me getting woken immediately - * if any go back on my enomem list. 
- */ - timeout = SOCKNAL_ENOMEM_RETRY; - } - ksocknal_data.ksnd_reaper_waketime = jiffies + timeout; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait); - - if (!ksocknal_data.ksnd_shuttingdown && - list_empty(&ksocknal_data.ksnd_deathrow_conns) && - list_empty(&ksocknal_data.ksnd_zombie_conns)) - schedule_timeout(timeout); - - set_current_state(TASK_RUNNING); - remove_wait_queue(&ksocknal_data.ksnd_reaper_waitq, &wait); - - spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - } - - spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); - - ksocknal_thread_fini(); - return 0; -} diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c deleted file mode 100644 index 93a02cd6b6b5..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ /dev/null @@ -1,534 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#include -#include "socklnd.h" - -int -ksocknal_lib_get_conn_addrs(struct ksock_conn *conn) -{ - int rc = lnet_sock_getaddr(conn->ksnc_sock, 1, &conn->ksnc_ipaddr, - &conn->ksnc_port); - - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */ - LASSERT(!conn->ksnc_closing); - - if (rc) { - CERROR("Error %d getting sock peer IP\n", rc); - return rc; - } - - rc = lnet_sock_getaddr(conn->ksnc_sock, 0, &conn->ksnc_myipaddr, NULL); - if (rc) { - CERROR("Error %d getting sock local IP\n", rc); - return rc; - } - - return 0; -} - -int -ksocknal_lib_zc_capable(struct ksock_conn *conn) -{ - int caps = conn->ksnc_sock->sk->sk_route_caps; - - if (conn->ksnc_proto == &ksocknal_protocol_v1x) - return 0; - - /* - * ZC if the socket supports scatter/gather and doesn't need software - * checksums - */ - return ((caps & NETIF_F_SG) && (caps & NETIF_F_CSUM_MASK)); -} - -int -ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx) -{ - struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; - struct socket *sock = conn->ksnc_sock; - int nob, i; - - if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */ - conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */ - tx->tx_nob == tx->tx_resid && /* frist sending */ - !tx->tx_msg.ksm_csum) /* not checksummed */ - ksocknal_lib_csum_tx(tx); - - for (nob = i = 0; i < tx->tx_niov; i++) - nob += tx->tx_iov[i].iov_len; - - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, - tx->tx_iov, tx->tx_niov, nob); - return sock_sendmsg(sock, &msg); -} - -int -ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx) -{ - struct socket *sock = conn->ksnc_sock; - struct bio_vec *kiov = tx->tx_kiov; - int rc; - int nob; - - /* Not NOOP message */ - LASSERT(tx->tx_lnetmsg); - - if 
(tx->tx_msg.ksm_zc_cookies[0]) { - /* Zero copy is enabled */ - struct sock *sk = sock->sk; - struct page *page = kiov->bv_page; - int offset = kiov->bv_offset; - int fragsize = kiov->bv_len; - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, kiov->bv_len); - - if (!list_empty(&conn->ksnc_tx_queue) || - fragsize < tx->tx_resid) - msgflg |= MSG_MORE; - - if (sk->sk_prot->sendpage) { - rc = sk->sk_prot->sendpage(sk, page, - offset, fragsize, msgflg); - } else { - rc = tcp_sendpage(sk, page, offset, fragsize, msgflg); - } - } else { - struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; - int i; - - for (nob = i = 0; i < tx->tx_nkiov; i++) - nob += kiov[i].bv_len; - - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, - kiov, tx->tx_nkiov, nob); - rc = sock_sendmsg(sock, &msg); - } - return rc; -} - -void -ksocknal_lib_eager_ack(struct ksock_conn *conn) -{ - int opt = 1; - struct socket *sock = conn->ksnc_sock; - - /* - * Remind the socket to ACK eagerly. If I don't, the socket might - * think I'm about to send something it could piggy-back the ACK - * on, introducing delay in completing zero-copy sends in my - * peer. 
- */ - kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char *)&opt, - sizeof(opt)); -} - -static int lustre_csum(struct kvec *v, void *context) -{ - struct ksock_conn *conn = context; - conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum, - v->iov_base, v->iov_len); - return 0; -} - -int -ksocknal_lib_recv(struct ksock_conn *conn) -{ - struct msghdr msg = { .msg_iter = conn->ksnc_rx_to }; - __u32 saved_csum; - int rc; - - rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); - if (rc <= 0) - return rc; - - saved_csum = conn->ksnc_msg.ksm_csum; - if (!saved_csum) - return rc; - - /* header is included only in V2 - V3 checksums only the bulk data */ - if (!(conn->ksnc_rx_to.type & ITER_BVEC) && - conn->ksnc_proto != &ksocknal_protocol_v2x) - return rc; - - /* accumulate checksum */ - conn->ksnc_msg.ksm_csum = 0; - iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn); - conn->ksnc_msg.ksm_csum = saved_csum; - - return rc; -} - -void -ksocknal_lib_csum_tx(struct ksock_tx *tx) -{ - int i; - __u32 csum; - void *base; - - LASSERT(tx->tx_iov[0].iov_base == &tx->tx_msg); - LASSERT(tx->tx_conn); - LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x); - - tx->tx_msg.ksm_csum = 0; - - csum = crc32_le(~0, tx->tx_iov[0].iov_base, - tx->tx_iov[0].iov_len); - - if (tx->tx_kiov) { - for (i = 0; i < tx->tx_nkiov; i++) { - base = kmap(tx->tx_kiov[i].bv_page) + - tx->tx_kiov[i].bv_offset; - - csum = crc32_le(csum, base, tx->tx_kiov[i].bv_len); - - kunmap(tx->tx_kiov[i].bv_page); - } - } else { - for (i = 1; i < tx->tx_niov; i++) - csum = crc32_le(csum, tx->tx_iov[i].iov_base, - tx->tx_iov[i].iov_len); - } - - if (*ksocknal_tunables.ksnd_inject_csum_error) { - csum++; - *ksocknal_tunables.ksnd_inject_csum_error = 0; - } - - tx->tx_msg.ksm_csum = csum; -} - -int -ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, - int *rxmem, int *nagle) -{ - struct socket *sock = conn->ksnc_sock; - int len; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc) 
{ - LASSERT(conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return -ESHUTDOWN; - } - - rc = lnet_sock_getbuf(sock, txmem, rxmem); - if (!rc) { - len = sizeof(*nagle); - rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)nagle, &len); - } - - ksocknal_connsock_decref(conn); - - if (!rc) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - return rc; -} - -int -ksocknal_lib_setup_sock(struct socket *sock) -{ - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - struct linger linger; - - sock->sk->sk_allocation = GFP_NOFS; - - /* - * Ensure this socket aborts active sends immediately when we close - * it. - */ - linger.l_onoff = 0; - linger.l_linger = 0; - - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&linger, - sizeof(linger)); - if (rc) { - CERROR("Can't set SO_LINGER: %d\n", rc); - return rc; - } - - option = -1; - rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, (char *)&option, - sizeof(option)); - if (rc) { - CERROR("Can't set SO_LINGER2: %d\n", rc); - return rc; - } - - if (!*ksocknal_tunables.ksnd_nagle) { - option = 1; - - rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)&option, sizeof(option)); - if (rc) { - CERROR("Can't disable nagle: %d\n", rc); - return rc; - } - } - - rc = lnet_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size); - if (rc) { - CERROR("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); - return rc; - } - -/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */ - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - - option = (do_keepalive ? 
1 : 0); - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option, - sizeof(option)); - if (rc) { - CERROR("Can't set SO_KEEPALIVE: %d\n", rc); - return rc; - } - - if (!do_keepalive) - return 0; - - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle, - sizeof(keep_idle)); - if (rc) { - CERROR("Can't set TCP_KEEPIDLE: %d\n", rc); - return rc; - } - - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keep_intvl, sizeof(keep_intvl)); - if (rc) { - CERROR("Can't set TCP_KEEPINTVL: %d\n", rc); - return rc; - } - - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count, - sizeof(keep_count)); - if (rc) { - CERROR("Can't set TCP_KEEPCNT: %d\n", rc); - return rc; - } - - return 0; -} - -void -ksocknal_lib_push_conn(struct ksock_conn *conn) -{ - struct sock *sk; - struct tcp_sock *tp; - int nonagle; - int val = 1; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc) /* being shut down */ - return; - - sk = conn->ksnc_sock->sk; - tp = tcp_sk(sk); - - lock_sock(sk); - nonagle = tp->nonagle; - tp->nonagle = 1; - release_sock(sk); - - rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - LASSERT(!rc); - - lock_sock(sk); - tp->nonagle = nonagle; - release_sock(sk); - - ksocknal_connsock_decref(conn); -} - -/* - * socket call back in Linux - */ -static void -ksocknal_data_ready(struct sock *sk) -{ - struct ksock_conn *conn; - - /* interleave correctly with closing sockets... */ - LASSERT(!in_irq()); - read_lock(&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - if (!conn) { /* raced with ksocknal_terminate_conn */ - LASSERT(sk->sk_data_ready != &ksocknal_data_ready); - sk->sk_data_ready(sk); - } else { - ksocknal_read_callback(conn); - } - - read_unlock(&ksocknal_data.ksnd_global_lock); -} - -static void -ksocknal_write_space(struct sock *sk) -{ - struct ksock_conn *conn; - int wspace; - int min_wpace; - - /* interleave correctly with closing sockets... 
*/ - LASSERT(!in_irq()); - read_lock(&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - wspace = sk_stream_wspace(sk); - min_wpace = sk_stream_min_wspace(sk); - - CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", - sk, wspace, min_wpace, conn, - !conn ? "" : (conn->ksnc_tx_ready ? - " ready" : " blocked"), - !conn ? "" : (conn->ksnc_tx_scheduled ? - " scheduled" : " idle"), - !conn ? "" : (list_empty(&conn->ksnc_tx_queue) ? - " empty" : " queued")); - - if (!conn) { /* raced with ksocknal_terminate_conn */ - LASSERT(sk->sk_write_space != &ksocknal_write_space); - sk->sk_write_space(sk); - - read_unlock(&ksocknal_data.ksnd_global_lock); - return; - } - - if (wspace >= min_wpace) { /* got enough space */ - ksocknal_write_callback(conn); - - /* - * Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the - * ENOMEM check in ksocknal_transmit is race-free (think about - * it). - */ - clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - } - - read_unlock(&ksocknal_data.ksnd_global_lock); -} - -void -ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn) -{ - conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; - conn->ksnc_saved_write_space = sock->sk->sk_write_space; -} - -void -ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn) -{ - sock->sk->sk_user_data = conn; - sock->sk->sk_data_ready = ksocknal_data_ready; - sock->sk->sk_write_space = ksocknal_write_space; -} - -void -ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn) -{ - /* - * Remove conn's network callbacks. - * NB I _have_ to restore the callback, rather than storing a noop, - * since the socket could survive past this module being unloaded!! - */ - sock->sk->sk_data_ready = conn->ksnc_saved_data_ready; - sock->sk->sk_write_space = conn->ksnc_saved_write_space; - - /* - * A callback could be in progress already; they hold a read lock - * on ksnd_global_lock (to serialise with me) and NOOP if - * sk_user_data is NULL. 
- */ - sock->sk->sk_user_data = NULL; -} - -int -ksocknal_lib_memory_pressure(struct ksock_conn *conn) -{ - int rc = 0; - struct ksock_sched *sched; - - sched = conn->ksnc_scheduler; - spin_lock_bh(&sched->kss_lock); - - if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) && - !conn->ksnc_tx_ready) { - /* - * SOCK_NOSPACE is set when the socket fills - * and cleared in the write_space callback - * (which also sets ksnc_tx_ready). If - * SOCK_NOSPACE and ksnc_tx_ready are BOTH - * zero, I didn't fill the socket and - * write_space won't reschedule me, so I - * return -ENOMEM to get my caller to retry - * after a timeout - */ - rc = -ENOMEM; - } - - spin_unlock_bh(&sched->kss_lock); - - return rc; -} diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c deleted file mode 100644 index 5663a4ca94d4..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_modparams.c +++ /dev/null @@ -1,184 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2012, Intel Corporation. - * - * Author: Eric Barton - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#include "socklnd.h" - -static int sock_timeout = 50; -module_param(sock_timeout, int, 0644); -MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)"); - -static int credits = 256; -module_param(credits, int, 0444); -MODULE_PARM_DESC(credits, "# concurrent sends"); - -static int peer_credits = 8; -module_param(peer_credits, int, 0444); -MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer"); - -static int peer_buffer_credits; -module_param(peer_buffer_credits, int, 0444); -MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits"); - -static int peer_timeout = 180; -module_param(peer_timeout, int, 0444); -MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)"); - -/* - * Number of daemons in each thread pool which is percpt, - * we will estimate reasonable value based on CPUs if it's not set. - */ -static unsigned int nscheds; -module_param(nscheds, int, 0444); -MODULE_PARM_DESC(nscheds, "# scheduler daemons in each pool while starting"); - -static int nconnds = 4; -module_param(nconnds, int, 0444); -MODULE_PARM_DESC(nconnds, "# connection daemons while starting"); - -static int nconnds_max = 64; -module_param(nconnds_max, int, 0444); -MODULE_PARM_DESC(nconnds_max, "max # connection daemons"); - -static int min_reconnectms = 1000; -module_param(min_reconnectms, int, 0644); -MODULE_PARM_DESC(min_reconnectms, "min connection retry interval (mS)"); - -static int max_reconnectms = 60000; -module_param(max_reconnectms, int, 0644); -MODULE_PARM_DESC(max_reconnectms, "max connection retry interval (mS)"); - -# define DEFAULT_EAGER_ACK 0 -static int eager_ack = DEFAULT_EAGER_ACK; -module_param(eager_ack, int, 0644); -MODULE_PARM_DESC(eager_ack, "send tcp ack packets eagerly"); - -static int typed_conns = 1; -module_param(typed_conns, int, 0444); -MODULE_PARM_DESC(typed_conns, "use different sockets for bulk"); - -static int min_bulk = 1 << 10; -module_param(min_bulk, int, 0644); 
-MODULE_PARM_DESC(min_bulk, "smallest 'large' message"); - -# define DEFAULT_BUFFER_SIZE 0 -static int tx_buffer_size = DEFAULT_BUFFER_SIZE; -module_param(tx_buffer_size, int, 0644); -MODULE_PARM_DESC(tx_buffer_size, "socket tx buffer size (0 for system default)"); - -static int rx_buffer_size = DEFAULT_BUFFER_SIZE; -module_param(rx_buffer_size, int, 0644); -MODULE_PARM_DESC(rx_buffer_size, "socket rx buffer size (0 for system default)"); - -static int nagle; -module_param(nagle, int, 0644); -MODULE_PARM_DESC(nagle, "enable NAGLE?"); - -static int round_robin = 1; -module_param(round_robin, int, 0644); -MODULE_PARM_DESC(round_robin, "Round robin for multiple interfaces"); - -static int keepalive = 30; -module_param(keepalive, int, 0644); -MODULE_PARM_DESC(keepalive, "# seconds before send keepalive"); - -static int keepalive_idle = 30; -module_param(keepalive_idle, int, 0644); -MODULE_PARM_DESC(keepalive_idle, "# idle seconds before probe"); - -#define DEFAULT_KEEPALIVE_COUNT 5 -static int keepalive_count = DEFAULT_KEEPALIVE_COUNT; -module_param(keepalive_count, int, 0644); -MODULE_PARM_DESC(keepalive_count, "# missed probes == dead"); - -static int keepalive_intvl = 5; -module_param(keepalive_intvl, int, 0644); -MODULE_PARM_DESC(keepalive_intvl, "seconds between probes"); - -static int enable_csum; -module_param(enable_csum, int, 0644); -MODULE_PARM_DESC(enable_csum, "enable check sum"); - -static int inject_csum_error; -module_param(inject_csum_error, int, 0644); -MODULE_PARM_DESC(inject_csum_error, "set non-zero to inject a checksum error"); - -static int nonblk_zcack = 1; -module_param(nonblk_zcack, int, 0644); -MODULE_PARM_DESC(nonblk_zcack, "always send ZC-ACK on non-blocking connection"); - -static unsigned int zc_min_payload = 16 << 10; -module_param(zc_min_payload, int, 0644); -MODULE_PARM_DESC(zc_min_payload, "minimum payload size to zero copy"); - -static unsigned int zc_recv; -module_param(zc_recv, int, 0644); -MODULE_PARM_DESC(zc_recv, "enable ZC recv 
for Chelsio driver"); - -static unsigned int zc_recv_min_nfrags = 16; -module_param(zc_recv_min_nfrags, int, 0644); -MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv"); - -#if SOCKNAL_VERSION_DEBUG -static int protocol = 3; -module_param(protocol, int, 0644); -MODULE_PARM_DESC(protocol, "protocol version"); -#endif - -struct ksock_tunables ksocknal_tunables; - -int ksocknal_tunables_init(void) -{ - /* initialize ksocknal_tunables structure */ - ksocknal_tunables.ksnd_timeout = &sock_timeout; - ksocknal_tunables.ksnd_nscheds = &nscheds; - ksocknal_tunables.ksnd_nconnds = &nconnds; - ksocknal_tunables.ksnd_nconnds_max = &nconnds_max; - ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms; - ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms; - ksocknal_tunables.ksnd_eager_ack = &eager_ack; - ksocknal_tunables.ksnd_typed_conns = &typed_conns; - ksocknal_tunables.ksnd_min_bulk = &min_bulk; - ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size; - ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size; - ksocknal_tunables.ksnd_nagle = &nagle; - ksocknal_tunables.ksnd_round_robin = &round_robin; - ksocknal_tunables.ksnd_keepalive = &keepalive; - ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle; - ksocknal_tunables.ksnd_keepalive_count = &keepalive_count; - ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl; - ksocknal_tunables.ksnd_credits = &credits; - ksocknal_tunables.ksnd_peertxcredits = &peer_credits; - ksocknal_tunables.ksnd_peerrtrcredits = &peer_buffer_credits; - ksocknal_tunables.ksnd_peertimeout = &peer_timeout; - ksocknal_tunables.ksnd_enable_csum = &enable_csum; - ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error; - ksocknal_tunables.ksnd_nonblk_zcack = &nonblk_zcack; - ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload; - ksocknal_tunables.ksnd_zc_recv = &zc_recv; - ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags; - -#if SOCKNAL_VERSION_DEBUG - 
ksocknal_tunables.ksnd_protocol = &protocol; -#endif - - if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10)) - *ksocknal_tunables.ksnd_zc_min_payload = 2 << 10; - - return 0; -}; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c deleted file mode 100644 index 05982dac781c..000000000000 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c +++ /dev/null @@ -1,810 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2012, Intel Corporation. - * - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#include "socklnd.h" - -/* - * Protocol entries : - * pro_send_hello : send hello message - * pro_recv_hello : receive hello message - * pro_pack : pack message header - * pro_unpack : unpack message header - * pro_queue_tx_zcack() : Called holding BH lock: kss_lock - * return 1 if ACK is piggybacked, otherwise return 0 - * pro_queue_tx_msg() : Called holding BH lock: kss_lock - * return the ACK that piggybacked by my message, or NULL - * pro_handle_zcreq() : handler of incoming ZC-REQ - * pro_handle_zcack() : handler of incoming ZC-ACK - * pro_match_tx() : Called holding glock - */ - -static struct ksock_tx * -ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg) -{ - /* V1.x, just enqueue it */ - list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); - return NULL; -} - -void -ksocknal_next_tx_carrier(struct ksock_conn *conn) -{ - struct ksock_tx *tx = conn->ksnc_tx_carrier; - - /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ - LASSERT(!list_empty(&conn->ksnc_tx_queue)); - LASSERT(tx); - - /* Next TX that can carry ZC-ACK or LNet message */ - if (tx->tx_list.next == &conn->ksnc_tx_queue) { - /* no more packets queued */ - conn->ksnc_tx_carrier = NULL; - } else { - conn->ksnc_tx_carrier = list_next_entry(tx, tx_list); - LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type); - } -} - -static int -ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn, - struct ksock_tx *tx_ack, __u64 cookie) -{ - struct ksock_tx *tx = conn->ksnc_tx_carrier; - - LASSERT(!tx_ack || - tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - /* - * Enqueue or piggyback tx_ack / cookie - * . no tx can piggyback cookie of tx_ack (or cookie), just - * enqueue the tx_ack (if tx_ack != NUL) and return NULL. - * . There is tx can piggyback cookie of tx_ack (or cookie), - * piggyback the cookie and return the tx. 
- */ - if (!tx) { - if (tx_ack) { - list_add_tail(&tx_ack->tx_list, - &conn->ksnc_tx_queue); - conn->ksnc_tx_carrier = tx_ack; - } - return 0; - } - - if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) { - /* tx is noop zc-ack, can't piggyback zc-ack cookie */ - if (tx_ack) - list_add_tail(&tx_ack->tx_list, - &conn->ksnc_tx_queue); - return 0; - } - - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET); - LASSERT(!tx->tx_msg.ksm_zc_cookies[1]); - - if (tx_ack) - cookie = tx_ack->tx_msg.ksm_zc_cookies[1]; - - /* piggyback the zc-ack cookie */ - tx->tx_msg.ksm_zc_cookies[1] = cookie; - /* move on to the next TX which can carry cookie */ - ksocknal_next_tx_carrier(conn); - - return 1; -} - -static struct ksock_tx * -ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg) -{ - struct ksock_tx *tx = conn->ksnc_tx_carrier; - - /* - * Enqueue tx_msg: - * . If there is no NOOP on the connection, just enqueue - * tx_msg and return NULL - * . If there is NOOP on the connection, piggyback the cookie - * and replace the NOOP tx, and return the NOOP tx. 
- */ - if (!tx) { /* nothing on queue */ - list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); - conn->ksnc_tx_carrier = tx_msg; - return NULL; - } - - if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */ - list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); - return NULL; - } - - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - /* There is a noop zc-ack can be piggybacked */ - tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1]; - ksocknal_next_tx_carrier(conn); - - /* use new_tx to replace the noop zc-ack packet */ - list_add(&tx_msg->tx_list, &tx->tx_list); - list_del(&tx->tx_list); - - return tx; -} - -static int -ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, - struct ksock_tx *tx_ack, __u64 cookie) -{ - struct ksock_tx *tx; - - if (conn->ksnc_type != SOCKLND_CONN_ACK) - return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie); - - /* non-blocking ZC-ACK (to router) */ - LASSERT(!tx_ack || - tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - tx = conn->ksnc_tx_carrier; - if (!tx) { - if (tx_ack) { - list_add_tail(&tx_ack->tx_list, - &conn->ksnc_tx_queue); - conn->ksnc_tx_carrier = tx_ack; - } - return 0; - } - - /* conn->ksnc_tx_carrier */ - - if (tx_ack) - cookie = tx_ack->tx_msg.ksm_zc_cookies[1]; - - if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */ - return 1; - - if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) { - /* replace the keepalive PING with a real ACK */ - LASSERT(!tx->tx_msg.ksm_zc_cookies[0]); - tx->tx_msg.ksm_zc_cookies[1] = cookie; - return 1; - } - - if (cookie == tx->tx_msg.ksm_zc_cookies[0] || - cookie == tx->tx_msg.ksm_zc_cookies[1]) { - CWARN("%s: duplicated ZC cookie: %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX return error in the future */ - } - - if (!tx->tx_msg.ksm_zc_cookies[0]) { - /* - * NOOP tx has only one ZC-ACK cookie, - * can carry at least one more - */ - if (tx->tx_msg.ksm_zc_cookies[1] > cookie) { - 
tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1]; - tx->tx_msg.ksm_zc_cookies[1] = cookie; - } else { - tx->tx_msg.ksm_zc_cookies[0] = cookie; - } - - if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) { - /* - * not likely to carry more ACKs, skip it - * to simplify logic - */ - ksocknal_next_tx_carrier(conn); - } - - return 1; - } - - /* takes two or more cookies already */ - - if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) { - __u64 tmp = 0; - - /* two separated cookies: (a+2, a) or (a+1, a) */ - LASSERT(tx->tx_msg.ksm_zc_cookies[0] - - tx->tx_msg.ksm_zc_cookies[1] <= 2); - - if (tx->tx_msg.ksm_zc_cookies[0] - - tx->tx_msg.ksm_zc_cookies[1] == 2) { - if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) - tmp = cookie; - } else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) { - tmp = tx->tx_msg.ksm_zc_cookies[1]; - } else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) { - tmp = tx->tx_msg.ksm_zc_cookies[0]; - } - - if (tmp) { - /* range of cookies */ - tx->tx_msg.ksm_zc_cookies[0] = tmp - 1; - tx->tx_msg.ksm_zc_cookies[1] = tmp + 1; - return 1; - } - - } else { - /* - * ksm_zc_cookies[0] < ksm_zc_cookies[1], - * it is range of cookies - */ - if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && - cookie <= tx->tx_msg.ksm_zc_cookies[1]) { - CWARN("%s: duplicated ZC cookie: %llu\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); - return 1; /* XXX: return error in the future */ - } - - if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) { - tx->tx_msg.ksm_zc_cookies[1] = cookie; - return 1; - } - - if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) { - tx->tx_msg.ksm_zc_cookies[0] = cookie; - return 1; - } - } - - /* failed to piggyback ZC-ACK */ - if (tx_ack) { - list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue); - /* the next tx can piggyback at least 1 ACK */ - ksocknal_next_tx_carrier(conn); - } - - return 0; -} - -static int -ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) -{ - int nob; - -#if 
SOCKNAL_VERSION_DEBUG - if (!*ksocknal_tunables.ksnd_typed_conns) - return SOCKNAL_MATCH_YES; -#endif - - if (!tx || !tx->tx_lnetmsg) { - /* noop packet */ - nob = offsetof(struct ksock_msg, ksm_u); - } else { - nob = tx->tx_lnetmsg->msg_len + - ((conn->ksnc_proto == &ksocknal_protocol_v1x) ? - sizeof(struct lnet_hdr) : sizeof(struct ksock_msg)); - } - - /* default checking for typed connection */ - switch (conn->ksnc_type) { - default: - CERROR("ksnc_type bad: %u\n", conn->ksnc_type); - LBUG(); - case SOCKLND_CONN_ANY: - return SOCKNAL_MATCH_YES; - - case SOCKLND_CONN_BULK_IN: - return SOCKNAL_MATCH_MAY; - - case SOCKLND_CONN_BULK_OUT: - if (nob < *ksocknal_tunables.ksnd_min_bulk) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_YES; - - case SOCKLND_CONN_CONTROL: - if (nob >= *ksocknal_tunables.ksnd_min_bulk) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_YES; - } -} - -static int -ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) -{ - int nob; - - if (!tx || !tx->tx_lnetmsg) - nob = offsetof(struct ksock_msg, ksm_u); - else - nob = tx->tx_lnetmsg->msg_len + sizeof(struct ksock_msg); - - switch (conn->ksnc_type) { - default: - CERROR("ksnc_type bad: %u\n", conn->ksnc_type); - LBUG(); - case SOCKLND_CONN_ANY: - return SOCKNAL_MATCH_NO; - - case SOCKLND_CONN_ACK: - if (nonblk) - return SOCKNAL_MATCH_YES; - else if (!tx || !tx->tx_lnetmsg) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_NO; - - case SOCKLND_CONN_BULK_OUT: - if (nonblk) - return SOCKNAL_MATCH_NO; - else if (nob < *ksocknal_tunables.ksnd_min_bulk) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_YES; - - case SOCKLND_CONN_CONTROL: - if (nonblk) - return SOCKNAL_MATCH_NO; - else if (nob >= *ksocknal_tunables.ksnd_min_bulk) - return SOCKNAL_MATCH_MAY; - else - return SOCKNAL_MATCH_YES; - } -} - -/* (Sink) handle incoming ZC request from sender */ -static int -ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote) -{ - 
struct ksock_peer *peer = c->ksnc_peer; - struct ksock_conn *conn; - struct ksock_tx *tx; - int rc; - - read_lock(&ksocknal_data.ksnd_global_lock); - - conn = ksocknal_find_conn_locked(peer, NULL, !!remote); - if (conn) { - struct ksock_sched *sched = conn->ksnc_scheduler; - - LASSERT(conn->ksnc_proto->pro_queue_tx_zcack); - - spin_lock_bh(&sched->kss_lock); - - rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie); - - spin_unlock_bh(&sched->kss_lock); - - if (rc) { /* piggybacked */ - read_unlock(&ksocknal_data.ksnd_global_lock); - return 0; - } - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - - /* ACK connection is not ready, or can't piggyback the ACK */ - tx = ksocknal_alloc_tx_noop(cookie, !!remote); - if (!tx) - return -ENOMEM; - - rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id); - if (!rc) - return 0; - - ksocknal_free_tx(tx); - return rc; -} - -/* (Sender) handle ZC_ACK from sink */ -static int -ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2) -{ - struct ksock_peer *peer = conn->ksnc_peer; - struct ksock_tx *tx; - struct ksock_tx *temp; - struct ksock_tx *tmp; - LIST_HEAD(zlist); - int count; - - if (!cookie1) - cookie1 = cookie2; - - count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1); - - if (cookie2 == SOCKNAL_KEEPALIVE_PING && - conn->ksnc_proto == &ksocknal_protocol_v3x) { - /* keepalive PING for V3.x, just ignore it */ - return count == 1 ? 0 : -EPROTO; - } - - spin_lock(&peer->ksnp_lock); - - list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, - tx_zc_list) { - __u64 c = tx->tx_msg.ksm_zc_cookies[0]; - - if (c == cookie1 || c == cookie2 || - (cookie1 < c && c < cookie2)) { - tx->tx_msg.ksm_zc_cookies[0] = 0; - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); - - if (!--count) - break; - } - } - - spin_unlock(&peer->ksnp_lock); - - list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) { - list_del(&tx->tx_zc_list); - ksocknal_tx_decref(tx); - } - - return !count ? 
0 : -EPROTO; -} - -static int -ksocknal_send_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello) -{ - struct socket *sock = conn->ksnc_sock; - struct lnet_hdr *hdr; - struct lnet_magicversion *hmv; - int rc; - int i; - - BUILD_BUG_ON(sizeof(struct lnet_magicversion) != offsetof(struct lnet_hdr, src_nid)); - - hdr = kzalloc(sizeof(*hdr), GFP_NOFS); - if (!hdr) { - CERROR("Can't allocate struct lnet_hdr\n"); - return -ENOMEM; - } - - hmv = (struct lnet_magicversion *)&hdr->dest_nid; - - /* - * Re-organize V2.x message header to V1.x (struct lnet_hdr) - * header and send out - */ - hmv->magic = cpu_to_le32(LNET_PROTO_TCP_MAGIC); - hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR); - hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR); - - if (the_lnet.ln_testprotocompat) { - /* single-shot proto check */ - LNET_LOCK(); - if (the_lnet.ln_testprotocompat & 1) { - hmv->version_major++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - if (the_lnet.ln_testprotocompat & 2) { - hmv->magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - hdr->src_nid = cpu_to_le64(hello->kshm_src_nid); - hdr->src_pid = cpu_to_le32(hello->kshm_src_pid); - hdr->type = cpu_to_le32(LNET_MSG_HELLO); - hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32)); - hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype); - hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation); - - rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout()); - if (rc) { - CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", - rc, &conn->ksnc_ipaddr, conn->ksnc_port); - goto out; - } - - if (!hello->kshm_nips) - goto out; - - for (i = 0; i < (int)hello->kshm_nips; i++) - hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]); - - rc = lnet_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc) { - CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n", - rc, 
hello->kshm_nips, - &conn->ksnc_ipaddr, conn->ksnc_port); - } -out: - kfree(hdr); - - return rc; -} - -static int -ksocknal_send_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello) -{ - struct socket *sock = conn->ksnc_sock; - int rc; - - hello->kshm_magic = LNET_PROTO_MAGIC; - hello->kshm_version = conn->ksnc_proto->pro_version; - - if (the_lnet.ln_testprotocompat) { - /* single-shot proto check */ - LNET_LOCK(); - if (the_lnet.ln_testprotocompat & 1) { - hello->kshm_version++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - LNET_UNLOCK(); - } - - rc = lnet_sock_write(sock, hello, offsetof(struct ksock_hello_msg, kshm_ips), - lnet_acceptor_timeout()); - if (rc) { - CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", - rc, &conn->ksnc_ipaddr, conn->ksnc_port); - return rc; - } - - if (!hello->kshm_nips) - return 0; - - rc = lnet_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc) { - CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n", - rc, hello->kshm_nips, - &conn->ksnc_ipaddr, conn->ksnc_port); - } - - return rc; -} - -static int -ksocknal_recv_hello_v1(struct ksock_conn *conn, struct ksock_hello_msg *hello, - int timeout) -{ - struct socket *sock = conn->ksnc_sock; - struct lnet_hdr *hdr; - int rc; - int i; - - hdr = kzalloc(sizeof(*hdr), GFP_NOFS); - if (!hdr) { - CERROR("Can't allocate struct lnet_hdr\n"); - return -ENOMEM; - } - - rc = lnet_sock_read(sock, &hdr->src_nid, - sizeof(*hdr) - offsetof(struct lnet_hdr, src_nid), - timeout); - if (rc) { - CERROR("Error %d reading rest of HELLO hdr from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0 && rc != -EALREADY); - goto out; - } - - /* ...and check we got what we expected */ - if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) { - CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n", - le32_to_cpu(hdr->type), - &conn->ksnc_ipaddr); - rc = -EPROTO; - goto out; - } - - hello->kshm_src_nid = 
le64_to_cpu(hdr->src_nid); - hello->kshm_src_pid = le32_to_cpu(hdr->src_pid); - hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation); - hello->kshm_ctype = le32_to_cpu(hdr->msg.hello.type); - hello->kshm_nips = le32_to_cpu(hdr->payload_length) / - sizeof(__u32); - - if (hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %pI4h\n", - hello->kshm_nips, &conn->ksnc_ipaddr); - rc = -EPROTO; - goto out; - } - - if (!hello->kshm_nips) - goto out; - - rc = lnet_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc) { - CERROR("Error %d reading IPs from ip %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0 && rc != -EALREADY); - goto out; - } - - for (i = 0; i < (int)hello->kshm_nips; i++) { - hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); - - if (!hello->kshm_ips[i]) { - CERROR("Zero IP[%d] from ip %pI4h\n", - i, &conn->ksnc_ipaddr); - rc = -EPROTO; - break; - } - } -out: - kfree(hdr); - - return rc; -} - -static int -ksocknal_recv_hello_v2(struct ksock_conn *conn, struct ksock_hello_msg *hello, - int timeout) -{ - struct socket *sock = conn->ksnc_sock; - int rc; - int i; - - if (hello->kshm_magic == LNET_PROTO_MAGIC) - conn->ksnc_flip = 0; - else - conn->ksnc_flip = 1; - - rc = lnet_sock_read(sock, &hello->kshm_src_nid, - offsetof(struct ksock_hello_msg, kshm_ips) - - offsetof(struct ksock_hello_msg, kshm_src_nid), - timeout); - if (rc) { - CERROR("Error %d reading HELLO from %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0 && rc != -EALREADY); - return rc; - } - - if (conn->ksnc_flip) { - __swab32s(&hello->kshm_src_pid); - __swab64s(&hello->kshm_src_nid); - __swab32s(&hello->kshm_dst_pid); - __swab64s(&hello->kshm_dst_nid); - __swab64s(&hello->kshm_src_incarnation); - __swab64s(&hello->kshm_dst_incarnation); - __swab32s(&hello->kshm_ctype); - __swab32s(&hello->kshm_nips); - } - - if (hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %pI4h\n", - hello->kshm_nips, 
&conn->ksnc_ipaddr); - return -EPROTO; - } - - if (!hello->kshm_nips) - return 0; - - rc = lnet_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc) { - CERROR("Error %d reading IPs from ip %pI4h\n", - rc, &conn->ksnc_ipaddr); - LASSERT(rc < 0 && rc != -EALREADY); - return rc; - } - - for (i = 0; i < (int)hello->kshm_nips; i++) { - if (conn->ksnc_flip) - __swab32s(&hello->kshm_ips[i]); - - if (!hello->kshm_ips[i]) { - CERROR("Zero IP[%d] from ip %pI4h\n", - i, &conn->ksnc_ipaddr); - return -EPROTO; - } - } - - return 0; -} - -static void -ksocknal_pack_msg_v1(struct ksock_tx *tx) -{ - /* V1.x has no KSOCK_MSG_NOOP */ - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - LASSERT(tx->tx_lnetmsg); - - tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(struct lnet_hdr); - - tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr); - tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(struct lnet_hdr); -} - -static void -ksocknal_pack_msg_v2(struct ksock_tx *tx) -{ - tx->tx_iov[0].iov_base = &tx->tx_msg; - - if (tx->tx_lnetmsg) { - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - - tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(struct ksock_msg); - tx->tx_nob = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len; - tx->tx_resid = sizeof(struct ksock_msg) + tx->tx_lnetmsg->msg_len; - } else { - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - tx->tx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_nob = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_resid = offsetof(struct ksock_msg, ksm_u.lnetmsg.ksnm_hdr); - } - /* - * Don't checksum before start sending, because packet can be - * piggybacked with ACK - */ -} - -static void -ksocknal_unpack_msg_v1(struct ksock_msg *msg) -{ - msg->ksm_csum = 0; - msg->ksm_type = KSOCK_MSG_LNET; - msg->ksm_zc_cookies[0] = 0; - msg->ksm_zc_cookies[1] = 0; -} - -static void 
-ksocknal_unpack_msg_v2(struct ksock_msg *msg) -{ - return; /* Do nothing */ -} - -struct ksock_proto ksocknal_protocol_v1x = { - .pro_version = KSOCK_PROTO_V1, - .pro_send_hello = ksocknal_send_hello_v1, - .pro_recv_hello = ksocknal_recv_hello_v1, - .pro_pack = ksocknal_pack_msg_v1, - .pro_unpack = ksocknal_unpack_msg_v1, - .pro_queue_tx_msg = ksocknal_queue_tx_msg_v1, - .pro_handle_zcreq = NULL, - .pro_handle_zcack = NULL, - .pro_queue_tx_zcack = NULL, - .pro_match_tx = ksocknal_match_tx -}; - -struct ksock_proto ksocknal_protocol_v2x = { - .pro_version = KSOCK_PROTO_V2, - .pro_send_hello = ksocknal_send_hello_v2, - .pro_recv_hello = ksocknal_recv_hello_v2, - .pro_pack = ksocknal_pack_msg_v2, - .pro_unpack = ksocknal_unpack_msg_v2, - .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2, - .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2, - .pro_handle_zcreq = ksocknal_handle_zcreq, - .pro_handle_zcack = ksocknal_handle_zcack, - .pro_match_tx = ksocknal_match_tx -}; - -struct ksock_proto ksocknal_protocol_v3x = { - .pro_version = KSOCK_PROTO_V3, - .pro_send_hello = ksocknal_send_hello_v2, - .pro_recv_hello = ksocknal_recv_hello_v2, - .pro_pack = ksocknal_pack_msg_v2, - .pro_unpack = ksocknal_unpack_msg_v2, - .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2, - .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3, - .pro_handle_zcreq = ksocknal_handle_zcreq, - .pro_handle_zcack = ksocknal_handle_zcack, - .pro_match_tx = ksocknal_match_tx_v3 -}; diff --git a/drivers/staging/lustre/lnet/libcfs/Makefile b/drivers/staging/lustre/lnet/libcfs/Makefile deleted file mode 100644 index 6a1b232da495..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET) += libcfs.o - -libcfs-obj-y += linux-tracefile.o linux-debug.o -libcfs-obj-y += linux-crypto.o -libcfs-obj-y 
+= linux-crypto-adler.o - -libcfs-obj-y += debug.o fail.o module.o tracefile.o -libcfs-obj-y += libcfs_string.o hash.o -libcfs-obj-$(CONFIG_SMP) += libcfs_cpu.o -libcfs-obj-y += libcfs_mem.o libcfs_lock.o - -libcfs-objs := $(libcfs-obj-y) diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c deleted file mode 100644 index 06f694f6a28f..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/debug.c +++ /dev/null @@ -1,461 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * libcfs/libcfs/debug.c - * - * Author: Phil Schwan - * - */ - -# define DEBUG_SUBSYSTEM S_LNET - -#include -#include -#include -#include -#include "tracefile.h" - -static char debug_file_name[1024]; - -unsigned int libcfs_subsystem_debug = ~0; -EXPORT_SYMBOL(libcfs_subsystem_debug); -module_param(libcfs_subsystem_debug, int, 0644); -MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask"); - -unsigned int libcfs_debug = (D_CANTMASK | - D_NETERROR | D_HA | D_CONFIG | D_IOCTL); -EXPORT_SYMBOL(libcfs_debug); -module_param(libcfs_debug, int, 0644); -MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask"); - -static int libcfs_param_debug_mb_set(const char *val, - const struct kernel_param *kp) -{ - int rc; - unsigned int num; - - rc = kstrtouint(val, 0, &num); - if (rc < 0) - return rc; - - if (!*((unsigned int *)kp->arg)) { - *((unsigned int *)kp->arg) = num; - return 0; - } - - rc = cfs_trace_set_debug_mb(num); - - if (!rc) - *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb(); - - return rc; -} - -/* While debug_mb setting look like unsigned int, in fact - * it needs quite a bunch of extra processing, so we define special - * debugmb parameter type with corresponding methods to handle this case - */ -static const struct kernel_param_ops param_ops_debugmb = { - .set = libcfs_param_debug_mb_set, - .get = param_get_uint, -}; - -#define param_check_debugmb(name, p) \ - __param_check(name, p, unsigned int) - -static unsigned int libcfs_debug_mb; -module_param(libcfs_debug_mb, debugmb, 0644); -MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size."); - -unsigned int libcfs_printk = D_CANTMASK; -module_param(libcfs_printk, uint, 0644); -MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask"); - -unsigned int libcfs_console_ratelimit = 1; -module_param(libcfs_console_ratelimit, uint, 0644); -MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)"); - -static int 
param_set_delay_minmax(const char *val, - const struct kernel_param *kp, - long min, long max) -{ - long d; - int sec; - int rc; - - rc = kstrtoint(val, 0, &sec); - if (rc) - return -EINVAL; - - d = sec * HZ / 100; - if (d < min || d > max) - return -EINVAL; - - *((unsigned int *)kp->arg) = d; - - return 0; -} - -static int param_get_delay(char *buffer, const struct kernel_param *kp) -{ - unsigned int d = *(unsigned int *)kp->arg; - - return sprintf(buffer, "%u", (unsigned int)(d * 100) / HZ); -} - -unsigned int libcfs_console_max_delay; -unsigned int libcfs_console_min_delay; - -static int param_set_console_max_delay(const char *val, - const struct kernel_param *kp) -{ - return param_set_delay_minmax(val, kp, - libcfs_console_min_delay, INT_MAX); -} - -static const struct kernel_param_ops param_ops_console_max_delay = { - .set = param_set_console_max_delay, - .get = param_get_delay, -}; - -#define param_check_console_max_delay(name, p) \ - __param_check(name, p, unsigned int) - -module_param(libcfs_console_max_delay, console_max_delay, 0644); -MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)"); - -static int param_set_console_min_delay(const char *val, - const struct kernel_param *kp) -{ - return param_set_delay_minmax(val, kp, - 1, libcfs_console_max_delay); -} - -static const struct kernel_param_ops param_ops_console_min_delay = { - .set = param_set_console_min_delay, - .get = param_get_delay, -}; - -#define param_check_console_min_delay(name, p) \ - __param_check(name, p, unsigned int) - -module_param(libcfs_console_min_delay, console_min_delay, 0644); -MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)"); - -static int param_set_uint_minmax(const char *val, - const struct kernel_param *kp, - unsigned int min, unsigned int max) -{ - unsigned int num; - int ret; - - if (!val) - return -EINVAL; - ret = kstrtouint(val, 0, &num); - if (ret < 0 || num < min || num > max) - return 
-EINVAL; - *((unsigned int *)kp->arg) = num; - return 0; -} - -static int param_set_uintpos(const char *val, const struct kernel_param *kp) -{ - return param_set_uint_minmax(val, kp, 1, -1); -} - -static const struct kernel_param_ops param_ops_uintpos = { - .set = param_set_uintpos, - .get = param_get_uint, -}; - -#define param_check_uintpos(name, p) \ - __param_check(name, p, unsigned int) - -unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; -module_param(libcfs_console_backoff, uintpos, 0644); -MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor"); - -unsigned int libcfs_debug_binary = 1; - -unsigned int libcfs_stack = 3 * THREAD_SIZE / 4; -EXPORT_SYMBOL(libcfs_stack); - -unsigned int libcfs_catastrophe; -EXPORT_SYMBOL(libcfs_catastrophe); - -unsigned int libcfs_panic_on_lbug = 1; -module_param(libcfs_panic_on_lbug, uint, 0644); -MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG"); - -static wait_queue_head_t debug_ctlwq; - -char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT; - -/* We need to pass a pointer here, but elsewhere this must be a const */ -static char *libcfs_debug_file_path; -module_param(libcfs_debug_file_path, charp, 0644); -MODULE_PARM_DESC(libcfs_debug_file_path, - "Path for dumping debug logs, set 'NONE' to prevent log dumping"); - -int libcfs_panic_in_progress; - -/* libcfs_debug_token2mask() expects the returned string in lower-case */ -static const char * -libcfs_debug_subsys2str(int subsys) -{ - static const char * const libcfs_debug_subsystems[] = - LIBCFS_DEBUG_SUBSYS_NAMES; - - if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems)) - return NULL; - - return libcfs_debug_subsystems[subsys]; -} - -/* libcfs_debug_token2mask() expects the returned string in lower-case */ -static const char * -libcfs_debug_dbg2str(int debug) -{ - static const char * const libcfs_debug_masks[] = - LIBCFS_DEBUG_MASKS_NAMES; - - if (debug >= ARRAY_SIZE(libcfs_debug_masks)) - 
return NULL; - - return libcfs_debug_masks[debug]; -} - -int -libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) -{ - const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : - libcfs_debug_dbg2str; - int len = 0; - const char *token; - int i; - - if (!mask) { /* "0" */ - if (size > 0) - str[0] = '0'; - len = 1; - } else { /* space-separated tokens */ - for (i = 0; i < 32; i++) { - if (!(mask & (1 << i))) - continue; - - token = fn(i); - if (!token) /* unused bit */ - continue; - - if (len > 0) { /* separator? */ - if (len < size) - str[len] = ' '; - len++; - } - - while (*token) { - if (len < size) - str[len] = *token; - token++; - len++; - } - } - } - - /* terminate 'str' */ - if (len < size) - str[len] = 0; - else - str[size - 1] = 0; - - return len; -} - -int -libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) -{ - const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : - libcfs_debug_dbg2str; - int m = 0; - int matched; - int n; - int t; - - /* Allow a number for backwards compatibility */ - - for (n = strlen(str); n > 0; n--) - if (!isspace(str[n - 1])) - break; - matched = n; - t = sscanf(str, "%i%n", &m, &matched); - if (t >= 1 && matched == n) { - /* don't print warning for lctl set_param debug=0 or -1 */ - if (m && m != -1) - CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n"); - *mask = m; - return 0; - } - - return cfs_str2mask(str, fn, mask, is_subsys ? 
0 : D_CANTMASK, - 0xffffffff); -} - -/** - * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages() - */ -void libcfs_debug_dumplog_internal(void *arg) -{ - static time64_t last_dump_time; - time64_t current_time; - void *journal_info; - - journal_info = current->journal_info; - current->journal_info = NULL; - current_time = ktime_get_real_seconds(); - - if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) && - current_time > last_dump_time) { - last_dump_time = current_time; - snprintf(debug_file_name, sizeof(debug_file_name) - 1, - "%s.%lld.%ld", libcfs_debug_file_path_arr, - (s64)current_time, (long)arg); - pr_alert("LustreError: dumping log to %s\n", debug_file_name); - cfs_tracefile_dump_all_pages(debug_file_name); - libcfs_run_debug_log_upcall(debug_file_name); - } - - current->journal_info = journal_info; -} - -static int libcfs_debug_dumplog_thread(void *arg) -{ - libcfs_debug_dumplog_internal(arg); - wake_up(&debug_ctlwq); - return 0; -} - -void libcfs_debug_dumplog(void) -{ - wait_queue_entry_t wait; - struct task_struct *dumper; - - /* we're being careful to ensure that the kernel thread is - * able to set our state to running as it exits before we - * get to schedule() - */ - init_waitqueue_entry(&wait, current); - add_wait_queue(&debug_ctlwq, &wait); - - dumper = kthread_run(libcfs_debug_dumplog_thread, - (void *)(long)current->pid, - "libcfs_debug_dumper"); - set_current_state(TASK_INTERRUPTIBLE); - if (IS_ERR(dumper)) - pr_err("LustreError: cannot start log dump thread: %ld\n", - PTR_ERR(dumper)); - else - schedule(); - - /* be sure to teardown if cfs_create_thread() failed */ - remove_wait_queue(&debug_ctlwq, &wait); - set_current_state(TASK_RUNNING); -} -EXPORT_SYMBOL(libcfs_debug_dumplog); - -int libcfs_debug_init(unsigned long bufsize) -{ - unsigned int max = libcfs_debug_mb; - int rc = 0; - - init_waitqueue_head(&debug_ctlwq); - - if (libcfs_console_max_delay <= 0 || /* not set by user or */ - libcfs_console_min_delay <= 0 || 
/* set to invalid values */ - libcfs_console_min_delay >= libcfs_console_max_delay) { - libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; - libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; - } - - if (libcfs_debug_file_path) { - strlcpy(libcfs_debug_file_path_arr, - libcfs_debug_file_path, - sizeof(libcfs_debug_file_path_arr)); - } - - /* If libcfs_debug_mb is set to an invalid value or uninitialized - * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES - */ - if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) { - max = TCD_MAX_PAGES; - } else { - max = max / num_possible_cpus(); - max <<= (20 - PAGE_SHIFT); - } - - rc = cfs_tracefile_init(max); - if (!rc) { - libcfs_register_panic_notifier(); - libcfs_debug_mb = cfs_trace_get_debug_mb(); - } - - return rc; -} - -int libcfs_debug_cleanup(void) -{ - libcfs_unregister_panic_notifier(); - cfs_tracefile_exit(); - return 0; -} - -int libcfs_debug_clear_buffer(void) -{ - cfs_trace_flush_pages(); - return 0; -} - -/* Debug markers, although printed by S_LNET should not be marked as such. */ -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_UNDEFINED -int libcfs_debug_mark_buffer(const char *text) -{ - CDEBUG(D_TRACE, - "***************************************************\n"); - LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text); - CDEBUG(D_TRACE, - "***************************************************\n"); - - return 0; -} - -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_LNET diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c deleted file mode 100644 index bd86b3b5bc34..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/fail.c +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Oracle Corporation, Inc. 
- */ - -#include -#include -#include -#include -#include - -unsigned long cfs_fail_loc; -EXPORT_SYMBOL(cfs_fail_loc); - -unsigned int cfs_fail_val; -EXPORT_SYMBOL(cfs_fail_val); - -int cfs_fail_err; -EXPORT_SYMBOL(cfs_fail_err); - -DECLARE_WAIT_QUEUE_HEAD(cfs_race_waitq); -EXPORT_SYMBOL(cfs_race_waitq); - -int cfs_race_state; -EXPORT_SYMBOL(cfs_race_state); - -int __cfs_fail_check_set(u32 id, u32 value, int set) -{ - static atomic_t cfs_fail_count = ATOMIC_INIT(0); - - LASSERT(!(id & CFS_FAIL_ONCE)); - - if ((cfs_fail_loc & (CFS_FAILED | CFS_FAIL_ONCE)) == - (CFS_FAILED | CFS_FAIL_ONCE)) { - atomic_set(&cfs_fail_count, 0); /* paranoia */ - return 0; - } - - /* Fail 1/cfs_fail_val times */ - if (cfs_fail_loc & CFS_FAIL_RAND) { - if (cfs_fail_val < 2 || prandom_u32_max(cfs_fail_val) > 0) - return 0; - } - - /* Skip the first cfs_fail_val, then fail */ - if (cfs_fail_loc & CFS_FAIL_SKIP) { - if (atomic_inc_return(&cfs_fail_count) <= cfs_fail_val) - return 0; - } - - /* check cfs_fail_val... */ - if (set == CFS_FAIL_LOC_VALUE) { - if (cfs_fail_val != -1 && cfs_fail_val != value) - return 0; - } - - /* Fail cfs_fail_val times, overridden by FAIL_ONCE */ - if (cfs_fail_loc & CFS_FAIL_SOME && - (!(cfs_fail_loc & CFS_FAIL_ONCE) || cfs_fail_val <= 1)) { - int count = atomic_inc_return(&cfs_fail_count); - - if (count >= cfs_fail_val) { - set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); - atomic_set(&cfs_fail_count, 0); - /* we are lost race to increase */ - if (count > cfs_fail_val) - return 0; - } - } - - /* Take into account the current call for FAIL_ONCE for ORSET only, - * as RESET is a new fail_loc, it does not change the current call - */ - if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE)) - set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc); - /* Lost race to set CFS_FAILED_BIT. */ - if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) { - /* If CFS_FAIL_ONCE is valid, only one process can fail, - * otherwise multi-process can fail at the same time. 
- */ - if (cfs_fail_loc & CFS_FAIL_ONCE) - return 0; - } - - switch (set) { - case CFS_FAIL_LOC_NOSET: - case CFS_FAIL_LOC_VALUE: - break; - case CFS_FAIL_LOC_ORSET: - cfs_fail_loc |= value & ~(CFS_FAILED | CFS_FAIL_ONCE); - break; - case CFS_FAIL_LOC_RESET: - cfs_fail_loc = value; - atomic_set(&cfs_fail_count, 0); - break; - default: - LASSERTF(0, "called with bad set %u\n", set); - break; - } - - return 1; -} -EXPORT_SYMBOL(__cfs_fail_check_set); - -int __cfs_fail_timeout_set(u32 id, u32 value, int ms, int set) -{ - int ret; - - ret = __cfs_fail_check_set(id, value, set); - if (ret && likely(ms > 0)) { - CERROR("cfs_fail_timeout id %x sleeping for %dms\n", - id, ms); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(ms * HZ / 1000); - CERROR("cfs_fail_timeout id %x awake\n", id); - } - return ret; -} -EXPORT_SYMBOL(__cfs_fail_timeout_set); diff --git a/drivers/staging/lustre/lnet/libcfs/hash.c b/drivers/staging/lustre/lnet/libcfs/hash.c deleted file mode 100644 index 48be66f0d654..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/hash.c +++ /dev/null @@ -1,2065 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * libcfs/libcfs/hash.c - * - * Implement a hash class for hash process in lustre system. - * - * Author: YuZhangyong - * - * 2008-08-15: Brian Behlendorf - * - Simplified API and improved documentation - * - Added per-hash feature flags: - * * CFS_HASH_DEBUG additional validation - * * CFS_HASH_REHASH dynamic rehashing - * - Added per-hash statistics - * - General performance enhancements - * - * 2009-07-31: Liang Zhen - * - move all stuff to libcfs - * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH - * - ignore hs_rwlock if without CFS_HASH_REHASH setting - * - buckets are allocated one by one(instead of contiguous memory), - * to avoid unnecessary cacheline conflict - * - * 2010-03-01: Liang Zhen - * - "bucket" is a group of hlist_head now, user can specify bucket size - * by bkt_bits of cfs_hash_create(), all hlist_heads in a bucket share - * one lock for reducing memory overhead. 
- * - * - support lockless hash, caller will take care of locks: - * avoid lock overhead for hash tables that are already protected - * by locking in the caller for another reason - * - * - support both spin_lock/rwlock for bucket: - * overhead of spinlock contention is lower than read/write - * contention of rwlock, so using spinlock to serialize operations on - * bucket is more reasonable for those frequently changed hash tables - * - * - support one-single lock mode: - * one lock to protect all hash operations to avoid overhead of - * multiple locks if hash table is always small - * - * - removed a lot of unnecessary addref & decref on hash element: - * addref & decref are atomic operations in many use-cases which - * are expensive. - * - * - support non-blocking cfs_hash_add() and cfs_hash_findadd(): - * some lustre use-cases require these functions to be strictly - * non-blocking, we need to schedule required rehash on a different - * thread on those cases. - * - * - safer rehash on large hash table - * In old implementation, rehash function will exclusively lock the - * hash table and finish rehash in one batch, it's dangerous on SMP - * system because rehash millions of elements could take long time. - * New implemented rehash can release lock and relax CPU in middle - * of rehash, it's safe for another thread to search/change on the - * hash table even it's in rehasing. - * - * - support two different refcount modes - * . hash table has refcount on element - * . hash table doesn't change refcount on adding/removing element - * - * - support long name hash table (for param-tree) - * - * - fix a bug for cfs_hash_rehash_key: - * in old implementation, cfs_hash_rehash_key could screw up the - * hash-table because @key is overwritten without any protection. - * Now we need user to define hs_keycpy for those rehash enabled - * hash tables, cfs_hash_rehash_key will overwrite hash-key - * inside lock by calling hs_keycpy. 
- * - * - better hash iteration: - * Now we support both locked iteration & lockless iteration of hash - * table. Also, user can break the iteration by return 1 in callback. - */ -#include -#include -#include -#include -#include - -#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 -static unsigned int warn_on_depth = 8; -module_param(warn_on_depth, uint, 0644); -MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high."); -#endif - -struct workqueue_struct *cfs_rehash_wq; - -static inline void -cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {} - -static inline void -cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {} - -static inline void -cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive) - __acquires(&lock->spin) -{ - spin_lock(&lock->spin); -} - -static inline void -cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive) - __releases(&lock->spin) -{ - spin_unlock(&lock->spin); -} - -static inline void -cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive) - __acquires(&lock->rw) -{ - if (!exclusive) - read_lock(&lock->rw); - else - write_lock(&lock->rw); -} - -static inline void -cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive) - __releases(&lock->rw) -{ - if (!exclusive) - read_unlock(&lock->rw); - else - write_unlock(&lock->rw); -} - -/** No lock hash */ -static struct cfs_hash_lock_ops cfs_hash_nl_lops = { - .hs_lock = cfs_hash_nl_lock, - .hs_unlock = cfs_hash_nl_unlock, - .hs_bkt_lock = cfs_hash_nl_lock, - .hs_bkt_unlock = cfs_hash_nl_unlock, -}; - -/** no bucket lock, one spinlock to protect everything */ -static struct cfs_hash_lock_ops cfs_hash_nbl_lops = { - .hs_lock = cfs_hash_spin_lock, - .hs_unlock = cfs_hash_spin_unlock, - .hs_bkt_lock = cfs_hash_nl_lock, - .hs_bkt_unlock = cfs_hash_nl_unlock, -}; - -/** spin bucket lock, rehash is enabled */ -static struct cfs_hash_lock_ops cfs_hash_bkt_spin_lops = { - .hs_lock = cfs_hash_rw_lock, - .hs_unlock = cfs_hash_rw_unlock, - .hs_bkt_lock = 
cfs_hash_spin_lock, - .hs_bkt_unlock = cfs_hash_spin_unlock, -}; - -/** rw bucket lock, rehash is enabled */ -static struct cfs_hash_lock_ops cfs_hash_bkt_rw_lops = { - .hs_lock = cfs_hash_rw_lock, - .hs_unlock = cfs_hash_rw_unlock, - .hs_bkt_lock = cfs_hash_rw_lock, - .hs_bkt_unlock = cfs_hash_rw_unlock, -}; - -/** spin bucket lock, rehash is disabled */ -static struct cfs_hash_lock_ops cfs_hash_nr_bkt_spin_lops = { - .hs_lock = cfs_hash_nl_lock, - .hs_unlock = cfs_hash_nl_unlock, - .hs_bkt_lock = cfs_hash_spin_lock, - .hs_bkt_unlock = cfs_hash_spin_unlock, -}; - -/** rw bucket lock, rehash is disabled */ -static struct cfs_hash_lock_ops cfs_hash_nr_bkt_rw_lops = { - .hs_lock = cfs_hash_nl_lock, - .hs_unlock = cfs_hash_nl_unlock, - .hs_bkt_lock = cfs_hash_rw_lock, - .hs_bkt_unlock = cfs_hash_rw_unlock, -}; - -static void -cfs_hash_lock_setup(struct cfs_hash *hs) -{ - if (cfs_hash_with_no_lock(hs)) { - hs->hs_lops = &cfs_hash_nl_lops; - - } else if (cfs_hash_with_no_bktlock(hs)) { - hs->hs_lops = &cfs_hash_nbl_lops; - spin_lock_init(&hs->hs_lock.spin); - - } else if (cfs_hash_with_rehash(hs)) { - rwlock_init(&hs->hs_lock.rw); - - if (cfs_hash_with_rw_bktlock(hs)) - hs->hs_lops = &cfs_hash_bkt_rw_lops; - else if (cfs_hash_with_spin_bktlock(hs)) - hs->hs_lops = &cfs_hash_bkt_spin_lops; - else - LBUG(); - } else { - if (cfs_hash_with_rw_bktlock(hs)) - hs->hs_lops = &cfs_hash_nr_bkt_rw_lops; - else if (cfs_hash_with_spin_bktlock(hs)) - hs->hs_lops = &cfs_hash_nr_bkt_spin_lops; - else - LBUG(); - } -} - -/** - * Simple hash head without depth tracking - * new element is always added to head of hlist - */ -struct cfs_hash_head { - struct hlist_head hh_head; /**< entries list */ -}; - -static int -cfs_hash_hh_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_head); -} - -static struct hlist_head * -cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_head *head; - - head = (struct cfs_hash_head *)&bd->bd_bucket->hsb_head[0]; 
- return &head[bd->bd_offset].hh_head; -} - -static int -cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd)); - return -1; /* unknown depth */ -} - -static int -cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - hlist_del_init(hnode); - return -1; /* unknown depth */ -} - -/** - * Simple hash head with depth tracking - * new element is always added to head of hlist - */ -struct cfs_hash_head_dep { - struct hlist_head hd_head; /**< entries list */ - unsigned int hd_depth; /**< list length */ -}; - -static int -cfs_hash_hd_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_head_dep); -} - -static struct hlist_head * -cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_head_dep *head; - - head = (struct cfs_hash_head_dep *)&bd->bd_bucket->hsb_head[0]; - return &head[bd->bd_offset].hd_head; -} - -static int -cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_head_dep *hh; - - hh = container_of(cfs_hash_hd_hhead(hs, bd), - struct cfs_hash_head_dep, hd_head); - hlist_add_head(hnode, &hh->hd_head); - return ++hh->hd_depth; -} - -static int -cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_head_dep *hh; - - hh = container_of(cfs_hash_hd_hhead(hs, bd), - struct cfs_hash_head_dep, hd_head); - hlist_del_init(hnode); - return --hh->hd_depth; -} - -/** - * double links hash head without depth tracking - * new element is always added to tail of hlist - */ -struct cfs_hash_dhead { - struct hlist_head dh_head; /**< entries list */ - struct hlist_node *dh_tail; /**< the last entry */ -}; - -static int -cfs_hash_dh_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_dhead); -} - -static struct hlist_head * -cfs_hash_dh_hhead(struct cfs_hash *hs, 
struct cfs_hash_bd *bd) -{ - struct cfs_hash_dhead *head; - - head = (struct cfs_hash_dhead *)&bd->bd_bucket->hsb_head[0]; - return &head[bd->bd_offset].dh_head; -} - -static int -cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_dhead *dh; - - dh = container_of(cfs_hash_dh_hhead(hs, bd), - struct cfs_hash_dhead, dh_head); - if (dh->dh_tail) /* not empty */ - hlist_add_behind(hnode, dh->dh_tail); - else /* empty list */ - hlist_add_head(hnode, &dh->dh_head); - dh->dh_tail = hnode; - return -1; /* unknown depth */ -} - -static int -cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnd) -{ - struct cfs_hash_dhead *dh; - - dh = container_of(cfs_hash_dh_hhead(hs, bd), - struct cfs_hash_dhead, dh_head); - if (!hnd->next) { /* it's the tail */ - dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL : - container_of(hnd->pprev, struct hlist_node, next); - } - hlist_del_init(hnd); - return -1; /* unknown depth */ -} - -/** - * double links hash head with depth tracking - * new element is always added to tail of hlist - */ -struct cfs_hash_dhead_dep { - struct hlist_head dd_head; /**< entries list */ - struct hlist_node *dd_tail; /**< the last entry */ - unsigned int dd_depth; /**< list length */ -}; - -static int -cfs_hash_dd_hhead_size(struct cfs_hash *hs) -{ - return sizeof(struct cfs_hash_dhead_dep); -} - -static struct hlist_head * -cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd) -{ - struct cfs_hash_dhead_dep *head; - - head = (struct cfs_hash_dhead_dep *)&bd->bd_bucket->hsb_head[0]; - return &head[bd->bd_offset].dd_head; -} - -static int -cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - struct cfs_hash_dhead_dep *dh; - - dh = container_of(cfs_hash_dd_hhead(hs, bd), - struct cfs_hash_dhead_dep, dd_head); - if (dh->dd_tail) /* not empty */ - hlist_add_behind(hnode, dh->dd_tail); - else /* empty list 
*/ - hlist_add_head(hnode, &dh->dd_head); - dh->dd_tail = hnode; - return ++dh->dd_depth; -} - -static int -cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnd) -{ - struct cfs_hash_dhead_dep *dh; - - dh = container_of(cfs_hash_dd_hhead(hs, bd), - struct cfs_hash_dhead_dep, dd_head); - if (!hnd->next) { /* it's the tail */ - dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL : - container_of(hnd->pprev, struct hlist_node, next); - } - hlist_del_init(hnd); - return --dh->dd_depth; -} - -static struct cfs_hash_hlist_ops cfs_hash_hh_hops = { - .hop_hhead = cfs_hash_hh_hhead, - .hop_hhead_size = cfs_hash_hh_hhead_size, - .hop_hnode_add = cfs_hash_hh_hnode_add, - .hop_hnode_del = cfs_hash_hh_hnode_del, -}; - -static struct cfs_hash_hlist_ops cfs_hash_hd_hops = { - .hop_hhead = cfs_hash_hd_hhead, - .hop_hhead_size = cfs_hash_hd_hhead_size, - .hop_hnode_add = cfs_hash_hd_hnode_add, - .hop_hnode_del = cfs_hash_hd_hnode_del, -}; - -static struct cfs_hash_hlist_ops cfs_hash_dh_hops = { - .hop_hhead = cfs_hash_dh_hhead, - .hop_hhead_size = cfs_hash_dh_hhead_size, - .hop_hnode_add = cfs_hash_dh_hnode_add, - .hop_hnode_del = cfs_hash_dh_hnode_del, -}; - -static struct cfs_hash_hlist_ops cfs_hash_dd_hops = { - .hop_hhead = cfs_hash_dd_hhead, - .hop_hhead_size = cfs_hash_dd_hhead_size, - .hop_hnode_add = cfs_hash_dd_hnode_add, - .hop_hnode_del = cfs_hash_dd_hnode_del, -}; - -static void -cfs_hash_hlist_setup(struct cfs_hash *hs) -{ - if (cfs_hash_with_add_tail(hs)) { - hs->hs_hops = cfs_hash_with_depth(hs) ? - &cfs_hash_dd_hops : &cfs_hash_dh_hops; - } else { - hs->hs_hops = cfs_hash_with_depth(hs) ? 
- &cfs_hash_hd_hops : &cfs_hash_hh_hops; - } -} - -static void -cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts, - unsigned int bits, const void *key, struct cfs_hash_bd *bd) -{ - unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1); - - LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits); - - bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)]; - bd->bd_offset = index >> (bits - hs->hs_bkt_bits); -} - -void -cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd) -{ - /* NB: caller should hold hs->hs_rwlock if REHASH is set */ - if (likely(!hs->hs_rehash_buckets)) { - cfs_hash_bd_from_key(hs, hs->hs_buckets, - hs->hs_cur_bits, key, bd); - } else { - LASSERT(hs->hs_rehash_bits); - cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, - hs->hs_rehash_bits, key, bd); - } -} -EXPORT_SYMBOL(cfs_hash_bd_get); - -static inline void -cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur) -{ - if (likely(dep_cur <= bd->bd_bucket->hsb_depmax)) - return; - - bd->bd_bucket->hsb_depmax = dep_cur; -# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 - if (likely(!warn_on_depth || - max(warn_on_depth, hs->hs_dep_max) >= dep_cur)) - return; - - spin_lock(&hs->hs_dep_lock); - hs->hs_dep_max = dep_cur; - hs->hs_dep_bkt = bd->bd_bucket->hsb_index; - hs->hs_dep_off = bd->bd_offset; - hs->hs_dep_bits = hs->hs_cur_bits; - spin_unlock(&hs->hs_dep_lock); - - queue_work(cfs_rehash_wq, &hs->hs_dep_work); -# endif -} - -void -cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - int rc; - - rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode); - cfs_hash_bd_dep_record(hs, bd, rc); - bd->bd_bucket->hsb_version++; - if (unlikely(!bd->bd_bucket->hsb_version)) - bd->bd_bucket->hsb_version++; - bd->bd_bucket->hsb_count++; - - if (cfs_hash_with_counter(hs)) - atomic_inc(&hs->hs_count); - if (!cfs_hash_with_no_itemref(hs)) - cfs_hash_get(hs, hnode); -} 
-EXPORT_SYMBOL(cfs_hash_bd_add_locked); - -void -cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode) -{ - hs->hs_hops->hop_hnode_del(hs, bd, hnode); - - LASSERT(bd->bd_bucket->hsb_count > 0); - bd->bd_bucket->hsb_count--; - bd->bd_bucket->hsb_version++; - if (unlikely(!bd->bd_bucket->hsb_version)) - bd->bd_bucket->hsb_version++; - - if (cfs_hash_with_counter(hs)) { - LASSERT(atomic_read(&hs->hs_count) > 0); - atomic_dec(&hs->hs_count); - } - if (!cfs_hash_with_no_itemref(hs)) - cfs_hash_put_locked(hs, hnode); -} -EXPORT_SYMBOL(cfs_hash_bd_del_locked); - -void -cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old, - struct cfs_hash_bd *bd_new, struct hlist_node *hnode) -{ - struct cfs_hash_bucket *obkt = bd_old->bd_bucket; - struct cfs_hash_bucket *nbkt = bd_new->bd_bucket; - int rc; - - if (!cfs_hash_bd_compare(bd_old, bd_new)) - return; - - /* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops - * in cfs_hash_bd_del/add_locked - */ - hs->hs_hops->hop_hnode_del(hs, bd_old, hnode); - rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode); - cfs_hash_bd_dep_record(hs, bd_new, rc); - - LASSERT(obkt->hsb_count > 0); - obkt->hsb_count--; - obkt->hsb_version++; - if (unlikely(!obkt->hsb_version)) - obkt->hsb_version++; - nbkt->hsb_count++; - nbkt->hsb_version++; - if (unlikely(!nbkt->hsb_version)) - nbkt->hsb_version++; -} - -enum { - /** always set, for sanity (avoid ZERO intent) */ - CFS_HS_LOOKUP_MASK_FIND = BIT(0), - /** return entry with a ref */ - CFS_HS_LOOKUP_MASK_REF = BIT(1), - /** add entry if not existing */ - CFS_HS_LOOKUP_MASK_ADD = BIT(2), - /** delete entry, ignore other masks */ - CFS_HS_LOOKUP_MASK_DEL = BIT(3), -}; - -enum cfs_hash_lookup_intent { - /** return item w/o refcount */ - CFS_HS_LOOKUP_IT_PEEK = CFS_HS_LOOKUP_MASK_FIND, - /** return item with refcount */ - CFS_HS_LOOKUP_IT_FIND = (CFS_HS_LOOKUP_MASK_FIND | - CFS_HS_LOOKUP_MASK_REF), - /** return item w/o refcount if 
existed, otherwise add */ - CFS_HS_LOOKUP_IT_ADD = (CFS_HS_LOOKUP_MASK_FIND | - CFS_HS_LOOKUP_MASK_ADD), - /** return item with refcount if existed, otherwise add */ - CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND | - CFS_HS_LOOKUP_MASK_ADD), - /** delete if existed */ - CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND | - CFS_HS_LOOKUP_MASK_DEL) -}; - -static struct hlist_node * -cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key, struct hlist_node *hnode, - enum cfs_hash_lookup_intent intent) - -{ - struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd); - struct hlist_node *ehnode; - struct hlist_node *match; - int intent_add = intent & CFS_HS_LOOKUP_MASK_ADD; - - /* with this function, we can avoid a lot of useless refcount ops, - * which are expensive atomic operations most time. - */ - match = intent_add ? NULL : hnode; - hlist_for_each(ehnode, hhead) { - if (!cfs_hash_keycmp(hs, key, ehnode)) - continue; - - if (match && match != ehnode) /* can't match */ - continue; - - /* match and ... */ - if (intent & CFS_HS_LOOKUP_MASK_DEL) { - cfs_hash_bd_del_locked(hs, bd, ehnode); - return ehnode; - } - - /* caller wants refcount? 
*/ - if (intent & CFS_HS_LOOKUP_MASK_REF) - cfs_hash_get(hs, ehnode); - return ehnode; - } - /* no match item */ - if (!intent_add) - return NULL; - - LASSERT(hnode); - cfs_hash_bd_add_locked(hs, bd, hnode); - return hnode; -} - -struct hlist_node * -cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key) -{ - return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, - CFS_HS_LOOKUP_IT_FIND); -} -EXPORT_SYMBOL(cfs_hash_bd_lookup_locked); - -struct hlist_node * -cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - const void *key) -{ - return cfs_hash_bd_lookup_intent(hs, bd, key, NULL, - CFS_HS_LOOKUP_IT_PEEK); -} -EXPORT_SYMBOL(cfs_hash_bd_peek_locked); - -static void -cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, int excl) -{ - struct cfs_hash_bucket *prev = NULL; - int i; - - /** - * bds must be ascendantly ordered by bd->bd_bucket->hsb_index. - * NB: it's possible that several bds point to the same bucket but - * have different bd::bd_offset, so need take care of deadlock. 
- */ - cfs_hash_for_each_bd(bds, n, i) { - if (prev == bds[i].bd_bucket) - continue; - - LASSERT(!prev || prev->hsb_index < bds[i].bd_bucket->hsb_index); - cfs_hash_bd_lock(hs, &bds[i], excl); - prev = bds[i].bd_bucket; - } -} - -static void -cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, int excl) -{ - struct cfs_hash_bucket *prev = NULL; - int i; - - cfs_hash_for_each_bd(bds, n, i) { - if (prev != bds[i].bd_bucket) { - cfs_hash_bd_unlock(hs, &bds[i], excl); - prev = bds[i].bd_bucket; - } - } -} - -static struct hlist_node * -cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, const void *key) -{ - struct hlist_node *ehnode; - unsigned int i; - - cfs_hash_for_each_bd(bds, n, i) { - ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL, - CFS_HS_LOOKUP_IT_FIND); - if (ehnode) - return ehnode; - } - return NULL; -} - -static struct hlist_node * -cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, const void *key, - struct hlist_node *hnode, int noref) -{ - struct hlist_node *ehnode; - int intent; - unsigned int i; - - LASSERT(hnode); - intent = (!noref * CFS_HS_LOOKUP_MASK_REF) | CFS_HS_LOOKUP_IT_PEEK; - - cfs_hash_for_each_bd(bds, n, i) { - ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, - NULL, intent); - if (ehnode) - return ehnode; - } - - if (i == 1) { /* only one bucket */ - cfs_hash_bd_add_locked(hs, &bds[0], hnode); - } else { - struct cfs_hash_bd mybd; - - cfs_hash_bd_get(hs, key, &mybd); - cfs_hash_bd_add_locked(hs, &mybd, hnode); - } - - return hnode; -} - -static struct hlist_node * -cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - unsigned int n, const void *key, - struct hlist_node *hnode) -{ - struct hlist_node *ehnode; - unsigned int i; - - cfs_hash_for_each_bd(bds, n, i) { - ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode, - CFS_HS_LOOKUP_IT_FINDDEL); - if (ehnode) - 
return ehnode; - } - return NULL; -} - -static void -cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2) -{ - int rc; - - if (!bd2->bd_bucket) - return; - - if (!bd1->bd_bucket) { - *bd1 = *bd2; - bd2->bd_bucket = NULL; - return; - } - - rc = cfs_hash_bd_compare(bd1, bd2); - if (!rc) - bd2->bd_bucket = NULL; - else if (rc > 0) - swap(*bd1, *bd2); /* swap bd1 and bd2 */ -} - -void -cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key, - struct cfs_hash_bd *bds) -{ - /* NB: caller should hold hs_lock.rw if REHASH is set */ - cfs_hash_bd_from_key(hs, hs->hs_buckets, - hs->hs_cur_bits, key, &bds[0]); - if (likely(!hs->hs_rehash_buckets)) { - /* no rehash or not rehashing */ - bds[1].bd_bucket = NULL; - return; - } - - LASSERT(hs->hs_rehash_bits); - cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, - hs->hs_rehash_bits, key, &bds[1]); - - cfs_hash_bd_order(&bds[0], &bds[1]); -} - -void -cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) -{ - cfs_hash_multi_bd_lock(hs, bds, 2, excl); -} - -void -cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl) -{ - cfs_hash_multi_bd_unlock(hs, bds, 2, excl); -} - -struct hlist_node * -cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key) -{ - return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key); -} - -struct hlist_node * -cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key, struct hlist_node *hnode, - int noref) -{ - return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key, - hnode, noref); -} - -struct hlist_node * -cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds, - const void *key, struct hlist_node *hnode) -{ - return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode); -} - -static void -cfs_hash_buckets_free(struct cfs_hash_bucket **buckets, - int bkt_size, int prev_size, int size) -{ - int i; - - for (i = prev_size; i < size; i++) - 
kfree(buckets[i]); - - kvfree(buckets); -} - -/* - * Create or grow bucket memory. Return old_buckets if no allocation was - * needed, the newly allocated buckets if allocation was needed and - * successful, and NULL on error. - */ -static struct cfs_hash_bucket ** -cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts, - unsigned int old_size, unsigned int new_size) -{ - struct cfs_hash_bucket **new_bkts; - int i; - - LASSERT(!old_size || old_bkts); - - if (old_bkts && old_size == new_size) - return old_bkts; - - new_bkts = kvmalloc_array(new_size, sizeof(new_bkts[0]), GFP_KERNEL); - if (!new_bkts) - return NULL; - - if (old_bkts) { - memcpy(new_bkts, old_bkts, - min(old_size, new_size) * sizeof(*old_bkts)); - } - - for (i = old_size; i < new_size; i++) { - struct hlist_head *hhead; - struct cfs_hash_bd bd; - - new_bkts[i] = kzalloc(cfs_hash_bkt_size(hs), GFP_KERNEL); - if (!new_bkts[i]) { - cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs), - old_size, new_size); - return NULL; - } - - new_bkts[i]->hsb_index = i; - new_bkts[i]->hsb_version = 1; /* shouldn't be zero */ - new_bkts[i]->hsb_depmax = -1; /* unknown */ - bd.bd_bucket = new_bkts[i]; - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) - INIT_HLIST_HEAD(hhead); - - if (cfs_hash_with_no_lock(hs) || - cfs_hash_with_no_bktlock(hs)) - continue; - - if (cfs_hash_with_rw_bktlock(hs)) - rwlock_init(&new_bkts[i]->hsb_lock.rw); - else if (cfs_hash_with_spin_bktlock(hs)) - spin_lock_init(&new_bkts[i]->hsb_lock.spin); - else - LBUG(); /* invalid use-case */ - } - return new_bkts; -} - -/** - * Initialize new libcfs hash, where: - * @name - Descriptive hash name - * @cur_bits - Initial hash table size, in bits - * @max_bits - Maximum allowed hash table resize, in bits - * @ops - Registered hash table operations - * @flags - CFS_HASH_REHASH enable synamic hash resizing - * - CFS_HASH_SORT enable chained hash sort - */ -static void cfs_hash_rehash_worker(struct work_struct *work); - -#if 
CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 -static void cfs_hash_dep_print(struct work_struct *work) -{ - struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_dep_work); - int dep; - int bkt; - int off; - int bits; - - spin_lock(&hs->hs_dep_lock); - dep = hs->hs_dep_max; - bkt = hs->hs_dep_bkt; - off = hs->hs_dep_off; - bits = hs->hs_dep_bits; - spin_unlock(&hs->hs_dep_lock); - - LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n", - hs->hs_name, bits, dep, bkt, off); - spin_lock(&hs->hs_dep_lock); - hs->hs_dep_bits = 0; /* mark as workitem done */ - spin_unlock(&hs->hs_dep_lock); - return 0; -} - -static void cfs_hash_depth_wi_init(struct cfs_hash *hs) -{ - spin_lock_init(&hs->hs_dep_lock); - INIT_WORK(&hs->hs_dep_work, cfs_hash_dep_print); -} - -static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) -{ - cancel_work_sync(&hs->hs_dep_work); -} - -#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */ - -static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {} -static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {} - -#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */ - -struct cfs_hash * -cfs_hash_create(char *name, unsigned int cur_bits, unsigned int max_bits, - unsigned int bkt_bits, unsigned int extra_bytes, - unsigned int min_theta, unsigned int max_theta, - struct cfs_hash_ops *ops, unsigned int flags) -{ - struct cfs_hash *hs; - int len; - - BUILD_BUG_ON(CFS_HASH_THETA_BITS >= 15); - - LASSERT(name); - LASSERT(ops->hs_key); - LASSERT(ops->hs_hash); - LASSERT(ops->hs_object); - LASSERT(ops->hs_keycmp); - LASSERT(ops->hs_get); - LASSERT(ops->hs_put || ops->hs_put_locked); - - if (flags & CFS_HASH_REHASH) - flags |= CFS_HASH_COUNTER; /* must have counter */ - - LASSERT(cur_bits > 0); - LASSERT(cur_bits >= bkt_bits); - LASSERT(max_bits >= cur_bits && max_bits < 31); - LASSERT(ergo(!(flags & CFS_HASH_REHASH), cur_bits == max_bits)); - LASSERT(ergo(flags & CFS_HASH_REHASH, !(flags & CFS_HASH_NO_LOCK))); - 
LASSERT(ergo(flags & CFS_HASH_REHASH_KEY, ops->hs_keycpy)); - - len = !(flags & CFS_HASH_BIGNAME) ? - CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN; - hs = kzalloc(offsetof(struct cfs_hash, hs_name[len]), GFP_KERNEL); - if (!hs) - return NULL; - - strlcpy(hs->hs_name, name, len); - hs->hs_flags = flags; - - atomic_set(&hs->hs_refcount, 1); - atomic_set(&hs->hs_count, 0); - - cfs_hash_lock_setup(hs); - cfs_hash_hlist_setup(hs); - - hs->hs_cur_bits = (u8)cur_bits; - hs->hs_min_bits = (u8)cur_bits; - hs->hs_max_bits = (u8)max_bits; - hs->hs_bkt_bits = (u8)bkt_bits; - - hs->hs_ops = ops; - hs->hs_extra_bytes = extra_bytes; - hs->hs_rehash_bits = 0; - INIT_WORK(&hs->hs_rehash_work, cfs_hash_rehash_worker); - cfs_hash_depth_wi_init(hs); - - if (cfs_hash_with_rehash(hs)) - __cfs_hash_set_theta(hs, min_theta, max_theta); - - hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0, - CFS_HASH_NBKT(hs)); - if (hs->hs_buckets) - return hs; - - kfree(hs); - return NULL; -} -EXPORT_SYMBOL(cfs_hash_create); - -/** - * Cleanup libcfs hash @hs. - */ -static void -cfs_hash_destroy(struct cfs_hash *hs) -{ - struct hlist_node *hnode; - struct hlist_node *pos; - struct cfs_hash_bd bd; - int i; - - LASSERT(hs); - LASSERT(!cfs_hash_is_exiting(hs) && - !cfs_hash_is_iterating(hs)); - - /** - * prohibit further rehashes, don't need any lock because - * I'm the only (last) one can change it. 
- */ - hs->hs_exiting = 1; - if (cfs_hash_with_rehash(hs)) - cfs_hash_rehash_cancel(hs); - - cfs_hash_depth_wi_cancel(hs); - /* rehash should be done/canceled */ - LASSERT(hs->hs_buckets && !hs->hs_rehash_buckets); - - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - LASSERT(bd.bd_bucket); - /* no need to take this lock, just for consistent code */ - cfs_hash_bd_lock(hs, &bd, 1); - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - hlist_for_each_safe(hnode, pos, hhead) { - LASSERTF(!cfs_hash_with_assert_empty(hs), - "hash %s bucket %u(%u) is not empty: %u items left\n", - hs->hs_name, bd.bd_bucket->hsb_index, - bd.bd_offset, bd.bd_bucket->hsb_count); - /* can't assert key valicate, because we - * can interrupt rehash - */ - cfs_hash_bd_del_locked(hs, &bd, hnode); - cfs_hash_exit(hs, hnode); - } - } - LASSERT(!bd.bd_bucket->hsb_count); - cfs_hash_bd_unlock(hs, &bd, 1); - cond_resched(); - } - - LASSERT(!atomic_read(&hs->hs_count)); - - cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs), - 0, CFS_HASH_NBKT(hs)); - i = cfs_hash_with_bigname(hs) ? 
- CFS_HASH_BIGNAME_LEN : CFS_HASH_NAME_LEN; - kfree(hs); -} - -struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs) -{ - if (atomic_inc_not_zero(&hs->hs_refcount)) - return hs; - return NULL; -} -EXPORT_SYMBOL(cfs_hash_getref); - -void cfs_hash_putref(struct cfs_hash *hs) -{ - if (atomic_dec_and_test(&hs->hs_refcount)) - cfs_hash_destroy(hs); -} -EXPORT_SYMBOL(cfs_hash_putref); - -static inline int -cfs_hash_rehash_bits(struct cfs_hash *hs) -{ - if (cfs_hash_with_no_lock(hs) || - !cfs_hash_with_rehash(hs)) - return -EOPNOTSUPP; - - if (unlikely(cfs_hash_is_exiting(hs))) - return -ESRCH; - - if (unlikely(cfs_hash_is_rehashing(hs))) - return -EALREADY; - - if (unlikely(cfs_hash_is_iterating(hs))) - return -EAGAIN; - - /* XXX: need to handle case with max_theta != 2.0 - * and the case with min_theta != 0.5 - */ - if ((hs->hs_cur_bits < hs->hs_max_bits) && - (__cfs_hash_theta(hs) > hs->hs_max_theta)) - return hs->hs_cur_bits + 1; - - if (!cfs_hash_with_shrink(hs)) - return 0; - - if ((hs->hs_cur_bits > hs->hs_min_bits) && - (__cfs_hash_theta(hs) < hs->hs_min_theta)) - return hs->hs_cur_bits - 1; - - return 0; -} - -/** - * don't allow inline rehash if: - * - user wants non-blocking change (add/del) on hash table - * - too many elements - */ -static inline int -cfs_hash_rehash_inline(struct cfs_hash *hs) -{ - return !cfs_hash_with_nblk_change(hs) && - atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG; -} - -/** - * Add item @hnode to libcfs hash @hs using @key. The registered - * ops->hs_get function will be called when the item is added. 
- */ -void -cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) -{ - struct cfs_hash_bd bd; - int bits; - - LASSERT(hlist_unhashed(hnode)); - - cfs_hash_lock(hs, 0); - cfs_hash_bd_get_and_lock(hs, key, &bd, 1); - - cfs_hash_key_validate(hs, key, hnode); - cfs_hash_bd_add_locked(hs, &bd, hnode); - - cfs_hash_bd_unlock(hs, &bd, 1); - - bits = cfs_hash_rehash_bits(hs); - cfs_hash_unlock(hs, 0); - if (bits > 0) - cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); -} -EXPORT_SYMBOL(cfs_hash_add); - -static struct hlist_node * -cfs_hash_find_or_add(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode, int noref) -{ - struct hlist_node *ehnode; - struct cfs_hash_bd bds[2]; - int bits = 0; - - LASSERTF(hlist_unhashed(hnode), "hnode = %p\n", hnode); - - cfs_hash_lock(hs, 0); - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); - - cfs_hash_key_validate(hs, key, hnode); - ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key, - hnode, noref); - cfs_hash_dual_bd_unlock(hs, bds, 1); - - if (ehnode == hnode) /* new item added */ - bits = cfs_hash_rehash_bits(hs); - cfs_hash_unlock(hs, 0); - if (bits > 0) - cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); - - return ehnode; -} - -/** - * Add item @hnode to libcfs hash @hs using @key. The registered - * ops->hs_get function will be called if the item was added. - * Returns 0 on success or -EALREADY on key collisions. - */ -int -cfs_hash_add_unique(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode) -{ - return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ? - -EALREADY : 0; -} -EXPORT_SYMBOL(cfs_hash_add_unique); - -/** - * Add item @hnode to libcfs hash @hs using @key. If this @key - * already exists in the hash then ops->hs_get will be called on the - * conflicting entry and that entry will be returned to the caller. - * Otherwise ops->hs_get is called on the item which was added. 
- */ -void * -cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key, - struct hlist_node *hnode) -{ - hnode = cfs_hash_find_or_add(hs, key, hnode, 0); - - return cfs_hash_object(hs, hnode); -} -EXPORT_SYMBOL(cfs_hash_findadd_unique); - -/** - * Delete item @hnode from the libcfs hash @hs using @key. The @key - * is required to ensure the correct hash bucket is locked since there - * is no direct linkage from the item to the bucket. The object - * removed from the hash will be returned and obs->hs_put is called - * on the removed object. - */ -void * -cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode) -{ - void *obj = NULL; - int bits = 0; - struct cfs_hash_bd bds[2]; - - cfs_hash_lock(hs, 0); - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1); - - /* NB: do nothing if @hnode is not in hash table */ - if (!hnode || !hlist_unhashed(hnode)) { - if (!bds[1].bd_bucket && hnode) { - cfs_hash_bd_del_locked(hs, &bds[0], hnode); - } else { - hnode = cfs_hash_dual_bd_finddel_locked(hs, bds, - key, hnode); - } - } - - if (hnode) { - obj = cfs_hash_object(hs, hnode); - bits = cfs_hash_rehash_bits(hs); - } - - cfs_hash_dual_bd_unlock(hs, bds, 1); - cfs_hash_unlock(hs, 0); - if (bits > 0) - cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs)); - - return obj; -} -EXPORT_SYMBOL(cfs_hash_del); - -/** - * Delete item given @key in libcfs hash @hs. The first @key found in - * the hash will be removed, if the key exists multiple times in the hash - * @hs this function must be called once per key. The removed object - * will be returned and ops->hs_put is called on the removed object. - */ -void * -cfs_hash_del_key(struct cfs_hash *hs, const void *key) -{ - return cfs_hash_del(hs, key, NULL); -} -EXPORT_SYMBOL(cfs_hash_del_key); - -/** - * Lookup an item using @key in the libcfs hash @hs and return it. - * If the @key is found in the hash hs->hs_get() is called and the - * matching objects is returned. 
It is the callers responsibility - * to call the counterpart ops->hs_put using the cfs_hash_put() macro - * when when finished with the object. If the @key was not found - * in the hash @hs NULL is returned. - */ -void * -cfs_hash_lookup(struct cfs_hash *hs, const void *key) -{ - void *obj = NULL; - struct hlist_node *hnode; - struct cfs_hash_bd bds[2]; - - cfs_hash_lock(hs, 0); - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0); - - hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key); - if (hnode) - obj = cfs_hash_object(hs, hnode); - - cfs_hash_dual_bd_unlock(hs, bds, 0); - cfs_hash_unlock(hs, 0); - - return obj; -} -EXPORT_SYMBOL(cfs_hash_lookup); - -static void -cfs_hash_for_each_enter(struct cfs_hash *hs) -{ - LASSERT(!cfs_hash_is_exiting(hs)); - - if (!cfs_hash_with_rehash(hs)) - return; - /* - * NB: it's race on cfs_has_t::hs_iterating, but doesn't matter - * because it's just an unreliable signal to rehash-thread, - * rehash-thread will try to finish rehash ASAP when seeing this. - */ - hs->hs_iterating = 1; - - cfs_hash_lock(hs, 1); - hs->hs_iterators++; - cfs_hash_unlock(hs, 1); - - /* NB: iteration is mostly called by service thread, - * we tend to cancel pending rehash-request, instead of - * blocking service thread, we will relaunch rehash request - * after iteration - */ - if (cfs_hash_is_rehashing(hs)) - cfs_hash_rehash_cancel(hs); -} - -static void -cfs_hash_for_each_exit(struct cfs_hash *hs) -{ - int remained; - int bits; - - if (!cfs_hash_with_rehash(hs)) - return; - cfs_hash_lock(hs, 1); - remained = --hs->hs_iterators; - bits = cfs_hash_rehash_bits(hs); - cfs_hash_unlock(hs, 1); - /* NB: it's race on cfs_has_t::hs_iterating, see above */ - if (!remained) - hs->hs_iterating = 0; - if (bits > 0) { - cfs_hash_rehash(hs, atomic_read(&hs->hs_count) < - CFS_HASH_LOOP_HOG); - } -} - -/** - * For each item in the libcfs hash @hs call the passed callback @func - * and pass to it as an argument each hash item and the private @data. 
- * - * a) the function may sleep! - * b) during the callback: - * . the bucket lock is held so the callback must never sleep. - * . if @removal_safe is true, use can remove current item by - * cfs_hash_bd_del_locked - */ -static u64 -cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data, int remove_safe) -{ - struct hlist_node *hnode; - struct hlist_node *pos; - struct cfs_hash_bd bd; - u64 count = 0; - int excl = !!remove_safe; - int loop = 0; - int i; - - cfs_hash_for_each_enter(hs); - - cfs_hash_lock(hs, 0); - LASSERT(!cfs_hash_is_rehashing(hs)); - - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - cfs_hash_bd_lock(hs, &bd, excl); - if (!func) { /* only glimpse size */ - count += bd.bd_bucket->hsb_count; - cfs_hash_bd_unlock(hs, &bd, excl); - continue; - } - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - hlist_for_each_safe(hnode, pos, hhead) { - cfs_hash_bucket_validate(hs, &bd, hnode); - count++; - loop++; - if (func(hs, &bd, hnode, data)) { - cfs_hash_bd_unlock(hs, &bd, excl); - goto out; - } - } - } - cfs_hash_bd_unlock(hs, &bd, excl); - if (loop < CFS_HASH_LOOP_HOG) - continue; - loop = 0; - cfs_hash_unlock(hs, 0); - cond_resched(); - cfs_hash_lock(hs, 0); - } - out: - cfs_hash_unlock(hs, 0); - - cfs_hash_for_each_exit(hs); - return count; -} - -struct cfs_hash_cond_arg { - cfs_hash_cond_opt_cb_t func; - void *arg; -}; - -static int -cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *data) -{ - struct cfs_hash_cond_arg *cond = data; - - if (cond->func(cfs_hash_object(hs, hnode), cond->arg)) - cfs_hash_bd_del_locked(hs, bd, hnode); - return 0; -} - -/** - * Delete item from the libcfs hash @hs when @func return true. - * The write lock being hold during loop for each bucket to avoid - * any object be reference. 
- */ -void -cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data) -{ - struct cfs_hash_cond_arg arg = { - .func = func, - .arg = data, - }; - - cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1); -} -EXPORT_SYMBOL(cfs_hash_cond_del); - -void -cfs_hash_for_each(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - cfs_hash_for_each_tight(hs, func, data, 0); -} -EXPORT_SYMBOL(cfs_hash_for_each); - -void -cfs_hash_for_each_safe(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - cfs_hash_for_each_tight(hs, func, data, 1); -} -EXPORT_SYMBOL(cfs_hash_for_each_safe); - -static int -cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *data) -{ - *(int *)data = 0; - return 1; /* return 1 to break the loop */ -} - -int -cfs_hash_is_empty(struct cfs_hash *hs) -{ - int empty = 1; - - cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0); - return empty; -} -EXPORT_SYMBOL(cfs_hash_is_empty); - -u64 -cfs_hash_size_get(struct cfs_hash *hs) -{ - return cfs_hash_with_counter(hs) ? - atomic_read(&hs->hs_count) : - cfs_hash_for_each_tight(hs, NULL, NULL, 0); -} -EXPORT_SYMBOL(cfs_hash_size_get); - -/* - * cfs_hash_for_each_relax: - * Iterate the hash table and call @func on each item without - * any lock. This function can't guarantee to finish iteration - * if these features are enabled: - * - * a. if rehash_key is enabled, an item can be moved from - * one bucket to another bucket - * b. user can remove non-zero-ref item from hash-table, - * so the item can be removed from hash-table, even worse, - * it's possible that user changed key and insert to another - * hash bucket. - * there's no way for us to finish iteration correctly on previous - * two cases, so iteration has to be stopped on change. 
- */ -static int -cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data, int start) -{ - struct hlist_node *next = NULL; - struct hlist_node *hnode; - struct cfs_hash_bd bd; - u32 version; - int count = 0; - int stop_on_change; - int has_put_locked; - int end = -1; - int rc = 0; - int i; - - stop_on_change = cfs_hash_with_rehash_key(hs) || - !cfs_hash_with_no_itemref(hs); - has_put_locked = hs->hs_ops->hs_put_locked != NULL; - cfs_hash_lock(hs, 0); -again: - LASSERT(!cfs_hash_is_rehashing(hs)); - - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - if (i < start) - continue; - else if (end > 0 && i >= end) - break; - - cfs_hash_bd_lock(hs, &bd, 0); - version = cfs_hash_bd_version_get(&bd); - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - hnode = hhead->first; - if (!hnode) - continue; - cfs_hash_get(hs, hnode); - - for (; hnode; hnode = next) { - cfs_hash_bucket_validate(hs, &bd, hnode); - next = hnode->next; - if (next) - cfs_hash_get(hs, next); - cfs_hash_bd_unlock(hs, &bd, 0); - cfs_hash_unlock(hs, 0); - - rc = func(hs, &bd, hnode, data); - if (stop_on_change || !has_put_locked) - cfs_hash_put(hs, hnode); - cond_resched(); - count++; - - cfs_hash_lock(hs, 0); - cfs_hash_bd_lock(hs, &bd, 0); - if (stop_on_change) { - if (version != - cfs_hash_bd_version_get(&bd)) - rc = -EINTR; - } else if (has_put_locked) { - cfs_hash_put_locked(hs, hnode); - } - if (rc) /* callback wants to break iteration */ - break; - } - if (next) { - if (has_put_locked) { - cfs_hash_put_locked(hs, next); - next = NULL; - } - break; - } else if (rc) { - break; - } - } - cfs_hash_bd_unlock(hs, &bd, 0); - if (next && !has_put_locked) { - cfs_hash_put(hs, next); - next = NULL; - } - if (rc) /* callback wants to break iteration */ - break; - } - if (start > 0 && !rc) { - end = start; - start = 0; - goto again; - } - - cfs_hash_unlock(hs, 0); - return count; -} - -int -cfs_hash_for_each_nolock(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, 
- void *data, int start) -{ - if (cfs_hash_with_no_lock(hs) || - cfs_hash_with_rehash_key(hs) || - !cfs_hash_with_no_itemref(hs)) - return -EOPNOTSUPP; - - if (!hs->hs_ops->hs_get || - (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked)) - return -EOPNOTSUPP; - - cfs_hash_for_each_enter(hs); - cfs_hash_for_each_relax(hs, func, data, start); - cfs_hash_for_each_exit(hs); - - return 0; -} -EXPORT_SYMBOL(cfs_hash_for_each_nolock); - -/** - * For each hash bucket in the libcfs hash @hs call the passed callback - * @func until all the hash buckets are empty. The passed callback @func - * or the previously registered callback hs->hs_put must remove the item - * from the hash. You may either use the cfs_hash_del() or hlist_del() - * functions. No rwlocks will be held during the callback @func it is - * safe to sleep if needed. This function will not terminate until the - * hash is empty. Note it is still possible to concurrently add new - * items in to the hash. It is the callers responsibility to ensure - * the required locking is in place to prevent concurrent insertions. 
- */ -int -cfs_hash_for_each_empty(struct cfs_hash *hs, cfs_hash_for_each_cb_t func, - void *data) -{ - unsigned int i = 0; - - if (cfs_hash_with_no_lock(hs)) - return -EOPNOTSUPP; - - if (!hs->hs_ops->hs_get || - (!hs->hs_ops->hs_put && !hs->hs_ops->hs_put_locked)) - return -EOPNOTSUPP; - - cfs_hash_for_each_enter(hs); - while (cfs_hash_for_each_relax(hs, func, data, 0)) { - CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n", - hs->hs_name, i++); - } - cfs_hash_for_each_exit(hs); - return 0; -} -EXPORT_SYMBOL(cfs_hash_for_each_empty); - -void -cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned int hindex, - cfs_hash_for_each_cb_t func, void *data) -{ - struct hlist_head *hhead; - struct hlist_node *hnode; - struct cfs_hash_bd bd; - - cfs_hash_for_each_enter(hs); - cfs_hash_lock(hs, 0); - if (hindex >= CFS_HASH_NHLIST(hs)) - goto out; - - cfs_hash_bd_index_set(hs, hindex, &bd); - - cfs_hash_bd_lock(hs, &bd, 0); - hhead = cfs_hash_bd_hhead(hs, &bd); - hlist_for_each(hnode, hhead) { - if (func(hs, &bd, hnode, data)) - break; - } - cfs_hash_bd_unlock(hs, &bd, 0); -out: - cfs_hash_unlock(hs, 0); - cfs_hash_for_each_exit(hs); -} -EXPORT_SYMBOL(cfs_hash_hlist_for_each); - -/* - * For each item in the libcfs hash @hs which matches the @key call - * the passed callback @func and pass to it as an argument each hash - * item and the private @data. During the callback the bucket lock - * is held so the callback must never sleep. 
- */ -void -cfs_hash_for_each_key(struct cfs_hash *hs, const void *key, - cfs_hash_for_each_cb_t func, void *data) -{ - struct hlist_node *hnode; - struct cfs_hash_bd bds[2]; - unsigned int i; - - cfs_hash_lock(hs, 0); - - cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0); - - cfs_hash_for_each_bd(bds, 2, i) { - struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]); - - hlist_for_each(hnode, hlist) { - cfs_hash_bucket_validate(hs, &bds[i], hnode); - - if (cfs_hash_keycmp(hs, key, hnode)) { - if (func(hs, &bds[i], hnode, data)) - break; - } - } - } - - cfs_hash_dual_bd_unlock(hs, bds, 0); - cfs_hash_unlock(hs, 0); -} -EXPORT_SYMBOL(cfs_hash_for_each_key); - -/** - * Rehash the libcfs hash @hs to the given @bits. This can be used - * to grow the hash size when excessive chaining is detected, or to - * shrink the hash when it is larger than needed. When the CFS_HASH_REHASH - * flag is set in @hs the libcfs hash may be dynamically rehashed - * during addition or removal if the hash's theta value exceeds - * either the hs->hs_min_theta or hs->max_theta values. By default - * these values are tuned to keep the chained hash depth small, and - * this approach assumes a reasonably uniform hashing function. The - * theta thresholds for @hs are tunable via cfs_hash_set_theta(). 
- */ -void -cfs_hash_rehash_cancel(struct cfs_hash *hs) -{ - LASSERT(cfs_hash_with_rehash(hs)); - cancel_work_sync(&hs->hs_rehash_work); -} - -void -cfs_hash_rehash(struct cfs_hash *hs, int do_rehash) -{ - int rc; - - LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs)); - - cfs_hash_lock(hs, 1); - - rc = cfs_hash_rehash_bits(hs); - if (rc <= 0) { - cfs_hash_unlock(hs, 1); - return; - } - - hs->hs_rehash_bits = rc; - if (!do_rehash) { - /* launch and return */ - queue_work(cfs_rehash_wq, &hs->hs_rehash_work); - cfs_hash_unlock(hs, 1); - return; - } - - /* rehash right now */ - cfs_hash_unlock(hs, 1); - - cfs_hash_rehash_worker(&hs->hs_rehash_work); -} - -static int -cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old) -{ - struct cfs_hash_bd new; - struct hlist_head *hhead; - struct hlist_node *hnode; - struct hlist_node *pos; - void *key; - int c = 0; - - /* hold cfs_hash_lock(hs, 1), so don't need any bucket lock */ - cfs_hash_bd_for_each_hlist(hs, old, hhead) { - hlist_for_each_safe(hnode, pos, hhead) { - key = cfs_hash_key(hs, hnode); - LASSERT(key); - /* Validate hnode is in the correct bucket. */ - cfs_hash_bucket_validate(hs, old, hnode); - /* - * Delete from old hash bucket; move to new bucket. - * ops->hs_key must be defined. 
- */ - cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets, - hs->hs_rehash_bits, key, &new); - cfs_hash_bd_move_locked(hs, old, &new, hnode); - c++; - } - } - - return c; -} - -static void -cfs_hash_rehash_worker(struct work_struct *work) -{ - struct cfs_hash *hs = container_of(work, struct cfs_hash, hs_rehash_work); - struct cfs_hash_bucket **bkts; - struct cfs_hash_bd bd; - unsigned int old_size; - unsigned int new_size; - int bsize; - int count = 0; - int rc = 0; - int i; - - LASSERT(hs && cfs_hash_with_rehash(hs)); - - cfs_hash_lock(hs, 0); - LASSERT(cfs_hash_is_rehashing(hs)); - - old_size = CFS_HASH_NBKT(hs); - new_size = CFS_HASH_RH_NBKT(hs); - - cfs_hash_unlock(hs, 0); - - /* - * don't need hs::hs_rwlock for hs::hs_buckets, - * because nobody can change bkt-table except me. - */ - bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets, - old_size, new_size); - cfs_hash_lock(hs, 1); - if (!bkts) { - rc = -ENOMEM; - goto out; - } - - if (bkts == hs->hs_buckets) { - bkts = NULL; /* do nothing */ - goto out; - } - - rc = __cfs_hash_theta(hs); - if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) { - /* free the new allocated bkt-table */ - old_size = new_size; - new_size = CFS_HASH_NBKT(hs); - rc = -EALREADY; - goto out; - } - - LASSERT(!hs->hs_rehash_buckets); - hs->hs_rehash_buckets = bkts; - - rc = 0; - cfs_hash_for_each_bucket(hs, &bd, i) { - if (cfs_hash_is_exiting(hs)) { - rc = -ESRCH; - /* someone wants to destroy the hash, abort now */ - if (old_size < new_size) /* OK to free old bkt-table */ - break; - /* it's shrinking, need free new bkt-table */ - hs->hs_rehash_buckets = NULL; - old_size = new_size; - new_size = CFS_HASH_NBKT(hs); - goto out; - } - - count += cfs_hash_rehash_bd(hs, &bd); - if (count < CFS_HASH_LOOP_HOG || - cfs_hash_is_iterating(hs)) { /* need to finish ASAP */ - continue; - } - - count = 0; - cfs_hash_unlock(hs, 1); - cond_resched(); - cfs_hash_lock(hs, 1); - } - - hs->hs_rehash_count++; - - bkts = hs->hs_buckets; - hs->hs_buckets = 
hs->hs_rehash_buckets; - hs->hs_rehash_buckets = NULL; - - hs->hs_cur_bits = hs->hs_rehash_bits; -out: - hs->hs_rehash_bits = 0; - bsize = cfs_hash_bkt_size(hs); - cfs_hash_unlock(hs, 1); - /* can't refer to @hs anymore because it could be destroyed */ - if (bkts) - cfs_hash_buckets_free(bkts, bsize, new_size, old_size); - if (rc) - CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc); -} - -/** - * Rehash the object referenced by @hnode in the libcfs hash @hs. The - * @old_key must be provided to locate the objects previous location - * in the hash, and the @new_key will be used to reinsert the object. - * Use this function instead of a cfs_hash_add() + cfs_hash_del() - * combo when it is critical that there is no window in time where the - * object is missing from the hash. When an object is being rehashed - * the registered cfs_hash_get() and cfs_hash_put() functions will - * not be called. - */ -void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key, - void *new_key, struct hlist_node *hnode) -{ - struct cfs_hash_bd bds[3]; - struct cfs_hash_bd old_bds[2]; - struct cfs_hash_bd new_bd; - - LASSERT(!hlist_unhashed(hnode)); - - cfs_hash_lock(hs, 0); - - cfs_hash_dual_bd_get(hs, old_key, old_bds); - cfs_hash_bd_get(hs, new_key, &new_bd); - - bds[0] = old_bds[0]; - bds[1] = old_bds[1]; - bds[2] = new_bd; - - /* NB: bds[0] and bds[1] are ordered already */ - cfs_hash_bd_order(&bds[1], &bds[2]); - cfs_hash_bd_order(&bds[0], &bds[1]); - - cfs_hash_multi_bd_lock(hs, bds, 3, 1); - if (likely(!old_bds[1].bd_bucket)) { - cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode); - } else { - cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode); - cfs_hash_bd_add_locked(hs, &new_bd, hnode); - } - /* overwrite key inside locks, otherwise may screw up with - * other operations, i.e: rehash - */ - cfs_hash_keycpy(hs, hnode, new_key); - - cfs_hash_multi_bd_unlock(hs, bds, 3, 1); - cfs_hash_unlock(hs, 0); -} -EXPORT_SYMBOL(cfs_hash_rehash_key); - -void 
cfs_hash_debug_header(struct seq_file *m) -{ - seq_printf(m, "%-*s cur min max theta t-min t-max flags rehash count maxdep maxdepb distribution\n", - CFS_HASH_BIGNAME_LEN, "name"); -} -EXPORT_SYMBOL(cfs_hash_debug_header); - -static struct cfs_hash_bucket ** -cfs_hash_full_bkts(struct cfs_hash *hs) -{ - /* NB: caller should hold hs->hs_rwlock if REHASH is set */ - if (!hs->hs_rehash_buckets) - return hs->hs_buckets; - - LASSERT(hs->hs_rehash_bits); - return hs->hs_rehash_bits > hs->hs_cur_bits ? - hs->hs_rehash_buckets : hs->hs_buckets; -} - -static unsigned int -cfs_hash_full_nbkt(struct cfs_hash *hs) -{ - /* NB: caller should hold hs->hs_rwlock if REHASH is set */ - if (!hs->hs_rehash_buckets) - return CFS_HASH_NBKT(hs); - - LASSERT(hs->hs_rehash_bits); - return hs->hs_rehash_bits > hs->hs_cur_bits ? - CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs); -} - -void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m) -{ - int dist[8] = { 0, }; - int maxdep = -1; - int maxdepb = -1; - int total = 0; - int theta; - int i; - - cfs_hash_lock(hs, 0); - theta = __cfs_hash_theta(hs); - - seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d 0x%02x %6d ", - CFS_HASH_BIGNAME_LEN, hs->hs_name, - 1 << hs->hs_cur_bits, 1 << hs->hs_min_bits, - 1 << hs->hs_max_bits, - __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta), - __cfs_hash_theta_int(hs->hs_min_theta), - __cfs_hash_theta_frac(hs->hs_min_theta), - __cfs_hash_theta_int(hs->hs_max_theta), - __cfs_hash_theta_frac(hs->hs_max_theta), - hs->hs_flags, hs->hs_rehash_count); - - /* - * The distribution is a summary of the chained hash depth in - * each of the libcfs hash buckets. Each buckets hsb_count is - * divided by the hash theta value and used to generate a - * histogram of the hash distribution. A uniform hash will - * result in all hash buckets being close to the average thus - * only the first few entries in the histogram will be non-zero. 
- * If you hash function results in a non-uniform hash the will - * be observable by outlier bucks in the distribution histogram. - * - * Uniform hash distribution: 128/128/0/0/0/0/0/0 - * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1 - */ - for (i = 0; i < cfs_hash_full_nbkt(hs); i++) { - struct cfs_hash_bd bd; - - bd.bd_bucket = cfs_hash_full_bkts(hs)[i]; - cfs_hash_bd_lock(hs, &bd, 0); - if (maxdep < bd.bd_bucket->hsb_depmax) { - maxdep = bd.bd_bucket->hsb_depmax; - maxdepb = ffz(~maxdep); - } - total += bd.bd_bucket->hsb_count; - dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++; - cfs_hash_bd_unlock(hs, &bd, 0); - } - - seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb); - for (i = 0; i < 8; i++) - seq_printf(m, "%d%c", dist[i], (i == 7) ? '\n' : '/'); - - cfs_hash_unlock(hs, 0); -} -EXPORT_SYMBOL(cfs_hash_debug_str); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c deleted file mode 100644 index 3d1cf457b286..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c +++ /dev/null @@ -1,1086 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Please see comments in libcfs/include/libcfs/libcfs_cpu.h for introduction - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include -#include -#include -#include - -#include -#include -#include - -/** Global CPU partition table */ -struct cfs_cpt_table *cfs_cpt_tab __read_mostly; -EXPORT_SYMBOL(cfs_cpt_tab); - -/** - * modparam for setting number of partitions - * - * 0 : estimate best value based on cores or NUMA nodes - * 1 : disable multiple partitions - * >1 : specify number of partitions - */ -static int cpu_npartitions; -module_param(cpu_npartitions, int, 0444); -MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions"); - -/** - * modparam for setting CPU partitions patterns: - * - * i.e: "0[0,1,2,3] 1[4,5,6,7]", number before bracket is CPU partition ID, - * number in bracket is processor ID (core or HT) - * - * i.e: "N 0[0,1] 1[2,3]" the first character 'N' means numbers in bracket - * are NUMA node ID, number before bracket is CPU partition ID. 
- * - * i.e: "N", shortcut expression to create CPT from NUMA & CPU topology - * - * NB: If user specified cpu_pattern, cpu_npartitions will be ignored - */ -static char *cpu_pattern = "N"; -module_param(cpu_pattern, charp, 0444); -MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern"); - -static struct cfs_cpt_data { - /* serialize hotplug etc */ - spinlock_t cpt_lock; - /* reserved for hotplug */ - unsigned long cpt_version; - /* mutex to protect cpt_cpumask */ - struct mutex cpt_mutex; - /* scratch buffer for set/unset_node */ - cpumask_var_t cpt_cpumask; -} cpt_data; - -#define CFS_CPU_VERSION_MAGIC 0xbabecafe - -struct cfs_cpt_table * -cfs_cpt_table_alloc(unsigned int ncpt) -{ - struct cfs_cpt_table *cptab; - int i; - - cptab = kzalloc(sizeof(*cptab), GFP_NOFS); - if (!cptab) - return NULL; - - cptab->ctb_nparts = ncpt; - - cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask), - GFP_NOFS); - if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS) || - !cptab->ctb_nodemask) - goto failed; - - cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(), - sizeof(cptab->ctb_cpu2cpt[0]), - GFP_KERNEL); - if (!cptab->ctb_cpu2cpt) - goto failed; - - memset(cptab->ctb_cpu2cpt, -1, - num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0])); - - cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]), - GFP_KERNEL); - if (!cptab->ctb_parts) - goto failed; - - for (i = 0; i < ncpt; i++) { - struct cfs_cpu_partition *part = &cptab->ctb_parts[i]; - - part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask), - GFP_NOFS); - if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) || - !part->cpt_nodemask) - goto failed; - } - - spin_lock(&cpt_data.cpt_lock); - /* Reserved for hotplug */ - cptab->ctb_version = cpt_data.cpt_version; - spin_unlock(&cpt_data.cpt_lock); - - return cptab; - - failed: - cfs_cpt_table_free(cptab); - return NULL; -} -EXPORT_SYMBOL(cfs_cpt_table_alloc); - -void -cfs_cpt_table_free(struct cfs_cpt_table *cptab) -{ - int i; - - 
kvfree(cptab->ctb_cpu2cpt); - - for (i = 0; cptab->ctb_parts && i < cptab->ctb_nparts; i++) { - struct cfs_cpu_partition *part = &cptab->ctb_parts[i]; - - kfree(part->cpt_nodemask); - free_cpumask_var(part->cpt_cpumask); - } - - kvfree(cptab->ctb_parts); - - kfree(cptab->ctb_nodemask); - free_cpumask_var(cptab->ctb_cpumask); - - kfree(cptab); -} -EXPORT_SYMBOL(cfs_cpt_table_free); - -int -cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len) -{ - char *tmp = buf; - int rc = 0; - int i; - int j; - - for (i = 0; i < cptab->ctb_nparts; i++) { - if (len > 0) { - rc = snprintf(tmp, len, "%d\t: ", i); - len -= rc; - } - - if (len <= 0) { - rc = -EFBIG; - goto out; - } - - tmp += rc; - for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) { - rc = snprintf(tmp, len, "%d ", j); - len -= rc; - if (len <= 0) { - rc = -EFBIG; - goto out; - } - tmp += rc; - } - - *tmp = '\n'; - tmp++; - len--; - } - - out: - if (rc < 0) - return rc; - - return tmp - buf; -} -EXPORT_SYMBOL(cfs_cpt_table_print); - -static void -cfs_node_to_cpumask(int node, cpumask_t *mask) -{ - const cpumask_t *tmp = cpumask_of_node(node); - - if (tmp) - cpumask_copy(mask, tmp); - else - cpumask_clear(mask); -} - -int -cfs_cpt_number(struct cfs_cpt_table *cptab) -{ - return cptab->ctb_nparts; -} -EXPORT_SYMBOL(cfs_cpt_number); - -int -cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? - cpumask_weight(cptab->ctb_cpumask) : - cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask); -} -EXPORT_SYMBOL(cfs_cpt_weight); - -int -cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? 
- cpumask_any_and(cptab->ctb_cpumask, - cpu_online_mask) < nr_cpu_ids : - cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask, - cpu_online_mask) < nr_cpu_ids; -} -EXPORT_SYMBOL(cfs_cpt_online); - -cpumask_var_t * -cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? - &cptab->ctb_cpumask : &cptab->ctb_parts[cpt].cpt_cpumask; -} -EXPORT_SYMBOL(cfs_cpt_cpumask); - -nodemask_t * -cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt) -{ - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - return cpt == CFS_CPT_ANY ? - cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask; -} -EXPORT_SYMBOL(cfs_cpt_nodemask); - -int -cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ - int node; - - LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts); - - if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) { - CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu); - return 0; - } - - if (cptab->ctb_cpu2cpt[cpu] != -1) { - CDEBUG(D_INFO, "CPU %d is already in partition %d\n", - cpu, cptab->ctb_cpu2cpt[cpu]); - return 0; - } - - cptab->ctb_cpu2cpt[cpu] = cpt; - - LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask)); - LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)); - - cpumask_set_cpu(cpu, cptab->ctb_cpumask); - cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask); - - node = cpu_to_node(cpu); - - /* first CPU of @node in this CPT table */ - if (!node_isset(node, *cptab->ctb_nodemask)) - node_set(node, *cptab->ctb_nodemask); - - /* first CPU of @node in this partition */ - if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask)) - node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask); - - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpu); - -void -cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu) -{ - int node; - int i; - - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - if (cpu < 0 
|| cpu >= nr_cpu_ids) { - CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu); - return; - } - - if (cpt == CFS_CPT_ANY) { - /* caller doesn't know the partition ID */ - cpt = cptab->ctb_cpu2cpt[cpu]; - if (cpt < 0) { /* not set in this CPT-table */ - CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n", - cpt, cptab); - return; - } - - } else if (cpt != cptab->ctb_cpu2cpt[cpu]) { - CDEBUG(D_INFO, - "CPU %d is not in cpu-partition %d\n", cpu, cpt); - return; - } - - LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask)); - LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask)); - - cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask); - cpumask_clear_cpu(cpu, cptab->ctb_cpumask); - cptab->ctb_cpu2cpt[cpu] = -1; - - node = cpu_to_node(cpu); - - LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask)); - LASSERT(node_isset(node, *cptab->ctb_nodemask)); - - for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) { - /* this CPT has other CPU belonging to this node? */ - if (cpu_to_node(i) == node) - break; - } - - if (i >= nr_cpu_ids) - node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask); - - for_each_cpu(i, cptab->ctb_cpumask) { - /* this CPT-table has other CPU belonging to this node? 
*/ - if (cpu_to_node(i) == node) - break; - } - - if (i >= nr_cpu_ids) - node_clear(node, *cptab->ctb_nodemask); -} -EXPORT_SYMBOL(cfs_cpt_unset_cpu); - -int -cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ - int i; - - if (!cpumask_weight(mask) || - cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) { - CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n", - cpt); - return 0; - } - - for_each_cpu(i, mask) { - if (!cfs_cpt_set_cpu(cptab, cpt, i)) - return 0; - } - - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_cpumask); - -void -cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask) -{ - int i; - - for_each_cpu(i, mask) - cfs_cpt_unset_cpu(cptab, cpt, i); -} -EXPORT_SYMBOL(cfs_cpt_unset_cpumask); - -int -cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ - int rc; - - if (node < 0 || node >= MAX_NUMNODES) { - CDEBUG(D_INFO, - "Invalid NUMA id %d for CPU partition %d\n", node, cpt); - return 0; - } - - mutex_lock(&cpt_data.cpt_mutex); - - cfs_node_to_cpumask(node, cpt_data.cpt_cpumask); - - rc = cfs_cpt_set_cpumask(cptab, cpt, cpt_data.cpt_cpumask); - - mutex_unlock(&cpt_data.cpt_mutex); - - return rc; -} -EXPORT_SYMBOL(cfs_cpt_set_node); - -void -cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node) -{ - if (node < 0 || node >= MAX_NUMNODES) { - CDEBUG(D_INFO, - "Invalid NUMA id %d for CPU partition %d\n", node, cpt); - return; - } - - mutex_lock(&cpt_data.cpt_mutex); - - cfs_node_to_cpumask(node, cpt_data.cpt_cpumask); - - cfs_cpt_unset_cpumask(cptab, cpt, cpt_data.cpt_cpumask); - - mutex_unlock(&cpt_data.cpt_mutex); -} -EXPORT_SYMBOL(cfs_cpt_unset_node); - -int -cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask) -{ - int i; - - for_each_node_mask(i, *mask) { - if (!cfs_cpt_set_node(cptab, cpt, i)) - return 0; - } - - return 1; -} -EXPORT_SYMBOL(cfs_cpt_set_nodemask); - -void -cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int 
cpt, nodemask_t *mask) -{ - int i; - - for_each_node_mask(i, *mask) - cfs_cpt_unset_node(cptab, cpt, i); -} -EXPORT_SYMBOL(cfs_cpt_unset_nodemask); - -void -cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt) -{ - int last; - int i; - - if (cpt == CFS_CPT_ANY) { - last = cptab->ctb_nparts - 1; - cpt = 0; - } else { - last = cpt; - } - - for (; cpt <= last; cpt++) { - for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) - cfs_cpt_unset_cpu(cptab, cpt, i); - } -} -EXPORT_SYMBOL(cfs_cpt_clear); - -int -cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt) -{ - nodemask_t *mask; - int weight; - int rotor; - int node; - - /* convert CPU partition ID to HW node id */ - - if (cpt < 0 || cpt >= cptab->ctb_nparts) { - mask = cptab->ctb_nodemask; - rotor = cptab->ctb_spread_rotor++; - } else { - mask = cptab->ctb_parts[cpt].cpt_nodemask; - rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++; - } - - weight = nodes_weight(*mask); - LASSERT(weight > 0); - - rotor %= weight; - - for_each_node_mask(node, *mask) { - if (!rotor--) - return node; - } - - LBUG(); - return 0; -} -EXPORT_SYMBOL(cfs_cpt_spread_node); - -int -cfs_cpt_current(struct cfs_cpt_table *cptab, int remap) -{ - int cpu; - int cpt; - - preempt_disable(); - cpu = smp_processor_id(); - cpt = cptab->ctb_cpu2cpt[cpu]; - - if (cpt < 0 && remap) { - /* don't return negative value for safety of upper layer, - * instead we shadow the unknown cpu to a valid partition ID - */ - cpt = cpu % cptab->ctb_nparts; - } - preempt_enable(); - return cpt; -} -EXPORT_SYMBOL(cfs_cpt_current); - -int -cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu) -{ - LASSERT(cpu >= 0 && cpu < nr_cpu_ids); - - return cptab->ctb_cpu2cpt[cpu]; -} -EXPORT_SYMBOL(cfs_cpt_of_cpu); - -int -cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt) -{ - cpumask_var_t *cpumask; - nodemask_t *nodemask; - int rc; - int i; - - LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts)); - - if (cpt == CFS_CPT_ANY) { - cpumask = &cptab->ctb_cpumask; - 
nodemask = cptab->ctb_nodemask; - } else { - cpumask = &cptab->ctb_parts[cpt].cpt_cpumask; - nodemask = cptab->ctb_parts[cpt].cpt_nodemask; - } - - if (cpumask_any_and(*cpumask, cpu_online_mask) >= nr_cpu_ids) { - CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n", - cpt); - return -EINVAL; - } - - for_each_online_cpu(i) { - if (cpumask_test_cpu(i, *cpumask)) - continue; - - rc = set_cpus_allowed_ptr(current, *cpumask); - set_mems_allowed(*nodemask); - if (!rc) - schedule(); /* switch to allowed CPU */ - - return rc; - } - - /* don't need to set affinity because all online CPUs are covered */ - return 0; -} -EXPORT_SYMBOL(cfs_cpt_bind); - -/** - * Choose max to \a number CPUs from \a node and set them in \a cpt. - * We always prefer to choose CPU in the same core/socket. - */ -static int -cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt, - cpumask_t *node, int number) -{ - cpumask_var_t socket; - cpumask_var_t core; - int rc = 0; - int cpu; - - LASSERT(number > 0); - - if (number >= cpumask_weight(node)) { - while (!cpumask_empty(node)) { - cpu = cpumask_first(node); - - rc = cfs_cpt_set_cpu(cptab, cpt, cpu); - if (!rc) - return -EINVAL; - cpumask_clear_cpu(cpu, node); - } - return 0; - } - - /* - * Allocate scratch buffers - * As we cannot initialize a cpumask_var_t, we need - * to alloc both before we can risk trying to free either - */ - if (!zalloc_cpumask_var(&socket, GFP_NOFS)) - rc = -ENOMEM; - if (!zalloc_cpumask_var(&core, GFP_NOFS)) - rc = -ENOMEM; - if (rc) - goto out; - - while (!cpumask_empty(node)) { - cpu = cpumask_first(node); - - /* get cpumask for cores in the same socket */ - cpumask_copy(socket, topology_core_cpumask(cpu)); - cpumask_and(socket, socket, node); - - LASSERT(!cpumask_empty(socket)); - - while (!cpumask_empty(socket)) { - int i; - - /* get cpumask for hts in the same core */ - cpumask_copy(core, 
topology_sibling_cpumask(cpu)); - cpumask_and(core, core, node); - - LASSERT(!cpumask_empty(core)); - - for_each_cpu(i, core) { - cpumask_clear_cpu(i, socket); - cpumask_clear_cpu(i, node); - - rc = cfs_cpt_set_cpu(cptab, cpt, i); - if (!rc) { - rc = -EINVAL; - goto out; - } - - if (!--number) - goto out; - } - cpu = cpumask_first(socket); - } - } - -out: - free_cpumask_var(socket); - free_cpumask_var(core); - return rc; -} - -#define CPT_WEIGHT_MIN 4u - -static unsigned int -cfs_cpt_num_estimate(void) -{ - unsigned int nnode = num_online_nodes(); - unsigned int ncpu = num_online_cpus(); - unsigned int ncpt; - - if (ncpu <= CPT_WEIGHT_MIN) { - ncpt = 1; - goto out; - } - - /* generate reasonable number of CPU partitions based on total number - * of CPUs, Preferred N should be power2 and match this condition: - * 2 * (N - 1)^2 < NCPUS <= 2 * N^2 - */ - for (ncpt = 2; ncpu > 2 * ncpt * ncpt; ncpt <<= 1) - ; - - if (ncpt <= nnode) { /* fat numa system */ - while (nnode > ncpt) - nnode >>= 1; - - } else { /* ncpt > nnode */ - while ((nnode << 1) <= ncpt) - nnode <<= 1; - } - - ncpt = nnode; - -out: -#if (BITS_PER_LONG == 32) - /* config many CPU partitions on 32-bit system could consume - * too much memory - */ - ncpt = min(2U, ncpt); -#endif - while (ncpu % ncpt) - ncpt--; /* worst case is 1 */ - - return ncpt; -} - -static struct cfs_cpt_table * -cfs_cpt_table_create(int ncpt) -{ - struct cfs_cpt_table *cptab = NULL; - cpumask_var_t mask; - int cpt = 0; - int num; - int rc; - int i; - - rc = cfs_cpt_num_estimate(); - if (ncpt <= 0) - ncpt = rc; - - if (ncpt > num_online_cpus() || ncpt > 4 * rc) { - CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n", - ncpt, rc); - } - - if (num_online_cpus() % ncpt) { - CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n", - 
(int)num_online_cpus(), ncpt); - goto failed; - } - - cptab = cfs_cpt_table_alloc(ncpt); - if (!cptab) { - CERROR("Failed to allocate CPU map(%d)\n", ncpt); - goto failed; - } - - num = num_online_cpus() / ncpt; - if (!num) { - CERROR("CPU changed while setting CPU partition\n"); - goto failed; - } - - if (!zalloc_cpumask_var(&mask, GFP_NOFS)) { - CERROR("Failed to allocate scratch cpumask\n"); - goto failed; - } - - for_each_online_node(i) { - cfs_node_to_cpumask(i, mask); - - while (!cpumask_empty(mask)) { - struct cfs_cpu_partition *part; - int n; - - /* - * Each emulated NUMA node has all allowed CPUs in - * the mask. - * End loop when all partitions have assigned CPUs. - */ - if (cpt == ncpt) - break; - - part = &cptab->ctb_parts[cpt]; - - n = num - cpumask_weight(part->cpt_cpumask); - LASSERT(n > 0); - - rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n); - if (rc < 0) - goto failed_mask; - - LASSERT(num >= cpumask_weight(part->cpt_cpumask)); - if (num == cpumask_weight(part->cpt_cpumask)) - cpt++; - } - } - - if (cpt != ncpt || - num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) { - CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n", - cptab->ctb_nparts, num, cpt, - cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)); - goto failed_mask; - } - - free_cpumask_var(mask); - - return cptab; - - failed_mask: - free_cpumask_var(mask); - failed: - CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n", - ncpt, num_online_nodes(), num_online_cpus()); - - if (cptab) - cfs_cpt_table_free(cptab); - - return NULL; -} - -static struct cfs_cpt_table * -cfs_cpt_table_create_pattern(char *pattern) -{ - struct cfs_cpt_table *cptab; - char *str; - int node = 0; - int high; - int ncpt = 0; - int cpt; - int rc; - int c; - int i; - - str = strim(pattern); - if (*str == 'n' || *str == 'N') { - pattern = str + 1; - if (*pattern != '\0') { - node = 1; - } else { /* shortcut to 
create CPT from NUMA & CPU topology */ - node = -1; - ncpt = num_online_nodes(); - } - } - - if (!ncpt) { /* scanning bracket which is mark of partition */ - for (str = pattern;; str++, ncpt++) { - str = strchr(str, '['); - if (!str) - break; - } - } - - if (!ncpt || - (node && ncpt > num_online_nodes()) || - (!node && ncpt > num_online_cpus())) { - CERROR("Invalid pattern %s, or too many partitions %d\n", - pattern, ncpt); - return NULL; - } - - cptab = cfs_cpt_table_alloc(ncpt); - if (!cptab) { - CERROR("Failed to allocate cpu partition table\n"); - return NULL; - } - - if (node < 0) { /* shortcut to create CPT from NUMA & CPU topology */ - cpt = 0; - - for_each_online_node(i) { - if (cpt >= ncpt) { - CERROR("CPU changed while setting CPU partition table, %d/%d\n", - cpt, ncpt); - goto failed; - } - - rc = cfs_cpt_set_node(cptab, cpt++, i); - if (!rc) - goto failed; - } - return cptab; - } - - high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1; - - for (str = strim(pattern), c = 0;; c++) { - struct cfs_range_expr *range; - struct cfs_expr_list *el; - char *bracket = strchr(str, '['); - int n; - - if (!bracket) { - if (*str) { - CERROR("Invalid pattern %s\n", str); - goto failed; - } - if (c != ncpt) { - CERROR("expect %d partitions but found %d\n", - ncpt, c); - goto failed; - } - break; - } - - if (sscanf(str, "%d%n", &cpt, &n) < 1) { - CERROR("Invalid cpu pattern %s\n", str); - goto failed; - } - - if (cpt < 0 || cpt >= ncpt) { - CERROR("Invalid partition id %d, total partitions %d\n", - cpt, ncpt); - goto failed; - } - - if (cfs_cpt_weight(cptab, cpt)) { - CERROR("Partition %d has already been set.\n", cpt); - goto failed; - } - - str = strim(str + n); - if (str != bracket) { - CERROR("Invalid pattern %s\n", str); - goto failed; - } - - bracket = strchr(str, ']'); - if (!bracket) { - CERROR("missing right bracket for cpt %d, %s\n", - cpt, str); - goto failed; - } - - if (cfs_expr_list_parse(str, (bracket - str) + 1, - 0, high, &el)) { - CERROR("Can't parse number 
range: %s\n", str); - goto failed; - } - - list_for_each_entry(range, &el->el_exprs, re_link) { - for (i = range->re_lo; i <= range->re_hi; i++) { - if ((i - range->re_lo) % range->re_stride) - continue; - - rc = node ? cfs_cpt_set_node(cptab, cpt, i) : - cfs_cpt_set_cpu(cptab, cpt, i); - if (!rc) { - cfs_expr_list_free(el); - goto failed; - } - } - } - - cfs_expr_list_free(el); - - if (!cfs_cpt_online(cptab, cpt)) { - CERROR("No online CPU is found on partition %d\n", cpt); - goto failed; - } - - str = strim(bracket + 1); - } - - return cptab; - - failed: - cfs_cpt_table_free(cptab); - return NULL; -} - -#ifdef CONFIG_HOTPLUG_CPU -static enum cpuhp_state lustre_cpu_online; - -static void cfs_cpu_incr_cpt_version(void) -{ - spin_lock(&cpt_data.cpt_lock); - cpt_data.cpt_version++; - spin_unlock(&cpt_data.cpt_lock); -} - -static int cfs_cpu_online(unsigned int cpu) -{ - cfs_cpu_incr_cpt_version(); - return 0; -} - -static int cfs_cpu_dead(unsigned int cpu) -{ - bool warn; - - cfs_cpu_incr_cpt_version(); - - mutex_lock(&cpt_data.cpt_mutex); - /* if all HTs in a core are offline, it may break affinity */ - cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu)); - warn = cpumask_any_and(cpt_data.cpt_cpumask, - cpu_online_mask) >= nr_cpu_ids; - mutex_unlock(&cpt_data.cpt_mutex); - CDEBUG(warn ? 
D_WARNING : D_INFO, - "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n", - cpu); - return 0; -} -#endif - -void -cfs_cpu_fini(void) -{ - if (cfs_cpt_tab) - cfs_cpt_table_free(cfs_cpt_tab); - -#ifdef CONFIG_HOTPLUG_CPU - if (lustre_cpu_online > 0) - cpuhp_remove_state_nocalls(lustre_cpu_online); - cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD); -#endif - free_cpumask_var(cpt_data.cpt_cpumask); -} - -int -cfs_cpu_init(void) -{ - int ret = 0; - - LASSERT(!cfs_cpt_tab); - - memset(&cpt_data, 0, sizeof(cpt_data)); - - if (!zalloc_cpumask_var(&cpt_data.cpt_cpumask, GFP_NOFS)) { - CERROR("Failed to allocate scratch buffer\n"); - return -1; - } - - spin_lock_init(&cpt_data.cpt_lock); - mutex_init(&cpt_data.cpt_mutex); - -#ifdef CONFIG_HOTPLUG_CPU - ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD, - "staging/lustre/cfe:dead", NULL, - cfs_cpu_dead); - if (ret < 0) - goto failed; - ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "staging/lustre/cfe:online", - cfs_cpu_online, NULL); - if (ret < 0) - goto failed; - lustre_cpu_online = ret; -#endif - ret = -EINVAL; - - if (*cpu_pattern) { - char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL); - - if (!cpu_pattern_dup) { - CERROR("Failed to duplicate cpu_pattern\n"); - goto failed; - } - - cfs_cpt_tab = cfs_cpt_table_create_pattern(cpu_pattern_dup); - kfree(cpu_pattern_dup); - if (!cfs_cpt_tab) { - CERROR("Failed to create cptab from pattern %s\n", - cpu_pattern); - goto failed; - } - - } else { - cfs_cpt_tab = cfs_cpt_table_create(cpu_npartitions); - if (!cfs_cpt_tab) { - CERROR("Failed to create ptable with npartitions %d\n", - cpu_npartitions); - goto failed; - } - } - - spin_lock(&cpt_data.cpt_lock); - if (cfs_cpt_tab->ctb_version != cpt_data.cpt_version) { - spin_unlock(&cpt_data.cpt_lock); - CERROR("CPU hotplug/unplug during setup\n"); - goto failed; - } - spin_unlock(&cpt_data.cpt_lock); - - LCONSOLE(0, "HW nodes: %d, HW CPU cores: %d, npartitions: 
%d\n", - num_online_nodes(), num_online_cpus(), - cfs_cpt_number(cfs_cpt_tab)); - return 0; - - failed: - cfs_cpu_fini(); - return ret; -} diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c b/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c deleted file mode 100644 index 223505c37545..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_lock.c +++ /dev/null @@ -1,155 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include -#include -#include - -/** destroy cpu-partition lock, see libcfs_private.h for more detail */ -void -cfs_percpt_lock_free(struct cfs_percpt_lock *pcl) -{ - LASSERT(pcl->pcl_locks); - LASSERT(!pcl->pcl_locked); - - cfs_percpt_free(pcl->pcl_locks); - kfree(pcl); -} -EXPORT_SYMBOL(cfs_percpt_lock_free); - -/** - * create cpu-partition lock, see libcfs_private.h for more detail. 
- * - * cpu-partition lock is designed for large-scale SMP system, so we need to - * reduce cacheline conflict as possible as we can, that's the - * reason we always allocate cacheline-aligned memory block. - */ -struct cfs_percpt_lock * -cfs_percpt_lock_create(struct cfs_cpt_table *cptab, - struct lock_class_key *keys) -{ - struct cfs_percpt_lock *pcl; - spinlock_t *lock; - int i; - - /* NB: cptab can be NULL, pcl will be for HW CPUs on that case */ - pcl = kzalloc(sizeof(*pcl), GFP_NOFS); - if (!pcl) - return NULL; - - pcl->pcl_cptab = cptab; - pcl->pcl_locks = cfs_percpt_alloc(cptab, sizeof(*lock)); - if (!pcl->pcl_locks) { - kfree(pcl); - return NULL; - } - - if (!keys) - CWARN("Cannot setup class key for percpt lock, you may see recursive locking warnings which are actually fake.\n"); - - cfs_percpt_for_each(lock, i, pcl->pcl_locks) { - spin_lock_init(lock); - if (keys) - lockdep_set_class(lock, &keys[i]); - } - - return pcl; -} -EXPORT_SYMBOL(cfs_percpt_lock_create); - -/** - * lock a CPU partition - * - * \a index != CFS_PERCPT_LOCK_EX - * hold private lock indexed by \a index - * - * \a index == CFS_PERCPT_LOCK_EX - * exclusively lock @pcl and nobody can take private lock - */ -void -cfs_percpt_lock(struct cfs_percpt_lock *pcl, int index) - __acquires(pcl->pcl_locks) -{ - int ncpt = cfs_cpt_number(pcl->pcl_cptab); - int i; - - LASSERT(index >= CFS_PERCPT_LOCK_EX && index < ncpt); - - if (ncpt == 1) { - index = 0; - } else { /* serialize with exclusive lock */ - while (pcl->pcl_locked) - cpu_relax(); - } - - if (likely(index != CFS_PERCPT_LOCK_EX)) { - spin_lock(pcl->pcl_locks[index]); - return; - } - - /* exclusive lock request */ - for (i = 0; i < ncpt; i++) { - spin_lock(pcl->pcl_locks[i]); - if (!i) { - LASSERT(!pcl->pcl_locked); - /* nobody should take private lock after this - * so I wouldn't starve for too long time - */ - pcl->pcl_locked = 1; - } - } -} -EXPORT_SYMBOL(cfs_percpt_lock); - -/** unlock a CPU partition */ -void -cfs_percpt_unlock(struct 
cfs_percpt_lock *pcl, int index) - __releases(pcl->pcl_locks) -{ - int ncpt = cfs_cpt_number(pcl->pcl_cptab); - int i; - - index = ncpt == 1 ? 0 : index; - - if (likely(index != CFS_PERCPT_LOCK_EX)) { - spin_unlock(pcl->pcl_locks[index]); - return; - } - - for (i = ncpt - 1; i >= 0; i--) { - if (!i) { - LASSERT(pcl->pcl_locked); - pcl->pcl_locked = 0; - } - spin_unlock(pcl->pcl_locks[i]); - } -} -EXPORT_SYMBOL(cfs_percpt_unlock); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c b/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c deleted file mode 100644 index 2d533be9bb30..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_mem.c +++ /dev/null @@ -1,171 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include -#include -#include -#include - -struct cfs_var_array { - unsigned int va_count; /* # of buffers */ - unsigned int va_size; /* size of each var */ - struct cfs_cpt_table *va_cptab; /* cpu partition table */ - void *va_ptrs[0]; /* buffer addresses */ -}; - -/* - * free per-cpu data, see more detail in cfs_percpt_free - */ -void -cfs_percpt_free(void *vars) -{ - struct cfs_var_array *arr; - int i; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - for (i = 0; i < arr->va_count; i++) - kfree(arr->va_ptrs[i]); - - kvfree(arr); -} -EXPORT_SYMBOL(cfs_percpt_free); - -/* - * allocate per cpu-partition variables, returned value is an array of pointers, - * variable can be indexed by CPU partition ID, i.e: - * - * arr = cfs_percpt_alloc(cfs_cpu_pt, size); - * then caller can access memory block for CPU 0 by arr[0], - * memory block for CPU 1 by arr[1]... - * memory block for CPU N by arr[N]... - * - * cacheline aligned. 
- */ -void * -cfs_percpt_alloc(struct cfs_cpt_table *cptab, unsigned int size) -{ - struct cfs_var_array *arr; - int count; - int i; - - count = cfs_cpt_number(cptab); - - arr = kvzalloc(offsetof(struct cfs_var_array, va_ptrs[count]), - GFP_KERNEL); - if (!arr) - return NULL; - - size = L1_CACHE_ALIGN(size); - arr->va_size = size; - arr->va_count = count; - arr->va_cptab = cptab; - - for (i = 0; i < count; i++) { - arr->va_ptrs[i] = kzalloc_node(size, GFP_KERNEL, - cfs_cpt_spread_node(cptab, i)); - if (!arr->va_ptrs[i]) { - cfs_percpt_free((void *)&arr->va_ptrs[0]); - return NULL; - } - } - - return (void *)&arr->va_ptrs[0]; -} -EXPORT_SYMBOL(cfs_percpt_alloc); - -/* - * return number of CPUs (or number of elements in per-cpu data) - * according to cptab of @vars - */ -int -cfs_percpt_number(void *vars) -{ - struct cfs_var_array *arr; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - return arr->va_count; -} -EXPORT_SYMBOL(cfs_percpt_number); - -/* - * free variable array, see more detail in cfs_array_alloc - */ -void -cfs_array_free(void *vars) -{ - struct cfs_var_array *arr; - int i; - - arr = container_of(vars, struct cfs_var_array, va_ptrs[0]); - - for (i = 0; i < arr->va_count; i++) { - if (!arr->va_ptrs[i]) - continue; - - kvfree(arr->va_ptrs[i]); - } - kvfree(arr); -} -EXPORT_SYMBOL(cfs_array_free); - -/* - * allocate a variable array, returned value is an array of pointers. - * Caller can specify length of array by @count, @size is size of each - * memory block in array. 
- */ -void * -cfs_array_alloc(int count, unsigned int size) -{ - struct cfs_var_array *arr; - int i; - - arr = kvmalloc(offsetof(struct cfs_var_array, va_ptrs[count]), GFP_KERNEL); - if (!arr) - return NULL; - - arr->va_count = count; - arr->va_size = size; - - for (i = 0; i < count; i++) { - arr->va_ptrs[i] = kvzalloc(size, GFP_KERNEL); - - if (!arr->va_ptrs[i]) { - cfs_array_free((void *)&arr->va_ptrs[0]); - return NULL; - } - } - - return (void *)&arr->va_ptrs[0]; -} -EXPORT_SYMBOL(cfs_array_alloc); diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c deleted file mode 100644 index e1fb1263e3ae..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c +++ /dev/null @@ -1,562 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * String manipulation functions. 
- * - * libcfs/libcfs/libcfs_string.c - * - * Author: Nathan Rutman - */ - -#include -#include -#include -#include -#include -#include -#include - -/* Convert a text string to a bitmask */ -int cfs_str2mask(const char *str, const char *(*bit2str)(int bit), - int *oldmask, int minmask, int allmask) -{ - const char *debugstr; - char op = '\0'; - int newmask = minmask, i, len, found = 0; - - /* must be a list of tokens separated by whitespace - * and optionally an operator ('+' or '-'). If an operator - * appears first in , '*oldmask' is used as the starting point - * (relative), otherwise minmask is used (absolute). An operator - * applies to all following tokens up to the next operator. - */ - while (*str != '\0') { - while (isspace(*str)) - str++; - if (*str == '\0') - break; - if (*str == '+' || *str == '-') { - op = *str++; - if (!found) - /* only if first token is relative */ - newmask = *oldmask; - while (isspace(*str)) - str++; - if (*str == '\0') /* trailing op */ - return -EINVAL; - } - - /* find token length */ - len = 0; - while (str[len] != '\0' && !isspace(str[len]) && - str[len] != '+' && str[len] != '-') - len++; - - /* match token */ - found = 0; - for (i = 0; i < 32; i++) { - debugstr = bit2str(i); - if (debugstr && strlen(debugstr) == len && - !strncasecmp(str, debugstr, len)) { - if (op == '-') - newmask &= ~(1 << i); - else - newmask |= (1 << i); - found = 1; - break; - } - } - if (!found && len == 3 && - !strncasecmp(str, "ALL", len)) { - if (op == '-') - newmask = minmask; - else - newmask = allmask; - found = 1; - } - if (!found) { - CWARN("unknown mask '%.*s'.\n" - "mask usage: [+|-] ...\n", len, str); - return -EINVAL; - } - str += len; - } - - *oldmask = newmask; - return 0; -} - -/* get the first string out of @str */ -char *cfs_firststr(char *str, size_t size) -{ - size_t i = 0; - char *end; - - /* trim leading spaces */ - while (i < size && *str && isspace(*str)) { - ++i; - ++str; - } - - /* string with all spaces */ - if (*str == '\0') - 
goto out; - - end = str; - while (i < size && *end != '\0' && !isspace(*end)) { - ++i; - ++end; - } - - *end = '\0'; -out: - return str; -} -EXPORT_SYMBOL(cfs_firststr); - -/** - * Extracts tokens from strings. - * - * Looks for \a delim in string \a next, sets \a res to point to - * substring before the delimiter, sets \a next right after the found - * delimiter. - * - * \retval 1 if \a res points to a string of non-whitespace characters - * \retval 0 otherwise - */ -int -cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res) -{ - char *end; - - if (!next->ls_str) - return 0; - - /* skip leading white spaces */ - while (next->ls_len) { - if (!isspace(*next->ls_str)) - break; - next->ls_str++; - next->ls_len--; - } - - if (!next->ls_len) /* whitespaces only */ - return 0; - - if (*next->ls_str == delim) { - /* first non-writespace is the delimiter */ - return 0; - } - - res->ls_str = next->ls_str; - end = memchr(next->ls_str, delim, next->ls_len); - if (!end) { - /* there is no the delimeter in the string */ - end = next->ls_str + next->ls_len; - next->ls_str = NULL; - } else { - next->ls_str = end + 1; - next->ls_len -= (end - res->ls_str + 1); - } - - /* skip ending whitespaces */ - while (--end != res->ls_str) { - if (!isspace(*end)) - break; - } - - res->ls_len = end - res->ls_str + 1; - return 1; -} -EXPORT_SYMBOL(cfs_gettok); - -/** - * Converts string to integer. - * - * Accepts decimal and hexadecimal number recordings. - * - * \retval 1 if first \a nob chars of \a str convert to decimal or - * hexadecimal integer in the range [\a min, \a max] - * \retval 0 otherwise - */ -int -cfs_str2num_check(char *str, int nob, unsigned int *num, - unsigned int min, unsigned int max) -{ - bool all_numbers = true; - char *endp, cache; - int rc; - - /** - * kstrouint can only handle strings composed - * of only numbers. We need to scan the string - * passed in for the first non-digit character - * and end the string at that location. 
If we - * don't find any non-digit character we still - * need to place a '\0' at position nob since - * we are not interested in the rest of the - * string which is longer than nob in size. - * After we are done the character at the - * position we placed '\0' must be restored. - */ - for (endp = str; endp < str + nob; endp++) { - if (!isdigit(*endp)) { - all_numbers = false; - break; - } - } - cache = *endp; - *endp = '\0'; - - rc = kstrtouint(str, 10, num); - *endp = cache; - if (rc || !all_numbers) - return 0; - - return (*num >= min && *num <= max); -} -EXPORT_SYMBOL(cfs_str2num_check); - -/** - * Parses \ token of the syntax. If \a bracketed is false, - * \a src should only have a single token which can be \ or \* - * - * \retval pointer to allocated range_expr and initialized - * range_expr::re_lo, range_expr::re_hi and range_expr:re_stride if \a - `* src parses to - * \ | - * \ '-' \ | - * \ '-' \ '/' \ - * \retval 0 will be returned if it can be parsed, otherwise -EINVAL or - * -ENOMEM will be returned. 
- */ -static int -cfs_range_expr_parse(struct cfs_lstr *src, unsigned int min, unsigned int max, - int bracketed, struct cfs_range_expr **expr) -{ - struct cfs_range_expr *re; - struct cfs_lstr tok; - - re = kzalloc(sizeof(*re), GFP_NOFS); - if (!re) - return -ENOMEM; - - if (src->ls_len == 1 && src->ls_str[0] == '*') { - re->re_lo = min; - re->re_hi = max; - re->re_stride = 1; - goto out; - } - - if (cfs_str2num_check(src->ls_str, src->ls_len, - &re->re_lo, min, max)) { - /* is parsed */ - re->re_hi = re->re_lo; - re->re_stride = 1; - goto out; - } - - if (!bracketed || !cfs_gettok(src, '-', &tok)) - goto failed; - - if (!cfs_str2num_check(tok.ls_str, tok.ls_len, - &re->re_lo, min, max)) - goto failed; - - /* - */ - if (cfs_str2num_check(src->ls_str, src->ls_len, - &re->re_hi, min, max)) { - /* - is parsed */ - re->re_stride = 1; - goto out; - } - - /* go to check '-' '/' */ - if (cfs_gettok(src, '/', &tok)) { - if (!cfs_str2num_check(tok.ls_str, tok.ls_len, - &re->re_hi, min, max)) - goto failed; - - /* - / ... */ - if (cfs_str2num_check(src->ls_str, src->ls_len, - &re->re_stride, min, max)) { - /* - / is parsed */ - goto out; - } - } - - out: - *expr = re; - return 0; - - failed: - kfree(re); - return -EINVAL; -} - -/** - * Print the range expression \a re into specified \a buffer. - * If \a bracketed is true, expression does not need additional - * brackets. 
- * - * \retval number of characters written - */ -static int -cfs_range_expr_print(char *buffer, int count, struct cfs_range_expr *expr, - bool bracketed) -{ - int i; - char s[] = "["; - char e[] = "]"; - - if (bracketed) { - s[0] = '\0'; - e[0] = '\0'; - } - - if (expr->re_lo == expr->re_hi) - i = scnprintf(buffer, count, "%u", expr->re_lo); - else if (expr->re_stride == 1) - i = scnprintf(buffer, count, "%s%u-%u%s", - s, expr->re_lo, expr->re_hi, e); - else - i = scnprintf(buffer, count, "%s%u-%u/%u%s", - s, expr->re_lo, expr->re_hi, expr->re_stride, e); - return i; -} - -/** - * Print a list of range expressions (\a expr_list) into specified \a buffer. - * If the list contains several expressions, separate them with comma - * and surround the list with brackets. - * - * \retval number of characters written - */ -int -cfs_expr_list_print(char *buffer, int count, struct cfs_expr_list *expr_list) -{ - struct cfs_range_expr *expr; - int i = 0, j = 0; - int numexprs = 0; - - if (count <= 0) - return 0; - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) - numexprs++; - - if (numexprs > 1) - i += scnprintf(buffer + i, count - i, "["); - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - if (j++) - i += scnprintf(buffer + i, count - i, ","); - i += cfs_range_expr_print(buffer + i, count - i, expr, - numexprs > 1); - } - - if (numexprs > 1) - i += scnprintf(buffer + i, count - i, "]"); - - return i; -} -EXPORT_SYMBOL(cfs_expr_list_print); - -/** - * Matches value (\a value) against ranges expression list \a expr_list. 
- * - * \retval 1 if \a value matches - * \retval 0 otherwise - */ -int -cfs_expr_list_match(u32 value, struct cfs_expr_list *expr_list) -{ - struct cfs_range_expr *expr; - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - if (value >= expr->re_lo && value <= expr->re_hi && - !((value - expr->re_lo) % expr->re_stride)) - return 1; - } - - return 0; -} -EXPORT_SYMBOL(cfs_expr_list_match); - -/** - * Convert express list (\a expr_list) to an array of all matched values - * - * \retval N N is total number of all matched values - * \retval 0 if expression list is empty - * \retval < 0 for failure - */ -int -cfs_expr_list_values(struct cfs_expr_list *expr_list, int max, u32 **valpp) -{ - struct cfs_range_expr *expr; - u32 *val; - int count = 0; - int i; - - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - for (i = expr->re_lo; i <= expr->re_hi; i++) { - if (!((i - expr->re_lo) % expr->re_stride)) - count++; - } - } - - if (!count) /* empty expression list */ - return 0; - - if (count > max) { - CERROR("Number of values %d exceeds max allowed %d\n", - max, count); - return -EINVAL; - } - - val = kvmalloc_array(count, sizeof(val[0]), GFP_KERNEL | __GFP_ZERO); - if (!val) - return -ENOMEM; - - count = 0; - list_for_each_entry(expr, &expr_list->el_exprs, re_link) { - for (i = expr->re_lo; i <= expr->re_hi; i++) { - if (!((i - expr->re_lo) % expr->re_stride)) - val[count++] = i; - } - } - - *valpp = val; - return count; -} -EXPORT_SYMBOL(cfs_expr_list_values); - -/** - * Frees cfs_range_expr structures of \a expr_list. - * - * \retval none - */ -void -cfs_expr_list_free(struct cfs_expr_list *expr_list) -{ - while (!list_empty(&expr_list->el_exprs)) { - struct cfs_range_expr *expr; - - expr = list_entry(expr_list->el_exprs.next, - struct cfs_range_expr, re_link); - list_del(&expr->re_link); - kfree(expr); - } - - kfree(expr_list); -} -EXPORT_SYMBOL(cfs_expr_list_free); - -/** - * Parses \ token of the syntax. 
- * - * \retval 0 if \a str parses to \ | \ - * \retval -errno otherwise - */ -int -cfs_expr_list_parse(char *str, int len, unsigned int min, unsigned int max, - struct cfs_expr_list **elpp) -{ - struct cfs_expr_list *expr_list; - struct cfs_range_expr *expr; - struct cfs_lstr src; - int rc; - - expr_list = kzalloc(sizeof(*expr_list), GFP_NOFS); - if (!expr_list) - return -ENOMEM; - - src.ls_str = str; - src.ls_len = len; - - INIT_LIST_HEAD(&expr_list->el_exprs); - - if (src.ls_str[0] == '[' && - src.ls_str[src.ls_len - 1] == ']') { - src.ls_str++; - src.ls_len -= 2; - - rc = -EINVAL; - while (src.ls_str) { - struct cfs_lstr tok; - - if (!cfs_gettok(&src, ',', &tok)) { - rc = -EINVAL; - break; - } - - rc = cfs_range_expr_parse(&tok, min, max, 1, &expr); - if (rc) - break; - - list_add_tail(&expr->re_link, &expr_list->el_exprs); - } - } else { - rc = cfs_range_expr_parse(&src, min, max, 0, &expr); - if (!rc) - list_add_tail(&expr->re_link, &expr_list->el_exprs); - } - - if (rc) - cfs_expr_list_free(expr_list); - else - *elpp = expr_list; - - return rc; -} -EXPORT_SYMBOL(cfs_expr_list_parse); - -/** - * Frees cfs_expr_list structures of \a list. - * - * For each struct cfs_expr_list structure found on \a list it frees - * range_expr list attached to it and frees the cfs_expr_list itself. 
- * - * \retval none - */ -void -cfs_expr_list_free_list(struct list_head *list) -{ - struct cfs_expr_list *el; - - while (!list_empty(list)) { - el = list_entry(list->next, struct cfs_expr_list, el_link); - list_del(&el->el_link); - cfs_expr_list_free(el); - } -} -EXPORT_SYMBOL(cfs_expr_list_free_list); diff --git a/drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c b/drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c deleted file mode 100644 index db81ed527452..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux-crypto-adler.c +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - */ - -/* - * This is crypto api shash wrappers to zlib_adler32. 
- */ - -#include -#include -#include -#include "linux-crypto.h" - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -static int adler32_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = 1; - - return 0; -} - -static int adler32_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) { - crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - *mctx = *(u32 *)key; - return 0; -} - -static int adler32_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *cksump = shash_desc_ctx(desc); - - *cksump = *mctx; - - return 0; -} - -static int adler32_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *cksump = shash_desc_ctx(desc); - - *cksump = zlib_adler32(*cksump, data, len); - return 0; -} - -static int __adler32_finup(u32 *cksump, const u8 *data, unsigned int len, - u8 *out) -{ - *(u32 *)out = zlib_adler32(*cksump, data, len); - return 0; -} - -static int adler32_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __adler32_finup(shash_desc_ctx(desc), data, len, out); -} - -static int adler32_final(struct shash_desc *desc, u8 *out) -{ - u32 *cksump = shash_desc_ctx(desc); - - *(u32 *)out = *cksump; - return 0; -} - -static int adler32_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __adler32_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static struct shash_alg alg = { - .setkey = adler32_setkey, - .init = adler32_init, - .update = adler32_update, - .final = adler32_final, - .finup = adler32_finup, - .digest = adler32_digest, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - .base = { - .cra_name = "adler32", - .cra_driver_name = "adler32-zlib", - .cra_priority = 100, - .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize 
= sizeof(u32), - .cra_module = THIS_MODULE, - .cra_init = adler32_cra_init, - } -}; - -int cfs_crypto_adler32_register(void) -{ - return crypto_register_shash(&alg); -} - -void cfs_crypto_adler32_unregister(void) -{ - crypto_unregister_shash(&alg); -} diff --git a/drivers/staging/lustre/lnet/libcfs/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux-crypto.c deleted file mode 100644 index 21ff9bf6da47..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux-crypto.c +++ /dev/null @@ -1,447 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - * - * Copyright (c) 2012, Intel Corporation. - */ - -#include -#include -#include -#include -#include -#include -#include "linux-crypto.h" - -/** - * Array of hash algorithm speed in MByte per second - */ -static int cfs_crypto_hash_speeds[CFS_HASH_ALG_MAX]; - -/** - * Initialize the state descriptor for the specified hash algorithm. 
- * - * An internal routine to allocate the hash-specific state in \a req for - * use with cfs_crypto_hash_digest() to compute the hash of a single message, - * though possibly in multiple chunks. The descriptor internal state should - * be freed with cfs_crypto_hash_final(). - * - * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) - * \param[out] type pointer to the hash description in hash_types[] - * array - * \param[in,out] req hash state descriptor to be initialized - * \param[in] key initial hash value/state, NULL to use default - * value - * \param[in] key_len length of \a key - * - * \retval 0 on success - * \retval negative errno on failure - */ -static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg, - const struct cfs_crypto_hash_type **type, - struct ahash_request **req, - unsigned char *key, - unsigned int key_len) -{ - struct crypto_ahash *tfm; - int err = 0; - - *type = cfs_crypto_hash_type(hash_alg); - - if (!*type) { - CWARN("Unsupported hash algorithm id = %d, max id is %d\n", - hash_alg, CFS_HASH_ALG_MAX); - return -EINVAL; - } - tfm = crypto_alloc_ahash((*type)->cht_name, 0, CRYPTO_ALG_ASYNC); - - if (IS_ERR(tfm)) { - CDEBUG(D_INFO, "Failed to alloc crypto hash %s\n", - (*type)->cht_name); - return PTR_ERR(tfm); - } - - *req = ahash_request_alloc(tfm, GFP_KERNEL); - if (!*req) { - CDEBUG(D_INFO, "Failed to alloc ahash_request for %s\n", - (*type)->cht_name); - crypto_free_ahash(tfm); - return -ENOMEM; - } - - ahash_request_set_callback(*req, 0, NULL, NULL); - - if (key) - err = crypto_ahash_setkey(tfm, key, key_len); - else if ((*type)->cht_key) - err = crypto_ahash_setkey(tfm, - (unsigned char *)&((*type)->cht_key), - (*type)->cht_size); - - if (err) { - ahash_request_free(*req); - crypto_free_ahash(tfm); - return err; - } - - CDEBUG(D_INFO, "Using crypto hash: %s (%s) speed %d MB/s\n", - crypto_ahash_alg_name(tfm), crypto_ahash_driver_name(tfm), - cfs_crypto_hash_speeds[hash_alg]); - - err = crypto_ahash_init(*req); - if 
(err) { - ahash_request_free(*req); - crypto_free_ahash(tfm); - } - return err; -} - -/** - * Calculate hash digest for the passed buffer. - * - * This should be used when computing the hash on a single contiguous buffer. - * It combines the hash initialization, computation, and cleanup. - * - * \param[in] hash_alg id of hash algorithm (CFS_HASH_ALG_*) - * \param[in] buf data buffer on which to compute hash - * \param[in] buf_len length of \a buf in bytes - * \param[in] key initial value/state for algorithm, - * if \a key = NULL use default initial value - * \param[in] key_len length of \a key in bytes - * \param[out] hash pointer to computed hash value, - * if \a hash = NULL then \a hash_len is to digest - * size in bytes, retval -ENOSPC - * \param[in,out] hash_len size of \a hash buffer - * - * \retval -EINVAL \a buf, \a buf_len, \a hash_len, - * \a hash_alg invalid - * \retval -ENOENT \a hash_alg is unsupported - * \retval -ENOSPC \a hash is NULL, or \a hash_len less than - * digest size - * \retval 0 for success - * \retval negative errno for other errors from lower - * layers. 
- */ -int cfs_crypto_hash_digest(enum cfs_crypto_hash_alg hash_alg, - const void *buf, unsigned int buf_len, - unsigned char *key, unsigned int key_len, - unsigned char *hash, unsigned int *hash_len) -{ - struct scatterlist sl; - struct ahash_request *req; - int err; - const struct cfs_crypto_hash_type *type; - - if (!buf || !buf_len || !hash_len) - return -EINVAL; - - err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len); - if (err) - return err; - - if (!hash || *hash_len < type->cht_size) { - *hash_len = type->cht_size; - crypto_free_ahash(crypto_ahash_reqtfm(req)); - ahash_request_free(req); - return -ENOSPC; - } - sg_init_one(&sl, buf, buf_len); - - ahash_request_set_crypt(req, &sl, hash, sl.length); - err = crypto_ahash_digest(req); - crypto_free_ahash(crypto_ahash_reqtfm(req)); - ahash_request_free(req); - - return err; -} -EXPORT_SYMBOL(cfs_crypto_hash_digest); - -/** - * Allocate and initialize descriptor for hash algorithm. - * - * This should be used to initialize a hash descriptor for multiple calls - * to a single hash function when computing the hash across multiple - * separate buffers or pages using cfs_crypto_hash_update{,_page}(). - * - * The hash descriptor should be freed with cfs_crypto_hash_final(). 
- * - * \param[in] hash_alg algorithm id (CFS_HASH_ALG_*) - * \param[in] key initial value/state for algorithm, if \a key = NULL - * use default initial value - * \param[in] key_len length of \a key in bytes - * - * \retval pointer to descriptor of hash instance - * \retval ERR_PTR(errno) in case of error - */ -struct ahash_request * -cfs_crypto_hash_init(enum cfs_crypto_hash_alg hash_alg, - unsigned char *key, unsigned int key_len) -{ - struct ahash_request *req; - int err; - const struct cfs_crypto_hash_type *type; - - err = cfs_crypto_hash_alloc(hash_alg, &type, &req, key, key_len); - - if (err) - return ERR_PTR(err); - return req; -} -EXPORT_SYMBOL(cfs_crypto_hash_init); - -/** - * Update hash digest computed on data within the given \a page - * - * \param[in] hreq hash state descriptor - * \param[in] page data page on which to compute the hash - * \param[in] offset offset within \a page at which to start hash - * \param[in] len length of data on which to compute hash - * - * \retval 0 for success - * \retval negative errno on failure - */ -int cfs_crypto_hash_update_page(struct ahash_request *req, - struct page *page, unsigned int offset, - unsigned int len) -{ - struct scatterlist sl; - - sg_init_table(&sl, 1); - sg_set_page(&sl, page, len, offset & ~PAGE_MASK); - - ahash_request_set_crypt(req, &sl, NULL, sl.length); - return crypto_ahash_update(req); -} -EXPORT_SYMBOL(cfs_crypto_hash_update_page); - -/** - * Update hash digest computed on the specified data - * - * \param[in] req hash state descriptor - * \param[in] buf data buffer on which to compute the hash - * \param[in] buf_len length of \buf on which to compute hash - * - * \retval 0 for success - * \retval negative errno on failure - */ -int cfs_crypto_hash_update(struct ahash_request *req, - const void *buf, unsigned int buf_len) -{ - struct scatterlist sl; - - sg_init_one(&sl, buf, buf_len); - - ahash_request_set_crypt(req, &sl, NULL, sl.length); - return crypto_ahash_update(req); -} 
-EXPORT_SYMBOL(cfs_crypto_hash_update); - -/** - * Finish hash calculation, copy hash digest to buffer, clean up hash descriptor - * - * \param[in] req hash descriptor - * \param[out] hash pointer to hash buffer to store hash digest - * \param[in,out] hash_len pointer to hash buffer size, if \a req = NULL - * only free \a req instead of computing the hash - * - * \retval 0 for success - * \retval -EOVERFLOW if hash_len is too small for the hash digest - * \retval negative errno for other errors from lower layers - */ -int cfs_crypto_hash_final(struct ahash_request *req, - unsigned char *hash, unsigned int *hash_len) -{ - int err; - int size = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); - - if (!hash || !hash_len) { - err = 0; - goto free_ahash; - } - if (*hash_len < size) { - err = -EOVERFLOW; - goto free_ahash; - } - - ahash_request_set_crypt(req, NULL, hash, 0); - err = crypto_ahash_final(req); - if (!err) - *hash_len = size; -free_ahash: - crypto_free_ahash(crypto_ahash_reqtfm(req)); - ahash_request_free(req); - return err; -} -EXPORT_SYMBOL(cfs_crypto_hash_final); - -/** - * Compute the speed of specified hash function - * - * Run a speed test on the given hash algorithm on buffer of the given size. - * The speed is stored internally in the cfs_crypto_hash_speeds[] array, and - * is available through the cfs_crypto_hash_speed() function. 
- * - * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) - * \param[in] buf data buffer on which to compute the hash - * \param[in] buf_len length of \buf on which to compute hash - */ -static void cfs_crypto_performance_test(enum cfs_crypto_hash_alg hash_alg) -{ - int buf_len = max(PAGE_SIZE, 1048576UL); - void *buf; - unsigned long start, end; - int bcount, err = 0; - struct page *page; - unsigned char hash[CFS_CRYPTO_HASH_DIGESTSIZE_MAX]; - unsigned int hash_len = sizeof(hash); - - page = alloc_page(GFP_KERNEL); - if (!page) { - err = -ENOMEM; - goto out_err; - } - - buf = kmap(page); - memset(buf, 0xAD, PAGE_SIZE); - kunmap(page); - - for (start = jiffies, end = start + msecs_to_jiffies(MSEC_PER_SEC), - bcount = 0; time_before(jiffies, end); bcount++) { - struct ahash_request *hdesc; - int i; - - hdesc = cfs_crypto_hash_init(hash_alg, NULL, 0); - if (IS_ERR(hdesc)) { - err = PTR_ERR(hdesc); - break; - } - - for (i = 0; i < buf_len / PAGE_SIZE; i++) { - err = cfs_crypto_hash_update_page(hdesc, page, 0, - PAGE_SIZE); - if (err) - break; - } - - err = cfs_crypto_hash_final(hdesc, hash, &hash_len); - if (err) - break; - } - end = jiffies; - __free_page(page); -out_err: - if (err) { - cfs_crypto_hash_speeds[hash_alg] = err; - CDEBUG(D_INFO, "Crypto hash algorithm %s test error: rc = %d\n", - cfs_crypto_hash_name(hash_alg), err); - } else { - unsigned long tmp; - - tmp = ((bcount * buf_len / jiffies_to_msecs(end - start)) * - 1000) / (1024 * 1024); - cfs_crypto_hash_speeds[hash_alg] = (int)tmp; - CDEBUG(D_CONFIG, "Crypto hash algorithm %s speed = %d MB/s\n", - cfs_crypto_hash_name(hash_alg), - cfs_crypto_hash_speeds[hash_alg]); - } -} - -/** - * hash speed in Mbytes per second for valid hash algorithm - * - * Return the performance of the specified \a hash_alg that was previously - * computed using cfs_crypto_performance_test(). 
- * - * \param[in] hash_alg hash algorithm id (CFS_HASH_ALG_*) - * - * \retval positive speed of the hash function in MB/s - * \retval -ENOENT if \a hash_alg is unsupported - * \retval negative errno if \a hash_alg speed is unavailable - */ -int cfs_crypto_hash_speed(enum cfs_crypto_hash_alg hash_alg) -{ - if (hash_alg < CFS_HASH_ALG_MAX) - return cfs_crypto_hash_speeds[hash_alg]; - return -ENOENT; -} -EXPORT_SYMBOL(cfs_crypto_hash_speed); - -/** - * Run the performance test for all hash algorithms. - * - * Run the cfs_crypto_performance_test() benchmark for all of the available - * hash functions using a 1MB buffer size. This is a reasonable buffer size - * for Lustre RPCs, even if the actual RPC size is larger or smaller. - * - * Since the setup cost and computation speed of various hash algorithms is - * a function of the buffer size (and possibly internal contention of offload - * engines), this speed only represents an estimate of the actual speed under - * actual usage, but is reasonable for comparing available algorithms. - * - * The actual speeds are available via cfs_crypto_hash_speed() for later - * comparison. 
- * - * \retval 0 on success - * \retval -ENOMEM if no memory is available for test buffer - */ -static int cfs_crypto_test_hashes(void) -{ - enum cfs_crypto_hash_alg hash_alg; - - for (hash_alg = 0; hash_alg < CFS_HASH_ALG_MAX; hash_alg++) - cfs_crypto_performance_test(hash_alg); - - return 0; -} - -static int adler32; - -/** - * Register available hash functions - * - * \retval 0 - */ -int cfs_crypto_register(void) -{ - request_module("crc32c"); - - if (cfs_crypto_adler32_register() == 0) - adler32 = 1; - - /* check all algorithms and do performance test */ - cfs_crypto_test_hashes(); - return 0; -} - -/** - * Unregister previously registered hash functions - */ -void cfs_crypto_unregister(void) -{ - if (adler32) - cfs_crypto_adler32_unregister(); - adler32 = 0; -} diff --git a/drivers/staging/lustre/lnet/libcfs/linux-crypto.h b/drivers/staging/lustre/lnet/libcfs/linux-crypto.h deleted file mode 100644 index 5616e9ea1450..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux-crypto.h +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. 
- * - * GPL HEADER END - */ - -/** - * Functions for start/stop shash adler32 algorithm. - */ -int cfs_crypto_adler32_register(void); -void cfs_crypto_adler32_unregister(void); diff --git a/drivers/staging/lustre/lnet/libcfs/linux-debug.c b/drivers/staging/lustre/lnet/libcfs/linux-debug.c deleted file mode 100644 index 15ab849374c2..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux-debug.c +++ /dev/null @@ -1,142 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * libcfs/libcfs/linux/linux-debug.c - * - * Author: Phil Schwan - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -# define DEBUG_SUBSYSTEM S_LNET - -#include "tracefile.h" - -#include - -char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall"; - -/** - * Upcall function once a Lustre log has been dumped. - * - * \param file path of the dumped log - */ -void libcfs_run_debug_log_upcall(char *file) -{ - char *argv[3]; - int rc; - static const char * const envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL - }; - - argv[0] = lnet_debug_log_upcall; - - LASSERTF(file, "called on a null filename\n"); - argv[1] = file; /* only need to pass the path of the file */ - - argv[2] = NULL; - - rc = call_usermodehelper(argv[0], argv, (char **)envp, 1); - if (rc < 0 && rc != -ENOENT) { - CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n", - rc, argv[0], argv[1]); - } else { - CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n", - argv[0], argv[1]); - } -} - -/* coverity[+kill] */ -void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) -{ - libcfs_catastrophe = 1; - libcfs_debug_msg(msgdata, "LBUG\n"); - - if (in_interrupt()) { - panic("LBUG in interrupt.\n"); - /* not reached */ - } - - dump_stack(); - if (!libcfs_panic_on_lbug) - libcfs_debug_dumplog(); - if (libcfs_panic_on_lbug) - panic("LBUG"); - set_current_state(TASK_UNINTERRUPTIBLE); - while (1) - schedule(); -} -EXPORT_SYMBOL(lbug_with_loc); - -static int panic_notifier(struct notifier_block *self, unsigned long unused1, - void *unused2) -{ - if (libcfs_panic_in_progress) - return 0; - - libcfs_panic_in_progress = 1; - mb(); - - return 0; -} - -static struct notifier_block libcfs_panic_notifier = { - .notifier_call = panic_notifier, - .next = NULL, - .priority = 10000, -}; - -void libcfs_register_panic_notifier(void) -{ - 
atomic_notifier_chain_register(&panic_notifier_list, - &libcfs_panic_notifier); -} - -void libcfs_unregister_panic_notifier(void) -{ - atomic_notifier_chain_unregister(&panic_notifier_list, - &libcfs_panic_notifier); -} diff --git a/drivers/staging/lustre/lnet/libcfs/linux-tracefile.c b/drivers/staging/lustre/lnet/libcfs/linux-tracefile.c deleted file mode 100644 index 347138409eba..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/linux-tracefile.c +++ /dev/null @@ -1,258 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE - -#include -#include -#include "tracefile.h" - -/* percents to share the total debug memory for each type */ -static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = { - 80, /* 80% pages for CFS_TCD_TYPE_PROC */ - 10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */ - 10 /* 10% pages for CFS_TCD_TYPE_IRQ */ -}; - -char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX]; - -static DECLARE_RWSEM(cfs_tracefile_sem); - -int cfs_tracefile_init_arch(void) -{ - int i; - int j; - struct cfs_trace_cpu_data *tcd; - - /* initialize trace_data */ - memset(cfs_trace_data, 0, sizeof(cfs_trace_data)); - for (i = 0; i < CFS_TCD_TYPE_MAX; i++) { - cfs_trace_data[i] = - kmalloc_array(num_possible_cpus(), - sizeof(union cfs_trace_data_union), - GFP_KERNEL); - if (!cfs_trace_data[i]) - goto out; - } - - /* arch related info initialized */ - cfs_tcd_for_each(tcd, i, j) { - spin_lock_init(&tcd->tcd_lock); - tcd->tcd_pages_factor = pages_factor[i]; - tcd->tcd_type = i; - tcd->tcd_cpu = j; - } - - for (i = 0; i < num_possible_cpus(); i++) - for (j = 0; j < 3; j++) { - cfs_trace_console_buffers[i][j] = - kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE, - GFP_KERNEL); - - if (!cfs_trace_console_buffers[i][j]) - goto out; - } - - return 0; - -out: - cfs_tracefile_fini_arch(); - pr_err("lnet: Not enough memory\n"); - return -ENOMEM; -} - -void cfs_tracefile_fini_arch(void) -{ - int i; - int j; - - for (i = 0; i < num_possible_cpus(); i++) - for (j = 0; j < 3; j++) { - kfree(cfs_trace_console_buffers[i][j]); - cfs_trace_console_buffers[i][j] = NULL; - } - - for (i = 0; cfs_trace_data[i]; i++) { - kfree(cfs_trace_data[i]); - cfs_trace_data[i] = NULL; - } -} - -void cfs_tracefile_read_lock(void) -{ - down_read(&cfs_tracefile_sem); -} - -void cfs_tracefile_read_unlock(void) -{ - up_read(&cfs_tracefile_sem); -} - -void cfs_tracefile_write_lock(void) -{ - down_write(&cfs_tracefile_sem); -} - -void cfs_tracefile_write_unlock(void) -{ - 
up_write(&cfs_tracefile_sem); -} - -enum cfs_trace_buf_type cfs_trace_buf_idx_get(void) -{ - if (in_irq()) - return CFS_TCD_TYPE_IRQ; - if (in_softirq()) - return CFS_TCD_TYPE_SOFTIRQ; - return CFS_TCD_TYPE_PROC; -} - -/* - * The walking argument indicates the locking comes from all tcd types - * iterator and we must lock it and dissable local irqs to avoid deadlocks - * with other interrupt locks that might be happening. See LU-1311 - * for details. - */ -int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking) - __acquires(&tcd->tc_lock) -{ - __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); - if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) - spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags); - else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) - spin_lock_bh(&tcd->tcd_lock); - else if (unlikely(walking)) - spin_lock_irq(&tcd->tcd_lock); - else - spin_lock(&tcd->tcd_lock); - return 1; -} - -void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking) - __releases(&tcd->tcd_lock) -{ - __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); - if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) - spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags); - else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) - spin_unlock_bh(&tcd->tcd_lock); - else if (unlikely(walking)) - spin_unlock_irq(&tcd->tcd_lock); - else - spin_unlock(&tcd->tcd_lock); -} - -void -cfs_set_ptldebug_header(struct ptldebug_header *header, - struct libcfs_debug_msg_data *msgdata, - unsigned long stack) -{ - struct timespec64 ts; - - ktime_get_real_ts64(&ts); - - header->ph_subsys = msgdata->msg_subsys; - header->ph_mask = msgdata->msg_mask; - header->ph_cpu_id = smp_processor_id(); - header->ph_type = cfs_trace_buf_idx_get(); - /* y2038 safe since all user space treats this as unsigned, but - * will overflow in 2106 - */ - header->ph_sec = (u32)ts.tv_sec; - header->ph_usec = ts.tv_nsec / NSEC_PER_USEC; - header->ph_stack = stack; - header->ph_pid = current->pid; - header->ph_line_num = msgdata->msg_line; - 
header->ph_extern_pid = 0; -} - -static char * -dbghdr_to_err_string(struct ptldebug_header *hdr) -{ - switch (hdr->ph_subsys) { - case S_LND: - case S_LNET: - return "LNetError"; - default: - return "LustreError"; - } -} - -static char * -dbghdr_to_info_string(struct ptldebug_header *hdr) -{ - switch (hdr->ph_subsys) { - case S_LND: - case S_LNET: - return "LNet"; - default: - return "Lustre"; - } -} - -void cfs_print_to_console(struct ptldebug_header *hdr, int mask, - const char *buf, int len, const char *file, - const char *fn) -{ - char *prefix = "Lustre", *ptype = NULL; - - if (mask & D_EMERG) { - prefix = dbghdr_to_err_string(hdr); - ptype = KERN_EMERG; - } else if (mask & D_ERROR) { - prefix = dbghdr_to_err_string(hdr); - ptype = KERN_ERR; - } else if (mask & D_WARNING) { - prefix = dbghdr_to_info_string(hdr); - ptype = KERN_WARNING; - } else if (mask & (D_CONSOLE | libcfs_printk)) { - prefix = dbghdr_to_info_string(hdr); - ptype = KERN_INFO; - } - - if (mask & D_CONSOLE) { - pr_info("%s%s: %.*s", ptype, prefix, len, buf); - } else { - pr_info("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, - hdr->ph_pid, hdr->ph_extern_pid, file, - hdr->ph_line_num, fn, len, buf); - } -} - -int cfs_trace_max_debug_mb(void) -{ - int total_mb = (totalram_pages >> (20 - PAGE_SHIFT)); - - return max(512, (total_mb * 80) / 100); -} diff --git a/drivers/staging/lustre/lnet/libcfs/module.c b/drivers/staging/lustre/lnet/libcfs/module.c deleted file mode 100644 index 5dc7de9e6478..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/module.c +++ /dev/null @@ -1,758 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -# define DEBUG_SUBSYSTEM S_LNET - -#include - -#include -#include -#include -#include "tracefile.h" - -struct lnet_debugfs_symlink_def { - char *name; - char *target; -}; - -static struct dentry *lnet_debugfs_root; - -BLOCKING_NOTIFIER_HEAD(libcfs_ioctl_list); -EXPORT_SYMBOL(libcfs_ioctl_list); - -static inline size_t libcfs_ioctl_packlen(struct libcfs_ioctl_data *data) -{ - size_t len = sizeof(*data); - - len += cfs_size_round(data->ioc_inllen1); - len += cfs_size_round(data->ioc_inllen2); - return len; -} - -static inline bool libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data) -{ - if (data->ioc_hdr.ioc_len > BIT(30)) { - CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n"); - return true; - } - if (data->ioc_inllen1 > BIT(30)) { - CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n"); - return true; - } - if (data->ioc_inllen2 > BIT(30)) { - CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n"); - return true; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - 
CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n"); - return true; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n"); - return true; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n"); - return true; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n"); - return true; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n"); - return true; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n"); - return true; - } - if ((u32)libcfs_ioctl_packlen(data) != data->ioc_hdr.ioc_len) { - CERROR("LIBCFS ioctl: packlen != ioc_len\n"); - return true; - } - if (data->ioc_inllen1 && - data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { - CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n"); - return true; - } - if (data->ioc_inllen2 && - data->ioc_bulk[cfs_size_round(data->ioc_inllen1) + - data->ioc_inllen2 - 1] != '\0') { - CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n"); - return true; - } - return false; -} - -static int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data) -{ - if (libcfs_ioctl_is_invalid(data)) { - CERROR("libcfs ioctl: parameter not correctly formatted\n"); - return -EINVAL; - } - - if (data->ioc_inllen1) - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - - if (data->ioc_inllen2) - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - cfs_size_round(data->ioc_inllen1); - - return 0; -} - -static int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp, - const struct libcfs_ioctl_hdr __user *uhdr) -{ - struct libcfs_ioctl_hdr hdr; - int err; - - if (copy_from_user(&hdr, uhdr, sizeof(hdr))) - return -EFAULT; - - if (hdr.ioc_version != LIBCFS_IOCTL_VERSION && - hdr.ioc_version != LIBCFS_IOCTL_VERSION2) { - CERROR("libcfs ioctl: version mismatch expected %#x, got %#x\n", - LIBCFS_IOCTL_VERSION, 
hdr.ioc_version); - return -EINVAL; - } - - if (hdr.ioc_len < sizeof(hdr)) { - CERROR("libcfs ioctl: user buffer too small for ioctl\n"); - return -EINVAL; - } - - if (hdr.ioc_len > LIBCFS_IOC_DATA_MAX) { - CERROR("libcfs ioctl: user buffer is too large %d/%d\n", - hdr.ioc_len, LIBCFS_IOC_DATA_MAX); - return -EINVAL; - } - - *hdr_pp = kvmalloc(hdr.ioc_len, GFP_KERNEL); - if (!*hdr_pp) - return -ENOMEM; - - if (copy_from_user(*hdr_pp, uhdr, hdr.ioc_len)) { - err = -EFAULT; - goto free; - } - - if ((*hdr_pp)->ioc_version != hdr.ioc_version || - (*hdr_pp)->ioc_len != hdr.ioc_len) { - err = -EINVAL; - goto free; - } - - return 0; - -free: - kvfree(*hdr_pp); - return err; -} - -static int libcfs_ioctl(unsigned long cmd, void __user *uparam) -{ - struct libcfs_ioctl_data *data = NULL; - struct libcfs_ioctl_hdr *hdr; - int err; - - /* 'cmd' and permissions get checked in our arch-specific caller */ - err = libcfs_ioctl_getdata(&hdr, uparam); - if (err) { - CDEBUG_LIMIT(D_ERROR, - "libcfs ioctl: data header error %d\n", err); - return err; - } - - if (hdr->ioc_version == LIBCFS_IOCTL_VERSION) { - /* - * The libcfs_ioctl_data_adjust() function performs adjustment - * operations on the libcfs_ioctl_data structure to make - * it usable by the code. This doesn't need to be called - * for new data structures added. 
- */ - data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr); - err = libcfs_ioctl_data_adjust(data); - if (err) - goto out; - } - - CDEBUG(D_IOCTL, "libcfs ioctl cmd %lu\n", cmd); - switch (cmd) { - case IOC_LIBCFS_CLEAR_DEBUG: - libcfs_debug_clear_buffer(); - break; - - case IOC_LIBCFS_MARK_DEBUG: - if (!data || !data->ioc_inlbuf1 || - data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') { - err = -EINVAL; - goto out; - } - libcfs_debug_mark_buffer(data->ioc_inlbuf1); - break; - - default: - err = blocking_notifier_call_chain(&libcfs_ioctl_list, - cmd, hdr); - if (!(err & NOTIFY_STOP_MASK)) - /* No-one claimed the ioctl */ - err = -EINVAL; - else - err = notifier_to_errno(err); - if (!err) - if (copy_to_user(uparam, hdr, hdr->ioc_len)) - err = -EFAULT; - break; - } -out: - kvfree(hdr); - return err; -} - -static long -libcfs_psdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || - _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || - _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) { - CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); - return -EINVAL; - } - - return libcfs_ioctl(cmd, (void __user *)arg); -} - -static const struct file_operations libcfs_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = libcfs_psdev_ioctl, -}; - -static struct miscdevice libcfs_dev = { - .minor = MISC_DYNAMIC_MINOR, - .name = "lnet", - .fops = &libcfs_fops, -}; - -static int libcfs_dev_registered; - -int lprocfs_call_handler(void *data, int write, loff_t *ppos, - void __user *buffer, size_t *lenp, - int (*handler)(void *data, int write, loff_t pos, - void __user *buffer, int len)) -{ - int rc = handler(data, write, *ppos, buffer, *lenp); - - if (rc < 0) - return rc; - - if (write) { - *ppos += *lenp; - } else { - *lenp = rc; - *ppos += rc; - } - return 0; -} -EXPORT_SYMBOL(lprocfs_call_handler); - -static int __proc_dobitmasks(void 
*data, int write, - loff_t pos, void __user *buffer, int nob) -{ - const int tmpstrlen = 512; - char *tmpstr; - int rc; - unsigned int *mask = data; - int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; - int is_printk = (mask == &libcfs_printk) ? 1 : 0; - - rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen); - if (rc < 0) - return rc; - - if (!write) { - libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys); - rc = strlen(tmpstr); - - if (pos >= rc) { - rc = 0; - } else { - rc = cfs_trace_copyout_string(buffer, nob, - tmpstr + pos, "\n"); - } - } else { - rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob); - if (rc < 0) { - kfree(tmpstr); - return rc; - } - - rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys); - /* Always print LBUG/LASSERT to console, so keep this mask */ - if (is_printk) - *mask |= D_EMERG; - } - - kfree(tmpstr); - return rc; -} - -static int proc_dobitmasks(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_dobitmasks); -} - -static int __proc_dump_kernel(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - if (!write) - return 0; - - return cfs_trace_dump_debug_buffer_usrstr(buffer, nob); -} - -static int proc_dump_kernel(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_dump_kernel); -} - -static int __proc_daemon_file(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - if (!write) { - int len = strlen(cfs_tracefile); - - if (pos >= len) - return 0; - - return cfs_trace_copyout_string(buffer, nob, - cfs_tracefile + pos, "\n"); - } - - return cfs_trace_daemon_command_usrstr(buffer, nob); -} - -static int proc_daemon_file(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return 
lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_daemon_file); -} - -static int libcfs_force_lbug(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - if (write) - LBUG(); - return 0; -} - -static int proc_fail_loc(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - int rc; - long old_fail_loc = cfs_fail_loc; - - rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); - if (old_fail_loc != cfs_fail_loc) - wake_up(&cfs_race_waitq); - return rc; -} - -static int __proc_cpt_table(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - char *buf = NULL; - int len = 4096; - int rc = 0; - - if (write) - return -EPERM; - - while (1) { - buf = kzalloc(len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - rc = cfs_cpt_table_print(cfs_cpt_tab, buf, len); - if (rc >= 0) - break; - - if (rc == -EFBIG) { - kfree(buf); - len <<= 1; - continue; - } - goto out; - } - - if (pos >= rc) { - rc = 0; - goto out; - } - - rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL); - out: - kfree(buf); - return rc; -} - -static int proc_cpt_table(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_cpt_table); -} - -static struct ctl_table lnet_table[] = { - { - .procname = "debug", - .data = &libcfs_debug, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks, - }, - { - .procname = "subsystem_debug", - .data = &libcfs_subsystem_debug, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks, - }, - { - .procname = "printk", - .data = &libcfs_printk, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dobitmasks, - }, - { - .procname = "cpu_partition_table", - .maxlen = 128, - .mode = 0444, - .proc_handler = &proc_cpt_table, - }, - { - .procname = "debug_log_upcall", - .data = 
lnet_debug_log_upcall, - .maxlen = sizeof(lnet_debug_log_upcall), - .mode = 0644, - .proc_handler = &proc_dostring, - }, - { - .procname = "catastrophe", - .data = &libcfs_catastrophe, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, - }, - { - .procname = "dump_kernel", - .maxlen = 256, - .mode = 0200, - .proc_handler = &proc_dump_kernel, - }, - { - .procname = "daemon_file", - .mode = 0644, - .maxlen = 256, - .proc_handler = &proc_daemon_file, - }, - { - .procname = "force_lbug", - .data = NULL, - .maxlen = 0, - .mode = 0200, - .proc_handler = &libcfs_force_lbug - }, - { - .procname = "fail_loc", - .data = &cfs_fail_loc, - .maxlen = sizeof(cfs_fail_loc), - .mode = 0644, - .proc_handler = &proc_fail_loc - }, - { - .procname = "fail_val", - .data = &cfs_fail_val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .procname = "fail_err", - .data = &cfs_fail_err, - .maxlen = sizeof(cfs_fail_err), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - } -}; - -static const struct lnet_debugfs_symlink_def lnet_debugfs_symlinks[] = { - { "console_ratelimit", - "/sys/module/libcfs/parameters/libcfs_console_ratelimit"}, - { "debug_path", - "/sys/module/libcfs/parameters/libcfs_debug_file_path"}, - { "panic_on_lbug", - "/sys/module/libcfs/parameters/libcfs_panic_on_lbug"}, - { "libcfs_console_backoff", - "/sys/module/libcfs/parameters/libcfs_console_backoff"}, - { "debug_mb", - "/sys/module/libcfs/parameters/libcfs_debug_mb"}, - { "console_min_delay_centisecs", - "/sys/module/libcfs/parameters/libcfs_console_min_delay"}, - { "console_max_delay_centisecs", - "/sys/module/libcfs/parameters/libcfs_console_max_delay"}, - {}, -}; - -static ssize_t lnet_debugfs_read(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) -{ - struct ctl_table *table = filp->private_data; - int error; - - error = table->proc_handler(table, 0, (void __user *)buf, &count, ppos); - if (!error) - error = count; - - return 
error; -} - -static ssize_t lnet_debugfs_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct ctl_table *table = filp->private_data; - int error; - - error = table->proc_handler(table, 1, (void __user *)buf, &count, ppos); - if (!error) - error = count; - - return error; -} - -static const struct file_operations lnet_debugfs_file_operations_rw = { - .open = simple_open, - .read = lnet_debugfs_read, - .write = lnet_debugfs_write, - .llseek = default_llseek, -}; - -static const struct file_operations lnet_debugfs_file_operations_ro = { - .open = simple_open, - .read = lnet_debugfs_read, - .llseek = default_llseek, -}; - -static const struct file_operations lnet_debugfs_file_operations_wo = { - .open = simple_open, - .write = lnet_debugfs_write, - .llseek = default_llseek, -}; - -static const struct file_operations *lnet_debugfs_fops_select(umode_t mode) -{ - if (!(mode & 0222)) - return &lnet_debugfs_file_operations_ro; - - if (!(mode & 0444)) - return &lnet_debugfs_file_operations_wo; - - return &lnet_debugfs_file_operations_rw; -} - -void lustre_insert_debugfs(struct ctl_table *table) -{ - if (!lnet_debugfs_root) - lnet_debugfs_root = debugfs_create_dir("lnet", NULL); - - /* Even if we cannot create, just ignore it altogether) */ - if (IS_ERR_OR_NULL(lnet_debugfs_root)) - return; - - /* - * We don't save the dentry returned because we don't call - * debugfs_remove() but rather remove_recursive() - */ - for (; table->procname; table++) - debugfs_create_file(table->procname, table->mode, - lnet_debugfs_root, table, - lnet_debugfs_fops_select(table->mode)); -} -EXPORT_SYMBOL_GPL(lustre_insert_debugfs); - -static void lustre_insert_debugfs_links( - const struct lnet_debugfs_symlink_def *symlinks) -{ - for (; symlinks && symlinks->name; symlinks++) - debugfs_create_symlink(symlinks->name, lnet_debugfs_root, - symlinks->target); -} - -static void lustre_remove_debugfs(void) -{ - debugfs_remove_recursive(lnet_debugfs_root); - - 
lnet_debugfs_root = NULL; -} - -static DEFINE_MUTEX(libcfs_startup); -static int libcfs_active; - -int libcfs_setup(void) -{ - int rc = -EINVAL; - - mutex_lock(&libcfs_startup); - if (libcfs_active) - goto out; - - if (!libcfs_dev_registered) - goto err; - - rc = libcfs_debug_init(5 * 1024 * 1024); - if (rc < 0) { - pr_err("LustreError: libcfs_debug_init: %d\n", rc); - goto err; - } - - rc = cfs_cpu_init(); - if (rc) - goto err; - - cfs_rehash_wq = alloc_workqueue("cfs_rh", WQ_SYSFS, 4); - if (!cfs_rehash_wq) { - CERROR("Failed to start rehash workqueue.\n"); - rc = -ENOMEM; - goto err; - } - - rc = cfs_crypto_register(); - if (rc) { - CERROR("cfs_crypto_register: error %d\n", rc); - goto err; - } - - lustre_insert_debugfs(lnet_table); - if (!IS_ERR_OR_NULL(lnet_debugfs_root)) - lustre_insert_debugfs_links(lnet_debugfs_symlinks); - - CDEBUG(D_OTHER, "portals setup OK\n"); -out: - libcfs_active = 1; - mutex_unlock(&libcfs_startup); - return 0; -err: - cfs_crypto_unregister(); - if (cfs_rehash_wq) - destroy_workqueue(cfs_rehash_wq); - cfs_cpu_fini(); - libcfs_debug_cleanup(); - mutex_unlock(&libcfs_startup); - return rc; -} -EXPORT_SYMBOL(libcfs_setup); - -static int libcfs_init(void) -{ - int rc; - - rc = misc_register(&libcfs_dev); - if (rc) - CERROR("misc_register: error %d\n", rc); - else - libcfs_dev_registered = 1; - return rc; -} - -static void libcfs_exit(void) -{ - int rc; - - lustre_remove_debugfs(); - - if (cfs_rehash_wq) - destroy_workqueue(cfs_rehash_wq); - - cfs_crypto_unregister(); - - if (libcfs_dev_registered) - misc_deregister(&libcfs_dev); - - cfs_cpu_fini(); - - rc = libcfs_debug_cleanup(); - if (rc) - pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc); -} - -MODULE_AUTHOR("OpenSFS, Inc. 
"); -MODULE_DESCRIPTION("Lustre helper library"); -MODULE_VERSION(LIBCFS_VERSION); -MODULE_LICENSE("GPL"); - -module_init(libcfs_init); -module_exit(libcfs_exit); diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.c b/drivers/staging/lustre/lnet/libcfs/tracefile.c deleted file mode 100644 index 7ca562e156f0..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/tracefile.c +++ /dev/null @@ -1,1198 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * libcfs/libcfs/tracefile.c - * - * Author: Zach Brown - * Author: Phil Schwan - */ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE -#define pr_fmt(fmt) "Lustre: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include "tracefile.h" - -/* XXX move things up to the top, comment */ -union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned; - -char cfs_tracefile[TRACEFILE_NAME_SIZE]; -long long cfs_tracefile_size = CFS_TRACEFILE_SIZE; -static struct tracefiled_ctl trace_tctl; -static DEFINE_MUTEX(cfs_trace_thread_mutex); -static int thread_running; - -static atomic_t cfs_tage_allocated = ATOMIC_INIT(0); - -struct page_collection { - struct list_head pc_pages; - /* - * if this flag is set, collect_pages() will spill both - * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise, - * only ->tcd_pages are spilled. - */ - int pc_want_daemon_pages; -}; - -struct tracefiled_ctl { - struct completion tctl_start; - struct completion tctl_stop; - wait_queue_head_t tctl_waitq; - pid_t tctl_pid; - atomic_t tctl_shutdown; -}; - -/* - * small data-structure for each page owned by tracefiled. 
- */ -struct cfs_trace_page { - /* - * page itself - */ - struct page *page; - /* - * linkage into one of the lists in trace_data_union or - * page_collection - */ - struct list_head linkage; - /* - * number of bytes used within this page - */ - unsigned int used; - /* - * cpu that owns this page - */ - unsigned short cpu; - /* - * type(context) of this page - */ - unsigned short type; -}; - -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct cfs_trace_cpu_data *tcd); - -static inline struct cfs_trace_page * -cfs_tage_from_list(struct list_head *list) -{ - return list_entry(list, struct cfs_trace_page, linkage); -} - -static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp) -{ - struct page *page; - struct cfs_trace_page *tage; - - /* My caller is trying to free memory */ - if (!in_interrupt() && (current->flags & PF_MEMALLOC)) - return NULL; - - /* - * Don't spam console with allocation failures: they will be reported - * by upper layer anyway. - */ - gfp |= __GFP_NOWARN; - page = alloc_page(gfp); - if (!page) - return NULL; - - tage = kmalloc(sizeof(*tage), gfp); - if (!tage) { - __free_page(page); - return NULL; - } - - tage->page = page; - atomic_inc(&cfs_tage_allocated); - return tage; -} - -static void cfs_tage_free(struct cfs_trace_page *tage) -{ - __free_page(tage->page); - kfree(tage); - atomic_dec(&cfs_tage_allocated); -} - -static void cfs_tage_to_tail(struct cfs_trace_page *tage, - struct list_head *queue) -{ - list_move_tail(&tage->linkage, queue); -} - -int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, - struct list_head *stock) -{ - int i; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - - for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++i) { - struct cfs_trace_page *tage; - - tage = cfs_tage_alloc(gfp); - if (!tage) - break; - list_add_tail(&tage->linkage, stock); - } - return i; -} - -/* return a page that has 'len' bytes left at the end */ -static struct cfs_trace_page * -cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len) -{ - struct cfs_trace_page *tage; - - if (tcd->tcd_cur_pages > 0) { - __LASSERT(!list_empty(&tcd->tcd_pages)); - tage = cfs_tage_from_list(tcd->tcd_pages.prev); - if (tage->used + len <= PAGE_SIZE) - return tage; - } - - if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { - if (tcd->tcd_cur_stock_pages > 0) { - tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev); - --tcd->tcd_cur_stock_pages; - list_del_init(&tage->linkage); - } else { - tage = cfs_tage_alloc(GFP_ATOMIC); - if (unlikely(!tage)) { - if (!(current->flags & PF_MEMALLOC) || - in_interrupt()) - pr_warn_ratelimited("cannot allocate a tage (%ld)\n", - tcd->tcd_cur_pages); - return NULL; - } - } - - tage->used = 0; - tage->cpu = smp_processor_id(); - tage->type = tcd->tcd_type; - list_add_tail(&tage->linkage, &tcd->tcd_pages); - tcd->tcd_cur_pages++; - - if (tcd->tcd_cur_pages > 8 && thread_running) { - struct tracefiled_ctl *tctl = &trace_tctl; - /* - * wake up tracefiled to process some pages. - */ - wake_up(&tctl->tctl_waitq); - } - return tage; - } - return NULL; -} - -static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd) -{ - int pgcount = tcd->tcd_cur_pages / 10; - struct page_collection pc; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - - pr_warn_ratelimited("debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n", - pgcount + 1, tcd->tcd_cur_pages); - - INIT_LIST_HEAD(&pc.pc_pages); - - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { - if (!pgcount--) - break; - - list_move_tail(&tage->linkage, &pc.pc_pages); - tcd->tcd_cur_pages--; - } - put_pages_on_tcd_daemon_list(&pc, tcd); -} - -/* return a page that has 'len' bytes left at the end */ -static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd, - unsigned long len) -{ - struct cfs_trace_page *tage; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - if (len > PAGE_SIZE) { - pr_err("cowardly refusing to write %lu bytes in a page\n", len); - return NULL; - } - - tage = cfs_trace_get_tage_try(tcd, len); - if (tage) - return tage; - if (thread_running) - cfs_tcd_shrink(tcd); - if (tcd->tcd_cur_pages > 0) { - tage = cfs_tage_from_list(tcd->tcd_pages.next); - tage->used = 0; - cfs_tage_to_tail(tage, &tcd->tcd_pages); - } - return tage; -} - -int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata, - const char *format, ...) -{ - va_list args; - int rc; - - va_start(args, format); - rc = libcfs_debug_vmsg2(msgdata, format, args, NULL); - va_end(args); - - return rc; -} -EXPORT_SYMBOL(libcfs_debug_msg); - -int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata, - const char *format1, va_list args, - const char *format2, ...) 
-{ - struct cfs_trace_cpu_data *tcd = NULL; - struct ptldebug_header header = { 0 }; - struct cfs_trace_page *tage; - /* string_buf is used only if tcd != NULL, and is always set then */ - char *string_buf = NULL; - char *debug_buf; - int known_size; - int needed = 85; /* average message length */ - int max_nob; - va_list ap; - int depth; - int i; - int remain; - int mask = msgdata->msg_mask; - const char *file = kbasename(msgdata->msg_file); - struct cfs_debug_limit_state *cdls = msgdata->msg_cdls; - - tcd = cfs_trace_get_tcd(); - - /* cfs_trace_get_tcd() grabs a lock, which disables preemption and - * pins us to a particular CPU. This avoids an smp_processor_id() - * warning on Linux when debugging is enabled. - */ - cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK()); - - if (!tcd) /* arch may not log in IRQ context */ - goto console; - - if (!tcd->tcd_cur_pages) - header.ph_flags |= PH_FLAG_FIRST_RECORD; - - if (tcd->tcd_shutting_down) { - cfs_trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - depth = 0; - known_size = strlen(file) + 1 + depth; - if (msgdata->msg_fn) - known_size += strlen(msgdata->msg_fn) + 1; - - if (libcfs_debug_binary) - known_size += sizeof(header); - - /* - * '2' used because vsnprintf return real size required for output - * _without_ terminating NULL. - * if needed is to small for this format. 
- */ - for (i = 0; i < 2; i++) { - tage = cfs_trace_get_tage(tcd, needed + known_size + 1); - if (!tage) { - if (needed + known_size > PAGE_SIZE) - mask |= D_ERROR; - - cfs_trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - string_buf = (char *)page_address(tage->page) + - tage->used + known_size; - - max_nob = PAGE_SIZE - tage->used - known_size; - if (max_nob <= 0) { - pr_emerg("negative max_nob: %d\n", max_nob); - mask |= D_ERROR; - cfs_trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - needed = 0; - if (format1) { - va_copy(ap, args); - needed = vsnprintf(string_buf, max_nob, format1, ap); - va_end(ap); - } - - if (format2) { - remain = max_nob - needed; - if (remain < 0) - remain = 0; - - va_start(ap, format2); - needed += vsnprintf(string_buf + needed, remain, - format2, ap); - va_end(ap); - } - - if (needed < max_nob) /* well. printing ok.. */ - break; - } - - if (*(string_buf + needed - 1) != '\n') - pr_info("format at %s:%d:%s doesn't end in newline\n", file, - msgdata->msg_line, msgdata->msg_fn); - - header.ph_len = known_size + needed; - debug_buf = (char *)page_address(tage->page) + tage->used; - - if (libcfs_debug_binary) { - memcpy(debug_buf, &header, sizeof(header)); - tage->used += sizeof(header); - debug_buf += sizeof(header); - } - - /* indent message according to the nesting level */ - while (depth-- > 0) { - *(debug_buf++) = '.'; - ++tage->used; - } - - strcpy(debug_buf, file); - tage->used += strlen(file) + 1; - debug_buf += strlen(file) + 1; - - if (msgdata->msg_fn) { - strcpy(debug_buf, msgdata->msg_fn); - tage->used += strlen(msgdata->msg_fn) + 1; - debug_buf += strlen(msgdata->msg_fn) + 1; - } - - __LASSERT(debug_buf == string_buf); - - tage->used += needed; - __LASSERT(tage->used <= PAGE_SIZE); - -console: - if (!(mask & libcfs_printk)) { - /* no console output requested */ - if (tcd) - cfs_trace_put_tcd(tcd); - return 1; - } - - if (cdls) { - if (libcfs_console_ratelimit && - cdls->cdls_next && /* not first time ever */ - 
!time_after(jiffies, cdls->cdls_next)) { - /* skipping a console message */ - cdls->cdls_count++; - if (tcd) - cfs_trace_put_tcd(tcd); - return 1; - } - - if (time_after(jiffies, - cdls->cdls_next + libcfs_console_max_delay + - 10 * HZ)) { - /* last timeout was a long time ago */ - cdls->cdls_delay /= libcfs_console_backoff * 4; - } else { - cdls->cdls_delay *= libcfs_console_backoff; - } - - if (cdls->cdls_delay < libcfs_console_min_delay) - cdls->cdls_delay = libcfs_console_min_delay; - else if (cdls->cdls_delay > libcfs_console_max_delay) - cdls->cdls_delay = libcfs_console_max_delay; - - /* ensure cdls_next is never zero after it's been seen */ - cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1; - } - - if (tcd) { - cfs_print_to_console(&header, mask, string_buf, needed, file, - msgdata->msg_fn); - cfs_trace_put_tcd(tcd); - } else { - string_buf = cfs_trace_get_console_buffer(); - - needed = 0; - if (format1) { - va_copy(ap, args); - needed = vsnprintf(string_buf, - CFS_TRACE_CONSOLE_BUFFER_SIZE, - format1, ap); - va_end(ap); - } - if (format2) { - remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed; - if (remain > 0) { - va_start(ap, format2); - needed += vsnprintf(string_buf + needed, remain, - format2, ap); - va_end(ap); - } - } - cfs_print_to_console(&header, mask, - string_buf, needed, file, msgdata->msg_fn); - - put_cpu(); - } - - if (cdls && cdls->cdls_count) { - string_buf = cfs_trace_get_console_buffer(); - - needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE, - "Skipped %d previous similar message%s\n", - cdls->cdls_count, - (cdls->cdls_count > 1) ? 
"s" : ""); - - cfs_print_to_console(&header, mask, - string_buf, needed, file, msgdata->msg_fn); - - put_cpu(); - cdls->cdls_count = 0; - } - - return 0; -} -EXPORT_SYMBOL(libcfs_debug_vmsg2); - -void -cfs_trace_assertion_failed(const char *str, - struct libcfs_debug_msg_data *msgdata) -{ - struct ptldebug_header hdr; - - libcfs_panic_in_progress = 1; - libcfs_catastrophe = 1; - mb(); - - cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK()); - - cfs_print_to_console(&hdr, D_EMERG, str, strlen(str), - msgdata->msg_file, msgdata->msg_fn); - - panic("Lustre debug assertion failure\n"); - - /* not reached */ -} - -static void -panic_collect_pages(struct page_collection *pc) -{ - /* Do the collect_pages job on a single CPU: assumes that all other - * CPUs have been stopped during a panic. If this isn't true for some - * arch, this will have to be implemented separately in each arch. - */ - struct cfs_trace_cpu_data *tcd; - int i; - int j; - - INIT_LIST_HEAD(&pc->pc_pages); - - cfs_tcd_for_each(tcd, i, j) { - list_splice_init(&tcd->tcd_pages, &pc->pc_pages); - tcd->tcd_cur_pages = 0; - - if (pc->pc_want_daemon_pages) { - list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); - tcd->tcd_cur_daemon_pages = 0; - } - } -} - -static void collect_pages_on_all_cpus(struct page_collection *pc) -{ - struct cfs_trace_cpu_data *tcd; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) { - list_splice_init(&tcd->tcd_pages, &pc->pc_pages); - tcd->tcd_cur_pages = 0; - if (pc->pc_want_daemon_pages) { - list_splice_init(&tcd->tcd_daemon_pages, - &pc->pc_pages); - tcd->tcd_cur_daemon_pages = 0; - } - } - } -} - -static void collect_pages(struct page_collection *pc) -{ - INIT_LIST_HEAD(&pc->pc_pages); - - if (libcfs_panic_in_progress) - panic_collect_pages(pc); - else - collect_pages_on_all_cpus(pc); -} - -static void put_pages_back_on_all_cpus(struct page_collection *pc) -{ - struct cfs_trace_cpu_data *tcd; - struct list_head *cur_head; - struct 
cfs_trace_page *tage; - struct cfs_trace_page *tmp; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) { - cur_head = tcd->tcd_pages.next; - - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, - linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != cpu || tage->type != i) - continue; - - cfs_tage_to_tail(tage, cur_head); - tcd->tcd_cur_pages++; - } - } - } -} - -static void put_pages_back(struct page_collection *pc) -{ - if (!libcfs_panic_in_progress) - put_pages_back_on_all_cpus(pc); -} - -/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that - * we have a good amount of data at all times for dumping during an LBUG, even - * if we have been steadily writing (and otherwise discarding) pages via the - * debug daemon. - */ -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct cfs_trace_cpu_data *tcd) -{ - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type) - continue; - - cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages); - tcd->tcd_cur_daemon_pages++; - - if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { - struct cfs_trace_page *victim; - - __LASSERT(!list_empty(&tcd->tcd_daemon_pages)); - victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next); - - __LASSERT_TAGE_INVARIANT(victim); - - list_del(&victim->linkage); - cfs_tage_free(victim); - tcd->tcd_cur_daemon_pages--; - } - } -} - -static void put_pages_on_daemon_list(struct page_collection *pc) -{ - struct cfs_trace_cpu_data *tcd; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) - put_pages_on_tcd_daemon_list(pc, tcd); - } -} - -void cfs_trace_debug_print(void) -{ - struct page_collection pc; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - pc.pc_want_daemon_pages = 1; - 
collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - char *p, *file, *fn; - struct page *page; - - __LASSERT_TAGE_INVARIANT(tage); - - page = tage->page; - p = page_address(page); - while (p < ((char *)page_address(page) + tage->used)) { - struct ptldebug_header *hdr; - int len; - - hdr = (void *)p; - p += sizeof(*hdr); - file = p; - p += strlen(file) + 1; - fn = p; - p += strlen(fn) + 1; - len = hdr->ph_len - (int)(p - (char *)hdr); - - cfs_print_to_console(hdr, D_EMERG, p, len, file, fn); - - p += len; - } - - list_del(&tage->linkage); - cfs_tage_free(tage); - } -} - -int cfs_tracefile_dump_all_pages(char *filename) -{ - struct page_collection pc; - struct file *filp; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - char *buf; - mm_segment_t __oldfs; - int rc; - - cfs_tracefile_write_lock(); - - filp = filp_open(filename, O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, - 0600); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - filp = NULL; - pr_err("LustreError: can't open %s for dump: rc %d\n", - filename, rc); - goto out; - } - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) { - rc = 0; - goto close; - } - __oldfs = get_fs(); - set_fs(get_ds()); - - /* ok, for now, just write the pages. 
in the future we'll be building - * iobufs with the pages and calling generic_direct_IO - */ - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - buf = kmap(tage->page); - rc = kernel_write(filp, buf, tage->used, &filp->f_pos); - kunmap(tage->page); - - if (rc != (int)tage->used) { - pr_warn("wanted to write %u but wrote %d\n", tage->used, - rc); - put_pages_back(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - break; - } - list_del(&tage->linkage); - cfs_tage_free(tage); - } - set_fs(__oldfs); - rc = vfs_fsync(filp, 1); - if (rc) - pr_err("sync returns %d\n", rc); -close: - filp_close(filp, NULL); -out: - cfs_tracefile_write_unlock(); - return rc; -} - -void cfs_trace_flush_pages(void) -{ - struct page_collection pc; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - list_del(&tage->linkage); - cfs_tage_free(tage); - } -} - -int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, - const char __user *usr_buffer, int usr_buffer_nob) -{ - int nob; - - if (usr_buffer_nob > knl_buffer_nob) - return -EOVERFLOW; - - if (copy_from_user((void *)knl_buffer, - usr_buffer, usr_buffer_nob)) - return -EFAULT; - - nob = strnlen(knl_buffer, usr_buffer_nob); - while (--nob >= 0) /* strip trailing whitespace */ - if (!isspace(knl_buffer[nob])) - break; - - if (nob < 0) /* empty string */ - return -EINVAL; - - if (nob == knl_buffer_nob) /* no space to terminate */ - return -EOVERFLOW; - - knl_buffer[nob + 1] = 0; /* terminate */ - return 0; -} -EXPORT_SYMBOL(cfs_trace_copyin_string); - -int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, - const char *knl_buffer, char *append) -{ - /* - * NB if 'append' != NULL, it's a single character to append to the - * copied out string - usually "\n" or "" (i.e. 
a terminating zero byte) - */ - int nob = strlen(knl_buffer); - - if (nob > usr_buffer_nob) - nob = usr_buffer_nob; - - if (copy_to_user(usr_buffer, knl_buffer, nob)) - return -EFAULT; - - if (append && nob < usr_buffer_nob) { - if (copy_to_user(usr_buffer + nob, append, 1)) - return -EFAULT; - - nob++; - } - - return nob; -} -EXPORT_SYMBOL(cfs_trace_copyout_string); - -int cfs_trace_allocate_string_buffer(char **str, int nob) -{ - if (nob > 2 * PAGE_SIZE) /* string must be "sensible" */ - return -EINVAL; - - *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO); - if (!*str) - return -ENOMEM; - - return 0; -} - -int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob) -{ - char *str; - int rc; - - rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc) - return rc; - - rc = cfs_trace_copyin_string(str, usr_str_nob + 1, - usr_str, usr_str_nob); - if (rc) - goto out; - - if (str[0] != '/') { - rc = -EINVAL; - goto out; - } - rc = cfs_tracefile_dump_all_pages(str); -out: - kfree(str); - return rc; -} - -int cfs_trace_daemon_command(char *str) -{ - int rc = 0; - - cfs_tracefile_write_lock(); - - if (!strcmp(str, "stop")) { - cfs_tracefile_write_unlock(); - cfs_trace_stop_thread(); - cfs_tracefile_write_lock(); - memset(cfs_tracefile, 0, sizeof(cfs_tracefile)); - - } else if (!strncmp(str, "size=", 5)) { - unsigned long tmp; - - rc = kstrtoul(str + 5, 10, &tmp); - if (!rc) { - if (tmp < 10 || tmp > 20480) - cfs_tracefile_size = CFS_TRACEFILE_SIZE; - else - cfs_tracefile_size = tmp << 20; - } - } else if (strlen(str) >= sizeof(cfs_tracefile)) { - rc = -ENAMETOOLONG; - } else if (str[0] != '/') { - rc = -EINVAL; - } else { - strcpy(cfs_tracefile, str); - - pr_info("debug daemon will attempt to start writing to %s (%lukB max)\n", - cfs_tracefile, - (long)(cfs_tracefile_size >> 10)); - - cfs_trace_start_thread(); - } - - cfs_tracefile_write_unlock(); - return rc; -} - -int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob) -{ 
- char *str; - int rc; - - rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1); - if (rc) - return rc; - - rc = cfs_trace_copyin_string(str, usr_str_nob + 1, - usr_str, usr_str_nob); - if (!rc) - rc = cfs_trace_daemon_command(str); - - kfree(str); - return rc; -} - -int cfs_trace_set_debug_mb(int mb) -{ - int i; - int j; - int pages; - int limit = cfs_trace_max_debug_mb(); - struct cfs_trace_cpu_data *tcd; - - if (mb < num_possible_cpus()) { - pr_warn("%d MB is too small for debug buffer size, setting it to %d MB.\n", - mb, num_possible_cpus()); - mb = num_possible_cpus(); - } - - if (mb > limit) { - pr_warn("%d MB is too large for debug buffer size, setting it to %d MB.\n", - mb, limit); - mb = limit; - } - - mb /= num_possible_cpus(); - pages = mb << (20 - PAGE_SHIFT); - - cfs_tracefile_write_lock(); - - cfs_tcd_for_each(tcd, i, j) - tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; - - cfs_tracefile_write_unlock(); - - return 0; -} - -int cfs_trace_get_debug_mb(void) -{ - int i; - int j; - struct cfs_trace_cpu_data *tcd; - int total_pages = 0; - - cfs_tracefile_read_lock(); - - cfs_tcd_for_each(tcd, i, j) - total_pages += tcd->tcd_max_pages; - - cfs_tracefile_read_unlock(); - - return (total_pages >> (20 - PAGE_SHIFT)) + 1; -} - -static int tracefiled(void *arg) -{ - struct page_collection pc; - struct tracefiled_ctl *tctl = arg; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - struct file *filp; - char *buf; - int last_loop = 0; - int rc; - - /* we're started late enough that we pick up init's fs context */ - /* this is so broken in uml? what on earth is going on? 
*/ - - complete(&tctl->tctl_start); - - while (1) { - wait_queue_entry_t __wait; - - pc.pc_want_daemon_pages = 0; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) - goto end_loop; - - filp = NULL; - cfs_tracefile_read_lock(); - if (cfs_tracefile[0]) { - filp = filp_open(cfs_tracefile, - O_CREAT | O_RDWR | O_LARGEFILE, - 0600); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - filp = NULL; - pr_warn("couldn't open %s: %d\n", cfs_tracefile, - rc); - } - } - cfs_tracefile_read_unlock(); - if (!filp) { - put_pages_on_daemon_list(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - goto end_loop; - } - - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - static loff_t f_pos; - - __LASSERT_TAGE_INVARIANT(tage); - - if (f_pos >= (off_t)cfs_tracefile_size) - f_pos = 0; - else if (f_pos > i_size_read(file_inode(filp))) - f_pos = i_size_read(file_inode(filp)); - - buf = kmap(tage->page); - rc = kernel_write(filp, buf, tage->used, &f_pos); - kunmap(tage->page); - - if (rc != (int)tage->used) { - pr_warn("wanted to write %u but wrote %d\n", - tage->used, rc); - put_pages_back(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - break; - } - } - - filp_close(filp, NULL); - put_pages_on_daemon_list(&pc); - if (!list_empty(&pc.pc_pages)) { - int i; - - pr_alert("trace pages aren't empty\n"); - pr_err("total cpus(%d): ", num_possible_cpus()); - for (i = 0; i < num_possible_cpus(); i++) - if (cpu_online(i)) - pr_cont("%d(on) ", i); - else - pr_cont("%d(off) ", i); - pr_cont("\n"); - - i = 0; - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, - linkage) - pr_err("page %d belongs to cpu %d\n", - ++i, tage->cpu); - pr_err("There are %d pages unwritten\n", i); - } - __LASSERT(list_empty(&pc.pc_pages)); -end_loop: - if (atomic_read(&tctl->tctl_shutdown)) { - if (!last_loop) { - last_loop = 1; - continue; - } else { - break; - } - } - init_waitqueue_entry(&__wait, current); - add_wait_queue(&tctl->tctl_waitq, &__wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - 
remove_wait_queue(&tctl->tctl_waitq, &__wait); - } - complete(&tctl->tctl_stop); - return 0; -} - -int cfs_trace_start_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - struct task_struct *task; - int rc = 0; - - mutex_lock(&cfs_trace_thread_mutex); - if (thread_running) - goto out; - - init_completion(&tctl->tctl_start); - init_completion(&tctl->tctl_stop); - init_waitqueue_head(&tctl->tctl_waitq); - atomic_set(&tctl->tctl_shutdown, 0); - - task = kthread_run(tracefiled, tctl, "ktracefiled"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - goto out; - } - - wait_for_completion(&tctl->tctl_start); - thread_running = 1; -out: - mutex_unlock(&cfs_trace_thread_mutex); - return rc; -} - -void cfs_trace_stop_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - - mutex_lock(&cfs_trace_thread_mutex); - if (thread_running) { - pr_info("shutting down debug daemon thread...\n"); - atomic_set(&tctl->tctl_shutdown, 1); - wait_for_completion(&tctl->tctl_stop); - thread_running = 0; - } - mutex_unlock(&cfs_trace_thread_mutex); -} - -int cfs_tracefile_init(int max_pages) -{ - struct cfs_trace_cpu_data *tcd; - int i; - int j; - int rc; - int factor; - - rc = cfs_tracefile_init_arch(); - if (rc) - return rc; - - cfs_tcd_for_each(tcd, i, j) { - /* tcd_pages_factor is initialized int tracefile_init_arch. 
*/ - factor = tcd->tcd_pages_factor; - INIT_LIST_HEAD(&tcd->tcd_pages); - INIT_LIST_HEAD(&tcd->tcd_stock_pages); - INIT_LIST_HEAD(&tcd->tcd_daemon_pages); - tcd->tcd_cur_pages = 0; - tcd->tcd_cur_stock_pages = 0; - tcd->tcd_cur_daemon_pages = 0; - tcd->tcd_max_pages = (max_pages * factor) / 100; - LASSERT(tcd->tcd_max_pages > 0); - tcd->tcd_shutting_down = 0; - } - - return 0; -} - -static void trace_cleanup_on_all_cpus(void) -{ - struct cfs_trace_cpu_data *tcd; - struct cfs_trace_page *tage; - struct cfs_trace_page *tmp; - int i, cpu; - - for_each_possible_cpu(cpu) { - cfs_tcd_for_each_type_lock(tcd, i, cpu) { - tcd->tcd_shutting_down = 1; - - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, - linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - list_del(&tage->linkage); - cfs_tage_free(tage); - } - - tcd->tcd_cur_pages = 0; - } - } -} - -static void cfs_trace_cleanup(void) -{ - struct page_collection pc; - - INIT_LIST_HEAD(&pc.pc_pages); - - trace_cleanup_on_all_cpus(); - - cfs_tracefile_fini_arch(); -} - -void cfs_tracefile_exit(void) -{ - cfs_trace_stop_thread(); - cfs_trace_cleanup(); -} diff --git a/drivers/staging/lustre/lnet/libcfs/tracefile.h b/drivers/staging/lustre/lnet/libcfs/tracefile.h deleted file mode 100644 index 0608240d897f..000000000000 --- a/drivers/staging/lustre/lnet/libcfs/tracefile.h +++ /dev/null @@ -1,274 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LIBCFS_TRACEFILE_H__ -#define __LIBCFS_TRACEFILE_H__ - -#include -#include -#include -#include -#include -#include -#include - -enum cfs_trace_buf_type { - CFS_TCD_TYPE_PROC = 0, - CFS_TCD_TYPE_SOFTIRQ, - CFS_TCD_TYPE_IRQ, - CFS_TCD_TYPE_MAX -}; - -/* trace file lock routines */ - -#define TRACEFILE_NAME_SIZE 1024 -extern char cfs_tracefile[TRACEFILE_NAME_SIZE]; -extern long long cfs_tracefile_size; - -/** - * The path of debug log dump upcall script. 
- */ -extern char lnet_debug_log_upcall[1024]; - -void libcfs_run_debug_log_upcall(char *file); - -int cfs_tracefile_init_arch(void); -void cfs_tracefile_fini_arch(void); - -void cfs_tracefile_read_lock(void); -void cfs_tracefile_read_unlock(void); -void cfs_tracefile_write_lock(void); -void cfs_tracefile_write_unlock(void); - -int cfs_tracefile_dump_all_pages(char *filename); -void cfs_trace_debug_print(void); -void cfs_trace_flush_pages(void); -int cfs_trace_start_thread(void); -void cfs_trace_stop_thread(void); -int cfs_tracefile_init(int max_pages); -void cfs_tracefile_exit(void); - -int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob, - const char __user *usr_buffer, int usr_buffer_nob); -int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob, - const char *knl_str, char *append); -int cfs_trace_allocate_string_buffer(char **str, int nob); -int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob); -int cfs_trace_daemon_command(char *str); -int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob); -int cfs_trace_set_debug_mb(int mb); -int cfs_trace_get_debug_mb(void); - -void libcfs_debug_dumplog_internal(void *arg); -void libcfs_register_panic_notifier(void); -void libcfs_unregister_panic_notifier(void); -extern int libcfs_panic_in_progress; -int cfs_trace_max_debug_mb(void); - -#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) -#define TCD_STOCK_PAGES (TCD_MAX_PAGES) -#define CFS_TRACEFILE_SIZE (500 << 20) - -#ifdef LUSTRE_TRACEFILE_PRIVATE - -/* - * Private declare for tracefile - */ -#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) -#define TCD_STOCK_PAGES (TCD_MAX_PAGES) - -#define CFS_TRACEFILE_SIZE (500 << 20) - -/* - * Size of a buffer for sprinting console messages if we can't get a page - * from system - */ -#define CFS_TRACE_CONSOLE_BUFFER_SIZE 1024 - -union cfs_trace_data_union { - struct cfs_trace_cpu_data { - /* - * Even though this structure is meant to be per-CPU, locking - * is 
needed because in some places the data may be accessed - * from other CPUs. This lock is directly used in trace_get_tcd - * and trace_put_tcd, which are called in libcfs_debug_vmsg2 and - * tcd_for_each_type_lock - */ - spinlock_t tcd_lock; - unsigned long tcd_lock_flags; - - /* - * pages with trace records not yet processed by tracefiled. - */ - struct list_head tcd_pages; - /* number of pages on ->tcd_pages */ - unsigned long tcd_cur_pages; - - /* - * pages with trace records already processed by - * tracefiled. These pages are kept in memory, so that some - * portion of log can be written in the event of LBUG. This - * list is maintained in LRU order. - * - * Pages are moved to ->tcd_daemon_pages by tracefiled() - * (put_pages_on_daemon_list()). LRU pages from this list are - * discarded when list grows too large. - */ - struct list_head tcd_daemon_pages; - /* number of pages on ->tcd_daemon_pages */ - unsigned long tcd_cur_daemon_pages; - - /* - * Maximal number of pages allowed on ->tcd_pages and - * ->tcd_daemon_pages each. - * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current - * implementation. - */ - unsigned long tcd_max_pages; - - /* - * preallocated pages to write trace records into. Pages from - * ->tcd_stock_pages are moved to ->tcd_pages by - * portals_debug_msg(). - * - * This list is necessary, because on some platforms it's - * impossible to perform efficient atomic page allocation in a - * non-blockable context. - * - * Such platforms fill ->tcd_stock_pages "on occasion", when - * tracing code is entered in blockable context. - * - * trace_get_tage_try() tries to get a page from - * ->tcd_stock_pages first and resorts to atomic page - * allocation only if this queue is empty. ->tcd_stock_pages - * is replenished when tracing code is entered in blocking - * context (darwin-tracefile.c:trace_get_tcd()). We try to - * maintain TCD_STOCK_PAGES (40 by default) pages in this - * queue. 
Atomic allocation is only required if more than - * TCD_STOCK_PAGES pagesful are consumed by trace records all - * emitted in non-blocking contexts. Which is quite unlikely. - */ - struct list_head tcd_stock_pages; - /* number of pages on ->tcd_stock_pages */ - unsigned long tcd_cur_stock_pages; - - unsigned short tcd_shutting_down; - unsigned short tcd_cpu; - unsigned short tcd_type; - /* The factors to share debug memory. */ - unsigned short tcd_pages_factor; - } tcd; - char __pad[L1_CACHE_ALIGN(sizeof(struct cfs_trace_cpu_data))]; -}; - -#define TCD_MAX_TYPES 8 -extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS]; - -#define cfs_tcd_for_each(tcd, i, j) \ - for (i = 0; cfs_trace_data[i]; i++) \ - for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \ - j < num_possible_cpus(); \ - j++, (tcd) = &(*cfs_trace_data[i])[j].tcd) - -#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \ - for (i = 0; cfs_trace_data[i] && \ - (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \ - cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++) - -void cfs_set_ptldebug_header(struct ptldebug_header *header, - struct libcfs_debug_msg_data *m, - unsigned long stack); -void cfs_print_to_console(struct ptldebug_header *hdr, int mask, - const char *buf, int len, const char *file, - const char *fn); - -int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking); -void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking); - -extern char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX]; -enum cfs_trace_buf_type cfs_trace_buf_idx_get(void); - -static inline char * -cfs_trace_get_console_buffer(void) -{ - unsigned int i = get_cpu(); - unsigned int j = cfs_trace_buf_idx_get(); - - return cfs_trace_console_buffers[i][j]; -} - -static inline struct cfs_trace_cpu_data * -cfs_trace_get_tcd(void) -{ - struct cfs_trace_cpu_data *tcd = - &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd; - - cfs_trace_lock_tcd(tcd, 0); - - return tcd; -} - -static 
inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd) -{ - cfs_trace_unlock_tcd(tcd, 0); - - put_cpu(); -} - -int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp, - struct list_head *stock); - -void cfs_trace_assertion_failed(const char *str, - struct libcfs_debug_msg_data *m); - -/* ASSERTION that is safe to use within the debug system */ -#define __LASSERT(cond) \ -do { \ - if (unlikely(!(cond))) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_EMERG, NULL); \ - cfs_trace_assertion_failed("ASSERTION("#cond") failed", \ - &msgdata); \ - } \ -} while (0) - -#define __LASSERT_TAGE_INVARIANT(tage) \ -do { \ - __LASSERT(tage); \ - __LASSERT(tage->page); \ - __LASSERT(tage->used <= PAGE_SIZE); \ - __LASSERT(page_count(tage->page) > 0); \ -} while (0) - -#endif /* LUSTRE_TRACEFILE_PRIVATE */ - -#endif /* __LIBCFS_TRACEFILE_H__ */ diff --git a/drivers/staging/lustre/lnet/lnet/Makefile b/drivers/staging/lustre/lnet/lnet/Makefile deleted file mode 100644 index 0a9d70924fe0..000000000000 --- a/drivers/staging/lustre/lnet/lnet/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET) += lnet.o - -lnet-y := api-ni.o config.o nidstrings.o net_fault.o \ - lib-me.o lib-msg.o lib-eq.o lib-md.o lib-ptl.o \ - lib-socket.o lib-move.o module.o lo.o \ - router.o router_proc.o acceptor.o peer.o diff --git a/drivers/staging/lustre/lnet/lnet/acceptor.c b/drivers/staging/lustre/lnet/lnet/acceptor.c deleted file mode 100644 index 5648f17eddc0..000000000000 --- a/drivers/staging/lustre/lnet/lnet/acceptor.c +++ /dev/null @@ -1,501 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#include -#include -#include - -static int accept_port = 988; -static int accept_backlog = 127; -static int accept_timeout = 5; - -static struct { - int pta_shutdown; - struct socket *pta_sock; - struct completion pta_signal; -} lnet_acceptor_state = { - .pta_shutdown = 1 -}; - -int -lnet_acceptor_port(void) -{ - return accept_port; -} -EXPORT_SYMBOL(lnet_acceptor_port); - -static inline int -lnet_accept_magic(__u32 magic, __u32 constant) -{ - return (magic == constant || - magic == __swab32(constant)); -} - -static char *accept = "secure"; - -module_param(accept, charp, 0444); -MODULE_PARM_DESC(accept, "Accept connections (secure|all|none)"); -module_param(accept_port, int, 0444); -MODULE_PARM_DESC(accept_port, "Acceptor's port (same on all nodes)"); -module_param(accept_backlog, int, 0444); -MODULE_PARM_DESC(accept_backlog, "Acceptor's listen backlog"); -module_param(accept_timeout, int, 0644); -MODULE_PARM_DESC(accept_timeout, "Acceptor's timeout (seconds)"); - -static char *accept_type; - -static int -lnet_acceptor_get_tunables(void) -{ - /* - * Userland acceptor uses 'accept_type' instead of 'accept', due to - * conflict with 'accept(2)', but kernel acceptor still uses 'accept' - * for compatibility. Hence the trick. 
- */ - accept_type = accept; - return 0; -} - -int -lnet_acceptor_timeout(void) -{ - return accept_timeout; -} -EXPORT_SYMBOL(lnet_acceptor_timeout); - -void -lnet_connect_console_error(int rc, lnet_nid_t peer_nid, - __u32 peer_ip, int peer_port) -{ - switch (rc) { - /* "normal" errors */ - case -ECONNREFUSED: - CNETERR("Connection to %s at host %pI4h on port %d was refused: check that Lustre is running on that node.\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - case -EHOSTUNREACH: - case -ENETUNREACH: - CNETERR("Connection to %s at host %pI4h was unreachable: the network or that node may be down, or Lustre may be misconfigured.\n", - libcfs_nid2str(peer_nid), &peer_ip); - break; - case -ETIMEDOUT: - CNETERR("Connection to %s at host %pI4h on port %d took too long: that node may be hung or experiencing high load.\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - case -ECONNRESET: - LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %pI4h on port %d was reset: is it running a compatible version of Lustre and is %s one of its NIDs?\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port, - libcfs_nid2str(peer_nid)); - break; - case -EPROTO: - LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at host %pI4h on port %d: is it running a compatible version of Lustre?\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - case -EADDRINUSE: - LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to connect to %s at host %pI4h on port %d\n", - libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - default: - LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s at host %pI4h on port %d\n", - rc, libcfs_nid2str(peer_nid), - &peer_ip, peer_port); - break; - } -} -EXPORT_SYMBOL(lnet_connect_console_error); - -int -lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, - __u32 local_ip, __u32 peer_ip, int peer_port) -{ - struct lnet_acceptor_connreq cr; - struct socket *sock; - int rc; - int port; - 
int fatal; - - BUILD_BUG_ON(sizeof(cr) > 16); /* too big to be on the stack */ - - for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT; - port >= LNET_ACCEPTOR_MIN_RESERVED_PORT; - --port) { - /* Iterate through reserved ports. */ - - rc = lnet_sock_connect(&sock, &fatal, local_ip, port, peer_ip, - peer_port); - if (rc) { - if (fatal) - goto failed; - continue; - } - - BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1); - - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - cr.acr_nid = peer_nid; - - if (the_lnet.ln_testprotocompat) { - /* single-shot proto check */ - lnet_net_lock(LNET_LOCK_EX); - if (the_lnet.ln_testprotocompat & 4) { - cr.acr_version++; - the_lnet.ln_testprotocompat &= ~4; - } - if (the_lnet.ln_testprotocompat & 8) { - cr.acr_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~8; - } - lnet_net_unlock(LNET_LOCK_EX); - } - - rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout); - if (rc) - goto failed_sock; - - *sockp = sock; - return 0; - } - - rc = -EADDRINUSE; - goto failed; - - failed_sock: - sock_release(sock); - failed: - lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port); - return rc; -} -EXPORT_SYMBOL(lnet_connect); - -static int -lnet_accept(struct socket *sock, __u32 magic) -{ - struct lnet_acceptor_connreq cr; - __u32 peer_ip; - int peer_port; - int rc; - int flip; - struct lnet_ni *ni; - char *str; - - LASSERT(sizeof(cr) <= 16); /* not too big for the stack */ - - rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT(!rc); /* we succeeded before */ - - if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) { - if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) { - /* - * future version compatibility! - * When LNET unifies protocols over all LNDs, the first - * thing sent will be a version query. 
I send back - * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" - */ - memset(&cr, 0, sizeof(cr)); - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - rc = lnet_sock_write(sock, &cr, sizeof(cr), - accept_timeout); - - if (rc) - CERROR("Error sending magic+version in response to LNET magic from %pI4h: %d\n", - &peer_ip, rc); - return -EPROTO; - } - - if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC)) - str = "'old' socknal/tcpnal"; - else - str = "unrecognised"; - - LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pI4h magic %08x: %s acceptor protocol\n", - &peer_ip, magic, str); - return -EPROTO; - } - - flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC); - - rc = lnet_sock_read(sock, &cr.acr_version, sizeof(cr.acr_version), - accept_timeout); - if (rc) { - CERROR("Error %d reading connection request version from %pI4h\n", - rc, &peer_ip); - return -EIO; - } - - if (flip) - __swab32s(&cr.acr_version); - - if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) { - /* - * future version compatibility! - * An acceptor-specific protocol rev will first send a version - * query. I send back my current version to tell her I'm - * "old". - */ - int peer_version = cr.acr_version; - - memset(&cr, 0, sizeof(cr)); - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - - rc = lnet_sock_write(sock, &cr, sizeof(cr), accept_timeout); - if (rc) - CERROR("Error sending magic+version in response to version %d from %pI4h: %d\n", - peer_version, &peer_ip, rc); - return -EPROTO; - } - - rc = lnet_sock_read(sock, &cr.acr_nid, - sizeof(cr) - - offsetof(struct lnet_acceptor_connreq, acr_nid), - accept_timeout); - if (rc) { - CERROR("Error %d reading connection request from %pI4h\n", - rc, &peer_ip); - return -EIO; - } - - if (flip) - __swab64s(&cr.acr_nid); - - ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid)); - if (!ni || /* no matching net */ - ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! 
*/ - if (ni) - lnet_ni_decref(ni); - LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %pI4h for %s: No matching NI\n", - &peer_ip, libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - if (!ni->ni_lnd->lnd_accept) { - /* This catches a request for the loopback LND */ - lnet_ni_decref(ni); - LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h for %s: NI doesn not accept IP connections\n", - &peer_ip, libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - CDEBUG(D_NET, "Accept %s from %pI4h\n", - libcfs_nid2str(cr.acr_nid), &peer_ip); - - rc = ni->ni_lnd->lnd_accept(ni, sock); - - lnet_ni_decref(ni); - return rc; -} - -static int -lnet_acceptor(void *arg) -{ - struct socket *newsock; - int rc; - __u32 magic; - __u32 peer_ip; - int peer_port; - int secure = (int)((long)arg); - - LASSERT(!lnet_acceptor_state.pta_sock); - - rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock, 0, accept_port, - accept_backlog); - if (rc) { - if (rc == -EADDRINUSE) - LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port %d: port already in use\n", - accept_port); - else - LCONSOLE_ERROR_MSG(0x123, "Can't start acceptor on port %d: unexpected error %d\n", - accept_port, rc); - - lnet_acceptor_state.pta_sock = NULL; - } else { - LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port); - } - - /* set init status and unblock parent */ - lnet_acceptor_state.pta_shutdown = rc; - complete(&lnet_acceptor_state.pta_signal); - - if (rc) - return rc; - - while (!lnet_acceptor_state.pta_shutdown) { - rc = lnet_sock_accept(&newsock, lnet_acceptor_state.pta_sock); - if (rc) { - if (rc != -EAGAIN) { - CWARN("Accept error %d: pausing...\n", rc); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } - continue; - } - - /* maybe the LNet acceptor thread has been waken */ - if (lnet_acceptor_state.pta_shutdown) { - sock_release(newsock); - break; - } - - rc = lnet_sock_getaddr(newsock, 1, &peer_ip, &peer_port); - if (rc) { - CERROR("Can't determine new connection's 
address\n"); - goto failed; - } - - if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - CERROR("Refusing connection from %pI4h: insecure port %d\n", - &peer_ip, peer_port); - goto failed; - } - - rc = lnet_sock_read(newsock, &magic, sizeof(magic), - accept_timeout); - if (rc) { - CERROR("Error %d reading connection request from %pI4h\n", - rc, &peer_ip); - goto failed; - } - - rc = lnet_accept(newsock, magic); - if (rc) - goto failed; - - continue; - -failed: - sock_release(newsock); - } - - sock_release(lnet_acceptor_state.pta_sock); - lnet_acceptor_state.pta_sock = NULL; - - CDEBUG(D_NET, "Acceptor stopping\n"); - - /* unblock lnet_acceptor_stop() */ - complete(&lnet_acceptor_state.pta_signal); - return 0; -} - -static inline int -accept2secure(const char *acc, long *sec) -{ - if (!strcmp(acc, "secure")) { - *sec = 1; - return 1; - } else if (!strcmp(acc, "all")) { - *sec = 0; - return 1; - } else if (!strcmp(acc, "none")) { - return 0; - } - - LCONSOLE_ERROR_MSG(0x124, "Can't parse 'accept=\"%s\"'\n", - acc); - return -EINVAL; -} - -int -lnet_acceptor_start(void) -{ - struct task_struct *task; - int rc; - long rc2; - long secure; - - /* if acceptor is already running return immediately */ - if (!lnet_acceptor_state.pta_shutdown) - return 0; - - LASSERT(!lnet_acceptor_state.pta_sock); - - rc = lnet_acceptor_get_tunables(); - if (rc) - return rc; - - init_completion(&lnet_acceptor_state.pta_signal); - rc = accept2secure(accept_type, &secure); - if (rc <= 0) - return rc; - - if (!lnet_count_acceptor_nis()) /* not required */ - return 0; - - task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure, - "acceptor_%03ld", secure); - if (IS_ERR(task)) { - rc2 = PTR_ERR(task); - CERROR("Can't start acceptor thread: %ld\n", rc2); - - return -ESRCH; - } - - /* wait for acceptor to startup */ - wait_for_completion(&lnet_acceptor_state.pta_signal); - - if (!lnet_acceptor_state.pta_shutdown) { - /* started OK */ - LASSERT(lnet_acceptor_state.pta_sock); - return 
0; - } - - LASSERT(!lnet_acceptor_state.pta_sock); - - return -ENETDOWN; -} - -void -lnet_acceptor_stop(void) -{ - struct sock *sk; - - if (lnet_acceptor_state.pta_shutdown) /* not running */ - return; - - lnet_acceptor_state.pta_shutdown = 1; - - sk = lnet_acceptor_state.pta_sock->sk; - - /* awake any sleepers using safe method */ - sk->sk_state_change(sk); - - /* block until acceptor signals exit */ - wait_for_completion(&lnet_acceptor_state.pta_signal); -} diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c deleted file mode 100644 index f9ed6977056c..000000000000 --- a/drivers/staging/lustre/lnet/lnet/api-ni.c +++ /dev/null @@ -1,2307 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#include -#include - -#include -#include - -#define D_LNI D_CONSOLE - -struct lnet the_lnet; /* THE state of the network */ -EXPORT_SYMBOL(the_lnet); - -static char *ip2nets = ""; -module_param(ip2nets, charp, 0444); -MODULE_PARM_DESC(ip2nets, "LNET network <- IP table"); - -static char *networks = ""; -module_param(networks, charp, 0444); -MODULE_PARM_DESC(networks, "local networks"); - -static char *routes = ""; -module_param(routes, charp, 0444); -MODULE_PARM_DESC(routes, "routes to non-local networks"); - -static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; -module_param(rnet_htable_size, int, 0444); -MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table"); - -static int lnet_ping(struct lnet_process_id id, int timeout_ms, - struct lnet_process_id __user *ids, int n_ids); - -static char * -lnet_get_routes(void) -{ - return routes; -} - -static char * -lnet_get_networks(void) -{ - char *nets; - int rc; - - if (*networks && *ip2nets) { - LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or 'ip2nets' but not both at once\n"); - return NULL; - } - - if (*ip2nets) { - rc = lnet_parse_ip2nets(&nets, ip2nets); - return !rc ? 
nets : NULL; - } - - if (*networks) - return networks; - - return "tcp"; -} - -static void -lnet_init_locks(void) -{ - spin_lock_init(&the_lnet.ln_eq_wait_lock); - init_waitqueue_head(&the_lnet.ln_eq_waitq); - init_waitqueue_head(&the_lnet.ln_rc_waitq); - mutex_init(&the_lnet.ln_lnd_mutex); - mutex_init(&the_lnet.ln_api_mutex); -} - -static int -lnet_create_remote_nets_table(void) -{ - int i; - struct list_head *hash; - - LASSERT(!the_lnet.ln_remote_nets_hash); - LASSERT(the_lnet.ln_remote_nets_hbits > 0); - hash = kvmalloc_array(LNET_REMOTE_NETS_HASH_SIZE, sizeof(*hash), - GFP_KERNEL); - if (!hash) { - CERROR("Failed to create remote nets hash table\n"); - return -ENOMEM; - } - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) - INIT_LIST_HEAD(&hash[i]); - the_lnet.ln_remote_nets_hash = hash; - return 0; -} - -static void -lnet_destroy_remote_nets_table(void) -{ - int i; - - if (!the_lnet.ln_remote_nets_hash) - return; - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) - LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i])); - - kvfree(the_lnet.ln_remote_nets_hash); - the_lnet.ln_remote_nets_hash = NULL; -} - -static void -lnet_destroy_locks(void) -{ - if (the_lnet.ln_res_lock) { - cfs_percpt_lock_free(the_lnet.ln_res_lock); - the_lnet.ln_res_lock = NULL; - } - - if (the_lnet.ln_net_lock) { - cfs_percpt_lock_free(the_lnet.ln_net_lock); - the_lnet.ln_net_lock = NULL; - } -} - -static int -lnet_create_locks(void) -{ - lnet_init_locks(); - - the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table()); - if (!the_lnet.ln_res_lock) - goto failed; - - the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table()); - if (!the_lnet.ln_net_lock) - goto failed; - - return 0; - - failed: - lnet_destroy_locks(); - return -ENOMEM; -} - -static void lnet_assert_wire_constants(void) -{ - /* - * Wire protocol assertions generated by 'wirecheck' - * running on Linux robert.bartonsoftware.com 2.6.8-1.521 - * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux - * with 
gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) - */ - - /* Constants... */ - BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded); - BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1); - BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0); - BUILD_BUG_ON(LNET_MSG_ACK != 0); - BUILD_BUG_ON(LNET_MSG_PUT != 1); - BUILD_BUG_ON(LNET_MSG_GET != 2); - BUILD_BUG_ON(LNET_MSG_REPLY != 3); - BUILD_BUG_ON(LNET_MSG_HELLO != 4); - - /* Checks for struct ptl_handle_wire_t */ - BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16); - BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) != 0); - BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire, wh_object_cookie) != 8); - BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8); - - /* Checks for struct struct lnet_magicversion */ - BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0); - BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4); - BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2); - BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_minor) != 6); - BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2); - - /* Checks for struct struct lnet_hdr */ - BUILD_BUG_ON((int)sizeof(struct lnet_hdr) != 72); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_nid) != 0); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_nid) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_nid) != 8); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_nid) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, dest_pid) != 16); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->dest_pid) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, src_pid) != 20); - 
BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->src_pid) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, type) != 24); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->type) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, payload_length) != 28); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->payload_length) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg) != 40); - - /* Ack */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) != 16); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.match_bits) != 48); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.ack.mlength) != 56); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) != 4); - - /* Put */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) != 16); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.match_bits) != 48); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.hdr_data) != 56); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.ptl_index) != 64); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.put.offset) != 68); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) != 4); - - /* Get */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.return_wmd) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) != 16); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.match_bits) != 48); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) != 8); - 
BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.ptl_index) != 56); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.src_offset) != 60); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) != 4); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.get.sink_length) != 64); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) != 4); - - /* Reply */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) != 16); - - /* Hello */ - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.incarnation) != 32); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) != 8); - BUILD_BUG_ON((int)offsetof(struct lnet_hdr, msg.hello.type) != 40); - BUILD_BUG_ON((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) != 4); -} - -static struct lnet_lnd * -lnet_find_lnd_by_type(__u32 type) -{ - struct lnet_lnd *lnd; - struct list_head *tmp; - - /* holding lnd mutex */ - list_for_each(tmp, &the_lnet.ln_lnds) { - lnd = list_entry(tmp, struct lnet_lnd, lnd_list); - - if (lnd->lnd_type == type) - return lnd; - } - - return NULL; -} - -void -lnet_register_lnd(struct lnet_lnd *lnd) -{ - mutex_lock(&the_lnet.ln_lnd_mutex); - - LASSERT(libcfs_isknown_lnd(lnd->lnd_type)); - LASSERT(!lnet_find_lnd_by_type(lnd->lnd_type)); - - list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds); - lnd->lnd_refcount = 0; - - CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type)); - - mutex_unlock(&the_lnet.ln_lnd_mutex); -} -EXPORT_SYMBOL(lnet_register_lnd); - -void -lnet_unregister_lnd(struct lnet_lnd *lnd) -{ - mutex_lock(&the_lnet.ln_lnd_mutex); - - LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd); - LASSERT(!lnd->lnd_refcount); - - list_del(&lnd->lnd_list); - CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type)); - - mutex_unlock(&the_lnet.ln_lnd_mutex); -} 
-EXPORT_SYMBOL(lnet_unregister_lnd); - -void -lnet_counters_get(struct lnet_counters *counters) -{ - struct lnet_counters *ctr; - int i; - - memset(counters, 0, sizeof(*counters)); - - lnet_net_lock(LNET_LOCK_EX); - - cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) { - counters->msgs_max += ctr->msgs_max; - counters->msgs_alloc += ctr->msgs_alloc; - counters->errors += ctr->errors; - counters->send_count += ctr->send_count; - counters->recv_count += ctr->recv_count; - counters->route_count += ctr->route_count; - counters->drop_count += ctr->drop_count; - counters->send_length += ctr->send_length; - counters->recv_length += ctr->recv_length; - counters->route_length += ctr->route_length; - counters->drop_length += ctr->drop_length; - } - lnet_net_unlock(LNET_LOCK_EX); -} -EXPORT_SYMBOL(lnet_counters_get); - -void -lnet_counters_reset(void) -{ - struct lnet_counters *counters; - int i; - - lnet_net_lock(LNET_LOCK_EX); - - cfs_percpt_for_each(counters, i, the_lnet.ln_counters) - memset(counters, 0, sizeof(struct lnet_counters)); - - lnet_net_unlock(LNET_LOCK_EX); -} - -static char * -lnet_res_type2str(int type) -{ - switch (type) { - default: - LBUG(); - case LNET_COOKIE_TYPE_MD: - return "MD"; - case LNET_COOKIE_TYPE_ME: - return "ME"; - case LNET_COOKIE_TYPE_EQ: - return "EQ"; - } -} - -static void -lnet_res_container_cleanup(struct lnet_res_container *rec) -{ - int count = 0; - - if (!rec->rec_type) /* not set yet, it's uninitialized */ - return; - - while (!list_empty(&rec->rec_active)) { - struct list_head *e = rec->rec_active.next; - - list_del_init(e); - if (rec->rec_type == LNET_COOKIE_TYPE_EQ) { - kfree(list_entry(e, struct lnet_eq, eq_list)); - - } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) { - kfree(list_entry(e, struct lnet_libmd, md_list)); - - } else { /* NB: Active MEs should be attached on portals */ - LBUG(); - } - count++; - } - - if (count > 0) { - /* - * Found alive MD/ME/EQ, user really should unlink/free - * all of them before finalize 
LNet, but if someone didn't, - * we have to recycle garbage for him - */ - CERROR("%d active elements on exit of %s container\n", - count, lnet_res_type2str(rec->rec_type)); - } - - kfree(rec->rec_lh_hash); - rec->rec_lh_hash = NULL; - - rec->rec_type = 0; /* mark it as finalized */ -} - -static int -lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type) -{ - int rc = 0; - int i; - - LASSERT(!rec->rec_type); - - rec->rec_type = type; - INIT_LIST_HEAD(&rec->rec_active); - rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type; - - /* Arbitrary choice of hash table size */ - rec->rec_lh_hash = kvmalloc_cpt(LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]), - GFP_KERNEL, cpt); - if (!rec->rec_lh_hash) { - rc = -ENOMEM; - goto out; - } - - for (i = 0; i < LNET_LH_HASH_SIZE; i++) - INIT_LIST_HEAD(&rec->rec_lh_hash[i]); - - return 0; - -out: - CERROR("Failed to setup %s resource container\n", - lnet_res_type2str(type)); - lnet_res_container_cleanup(rec); - return rc; -} - -static void -lnet_res_containers_destroy(struct lnet_res_container **recs) -{ - struct lnet_res_container *rec; - int i; - - cfs_percpt_for_each(rec, i, recs) - lnet_res_container_cleanup(rec); - - cfs_percpt_free(recs); -} - -static struct lnet_res_container ** -lnet_res_containers_create(int type) -{ - struct lnet_res_container **recs; - struct lnet_res_container *rec; - int rc; - int i; - - recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec)); - if (!recs) { - CERROR("Failed to allocate %s resource containers\n", - lnet_res_type2str(type)); - return NULL; - } - - cfs_percpt_for_each(rec, i, recs) { - rc = lnet_res_container_setup(rec, i, type); - if (rc) { - lnet_res_containers_destroy(recs); - return NULL; - } - } - - return recs; -} - -struct lnet_libhandle * -lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie) -{ - /* ALWAYS called with lnet_res_lock held */ - struct list_head *head; - struct lnet_libhandle *lh; - unsigned int hash; - - if ((cookie & 
LNET_COOKIE_MASK) != rec->rec_type) - return NULL; - - hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS); - head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK]; - - list_for_each_entry(lh, head, lh_hash_chain) { - if (lh->lh_cookie == cookie) - return lh; - } - - return NULL; -} - -void -lnet_res_lh_initialize(struct lnet_res_container *rec, - struct lnet_libhandle *lh) -{ - /* ALWAYS called with lnet_res_lock held */ - unsigned int ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS; - unsigned int hash; - - lh->lh_cookie = rec->rec_lh_cookie; - rec->rec_lh_cookie += 1 << ibits; - - hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK; - - list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]); -} - -static int lnet_unprepare(void); - -static int -lnet_prepare(lnet_pid_t requested_pid) -{ - /* Prepare to bring up the network */ - struct lnet_res_container **recs; - int rc = 0; - - if (requested_pid == LNET_PID_ANY) { - /* Don't instantiate LNET just for me */ - return -ENETDOWN; - } - - LASSERT(!the_lnet.ln_refcount); - - the_lnet.ln_routing = 0; - - LASSERT(!(requested_pid & LNET_PID_USERFLAG)); - the_lnet.ln_pid = requested_pid; - - INIT_LIST_HEAD(&the_lnet.ln_test_peers); - INIT_LIST_HEAD(&the_lnet.ln_nis); - INIT_LIST_HEAD(&the_lnet.ln_nis_cpt); - INIT_LIST_HEAD(&the_lnet.ln_nis_zombie); - INIT_LIST_HEAD(&the_lnet.ln_routers); - INIT_LIST_HEAD(&the_lnet.ln_drop_rules); - INIT_LIST_HEAD(&the_lnet.ln_delay_rules); - - rc = lnet_create_remote_nets_table(); - if (rc) - goto failed; - /* - * NB the interface cookie in wire handles guards against delayed - * replies and ACKs appearing valid after reboot. 
- */ - the_lnet.ln_interface_cookie = ktime_get_ns(); - - the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(struct lnet_counters)); - if (!the_lnet.ln_counters) { - CERROR("Failed to allocate counters for LNet\n"); - rc = -ENOMEM; - goto failed; - } - - rc = lnet_peer_tables_create(); - if (rc) - goto failed; - - rc = lnet_msg_containers_create(); - if (rc) - goto failed; - - rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0, - LNET_COOKIE_TYPE_EQ); - if (rc) - goto failed; - - recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME); - if (!recs) { - rc = -ENOMEM; - goto failed; - } - - the_lnet.ln_me_containers = recs; - - recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD); - if (!recs) { - rc = -ENOMEM; - goto failed; - } - - the_lnet.ln_md_containers = recs; - - rc = lnet_portals_create(); - if (rc) { - CERROR("Failed to create portals for LNet: %d\n", rc); - goto failed; - } - - return 0; - - failed: - lnet_unprepare(); - return rc; -} - -static int -lnet_unprepare(void) -{ - /* - * NB no LNET_LOCK since this is the last reference. 
All LND instances - * have shut down already, so it is safe to unlink and free all - * descriptors, even those that appear committed to a network op (eg MD - * with non-zero pending count) - */ - lnet_fail_nid(LNET_NID_ANY, 0); - - LASSERT(!the_lnet.ln_refcount); - LASSERT(list_empty(&the_lnet.ln_test_peers)); - LASSERT(list_empty(&the_lnet.ln_nis)); - LASSERT(list_empty(&the_lnet.ln_nis_cpt)); - LASSERT(list_empty(&the_lnet.ln_nis_zombie)); - - lnet_portals_destroy(); - - if (the_lnet.ln_md_containers) { - lnet_res_containers_destroy(the_lnet.ln_md_containers); - the_lnet.ln_md_containers = NULL; - } - - if (the_lnet.ln_me_containers) { - lnet_res_containers_destroy(the_lnet.ln_me_containers); - the_lnet.ln_me_containers = NULL; - } - - lnet_res_container_cleanup(&the_lnet.ln_eq_container); - - lnet_msg_containers_destroy(); - lnet_peer_tables_destroy(); - lnet_rtrpools_free(0); - - if (the_lnet.ln_counters) { - cfs_percpt_free(the_lnet.ln_counters); - the_lnet.ln_counters = NULL; - } - lnet_destroy_remote_nets_table(); - - return 0; -} - -struct lnet_ni * -lnet_net2ni_locked(__u32 net, int cpt) -{ - struct list_head *tmp; - struct lnet_ni *ni; - - LASSERT(cpt != LNET_LOCK_EX); - - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, struct lnet_ni, ni_list); - - if (LNET_NIDNET(ni->ni_nid) == net) { - lnet_ni_addref_locked(ni, cpt); - return ni; - } - } - - return NULL; -} - -struct lnet_ni * -lnet_net2ni(__u32 net) -{ - struct lnet_ni *ni; - - lnet_net_lock(0); - ni = lnet_net2ni_locked(net, 0); - lnet_net_unlock(0); - - return ni; -} -EXPORT_SYMBOL(lnet_net2ni); - -static unsigned int -lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number) -{ - __u64 key = nid; - unsigned int val; - - LASSERT(number >= 1 && number <= LNET_CPT_NUMBER); - - if (number == 1) - return 0; - - val = hash_long(key, LNET_CPT_BITS); - /* NB: LNET_CP_NUMBER doesn't have to be PO2 */ - if (val < number) - return val; - - return (unsigned int)(key + val + (val >> 1)) % number; -} 
- -int -lnet_cpt_of_nid_locked(lnet_nid_t nid) -{ - struct lnet_ni *ni; - - /* must called with hold of lnet_net_lock */ - if (LNET_CPT_NUMBER == 1) - return 0; /* the only one */ - - /* take lnet_net_lock(any) would be OK */ - if (!list_empty(&the_lnet.ln_nis_cpt)) { - list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) { - if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) - continue; - - LASSERT(ni->ni_cpts); - return ni->ni_cpts[lnet_nid_cpt_hash - (nid, ni->ni_ncpts)]; - } - } - - return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); -} - -int -lnet_cpt_of_nid(lnet_nid_t nid) -{ - int cpt; - int cpt2; - - if (LNET_CPT_NUMBER == 1) - return 0; /* the only one */ - - if (list_empty(&the_lnet.ln_nis_cpt)) - return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); - - cpt = lnet_net_lock_current(); - cpt2 = lnet_cpt_of_nid_locked(nid); - lnet_net_unlock(cpt); - - return cpt2; -} -EXPORT_SYMBOL(lnet_cpt_of_nid); - -int -lnet_islocalnet(__u32 net) -{ - struct lnet_ni *ni; - int cpt; - - cpt = lnet_net_lock_current(); - - ni = lnet_net2ni_locked(net, cpt); - if (ni) - lnet_ni_decref_locked(ni, cpt); - - lnet_net_unlock(cpt); - - return !!ni; -} - -struct lnet_ni * -lnet_nid2ni_locked(lnet_nid_t nid, int cpt) -{ - struct lnet_ni *ni; - struct list_head *tmp; - - LASSERT(cpt != LNET_LOCK_EX); - - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, struct lnet_ni, ni_list); - - if (ni->ni_nid == nid) { - lnet_ni_addref_locked(ni, cpt); - return ni; - } - } - - return NULL; -} - -int -lnet_islocalnid(lnet_nid_t nid) -{ - struct lnet_ni *ni; - int cpt; - - cpt = lnet_net_lock_current(); - ni = lnet_nid2ni_locked(nid, cpt); - if (ni) - lnet_ni_decref_locked(ni, cpt); - lnet_net_unlock(cpt); - - return !!ni; -} - -int -lnet_count_acceptor_nis(void) -{ - /* Return the # of NIs that need the acceptor. 
*/ - int count = 0; - struct list_head *tmp; - struct lnet_ni *ni; - int cpt; - - cpt = lnet_net_lock_current(); - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, struct lnet_ni, ni_list); - - if (ni->ni_lnd->lnd_accept) - count++; - } - - lnet_net_unlock(cpt); - - return count; -} - -static struct lnet_ping_info * -lnet_ping_info_create(int num_ni) -{ - struct lnet_ping_info *ping_info; - unsigned int infosz; - - infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]); - ping_info = kvzalloc(infosz, GFP_KERNEL); - if (!ping_info) { - CERROR("Can't allocate ping info[%d]\n", num_ni); - return NULL; - } - - ping_info->pi_nnis = num_ni; - ping_info->pi_pid = the_lnet.ln_pid; - ping_info->pi_magic = LNET_PROTO_PING_MAGIC; - ping_info->pi_features = LNET_PING_FEAT_NI_STATUS; - - return ping_info; -} - -static inline int -lnet_get_ni_count(void) -{ - struct lnet_ni *ni; - int count = 0; - - lnet_net_lock(0); - - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) - count++; - - lnet_net_unlock(0); - - return count; -} - -static inline void -lnet_ping_info_free(struct lnet_ping_info *pinfo) -{ - kvfree(pinfo); -} - -static void -lnet_ping_info_destroy(void) -{ - struct lnet_ni *ni; - - lnet_net_lock(LNET_LOCK_EX); - - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - lnet_ni_lock(ni); - ni->ni_status = NULL; - lnet_ni_unlock(ni); - } - - lnet_ping_info_free(the_lnet.ln_ping_info); - the_lnet.ln_ping_info = NULL; - - lnet_net_unlock(LNET_LOCK_EX); -} - -static void -lnet_ping_event_handler(struct lnet_event *event) -{ - struct lnet_ping_info *pinfo = event->md.user_ptr; - - if (event->unlinked) - pinfo->pi_features = LNET_PING_FEAT_INVAL; -} - -static int -lnet_ping_info_setup(struct lnet_ping_info **ppinfo, - struct lnet_handle_md *md_handle, - int ni_count, bool set_eq) -{ - struct lnet_process_id id = {LNET_NID_ANY, LNET_PID_ANY}; - struct lnet_handle_me me_handle; - struct lnet_md md = { NULL }; - int rc, rc2; - - if (set_eq) { - rc = LNetEQAlloc(0, 
lnet_ping_event_handler, - &the_lnet.ln_ping_target_eq); - if (rc) { - CERROR("Can't allocate ping EQ: %d\n", rc); - return rc; - } - } - - *ppinfo = lnet_ping_info_create(ni_count); - if (!*ppinfo) { - rc = -ENOMEM; - goto failed_0; - } - - rc = LNetMEAttach(LNET_RESERVED_PORTAL, id, - LNET_PROTO_PING_MATCHBITS, 0, - LNET_UNLINK, LNET_INS_AFTER, - &me_handle); - if (rc) { - CERROR("Can't create ping ME: %d\n", rc); - goto failed_1; - } - - /* initialize md content */ - md.start = *ppinfo; - md.length = offsetof(struct lnet_ping_info, - pi_ni[(*ppinfo)->pi_nnis]); - md.threshold = LNET_MD_THRESH_INF; - md.max_size = 0; - md.options = LNET_MD_OP_GET | LNET_MD_TRUNCATE | - LNET_MD_MANAGE_REMOTE; - md.user_ptr = NULL; - md.eq_handle = the_lnet.ln_ping_target_eq; - md.user_ptr = *ppinfo; - - rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle); - if (rc) { - CERROR("Can't attach ping MD: %d\n", rc); - goto failed_2; - } - - return 0; - -failed_2: - rc2 = LNetMEUnlink(me_handle); - LASSERT(!rc2); -failed_1: - lnet_ping_info_free(*ppinfo); - *ppinfo = NULL; -failed_0: - if (set_eq) - LNetEQFree(the_lnet.ln_ping_target_eq); - return rc; -} - -static void -lnet_ping_md_unlink(struct lnet_ping_info *pinfo, - struct lnet_handle_md *md_handle) -{ - LNetMDUnlink(*md_handle); - LNetInvalidateMDHandle(md_handle); - - /* NB md could be busy; this just starts the unlink */ - while (pinfo->pi_features != LNET_PING_FEAT_INVAL) { - CDEBUG(D_NET, "Still waiting for ping MD to unlink\n"); - set_current_state(TASK_NOLOAD); - schedule_timeout(HZ); - } -} - -static void -lnet_ping_info_install_locked(struct lnet_ping_info *ping_info) -{ - struct lnet_ni_status *ns; - struct lnet_ni *ni; - int i = 0; - - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - LASSERT(i < ping_info->pi_nnis); - - ns = &ping_info->pi_ni[i]; - - ns->ns_nid = ni->ni_nid; - - lnet_ni_lock(ni); - ns->ns_status = (ni->ni_status) ? 
- ni->ni_status->ns_status : LNET_NI_STATUS_UP; - ni->ni_status = ns; - lnet_ni_unlock(ni); - - i++; - } -} - -static void -lnet_ping_target_update(struct lnet_ping_info *pinfo, - struct lnet_handle_md md_handle) -{ - struct lnet_ping_info *old_pinfo = NULL; - struct lnet_handle_md old_md; - - /* switch the NIs to point to the new ping info created */ - lnet_net_lock(LNET_LOCK_EX); - - if (!the_lnet.ln_routing) - pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED; - lnet_ping_info_install_locked(pinfo); - - if (the_lnet.ln_ping_info) { - old_pinfo = the_lnet.ln_ping_info; - old_md = the_lnet.ln_ping_target_md; - } - the_lnet.ln_ping_target_md = md_handle; - the_lnet.ln_ping_info = pinfo; - - lnet_net_unlock(LNET_LOCK_EX); - - if (old_pinfo) { - /* unlink the old ping info */ - lnet_ping_md_unlink(old_pinfo, &old_md); - lnet_ping_info_free(old_pinfo); - } -} - -static void -lnet_ping_target_fini(void) -{ - int rc; - - lnet_ping_md_unlink(the_lnet.ln_ping_info, - &the_lnet.ln_ping_target_md); - - rc = LNetEQFree(the_lnet.ln_ping_target_eq); - LASSERT(!rc); - - lnet_ping_info_destroy(); -} - -static int -lnet_ni_tq_credits(struct lnet_ni *ni) -{ - int credits; - - LASSERT(ni->ni_ncpts >= 1); - - if (ni->ni_ncpts == 1) - return ni->ni_maxtxcredits; - - credits = ni->ni_maxtxcredits / ni->ni_ncpts; - credits = max(credits, 8 * ni->ni_peertxcredits); - credits = min(credits, ni->ni_maxtxcredits); - - return credits; -} - -static void -lnet_ni_unlink_locked(struct lnet_ni *ni) -{ - if (!list_empty(&ni->ni_cptlist)) { - list_del_init(&ni->ni_cptlist); - lnet_ni_decref_locked(ni, 0); - } - - /* move it to zombie list and nobody can find it anymore */ - LASSERT(!list_empty(&ni->ni_list)); - list_move(&ni->ni_list, &the_lnet.ln_nis_zombie); - lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */ -} - -static void -lnet_clear_zombies_nis_locked(void) -{ - int i; - int islo; - struct lnet_ni *ni; - struct lnet_ni *temp; - - /* - * Now wait for the NI's I just nuked to show up on 
ln_zombie_nis - * and shut them down in guaranteed thread context - */ - i = 2; - list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis_zombie, ni_list) { - int *ref; - int j; - - list_del_init(&ni->ni_list); - cfs_percpt_for_each(ref, j, ni->ni_refs) { - if (!*ref) - continue; - /* still busy, add it back to zombie list */ - list_add(&ni->ni_list, &the_lnet.ln_nis_zombie); - break; - } - - if (!list_empty(&ni->ni_list)) { - lnet_net_unlock(LNET_LOCK_EX); - ++i; - if ((i & (-i)) == i) { - CDEBUG(D_WARNING, "Waiting for zombie LNI %s\n", - libcfs_nid2str(ni->ni_nid)); - } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - lnet_net_lock(LNET_LOCK_EX); - continue; - } - - ni->ni_lnd->lnd_refcount--; - lnet_net_unlock(LNET_LOCK_EX); - - islo = ni->ni_lnd->lnd_type == LOLND; - - LASSERT(!in_interrupt()); - ni->ni_lnd->lnd_shutdown(ni); - - /* - * can't deref lnd anymore now; it might have unregistered - * itself... - */ - if (!islo) - CDEBUG(D_LNI, "Removed LNI %s\n", - libcfs_nid2str(ni->ni_nid)); - - lnet_ni_free(ni); - i = 2; - - lnet_net_lock(LNET_LOCK_EX); - } -} - -static void -lnet_shutdown_lndnis(void) -{ - struct lnet_ni *ni; - struct lnet_ni *temp; - int i; - - /* NB called holding the global mutex */ - - /* All quiet on the API front */ - LASSERT(!the_lnet.ln_shutdown); - LASSERT(!the_lnet.ln_refcount); - LASSERT(list_empty(&the_lnet.ln_nis_zombie)); - - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_shutdown = 1; /* flag shutdown */ - - /* Unlink NIs from the global table */ - list_for_each_entry_safe(ni, temp, &the_lnet.ln_nis, ni_list) { - lnet_ni_unlink_locked(ni); - } - - /* Drop the cached loopback NI. 
*/ - if (the_lnet.ln_loni) { - lnet_ni_decref_locked(the_lnet.ln_loni, 0); - the_lnet.ln_loni = NULL; - } - - lnet_net_unlock(LNET_LOCK_EX); - - /* - * Clear lazy portals and drop delayed messages which hold refs - * on their lnet_msg::msg_rxpeer - */ - for (i = 0; i < the_lnet.ln_nportals; i++) - LNetClearLazyPortal(i); - - /* - * Clear the peer table and wait for all peers to go (they hold refs on - * their NIs) - */ - lnet_peer_tables_cleanup(NULL); - - lnet_net_lock(LNET_LOCK_EX); - - lnet_clear_zombies_nis_locked(); - the_lnet.ln_shutdown = 0; - lnet_net_unlock(LNET_LOCK_EX); -} - -/* shutdown down the NI and release refcount */ -static void -lnet_shutdown_lndni(struct lnet_ni *ni) -{ - int i; - - lnet_net_lock(LNET_LOCK_EX); - lnet_ni_unlink_locked(ni); - lnet_net_unlock(LNET_LOCK_EX); - - /* clear messages for this NI on the lazy portal */ - for (i = 0; i < the_lnet.ln_nportals; i++) - lnet_clear_lazy_portal(ni, i, "Shutting down NI"); - - /* Do peer table cleanup for this ni */ - lnet_peer_tables_cleanup(ni); - - lnet_net_lock(LNET_LOCK_EX); - lnet_clear_zombies_nis_locked(); - lnet_net_unlock(LNET_LOCK_EX); -} - -static int -lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf) -{ - struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL; - int rc = -EINVAL; - int lnd_type; - struct lnet_lnd *lnd; - struct lnet_tx_queue *tq; - int i; - u32 seed; - - lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); - - LASSERT(libcfs_isknown_lnd(lnd_type)); - - if (lnd_type == CIBLND || lnd_type == OPENIBLND || - lnd_type == IIBLND || lnd_type == VIBLND) { - CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type)); - goto failed0; - } - - /* Make sure this new NI is unique. 
*/ - lnet_net_lock(LNET_LOCK_EX); - rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis); - lnet_net_unlock(LNET_LOCK_EX); - if (!rc) { - if (lnd_type == LOLND) { - lnet_ni_free(ni); - return 0; - } - - CERROR("Net %s is not unique\n", - libcfs_net2str(LNET_NIDNET(ni->ni_nid))); - rc = -EEXIST; - goto failed0; - } - - mutex_lock(&the_lnet.ln_lnd_mutex); - lnd = lnet_find_lnd_by_type(lnd_type); - - if (!lnd) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - rc = request_module("%s", libcfs_lnd2modname(lnd_type)); - mutex_lock(&the_lnet.ln_lnd_mutex); - - lnd = lnet_find_lnd_by_type(lnd_type); - if (!lnd) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - CERROR("Can't load LND %s, module %s, rc=%d\n", - libcfs_lnd2str(lnd_type), - libcfs_lnd2modname(lnd_type), rc); - rc = -EINVAL; - goto failed0; - } - } - - lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount++; - lnet_net_unlock(LNET_LOCK_EX); - - ni->ni_lnd = lnd; - - if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf)) - lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk; - - if (lnd_tunables) { - ni->ni_lnd_tunables = kzalloc(sizeof(*ni->ni_lnd_tunables), - GFP_NOFS); - if (!ni->ni_lnd_tunables) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - rc = -ENOMEM; - goto failed0; - } - memcpy(ni->ni_lnd_tunables, lnd_tunables, - sizeof(*ni->ni_lnd_tunables)); - } - - /* - * If given some LND tunable parameters, parse those now to - * override the values in the NI structure. 
- */ - if (conf) { - if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) - ni->ni_peerrtrcredits = - conf->cfg_config_u.cfg_net.net_peer_rtr_credits; - if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) - ni->ni_peertimeout = - conf->cfg_config_u.cfg_net.net_peer_timeout; - if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1) - ni->ni_peertxcredits = - conf->cfg_config_u.cfg_net.net_peer_tx_credits; - if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) - ni->ni_maxtxcredits = - conf->cfg_config_u.cfg_net.net_max_tx_credits; - } - - rc = lnd->lnd_startup(ni); - - mutex_unlock(&the_lnet.ln_lnd_mutex); - - if (rc) { - LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n", - rc, libcfs_lnd2str(lnd->lnd_type)); - lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount--; - lnet_net_unlock(LNET_LOCK_EX); - goto failed0; - } - - LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query); - - lnet_net_lock(LNET_LOCK_EX); - /* refcount for ln_nis */ - lnet_ni_addref_locked(ni, 0); - list_add_tail(&ni->ni_list, &the_lnet.ln_nis); - if (ni->ni_cpts) { - lnet_ni_addref_locked(ni, 0); - list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt); - } - - lnet_net_unlock(LNET_LOCK_EX); - - if (lnd->lnd_type == LOLND) { - lnet_ni_addref(ni); - LASSERT(!the_lnet.ln_loni); - the_lnet.ln_loni = ni; - return 0; - } - - if (!ni->ni_peertxcredits || !ni->ni_maxtxcredits) { - LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n", - libcfs_lnd2str(lnd->lnd_type), - !ni->ni_peertxcredits ? - "" : "per-peer "); - /* - * shutdown the NI since if we get here then it must've already - * been started - */ - lnet_shutdown_lndni(ni); - return -EINVAL; - } - - cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { - tq->tq_credits_min = - tq->tq_credits_max = - tq->tq_credits = lnet_ni_tq_credits(ni); - } - - /* Nodes with small feet have little entropy. The NID for this - * node gives the most entropy in the low bits. 
- */ - seed = LNET_NIDADDR(ni->ni_nid); - add_device_randomness(&seed, sizeof(seed)); - - CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", - libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, - lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER, - ni->ni_peerrtrcredits, ni->ni_peertimeout); - - return 0; -failed0: - lnet_ni_free(ni); - return rc; -} - -static int -lnet_startup_lndnis(struct list_head *nilist) -{ - struct lnet_ni *ni; - int rc; - int ni_count = 0; - - while (!list_empty(nilist)) { - ni = list_entry(nilist->next, struct lnet_ni, ni_list); - list_del(&ni->ni_list); - rc = lnet_startup_lndni(ni, NULL); - - if (rc < 0) - goto failed; - - ni_count++; - } - - return ni_count; -failed: - lnet_shutdown_lndnis(); - - return rc; -} - -/** - * Initialize LNet library. - * - * Automatically called at module loading time. Caller has to call - * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the - * latter returned 0. It must be called exactly once. - * - * \retval 0 on success - * \retval -ve on failures. 
- */ -int lnet_lib_init(void) -{ - int rc; - - lnet_assert_wire_constants(); - - memset(&the_lnet, 0, sizeof(the_lnet)); - - /* refer to global cfs_cpt_tab for now */ - the_lnet.ln_cpt_table = cfs_cpt_tab; - the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_tab); - - LASSERT(the_lnet.ln_cpt_number > 0); - if (the_lnet.ln_cpt_number > LNET_CPT_MAX) { - /* we are under risk of consuming all lh_cookie */ - CERROR("Can't have %d CPTs for LNet (max allowed is %d), please change setting of CPT-table and retry\n", - the_lnet.ln_cpt_number, LNET_CPT_MAX); - return -E2BIG; - } - - while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number) - the_lnet.ln_cpt_bits++; - - rc = lnet_create_locks(); - if (rc) { - CERROR("Can't create LNet global locks: %d\n", rc); - return rc; - } - - the_lnet.ln_refcount = 0; - LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh); - INIT_LIST_HEAD(&the_lnet.ln_lnds); - INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie); - INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow); - - /* - * The hash table size is the number of bits it takes to express the set - * ln_num_routes, minus 1 (better to under estimate than over so we - * don't waste memory). - */ - if (rnet_htable_size <= 0) - rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; - else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX) - rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX; - the_lnet.ln_remote_nets_hbits = max_t(int, 1, - order_base_2(rnet_htable_size) - 1); - - /* - * All LNDs apart from the LOLND are in separate modules. They - * register themselves when their module loads, and unregister - * themselves when their module is unloaded. - */ - lnet_register_lnd(&the_lolnd); - return 0; -} - -/** - * Finalize LNet library. - * - * \pre lnet_lib_init() called with success. - * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls. 
- */ -void lnet_lib_exit(void) -{ - LASSERT(!the_lnet.ln_refcount); - - while (!list_empty(&the_lnet.ln_lnds)) - lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next, - struct lnet_lnd, lnd_list)); - lnet_destroy_locks(); -} - -/** - * Set LNet PID and start LNet interfaces, routing, and forwarding. - * - * Users must call this function at least once before any other functions. - * For each successful call there must be a corresponding call to - * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is - * ignored. - * - * The PID used by LNet may be different from the one requested. - * See LNetGetId(). - * - * \param requested_pid PID requested by the caller. - * - * \return >= 0 on success, and < 0 error code on failures. - */ -int -LNetNIInit(lnet_pid_t requested_pid) -{ - int im_a_router = 0; - int rc; - int ni_count; - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; - struct list_head net_head; - - INIT_LIST_HEAD(&net_head); - - mutex_lock(&the_lnet.ln_api_mutex); - - CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount); - - if (the_lnet.ln_refcount > 0) { - rc = the_lnet.ln_refcount++; - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - } - - rc = lnet_prepare(requested_pid); - if (rc) { - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - } - - /* Add in the loopback network */ - if (!lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head)) { - rc = -ENOMEM; - goto err_empty_list; - } - - /* - * If LNet is being initialized via DLC it is possible - * that the user requests not to load module parameters (ones which - * are supported by DLC) on initialization. Therefore, make sure not - * to load networks, routes and forwarding from module parameters - * in this case. 
On cleanup in case of failure only clean up - * routes if it has been loaded - */ - if (!the_lnet.ln_nis_from_mod_params) { - rc = lnet_parse_networks(&net_head, lnet_get_networks()); - if (rc < 0) - goto err_empty_list; - } - - ni_count = lnet_startup_lndnis(&net_head); - if (ni_count < 0) { - rc = ni_count; - goto err_empty_list; - } - - if (!the_lnet.ln_nis_from_mod_params) { - rc = lnet_parse_routes(lnet_get_routes(), &im_a_router); - if (rc) - goto err_shutdown_lndnis; - - rc = lnet_check_routes(); - if (rc) - goto err_destroy_routes; - - rc = lnet_rtrpools_alloc(im_a_router); - if (rc) - goto err_destroy_routes; - } - - rc = lnet_acceptor_start(); - if (rc) - goto err_destroy_routes; - - the_lnet.ln_refcount = 1; - /* Now I may use my own API functions... */ - - rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true); - if (rc) - goto err_acceptor_stop; - - lnet_ping_target_update(pinfo, md_handle); - - rc = lnet_router_checker_start(); - if (rc) - goto err_stop_ping; - - lnet_fault_init(); - lnet_router_debugfs_init(); - - mutex_unlock(&the_lnet.ln_api_mutex); - - return 0; - -err_stop_ping: - lnet_ping_target_fini(); -err_acceptor_stop: - the_lnet.ln_refcount = 0; - lnet_acceptor_stop(); -err_destroy_routes: - if (!the_lnet.ln_nis_from_mod_params) - lnet_destroy_routes(); -err_shutdown_lndnis: - lnet_shutdown_lndnis(); -err_empty_list: - lnet_unprepare(); - LASSERT(rc < 0); - mutex_unlock(&the_lnet.ln_api_mutex); - while (!list_empty(&net_head)) { - struct lnet_ni *ni; - - ni = list_entry(net_head.next, struct lnet_ni, ni_list); - list_del_init(&ni->ni_list); - lnet_ni_free(ni); - } - return rc; -} -EXPORT_SYMBOL(LNetNIInit); - -/** - * Stop LNet interfaces, routing, and forwarding. - * - * Users must call this function once for each successful call to LNetNIInit(). - * Once the LNetNIFini() operation has been started, the results of pending - * API operations are undefined. - * - * \return always 0 for current implementation. 
- */ -int -LNetNIFini(void) -{ - mutex_lock(&the_lnet.ln_api_mutex); - - LASSERT(the_lnet.ln_refcount > 0); - - if (the_lnet.ln_refcount != 1) { - the_lnet.ln_refcount--; - } else { - LASSERT(!the_lnet.ln_niinit_self); - - lnet_fault_fini(); - lnet_router_debugfs_fini(); - lnet_router_checker_stop(); - lnet_ping_target_fini(); - - /* Teardown fns that use my own API functions BEFORE here */ - the_lnet.ln_refcount = 0; - - lnet_acceptor_stop(); - lnet_destroy_routes(); - lnet_shutdown_lndnis(); - lnet_unprepare(); - } - - mutex_unlock(&the_lnet.ln_api_mutex); - return 0; -} -EXPORT_SYMBOL(LNetNIFini); - -/** - * Grabs the ni data from the ni structure and fills the out - * parameters - * - * \param[in] ni network interface structure - * \param[out] config NI configuration - */ -static void -lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config) -{ - struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL; - struct lnet_ioctl_net_config *net_config; - size_t min_size, tunable_size = 0; - int i; - - if (!ni || !config) - return; - - net_config = (struct lnet_ioctl_net_config *)config->cfg_bulk; - if (!net_config) - return; - - BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) != - ARRAY_SIZE(net_config->ni_interfaces)); - - for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) { - if (!ni->ni_interfaces[i]) - break; - - strncpy(net_config->ni_interfaces[i], - ni->ni_interfaces[i], - sizeof(net_config->ni_interfaces[i])); - } - - config->cfg_nid = ni->ni_nid; - config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout; - config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits; - config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits; - config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits; - - net_config->ni_status = ni->ni_status->ns_status; - - if (ni->ni_cpts) { - int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT); - - for (i = 0; i < num_cpts; i++) - net_config->ni_cpts[i] = ni->ni_cpts[i]; 
- - config->cfg_ncpts = num_cpts; - } - - /* - * See if user land tools sent in a newer and larger version - * of struct lnet_tunables than what the kernel uses. - */ - min_size = sizeof(*config) + sizeof(*net_config); - - if (config->cfg_hdr.ioc_len > min_size) - tunable_size = config->cfg_hdr.ioc_len - min_size; - - /* Don't copy to much data to user space */ - min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables)); - lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk; - - if (ni->ni_lnd_tunables && lnd_cfg && min_size) { - memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size); - config->cfg_config_u.cfg_net.net_interface_count = 1; - - /* Tell user land that kernel side has less data */ - if (tunable_size > sizeof(*ni->ni_lnd_tunables)) { - min_size = tunable_size - sizeof(ni->ni_lnd_tunables); - config->cfg_hdr.ioc_len -= min_size; - } - } -} - -static int -lnet_get_net_config(struct lnet_ioctl_config_data *config) -{ - struct lnet_ni *ni; - struct list_head *tmp; - int idx = config->cfg_count; - int cpt, i = 0; - int rc = -ENOENT; - - cpt = lnet_net_lock_current(); - - list_for_each(tmp, &the_lnet.ln_nis) { - if (i++ != idx) - continue; - - ni = list_entry(tmp, struct lnet_ni, ni_list); - lnet_ni_lock(ni); - lnet_fill_ni_info(ni, config); - lnet_ni_unlock(ni); - rc = 0; - break; - } - - lnet_net_unlock(cpt); - return rc; -} - -int -lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf) -{ - char *nets = conf->cfg_config_u.cfg_net.net_intf; - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; - struct lnet_ni *ni; - struct list_head net_head; - struct lnet_remotenet *rnet; - int rc; - - INIT_LIST_HEAD(&net_head); - - /* Create a ni structure for the network string */ - rc = lnet_parse_networks(&net_head, nets); - if (rc <= 0) - return !rc ? 
-EINVAL : rc; - - mutex_lock(&the_lnet.ln_api_mutex); - - if (rc > 1) { - rc = -EINVAL; /* only add one interface per call */ - goto failed0; - } - - ni = list_entry(net_head.next, struct lnet_ni, ni_list); - - lnet_net_lock(LNET_LOCK_EX); - rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid)); - lnet_net_unlock(LNET_LOCK_EX); - /* - * make sure that the net added doesn't invalidate the current - * configuration LNet is keeping - */ - if (rnet) { - CERROR("Adding net %s will invalidate routing configuration\n", - nets); - rc = -EUSERS; - goto failed0; - } - - rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(), - false); - if (rc) - goto failed0; - - list_del_init(&ni->ni_list); - - rc = lnet_startup_lndni(ni, conf); - if (rc) - goto failed1; - - if (ni->ni_lnd->lnd_accept) { - rc = lnet_acceptor_start(); - if (rc < 0) { - /* shutdown the ni that we just started */ - CERROR("Failed to start up acceptor thread\n"); - lnet_shutdown_lndni(ni); - goto failed1; - } - } - - lnet_ping_target_update(pinfo, md_handle); - mutex_unlock(&the_lnet.ln_api_mutex); - - return 0; - -failed1: - lnet_ping_md_unlink(pinfo, &md_handle); - lnet_ping_info_free(pinfo); -failed0: - mutex_unlock(&the_lnet.ln_api_mutex); - while (!list_empty(&net_head)) { - ni = list_entry(net_head.next, struct lnet_ni, ni_list); - list_del_init(&ni->ni_list); - lnet_ni_free(ni); - } - return rc; -} - -int -lnet_dyn_del_ni(__u32 net) -{ - struct lnet_ni *ni; - struct lnet_ping_info *pinfo; - struct lnet_handle_md md_handle; - int rc; - - /* don't allow userspace to shutdown the LOLND */ - if (LNET_NETTYP(net) == LOLND) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - /* create and link a new ping info, before removing the old one */ - rc = lnet_ping_info_setup(&pinfo, &md_handle, - lnet_get_ni_count() - 1, false); - if (rc) - goto out; - - ni = lnet_net2ni(net); - if (!ni) { - rc = -EINVAL; - goto failed; - } - - /* decrement the reference counter taken by lnet_net2ni() */ - 
lnet_ni_decref_locked(ni, 0); - - lnet_shutdown_lndni(ni); - - if (!lnet_count_acceptor_nis()) - lnet_acceptor_stop(); - - lnet_ping_target_update(pinfo, md_handle); - goto out; -failed: - lnet_ping_md_unlink(pinfo, &md_handle); - lnet_ping_info_free(pinfo); -out: - mutex_unlock(&the_lnet.ln_api_mutex); - - return rc; -} - -/** - * LNet ioctl handler. - * - */ -int -LNetCtl(unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - struct lnet_ioctl_config_data *config; - struct lnet_process_id id = {0}; - struct lnet_ni *ni; - int rc; - unsigned long secs_passed; - - BUILD_BUG_ON(LIBCFS_IOC_DATA_MAX < - sizeof(struct lnet_ioctl_net_config) + - sizeof(struct lnet_ioctl_config_data)); - - switch (cmd) { - case IOC_LIBCFS_GET_NI: - rc = LNetGetId(data->ioc_count, &id); - data->ioc_nid = id.nid; - return rc; - - case IOC_LIBCFS_FAIL_NID: - return lnet_fail_nid(data->ioc_nid, data->ioc_count); - - case IOC_LIBCFS_ADD_ROUTE: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - rc = lnet_add_route(config->cfg_net, - config->cfg_config_u.cfg_route.rtr_hop, - config->cfg_nid, - config->cfg_config_u.cfg_route.rtr_priority); - if (!rc) { - rc = lnet_check_routes(); - if (rc) - lnet_del_route(config->cfg_net, - config->cfg_nid); - } - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - - case IOC_LIBCFS_DEL_ROUTE: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - rc = lnet_del_route(config->cfg_net, config->cfg_nid); - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - - case IOC_LIBCFS_GET_ROUTE: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - return lnet_get_route(config->cfg_count, - &config->cfg_net, - &config->cfg_config_u.cfg_route.rtr_hop, - &config->cfg_nid, - &config->cfg_config_u.cfg_route.rtr_flags, - &config->cfg_config_u.cfg_route.rtr_priority); - - case 
IOC_LIBCFS_GET_NET: { - size_t total = sizeof(*config) + - sizeof(struct lnet_ioctl_net_config); - config = arg; - - if (config->cfg_hdr.ioc_len < total) - return -EINVAL; - - return lnet_get_net_config(config); - } - - case IOC_LIBCFS_GET_LNET_STATS: { - struct lnet_ioctl_lnet_stats *lnet_stats = arg; - - if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats)) - return -EINVAL; - - lnet_counters_get(&lnet_stats->st_cntrs); - return 0; - } - - case IOC_LIBCFS_CONFIG_RTR: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - if (config->cfg_config_u.cfg_buffers.buf_enable) { - rc = lnet_rtrpools_enable(); - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - } - lnet_rtrpools_disable(); - mutex_unlock(&the_lnet.ln_api_mutex); - return 0; - - case IOC_LIBCFS_ADD_BUF: - config = arg; - - if (config->cfg_hdr.ioc_len < sizeof(*config)) - return -EINVAL; - - mutex_lock(&the_lnet.ln_api_mutex); - rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.buf_tiny, - config->cfg_config_u.cfg_buffers.buf_small, - config->cfg_config_u.cfg_buffers.buf_large); - mutex_unlock(&the_lnet.ln_api_mutex); - return rc; - - case IOC_LIBCFS_GET_BUF: { - struct lnet_ioctl_pool_cfg *pool_cfg; - size_t total = sizeof(*config) + sizeof(*pool_cfg); - - config = arg; - - if (config->cfg_hdr.ioc_len < total) - return -EINVAL; - - pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk; - return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg); - } - - case IOC_LIBCFS_GET_PEER_INFO: { - struct lnet_ioctl_peer *peer_info = arg; - - if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info)) - return -EINVAL; - - return lnet_get_peer_info(peer_info->pr_count, - &peer_info->pr_nid, - peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness, - &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt, - &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount, - &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits, - 
&peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits, - &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits, - &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits, - &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob); - } - - case IOC_LIBCFS_NOTIFY_ROUTER: - secs_passed = (ktime_get_real_seconds() - data->ioc_u64[0]); - secs_passed *= msecs_to_jiffies(MSEC_PER_SEC); - - return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, - jiffies - secs_passed); - - case IOC_LIBCFS_LNET_DIST: - rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]); - if (rc < 0 && rc != -EHOSTUNREACH) - return rc; - - data->ioc_u32[0] = rc; - return 0; - - case IOC_LIBCFS_TESTPROTOCOMPAT: - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_testprotocompat = data->ioc_flags; - lnet_net_unlock(LNET_LOCK_EX); - return 0; - - case IOC_LIBCFS_LNET_FAULT: - return lnet_fault_ctl(data->ioc_flags, data); - - case IOC_LIBCFS_PING: - id.nid = data->ioc_nid; - id.pid = data->ioc_u32[0]; - rc = lnet_ping(id, data->ioc_u32[1], /* timeout */ - data->ioc_pbuf1, - data->ioc_plen1 / sizeof(struct lnet_process_id)); - if (rc < 0) - return rc; - data->ioc_count = rc; - return 0; - - default: - ni = lnet_net2ni(data->ioc_net); - if (!ni) - return -EINVAL; - - if (!ni->ni_lnd->lnd_ctl) - rc = -EINVAL; - else - rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg); - - lnet_ni_decref(ni); - return rc; - } - /* not reached */ -} -EXPORT_SYMBOL(LNetCtl); - -void LNetDebugPeer(struct lnet_process_id id) -{ - lnet_debug_peer(id.nid); -} -EXPORT_SYMBOL(LNetDebugPeer); - -/** - * Retrieve the lnet_process_id ID of LNet interface at \a index. Note that - * all interfaces share a same PID, as requested by LNetNIInit(). - * - * \param index Index of the interface to look up. - * \param id On successful return, this location will hold the - * lnet_process_id ID of the interface. - * - * \retval 0 If an interface exists at \a index. - * \retval -ENOENT If no interface has been found. 
- */ -int -LNetGetId(unsigned int index, struct lnet_process_id *id) -{ - struct lnet_ni *ni; - struct list_head *tmp; - int cpt; - int rc = -ENOENT; - - LASSERT(the_lnet.ln_refcount > 0); - - cpt = lnet_net_lock_current(); - - list_for_each(tmp, &the_lnet.ln_nis) { - if (index--) - continue; - - ni = list_entry(tmp, struct lnet_ni, ni_list); - - id->nid = ni->ni_nid; - id->pid = the_lnet.ln_pid; - rc = 0; - break; - } - - lnet_net_unlock(cpt); - return rc; -} -EXPORT_SYMBOL(LNetGetId); - -static int lnet_ping(struct lnet_process_id id, int timeout_ms, - struct lnet_process_id __user *ids, int n_ids) -{ - struct lnet_handle_eq eqh; - struct lnet_handle_md mdh; - struct lnet_event event; - struct lnet_md md = { NULL }; - int which; - int unlinked = 0; - int replied = 0; - const int a_long_time = 60000; /* mS */ - int infosz; - struct lnet_ping_info *info; - struct lnet_process_id tmpid; - int i; - int nob; - int rc; - int rc2; - - infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]); - - if (n_ids <= 0 || - id.nid == LNET_NID_ANY || - timeout_ms > 500000 || /* arbitrary limit! */ - n_ids > 20) /* arbitrary limit! */ - return -EINVAL; - - if (id.pid == LNET_PID_ANY) - id.pid = LNET_PID_LUSTRE; - - info = kzalloc(infosz, GFP_KERNEL); - if (!info) - return -ENOMEM; - - /* NB 2 events max (including any unlink event) */ - rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh); - if (rc) { - CERROR("Can't allocate EQ: %d\n", rc); - goto out_0; - } - - /* initialize md content */ - md.start = info; - md.length = infosz; - md.threshold = 2; /*GET/REPLY*/ - md.max_size = 0; - md.options = LNET_MD_TRUNCATE; - md.user_ptr = NULL; - md.eq_handle = eqh; - - rc = LNetMDBind(md, LNET_UNLINK, &mdh); - if (rc) { - CERROR("Can't bind MD: %d\n", rc); - goto out_1; - } - - rc = LNetGet(LNET_NID_ANY, mdh, id, - LNET_RESERVED_PORTAL, - LNET_PROTO_PING_MATCHBITS, 0); - - if (rc) { - /* Don't CERROR; this could be deliberate! 
*/ - - rc2 = LNetMDUnlink(mdh); - LASSERT(!rc2); - - /* NB must wait for the UNLINK event below... */ - unlinked = 1; - timeout_ms = a_long_time; - } - - do { - /* MUST block for unlink to complete */ - - rc2 = LNetEQPoll(&eqh, 1, timeout_ms, !unlinked, - &event, &which); - - CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2, - (rc2 <= 0) ? -1 : event.type, - (rc2 <= 0) ? -1 : event.status, - (rc2 > 0 && event.unlinked) ? " unlinked" : ""); - - LASSERT(rc2 != -EOVERFLOW); /* can't miss anything */ - - if (rc2 <= 0 || event.status) { - /* timeout or error */ - if (!replied && !rc) - rc = (rc2 < 0) ? rc2 : - !rc2 ? -ETIMEDOUT : - event.status; - - if (!unlinked) { - /* Ensure completion in finite time... */ - LNetMDUnlink(mdh); - /* No assertion (racing with network) */ - unlinked = 1; - timeout_ms = a_long_time; - } else if (!rc2) { - /* timed out waiting for unlink */ - CWARN("ping %s: late network completion\n", - libcfs_id2str(id)); - } - } else if (event.type == LNET_EVENT_REPLY) { - replied = 1; - rc = event.mlength; - } - - } while (rc2 <= 0 || !event.unlinked); - - if (!replied) { - if (rc >= 0) - CWARN("%s: Unexpected rc >= 0 but no reply!\n", - libcfs_id2str(id)); - rc = -EIO; - goto out_1; - } - - nob = rc; - LASSERT(nob >= 0 && nob <= infosz); - - rc = -EPROTO; /* if I can't parse... 
*/ - - if (nob < 8) { - /* can't check magic/version */ - CERROR("%s: ping info too short %d\n", - libcfs_id2str(id), nob); - goto out_1; - } - - if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) { - lnet_swap_pinginfo(info); - } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) { - CERROR("%s: Unexpected magic %08x\n", - libcfs_id2str(id), info->pi_magic); - goto out_1; - } - - if (!(info->pi_features & LNET_PING_FEAT_NI_STATUS)) { - CERROR("%s: ping w/o NI status: 0x%x\n", - libcfs_id2str(id), info->pi_features); - goto out_1; - } - - if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) { - CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), - nob, (int)offsetof(struct lnet_ping_info, pi_ni[0])); - goto out_1; - } - - if (info->pi_nnis < n_ids) - n_ids = info->pi_nnis; - - if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) { - CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), - nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids])); - goto out_1; - } - - rc = -EFAULT; /* If I SEGV... */ - - memset(&tmpid, 0, sizeof(tmpid)); - for (i = 0; i < n_ids; i++) { - tmpid.pid = info->pi_pid; - tmpid.nid = info->pi_ni[i].ns_nid; - if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) - goto out_1; - } - rc = info->pi_nnis; - - out_1: - rc2 = LNetEQFree(eqh); - if (rc2) - CERROR("rc2 %d\n", rc2); - LASSERT(!rc2); - - out_0: - kfree(info); - return rc; -} diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c deleted file mode 100644 index 55ecc1998b7e..000000000000 --- a/drivers/staging/lustre/lnet/lnet/config.c +++ /dev/null @@ -1,1235 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include -#include -#include -#include - -struct lnet_text_buf { /* tmp struct for parsing routes */ - struct list_head ltb_list; /* stash on lists */ - int ltb_size; /* allocated size */ - char ltb_text[0]; /* text buffer */ -}; - -static int lnet_tbnob; /* track text buf allocation */ -#define LNET_MAX_TEXTBUF_NOB (64 << 10) /* bound allocation */ -#define LNET_SINGLE_TEXTBUF_NOB (4 << 10) - -static void -lnet_syntax(char *name, char *str, int offset, int width) -{ - static char dots[LNET_SINGLE_TEXTBUF_NOB]; - static char dashes[LNET_SINGLE_TEXTBUF_NOB]; - - memset(dots, '.', sizeof(dots)); - dots[sizeof(dots) - 1] = 0; - memset(dashes, '-', sizeof(dashes)); - dashes[sizeof(dashes) - 1] = 0; - - LCONSOLE_ERROR_MSG(0x10f, "Error parsing '%s=\"%s\"'\n", name, str); - LCONSOLE_ERROR_MSG(0x110, "here...........%.*s..%.*s|%.*s|\n", - (int)strlen(name), dots, offset, dots, - (width < 1) ? 
0 : width - 1, dashes); -} - -static int -lnet_issep(char c) -{ - switch (c) { - case '\n': - case '\r': - case ';': - return 1; - default: - return 0; - } -} - -int -lnet_net_unique(__u32 net, struct list_head *nilist) -{ - struct list_head *tmp; - struct lnet_ni *ni; - - list_for_each(tmp, nilist) { - ni = list_entry(tmp, struct lnet_ni, ni_list); - - if (LNET_NIDNET(ni->ni_nid) == net) - return 0; - } - - return 1; -} - -void -lnet_ni_free(struct lnet_ni *ni) -{ - int i; - - if (ni->ni_refs) - cfs_percpt_free(ni->ni_refs); - - if (ni->ni_tx_queues) - cfs_percpt_free(ni->ni_tx_queues); - - if (ni->ni_cpts) - cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts); - - kfree(ni->ni_lnd_tunables); - - for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i]; i++) - kfree(ni->ni_interfaces[i]); - - /* release reference to net namespace */ - if (ni->ni_net_ns) - put_net(ni->ni_net_ns); - - kfree(ni); -} - -struct lnet_ni * -lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) -{ - struct lnet_tx_queue *tq; - struct lnet_ni *ni; - int rc; - int i; - - if (!lnet_net_unique(net, nilist)) { - LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n", - libcfs_net2str(net)); - return NULL; - } - - ni = kzalloc(sizeof(*ni), GFP_NOFS); - if (!ni) { - CERROR("Out of memory creating network %s\n", - libcfs_net2str(net)); - return NULL; - } - - spin_lock_init(&ni->ni_lock); - INIT_LIST_HEAD(&ni->ni_cptlist); - ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*ni->ni_refs[0])); - if (!ni->ni_refs) - goto failed; - - ni->ni_tx_queues = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*ni->ni_tx_queues[0])); - if (!ni->ni_tx_queues) - goto failed; - - cfs_percpt_for_each(tq, i, ni->ni_tx_queues) - INIT_LIST_HEAD(&tq->tq_delayed); - - if (!el) { - ni->ni_cpts = NULL; - ni->ni_ncpts = LNET_CPT_NUMBER; - } else { - rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts); - if (rc <= 0) { - CERROR("Failed to set CPTs for NI %s: %d\n", - 
libcfs_net2str(net), rc); - goto failed; - } - - LASSERT(rc <= LNET_CPT_NUMBER); - if (rc == LNET_CPT_NUMBER) { - cfs_expr_list_values_free(ni->ni_cpts, LNET_CPT_NUMBER); - ni->ni_cpts = NULL; - } - - ni->ni_ncpts = rc; - } - - /* LND will fill in the address part of the NID */ - ni->ni_nid = LNET_MKNID(net, 0); - - /* Store net namespace in which current ni is being created */ - if (current->nsproxy->net_ns) - ni->ni_net_ns = get_net(current->nsproxy->net_ns); - else - ni->ni_net_ns = NULL; - - ni->ni_last_alive = ktime_get_real_seconds(); - list_add_tail(&ni->ni_list, nilist); - return ni; - failed: - lnet_ni_free(ni); - return NULL; -} - -int -lnet_parse_networks(struct list_head *nilist, char *networks) -{ - struct cfs_expr_list *el = NULL; - char *tokens; - char *str; - char *tmp; - struct lnet_ni *ni; - __u32 net; - int nnets = 0; - struct list_head *temp_node; - - if (!networks) { - CERROR("networks string is undefined\n"); - return -EINVAL; - } - - if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) { - /* _WAY_ conservative */ - LCONSOLE_ERROR_MSG(0x112, - "Can't parse networks: string too long\n"); - return -EINVAL; - } - - tokens = kstrdup(networks, GFP_KERNEL); - if (!tokens) { - CERROR("Can't allocate net tokens\n"); - return -ENOMEM; - } - - tmp = tokens; - str = tokens; - - while (str && *str) { - char *comma = strchr(str, ','); - char *bracket = strchr(str, '('); - char *square = strchr(str, '['); - char *iface; - int niface; - int rc; - - /* - * NB we don't check interface conflicts here; it's the LNDs - * responsibility (if it cares at all) - */ - if (square && (!comma || square < comma)) { - /* - * i.e: o2ib0(ib0)[1,2], number between square - * brackets are CPTs this NI needs to be bond - */ - if (bracket && bracket > square) { - tmp = square; - goto failed_syntax; - } - - tmp = strchr(square, ']'); - if (!tmp) { - tmp = square; - goto failed_syntax; - } - - rc = cfs_expr_list_parse(square, tmp - square + 1, - 0, LNET_CPT_NUMBER - 1, &el); - if (rc) 
{ - tmp = square; - goto failed_syntax; - } - - while (square <= tmp) - *square++ = ' '; - } - - if (!bracket || (comma && comma < bracket)) { - /* no interface list specified */ - - if (comma) - *comma++ = 0; - net = libcfs_str2net(strim(str)); - - if (net == LNET_NIDNET(LNET_NID_ANY)) { - LCONSOLE_ERROR_MSG(0x113, - "Unrecognised network type\n"); - tmp = str; - goto failed_syntax; - } - - if (LNET_NETTYP(net) != LOLND && /* LO is implicit */ - !lnet_ni_alloc(net, el, nilist)) - goto failed; - - if (el) { - cfs_expr_list_free(el); - el = NULL; - } - - str = comma; - continue; - } - - *bracket = 0; - net = libcfs_str2net(strim(str)); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - tmp = str; - goto failed_syntax; - } - - ni = lnet_ni_alloc(net, el, nilist); - if (!ni) - goto failed; - - if (el) { - cfs_expr_list_free(el); - el = NULL; - } - - niface = 0; - iface = bracket + 1; - - bracket = strchr(iface, ')'); - if (!bracket) { - tmp = iface; - goto failed_syntax; - } - - *bracket = 0; - do { - comma = strchr(iface, ','); - if (comma) - *comma++ = 0; - - iface = strim(iface); - if (!*iface) { - tmp = iface; - goto failed_syntax; - } - - if (niface == LNET_MAX_INTERFACES) { - LCONSOLE_ERROR_MSG(0x115, - "Too many interfaces for net %s\n", - libcfs_net2str(net)); - goto failed; - } - - /* - * Allocate a separate piece of memory and copy - * into it the string, so we don't have - * a depencency on the tokens string. This way we - * can free the tokens at the end of the function. 
- * The newly allocated ni_interfaces[] can be - * freed when freeing the NI - */ - ni->ni_interfaces[niface] = kstrdup(iface, GFP_KERNEL); - if (!ni->ni_interfaces[niface]) { - CERROR("Can't allocate net interface name\n"); - goto failed; - } - niface++; - iface = comma; - } while (iface); - - str = bracket + 1; - comma = strchr(bracket + 1, ','); - if (comma) { - *comma = 0; - str = strim(str); - if (*str) { - tmp = str; - goto failed_syntax; - } - str = comma + 1; - continue; - } - - str = strim(str); - if (*str) { - tmp = str; - goto failed_syntax; - } - } - - list_for_each(temp_node, nilist) - nnets++; - - kfree(tokens); - return nnets; - - failed_syntax: - lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp)); - failed: - while (!list_empty(nilist)) { - ni = list_entry(nilist->next, struct lnet_ni, ni_list); - - list_del(&ni->ni_list); - lnet_ni_free(ni); - } - - if (el) - cfs_expr_list_free(el); - - kfree(tokens); - - return -EINVAL; -} - -static struct lnet_text_buf * -lnet_new_text_buf(int str_len) -{ - struct lnet_text_buf *ltb; - int nob; - - /* NB allocate space for the terminating 0 */ - nob = offsetof(struct lnet_text_buf, ltb_text[str_len + 1]); - if (nob > LNET_SINGLE_TEXTBUF_NOB) { - /* _way_ conservative for "route net gateway..." 
*/ - CERROR("text buffer too big\n"); - return NULL; - } - - if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) { - CERROR("Too many text buffers\n"); - return NULL; - } - - ltb = kzalloc(nob, GFP_KERNEL); - if (!ltb) - return NULL; - - ltb->ltb_size = nob; - ltb->ltb_text[0] = 0; - lnet_tbnob += nob; - return ltb; -} - -static void -lnet_free_text_buf(struct lnet_text_buf *ltb) -{ - lnet_tbnob -= ltb->ltb_size; - kfree(ltb); -} - -static void -lnet_free_text_bufs(struct list_head *tbs) -{ - struct lnet_text_buf *ltb; - - while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list); - - list_del(&ltb->ltb_list); - lnet_free_text_buf(ltb); - } -} - -static int -lnet_str2tbs_sep(struct list_head *tbs, char *str) -{ - struct list_head pending; - char *sep; - int nob; - int i; - struct lnet_text_buf *ltb; - - INIT_LIST_HEAD(&pending); - - /* Split 'str' into separate commands */ - for (;;) { - /* skip leading whitespace */ - while (isspace(*str)) - str++; - - /* scan for separator or comment */ - for (sep = str; *sep; sep++) - if (lnet_issep(*sep) || *sep == '#') - break; - - nob = (int)(sep - str); - if (nob > 0) { - ltb = lnet_new_text_buf(nob); - if (!ltb) { - lnet_free_text_bufs(&pending); - return -ENOMEM; - } - - for (i = 0; i < nob; i++) - if (isspace(str[i])) - ltb->ltb_text[i] = ' '; - else - ltb->ltb_text[i] = str[i]; - - ltb->ltb_text[nob] = 0; - - list_add_tail(&ltb->ltb_list, &pending); - } - - if (*sep == '#') { - /* scan for separator */ - do { - sep++; - } while (*sep && !lnet_issep(*sep)); - } - - if (!*sep) - break; - - str = sep + 1; - } - - list_splice(&pending, tbs->prev); - return 0; -} - -static int -lnet_expand1tb(struct list_head *list, - char *str, char *sep1, char *sep2, - char *item, int itemlen) -{ - int len1 = (int)(sep1 - str); - int len2 = strlen(sep2 + 1); - struct lnet_text_buf *ltb; - - LASSERT(*sep1 == '['); - LASSERT(*sep2 == ']'); - - ltb = lnet_new_text_buf(len1 + itemlen + len2); - if (!ltb) - return -ENOMEM; -
- memcpy(ltb->ltb_text, str, len1); - memcpy(&ltb->ltb_text[len1], item, itemlen); - memcpy(&ltb->ltb_text[len1 + itemlen], sep2 + 1, len2); - ltb->ltb_text[len1 + itemlen + len2] = 0; - - list_add_tail(&ltb->ltb_list, list); - return 0; -} - -static int -lnet_str2tbs_expand(struct list_head *tbs, char *str) -{ - char num[16]; - struct list_head pending; - char *sep; - char *sep2; - char *parsed; - char *enditem; - int lo; - int hi; - int stride; - int i; - int nob; - int scanned; - - INIT_LIST_HEAD(&pending); - - sep = strchr(str, '['); - if (!sep) /* nothing to expand */ - return 0; - - sep2 = strchr(sep, ']'); - if (!sep2) - goto failed; - - for (parsed = sep; parsed < sep2; parsed = enditem) { - enditem = ++parsed; - while (enditem < sep2 && *enditem != ',') - enditem++; - - if (enditem == parsed) /* no empty items */ - goto failed; - - if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi, - &stride, &scanned) < 3) { - if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) { - /* simple string enumeration */ - if (lnet_expand1tb(&pending, str, sep, sep2, - parsed, - (int)(enditem - parsed))) { - goto failed; - } - continue; - } - - stride = 1; - } - - /* range expansion */ - - if (enditem != parsed + scanned) /* no trailing junk */ - goto failed; - - if (hi < 0 || lo < 0 || stride < 0 || hi < lo || - (hi - lo) % stride) - goto failed; - - for (i = lo; i <= hi; i += stride) { - snprintf(num, sizeof(num), "%d", i); - nob = strlen(num); - if (nob + 1 == sizeof(num)) - goto failed; - - if (lnet_expand1tb(&pending, str, sep, sep2, - num, nob)) - goto failed; - } - } - - list_splice(&pending, tbs->prev); - return 1; - - failed: - lnet_free_text_bufs(&pending); - return -EINVAL; -} - -static int -lnet_parse_hops(char *str, unsigned int *hops) -{ - int len = strlen(str); - int nob = len; - - return (sscanf(str, "%u%n", hops, &nob) >= 1 && - nob == len && - *hops > 0 && *hops < 256); -} - -#define LNET_PRIORITY_SEPARATOR (':') - -static int -lnet_parse_priority(char *str, unsigned int
*priority, char **token) -{ - int nob; - char *sep; - int len; - - sep = strchr(str, LNET_PRIORITY_SEPARATOR); - if (!sep) { - *priority = 0; - return 0; - } - len = strlen(sep + 1); - - if ((sscanf((sep + 1), "%u%n", priority, &nob) < 1) || (len != nob)) { - /* - * Update the caller's token pointer so it treats the found - * priority as the token to report in the error message. - */ - *token += sep - str + 1; - return -EINVAL; - } - - CDEBUG(D_NET, "gateway %s, priority %d, nob %d\n", str, *priority, nob); - - /* - * Change priority separator to \0 to be able to parse NID - */ - *sep = '\0'; - return 0; -} - -static int -lnet_parse_route(char *str, int *im_a_router) -{ - /* static scratch buffer OK (single threaded) */ - static char cmd[LNET_SINGLE_TEXTBUF_NOB]; - - struct list_head nets; - struct list_head gateways; - struct list_head *tmp1; - struct list_head *tmp2; - __u32 net; - lnet_nid_t nid; - struct lnet_text_buf *ltb; - int rc; - char *sep; - char *token = str; - int ntokens = 0; - int myrc = -1; - __u32 hops; - int got_hops = 0; - unsigned int priority = 0; - - INIT_LIST_HEAD(&gateways); - INIT_LIST_HEAD(&nets); - - /* save a copy of the string for error messages */ - strncpy(cmd, str, sizeof(cmd)); - cmd[sizeof(cmd) - 1] = '\0'; - - sep = str; - for (;;) { - /* scan for token start */ - while (isspace(*sep)) - sep++; - if (!*sep) { - if (ntokens < (got_hops ? 
3 : 2)) - goto token_error; - break; - } - - ntokens++; - token = sep++; - - /* scan for token end */ - while (*sep && !isspace(*sep)) - sep++; - if (*sep) - *sep++ = 0; - - if (ntokens == 1) { - tmp2 = &nets; /* expanding nets */ - } else if (ntokens == 2 && - lnet_parse_hops(token, &hops)) { - got_hops = 1; /* got a hop count */ - continue; - } else { - tmp2 = &gateways; /* expanding gateways */ - } - - ltb = lnet_new_text_buf(strlen(token)); - if (!ltb) - goto out; - - strcpy(ltb->ltb_text, token); - tmp1 = &ltb->ltb_list; - list_add_tail(tmp1, tmp2); - - while (tmp1 != tmp2) { - ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list); - - rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text); - if (rc < 0) - goto token_error; - - tmp1 = tmp1->next; - - if (rc > 0) { /* expanded! */ - list_del(&ltb->ltb_list); - lnet_free_text_buf(ltb); - continue; - } - - if (ntokens == 1) { - net = libcfs_str2net(ltb->ltb_text); - if (net == LNET_NIDNET(LNET_NID_ANY) || - LNET_NETTYP(net) == LOLND) - goto token_error; - } else { - rc = lnet_parse_priority(ltb->ltb_text, - &priority, &token); - if (rc < 0) - goto token_error; - - nid = libcfs_str2nid(ltb->ltb_text); - if (nid == LNET_NID_ANY || - LNET_NETTYP(LNET_NIDNET(nid)) == LOLND) - goto token_error; - } - } - } - - /** - * if there are no hops set then we want to flag this value as - * unset since hops is an optional parameter - */ - if (!got_hops) - hops = LNET_UNDEFINED_HOPS; - - LASSERT(!list_empty(&nets)); - LASSERT(!list_empty(&gateways)); - - list_for_each(tmp1, &nets) { - ltb = list_entry(tmp1, struct lnet_text_buf, ltb_list); - net = libcfs_str2net(ltb->ltb_text); - LASSERT(net != LNET_NIDNET(LNET_NID_ANY)); - - list_for_each(tmp2, &gateways) { - ltb = list_entry(tmp2, struct lnet_text_buf, ltb_list); - nid = libcfs_str2nid(ltb->ltb_text); - LASSERT(nid != LNET_NID_ANY); - - if (lnet_islocalnid(nid)) { - *im_a_router = 1; - continue; - } - - rc = lnet_add_route(net, hops, nid, priority); - if (rc && rc != -EEXIST && rc
!= -EHOSTUNREACH) { - CERROR("Can't create route to %s via %s\n", - libcfs_net2str(net), - libcfs_nid2str(nid)); - goto out; - } - } - } - - myrc = 0; - goto out; - - token_error: - lnet_syntax("routes", cmd, (int)(token - str), strlen(token)); - out: - lnet_free_text_bufs(&nets); - lnet_free_text_bufs(&gateways); - return myrc; -} - -static int -lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router) -{ - struct lnet_text_buf *ltb; - - while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, struct lnet_text_buf, ltb_list); - - if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) { - lnet_free_text_bufs(tbs); - return -EINVAL; - } - - list_del(&ltb->ltb_list); - lnet_free_text_buf(ltb); - } - - return 0; -} - -int -lnet_parse_routes(char *routes, int *im_a_router) -{ - struct list_head tbs; - int rc = 0; - - *im_a_router = 0; - - INIT_LIST_HEAD(&tbs); - - if (lnet_str2tbs_sep(&tbs, routes) < 0) { - CERROR("Error parsing routes\n"); - rc = -EINVAL; - } else { - rc = lnet_parse_route_tbs(&tbs, im_a_router); - } - - LASSERT(!lnet_tbnob); - return rc; -} - -static int -lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip) -{ - LIST_HEAD(list); - int rc; - int i; - - rc = cfs_ip_addr_parse(token, len, &list); - if (rc) - return rc; - - for (rc = i = 0; !rc && i < nip; i++) - rc = cfs_ip_addr_match(ipaddrs[i], &list); - - cfs_expr_list_free_list(&list); - - return rc; -} - -static int -lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip) -{ - static char tokens[LNET_SINGLE_TEXTBUF_NOB]; - - int matched = 0; - int ntokens = 0; - int len; - char *net = NULL; - char *sep; - char *token; - int rc; - - LASSERT(strlen(net_entry) < sizeof(tokens)); - - /* work on a copy of the string */ - strcpy(tokens, net_entry); - sep = tokens; - for (;;) { - /* scan for token start */ - while (isspace(*sep)) - sep++; - if (!*sep) - break; - - token = sep++; - - /* scan for token end */ - while (*sep && !isspace(*sep)) - sep++; - if (*sep) - *sep++ = 0;
- - if (!ntokens++) { - net = token; - continue; - } - - len = strlen(token); - - rc = lnet_match_network_token(token, len, ipaddrs, nip); - if (rc < 0) { - lnet_syntax("ip2nets", net_entry, - (int)(token - tokens), len); - return rc; - } - - if (rc) - matched |= 1; - } - - if (!matched) - return 0; - - strcpy(net_entry, net); /* replace with matched net */ - return 1; -} - -static __u32 -lnet_netspec2net(char *netspec) -{ - char *bracket = strchr(netspec, '('); - __u32 net; - - if (bracket) - *bracket = 0; - - net = libcfs_str2net(netspec); - - if (bracket) - *bracket = '('; - - return net; -} - -static int -lnet_splitnets(char *source, struct list_head *nets) -{ - int offset = 0; - int offset2; - int len; - struct lnet_text_buf *tb; - struct lnet_text_buf *tb2; - struct list_head *t; - char *sep; - char *bracket; - __u32 net; - - LASSERT(!list_empty(nets)); - LASSERT(nets->next == nets->prev); /* single entry */ - - tb = list_entry(nets->next, struct lnet_text_buf, ltb_list); - - for (;;) { - sep = strchr(tb->ltb_text, ','); - bracket = strchr(tb->ltb_text, '('); - - if (sep && bracket && bracket < sep) { - /* netspec lists interfaces... */ - - offset2 = offset + (int)(bracket - tb->ltb_text); - len = strlen(bracket); - - bracket = strchr(bracket + 1, ')'); - - if (!bracket || - !(bracket[1] == ',' || !bracket[1])) { - lnet_syntax("ip2nets", source, offset2, len); - return -EINVAL; - } - - sep = !bracket[1] ? 
NULL : bracket + 1; - } - - if (sep) - *sep++ = 0; - - net = lnet_netspec2net(tb->ltb_text); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("ip2nets", source, offset, - strlen(tb->ltb_text)); - return -EINVAL; - } - - list_for_each(t, nets) { - tb2 = list_entry(t, struct lnet_text_buf, ltb_list); - - if (tb2 == tb) - continue; - - if (net == lnet_netspec2net(tb2->ltb_text)) { - /* duplicate network */ - lnet_syntax("ip2nets", source, offset, - strlen(tb->ltb_text)); - return -EINVAL; - } - } - - if (!sep) - return 0; - - offset += (int)(sep - tb->ltb_text); - len = strlen(sep); - tb2 = lnet_new_text_buf(len); - if (!tb2) - return -ENOMEM; - - strncpy(tb2->ltb_text, sep, len); - tb2->ltb_text[len] = '\0'; - list_add_tail(&tb2->ltb_list, nets); - - tb = tb2; - } -} - -static int -lnet_match_networks(char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) -{ - static char networks[LNET_SINGLE_TEXTBUF_NOB]; - static char source[LNET_SINGLE_TEXTBUF_NOB]; - - struct list_head raw_entries; - struct list_head matched_nets; - struct list_head current_nets; - struct list_head *t; - struct list_head *t2; - struct lnet_text_buf *tb; - struct lnet_text_buf *temp; - struct lnet_text_buf *tb2; - __u32 net1; - __u32 net2; - int len; - int count; - int dup; - int rc; - - INIT_LIST_HEAD(&raw_entries); - if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) { - CERROR("Error parsing ip2nets\n"); - LASSERT(!lnet_tbnob); - return -EINVAL; - } - - INIT_LIST_HEAD(&matched_nets); - INIT_LIST_HEAD(&current_nets); - networks[0] = 0; - count = 0; - len = 0; - rc = 0; - - list_for_each_entry_safe(tb, temp, &raw_entries, ltb_list) { - strncpy(source, tb->ltb_text, sizeof(source)); - source[sizeof(source) - 1] = '\0'; - - /* replace ltb_text with the network(s) add on match */ - rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip); - if (rc < 0) - break; - - list_del(&tb->ltb_list); - - if (!rc) { /* no match */ - lnet_free_text_buf(tb); - continue; - } - - /* split into separate networks
*/ - INIT_LIST_HEAD(&current_nets); - list_add(&tb->ltb_list, &current_nets); - rc = lnet_splitnets(source, &current_nets); - if (rc < 0) - break; - - dup = 0; - list_for_each(t, &current_nets) { - tb = list_entry(t, struct lnet_text_buf, ltb_list); - net1 = lnet_netspec2net(tb->ltb_text); - LASSERT(net1 != LNET_NIDNET(LNET_NID_ANY)); - - list_for_each(t2, &matched_nets) { - tb2 = list_entry(t2, struct lnet_text_buf, - ltb_list); - net2 = lnet_netspec2net(tb2->ltb_text); - LASSERT(net2 != LNET_NIDNET(LNET_NID_ANY)); - - if (net1 == net2) { - dup = 1; - break; - } - } - - if (dup) - break; - } - - if (dup) { - lnet_free_text_bufs(&current_nets); - continue; - } - - list_for_each_safe(t, t2, &current_nets) { - tb = list_entry(t, struct lnet_text_buf, ltb_list); - - list_del(&tb->ltb_list); - list_add_tail(&tb->ltb_list, &matched_nets); - - len += snprintf(networks + len, sizeof(networks) - len, - "%s%s", !len ? "" : ",", - tb->ltb_text); - - if (len >= sizeof(networks)) { - CERROR("Too many matched networks\n"); - rc = -E2BIG; - goto out; - } - } - - count++; - } - - out: - lnet_free_text_bufs(&raw_entries); - lnet_free_text_bufs(&matched_nets); - lnet_free_text_bufs(&current_nets); - LASSERT(!lnet_tbnob); - - if (rc < 0) - return rc; - - *networksp = networks; - return count; -} - -static int -lnet_ipaddr_enumerate(__u32 **ipaddrsp) -{ - int up; - __u32 netmask; - __u32 *ipaddrs; - __u32 *ipaddrs2; - int nip; - char **ifnames; - int nif = lnet_ipif_enumerate(&ifnames); - int i; - int rc; - - if (nif <= 0) - return nif; - - ipaddrs = kcalloc(nif, sizeof(*ipaddrs), GFP_KERNEL); - if (!ipaddrs) { - CERROR("Can't allocate ipaddrs[%d]\n", nif); - lnet_ipif_free_enumeration(ifnames, nif); - return -ENOMEM; - } - - for (i = nip = 0; i < nif; i++) { - if (!strcmp(ifnames[i], "lo")) - continue; - - rc = lnet_ipif_query(ifnames[i], &up, &ipaddrs[nip], &netmask); - if (rc) { - CWARN("Can't query interface %s: %d\n", - ifnames[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s: it's down\n", -
ifnames[i]); - continue; - } - - nip++; - } - - lnet_ipif_free_enumeration(ifnames, nif); - - if (nip == nif) { - *ipaddrsp = ipaddrs; - } else { - if (nip > 0) { - ipaddrs2 = kcalloc(nip, sizeof(*ipaddrs2), - GFP_KERNEL); - if (!ipaddrs2) { - CERROR("Can't allocate ipaddrs[%d]\n", nip); - nip = -ENOMEM; - } else { - memcpy(ipaddrs2, ipaddrs, - nip * sizeof(*ipaddrs)); - *ipaddrsp = ipaddrs2; - rc = nip; - } - } - kfree(ipaddrs); - } - return nip; -} - -int -lnet_parse_ip2nets(char **networksp, char *ip2nets) -{ - __u32 *ipaddrs = NULL; - int nip = lnet_ipaddr_enumerate(&ipaddrs); - int rc; - - if (nip < 0) { - LCONSOLE_ERROR_MSG(0x117, - "Error %d enumerating local IP interfaces for ip2nets to match\n", - nip); - return nip; - } - - if (!nip) { - LCONSOLE_ERROR_MSG(0x118, - "No local IP interfaces for ip2nets to match\n"); - return -ENOENT; - } - - rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip); - kfree(ipaddrs); - - if (rc < 0) { - LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc); - return rc; - } - - if (!rc) { - LCONSOLE_ERROR_MSG(0x11a, - "ip2nets does not match any local IP interfaces\n"); - return -ENOENT; - } - - return 0; -} diff --git a/drivers/staging/lustre/lnet/lnet/lib-eq.c b/drivers/staging/lustre/lnet/lnet/lib-eq.c deleted file mode 100644 index c78e70373ab4..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-eq.c +++ /dev/null @@ -1,426 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-eq.c - * - * Library level Event queue management routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include - -/** - * Create an event queue that has room for \a count number of events. - * - * The event queue is circular and older events will be overwritten by new - * ones if they are not removed in time by the user using the functions - * LNetEQGet(), LNetEQWait(), or LNetEQPoll(). It is up to the user to - * determine the appropriate size of the event queue to prevent this loss - * of events. Note that when EQ handler is specified in \a callback, no - * event loss can happen, since the handler is run for each event deposited - * into the EQ. - * - * \param count The number of events to be stored in the event queue. It - * will be rounded up to the next power of two. - * \param callback A handler function that runs when an event is deposited - * into the EQ. The constant value LNET_EQ_HANDLER_NONE can be used to - * indicate that no event handler is desired. - * \param handle On successful return, this location will hold a handle for - * the newly created EQ. - * - * \retval 0 On success. - * \retval -EINVAL If an parameter is not valid. - * \retval -ENOMEM If memory for the EQ can't be allocated. - * - * \see lnet_eq_handler_t for the discussion on EQ handler semantics. 
- */ -int -LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback, - struct lnet_handle_eq *handle) -{ - struct lnet_eq *eq; - - LASSERT(the_lnet.ln_refcount > 0); - - /* - * We need count to be a power of 2 so that when eq_{enq,deq}_seq - * overflow, they don't skip entries, so the queue has the same - * apparent capacity at all times - */ - if (count) - count = roundup_pow_of_two(count); - - if (callback != LNET_EQ_HANDLER_NONE && count) - CWARN("EQ callback is guaranteed to get every event, do you still want to set eqcount %d for polling event which will have locking overhead? Please contact with developer to confirm\n", count); - - /* - * count can be 0 if only need callback, we can eliminate - * overhead of enqueue event - */ - if (!count && callback == LNET_EQ_HANDLER_NONE) - return -EINVAL; - - eq = kzalloc(sizeof(*eq), GFP_NOFS); - if (!eq) - return -ENOMEM; - - if (count) { - eq->eq_events = kvmalloc_array(count, sizeof(struct lnet_event), - GFP_KERNEL | __GFP_ZERO); - if (!eq->eq_events) - goto failed; - /* - * NB allocator has set all event sequence numbers to 0, - * so all them should be earlier than eq_deq_seq - */ - } - - eq->eq_deq_seq = 1; - eq->eq_enq_seq = 1; - eq->eq_size = count; - eq->eq_callback = callback; - - eq->eq_refs = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*eq->eq_refs[0])); - if (!eq->eq_refs) - goto failed; - - /* MUST hold both exclusive lnet_res_lock */ - lnet_res_lock(LNET_LOCK_EX); - /* - * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do - * both EQ lookup and poll event with only lnet_eq_wait_lock - */ - lnet_eq_wait_lock(); - - lnet_res_lh_initialize(&the_lnet.ln_eq_container, &eq->eq_lh); - list_add(&eq->eq_list, &the_lnet.ln_eq_container.rec_active); - - lnet_eq_wait_unlock(); - lnet_res_unlock(LNET_LOCK_EX); - - lnet_eq2handle(handle, eq); - return 0; - -failed: - kvfree(eq->eq_events); - - if (eq->eq_refs) - cfs_percpt_free(eq->eq_refs); - - kfree(eq); - return -ENOMEM; -} -EXPORT_SYMBOL(LNetEQAlloc); - 
-/** - * Release the resources associated with an event queue if it's idle; - * otherwise do nothing and it's up to the user to try again. - * - * \param eqh A handle for the event queue to be released. - * - * \retval 0 If the EQ is not in use and freed. - * \retval -ENOENT If \a eqh does not point to a valid EQ. - * \retval -EBUSY If the EQ is still in use by some MDs. - */ -int -LNetEQFree(struct lnet_handle_eq eqh) -{ - struct lnet_eq *eq; - struct lnet_event *events = NULL; - int **refs = NULL; - int *ref; - int rc = 0; - int size = 0; - int i; - - LASSERT(the_lnet.ln_refcount > 0); - - lnet_res_lock(LNET_LOCK_EX); - /* - * NB: hold lnet_eq_wait_lock for EQ link/unlink, so we can do - * both EQ lookup and poll event with only lnet_eq_wait_lock - */ - lnet_eq_wait_lock(); - - eq = lnet_handle2eq(&eqh); - if (!eq) { - rc = -ENOENT; - goto out; - } - - cfs_percpt_for_each(ref, i, eq->eq_refs) { - LASSERT(*ref >= 0); - if (!*ref) - continue; - - CDEBUG(D_NET, "Event equeue (%d: %d) busy on destroy.\n", - i, *ref); - rc = -EBUSY; - goto out; - } - - /* stash for free after lock dropped */ - events = eq->eq_events; - size = eq->eq_size; - refs = eq->eq_refs; - - lnet_res_lh_invalidate(&eq->eq_lh); - list_del(&eq->eq_list); - kfree(eq); - out: - lnet_eq_wait_unlock(); - lnet_res_unlock(LNET_LOCK_EX); - - kvfree(events); - if (refs) - cfs_percpt_free(refs); - - return rc; -} -EXPORT_SYMBOL(LNetEQFree); - -void -lnet_eq_enqueue_event(struct lnet_eq *eq, struct lnet_event *ev) -{ - /* MUST called with resource lock hold but w/o lnet_eq_wait_lock */ - int index; - - if (!eq->eq_size) { - LASSERT(eq->eq_callback != LNET_EQ_HANDLER_NONE); - eq->eq_callback(ev); - return; - } - - lnet_eq_wait_lock(); - ev->sequence = eq->eq_enq_seq++; - - LASSERT(is_power_of_2(eq->eq_size)); - index = ev->sequence & (eq->eq_size - 1); - - eq->eq_events[index] = *ev; - - if (eq->eq_callback != LNET_EQ_HANDLER_NONE) - eq->eq_callback(ev); - - /* Wake anyone waiting in LNetEQPoll() */ - if 
(waitqueue_active(&the_lnet.ln_eq_waitq)) - wake_up_all(&the_lnet.ln_eq_waitq); - lnet_eq_wait_unlock(); -} - -static int -lnet_eq_dequeue_event(struct lnet_eq *eq, struct lnet_event *ev) -{ - int new_index = eq->eq_deq_seq & (eq->eq_size - 1); - struct lnet_event *new_event = &eq->eq_events[new_index]; - int rc; - - /* must called with lnet_eq_wait_lock hold */ - if (LNET_SEQ_GT(eq->eq_deq_seq, new_event->sequence)) - return 0; - - /* We've got a new event... */ - *ev = *new_event; - - CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n", - new_event, eq->eq_deq_seq, eq->eq_size); - - /* ...but did it overwrite an event we've not seen yet? */ - if (eq->eq_deq_seq == new_event->sequence) { - rc = 1; - } else { - /* - * don't complain with CERROR: some EQs are sized small - * anyway; if it's important, the caller should complain - */ - CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n", - eq->eq_deq_seq, new_event->sequence); - rc = -EOVERFLOW; - } - - eq->eq_deq_seq = new_event->sequence + 1; - return rc; -} - -/** - * A nonblocking function that can be used to get the next event in an EQ. - * If an event handler is associated with the EQ, the handler will run before - * this function returns successfully. The event is removed from the queue. - * - * \param eventq A handle for the event queue. - * \param event On successful return (1 or -EOVERFLOW), this location will - * hold the next event in the EQ. - * - * \retval 0 No pending event in the EQ. - * \retval 1 Indicates success. - * \retval -ENOENT If \a eventq does not point to a valid EQ. - * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that - * at least one event between this event and the last event obtained from the - * EQ has been dropped due to limited space in the EQ. - */ - -/** - * Block the calling process until there is an event in the EQ. - * If an event handler is associated with the EQ, the handler will run before - * this function returns successfully. 
This function returns the next event - * in the EQ and removes it from the EQ. - * - * \param eventq A handle for the event queue. - * \param event On successful return (1 or -EOVERFLOW), this location will - * hold the next event in the EQ. - * - * \retval 1 Indicates success. - * \retval -ENOENT If \a eventq does not point to a valid EQ. - * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that - * at least one event between this event and the last event obtained from the - * EQ has been dropped due to limited space in the EQ. - */ - -static int -lnet_eq_wait_locked(int *timeout_ms, long state) -__must_hold(&the_lnet.ln_eq_wait_lock) -{ - int tms = *timeout_ms; - int wait; - wait_queue_entry_t wl; - unsigned long now; - - if (!tms) - return -ENXIO; /* don't want to wait and no new event */ - - init_waitqueue_entry(&wl, current); - set_current_state(state); - add_wait_queue(&the_lnet.ln_eq_waitq, &wl); - - lnet_eq_wait_unlock(); - - if (tms < 0) { - schedule(); - } else { - now = jiffies; - schedule_timeout(msecs_to_jiffies(tms)); - tms -= jiffies_to_msecs(jiffies - now); - if (tms < 0) /* no more wait but may have new event */ - tms = 0; - } - - wait = tms; /* might need to call here again */ - *timeout_ms = tms; - - lnet_eq_wait_lock(); - remove_wait_queue(&the_lnet.ln_eq_waitq, &wl); - - return wait; -} - -/** - * Block the calling process until there's an event from a set of EQs or - * timeout happens. - * - * If an event handler is associated with the EQ, the handler will run before - * this function returns successfully, in which case the corresponding event - * is consumed. - * - * LNetEQPoll() provides a timeout to allow applications to poll, block for a - * fixed period, or block indefinitely. - * - * \param eventqs,neq An array of EQ handles, and size of the array. - * \param timeout_ms Time in milliseconds to wait for an event to occur on - * one of the EQs. 
The constant LNET_TIME_FOREVER can be used to indicate an - * infinite timeout. - * \param interruptible, if true, use TASK_INTERRUPTIBLE, else TASK_NOLOAD - * \param event,which On successful return (1 or -EOVERFLOW), \a event will - * hold the next event in the EQs, and \a which will contain the index of the - * EQ from which the event was taken. - * - * \retval 0 No pending event in the EQs after timeout. - * \retval 1 Indicates success. - * \retval -EOVERFLOW Indicates success (i.e., an event is returned) and that - * at least one event between this event and the last event obtained from the - * EQ indicated by \a which has been dropped due to limited space in the EQ. - * \retval -ENOENT If there's an invalid handle in \a eventqs. - */ -int -LNetEQPoll(struct lnet_handle_eq *eventqs, int neq, int timeout_ms, - int interruptible, - struct lnet_event *event, int *which) -{ - int wait = 1; - int rc; - int i; - - LASSERT(the_lnet.ln_refcount > 0); - - if (neq < 1) - return -ENOENT; - - lnet_eq_wait_lock(); - - for (;;) { - for (i = 0; i < neq; i++) { - struct lnet_eq *eq = lnet_handle2eq(&eventqs[i]); - - if (!eq) { - lnet_eq_wait_unlock(); - return -ENOENT; - } - - rc = lnet_eq_dequeue_event(eq, event); - if (rc) { - lnet_eq_wait_unlock(); - *which = i; - return rc; - } - } - - if (!wait) - break; - - /* - * return value of lnet_eq_wait_locked: - * -1 : did nothing and it's sure no new event - * 1 : sleep inside and wait until new event - * 0 : don't want to wait anymore, but might have new event - * so need to call dequeue again - */ - wait = lnet_eq_wait_locked(&timeout_ms, - interruptible ? 
TASK_INTERRUPTIBLE - : TASK_NOLOAD); - if (wait < 0) /* no new event */ - break; - } - - lnet_eq_wait_unlock(); - return 0; -} diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c deleted file mode 100644 index 8a22514aaf71..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-md.c +++ /dev/null @@ -1,463 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-md.c - * - * Memory Descriptor management routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include - -/* must be called with lnet_res_lock held */ -void -lnet_md_unlink(struct lnet_libmd *md) -{ - if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) { - /* first unlink attempt... 
*/ - struct lnet_me *me = md->md_me; - - md->md_flags |= LNET_MD_FLAG_ZOMBIE; - - /* - * Disassociate from ME (if any), - * and unlink it if it was created - * with LNET_UNLINK - */ - if (me) { - /* detach MD from portal */ - lnet_ptl_detach_md(me, md); - if (me->me_unlink == LNET_UNLINK) - lnet_me_unlink(me); - } - - /* ensure all future handle lookups fail */ - lnet_res_lh_invalidate(&md->md_lh); - } - - if (md->md_refcount) { - CDEBUG(D_NET, "Queueing unlink of md %p\n", md); - return; - } - - CDEBUG(D_NET, "Unlinking md %p\n", md); - - if (md->md_eq) { - int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie); - - LASSERT(*md->md_eq->eq_refs[cpt] > 0); - (*md->md_eq->eq_refs[cpt])--; - } - - LASSERT(!list_empty(&md->md_list)); - list_del_init(&md->md_list); - kfree(md); -} - -static int -lnet_md_build(struct lnet_libmd *lmd, struct lnet_md *umd, int unlink) -{ - int i; - unsigned int niov; - int total_length = 0; - - lmd->md_me = NULL; - lmd->md_start = umd->start; - lmd->md_offset = 0; - lmd->md_max_size = umd->max_size; - lmd->md_options = umd->options; - lmd->md_user_ptr = umd->user_ptr; - lmd->md_eq = NULL; - lmd->md_threshold = umd->threshold; - lmd->md_refcount = 0; - lmd->md_flags = (unlink == LNET_UNLINK) ? 
LNET_MD_FLAG_AUTO_UNLINK : 0; - - if (umd->options & LNET_MD_IOVEC) { - if (umd->options & LNET_MD_KIOV) /* Can't specify both */ - return -EINVAL; - - niov = umd->length; - lmd->md_niov = umd->length; - memcpy(lmd->md_iov.iov, umd->start, - niov * sizeof(lmd->md_iov.iov[0])); - - for (i = 0; i < (int)niov; i++) { - /* We take the base address on trust */ - /* invalid length */ - if (lmd->md_iov.iov[i].iov_len <= 0) - return -EINVAL; - - total_length += lmd->md_iov.iov[i].iov_len; - } - - lmd->md_length = total_length; - - if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */ - (umd->max_size < 0 || - umd->max_size > total_length)) /* illegal max_size */ - return -EINVAL; - - } else if (umd->options & LNET_MD_KIOV) { - niov = umd->length; - lmd->md_niov = umd->length; - memcpy(lmd->md_iov.kiov, umd->start, - niov * sizeof(lmd->md_iov.kiov[0])); - - for (i = 0; i < (int)niov; i++) { - /* We take the page pointer on trust */ - if (lmd->md_iov.kiov[i].bv_offset + - lmd->md_iov.kiov[i].bv_len > PAGE_SIZE) - return -EINVAL; /* invalid length */ - - total_length += lmd->md_iov.kiov[i].bv_len; - } - - lmd->md_length = total_length; - - if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ - (umd->max_size < 0 || - umd->max_size > total_length)) /* illegal max_size */ - return -EINVAL; - } else { /* contiguous */ - lmd->md_length = umd->length; - niov = 1; - lmd->md_niov = 1; - lmd->md_iov.iov[0].iov_base = umd->start; - lmd->md_iov.iov[0].iov_len = umd->length; - - if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ - (umd->max_size < 0 || - umd->max_size > (int)umd->length)) /* illegal max_size */ - return -EINVAL; - } - - return 0; -} - -/* must be called with resource lock held */ -static int -lnet_md_link(struct lnet_libmd *md, struct lnet_handle_eq eq_handle, int cpt) -{ - struct lnet_res_container *container = the_lnet.ln_md_containers[cpt]; - - /* - * NB we are passed an allocated, but inactive md. 
- * if we return success, caller may lnet_md_unlink() it. - * otherwise caller may only kfree() it. - */ - /* - * This implementation doesn't know how to create START events or - * disable END events. Best to LASSERT our caller is compliant so - * we find out quickly... - */ - /* - * TODO - reevaluate what should be here in light of - * the removal of the start and end events - * maybe there we shouldn't even allow LNET_EQ_NONE!) - * LASSERT(!eq); - */ - if (!LNetEQHandleIsInvalid(eq_handle)) { - md->md_eq = lnet_handle2eq(&eq_handle); - - if (!md->md_eq) - return -ENOENT; - - (*md->md_eq->eq_refs[cpt])++; - } - - lnet_res_lh_initialize(container, &md->md_lh); - - LASSERT(list_empty(&md->md_list)); - list_add(&md->md_list, &container->rec_active); - - return 0; -} - -/* must be called with lnet_res_lock held */ -void -lnet_md_deconstruct(struct lnet_libmd *lmd, struct lnet_md *umd) -{ - /* NB this doesn't copy out all the iov entries so when a - * discontiguous MD is copied out, the target gets to know the - * original iov pointer (in start) and the number of entries it had - * and that's all. - */ - umd->start = lmd->md_start; - umd->length = !(lmd->md_options & - (LNET_MD_IOVEC | LNET_MD_KIOV)) ? - lmd->md_length : lmd->md_niov; - umd->threshold = lmd->md_threshold; - umd->max_size = lmd->md_max_size; - umd->options = lmd->md_options; - umd->user_ptr = lmd->md_user_ptr; - lnet_eq2handle(&umd->eq_handle, lmd->md_eq); -} - -static int -lnet_md_validate(struct lnet_md *umd) -{ - if (!umd->start && umd->length) { - CERROR("MD start pointer can not be NULL with length %u\n", - umd->length); - return -EINVAL; - } - - if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) && - umd->length > LNET_MAX_IOV) { - CERROR("Invalid option: too many fragments %u, %d max\n", - umd->length, LNET_MAX_IOV); - return -EINVAL; - } - - return 0; -} - -/** - * Create a memory descriptor and attach it to a ME - * - * \param meh A handle for a ME to associate the new MD with. 
- * \param umd Provides initial values for the user-visible parts of a MD. - * Other than its use for initialization, there is no linkage between this - * structure and the MD maintained by the LNet. - * \param unlink A flag to indicate whether the MD is automatically unlinked - * when it becomes inactive, either because the operation threshold drops to - * zero or because the available memory becomes less than \a umd.max_size. - * (Note that the check for unlinking a MD only occurs after the completion - * of a successful operation on the MD.) The value LNET_UNLINK enables auto - * unlinking; the value LNET_RETAIN disables it. - * \param handle On successful returns, a handle to the newly created MD is - * saved here. This handle can be used later in LNetMDUnlink(). - * - * \retval 0 On success. - * \retval -EINVAL If \a umd is not valid. - * \retval -ENOMEM If new MD cannot be allocated. - * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a - * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by - * calling LNetInvalidateHandle() on it. - * \retval -EBUSY If the ME pointed to by \a meh is already associated with - * a MD. 
- */ -int -LNetMDAttach(struct lnet_handle_me meh, struct lnet_md umd, - enum lnet_unlink unlink, struct lnet_handle_md *handle) -{ - LIST_HEAD(matches); - LIST_HEAD(drops); - struct lnet_me *me; - struct lnet_libmd *md; - int cpt; - int rc; - - LASSERT(the_lnet.ln_refcount > 0); - - if (lnet_md_validate(&umd)) - return -EINVAL; - - if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { - CERROR("Invalid option: no MD_OP set\n"); - return -EINVAL; - } - - md = lnet_md_alloc(&umd); - if (!md) - return -ENOMEM; - - rc = lnet_md_build(md, &umd, unlink); - if (rc) - goto out_free; - - cpt = lnet_cpt_of_cookie(meh.cookie); - - lnet_res_lock(cpt); - - me = lnet_handle2me(&meh); - if (!me) - rc = -ENOENT; - else if (me->me_md) - rc = -EBUSY; - else - rc = lnet_md_link(md, umd.eq_handle, cpt); - - if (rc) - goto out_unlock; - - /* - * attach this MD to portal of ME and check if it matches any - * blocked msgs on this portal - */ - lnet_ptl_attach_md(me, md, &matches, &drops); - - lnet_md2handle(handle, md); - - lnet_res_unlock(cpt); - - lnet_drop_delayed_msg_list(&drops, "Bad match"); - lnet_recv_delayed_msg_list(&matches); - - return 0; - -out_unlock: - lnet_res_unlock(cpt); -out_free: - kfree(md); - return rc; -} -EXPORT_SYMBOL(LNetMDAttach); - -/** - * Create a "free floating" memory descriptor - a MD that is not associated - * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations. - * - * \param umd,unlink See the discussion for LNetMDAttach(). - * \param handle On successful returns, a handle to the newly created MD is - * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(), - * and LNetGet() operations. - * - * \retval 0 On success. - * \retval -EINVAL If \a umd is not valid. - * \retval -ENOMEM If new MD cannot be allocated. - * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that - * it's OK to supply a NULL \a umd.eq_handle by calling - * LNetInvalidateHandle() on it. 
- */ -int -LNetMDBind(struct lnet_md umd, enum lnet_unlink unlink, - struct lnet_handle_md *handle) -{ - struct lnet_libmd *md; - int cpt; - int rc; - - LASSERT(the_lnet.ln_refcount > 0); - - if (lnet_md_validate(&umd)) - return -EINVAL; - - if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { - CERROR("Invalid option: GET|PUT illegal on active MDs\n"); - return -EINVAL; - } - - md = lnet_md_alloc(&umd); - if (!md) - return -ENOMEM; - - rc = lnet_md_build(md, &umd, unlink); - if (rc) - goto out_free; - - cpt = lnet_res_lock_current(); - - rc = lnet_md_link(md, umd.eq_handle, cpt); - if (rc) - goto out_unlock; - - lnet_md2handle(handle, md); - - lnet_res_unlock(cpt); - return 0; - -out_unlock: - lnet_res_unlock(cpt); -out_free: - kfree(md); - - return rc; -} -EXPORT_SYMBOL(LNetMDBind); - -/** - * Unlink the memory descriptor from any ME it may be linked to and release - * the internal resources associated with it. As a result, active messages - * associated with the MD may get aborted. - * - * This function does not free the memory region associated with the MD; - * i.e., the memory the user allocated for this MD. If the ME associated with - * this MD is not NULL and was created with auto unlink enabled, the ME is - * unlinked as well (see LNetMEAttach()). - * - * Explicitly unlinking a MD via this function call has the same behavior as - * a MD that has been automatically unlinked, except that no LNET_EVENT_UNLINK - * is generated in the latter case. - * - * An unlinked event can be reported in two ways: - * - If there's no pending operations on the MD, it's unlinked immediately - * and an LNET_EVENT_UNLINK event is logged before this function returns. - * - Otherwise, the MD is only marked for deletion when this function - * returns, and the unlinked event will be piggybacked on the event of - * the completion of the last operation by setting the unlinked field of - * the event. No dedicated LNET_EVENT_UNLINK event is generated. 
- * - * Note that in both cases the unlinked field of the event is always set; no - * more event will happen on the MD after such an event is logged. - * - * \param mdh A handle for the MD to be unlinked. - * - * \retval 0 On success. - * \retval -ENOENT If \a mdh does not point to a valid MD object. - */ -int -LNetMDUnlink(struct lnet_handle_md mdh) -{ - struct lnet_event ev; - struct lnet_libmd *md; - int cpt; - - LASSERT(the_lnet.ln_refcount > 0); - - cpt = lnet_cpt_of_cookie(mdh.cookie); - lnet_res_lock(cpt); - - md = lnet_handle2md(&mdh); - if (!md) { - lnet_res_unlock(cpt); - return -ENOENT; - } - - md->md_flags |= LNET_MD_FLAG_ABORTED; - /* - * If the MD is busy, lnet_md_unlink just marks it for deletion, and - * when the LND is done, the completion event flags that the MD was - * unlinked. Otherwise, we enqueue an event now... - */ - if (md->md_eq && !md->md_refcount) { - lnet_build_unlink_event(md, &ev); - lnet_eq_enqueue_event(md->md_eq, &ev); - } - - lnet_md_unlink(md); - - lnet_res_unlock(cpt); - return 0; -} -EXPORT_SYMBOL(LNetMDUnlink); diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c deleted file mode 100644 index 672e37bdd045..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-me.c +++ /dev/null @@ -1,274 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-me.c - * - * Match Entry management routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include - -/** - * Create and attach a match entry to the match list of \a portal. The new - * ME is empty, i.e. not associated with a memory descriptor. LNetMDAttach() - * can be used to attach a MD to an empty ME. - * - * \param portal The portal table index where the ME should be attached. - * \param match_id Specifies the match criteria for the process ID of - * the requester. The constants LNET_PID_ANY and LNET_NID_ANY can be - * used to wildcard either of the identifiers in the lnet_process_id - * structure. - * \param match_bits,ignore_bits Specify the match criteria to apply - * to the match bits in the incoming request. The ignore bits are used - * to mask out insignificant bits in the incoming match bits. The resulting - * bits are then compared to the ME's match bits to determine if the - * incoming request meets the match criteria. - * \param unlink Indicates whether the ME should be unlinked when the memory - * descriptor associated with it is unlinked (Note that the check for - * unlinking a ME only occurs when the memory descriptor is unlinked.). - * Valid values are LNET_RETAIN and LNET_UNLINK. - * \param pos Indicates whether the new ME should be prepended or - * appended to the match list. Allowed constants: LNET_INS_BEFORE, - * LNET_INS_AFTER. - * \param handle On successful returns, a handle to the newly created ME - * object is saved here. 
This handle can be used later in LNetMEInsert(), - * LNetMEUnlink(), or LNetMDAttach() functions. - * - * \retval 0 On success. - * \retval -EINVAL If \a portal is invalid. - * \retval -ENOMEM If new ME object cannot be allocated. - */ -int -LNetMEAttach(unsigned int portal, - struct lnet_process_id match_id, - __u64 match_bits, __u64 ignore_bits, - enum lnet_unlink unlink, enum lnet_ins_pos pos, - struct lnet_handle_me *handle) -{ - struct lnet_match_table *mtable; - struct lnet_me *me; - struct list_head *head; - - LASSERT(the_lnet.ln_refcount > 0); - - if ((int)portal >= the_lnet.ln_nportals) - return -EINVAL; - - mtable = lnet_mt_of_attach(portal, match_id, - match_bits, ignore_bits, pos); - if (!mtable) /* can't match portal type */ - return -EPERM; - - me = kzalloc(sizeof(*me), GFP_NOFS); - if (!me) - return -ENOMEM; - - lnet_res_lock(mtable->mt_cpt); - - me->me_portal = portal; - me->me_match_id = match_id; - me->me_match_bits = match_bits; - me->me_ignore_bits = ignore_bits; - me->me_unlink = unlink; - me->me_md = NULL; - - lnet_res_lh_initialize(the_lnet.ln_me_containers[mtable->mt_cpt], - &me->me_lh); - if (ignore_bits) - head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE]; - else - head = lnet_mt_match_head(mtable, match_id, match_bits); - - me->me_pos = head - &mtable->mt_mhash[0]; - if (pos == LNET_INS_AFTER || pos == LNET_INS_LOCAL) - list_add_tail(&me->me_list, head); - else - list_add(&me->me_list, head); - - lnet_me2handle(handle, me); - - lnet_res_unlock(mtable->mt_cpt); - return 0; -} -EXPORT_SYMBOL(LNetMEAttach); - -/** - * Create and a match entry and insert it before or after the ME pointed to by - * \a current_meh. The new ME is empty, i.e. not associated with a memory - * descriptor. LNetMDAttach() can be used to attach a MD to an empty ME. - * - * This function is identical to LNetMEAttach() except for the position - * where the new ME is inserted. - * - * \param current_meh A handle for a ME. 
The new ME will be inserted - * immediately before or immediately after this ME. - * \param match_id,match_bits,ignore_bits,unlink,pos,handle See the discussion - * for LNetMEAttach(). - * - * \retval 0 On success. - * \retval -ENOMEM If new ME object cannot be allocated. - * \retval -ENOENT If \a current_meh does not point to a valid match entry. - */ -int -LNetMEInsert(struct lnet_handle_me current_meh, - struct lnet_process_id match_id, - __u64 match_bits, __u64 ignore_bits, - enum lnet_unlink unlink, enum lnet_ins_pos pos, - struct lnet_handle_me *handle) -{ - struct lnet_me *current_me; - struct lnet_me *new_me; - struct lnet_portal *ptl; - int cpt; - - LASSERT(the_lnet.ln_refcount > 0); - - if (pos == LNET_INS_LOCAL) - return -EPERM; - - new_me = kzalloc(sizeof(*new_me), GFP_NOFS); - if (!new_me) - return -ENOMEM; - - cpt = lnet_cpt_of_cookie(current_meh.cookie); - - lnet_res_lock(cpt); - - current_me = lnet_handle2me(¤t_meh); - if (!current_me) { - kfree(new_me); - - lnet_res_unlock(cpt); - return -ENOENT; - } - - LASSERT(current_me->me_portal < the_lnet.ln_nportals); - - ptl = the_lnet.ln_portals[current_me->me_portal]; - if (lnet_ptl_is_unique(ptl)) { - /* nosense to insertion on unique portal */ - kfree(new_me); - lnet_res_unlock(cpt); - return -EPERM; - } - - new_me->me_pos = current_me->me_pos; - new_me->me_portal = current_me->me_portal; - new_me->me_match_id = match_id; - new_me->me_match_bits = match_bits; - new_me->me_ignore_bits = ignore_bits; - new_me->me_unlink = unlink; - new_me->me_md = NULL; - - lnet_res_lh_initialize(the_lnet.ln_me_containers[cpt], &new_me->me_lh); - - if (pos == LNET_INS_AFTER) - list_add(&new_me->me_list, ¤t_me->me_list); - else - list_add_tail(&new_me->me_list, ¤t_me->me_list); - - lnet_me2handle(handle, new_me); - - lnet_res_unlock(cpt); - - return 0; -} -EXPORT_SYMBOL(LNetMEInsert); - -/** - * Unlink a match entry from its match list. - * - * This operation also releases any resources associated with the ME. 
If a - * memory descriptor is attached to the ME, then it will be unlinked as well - * and an unlink event will be generated. It is an error to use the ME handle - * after calling LNetMEUnlink(). - * - * \param meh A handle for the ME to be unlinked. - * - * \retval 0 On success. - * \retval -ENOENT If \a meh does not point to a valid ME. - * \see LNetMDUnlink() for the discussion on delivering unlink event. - */ -int -LNetMEUnlink(struct lnet_handle_me meh) -{ - struct lnet_me *me; - struct lnet_libmd *md; - struct lnet_event ev; - int cpt; - - LASSERT(the_lnet.ln_refcount > 0); - - cpt = lnet_cpt_of_cookie(meh.cookie); - lnet_res_lock(cpt); - - me = lnet_handle2me(&meh); - if (!me) { - lnet_res_unlock(cpt); - return -ENOENT; - } - - md = me->me_md; - if (md) { - md->md_flags |= LNET_MD_FLAG_ABORTED; - if (md->md_eq && !md->md_refcount) { - lnet_build_unlink_event(md, &ev); - lnet_eq_enqueue_event(md->md_eq, &ev); - } - } - - lnet_me_unlink(me); - - lnet_res_unlock(cpt); - return 0; -} -EXPORT_SYMBOL(LNetMEUnlink); - -/* call with lnet_res_lock please */ -void -lnet_me_unlink(struct lnet_me *me) -{ - list_del(&me->me_list); - - if (me->me_md) { - struct lnet_libmd *md = me->me_md; - - /* detach MD from portal of this ME */ - lnet_ptl_detach_md(me, md); - lnet_md_unlink(md); - } - - lnet_res_lh_invalidate(&me->me_lh); - kfree(me); -} diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c deleted file mode 100644 index f8eaf8ff8d8d..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ /dev/null @@ -1,2386 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-move.c - * - * Data movement routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include -#include - -static int local_nid_dist_zero = 1; -module_param(local_nid_dist_zero, int, 0444); -MODULE_PARM_DESC(local_nid_dist_zero, "Reserved"); - -int -lnet_fail_nid(lnet_nid_t nid, unsigned int threshold) -{ - struct lnet_test_peer *tp; - struct lnet_test_peer *temp; - struct list_head *el; - struct list_head *next; - struct list_head cull; - - /* NB: use lnet_net_lock(0) to serialize operations on test peers */ - if (threshold) { - /* Adding a new entry */ - tp = kzalloc(sizeof(*tp), GFP_NOFS); - if (!tp) - return -ENOMEM; - - tp->tp_nid = nid; - tp->tp_threshold = threshold; - - lnet_net_lock(0); - list_add_tail(&tp->tp_list, &the_lnet.ln_test_peers); - lnet_net_unlock(0); - return 0; - } - - /* removing entries */ - INIT_LIST_HEAD(&cull); - - lnet_net_lock(0); - - list_for_each_safe(el, next, &the_lnet.ln_test_peers) { - tp = list_entry(el, struct lnet_test_peer, tp_list); - - if (!tp->tp_threshold || /* needs culling anyway */ - nid == LNET_NID_ANY || /* removing all entries */ - tp->tp_nid == nid) { /* matched this one */ - 
list_del(&tp->tp_list); - list_add(&tp->tp_list, &cull); - } - } - - lnet_net_unlock(0); - - list_for_each_entry_safe(tp, temp, &cull, tp_list) { - list_del(&tp->tp_list); - kfree(tp); - } - return 0; -} - -static int -fail_peer(lnet_nid_t nid, int outgoing) -{ - struct lnet_test_peer *tp; - struct lnet_test_peer *temp; - struct list_head *el; - struct list_head *next; - struct list_head cull; - int fail = 0; - - INIT_LIST_HEAD(&cull); - - /* NB: use lnet_net_lock(0) to serialize operations on test peers */ - lnet_net_lock(0); - - list_for_each_safe(el, next, &the_lnet.ln_test_peers) { - tp = list_entry(el, struct lnet_test_peer, tp_list); - - if (!tp->tp_threshold) { - /* zombie entry */ - if (outgoing) { - /* - * only cull zombies on outgoing tests, - * since we may be at interrupt priority on - * incoming messages. - */ - list_del(&tp->tp_list); - list_add(&tp->tp_list, &cull); - } - continue; - } - - if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */ - nid == tp->tp_nid) { /* fail this peer */ - fail = 1; - - if (tp->tp_threshold != LNET_MD_THRESH_INF) { - tp->tp_threshold--; - if (outgoing && - !tp->tp_threshold) { - /* see above */ - list_del(&tp->tp_list); - list_add(&tp->tp_list, &cull); - } - } - break; - } - } - - lnet_net_unlock(0); - - list_for_each_entry_safe(tp, temp, &cull, tp_list) { - list_del(&tp->tp_list); - - kfree(tp); - } - - return fail; -} - -unsigned int -lnet_iov_nob(unsigned int niov, struct kvec *iov) -{ - unsigned int nob = 0; - - LASSERT(!niov || iov); - while (niov-- > 0) - nob += (iov++)->iov_len; - - return nob; -} -EXPORT_SYMBOL(lnet_iov_nob); - -void -lnet_copy_iov2iter(struct iov_iter *to, - unsigned int nsiov, const struct kvec *siov, - unsigned int soffset, unsigned int nob) -{ - /* NB diov, siov are READ-ONLY */ - const char *s; - size_t left; - - if (!nob) - return; - - /* skip complete frags before 'soffset' */ - LASSERT(nsiov > 0); - while (soffset >= siov->iov_len) { - soffset -= siov->iov_len; - siov++; - nsiov--; - 
LASSERT(nsiov > 0); - } - - s = (char *)siov->iov_base + soffset; - left = siov->iov_len - soffset; - do { - size_t n, copy = left; - - LASSERT(nsiov > 0); - - if (copy > nob) - copy = nob; - n = copy_to_iter(s, copy, to); - if (n != copy) - return; - nob -= n; - - siov++; - s = (char *)siov->iov_base; - left = siov->iov_len; - nsiov--; - } while (nob > 0); -} -EXPORT_SYMBOL(lnet_copy_iov2iter); - -void -lnet_copy_kiov2iter(struct iov_iter *to, - unsigned int nsiov, const struct bio_vec *siov, - unsigned int soffset, unsigned int nob) -{ - if (!nob) - return; - - LASSERT(!in_interrupt()); - - LASSERT(nsiov > 0); - while (soffset >= siov->bv_len) { - soffset -= siov->bv_len; - siov++; - nsiov--; - LASSERT(nsiov > 0); - } - - do { - size_t copy = siov->bv_len - soffset, n; - - LASSERT(nsiov > 0); - - if (copy > nob) - copy = nob; - n = copy_page_to_iter(siov->bv_page, - siov->bv_offset + soffset, - copy, to); - if (n != copy) - return; - nob -= n; - siov++; - nsiov--; - soffset = 0; - } while (nob > 0); -} -EXPORT_SYMBOL(lnet_copy_kiov2iter); - -int -lnet_extract_iov(int dst_niov, struct kvec *dst, - int src_niov, const struct kvec *src, - unsigned int offset, unsigned int len) -{ - /* - * Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. 
- * NB not destructive to 'src' - */ - unsigned int frag_len; - unsigned int niov; - - if (!len) /* no data => */ - return 0; /* no frags */ - - LASSERT(src_niov > 0); - while (offset >= src->iov_len) { /* skip initial frags */ - offset -= src->iov_len; - src_niov--; - src++; - LASSERT(src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT(src_niov > 0); - LASSERT((int)niov <= dst_niov); - - frag_len = src->iov_len - offset; - dst->iov_base = ((char *)src->iov_base) + offset; - - if (len <= frag_len) { - dst->iov_len = len; - return niov; - } - - dst->iov_len = frag_len; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} -EXPORT_SYMBOL(lnet_extract_iov); - -unsigned int -lnet_kiov_nob(unsigned int niov, struct bio_vec *kiov) -{ - unsigned int nob = 0; - - LASSERT(!niov || kiov); - while (niov-- > 0) - nob += (kiov++)->bv_len; - - return nob; -} -EXPORT_SYMBOL(lnet_kiov_nob); - -int -lnet_extract_kiov(int dst_niov, struct bio_vec *dst, - int src_niov, const struct bio_vec *src, - unsigned int offset, unsigned int len) -{ - /* - * Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. 
- * NB not destructive to 'src' - */ - unsigned int frag_len; - unsigned int niov; - - if (!len) /* no data => */ - return 0; /* no frags */ - - LASSERT(src_niov > 0); - while (offset >= src->bv_len) { /* skip initial frags */ - offset -= src->bv_len; - src_niov--; - src++; - LASSERT(src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT(src_niov > 0); - LASSERT((int)niov <= dst_niov); - - frag_len = src->bv_len - offset; - dst->bv_page = src->bv_page; - dst->bv_offset = src->bv_offset + offset; - - if (len <= frag_len) { - dst->bv_len = len; - LASSERT(dst->bv_offset + dst->bv_len - <= PAGE_SIZE); - return niov; - } - - dst->bv_len = frag_len; - LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE); - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} -EXPORT_SYMBOL(lnet_extract_kiov); - -void -lnet_ni_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg, - int delayed, unsigned int offset, unsigned int mlen, - unsigned int rlen) -{ - unsigned int niov = 0; - struct kvec *iov = NULL; - struct bio_vec *kiov = NULL; - struct iov_iter to; - int rc; - - LASSERT(!in_interrupt()); - LASSERT(!mlen || msg); - - if (msg) { - LASSERT(msg->msg_receiving); - LASSERT(!msg->msg_sending); - LASSERT(rlen == msg->msg_len); - LASSERT(mlen <= msg->msg_len); - LASSERT(msg->msg_offset == offset); - LASSERT(msg->msg_wanted == mlen); - - msg->msg_receiving = 0; - - if (mlen) { - niov = msg->msg_niov; - iov = msg->msg_iov; - kiov = msg->msg_kiov; - - LASSERT(niov > 0); - LASSERT(!iov != !kiov); - } - } - - if (iov) { - iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset); - iov_iter_advance(&to, offset); - } else { - iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset); - iov_iter_advance(&to, offset); - } - rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen); - if (rc < 0) - lnet_finalize(ni, msg, rc); -} - -static void -lnet_setpayloadbuffer(struct lnet_msg *msg) -{ - struct lnet_libmd *md = msg->msg_md; - - 
LASSERT(msg->msg_len > 0); - LASSERT(!msg->msg_routing); - LASSERT(md); - LASSERT(!msg->msg_niov); - LASSERT(!msg->msg_iov); - LASSERT(!msg->msg_kiov); - - msg->msg_niov = md->md_niov; - if (md->md_options & LNET_MD_KIOV) - msg->msg_kiov = md->md_iov.kiov; - else - msg->msg_iov = md->md_iov.iov; -} - -void -lnet_prep_send(struct lnet_msg *msg, int type, struct lnet_process_id target, - unsigned int offset, unsigned int len) -{ - msg->msg_type = type; - msg->msg_target = target; - msg->msg_len = len; - msg->msg_offset = offset; - - if (len) - lnet_setpayloadbuffer(msg); - - memset(&msg->msg_hdr, 0, sizeof(msg->msg_hdr)); - msg->msg_hdr.type = cpu_to_le32(type); - msg->msg_hdr.dest_nid = cpu_to_le64(target.nid); - msg->msg_hdr.dest_pid = cpu_to_le32(target.pid); - /* src_nid will be set later */ - msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid); - msg->msg_hdr.payload_length = cpu_to_le32(len); -} - -static void -lnet_ni_send(struct lnet_ni *ni, struct lnet_msg *msg) -{ - void *priv = msg->msg_private; - int rc; - - LASSERT(!in_interrupt()); - LASSERT(LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND || - (msg->msg_txcredit && msg->msg_peertxcredit)); - - rc = ni->ni_lnd->lnd_send(ni, priv, msg); - if (rc < 0) - lnet_finalize(ni, msg, rc); -} - -static int -lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg) -{ - int rc; - - LASSERT(!msg->msg_sending); - LASSERT(msg->msg_receiving); - LASSERT(!msg->msg_rx_ready_delay); - LASSERT(ni->ni_lnd->lnd_eager_recv); - - msg->msg_rx_ready_delay = 1; - rc = ni->ni_lnd->lnd_eager_recv(ni, msg->msg_private, msg, - &msg->msg_private); - if (rc) { - CERROR("recv from %s / send to %s aborted: eager_recv failed %d\n", - libcfs_nid2str(msg->msg_rxpeer->lp_nid), - libcfs_id2str(msg->msg_target), rc); - LASSERT(rc < 0); /* required by my callers */ - } - - return rc; -} - -/* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */ -static void -lnet_ni_query_locked(struct lnet_ni *ni, struct lnet_peer *lp) -{ - 
unsigned long last_alive = 0; - - LASSERT(lnet_peer_aliveness_enabled(lp)); - LASSERT(ni->ni_lnd->lnd_query); - - lnet_net_unlock(lp->lp_cpt); - ni->ni_lnd->lnd_query(ni, lp->lp_nid, &last_alive); - lnet_net_lock(lp->lp_cpt); - - lp->lp_last_query = jiffies; - - if (last_alive) /* NI has updated timestamp */ - lp->lp_last_alive = last_alive; -} - -/* NB: always called with lnet_net_lock held */ -static inline int -lnet_peer_is_alive(struct lnet_peer *lp, unsigned long now) -{ - int alive; - unsigned long deadline; - - LASSERT(lnet_peer_aliveness_enabled(lp)); - - /* Trust lnet_notify() if it has more recent aliveness news, but - * ignore the initial assumed death (see lnet_peers_start_down()). - */ - if (!lp->lp_alive && lp->lp_alive_count > 0 && - time_after_eq(lp->lp_timestamp, lp->lp_last_alive)) - return 0; - - deadline = lp->lp_last_alive + lp->lp_ni->ni_peertimeout * HZ; - alive = time_after(deadline, now); - - /* Update obsolete lp_alive except for routers assumed to be dead - * initially, because router checker would update aliveness in this - * case, and moreover lp_last_alive at peer creation is assumed. - */ - if (alive && !lp->lp_alive && - !(lnet_isrouter(lp) && !lp->lp_alive_count)) - lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); - - return alive; -} - -/* - * NB: returns 1 when alive, 0 when dead, negative when error; - * may drop the lnet_net_lock - */ -static int -lnet_peer_alive_locked(struct lnet_peer *lp) -{ - unsigned long now = jiffies; - - if (!lnet_peer_aliveness_enabled(lp)) - return -ENODEV; - - if (lnet_peer_is_alive(lp, now)) - return 1; - - /* - * Peer appears dead, but we should avoid frequent NI queries (at - * most once per lnet_queryinterval seconds). 
- */ - if (lp->lp_last_query) { - static const int lnet_queryinterval = 1; - - unsigned long next_query = - lp->lp_last_query + lnet_queryinterval * HZ; - - if (time_before(now, next_query)) { - if (lp->lp_alive) - CWARN("Unexpected aliveness of peer %s: %d < %d (%d/%d)\n", - libcfs_nid2str(lp->lp_nid), - (int)now, (int)next_query, - lnet_queryinterval, - lp->lp_ni->ni_peertimeout); - return 0; - } - } - - /* query NI for latest aliveness news */ - lnet_ni_query_locked(lp->lp_ni, lp); - - if (lnet_peer_is_alive(lp, now)) - return 1; - - lnet_notify_locked(lp, 0, 0, lp->lp_last_alive); - return 0; -} - -/** - * \param msg The message to be sent. - * \param do_send True if lnet_ni_send() should be called in this function. - * lnet_send() is going to lnet_net_unlock immediately after this, so - * it sets do_send FALSE and I don't do the unlock/send/lock bit. - * - * \retval LNET_CREDIT_OK If \a msg sent or OK to send. - * \retval LNET_CREDIT_WAIT If \a msg blocked for credit. - * \retval -EHOSTUNREACH If the next hop of the message appears dead. - * \retval -ECANCELED If the MD of the message has been unlinked. 
- */ -static int -lnet_post_send_locked(struct lnet_msg *msg, int do_send) -{ - struct lnet_peer *lp = msg->msg_txpeer; - struct lnet_ni *ni = lp->lp_ni; - int cpt = msg->msg_tx_cpt; - struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt]; - - /* non-lnet_send() callers have checked before */ - LASSERT(!do_send || msg->msg_tx_delayed); - LASSERT(!msg->msg_receiving); - LASSERT(msg->msg_tx_committed); - - /* NB 'lp' is always the next hop */ - if (!(msg->msg_target.pid & LNET_PID_USERFLAG) && - !lnet_peer_alive_locked(lp)) { - the_lnet.ln_counters[cpt]->drop_count++; - the_lnet.ln_counters[cpt]->drop_length += msg->msg_len; - lnet_net_unlock(cpt); - - CNETERR("Dropping message for %s: peer not alive\n", - libcfs_id2str(msg->msg_target)); - if (do_send) - lnet_finalize(ni, msg, -EHOSTUNREACH); - - lnet_net_lock(cpt); - return -EHOSTUNREACH; - } - - if (msg->msg_md && - (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED)) { - lnet_net_unlock(cpt); - - CNETERR("Aborting message for %s: LNetM[DE]Unlink() already called on the MD/ME.\n", - libcfs_id2str(msg->msg_target)); - if (do_send) - lnet_finalize(ni, msg, -ECANCELED); - - lnet_net_lock(cpt); - return -ECANCELED; - } - - if (!msg->msg_peertxcredit) { - LASSERT((lp->lp_txcredits < 0) == - !list_empty(&lp->lp_txq)); - - msg->msg_peertxcredit = 1; - lp->lp_txqnob += msg->msg_len + sizeof(struct lnet_hdr); - lp->lp_txcredits--; - - if (lp->lp_txcredits < lp->lp_mintxcredits) - lp->lp_mintxcredits = lp->lp_txcredits; - - if (lp->lp_txcredits < 0) { - msg->msg_tx_delayed = 1; - list_add_tail(&msg->msg_list, &lp->lp_txq); - return LNET_CREDIT_WAIT; - } - } - - if (!msg->msg_txcredit) { - LASSERT((tq->tq_credits < 0) == - !list_empty(&tq->tq_delayed)); - - msg->msg_txcredit = 1; - tq->tq_credits--; - - if (tq->tq_credits < tq->tq_credits_min) - tq->tq_credits_min = tq->tq_credits; - - if (tq->tq_credits < 0) { - msg->msg_tx_delayed = 1; - list_add_tail(&msg->msg_list, &tq->tq_delayed); - return LNET_CREDIT_WAIT; - } - } - - if 
(do_send) { - lnet_net_unlock(cpt); - lnet_ni_send(ni, msg); - lnet_net_lock(cpt); - } - return LNET_CREDIT_OK; -} - -static struct lnet_rtrbufpool * -lnet_msg2bufpool(struct lnet_msg *msg) -{ - struct lnet_rtrbufpool *rbp; - int cpt; - - LASSERT(msg->msg_rx_committed); - - cpt = msg->msg_rx_cpt; - rbp = &the_lnet.ln_rtrpools[cpt][0]; - - LASSERT(msg->msg_len <= LNET_MTU); - while (msg->msg_len > (unsigned int)rbp->rbp_npages * PAGE_SIZE) { - rbp++; - LASSERT(rbp < &the_lnet.ln_rtrpools[cpt][LNET_NRBPOOLS]); - } - - return rbp; -} - -static int -lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv) -{ - /* - * lnet_parse is going to lnet_net_unlock immediately after this, so it - * sets do_recv FALSE and I don't do the unlock/send/lock bit. - * I return LNET_CREDIT_WAIT if msg blocked and LNET_CREDIT_OK if - * received or OK to receive - */ - struct lnet_peer *lp = msg->msg_rxpeer; - struct lnet_rtrbufpool *rbp; - struct lnet_rtrbuf *rb; - - LASSERT(!msg->msg_iov); - LASSERT(!msg->msg_kiov); - LASSERT(!msg->msg_niov); - LASSERT(msg->msg_routing); - LASSERT(msg->msg_receiving); - LASSERT(!msg->msg_sending); - - /* non-lnet_parse callers only receive delayed messages */ - LASSERT(!do_recv || msg->msg_rx_delayed); - - if (!msg->msg_peerrtrcredit) { - LASSERT((lp->lp_rtrcredits < 0) == - !list_empty(&lp->lp_rtrq)); - - msg->msg_peerrtrcredit = 1; - lp->lp_rtrcredits--; - if (lp->lp_rtrcredits < lp->lp_minrtrcredits) - lp->lp_minrtrcredits = lp->lp_rtrcredits; - - if (lp->lp_rtrcredits < 0) { - /* must have checked eager_recv before here */ - LASSERT(msg->msg_rx_ready_delay); - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, &lp->lp_rtrq); - return LNET_CREDIT_WAIT; - } - } - - rbp = lnet_msg2bufpool(msg); - - if (!msg->msg_rtrcredit) { - msg->msg_rtrcredit = 1; - rbp->rbp_credits--; - if (rbp->rbp_credits < rbp->rbp_mincredits) - rbp->rbp_mincredits = rbp->rbp_credits; - - if (rbp->rbp_credits < 0) { - /* must have checked eager_recv before here */ - 
LASSERT(msg->msg_rx_ready_delay); - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, &rbp->rbp_msgs); - return LNET_CREDIT_WAIT; - } - } - - LASSERT(!list_empty(&rbp->rbp_bufs)); - rb = list_entry(rbp->rbp_bufs.next, struct lnet_rtrbuf, rb_list); - list_del(&rb->rb_list); - - msg->msg_niov = rbp->rbp_npages; - msg->msg_kiov = &rb->rb_kiov[0]; - - if (do_recv) { - int cpt = msg->msg_rx_cpt; - - lnet_net_unlock(cpt); - lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1, - 0, msg->msg_len, msg->msg_len); - lnet_net_lock(cpt); - } - return LNET_CREDIT_OK; -} - -void -lnet_return_tx_credits_locked(struct lnet_msg *msg) -{ - struct lnet_peer *txpeer = msg->msg_txpeer; - struct lnet_msg *msg2; - - if (msg->msg_txcredit) { - struct lnet_ni *ni = txpeer->lp_ni; - struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt]; - - /* give back NI txcredits */ - msg->msg_txcredit = 0; - - LASSERT((tq->tq_credits < 0) == - !list_empty(&tq->tq_delayed)); - - tq->tq_credits++; - if (tq->tq_credits <= 0) { - msg2 = list_entry(tq->tq_delayed.next, - struct lnet_msg, msg_list); - list_del(&msg2->msg_list); - - LASSERT(msg2->msg_txpeer->lp_ni == ni); - LASSERT(msg2->msg_tx_delayed); - - (void)lnet_post_send_locked(msg2, 1); - } - } - - if (msg->msg_peertxcredit) { - /* give back peer txcredits */ - msg->msg_peertxcredit = 0; - - LASSERT((txpeer->lp_txcredits < 0) == - !list_empty(&txpeer->lp_txq)); - - txpeer->lp_txqnob -= msg->msg_len + sizeof(struct lnet_hdr); - LASSERT(txpeer->lp_txqnob >= 0); - - txpeer->lp_txcredits++; - if (txpeer->lp_txcredits <= 0) { - msg2 = list_entry(txpeer->lp_txq.next, - struct lnet_msg, msg_list); - list_del(&msg2->msg_list); - - LASSERT(msg2->msg_txpeer == txpeer); - LASSERT(msg2->msg_tx_delayed); - - (void)lnet_post_send_locked(msg2, 1); - } - } - - if (txpeer) { - msg->msg_txpeer = NULL; - lnet_peer_decref_locked(txpeer); - } -} - -void -lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp) -{ - struct lnet_msg *msg; - - if 
(list_empty(&rbp->rbp_msgs)) - return; - msg = list_entry(rbp->rbp_msgs.next, - struct lnet_msg, msg_list); - list_del(&msg->msg_list); - - (void)lnet_post_routed_recv_locked(msg, 1); -} - -void -lnet_drop_routed_msgs_locked(struct list_head *list, int cpt) -{ - struct list_head drop; - struct lnet_msg *msg; - struct lnet_msg *tmp; - - INIT_LIST_HEAD(&drop); - - list_splice_init(list, &drop); - - lnet_net_unlock(cpt); - - list_for_each_entry_safe(msg, tmp, &drop, msg_list) { - lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL, - 0, 0, 0, msg->msg_hdr.payload_length); - list_del_init(&msg->msg_list); - lnet_finalize(NULL, msg, -ECANCELED); - } - - lnet_net_lock(cpt); -} - -void -lnet_return_rx_credits_locked(struct lnet_msg *msg) -{ - struct lnet_peer *rxpeer = msg->msg_rxpeer; - struct lnet_msg *msg2; - - if (msg->msg_rtrcredit) { - /* give back global router credits */ - struct lnet_rtrbuf *rb; - struct lnet_rtrbufpool *rbp; - - /* - * NB If a msg ever blocks for a buffer in rbp_msgs, it stays - * there until it gets one allocated, or aborts the wait - * itself - */ - LASSERT(msg->msg_kiov); - - rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]); - rbp = rb->rb_pool; - - msg->msg_kiov = NULL; - msg->msg_rtrcredit = 0; - - LASSERT(rbp == lnet_msg2bufpool(msg)); - - LASSERT((rbp->rbp_credits > 0) == - !list_empty(&rbp->rbp_bufs)); - - /* - * If routing is now turned off, we just drop this buffer and - * don't bother trying to return credits. - */ - if (!the_lnet.ln_routing) { - lnet_destroy_rtrbuf(rb, rbp->rbp_npages); - goto routing_off; - } - - /* - * It is possible that a user has lowered the desired number of - * buffers in this pool. Make sure we never put back - * more buffers than the stated number. - */ - if (unlikely(rbp->rbp_credits >= rbp->rbp_req_nbuffers)) { - /* Discard this buffer so we don't have too many. 
*/ - lnet_destroy_rtrbuf(rb, rbp->rbp_npages); - rbp->rbp_nbuffers--; - } else { - list_add(&rb->rb_list, &rbp->rbp_bufs); - rbp->rbp_credits++; - if (rbp->rbp_credits <= 0) - lnet_schedule_blocked_locked(rbp); - } - } - -routing_off: - if (msg->msg_peerrtrcredit) { - /* give back peer router credits */ - msg->msg_peerrtrcredit = 0; - - LASSERT((rxpeer->lp_rtrcredits < 0) == - !list_empty(&rxpeer->lp_rtrq)); - - rxpeer->lp_rtrcredits++; - /* - * drop all messages which are queued to be routed on that - * peer. - */ - if (!the_lnet.ln_routing) { - lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq, - msg->msg_rx_cpt); - } else if (rxpeer->lp_rtrcredits <= 0) { - msg2 = list_entry(rxpeer->lp_rtrq.next, - struct lnet_msg, msg_list); - list_del(&msg2->msg_list); - - (void)lnet_post_routed_recv_locked(msg2, 1); - } - } - if (rxpeer) { - msg->msg_rxpeer = NULL; - lnet_peer_decref_locked(rxpeer); - } -} - -static int -lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2) -{ - struct lnet_peer *p1 = r1->lr_gateway; - struct lnet_peer *p2 = r2->lr_gateway; - int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops; - int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 
1 : r2->lr_hops; - - if (r1->lr_priority < r2->lr_priority) - return 1; - - if (r1->lr_priority > r2->lr_priority) - return -ERANGE; - - if (r1_hops < r2_hops) - return 1; - - if (r1_hops > r2_hops) - return -ERANGE; - - if (p1->lp_txqnob < p2->lp_txqnob) - return 1; - - if (p1->lp_txqnob > p2->lp_txqnob) - return -ERANGE; - - if (p1->lp_txcredits > p2->lp_txcredits) - return 1; - - if (p1->lp_txcredits < p2->lp_txcredits) - return -ERANGE; - - if (r1->lr_seq - r2->lr_seq <= 0) - return 1; - - return -ERANGE; -} - -static struct lnet_peer * -lnet_find_route_locked(struct lnet_ni *ni, lnet_nid_t target, - lnet_nid_t rtr_nid) -{ - struct lnet_remotenet *rnet; - struct lnet_route *route; - struct lnet_route *best_route; - struct lnet_route *last_route; - struct lnet_peer *lp_best; - struct lnet_peer *lp; - int rc; - - /* - * If @rtr_nid is not LNET_NID_ANY, return the gateway with - * rtr_nid nid, otherwise find the best gateway I can use - */ - rnet = lnet_find_net_locked(LNET_NIDNET(target)); - if (!rnet) - return NULL; - - lp_best = NULL; - best_route = NULL; - last_route = NULL; - list_for_each_entry(route, &rnet->lrn_routes, lr_list) { - lp = route->lr_gateway; - - if (!lnet_is_route_alive(route)) - continue; - - if (ni && lp->lp_ni != ni) - continue; - - if (lp->lp_nid == rtr_nid) /* it's pre-determined router */ - return lp; - - if (!lp_best) { - best_route = route; - last_route = route; - lp_best = lp; - continue; - } - - /* no protection on below fields, but it's harmless */ - if (last_route->lr_seq - route->lr_seq < 0) - last_route = route; - - rc = lnet_compare_routes(route, best_route); - if (rc < 0) - continue; - - best_route = route; - lp_best = lp; - } - - /* - * set sequence number on the best router to the latest sequence + 1 - * so we can round-robin all routers, it's race and inaccurate but - * harmless and functional - */ - if (best_route) - best_route->lr_seq = last_route->lr_seq + 1; - return lp_best; -} - -int -lnet_send(lnet_nid_t src_nid, 
struct lnet_msg *msg, lnet_nid_t rtr_nid) -{ - lnet_nid_t dst_nid = msg->msg_target.nid; - struct lnet_ni *src_ni; - struct lnet_ni *local_ni; - struct lnet_peer *lp; - int cpt; - int cpt2; - int rc; - - /* - * NB: rtr_nid is set to LNET_NID_ANY for all current use-cases, - * but we might want to use pre-determined router for ACK/REPLY - * in the future - */ - /* NB: ni == interface pre-determined (ACK/REPLY) */ - LASSERT(!msg->msg_txpeer); - LASSERT(!msg->msg_sending); - LASSERT(!msg->msg_target_is_router); - LASSERT(!msg->msg_receiving); - - msg->msg_sending = 1; - - LASSERT(!msg->msg_tx_committed); - cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid); - again: - lnet_net_lock(cpt); - - if (the_lnet.ln_shutdown) { - lnet_net_unlock(cpt); - return -ESHUTDOWN; - } - - if (src_nid == LNET_NID_ANY) { - src_ni = NULL; - } else { - src_ni = lnet_nid2ni_locked(src_nid, cpt); - if (!src_ni) { - lnet_net_unlock(cpt); - LCONSOLE_WARN("Can't send to %s: src %s is not a local nid\n", - libcfs_nid2str(dst_nid), - libcfs_nid2str(src_nid)); - return -EINVAL; - } - LASSERT(!msg->msg_routing); - } - - /* Is this for someone on a local network? 
*/ - local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid), cpt); - - if (local_ni) { - if (!src_ni) { - src_ni = local_ni; - src_nid = src_ni->ni_nid; - } else if (src_ni == local_ni) { - lnet_ni_decref_locked(local_ni, cpt); - } else { - lnet_ni_decref_locked(local_ni, cpt); - lnet_ni_decref_locked(src_ni, cpt); - lnet_net_unlock(cpt); - LCONSOLE_WARN("No route to %s via from %s\n", - libcfs_nid2str(dst_nid), - libcfs_nid2str(src_nid)); - return -EINVAL; - } - - LASSERT(src_nid != LNET_NID_ANY); - lnet_msg_commit(msg, cpt); - - if (!msg->msg_routing) - msg->msg_hdr.src_nid = cpu_to_le64(src_nid); - - if (src_ni == the_lnet.ln_loni) { - /* No send credit hassles with LOLND */ - lnet_net_unlock(cpt); - lnet_ni_send(src_ni, msg); - - lnet_net_lock(cpt); - lnet_ni_decref_locked(src_ni, cpt); - lnet_net_unlock(cpt); - return 0; - } - - rc = lnet_nid2peer_locked(&lp, dst_nid, cpt); - /* lp has ref on src_ni; lose mine */ - lnet_ni_decref_locked(src_ni, cpt); - if (rc) { - lnet_net_unlock(cpt); - LCONSOLE_WARN("Error %d finding peer %s\n", rc, - libcfs_nid2str(dst_nid)); - /* ENOMEM or shutting down */ - return rc; - } - LASSERT(lp->lp_ni == src_ni); - } else { - /* sending to a remote network */ - lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid); - if (!lp) { - if (src_ni) - lnet_ni_decref_locked(src_ni, cpt); - lnet_net_unlock(cpt); - - LCONSOLE_WARN("No route to %s via %s (all routers down)\n", - libcfs_id2str(msg->msg_target), - libcfs_nid2str(src_nid)); - return -EHOSTUNREACH; - } - - /* - * rtr_nid is LNET_NID_ANY or NID of pre-determined router, - * it's possible that rtr_nid isn't LNET_NID_ANY and lp isn't - * pre-determined router, this can happen if router table - * was changed when we release the lock - */ - if (rtr_nid != lp->lp_nid) { - cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid); - if (cpt2 != cpt) { - if (src_ni) - lnet_ni_decref_locked(src_ni, cpt); - lnet_net_unlock(cpt); - - rtr_nid = lp->lp_nid; - cpt = cpt2; - goto again; - } - } - - CDEBUG(D_NET, 
"Best route to %s via %s for %s %d\n", - libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid), - lnet_msgtyp2str(msg->msg_type), msg->msg_len); - - if (!src_ni) { - src_ni = lp->lp_ni; - src_nid = src_ni->ni_nid; - } else { - LASSERT(src_ni == lp->lp_ni); - lnet_ni_decref_locked(src_ni, cpt); - } - - lnet_peer_addref_locked(lp); - - LASSERT(src_nid != LNET_NID_ANY); - lnet_msg_commit(msg, cpt); - - if (!msg->msg_routing) { - /* I'm the source and now I know which NI to send on */ - msg->msg_hdr.src_nid = cpu_to_le64(src_nid); - } - - msg->msg_target_is_router = 1; - msg->msg_target.nid = lp->lp_nid; - msg->msg_target.pid = LNET_PID_LUSTRE; - } - - /* 'lp' is our best choice of peer */ - - LASSERT(!msg->msg_peertxcredit); - LASSERT(!msg->msg_txcredit); - LASSERT(!msg->msg_txpeer); - - msg->msg_txpeer = lp; /* msg takes my ref on lp */ - - rc = lnet_post_send_locked(msg, 0); - lnet_net_unlock(cpt); - - if (rc < 0) - return rc; - - if (rc == LNET_CREDIT_OK) - lnet_ni_send(src_ni, msg); - - return 0; /* rc == LNET_CREDIT_OK or LNET_CREDIT_WAIT */ -} - -void -lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, unsigned int nob) -{ - lnet_net_lock(cpt); - the_lnet.ln_counters[cpt]->drop_count++; - the_lnet.ln_counters[cpt]->drop_length += nob; - lnet_net_unlock(cpt); - - lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob); -} - -static void -lnet_recv_put(struct lnet_ni *ni, struct lnet_msg *msg) -{ - struct lnet_hdr *hdr = &msg->msg_hdr; - - if (msg->msg_wanted) - lnet_setpayloadbuffer(msg); - - lnet_build_msg_event(msg, LNET_EVENT_PUT); - - /* - * Must I ACK? 
If so I'll grab the ack_wmd out of the header and put - * it back into the ACK during lnet_finalize() - */ - msg->msg_ack = !lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) && - !(msg->msg_md->md_options & LNET_MD_ACK_DISABLE); - - lnet_ni_recv(ni, msg->msg_private, msg, msg->msg_rx_delayed, - msg->msg_offset, msg->msg_wanted, hdr->payload_length); -} - -static int -lnet_parse_put(struct lnet_ni *ni, struct lnet_msg *msg) -{ - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_match_info info; - bool ready_delay; - int rc; - - /* Convert put fields to host byte order */ - le64_to_cpus(&hdr->msg.put.match_bits); - le32_to_cpus(&hdr->msg.put.ptl_index); - le32_to_cpus(&hdr->msg.put.offset); - - info.mi_id.nid = hdr->src_nid; - info.mi_id.pid = hdr->src_pid; - info.mi_opc = LNET_MD_OP_PUT; - info.mi_portal = hdr->msg.put.ptl_index; - info.mi_rlength = hdr->payload_length; - info.mi_roffset = hdr->msg.put.offset; - info.mi_mbits = hdr->msg.put.match_bits; - - msg->msg_rx_ready_delay = !ni->ni_lnd->lnd_eager_recv; - ready_delay = msg->msg_rx_ready_delay; - - again: - rc = lnet_ptl_match_md(&info, msg); - switch (rc) { - default: - LBUG(); - - case LNET_MATCHMD_OK: - lnet_recv_put(ni, msg); - return 0; - - case LNET_MATCHMD_NONE: - /** - * no eager_recv or has already called it, should - * have been attached on delayed list - */ - if (ready_delay) - return 0; - - rc = lnet_ni_eager_recv(ni, msg); - if (!rc) { - ready_delay = true; - goto again; - } - /* fall through */ - - case LNET_MATCHMD_DROP: - CNETERR("Dropping PUT from %s portal %d match %llu offset %d length %d: %d\n", - libcfs_id2str(info.mi_id), info.mi_portal, - info.mi_mbits, info.mi_roffset, info.mi_rlength, rc); - - return -ENOENT; /* -ve: OK but no match */ - } -} - -static int -lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get) -{ - struct lnet_match_info info; - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_handle_wire reply_wmd; - int rc; - - /* Convert get fields to host byte 
order */ - le64_to_cpus(&hdr->msg.get.match_bits); - le32_to_cpus(&hdr->msg.get.ptl_index); - le32_to_cpus(&hdr->msg.get.sink_length); - le32_to_cpus(&hdr->msg.get.src_offset); - - info.mi_id.nid = hdr->src_nid; - info.mi_id.pid = hdr->src_pid; - info.mi_opc = LNET_MD_OP_GET; - info.mi_portal = hdr->msg.get.ptl_index; - info.mi_rlength = hdr->msg.get.sink_length; - info.mi_roffset = hdr->msg.get.src_offset; - info.mi_mbits = hdr->msg.get.match_bits; - - rc = lnet_ptl_match_md(&info, msg); - if (rc == LNET_MATCHMD_DROP) { - CNETERR("Dropping GET from %s portal %d match %llu offset %d length %d\n", - libcfs_id2str(info.mi_id), info.mi_portal, - info.mi_mbits, info.mi_roffset, info.mi_rlength); - return -ENOENT; /* -ve: OK but no match */ - } - - LASSERT(rc == LNET_MATCHMD_OK); - - lnet_build_msg_event(msg, LNET_EVENT_GET); - - reply_wmd = hdr->msg.get.return_wmd; - - lnet_prep_send(msg, LNET_MSG_REPLY, info.mi_id, - msg->msg_offset, msg->msg_wanted); - - msg->msg_hdr.msg.reply.dst_wmd = reply_wmd; - - if (rdma_get) { - /* The LND completes the REPLY from her recv procedure */ - lnet_ni_recv(ni, msg->msg_private, msg, 0, - msg->msg_offset, msg->msg_len, msg->msg_len); - return 0; - } - - lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0); - msg->msg_receiving = 0; - - rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY); - if (rc < 0) { - /* didn't get as far as lnet_ni_send() */ - CERROR("%s: Unable to send REPLY for GET from %s: %d\n", - libcfs_nid2str(ni->ni_nid), - libcfs_id2str(info.mi_id), rc); - - lnet_finalize(ni, msg, rc); - } - - return 0; -} - -static int -lnet_parse_reply(struct lnet_ni *ni, struct lnet_msg *msg) -{ - void *private = msg->msg_private; - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_process_id src = {0}; - struct lnet_libmd *md; - int rlength; - int mlength; - int cpt; - - cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie); - lnet_res_lock(cpt); - - src.nid = hdr->src_nid; - src.pid = hdr->src_pid; - - /* NB handles only 
looked up by creator (no flips) */ - md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd); - if (!md || !md->md_threshold || md->md_me) { - CNETERR("%s: Dropping REPLY from %s for %s MD %#llx.%#llx\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - !md ? "invalid" : "inactive", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie); - if (md && md->md_me) - CERROR("REPLY MD also attached to portal %d\n", - md->md_me->me_portal); - - lnet_res_unlock(cpt); - return -ENOENT; /* -ve: OK but no match */ - } - - LASSERT(!md->md_offset); - - rlength = hdr->payload_length; - mlength = min_t(uint, rlength, md->md_length); - - if (mlength < rlength && - !(md->md_options & LNET_MD_TRUNCATE)) { - CNETERR("%s: Dropping REPLY from %s length %d for MD %#llx would overflow (%d)\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - rlength, hdr->msg.reply.dst_wmd.wh_object_cookie, - mlength); - lnet_res_unlock(cpt); - return -ENOENT; /* -ve: OK but no match */ - } - - CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md %#llx\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie); - - lnet_msg_attach_md(msg, md, 0, mlength); - - if (mlength) - lnet_setpayloadbuffer(msg); - - lnet_res_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_REPLY); - - lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength); - return 0; -} - -static int -lnet_parse_ack(struct lnet_ni *ni, struct lnet_msg *msg) -{ - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_process_id src = {0}; - struct lnet_libmd *md; - int cpt; - - src.nid = hdr->src_nid; - src.pid = hdr->src_pid; - - /* Convert ack fields to host byte order */ - le64_to_cpus(&hdr->msg.ack.match_bits); - le32_to_cpus(&hdr->msg.ack.mlength); - - cpt = lnet_cpt_of_cookie(hdr->msg.ack.dst_wmd.wh_object_cookie); - lnet_res_lock(cpt); - - /* NB handles only looked up by creator (no flips) */ - md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd); - if 
(!md || !md->md_threshold || md->md_me) { - /* Don't moan; this is expected */ - CDEBUG(D_NET, - "%s: Dropping ACK from %s to %s MD %#llx.%#llx\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - !md ? "invalid" : "inactive", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie); - if (md && md->md_me) - CERROR("Source MD also attached to portal %d\n", - md->md_me->me_portal); - - lnet_res_unlock(cpt); - return -ENOENT; /* -ve! */ - } - - CDEBUG(D_NET, "%s: ACK from %s into md %#llx\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - hdr->msg.ack.dst_wmd.wh_object_cookie); - - lnet_msg_attach_md(msg, md, 0, 0); - - lnet_res_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_ACK); - - lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len); - return 0; -} - -/** - * \retval LNET_CREDIT_OK If \a msg is forwarded - * \retval LNET_CREDIT_WAIT If \a msg is blocked because w/o buffer - * \retval -ve error code - */ -int -lnet_parse_forward_locked(struct lnet_ni *ni, struct lnet_msg *msg) -{ - int rc = 0; - - if (!the_lnet.ln_routing) - return -ECANCELED; - - if (msg->msg_rxpeer->lp_rtrcredits <= 0 || - lnet_msg2bufpool(msg)->rbp_credits <= 0) { - if (!ni->ni_lnd->lnd_eager_recv) { - msg->msg_rx_ready_delay = 1; - } else { - lnet_net_unlock(msg->msg_rx_cpt); - rc = lnet_ni_eager_recv(ni, msg); - lnet_net_lock(msg->msg_rx_cpt); - } - } - - if (!rc) - rc = lnet_post_routed_recv_locked(msg, 0); - return rc; -} - -int -lnet_parse_local(struct lnet_ni *ni, struct lnet_msg *msg) -{ - int rc; - - switch (msg->msg_type) { - case LNET_MSG_ACK: - rc = lnet_parse_ack(ni, msg); - break; - case LNET_MSG_PUT: - rc = lnet_parse_put(ni, msg); - break; - case LNET_MSG_GET: - rc = lnet_parse_get(ni, msg, msg->msg_rdma_get); - break; - case LNET_MSG_REPLY: - rc = lnet_parse_reply(ni, msg); - break; - default: /* prevent an unused label if !kernel */ - LASSERT(0); - return -EPROTO; - } - - LASSERT(!rc || rc == -ENOENT); - return rc; -} - -char 
* -lnet_msgtyp2str(int type) -{ - switch (type) { - case LNET_MSG_ACK: - return "ACK"; - case LNET_MSG_PUT: - return "PUT"; - case LNET_MSG_GET: - return "GET"; - case LNET_MSG_REPLY: - return "REPLY"; - case LNET_MSG_HELLO: - return "HELLO"; - default: - return ""; - } -} - -void -lnet_print_hdr(struct lnet_hdr *hdr) -{ - struct lnet_process_id src = {0}; - struct lnet_process_id dst = {0}; - char *type_str = lnet_msgtyp2str(hdr->type); - - src.nid = hdr->src_nid; - src.pid = hdr->src_pid; - - dst.nid = hdr->dest_nid; - dst.pid = hdr->dest_pid; - - CWARN("P3 Header at %p of type %s\n", hdr, type_str); - CWARN(" From %s\n", libcfs_id2str(src)); - CWARN(" To %s\n", libcfs_id2str(dst)); - - switch (hdr->type) { - default: - break; - - case LNET_MSG_PUT: - CWARN(" Ptl index %d, ack md %#llx.%#llx, match bits %llu\n", - hdr->msg.put.ptl_index, - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - hdr->msg.put.match_bits); - CWARN(" Length %d, offset %d, hdr data %#llx\n", - hdr->payload_length, hdr->msg.put.offset, - hdr->msg.put.hdr_data); - break; - - case LNET_MSG_GET: - CWARN(" Ptl index %d, return md %#llx.%#llx, match bits %llu\n", - hdr->msg.get.ptl_index, - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - CWARN(" Length %d, src offset %d\n", - hdr->msg.get.sink_length, - hdr->msg.get.src_offset); - break; - - case LNET_MSG_ACK: - CWARN(" dst md %#llx.%#llx, manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - hdr->msg.ack.mlength); - break; - - case LNET_MSG_REPLY: - CWARN(" dst md %#llx.%#llx, length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - hdr->payload_length); - } -} - -int -lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid, - void *private, int rdma_req) -{ - int rc = 0; - int cpt; - int for_me; - struct lnet_msg *msg; 
- lnet_pid_t dest_pid; - lnet_nid_t dest_nid; - lnet_nid_t src_nid; - __u32 payload_length; - __u32 type; - - LASSERT(!in_interrupt()); - - type = le32_to_cpu(hdr->type); - src_nid = le64_to_cpu(hdr->src_nid); - dest_nid = le64_to_cpu(hdr->dest_nid); - dest_pid = le32_to_cpu(hdr->dest_pid); - payload_length = le32_to_cpu(hdr->payload_length); - - for_me = (ni->ni_nid == dest_nid); - cpt = lnet_cpt_of_nid(from_nid); - - switch (type) { - case LNET_MSG_ACK: - case LNET_MSG_GET: - if (payload_length > 0) { - CERROR("%s, src %s: bad %s payload %d (0 expected)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), payload_length); - return -EPROTO; - } - break; - - case LNET_MSG_PUT: - case LNET_MSG_REPLY: - if (payload_length > - (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) { - CERROR("%s, src %s: bad %s payload %d (%d max expected)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), - payload_length, - for_me ? LNET_MAX_PAYLOAD : LNET_MTU); - return -EPROTO; - } - break; - - default: - CERROR("%s, src %s: Bad message type 0x%x\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), type); - return -EPROTO; - } - - if (the_lnet.ln_routing && - ni->ni_last_alive != ktime_get_real_seconds()) { - /* NB: so far here is the only place to set NI status to "up */ - lnet_ni_lock(ni); - ni->ni_last_alive = ktime_get_real_seconds(); - if (ni->ni_status && - ni->ni_status->ns_status == LNET_NI_STATUS_DOWN) - ni->ni_status->ns_status = LNET_NI_STATUS_UP; - lnet_ni_unlock(ni); - } - - /* - * Regard a bad destination NID as a protocol error. 
Senders should - * know what they're doing; if they don't they're misconfigured, buggy - * or malicious so we chop them off at the knees :) - */ - if (!for_me) { - if (LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) { - /* should have gone direct */ - CERROR("%s, src %s: Bad dest nid %s (should have been sent direct)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (lnet_islocalnid(dest_nid)) { - /* - * dest is another local NI; sender should have used - * this node's NID on its own network - */ - CERROR("%s, src %s: Bad dest nid %s (it's my nid but on a different network)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (rdma_req && type == LNET_MSG_GET) { - CERROR("%s, src %s: Bad optimized GET for %s (final destination must be me)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (!the_lnet.ln_routing) { - CERROR("%s, src %s: Dropping message for %s (routing not enabled)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - goto drop; - } - } - - /* - * Message looks OK; we're not going to return an error, so we MUST - * call back lnd_recv() come what may... - */ - if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer(src_nid, 0)) { /* shall we now? 
*/ - CERROR("%s, src %s: Dropping %s to simulate failure\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type)); - goto drop; - } - - if (!list_empty(&the_lnet.ln_drop_rules) && - lnet_drop_rule_match(hdr)) { - CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate silent message loss\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid), lnet_msgtyp2str(type)); - goto drop; - } - - msg = kzalloc(sizeof(*msg), GFP_NOFS); - if (!msg) { - CERROR("%s, src %s: Dropping %s (out of memory)\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type)); - goto drop; - } - - /* msg zeroed by kzalloc() - * i.e. flags all clear, pointers NULL etc - */ - msg->msg_type = type; - msg->msg_private = private; - msg->msg_receiving = 1; - msg->msg_rdma_get = rdma_req; - msg->msg_wanted = payload_length; - msg->msg_len = payload_length; - msg->msg_offset = 0; - msg->msg_hdr = *hdr; - /* for building message event */ - msg->msg_from = from_nid; - if (!for_me) { - msg->msg_target.pid = dest_pid; - msg->msg_target.nid = dest_nid; - msg->msg_routing = 1; - - } else { - /* convert common msg->hdr fields to host byteorder */ - msg->msg_hdr.type = type; - msg->msg_hdr.src_nid = src_nid; - le32_to_cpus(&msg->msg_hdr.src_pid); - msg->msg_hdr.dest_nid = dest_nid; - msg->msg_hdr.dest_pid = dest_pid; - msg->msg_hdr.payload_length = payload_length; - } - - lnet_net_lock(cpt); - rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt); - if (rc) { - lnet_net_unlock(cpt); - CERROR("%s, src %s: Dropping %s (error %d looking up sender)\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), rc); - kfree(msg); - if (rc == -ESHUTDOWN) - /* We are shutting down. 
Don't do anything more */ - return 0; - goto drop; - } - - if (lnet_isrouter(msg->msg_rxpeer)) { - lnet_peer_set_alive(msg->msg_rxpeer); - if (avoid_asym_router_failure && - LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) { - /* received a remote message from router, update - * remote NI status on this router. - * NB: multi-hop routed message will be ignored. - */ - lnet_router_ni_update_locked(msg->msg_rxpeer, - LNET_NIDNET(src_nid)); - } - } - - lnet_msg_commit(msg, cpt); - - /* message delay simulation */ - if (unlikely(!list_empty(&the_lnet.ln_delay_rules) && - lnet_delay_rule_match_locked(hdr, msg))) { - lnet_net_unlock(cpt); - return 0; - } - - if (!for_me) { - rc = lnet_parse_forward_locked(ni, msg); - lnet_net_unlock(cpt); - - if (rc < 0) - goto free_drop; - - if (rc == LNET_CREDIT_OK) { - lnet_ni_recv(ni, msg->msg_private, msg, 0, - 0, payload_length, payload_length); - } - return 0; - } - - lnet_net_unlock(cpt); - - rc = lnet_parse_local(ni, msg); - if (rc) - goto free_drop; - return 0; - - free_drop: - LASSERT(!msg->msg_md); - lnet_finalize(ni, msg, rc); - - drop: - lnet_drop_message(ni, cpt, private, payload_length); - return 0; -} -EXPORT_SYMBOL(lnet_parse); - -void -lnet_drop_delayed_msg_list(struct list_head *head, char *reason) -{ - while (!list_empty(head)) { - struct lnet_process_id id = {0}; - struct lnet_msg *msg; - - msg = list_entry(head->next, struct lnet_msg, msg_list); - list_del(&msg->msg_list); - - id.nid = msg->msg_hdr.src_nid; - id.pid = msg->msg_hdr.src_pid; - - LASSERT(!msg->msg_md); - LASSERT(msg->msg_rx_delayed); - LASSERT(msg->msg_rxpeer); - LASSERT(msg->msg_hdr.type == LNET_MSG_PUT); - - CWARN("Dropping delayed PUT from %s portal %d match %llu offset %d length %d: %s\n", - libcfs_id2str(id), - msg->msg_hdr.msg.put.ptl_index, - msg->msg_hdr.msg.put.match_bits, - msg->msg_hdr.msg.put.offset, - msg->msg_hdr.payload_length, reason); - - /* - * NB I can't drop msg's ref on msg_rxpeer until after I've - * called lnet_drop_message(), so I 
just hang onto msg as well - * until that's done - */ - lnet_drop_message(msg->msg_rxpeer->lp_ni, - msg->msg_rxpeer->lp_cpt, - msg->msg_private, msg->msg_len); - /* - * NB: message will not generate event because w/o attached MD, - * but we still should give error code so lnet_msg_decommit() - * can skip counters operations and other checks. - */ - lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT); - } -} - -void -lnet_recv_delayed_msg_list(struct list_head *head) -{ - while (!list_empty(head)) { - struct lnet_msg *msg; - struct lnet_process_id id; - - msg = list_entry(head->next, struct lnet_msg, msg_list); - list_del(&msg->msg_list); - - /* - * md won't disappear under me, since each msg - * holds a ref on it - */ - id.nid = msg->msg_hdr.src_nid; - id.pid = msg->msg_hdr.src_pid; - - LASSERT(msg->msg_rx_delayed); - LASSERT(msg->msg_md); - LASSERT(msg->msg_rxpeer); - LASSERT(msg->msg_hdr.type == LNET_MSG_PUT); - - CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n", - libcfs_id2str(id), msg->msg_hdr.msg.put.ptl_index, - msg->msg_hdr.msg.put.match_bits, - msg->msg_hdr.msg.put.offset, - msg->msg_hdr.payload_length); - - lnet_recv_put(msg->msg_rxpeer->lp_ni, msg); - } -} - -/** - * Initiate an asynchronous PUT operation. - * - * There are several events associated with a PUT: completion of the send on - * the initiator node (LNET_EVENT_SEND), and when the send completes - * successfully, the receipt of an acknowledgment (LNET_EVENT_ACK) indicating - * that the operation was accepted by the target. The event LNET_EVENT_PUT is - * used at the target node to indicate the completion of incoming data - * delivery. - * - * The local events will be logged in the EQ associated with the MD pointed to - * by \a mdh handle. Using a MD without an associated EQ results in these - * events being discarded. 
In this case, the caller must have another - * mechanism (e.g., a higher level protocol) for determining when it is safe - * to modify the memory region associated with the MD. - * - * Note that LNet does not guarantee the order of LNET_EVENT_SEND and - * LNET_EVENT_ACK, though intuitively ACK should happen after SEND. - * - * \param self Indicates the NID of a local interface through which to send - * the PUT request. Use LNET_NID_ANY to let LNet choose one by itself. - * \param mdh A handle for the MD that describes the memory to be sent. The MD - * must be "free floating" (See LNetMDBind()). - * \param ack Controls whether an acknowledgment is requested. - * Acknowledgments are only sent when they are requested by the initiating - * process and the target MD enables them. - * \param target A process identifier for the target process. - * \param portal The index in the \a target's portal table. - * \param match_bits The match bits to use for MD selection at the target - * process. - * \param offset The offset into the target MD (only used when the target - * MD has the LNET_MD_MANAGE_REMOTE option set). - * \param hdr_data 64 bits of user data that can be included in the message - * header. This data is written to an event queue entry at the target if an - * EQ is present on the matching MD. - * - * \retval 0 Success, and only in this case events will be generated - * and logged to EQ (if it exists). - * \retval -EIO Simulated failure. - * \retval -ENOMEM Memory allocation failure. - * \retval -ENOENT Invalid MD object. - * - * \see lnet_event::hdr_data and lnet_event_kind. 
- */ -int -LNetPut(lnet_nid_t self, struct lnet_handle_md mdh, enum lnet_ack_req ack, - struct lnet_process_id target, unsigned int portal, - __u64 match_bits, unsigned int offset, - __u64 hdr_data) -{ - struct lnet_msg *msg; - struct lnet_libmd *md; - int cpt; - int rc; - - LASSERT(the_lnet.ln_refcount > 0); - - if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer(target.nid, 1)) { /* shall we now? */ - CERROR("Dropping PUT to %s: simulated failure\n", - libcfs_id2str(target)); - return -EIO; - } - - msg = kzalloc(sizeof(*msg), GFP_NOFS); - if (!msg) { - CERROR("Dropping PUT to %s: ENOMEM on struct lnet_msg\n", - libcfs_id2str(target)); - return -ENOMEM; - } - msg->msg_vmflush = !!(current->flags & PF_MEMALLOC); - - cpt = lnet_cpt_of_cookie(mdh.cookie); - lnet_res_lock(cpt); - - md = lnet_handle2md(&mdh); - if (!md || !md->md_threshold || md->md_me) { - CERROR("Dropping PUT (%llu:%d:%s): MD (%d) invalid\n", - match_bits, portal, libcfs_id2str(target), - !md ? 
-1 : md->md_threshold); - if (md && md->md_me) - CERROR("Source MD also attached to portal %d\n", - md->md_me->me_portal); - lnet_res_unlock(cpt); - - kfree(msg); - return -ENOENT; - } - - CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target)); - - lnet_msg_attach_md(msg, md, 0, 0); - - lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length); - - msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits); - msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal); - msg->msg_hdr.msg.put.offset = cpu_to_le32(offset); - msg->msg_hdr.msg.put.hdr_data = hdr_data; - - /* NB handles only looked up by creator (no flips) */ - if (ack == LNET_ACK_REQ) { - msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie = - the_lnet.ln_interface_cookie; - msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie = - md->md_lh.lh_cookie; - } else { - msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie = - LNET_WIRE_HANDLE_COOKIE_NONE; - msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie = - LNET_WIRE_HANDLE_COOKIE_NONE; - } - - lnet_res_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_SEND); - - rc = lnet_send(self, msg, LNET_NID_ANY); - if (rc) { - CNETERR("Error sending PUT to %s: %d\n", - libcfs_id2str(target), rc); - lnet_finalize(NULL, msg, rc); - } - - /* completion will be signalled by an event */ - return 0; -} -EXPORT_SYMBOL(LNetPut); - -struct lnet_msg * -lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg) -{ - /* - * The LND can DMA direct to the GET md (i.e. no REPLY msg). This - * returns a msg for the LND to pass to lnet_finalize() when the sink - * data has been received. 
- * - * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when - * lnet_finalize() is called on it, so the LND must call this first - */ - struct lnet_msg *msg = kzalloc(sizeof(*msg), GFP_NOFS); - struct lnet_libmd *getmd = getmsg->msg_md; - struct lnet_process_id peer_id = getmsg->msg_target; - int cpt; - - LASSERT(!getmsg->msg_target_is_router); - LASSERT(!getmsg->msg_routing); - - if (!msg) { - CERROR("%s: Dropping REPLY from %s: can't allocate msg\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id)); - goto drop; - } - - cpt = lnet_cpt_of_cookie(getmd->md_lh.lh_cookie); - lnet_res_lock(cpt); - - LASSERT(getmd->md_refcount > 0); - - if (!getmd->md_threshold) { - CERROR("%s: Dropping REPLY from %s for inactive MD %p\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), - getmd); - lnet_res_unlock(cpt); - goto drop; - } - - LASSERT(!getmd->md_offset); - - CDEBUG(D_NET, "%s: Reply from %s md %p\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd); - - /* setup information for lnet_build_msg_event */ - msg->msg_from = peer_id.nid; - msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */ - msg->msg_hdr.src_nid = peer_id.nid; - msg->msg_hdr.payload_length = getmd->md_length; - msg->msg_receiving = 1; /* required by lnet_msg_attach_md */ - - lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length); - lnet_res_unlock(cpt); - - cpt = lnet_cpt_of_nid(peer_id.nid); - - lnet_net_lock(cpt); - lnet_msg_commit(msg, cpt); - lnet_net_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_REPLY); - - return msg; - - drop: - cpt = lnet_cpt_of_nid(peer_id.nid); - - lnet_net_lock(cpt); - the_lnet.ln_counters[cpt]->drop_count++; - the_lnet.ln_counters[cpt]->drop_length += getmd->md_length; - lnet_net_unlock(cpt); - - kfree(msg); - - return NULL; -} -EXPORT_SYMBOL(lnet_create_reply_msg); - -void -lnet_set_reply_msg_len(struct lnet_ni *ni, struct lnet_msg *reply, - unsigned int len) -{ - /* - * Set the REPLY length, now the RDMA 
that elides the REPLY message has - * completed and I know it. - */ - LASSERT(reply); - LASSERT(reply->msg_type == LNET_MSG_GET); - LASSERT(reply->msg_ev.type == LNET_EVENT_REPLY); - - /* - * NB I trusted my peer to RDMA. If she tells me she's written beyond - * the end of my buffer, I might as well be dead. - */ - LASSERT(len <= reply->msg_ev.mlength); - - reply->msg_ev.mlength = len; -} -EXPORT_SYMBOL(lnet_set_reply_msg_len); - -/** - * Initiate an asynchronous GET operation. - * - * On the initiator node, an LNET_EVENT_SEND is logged when the GET request - * is sent, and an LNET_EVENT_REPLY is logged when the data returned from - * the target node in the REPLY has been written to local MD. - * - * On the target node, an LNET_EVENT_GET is logged when the GET request - * arrives and is accepted into a MD. - * - * \param self,target,portal,match_bits,offset See the discussion in LNetPut(). - * \param mdh A handle for the MD that describes the memory into which the - * requested data will be received. The MD must be "free floating" - * (See LNetMDBind()). - * - * \retval 0 Success, and only in this case events will be generated - * and logged to EQ (if it exists) of the MD. - * \retval -EIO Simulated failure. - * \retval -ENOMEM Memory allocation failure. - * \retval -ENOENT Invalid MD object. - */ -int -LNetGet(lnet_nid_t self, struct lnet_handle_md mdh, - struct lnet_process_id target, unsigned int portal, - __u64 match_bits, unsigned int offset) -{ - struct lnet_msg *msg; - struct lnet_libmd *md; - int cpt; - int rc; - - LASSERT(the_lnet.ln_refcount > 0); - - if (!list_empty(&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer(target.nid, 1)) { /* shall we now? 
*/ - CERROR("Dropping GET to %s: simulated failure\n", - libcfs_id2str(target)); - return -EIO; - } - - msg = kzalloc(sizeof(*msg), GFP_NOFS); - if (!msg) { - CERROR("Dropping GET to %s: ENOMEM on struct lnet_msg\n", - libcfs_id2str(target)); - return -ENOMEM; - } - - cpt = lnet_cpt_of_cookie(mdh.cookie); - lnet_res_lock(cpt); - - md = lnet_handle2md(&mdh); - if (!md || !md->md_threshold || md->md_me) { - CERROR("Dropping GET (%llu:%d:%s): MD (%d) invalid\n", - match_bits, portal, libcfs_id2str(target), - !md ? -1 : md->md_threshold); - if (md && md->md_me) - CERROR("REPLY MD also attached to portal %d\n", - md->md_me->me_portal); - - lnet_res_unlock(cpt); - - kfree(msg); - return -ENOENT; - } - - CDEBUG(D_NET, "%s -> %s\n", __func__, libcfs_id2str(target)); - - lnet_msg_attach_md(msg, md, 0, 0); - - lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0); - - msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits); - msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal); - msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset); - msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length); - - /* NB handles only looked up by creator (no flips) */ - msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie = - the_lnet.ln_interface_cookie; - msg->msg_hdr.msg.get.return_wmd.wh_object_cookie = - md->md_lh.lh_cookie; - - lnet_res_unlock(cpt); - - lnet_build_msg_event(msg, LNET_EVENT_SEND); - - rc = lnet_send(self, msg, LNET_NID_ANY); - if (rc < 0) { - CNETERR("Error sending GET to %s: %d\n", - libcfs_id2str(target), rc); - lnet_finalize(NULL, msg, rc); - } - - /* completion will be signalled by an event */ - return 0; -} -EXPORT_SYMBOL(LNetGet); - -/** - * Calculate distance to node at \a dstnid. - * - * \param dstnid Target NID. - * \param srcnidp If not NULL, NID of the local interface to reach \a dstnid - * is saved here. - * \param orderp If not NULL, order of the route to reach \a dstnid is saved - * here. 
- * - * \retval 0 If \a dstnid belongs to a local interface, and reserved option - * local_nid_dist_zero is set, which is the default. - * \retval positives Distance to target NID, i.e. number of hops plus one. - * \retval -EHOSTUNREACH If \a dstnid is not reachable. - */ -int -LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) -{ - struct list_head *e; - struct lnet_ni *ni; - struct lnet_remotenet *rnet; - __u32 dstnet = LNET_NIDNET(dstnid); - int hops; - int cpt; - __u32 order = 2; - struct list_head *rn_list; - - /* - * if !local_nid_dist_zero, I don't return a distance of 0 ever - * (when lustre sees a distance of 0, it substitutes 0@lo), so I - * keep order 0 free for 0@lo and order 1 free for a local NID - * match - */ - LASSERT(the_lnet.ln_refcount > 0); - - cpt = lnet_net_lock_current(); - - list_for_each(e, &the_lnet.ln_nis) { - ni = list_entry(e, struct lnet_ni, ni_list); - - if (ni->ni_nid == dstnid) { - if (srcnidp) - *srcnidp = dstnid; - if (orderp) { - if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND) - *orderp = 0; - else - *orderp = 1; - } - lnet_net_unlock(cpt); - - return local_nid_dist_zero ? 0 : 1; - } - - if (LNET_NIDNET(ni->ni_nid) == dstnet) { - /* - * Check if ni was originally created in - * current net namespace. - * If not, assign order above 0xffff0000, - * to make this ni not a priority. 
- */ - if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns)) - order += 0xffff0000; - - if (srcnidp) - *srcnidp = ni->ni_nid; - if (orderp) - *orderp = order; - lnet_net_unlock(cpt); - return 1; - } - - order++; - } - - rn_list = lnet_net2rnethash(dstnet); - list_for_each(e, rn_list) { - rnet = list_entry(e, struct lnet_remotenet, lrn_list); - - if (rnet->lrn_net == dstnet) { - struct lnet_route *route; - struct lnet_route *shortest = NULL; - __u32 shortest_hops = LNET_UNDEFINED_HOPS; - __u32 route_hops; - - LASSERT(!list_empty(&rnet->lrn_routes)); - - list_for_each_entry(route, &rnet->lrn_routes, - lr_list) { - route_hops = route->lr_hops; - if (route_hops == LNET_UNDEFINED_HOPS) - route_hops = 1; - if (!shortest || - route_hops < shortest_hops) { - shortest = route; - shortest_hops = route_hops; - } - } - - LASSERT(shortest); - hops = shortest_hops; - if (srcnidp) - *srcnidp = shortest->lr_gateway->lp_ni->ni_nid; - if (orderp) - *orderp = order; - lnet_net_unlock(cpt); - return hops + 1; - } - order++; - } - - lnet_net_unlock(cpt); - return -EHOSTUNREACH; -} -EXPORT_SYMBOL(LNetDist); diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c deleted file mode 100644 index 0091273c04b9..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-msg.c +++ /dev/null @@ -1,625 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-msg.c - * - * Message decoding, parsing and finalizing routines - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include - -void -lnet_build_unlink_event(struct lnet_libmd *md, struct lnet_event *ev) -{ - memset(ev, 0, sizeof(*ev)); - - ev->status = 0; - ev->unlinked = 1; - ev->type = LNET_EVENT_UNLINK; - lnet_md_deconstruct(md, &ev->md); - lnet_md2handle(&ev->md_handle, md); -} - -/* - * Don't need any lock, must be called after lnet_commit_md - */ -void -lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type) -{ - struct lnet_hdr *hdr = &msg->msg_hdr; - struct lnet_event *ev = &msg->msg_ev; - - LASSERT(!msg->msg_routing); - - ev->type = ev_type; - - if (ev_type == LNET_EVENT_SEND) { - /* event for active message */ - ev->target.nid = le64_to_cpu(hdr->dest_nid); - ev->target.pid = le32_to_cpu(hdr->dest_pid); - ev->initiator.nid = LNET_NID_ANY; - ev->initiator.pid = the_lnet.ln_pid; - ev->sender = LNET_NID_ANY; - } else { - /* event for passive message */ - ev->target.pid = hdr->dest_pid; - ev->target.nid = hdr->dest_nid; - ev->initiator.pid = hdr->src_pid; - ev->initiator.nid = hdr->src_nid; - ev->rlength = hdr->payload_length; - ev->sender = msg->msg_from; - ev->mlength = msg->msg_wanted; - ev->offset = msg->msg_offset; - } - - switch (ev_type) { - default: - LBUG(); - - case LNET_EVENT_PUT: /* passive PUT */ - 
ev->pt_index = hdr->msg.put.ptl_index; - ev->match_bits = hdr->msg.put.match_bits; - ev->hdr_data = hdr->msg.put.hdr_data; - return; - - case LNET_EVENT_GET: /* passive GET */ - ev->pt_index = hdr->msg.get.ptl_index; - ev->match_bits = hdr->msg.get.match_bits; - ev->hdr_data = 0; - return; - - case LNET_EVENT_ACK: /* ACK */ - ev->match_bits = hdr->msg.ack.match_bits; - ev->mlength = hdr->msg.ack.mlength; - return; - - case LNET_EVENT_REPLY: /* REPLY */ - return; - - case LNET_EVENT_SEND: /* active message */ - if (msg->msg_type == LNET_MSG_PUT) { - ev->pt_index = le32_to_cpu(hdr->msg.put.ptl_index); - ev->match_bits = le64_to_cpu(hdr->msg.put.match_bits); - ev->offset = le32_to_cpu(hdr->msg.put.offset); - ev->mlength = - ev->rlength = le32_to_cpu(hdr->payload_length); - ev->hdr_data = le64_to_cpu(hdr->msg.put.hdr_data); - - } else { - LASSERT(msg->msg_type == LNET_MSG_GET); - ev->pt_index = le32_to_cpu(hdr->msg.get.ptl_index); - ev->match_bits = le64_to_cpu(hdr->msg.get.match_bits); - ev->mlength = - ev->rlength = le32_to_cpu(hdr->msg.get.sink_length); - ev->offset = le32_to_cpu(hdr->msg.get.src_offset); - ev->hdr_data = 0; - } - return; - } -} - -void -lnet_msg_commit(struct lnet_msg *msg, int cpt) -{ - struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt]; - struct lnet_counters *counters = the_lnet.ln_counters[cpt]; - - /* routed message can be committed for both receiving and sending */ - LASSERT(!msg->msg_tx_committed); - - if (msg->msg_sending) { - LASSERT(!msg->msg_receiving); - - msg->msg_tx_cpt = cpt; - msg->msg_tx_committed = 1; - if (msg->msg_rx_committed) { /* routed message REPLY */ - LASSERT(msg->msg_onactivelist); - return; - } - } else { - LASSERT(!msg->msg_sending); - msg->msg_rx_cpt = cpt; - msg->msg_rx_committed = 1; - } - - LASSERT(!msg->msg_onactivelist); - msg->msg_onactivelist = 1; - list_add(&msg->msg_activelist, &container->msc_active); - - counters->msgs_alloc++; - if (counters->msgs_alloc > counters->msgs_max) - 
counters->msgs_max = counters->msgs_alloc; -} - -static void -lnet_msg_decommit_tx(struct lnet_msg *msg, int status) -{ - struct lnet_counters *counters; - struct lnet_event *ev = &msg->msg_ev; - - LASSERT(msg->msg_tx_committed); - if (status) - goto out; - - counters = the_lnet.ln_counters[msg->msg_tx_cpt]; - switch (ev->type) { - default: /* routed message */ - LASSERT(msg->msg_routing); - LASSERT(msg->msg_rx_committed); - LASSERT(!ev->type); - - counters->route_length += msg->msg_len; - counters->route_count++; - goto out; - - case LNET_EVENT_PUT: - /* should have been decommitted */ - LASSERT(!msg->msg_rx_committed); - /* overwritten while sending ACK */ - LASSERT(msg->msg_type == LNET_MSG_ACK); - msg->msg_type = LNET_MSG_PUT; /* fix type */ - break; - - case LNET_EVENT_SEND: - LASSERT(!msg->msg_rx_committed); - if (msg->msg_type == LNET_MSG_PUT) - counters->send_length += msg->msg_len; - break; - - case LNET_EVENT_GET: - LASSERT(msg->msg_rx_committed); - /* - * overwritten while sending reply, we should never be - * here for optimized GET - */ - LASSERT(msg->msg_type == LNET_MSG_REPLY); - msg->msg_type = LNET_MSG_GET; /* fix type */ - break; - } - - counters->send_count++; - out: - lnet_return_tx_credits_locked(msg); - msg->msg_tx_committed = 0; -} - -static void -lnet_msg_decommit_rx(struct lnet_msg *msg, int status) -{ - struct lnet_counters *counters; - struct lnet_event *ev = &msg->msg_ev; - - LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */ - LASSERT(msg->msg_rx_committed); - - if (status) - goto out; - - counters = the_lnet.ln_counters[msg->msg_rx_cpt]; - switch (ev->type) { - default: - LASSERT(!ev->type); - LASSERT(msg->msg_routing); - goto out; - - case LNET_EVENT_ACK: - LASSERT(msg->msg_type == LNET_MSG_ACK); - break; - - case LNET_EVENT_GET: - /* - * type is "REPLY" if it's an optimized GET on passive side, - * because optimized GET will never be committed for sending, - * so message type wouldn't be changed back to "GET" by - * 
lnet_msg_decommit_tx(), see details in lnet_parse_get() - */ - LASSERT(msg->msg_type == LNET_MSG_REPLY || - msg->msg_type == LNET_MSG_GET); - counters->send_length += msg->msg_wanted; - break; - - case LNET_EVENT_PUT: - LASSERT(msg->msg_type == LNET_MSG_PUT); - break; - - case LNET_EVENT_REPLY: - /* - * type is "GET" if it's an optimized GET on active side, - * see details in lnet_create_reply_msg() - */ - LASSERT(msg->msg_type == LNET_MSG_GET || - msg->msg_type == LNET_MSG_REPLY); - break; - } - - counters->recv_count++; - if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY) - counters->recv_length += msg->msg_wanted; - - out: - lnet_return_rx_credits_locked(msg); - msg->msg_rx_committed = 0; -} - -void -lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status) -{ - int cpt2 = cpt; - - LASSERT(msg->msg_tx_committed || msg->msg_rx_committed); - LASSERT(msg->msg_onactivelist); - - if (msg->msg_tx_committed) { /* always decommit for sending first */ - LASSERT(cpt == msg->msg_tx_cpt); - lnet_msg_decommit_tx(msg, status); - } - - if (msg->msg_rx_committed) { - /* forwarding msg committed for both receiving and sending */ - if (cpt != msg->msg_rx_cpt) { - lnet_net_unlock(cpt); - cpt2 = msg->msg_rx_cpt; - lnet_net_lock(cpt2); - } - lnet_msg_decommit_rx(msg, status); - } - - list_del(&msg->msg_activelist); - msg->msg_onactivelist = 0; - - the_lnet.ln_counters[cpt2]->msgs_alloc--; - - if (cpt2 != cpt) { - lnet_net_unlock(cpt2); - lnet_net_lock(cpt); - } -} - -void -lnet_msg_attach_md(struct lnet_msg *msg, struct lnet_libmd *md, - unsigned int offset, unsigned int mlen) -{ - /* NB: @offset and @len are only useful for receiving */ - /* - * Here, we attach the MD on lnet_msg and mark it busy and - * decrementing its threshold. Come what may, the lnet_msg "owns" - * the MD until a call to lnet_msg_detach_md or lnet_finalize() - * signals completion. 
- */ - LASSERT(!msg->msg_routing); - - msg->msg_md = md; - if (msg->msg_receiving) { /* committed for receiving */ - msg->msg_offset = offset; - msg->msg_wanted = mlen; - } - - md->md_refcount++; - if (md->md_threshold != LNET_MD_THRESH_INF) { - LASSERT(md->md_threshold > 0); - md->md_threshold--; - } - - /* build umd in event */ - lnet_md2handle(&msg->msg_ev.md_handle, md); - lnet_md_deconstruct(md, &msg->msg_ev.md); -} - -void -lnet_msg_detach_md(struct lnet_msg *msg, int status) -{ - struct lnet_libmd *md = msg->msg_md; - int unlink; - - /* Now it's safe to drop my caller's ref */ - md->md_refcount--; - LASSERT(md->md_refcount >= 0); - - unlink = lnet_md_unlinkable(md); - if (md->md_eq) { - msg->msg_ev.status = status; - msg->msg_ev.unlinked = unlink; - lnet_eq_enqueue_event(md->md_eq, &msg->msg_ev); - } - - if (unlink) - lnet_md_unlink(md); - - msg->msg_md = NULL; -} - -static int -lnet_complete_msg_locked(struct lnet_msg *msg, int cpt) -{ - struct lnet_handle_wire ack_wmd; - int rc; - int status = msg->msg_ev.status; - - LASSERT(msg->msg_onactivelist); - - if (!status && msg->msg_ack) { - /* Only send an ACK if the PUT completed successfully */ - - lnet_msg_decommit(msg, cpt, 0); - - msg->msg_ack = 0; - lnet_net_unlock(cpt); - - LASSERT(msg->msg_ev.type == LNET_EVENT_PUT); - LASSERT(!msg->msg_routing); - - ack_wmd = msg->msg_hdr.msg.put.ack_wmd; - - lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0); - - msg->msg_hdr.msg.ack.dst_wmd = ack_wmd; - msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits; - msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength); - - /* - * NB: we probably want to use NID of msg::msg_from as 3rd - * parameter (router NID) if it's routed message - */ - rc = lnet_send(msg->msg_ev.target.nid, msg, LNET_NID_ANY); - - lnet_net_lock(cpt); - /* - * NB: message is committed for sending, we should return - * on success because LND will finalize this message later. 
- * - * Also, there is possibility that message is committed for - * sending and also failed before delivering to LND, - * i.e: ENOMEM, in that case we can't fall through either - * because CPT for sending can be different with CPT for - * receiving, so we should return back to lnet_finalize() - * to make sure we are locking the correct partition. - */ - return rc; - - } else if (!status && /* OK so far */ - (msg->msg_routing && !msg->msg_sending)) { - /* not forwarded */ - LASSERT(!msg->msg_receiving); /* called back recv already */ - lnet_net_unlock(cpt); - - rc = lnet_send(LNET_NID_ANY, msg, LNET_NID_ANY); - - lnet_net_lock(cpt); - /* - * NB: message is committed for sending, we should return - * on success because LND will finalize this message later. - * - * Also, there is possibility that message is committed for - * sending and also failed before delivering to LND, - * i.e: ENOMEM, in that case we can't fall through either: - * - The rule is message must decommit for sending first if - * the it's committed for both sending and receiving - * - CPT for sending can be different with CPT for receiving, - * so we should return back to lnet_finalize() to make - * sure we are locking the correct partition. 
- */ - return rc; - } - - lnet_msg_decommit(msg, cpt, status); - kfree(msg); - return 0; -} - -void -lnet_finalize(struct lnet_ni *ni, struct lnet_msg *msg, int status) -{ - struct lnet_msg_container *container; - int my_slot; - int cpt; - int rc; - int i; - - LASSERT(!in_interrupt()); - - if (!msg) - return; - - msg->msg_ev.status = status; - - if (msg->msg_md) { - cpt = lnet_cpt_of_cookie(msg->msg_md->md_lh.lh_cookie); - - lnet_res_lock(cpt); - lnet_msg_detach_md(msg, status); - lnet_res_unlock(cpt); - } - - again: - rc = 0; - if (!msg->msg_tx_committed && !msg->msg_rx_committed) { - /* not committed to network yet */ - LASSERT(!msg->msg_onactivelist); - kfree(msg); - return; - } - - /* - * NB: routed message can be committed for both receiving and sending, - * we should finalize in LIFO order and keep counters correct. - * (finalize sending first then finalize receiving) - */ - cpt = msg->msg_tx_committed ? msg->msg_tx_cpt : msg->msg_rx_cpt; - lnet_net_lock(cpt); - - container = the_lnet.ln_msg_containers[cpt]; - list_add_tail(&msg->msg_list, &container->msc_finalizing); - - /* - * Recursion breaker. 
Don't complete the message here if I am (or - * enough other threads are) already completing messages - */ - my_slot = -1; - for (i = 0; i < container->msc_nfinalizers; i++) { - if (container->msc_finalizers[i] == current) - break; - - if (my_slot < 0 && !container->msc_finalizers[i]) - my_slot = i; - } - - if (i < container->msc_nfinalizers || my_slot < 0) { - lnet_net_unlock(cpt); - return; - } - - container->msc_finalizers[my_slot] = current; - - while (!list_empty(&container->msc_finalizing)) { - msg = list_entry(container->msc_finalizing.next, - struct lnet_msg, msg_list); - - list_del(&msg->msg_list); - - /* - * NB drops and regains the lnet lock if it actually does - * anything, so my finalizing friends can chomp along too - */ - rc = lnet_complete_msg_locked(msg, cpt); - if (rc) - break; - } - - if (unlikely(!list_empty(&the_lnet.ln_delay_rules))) { - lnet_net_unlock(cpt); - lnet_delay_rule_check(); - lnet_net_lock(cpt); - } - - container->msc_finalizers[my_slot] = NULL; - lnet_net_unlock(cpt); - - if (rc) - goto again; -} -EXPORT_SYMBOL(lnet_finalize); - -void -lnet_msg_container_cleanup(struct lnet_msg_container *container) -{ - int count = 0; - - if (!container->msc_init) - return; - - while (!list_empty(&container->msc_active)) { - struct lnet_msg *msg; - - msg = list_entry(container->msc_active.next, - struct lnet_msg, msg_activelist); - LASSERT(msg->msg_onactivelist); - msg->msg_onactivelist = 0; - list_del(&msg->msg_activelist); - kfree(msg); - count++; - } - - if (count > 0) - CERROR("%d active msg on exit\n", count); - - kvfree(container->msc_finalizers); - container->msc_finalizers = NULL; - container->msc_init = 0; -} - -int -lnet_msg_container_setup(struct lnet_msg_container *container, int cpt) -{ - container->msc_init = 1; - - INIT_LIST_HEAD(&container->msc_active); - INIT_LIST_HEAD(&container->msc_finalizing); - - /* number of CPUs */ - container->msc_nfinalizers = cfs_cpt_weight(lnet_cpt_table(), cpt); - - container->msc_finalizers = 
kvzalloc_cpt(container->msc_nfinalizers * - sizeof(*container->msc_finalizers), - GFP_KERNEL, cpt); - - if (!container->msc_finalizers) { - CERROR("Failed to allocate message finalizers\n"); - lnet_msg_container_cleanup(container); - return -ENOMEM; - } - - return 0; -} - -void -lnet_msg_containers_destroy(void) -{ - struct lnet_msg_container *container; - int i; - - if (!the_lnet.ln_msg_containers) - return; - - cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) - lnet_msg_container_cleanup(container); - - cfs_percpt_free(the_lnet.ln_msg_containers); - the_lnet.ln_msg_containers = NULL; -} - -int -lnet_msg_containers_create(void) -{ - struct lnet_msg_container *container; - int rc; - int i; - - the_lnet.ln_msg_containers = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*container)); - - if (!the_lnet.ln_msg_containers) { - CERROR("Failed to allocate cpu-partition data for network\n"); - return -ENOMEM; - } - - cfs_percpt_for_each(container, i, the_lnet.ln_msg_containers) { - rc = lnet_msg_container_setup(container, i); - if (rc) { - lnet_msg_containers_destroy(); - return rc; - } - } - - return 0; -} diff --git a/drivers/staging/lustre/lnet/lnet/lib-ptl.c b/drivers/staging/lustre/lnet/lnet/lib-ptl.c deleted file mode 100644 index fc47379c5938..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-ptl.c +++ /dev/null @@ -1,987 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/lib-ptl.c - * - * portal & match routines - * - * Author: liang@whamcloud.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include - -/* NB: add /proc interfaces in upcoming patches */ -int portal_rotor = LNET_PTL_ROTOR_HASH_RT; -module_param(portal_rotor, int, 0644); -MODULE_PARM_DESC(portal_rotor, "redirect PUTs to different cpu-partitions"); - -static int -lnet_ptl_match_type(unsigned int index, struct lnet_process_id match_id, - __u64 mbits, __u64 ignore_bits) -{ - struct lnet_portal *ptl = the_lnet.ln_portals[index]; - int unique; - - unique = !ignore_bits && - match_id.nid != LNET_NID_ANY && - match_id.pid != LNET_PID_ANY; - - LASSERT(!lnet_ptl_is_unique(ptl) || !lnet_ptl_is_wildcard(ptl)); - - /* prefer to check w/o any lock */ - if (likely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) - goto match; - - /* unset, new portal */ - lnet_ptl_lock(ptl); - /* check again with lock */ - if (unlikely(lnet_ptl_is_unique(ptl) || lnet_ptl_is_wildcard(ptl))) { - lnet_ptl_unlock(ptl); - goto match; - } - - /* still not set */ - if (unique) - lnet_ptl_setopt(ptl, LNET_PTL_MATCH_UNIQUE); - else - lnet_ptl_setopt(ptl, LNET_PTL_MATCH_WILDCARD); - - lnet_ptl_unlock(ptl); - - return 1; - - match: - if ((lnet_ptl_is_unique(ptl) && !unique) || - (lnet_ptl_is_wildcard(ptl) && unique)) - return 0; - return 1; -} - -static void -lnet_ptl_enable_mt(struct lnet_portal *ptl, int cpt) -{ - struct lnet_match_table *mtable = ptl->ptl_mtables[cpt]; - int i; - - /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */ - LASSERT(lnet_ptl_is_wildcard(ptl)); - - mtable->mt_enabled = 1; - - ptl->ptl_mt_maps[ptl->ptl_mt_nmaps] = cpt; - 
for (i = ptl->ptl_mt_nmaps - 1; i >= 0; i--) { - LASSERT(ptl->ptl_mt_maps[i] != cpt); - if (ptl->ptl_mt_maps[i] < cpt) - break; - - /* swap to order */ - ptl->ptl_mt_maps[i + 1] = ptl->ptl_mt_maps[i]; - ptl->ptl_mt_maps[i] = cpt; - } - - ptl->ptl_mt_nmaps++; -} - -static void -lnet_ptl_disable_mt(struct lnet_portal *ptl, int cpt) -{ - struct lnet_match_table *mtable = ptl->ptl_mtables[cpt]; - int i; - - /* with hold of both lnet_res_lock(cpt) and lnet_ptl_lock */ - LASSERT(lnet_ptl_is_wildcard(ptl)); - - if (LNET_CPT_NUMBER == 1) - return; /* never disable the only match-table */ - - mtable->mt_enabled = 0; - - LASSERT(ptl->ptl_mt_nmaps > 0 && - ptl->ptl_mt_nmaps <= LNET_CPT_NUMBER); - - /* remove it from mt_maps */ - ptl->ptl_mt_nmaps--; - for (i = 0; i < ptl->ptl_mt_nmaps; i++) { - if (ptl->ptl_mt_maps[i] >= cpt) /* overwrite it */ - ptl->ptl_mt_maps[i] = ptl->ptl_mt_maps[i + 1]; - } -} - -static int -lnet_try_match_md(struct lnet_libmd *md, - struct lnet_match_info *info, struct lnet_msg *msg) -{ - /* - * ALWAYS called holding the lnet_res_lock, and can't lnet_res_unlock; - * lnet_match_blocked_msg() relies on this to avoid races - */ - unsigned int offset; - unsigned int mlength; - struct lnet_me *me = md->md_me; - - /* MD exhausted */ - if (lnet_md_exhausted(md)) - return LNET_MATCHMD_NONE | LNET_MATCHMD_EXHAUSTED; - - /* mismatched MD op */ - if (!(md->md_options & info->mi_opc)) - return LNET_MATCHMD_NONE; - - /* mismatched ME nid/pid? */ - if (me->me_match_id.nid != LNET_NID_ANY && - me->me_match_id.nid != info->mi_id.nid) - return LNET_MATCHMD_NONE; - - if (me->me_match_id.pid != LNET_PID_ANY && - me->me_match_id.pid != info->mi_id.pid) - return LNET_MATCHMD_NONE; - - /* mismatched ME matchbits? */ - if ((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) - return LNET_MATCHMD_NONE; - - /* Hurrah! This _is_ a match; check it out... 
*/ - - if (!(md->md_options & LNET_MD_MANAGE_REMOTE)) - offset = md->md_offset; - else - offset = info->mi_roffset; - - if (md->md_options & LNET_MD_MAX_SIZE) { - mlength = md->md_max_size; - LASSERT(md->md_offset + mlength <= md->md_length); - } else { - mlength = md->md_length - offset; - } - - if (info->mi_rlength <= mlength) { /* fits in allowed space */ - mlength = info->mi_rlength; - } else if (!(md->md_options & LNET_MD_TRUNCATE)) { - /* this packet _really_ is too big */ - CERROR("Matching packet from %s, match %llu length %d too big: %d left, %d allowed\n", - libcfs_id2str(info->mi_id), info->mi_mbits, - info->mi_rlength, md->md_length - offset, mlength); - - return LNET_MATCHMD_DROP; - } - - /* Commit to this ME/MD */ - CDEBUG(D_NET, "Incoming %s index %x from %s of length %d/%d into md %#llx [%d] + %d\n", - (info->mi_opc == LNET_MD_OP_PUT) ? "put" : "get", - info->mi_portal, libcfs_id2str(info->mi_id), mlength, - info->mi_rlength, md->md_lh.lh_cookie, md->md_niov, offset); - - lnet_msg_attach_md(msg, md, offset, mlength); - md->md_offset = offset + mlength; - - if (!lnet_md_exhausted(md)) - return LNET_MATCHMD_OK; - - /* - * Auto-unlink NOW, so the ME gets unlinked if required. - * We bumped md->md_refcount above so the MD just gets flagged - * for unlink when it is finalized. - */ - if (md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) - lnet_md_unlink(md); - - return LNET_MATCHMD_OK | LNET_MATCHMD_EXHAUSTED; -} - -static struct lnet_match_table * -lnet_match2mt(struct lnet_portal *ptl, struct lnet_process_id id, __u64 mbits) -{ - if (LNET_CPT_NUMBER == 1) - return ptl->ptl_mtables[0]; /* the only one */ - - /* if it's a unique portal, return match-table hashed by NID */ - return lnet_ptl_is_unique(ptl) ? 
- ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL; -} - -struct lnet_match_table * -lnet_mt_of_attach(unsigned int index, struct lnet_process_id id, - __u64 mbits, __u64 ignore_bits, enum lnet_ins_pos pos) -{ - struct lnet_portal *ptl; - struct lnet_match_table *mtable; - - /* NB: called w/o lock */ - LASSERT(index < the_lnet.ln_nportals); - - if (!lnet_ptl_match_type(index, id, mbits, ignore_bits)) - return NULL; - - ptl = the_lnet.ln_portals[index]; - - mtable = lnet_match2mt(ptl, id, mbits); - if (mtable) /* unique portal or only one match-table */ - return mtable; - - /* it's a wildcard portal */ - switch (pos) { - default: - return NULL; - case LNET_INS_BEFORE: - case LNET_INS_AFTER: - /* - * posted by no affinity thread, always hash to specific - * match-table to avoid buffer stealing which is heavy - */ - return ptl->ptl_mtables[ptl->ptl_index % LNET_CPT_NUMBER]; - case LNET_INS_LOCAL: - /* posted by cpu-affinity thread */ - return ptl->ptl_mtables[lnet_cpt_current()]; - } -} - -static struct lnet_match_table * -lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg) -{ - struct lnet_match_table *mtable; - struct lnet_portal *ptl; - unsigned int nmaps; - unsigned int rotor; - unsigned int cpt; - bool routed; - - /* NB: called w/o lock */ - LASSERT(info->mi_portal < the_lnet.ln_nportals); - ptl = the_lnet.ln_portals[info->mi_portal]; - - LASSERT(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)); - - mtable = lnet_match2mt(ptl, info->mi_id, info->mi_mbits); - if (mtable) - return mtable; - - /* it's a wildcard portal */ - routed = LNET_NIDNET(msg->msg_hdr.src_nid) != - LNET_NIDNET(msg->msg_hdr.dest_nid); - - if (portal_rotor == LNET_PTL_ROTOR_OFF || - (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) { - cpt = lnet_cpt_current(); - if (ptl->ptl_mtables[cpt]->mt_enabled) - return ptl->ptl_mtables[cpt]; - } - - rotor = ptl->ptl_rotor++; /* get round-robin factor */ - if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed) - cpt = 
lnet_cpt_of_nid(msg->msg_hdr.src_nid); - else - cpt = rotor % LNET_CPT_NUMBER; - - if (!ptl->ptl_mtables[cpt]->mt_enabled) { - /* is there any active entry for this portal? */ - nmaps = ptl->ptl_mt_nmaps; - /* map to an active mtable to avoid heavy "stealing" */ - if (nmaps) { - /* - * NB: there is possibility that ptl_mt_maps is being - * changed because we are not under protection of - * lnet_ptl_lock, but it shouldn't hurt anything - */ - cpt = ptl->ptl_mt_maps[rotor % nmaps]; - } - } - - return ptl->ptl_mtables[cpt]; -} - -static int -lnet_mt_test_exhausted(struct lnet_match_table *mtable, int pos) -{ - __u64 *bmap; - int i; - - if (!lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])) - return 0; - - if (pos < 0) { /* check all bits */ - for (i = 0; i < LNET_MT_EXHAUSTED_BMAP; i++) { - if (mtable->mt_exhausted[i] != (__u64)(-1)) - return 0; - } - return 1; - } - - LASSERT(pos <= LNET_MT_HASH_IGNORE); - /* mtable::mt_mhash[pos] is marked as exhausted or not */ - bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64]; - pos &= (1 << LNET_MT_BITS_U64) - 1; - - return (*bmap & BIT(pos)); -} - -static void -lnet_mt_set_exhausted(struct lnet_match_table *mtable, int pos, int exhausted) -{ - __u64 *bmap; - - LASSERT(lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])); - LASSERT(pos <= LNET_MT_HASH_IGNORE); - - /* set mtable::mt_mhash[pos] as exhausted/non-exhausted */ - bmap = &mtable->mt_exhausted[pos >> LNET_MT_BITS_U64]; - pos &= (1 << LNET_MT_BITS_U64) - 1; - - if (!exhausted) - *bmap &= ~(1ULL << pos); - else - *bmap |= 1ULL << pos; -} - -struct list_head * -lnet_mt_match_head(struct lnet_match_table *mtable, - struct lnet_process_id id, __u64 mbits) -{ - struct lnet_portal *ptl = the_lnet.ln_portals[mtable->mt_portal]; - unsigned long hash = mbits; - - if (!lnet_ptl_is_wildcard(ptl)) { - hash += id.nid + id.pid; - - LASSERT(lnet_ptl_is_unique(ptl)); - hash = hash_long(hash, LNET_MT_HASH_BITS); - } - return &mtable->mt_mhash[hash & 
LNET_MT_HASH_MASK]; -} - -int -lnet_mt_match_md(struct lnet_match_table *mtable, - struct lnet_match_info *info, struct lnet_msg *msg) -{ - struct list_head *head; - struct lnet_me *me; - struct lnet_me *tmp; - int exhausted = 0; - int rc; - - /* any ME with ignore bits? */ - if (!list_empty(&mtable->mt_mhash[LNET_MT_HASH_IGNORE])) - head = &mtable->mt_mhash[LNET_MT_HASH_IGNORE]; - else - head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); - again: - /* NB: only wildcard portal needs to return LNET_MATCHMD_EXHAUSTED */ - if (lnet_ptl_is_wildcard(the_lnet.ln_portals[mtable->mt_portal])) - exhausted = LNET_MATCHMD_EXHAUSTED; - - list_for_each_entry_safe(me, tmp, head, me_list) { - /* ME attached but MD not attached yet */ - if (!me->me_md) - continue; - - LASSERT(me == me->me_md->md_me); - - rc = lnet_try_match_md(me->me_md, info, msg); - if (!(rc & LNET_MATCHMD_EXHAUSTED)) - exhausted = 0; /* mlist is not empty */ - - if (rc & LNET_MATCHMD_FINISH) { - /* - * don't return EXHAUSTED bit because we don't know - * whether the mlist is empty or not - */ - return rc & ~LNET_MATCHMD_EXHAUSTED; - } - } - - if (exhausted == LNET_MATCHMD_EXHAUSTED) { /* @head is exhausted */ - lnet_mt_set_exhausted(mtable, head - mtable->mt_mhash, 1); - if (!lnet_mt_test_exhausted(mtable, -1)) - exhausted = 0; - } - - if (!exhausted && head == &mtable->mt_mhash[LNET_MT_HASH_IGNORE]) { - head = lnet_mt_match_head(mtable, info->mi_id, info->mi_mbits); - goto again; /* re-check MEs w/o ignore-bits */ - } - - if (info->mi_opc == LNET_MD_OP_GET || - !lnet_ptl_is_lazy(the_lnet.ln_portals[info->mi_portal])) - return exhausted | LNET_MATCHMD_DROP; - - return exhausted | LNET_MATCHMD_NONE; -} - -static int -lnet_ptl_match_early(struct lnet_portal *ptl, struct lnet_msg *msg) -{ - int rc; - - /* - * message arrived before any buffer posting on this portal, - * simply delay or drop this message - */ - if (likely(lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl))) - return 0; - - 
lnet_ptl_lock(ptl); - /* check it again with hold of lock */ - if (lnet_ptl_is_wildcard(ptl) || lnet_ptl_is_unique(ptl)) { - lnet_ptl_unlock(ptl); - return 0; - } - - if (lnet_ptl_is_lazy(ptl)) { - if (msg->msg_rx_ready_delay) { - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, - &ptl->ptl_msg_delayed); - } - rc = LNET_MATCHMD_NONE; - } else { - rc = LNET_MATCHMD_DROP; - } - - lnet_ptl_unlock(ptl); - return rc; -} - -static int -lnet_ptl_match_delay(struct lnet_portal *ptl, - struct lnet_match_info *info, struct lnet_msg *msg) -{ - int first = ptl->ptl_mt_maps[0]; /* read w/o lock */ - int rc = 0; - int i; - - /** - * Steal buffer from other CPTs, and delay msg if nothing to - * steal. This function is more expensive than a regular - * match, but we don't expect it can happen a lot. The return - * code contains one of LNET_MATCHMD_OK, LNET_MATCHMD_DROP, or - * LNET_MATCHMD_NONE. - */ - LASSERT(lnet_ptl_is_wildcard(ptl)); - - for (i = 0; i < LNET_CPT_NUMBER; i++) { - struct lnet_match_table *mtable; - int cpt; - - cpt = (first + i) % LNET_CPT_NUMBER; - mtable = ptl->ptl_mtables[cpt]; - if (i && i != LNET_CPT_NUMBER - 1 && !mtable->mt_enabled) - continue; - - lnet_res_lock(cpt); - lnet_ptl_lock(ptl); - - if (!i) { - /* The first try, add to stealing list. */ - list_add_tail(&msg->msg_list, - &ptl->ptl_msg_stealing); - } - - if (!list_empty(&msg->msg_list)) { - /* On stealing list. */ - rc = lnet_mt_match_md(mtable, info, msg); - - if ((rc & LNET_MATCHMD_EXHAUSTED) && - mtable->mt_enabled) - lnet_ptl_disable_mt(ptl, cpt); - - if (rc & LNET_MATCHMD_FINISH) { - /* Match found, remove from stealing list. */ - list_del_init(&msg->msg_list); - } else if (i == LNET_CPT_NUMBER - 1 || /* (1) */ - !ptl->ptl_mt_nmaps || /* (2) */ - (ptl->ptl_mt_nmaps == 1 && /* (3) */ - ptl->ptl_mt_maps[0] == cpt)) { - /** - * No match found, and this is either - * (1) the last cpt to check, or - * (2) there is no active cpt, or - * (3) this is the only active cpt. 
- * There is nothing to steal: delay or - * drop the message. - */ - list_del_init(&msg->msg_list); - - if (lnet_ptl_is_lazy(ptl)) { - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, - &ptl->ptl_msg_delayed); - rc = LNET_MATCHMD_NONE; - } else { - rc = LNET_MATCHMD_DROP; - } - } else { - /* Do another iteration. */ - rc = 0; - } - } else { - /** - * No longer on stealing list: another thread - * matched the message in lnet_ptl_attach_md(). - * We are now expected to handle the message. - */ - rc = !msg->msg_md ? - LNET_MATCHMD_DROP : LNET_MATCHMD_OK; - } - - lnet_ptl_unlock(ptl); - lnet_res_unlock(cpt); - - /** - * Note that test (1) above ensures that we always - * exit the loop through this break statement. - * - * LNET_MATCHMD_NONE means msg was added to the - * delayed queue, and we may no longer reference it - * after lnet_ptl_unlock() and lnet_res_unlock(). - */ - if (rc & (LNET_MATCHMD_FINISH | LNET_MATCHMD_NONE)) - break; - } - - return rc; -} - -int -lnet_ptl_match_md(struct lnet_match_info *info, struct lnet_msg *msg) -{ - struct lnet_match_table *mtable; - struct lnet_portal *ptl; - int rc; - - CDEBUG(D_NET, "Request from %s of length %d into portal %d MB=%#llx\n", - libcfs_id2str(info->mi_id), info->mi_rlength, info->mi_portal, - info->mi_mbits); - - if (info->mi_portal >= the_lnet.ln_nportals) { - CERROR("Invalid portal %d not in [0-%d]\n", - info->mi_portal, the_lnet.ln_nportals); - return LNET_MATCHMD_DROP; - } - - ptl = the_lnet.ln_portals[info->mi_portal]; - rc = lnet_ptl_match_early(ptl, msg); - if (rc) /* matched or delayed early message */ - return rc; - - mtable = lnet_mt_of_match(info, msg); - lnet_res_lock(mtable->mt_cpt); - - if (the_lnet.ln_shutdown) { - rc = LNET_MATCHMD_DROP; - goto out1; - } - - rc = lnet_mt_match_md(mtable, info, msg); - if ((rc & LNET_MATCHMD_EXHAUSTED) && mtable->mt_enabled) { - lnet_ptl_lock(ptl); - lnet_ptl_disable_mt(ptl, mtable->mt_cpt); - lnet_ptl_unlock(ptl); - } - - if (rc & LNET_MATCHMD_FINISH) /* 
matched or dropping */ - goto out1; - - if (!msg->msg_rx_ready_delay) - goto out1; - - LASSERT(lnet_ptl_is_lazy(ptl)); - LASSERT(!msg->msg_rx_delayed); - - /* NB: we don't expect "delay" can happen a lot */ - if (lnet_ptl_is_unique(ptl) || LNET_CPT_NUMBER == 1) { - lnet_ptl_lock(ptl); - - msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, &ptl->ptl_msg_delayed); - - lnet_ptl_unlock(ptl); - lnet_res_unlock(mtable->mt_cpt); - rc = LNET_MATCHMD_NONE; - } else { - lnet_res_unlock(mtable->mt_cpt); - rc = lnet_ptl_match_delay(ptl, info, msg); - } - - /* LNET_MATCHMD_NONE means msg was added to the delay queue */ - if (rc & LNET_MATCHMD_NONE) { - CDEBUG(D_NET, - "Delaying %s from %s ptl %d MB %#llx off %d len %d\n", - info->mi_opc == LNET_MD_OP_PUT ? "PUT" : "GET", - libcfs_id2str(info->mi_id), info->mi_portal, - info->mi_mbits, info->mi_roffset, info->mi_rlength); - } - goto out0; - out1: - lnet_res_unlock(mtable->mt_cpt); - out0: - /* EXHAUSTED bit is only meaningful for internal functions */ - return rc & ~LNET_MATCHMD_EXHAUSTED; -} - -void -lnet_ptl_detach_md(struct lnet_me *me, struct lnet_libmd *md) -{ - LASSERT(me->me_md == md && md->md_me == me); - - me->me_md = NULL; - md->md_me = NULL; -} - -/* called with lnet_res_lock held */ -void -lnet_ptl_attach_md(struct lnet_me *me, struct lnet_libmd *md, - struct list_head *matches, struct list_head *drops) -{ - struct lnet_portal *ptl = the_lnet.ln_portals[me->me_portal]; - struct lnet_match_table *mtable; - struct list_head *head; - struct lnet_msg *tmp; - struct lnet_msg *msg; - int exhausted = 0; - int cpt; - - LASSERT(!md->md_refcount); /* a brand new MD */ - - me->me_md = md; - md->md_me = me; - - cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie); - mtable = ptl->ptl_mtables[cpt]; - - if (list_empty(&ptl->ptl_msg_stealing) && - list_empty(&ptl->ptl_msg_delayed) && - !lnet_mt_test_exhausted(mtable, me->me_pos)) - return; - - lnet_ptl_lock(ptl); - head = &ptl->ptl_msg_stealing; - again: - 
list_for_each_entry_safe(msg, tmp, head, msg_list) { - struct lnet_match_info info; - struct lnet_hdr *hdr; - int rc; - - LASSERT(msg->msg_rx_delayed || head == &ptl->ptl_msg_stealing); - - hdr = &msg->msg_hdr; - info.mi_id.nid = hdr->src_nid; - info.mi_id.pid = hdr->src_pid; - info.mi_opc = LNET_MD_OP_PUT; - info.mi_portal = hdr->msg.put.ptl_index; - info.mi_rlength = hdr->payload_length; - info.mi_roffset = hdr->msg.put.offset; - info.mi_mbits = hdr->msg.put.match_bits; - - rc = lnet_try_match_md(md, &info, msg); - - exhausted = (rc & LNET_MATCHMD_EXHAUSTED); - if (rc & LNET_MATCHMD_NONE) { - if (exhausted) - break; - continue; - } - - /* Hurrah! This _is_ a match */ - LASSERT(rc & LNET_MATCHMD_FINISH); - list_del_init(&msg->msg_list); - - if (head == &ptl->ptl_msg_stealing) { - if (exhausted) - break; - /* stealing thread will handle the message */ - continue; - } - - if (rc & LNET_MATCHMD_OK) { - list_add_tail(&msg->msg_list, matches); - - CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n", - libcfs_id2str(info.mi_id), - info.mi_portal, info.mi_mbits, - info.mi_roffset, info.mi_rlength); - } else { - list_add_tail(&msg->msg_list, drops); - } - - if (exhausted) - break; - } - - if (!exhausted && head == &ptl->ptl_msg_stealing) { - head = &ptl->ptl_msg_delayed; - goto again; - } - - if (lnet_ptl_is_wildcard(ptl) && !exhausted) { - lnet_mt_set_exhausted(mtable, me->me_pos, 0); - if (!mtable->mt_enabled) - lnet_ptl_enable_mt(ptl, cpt); - } - - lnet_ptl_unlock(ptl); -} - -static void -lnet_ptl_cleanup(struct lnet_portal *ptl) -{ - struct lnet_match_table *mtable; - int i; - - if (!ptl->ptl_mtables) /* uninitialized portal */ - return; - - LASSERT(list_empty(&ptl->ptl_msg_delayed)); - LASSERT(list_empty(&ptl->ptl_msg_stealing)); - cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) { - struct list_head *mhash; - struct lnet_me *me; - int j; - - if (!mtable->mt_mhash) /* uninitialized match-table */ - continue; - - mhash = 
mtable->mt_mhash; - /* cleanup ME */ - for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) { - while (!list_empty(&mhash[j])) { - me = list_entry(mhash[j].next, - struct lnet_me, me_list); - CERROR("Active ME %p on exit\n", me); - list_del(&me->me_list); - kfree(me); - } - } - /* the extra entry is for MEs with ignore bits */ - kvfree(mhash); - } - - cfs_percpt_free(ptl->ptl_mtables); - ptl->ptl_mtables = NULL; -} - -static int -lnet_ptl_setup(struct lnet_portal *ptl, int index) -{ - struct lnet_match_table *mtable; - struct list_head *mhash; - int i; - int j; - - ptl->ptl_mtables = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(struct lnet_match_table)); - if (!ptl->ptl_mtables) { - CERROR("Failed to create match table for portal %d\n", index); - return -ENOMEM; - } - - ptl->ptl_index = index; - INIT_LIST_HEAD(&ptl->ptl_msg_delayed); - INIT_LIST_HEAD(&ptl->ptl_msg_stealing); - spin_lock_init(&ptl->ptl_lock); - cfs_percpt_for_each(mtable, i, ptl->ptl_mtables) { - /* the extra entry is for MEs with ignore bits */ - mhash = kvzalloc_cpt(sizeof(*mhash) * (LNET_MT_HASH_SIZE + 1), - GFP_KERNEL, i); - if (!mhash) { - CERROR("Failed to create match hash for portal %d\n", - index); - goto failed; - } - - memset(&mtable->mt_exhausted[0], -1, - sizeof(mtable->mt_exhausted[0]) * - LNET_MT_EXHAUSTED_BMAP); - mtable->mt_mhash = mhash; - for (j = 0; j < LNET_MT_HASH_SIZE + 1; j++) - INIT_LIST_HEAD(&mhash[j]); - - mtable->mt_portal = index; - mtable->mt_cpt = i; - } - - return 0; - failed: - lnet_ptl_cleanup(ptl); - return -ENOMEM; -} - -void -lnet_portals_destroy(void) -{ - int i; - - if (!the_lnet.ln_portals) - return; - - for (i = 0; i < the_lnet.ln_nportals; i++) - lnet_ptl_cleanup(the_lnet.ln_portals[i]); - - cfs_array_free(the_lnet.ln_portals); - the_lnet.ln_portals = NULL; - the_lnet.ln_nportals = 0; -} - -int -lnet_portals_create(void) -{ - int size; - int i; - - size = offsetof(struct lnet_portal, ptl_mt_maps[LNET_CPT_NUMBER]); - - the_lnet.ln_portals = cfs_array_alloc(MAX_PORTALS, 
size); - if (!the_lnet.ln_portals) { - CERROR("Failed to allocate portals table\n"); - return -ENOMEM; - } - the_lnet.ln_nportals = MAX_PORTALS; - - for (i = 0; i < the_lnet.ln_nportals; i++) { - if (lnet_ptl_setup(the_lnet.ln_portals[i], i)) { - lnet_portals_destroy(); - return -ENOMEM; - } - } - - return 0; -} - -/** - * Turn on the lazy portal attribute. Use with caution! - * - * This portal attribute only affects incoming PUT requests to the portal, - * and is off by default. By default, if there's no matching MD for an - * incoming PUT request, it is simply dropped. With the lazy attribute on, - * such requests are queued indefinitely until either a matching MD is - * posted to the portal or the lazy attribute is turned off. - * - * It would prevent dropped requests, however it should be regarded as the - * last line of defense - i.e. users must keep a close watch on active - * buffers on a lazy portal and once it becomes too low post more buffers as - * soon as possible. This is because delayed requests usually have detrimental - * effects on underlying network connections. A few delayed requests often - * suffice to bring an underlying connection to a complete halt, due to flow - * control mechanisms. - * - * There's also a DOS attack risk. If users don't post match-all MDs on a - * lazy portal, a malicious peer can easily stop a service by sending some - * PUT requests with match bits that won't match any MD. A routed server is - * especially vulnerable since the connections to its neighbor routers are - * shared among all clients. - * - * \param portal Index of the portal to enable the lazy attribute on. - * - * \retval 0 On success. - * \retval -EINVAL If \a portal is not a valid index. 
- */ -int -LNetSetLazyPortal(int portal) -{ - struct lnet_portal *ptl; - - if (portal < 0 || portal >= the_lnet.ln_nportals) - return -EINVAL; - - CDEBUG(D_NET, "Setting portal %d lazy\n", portal); - ptl = the_lnet.ln_portals[portal]; - - lnet_res_lock(LNET_LOCK_EX); - lnet_ptl_lock(ptl); - - lnet_ptl_setopt(ptl, LNET_PTL_LAZY); - - lnet_ptl_unlock(ptl); - lnet_res_unlock(LNET_LOCK_EX); - - return 0; -} -EXPORT_SYMBOL(LNetSetLazyPortal); - -int -lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason) -{ - struct lnet_portal *ptl; - LIST_HEAD(zombies); - - if (portal < 0 || portal >= the_lnet.ln_nportals) - return -EINVAL; - - ptl = the_lnet.ln_portals[portal]; - - lnet_res_lock(LNET_LOCK_EX); - lnet_ptl_lock(ptl); - - if (!lnet_ptl_is_lazy(ptl)) { - lnet_ptl_unlock(ptl); - lnet_res_unlock(LNET_LOCK_EX); - return 0; - } - - if (ni) { - struct lnet_msg *msg, *tmp; - - /* grab all messages which are on the NI passed in */ - list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed, - msg_list) { - if (msg->msg_rxpeer->lp_ni == ni) - list_move(&msg->msg_list, &zombies); - } - } else { - if (the_lnet.ln_shutdown) - CWARN("Active lazy portal %d on exit\n", portal); - else - CDEBUG(D_NET, "clearing portal %d lazy\n", portal); - - /* grab all the blocked messages atomically */ - list_splice_init(&ptl->ptl_msg_delayed, &zombies); - - lnet_ptl_unsetopt(ptl, LNET_PTL_LAZY); - } - - lnet_ptl_unlock(ptl); - lnet_res_unlock(LNET_LOCK_EX); - - lnet_drop_delayed_msg_list(&zombies, reason); - - return 0; -} - -/** - * Turn off the lazy portal attribute. Delayed requests on the portal, - * if any, will be all dropped when this function returns. - * - * \param portal Index of the portal to disable the lazy attribute on. - * - * \retval 0 On success. - * \retval -EINVAL If \a portal is not a valid index. 
- */ -int -LNetClearLazyPortal(int portal) -{ - return lnet_clear_lazy_portal(NULL, portal, - "Clearing lazy portal attr"); -} -EXPORT_SYMBOL(LNetClearLazyPortal); diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c deleted file mode 100644 index 9b61260155f2..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ /dev/null @@ -1,585 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. 
- */ -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include -#include -#include -#include -/* For sys_open & sys_close */ -#include -#include - -#include - -static int -kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - err = filp->f_op->unlocked_ioctl(filp, cmd, arg); - set_fs(oldfs); - - return err; -} - -static int -lnet_sock_ioctl(int cmd, unsigned long arg) -{ - struct file *sock_filp; - struct socket *sock; - int rc; - - rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock); - if (rc) { - CERROR("Can't create socket: %d\n", rc); - return rc; - } - - sock_filp = sock_alloc_file(sock, 0, NULL); - if (IS_ERR(sock_filp)) - return PTR_ERR(sock_filp); - - rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg); - - fput(sock_filp); - return rc; -} - -int -lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) -{ - struct ifreq ifr; - int nob; - int rc; - __be32 val; - - nob = strnlen(name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - return -EINVAL; - } - - BUILD_BUG_ON(sizeof(ifr.ifr_name) < IFNAMSIZ); - - if (strlen(name) > sizeof(ifr.ifr_name) - 1) - return -E2BIG; - strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); - - rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); - if (rc) { - CERROR("Can't get flags for interface %s\n", name); - return rc; - } - - if (!(ifr.ifr_flags & IFF_UP)) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = *mask = 0; - return 0; - } - *up = 1; - - if (strlen(name) > sizeof(ifr.ifr_name) - 1) - return -E2BIG; - strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); - - ifr.ifr_addr.sa_family = AF_INET; - rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); - if (rc) { - CERROR("Can't get IP address for interface %s\n", name); - return rc; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - if (strlen(name) > sizeof(ifr.ifr_name) - 1) - 
return -E2BIG; - strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); - - ifr.ifr_addr.sa_family = AF_INET; - rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr); - if (rc) { - CERROR("Can't get netmask for interface %s\n", name); - return rc; - } - - val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr; - *mask = ntohl(val); - - return 0; -} -EXPORT_SYMBOL(lnet_ipif_query); - -int -lnet_ipif_enumerate(char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - char **names; - int toobig; - int nalloc; - int nfound; - struct ifreq *ifr; - struct ifconf ifc; - int rc; - int nob; - int i; - - nalloc = 16; /* first guess at max interfaces */ - toobig = 0; - for (;;) { - if (nalloc * sizeof(*ifr) > PAGE_SIZE) { - toobig = 1; - nalloc = PAGE_SIZE / sizeof(*ifr); - CWARN("Too many interfaces: only enumerating first %d\n", - nalloc); - } - - ifr = kzalloc(nalloc * sizeof(*ifr), GFP_KERNEL); - if (!ifr) { - CERROR("ENOMEM enumerating up to %d interfaces\n", - nalloc); - rc = -ENOMEM; - goto out0; - } - - ifc.ifc_buf = (char *)ifr; - ifc.ifc_len = nalloc * sizeof(*ifr); - - rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc); - if (rc < 0) { - CERROR("Error %d enumerating interfaces\n", rc); - goto out1; - } - - LASSERT(!rc); - - nfound = ifc.ifc_len / sizeof(*ifr); - LASSERT(nfound <= nalloc); - - if (nfound < nalloc || toobig) - break; - - kfree(ifr); - nalloc *= 2; - } - - if (!nfound) - goto out1; - - names = kzalloc(nfound * sizeof(*names), GFP_KERNEL); - if (!names) { - rc = -ENOMEM; - goto out1; - } - - for (i = 0; i < nfound; i++) { - nob = strnlen(ifr[i].ifr_name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", - nob, ifr[i].ifr_name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out2; - } - - names[i] = kmalloc(IFNAMSIZ, GFP_KERNEL); - if (!names[i]) { - rc = -ENOMEM; - goto out2; - } - - memcpy(names[i], ifr[i].ifr_name, nob); - names[i][nob] = 0; 
- } - - *namesp = names; - rc = nfound; - -out2: - if (rc < 0) - lnet_ipif_free_enumeration(names, nfound); -out1: - kfree(ifr); -out0: - return rc; -} -EXPORT_SYMBOL(lnet_ipif_enumerate); - -void -lnet_ipif_free_enumeration(char **names, int n) -{ - int i; - - LASSERT(n > 0); - - for (i = 0; i < n && names[i]; i++) - kfree(names[i]); - - kfree(names); -} -EXPORT_SYMBOL(lnet_ipif_free_enumeration); - -int -lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); - unsigned long then; - struct timeval tv; - struct kvec iov = { .iov_base = buffer, .iov_len = nob }; - struct msghdr msg = {NULL,}; - - LASSERT(nob > 0); - /* - * Caller may pass a zero timeout if she thinks the socket buffer is - * empty enough to take the whole message immediately - */ - iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob); - for (;;) { - msg.msg_flags = !timeout ? MSG_DONTWAIT : 0; - if (timeout) { - /* Set send timeout to remaining time */ - jiffies_to_timeval(jiffies_left, &tv); - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, - (char *)&tv, sizeof(tv)); - if (rc) { - CERROR("Can't set socket send timeout %ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - } - - then = jiffies; - rc = kernel_sendmsg(sock, &msg, &iov, 1, nob); - jiffies_left -= jiffies - then; - - if (rc < 0) - return rc; - - if (!rc) { - CERROR("Unexpected zero rc\n"); - return -ECONNABORTED; - } - - if (!msg_data_left(&msg)) - break; - - if (jiffies_left <= 0) - return -EAGAIN; - } - return 0; -} -EXPORT_SYMBOL(lnet_sock_write); - -int -lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); - unsigned long then; - struct timeval tv; - struct kvec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_flags = 0 - }; - - LASSERT(nob > 0); - LASSERT(jiffies_left > 0); 
- - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, nob); - - for (;;) { - /* Set receive timeout to remaining time */ - jiffies_to_timeval(jiffies_left, &tv); - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, - (char *)&tv, sizeof(tv)); - if (rc) { - CERROR("Can't set socket recv timeout %ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - - then = jiffies; - rc = sock_recvmsg(sock, &msg, 0); - jiffies_left -= jiffies - then; - - if (rc < 0) - return rc; - - if (!rc) - return -ECONNRESET; - - if (!msg_data_left(&msg)) - return 0; - - if (jiffies_left <= 0) - return -ETIMEDOUT; - } -} -EXPORT_SYMBOL(lnet_sock_read); - -static int -lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip, - int local_port) -{ - struct sockaddr_in locaddr; - struct socket *sock; - int rc; - int option; - - /* All errors are fatal except bind failure if the port is in use */ - *fatal = 1; - - rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock); - *sockp = sock; - if (rc) { - CERROR("Can't create socket: %d\n", rc); - return rc; - } - - option = 1; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&option, sizeof(option)); - if (rc) { - CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); - goto failed; - } - - if (local_ip || local_port) { - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons(local_port); - if (!local_ip) - locaddr.sin_addr.s_addr = htonl(INADDR_ANY); - else - locaddr.sin_addr.s_addr = htonl(local_ip); - - rc = kernel_bind(sock, (struct sockaddr *)&locaddr, - sizeof(locaddr)); - if (rc == -EADDRINUSE) { - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *fatal = 0; - goto failed; - } - if (rc) { - CERROR("Error trying to bind to port %d: %d\n", - local_port, rc); - goto failed; - } - } - return 0; - -failed: - sock_release(sock); - return rc; -} - -int -lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize) -{ - int option; - int rc; - - if 
(txbufsize) { - option = txbufsize; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof(option)); - if (rc) { - CERROR("Can't set send buffer %d: %d\n", - option, rc); - return rc; - } - } - - if (rxbufsize) { - option = rxbufsize; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof(option)); - if (rc) { - CERROR("Can't set receive buffer %d: %d\n", - option, rc); - return rc; - } - } - return 0; -} -EXPORT_SYMBOL(lnet_sock_setbuf); - -int -lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port) -{ - struct sockaddr_in sin; - int rc; - - if (remote) - rc = kernel_getpeername(sock, (struct sockaddr *)&sin); - else - rc = kernel_getsockname(sock, (struct sockaddr *)&sin); - if (rc < 0) { - CERROR("Error %d getting sock %s IP/port\n", - rc, remote ? "peer" : "local"); - return rc; - } - - if (ip) - *ip = ntohl(sin.sin_addr.s_addr); - - if (port) - *port = ntohs(sin.sin_port); - - return 0; -} -EXPORT_SYMBOL(lnet_sock_getaddr); - -int -lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize) -{ - if (txbufsize) - *txbufsize = sock->sk->sk_sndbuf; - - if (rxbufsize) - *rxbufsize = sock->sk->sk_rcvbuf; - - return 0; -} -EXPORT_SYMBOL(lnet_sock_getbuf); - -int -lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port, - int backlog) -{ - int fatal; - int rc; - - rc = lnet_sock_create(sockp, &fatal, local_ip, local_port); - if (rc) { - if (!fatal) - CERROR("Can't create socket: port %d already in use\n", - local_port); - return rc; - } - - rc = kernel_listen(*sockp, backlog); - if (!rc) - return 0; - - CERROR("Can't set listen backlog %d: %d\n", backlog, rc); - sock_release(*sockp); - return rc; -} - -int -lnet_sock_accept(struct socket **newsockp, struct socket *sock) -{ - wait_queue_entry_t wait; - struct socket *newsock; - int rc; - - /* - * XXX this should add a ref to sock->ops->owner, if - * TCP could be a module - */ - rc = sock_create_lite(PF_PACKET, sock->type, 
IPPROTO_TCP, &newsock); - if (rc) { - CERROR("Can't allocate socket\n"); - return rc; - } - - newsock->ops = sock->ops; - - rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); - if (rc == -EAGAIN) { - /* Nothing ready, so wait for activity */ - init_waitqueue_entry(&wait, current); - add_wait_queue(sk_sleep(sock->sk), &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - remove_wait_queue(sk_sleep(sock->sk), &wait); - rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); - } - - if (rc) - goto failed; - - *newsockp = newsock; - return 0; - -failed: - sock_release(newsock); - return rc; -} - -int -lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip, - int local_port, __u32 peer_ip, int peer_port) -{ - struct sockaddr_in srvaddr; - int rc; - - rc = lnet_sock_create(sockp, fatal, local_ip, local_port); - if (rc) - return rc; - - memset(&srvaddr, 0, sizeof(srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(peer_port); - srvaddr.sin_addr.s_addr = htonl(peer_ip); - - rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr, - sizeof(srvaddr), 0); - if (!rc) - return 0; - - /* - * EADDRNOTAVAIL probably means we're already connected to the same - * peer/port on the same local port on a differently typed - * connection. Let our caller retry with a different local - * port... - */ - *fatal = !(rc == -EADDRNOTAVAIL); - - CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET, - "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc, - &local_ip, local_port, &peer_ip, peer_port); - - sock_release(*sockp); - return rc; -} diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c deleted file mode 100644 index 7456b989e451..000000000000 --- a/drivers/staging/lustre/lnet/lnet/lo.c +++ /dev/null @@ -1,105 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include - -static int -lolnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) -{ - LASSERT(!lntmsg->msg_routing); - LASSERT(!lntmsg->msg_target_is_router); - - return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0); -} - -static int -lolnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, - int delayed, struct iov_iter *to, unsigned int rlen) -{ - struct lnet_msg *sendmsg = private; - - if (lntmsg) { /* not discarding */ - if (sendmsg->msg_iov) - lnet_copy_iov2iter(to, - sendmsg->msg_niov, - sendmsg->msg_iov, - sendmsg->msg_offset, - iov_iter_count(to)); - else - lnet_copy_kiov2iter(to, - sendmsg->msg_niov, - sendmsg->msg_kiov, - sendmsg->msg_offset, - iov_iter_count(to)); - - lnet_finalize(ni, lntmsg, 0); - } - - lnet_finalize(ni, sendmsg, 0); - return 0; -} - -static int lolnd_instanced; - -static void -lolnd_shutdown(struct lnet_ni *ni) -{ - CDEBUG(D_NET, "shutdown\n"); - LASSERT(lolnd_instanced); - - lolnd_instanced = 0; -} - 
-static int -lolnd_startup(struct lnet_ni *ni) -{ - LASSERT(ni->ni_lnd == &the_lolnd); - LASSERT(!lolnd_instanced); - lolnd_instanced = 1; - - return 0; -} - -struct lnet_lnd the_lolnd = { - /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list}, - /* .lnd_refcount = */ 0, - /* .lnd_type = */ LOLND, - /* .lnd_startup = */ lolnd_startup, - /* .lnd_shutdown = */ lolnd_shutdown, - /* .lnt_ctl = */ NULL, - /* .lnd_send = */ lolnd_send, - /* .lnd_recv = */ lolnd_recv, - /* .lnd_eager_recv = */ NULL, - /* .lnd_notify = */ NULL, - /* .lnd_accept = */ NULL -}; diff --git a/drivers/staging/lustre/lnet/lnet/module.c b/drivers/staging/lustre/lnet/lnet/module.c deleted file mode 100644 index 9d06664f0c17..000000000000 --- a/drivers/staging/lustre/lnet/lnet/module.c +++ /dev/null @@ -1,239 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include - -static int config_on_load; -module_param(config_on_load, int, 0444); -MODULE_PARM_DESC(config_on_load, "configure network at module load"); - -static struct mutex lnet_config_mutex; - -static int -lnet_configure(void *arg) -{ - /* 'arg' only there so I can be passed to cfs_create_thread() */ - int rc = 0; - - mutex_lock(&lnet_config_mutex); - - if (!the_lnet.ln_niinit_self) { - rc = try_module_get(THIS_MODULE); - - if (rc != 1) - goto out; - - rc = LNetNIInit(LNET_PID_LUSTRE); - if (rc >= 0) { - the_lnet.ln_niinit_self = 1; - rc = 0; - } else { - module_put(THIS_MODULE); - } - } - -out: - mutex_unlock(&lnet_config_mutex); - return rc; -} - -static int -lnet_unconfigure(void) -{ - int refcount; - - mutex_lock(&lnet_config_mutex); - - if (the_lnet.ln_niinit_self) { - the_lnet.ln_niinit_self = 0; - LNetNIFini(); - module_put(THIS_MODULE); - } - - mutex_lock(&the_lnet.ln_api_mutex); - refcount = the_lnet.ln_refcount; - mutex_unlock(&the_lnet.ln_api_mutex); - - mutex_unlock(&lnet_config_mutex); - return !refcount ? 
0 : -EBUSY; -} - -static int -lnet_dyn_configure(struct libcfs_ioctl_hdr *hdr) -{ - struct lnet_ioctl_config_data *conf = - (struct lnet_ioctl_config_data *)hdr; - int rc; - - if (conf->cfg_hdr.ioc_len < sizeof(*conf)) - return -EINVAL; - - mutex_lock(&lnet_config_mutex); - if (!the_lnet.ln_niinit_self) { - rc = -EINVAL; - goto out_unlock; - } - rc = lnet_dyn_add_ni(LNET_PID_LUSTRE, conf); -out_unlock: - mutex_unlock(&lnet_config_mutex); - - return rc; -} - -static int -lnet_dyn_unconfigure(struct libcfs_ioctl_hdr *hdr) -{ - struct lnet_ioctl_config_data *conf = - (struct lnet_ioctl_config_data *)hdr; - int rc; - - if (conf->cfg_hdr.ioc_len < sizeof(*conf)) - return -EINVAL; - - mutex_lock(&lnet_config_mutex); - if (!the_lnet.ln_niinit_self) { - rc = -EINVAL; - goto out_unlock; - } - rc = lnet_dyn_del_ni(conf->cfg_net); -out_unlock: - mutex_unlock(&lnet_config_mutex); - - return rc; -} - -static int -lnet_ioctl(struct notifier_block *nb, - unsigned long cmd, void *vdata) -{ - int rc; - struct libcfs_ioctl_hdr *hdr = vdata; - - switch (cmd) { - case IOC_LIBCFS_CONFIGURE: { - struct libcfs_ioctl_data *data = - (struct libcfs_ioctl_data *)hdr; - - if (data->ioc_hdr.ioc_len < sizeof(*data)) { - rc = -EINVAL; - } else { - the_lnet.ln_nis_from_mod_params = data->ioc_flags; - rc = lnet_configure(NULL); - } - break; - } - - case IOC_LIBCFS_UNCONFIGURE: - rc = lnet_unconfigure(); - break; - - case IOC_LIBCFS_ADD_NET: - rc = lnet_dyn_configure(hdr); - break; - - case IOC_LIBCFS_DEL_NET: - rc = lnet_dyn_unconfigure(hdr); - break; - - default: - /* - * Passing LNET_PID_ANY only gives me a ref if the net is up - * already; I'll need it to ensure the net can't go down while - * I'm called into it - */ - rc = LNetNIInit(LNET_PID_ANY); - if (rc >= 0) { - rc = LNetCtl(cmd, hdr); - LNetNIFini(); - } - break; - } - return notifier_from_ioctl_errno(rc); -} - -static struct notifier_block lnet_ioctl_handler = { - .notifier_call = lnet_ioctl, -}; - -static int __init lnet_init(void) -{ 
- int rc; - - mutex_init(&lnet_config_mutex); - - rc = libcfs_setup(); - if (rc) - return rc; - - rc = lnet_lib_init(); - if (rc) { - CERROR("lnet_lib_init: error %d\n", rc); - return rc; - } - - rc = blocking_notifier_chain_register(&libcfs_ioctl_list, - &lnet_ioctl_handler); - LASSERT(!rc); - - if (config_on_load) { - /* - * Have to schedule a separate thread to avoid deadlocking - * in modload - */ - (void)kthread_run(lnet_configure, NULL, "lnet_initd"); - } - - return 0; -} - -static void __exit lnet_exit(void) -{ - int rc; - - rc = blocking_notifier_chain_unregister(&libcfs_ioctl_list, - &lnet_ioctl_handler); - LASSERT(!rc); - - lnet_lib_exit(); -} - -MODULE_AUTHOR("OpenSFS, Inc. "); -MODULE_DESCRIPTION("Lustre Networking layer"); -MODULE_VERSION(LNET_VERSION); -MODULE_LICENSE("GPL"); - -module_init(lnet_init); -module_exit(lnet_exit); diff --git a/drivers/staging/lustre/lnet/lnet/net_fault.c b/drivers/staging/lustre/lnet/lnet/net_fault.c deleted file mode 100644 index 0066394b0bb0..000000000000 --- a/drivers/staging/lustre/lnet/lnet/net_fault.c +++ /dev/null @@ -1,1023 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2014, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Seagate, Inc. - * - * lnet/lnet/net_fault.c - * - * Lustre network fault simulation - * - * Author: liang.zhen@intel.com - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include - -#define LNET_MSG_MASK (LNET_PUT_BIT | LNET_ACK_BIT | \ - LNET_GET_BIT | LNET_REPLY_BIT) - -struct lnet_drop_rule { - /** link chain on the_lnet.ln_drop_rules */ - struct list_head dr_link; - /** attributes of this rule */ - struct lnet_fault_attr dr_attr; - /** lock to protect \a dr_drop_at and \a dr_stat */ - spinlock_t dr_lock; - /** - * the message sequence to drop, which means message is dropped when - * dr_stat.drs_count == dr_drop_at - */ - unsigned long dr_drop_at; - /** - * seconds to drop the next message, it's exclusive with dr_drop_at - */ - unsigned long dr_drop_time; - /** baseline to caculate dr_drop_time */ - unsigned long dr_time_base; - /** statistic of dropped messages */ - struct lnet_fault_stat dr_stat; -}; - -static bool -lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid) -{ - if (nid == msg_nid || nid == LNET_NID_ANY) - return true; - - if (LNET_NIDNET(nid) != LNET_NIDNET(msg_nid)) - return false; - - /* 255.255.255.255@net is wildcard for all addresses in a network */ - return LNET_NIDADDR(nid) == LNET_NIDADDR(LNET_NID_ANY); -} - -static bool -lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src, - lnet_nid_t dst, unsigned int type, unsigned int portal) -{ - if (!lnet_fault_nid_match(attr->fa_src, src) || - !lnet_fault_nid_match(attr->fa_dst, dst)) - return false; - - if (!(attr->fa_msg_mask & (1 << type))) - return false; - - /** - * NB: ACK and REPLY have no portal, but they should have been - * rejected by 
message mask - */ - if (attr->fa_ptl_mask && /* has portal filter */ - !(attr->fa_ptl_mask & (1ULL << portal))) - return false; - - return true; -} - -static int -lnet_fault_attr_validate(struct lnet_fault_attr *attr) -{ - if (!attr->fa_msg_mask) - attr->fa_msg_mask = LNET_MSG_MASK; /* all message types */ - - if (!attr->fa_ptl_mask) /* no portal filter */ - return 0; - - /* NB: only PUT and GET can be filtered if portal filter has been set */ - attr->fa_msg_mask &= LNET_GET_BIT | LNET_PUT_BIT; - if (!attr->fa_msg_mask) { - CDEBUG(D_NET, "can't find valid message type bits %x\n", - attr->fa_msg_mask); - return -EINVAL; - } - return 0; -} - -static void -lnet_fault_stat_inc(struct lnet_fault_stat *stat, unsigned int type) -{ - /* NB: fs_counter is NOT updated by this function */ - switch (type) { - case LNET_MSG_PUT: - stat->fs_put++; - return; - case LNET_MSG_ACK: - stat->fs_ack++; - return; - case LNET_MSG_GET: - stat->fs_get++; - return; - case LNET_MSG_REPLY: - stat->fs_reply++; - return; - } -} - -/** - * LNet message drop simulation - */ - -/** - * Add a new drop rule to LNet - * There is no check for duplicated drop rule, all rules will be checked for - * incoming message. 
- */ -static int -lnet_drop_rule_add(struct lnet_fault_attr *attr) -{ - struct lnet_drop_rule *rule; - - if (attr->u.drop.da_rate & attr->u.drop.da_interval) { - CDEBUG(D_NET, "please provide either drop rate or drop interval, but not both at the same time %d/%d\n", - attr->u.drop.da_rate, attr->u.drop.da_interval); - return -EINVAL; - } - - if (lnet_fault_attr_validate(attr)) - return -EINVAL; - - rule = kzalloc(sizeof(*rule), GFP_NOFS); - if (!rule) - return -ENOMEM; - - spin_lock_init(&rule->dr_lock); - - rule->dr_attr = *attr; - if (attr->u.drop.da_interval) { - rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ; - rule->dr_drop_time = jiffies + - prandom_u32_max(attr->u.drop.da_interval) * HZ; - } else { - rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate); - } - - lnet_net_lock(LNET_LOCK_EX); - list_add(&rule->dr_link, &the_lnet.ln_drop_rules); - lnet_net_unlock(LNET_LOCK_EX); - - CDEBUG(D_NET, "Added drop rule: src %s, dst %s, rate %d, interval %d\n", - libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_src), - attr->u.drop.da_rate, attr->u.drop.da_interval); - return 0; -} - -/** - * Remove matched drop rules from lnet, all rules that can match \a src and - * \a dst will be removed. 
- * If \a src is zero, then all rules have \a dst as destination will be remove - * If \a dst is zero, then all rules have \a src as source will be removed - * If both of them are zero, all rules will be removed - */ -static int -lnet_drop_rule_del(lnet_nid_t src, lnet_nid_t dst) -{ - struct lnet_drop_rule *rule; - struct lnet_drop_rule *tmp; - struct list_head zombies; - int n = 0; - - INIT_LIST_HEAD(&zombies); - - lnet_net_lock(LNET_LOCK_EX); - list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) { - if (rule->dr_attr.fa_src != src && src) - continue; - - if (rule->dr_attr.fa_dst != dst && dst) - continue; - - list_move(&rule->dr_link, &zombies); - } - lnet_net_unlock(LNET_LOCK_EX); - - list_for_each_entry_safe(rule, tmp, &zombies, dr_link) { - CDEBUG(D_NET, "Remove drop rule: src %s->dst: %s (1/%d, %d)\n", - libcfs_nid2str(rule->dr_attr.fa_src), - libcfs_nid2str(rule->dr_attr.fa_dst), - rule->dr_attr.u.drop.da_rate, - rule->dr_attr.u.drop.da_interval); - - list_del(&rule->dr_link); - kfree(rule); - n++; - } - - return n; -} - -/** - * List drop rule at position of \a pos - */ -static int -lnet_drop_rule_list(int pos, struct lnet_fault_attr *attr, - struct lnet_fault_stat *stat) -{ - struct lnet_drop_rule *rule; - int cpt; - int i = 0; - int rc = -ENOENT; - - cpt = lnet_net_lock_current(); - list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { - if (i++ < pos) - continue; - - spin_lock(&rule->dr_lock); - *attr = rule->dr_attr; - *stat = rule->dr_stat; - spin_unlock(&rule->dr_lock); - rc = 0; - break; - } - - lnet_net_unlock(cpt); - return rc; -} - -/** - * reset counters for all drop rules - */ -static void -lnet_drop_rule_reset(void) -{ - struct lnet_drop_rule *rule; - int cpt; - - cpt = lnet_net_lock_current(); - - list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { - struct lnet_fault_attr *attr = &rule->dr_attr; - - spin_lock(&rule->dr_lock); - - memset(&rule->dr_stat, 0, sizeof(rule->dr_stat)); - if (attr->u.drop.da_rate) 
{ - rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate); - } else { - rule->dr_drop_time = jiffies + - prandom_u32_max(attr->u.drop.da_interval) * HZ; - rule->dr_time_base = jiffies + attr->u.drop.da_interval * HZ; - } - spin_unlock(&rule->dr_lock); - } - - lnet_net_unlock(cpt); -} - -/** - * check source/destination NID, portal, message type and drop rate, - * decide whether should drop this message or not - */ -static bool -drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src, - lnet_nid_t dst, unsigned int type, unsigned int portal) -{ - struct lnet_fault_attr *attr = &rule->dr_attr; - bool drop; - - if (!lnet_fault_attr_match(attr, src, dst, type, portal)) - return false; - - /* match this rule, check drop rate now */ - spin_lock(&rule->dr_lock); - if (rule->dr_drop_time) { /* time based drop */ - unsigned long now = jiffies; - - rule->dr_stat.fs_count++; - drop = time_after_eq(now, rule->dr_drop_time); - if (drop) { - if (time_after(now, rule->dr_time_base)) - rule->dr_time_base = now; - - rule->dr_drop_time = rule->dr_time_base + - prandom_u32_max(attr->u.drop.da_interval) * HZ; - rule->dr_time_base += attr->u.drop.da_interval * HZ; - - CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lu\n", - libcfs_nid2str(attr->fa_src), - libcfs_nid2str(attr->fa_dst), - rule->dr_drop_time); - } - - } else { /* rate based drop */ - drop = rule->dr_stat.fs_count++ == rule->dr_drop_at; - - if (!do_div(rule->dr_stat.fs_count, attr->u.drop.da_rate)) { - rule->dr_drop_at = rule->dr_stat.fs_count + - prandom_u32_max(attr->u.drop.da_rate); - CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n", - libcfs_nid2str(attr->fa_src), - libcfs_nid2str(attr->fa_dst), rule->dr_drop_at); - } - } - - if (drop) { /* drop this message, update counters */ - lnet_fault_stat_inc(&rule->dr_stat, type); - rule->dr_stat.u.drop.ds_dropped++; - } - - spin_unlock(&rule->dr_lock); - return drop; -} - -/** - * Check if message from \a src to \a dst can match any existed drop rule - */ -bool 
-lnet_drop_rule_match(struct lnet_hdr *hdr) -{ - struct lnet_drop_rule *rule; - lnet_nid_t src = le64_to_cpu(hdr->src_nid); - lnet_nid_t dst = le64_to_cpu(hdr->dest_nid); - unsigned int typ = le32_to_cpu(hdr->type); - unsigned int ptl = -1; - bool drop = false; - int cpt; - - /** - * NB: if Portal is specified, then only PUT and GET will be - * filtered by drop rule - */ - if (typ == LNET_MSG_PUT) - ptl = le32_to_cpu(hdr->msg.put.ptl_index); - else if (typ == LNET_MSG_GET) - ptl = le32_to_cpu(hdr->msg.get.ptl_index); - - cpt = lnet_net_lock_current(); - list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { - drop = drop_rule_match(rule, src, dst, typ, ptl); - if (drop) - break; - } - - lnet_net_unlock(cpt); - return drop; -} - -/** - * LNet Delay Simulation - */ -/** timestamp (second) to send delayed message */ -#define msg_delay_send msg_ev.hdr_data - -struct lnet_delay_rule { - /** link chain on the_lnet.ln_delay_rules */ - struct list_head dl_link; - /** link chain on delay_dd.dd_sched_rules */ - struct list_head dl_sched_link; - /** attributes of this rule */ - struct lnet_fault_attr dl_attr; - /** lock to protect \a below members */ - spinlock_t dl_lock; - /** refcount of delay rule */ - atomic_t dl_refcount; - /** - * the message sequence to delay, which means message is delayed when - * dl_stat.fs_count == dl_delay_at - */ - unsigned long dl_delay_at; - /** - * seconds to delay the next message, it's exclusive with dl_delay_at - */ - unsigned long dl_delay_time; - /** baseline to caculate dl_delay_time */ - unsigned long dl_time_base; - /** jiffies to send the next delayed message */ - unsigned long dl_msg_send; - /** delayed message list */ - struct list_head dl_msg_list; - /** statistic of delayed messages */ - struct lnet_fault_stat dl_stat; - /** timer to wakeup delay_daemon */ - struct timer_list dl_timer; -}; - -struct delay_daemon_data { - /** serialise rule add/remove */ - struct mutex dd_mutex; - /** protect rules on \a dd_sched_rules */ - 
spinlock_t dd_lock; - /** scheduled delay rules (by timer) */ - struct list_head dd_sched_rules; - /** daemon thread sleeps at here */ - wait_queue_head_t dd_waitq; - /** controller (lctl command) wait at here */ - wait_queue_head_t dd_ctl_waitq; - /** daemon is running */ - unsigned int dd_running; - /** daemon stopped */ - unsigned int dd_stopped; -}; - -static struct delay_daemon_data delay_dd; - -static unsigned long -round_timeout(unsigned long timeout) -{ - return (unsigned int)rounddown(timeout, HZ) + HZ; -} - -static void -delay_rule_decref(struct lnet_delay_rule *rule) -{ - if (atomic_dec_and_test(&rule->dl_refcount)) { - LASSERT(list_empty(&rule->dl_sched_link)); - LASSERT(list_empty(&rule->dl_msg_list)); - LASSERT(list_empty(&rule->dl_link)); - - kfree(rule); - } -} - -/** - * check source/destination NID, portal, message type and delay rate, - * decide whether should delay this message or not - */ -static bool -delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src, - lnet_nid_t dst, unsigned int type, unsigned int portal, - struct lnet_msg *msg) -{ - struct lnet_fault_attr *attr = &rule->dl_attr; - bool delay; - - if (!lnet_fault_attr_match(attr, src, dst, type, portal)) - return false; - - /* match this rule, check delay rate now */ - spin_lock(&rule->dl_lock); - if (rule->dl_delay_time) { /* time based delay */ - unsigned long now = jiffies; - - rule->dl_stat.fs_count++; - delay = time_after_eq(now, rule->dl_delay_time); - if (delay) { - if (time_after(now, rule->dl_time_base)) - rule->dl_time_base = now; - - rule->dl_delay_time = rule->dl_time_base + - prandom_u32_max(attr->u.delay.la_interval) * HZ; - rule->dl_time_base += attr->u.delay.la_interval * HZ; - - CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lu\n", - libcfs_nid2str(attr->fa_src), - libcfs_nid2str(attr->fa_dst), - rule->dl_delay_time); - } - - } else { /* rate based delay */ - delay = rule->dl_stat.fs_count++ == rule->dl_delay_at; - /* generate the next random rate sequence */ - 
if (!do_div(rule->dl_stat.fs_count, attr->u.delay.la_rate)) { - rule->dl_delay_at = rule->dl_stat.fs_count + - prandom_u32_max(attr->u.delay.la_rate); - CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n", - libcfs_nid2str(attr->fa_src), - libcfs_nid2str(attr->fa_dst), rule->dl_delay_at); - } - } - - if (!delay) { - spin_unlock(&rule->dl_lock); - return false; - } - - /* delay this message, update counters */ - lnet_fault_stat_inc(&rule->dl_stat, type); - rule->dl_stat.u.delay.ls_delayed++; - - list_add_tail(&msg->msg_list, &rule->dl_msg_list); - msg->msg_delay_send = round_timeout( - jiffies + attr->u.delay.la_latency * HZ); - if (rule->dl_msg_send == -1) { - rule->dl_msg_send = msg->msg_delay_send; - mod_timer(&rule->dl_timer, rule->dl_msg_send); - } - - spin_unlock(&rule->dl_lock); - return true; -} - -/** - * check if \a msg can match any Delay Rule, receiving of this message - * will be delayed if there is a match. - */ -bool -lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg) -{ - struct lnet_delay_rule *rule; - lnet_nid_t src = le64_to_cpu(hdr->src_nid); - lnet_nid_t dst = le64_to_cpu(hdr->dest_nid); - unsigned int typ = le32_to_cpu(hdr->type); - unsigned int ptl = -1; - - /* NB: called with hold of lnet_net_lock */ - - /** - * NB: if Portal is specified, then only PUT and GET will be - * filtered by delay rule - */ - if (typ == LNET_MSG_PUT) - ptl = le32_to_cpu(hdr->msg.put.ptl_index); - else if (typ == LNET_MSG_GET) - ptl = le32_to_cpu(hdr->msg.get.ptl_index); - - list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) { - if (delay_rule_match(rule, src, dst, typ, ptl, msg)) - return true; - } - - return false; -} - -/** check out delayed messages for send */ -static void -delayed_msg_check(struct lnet_delay_rule *rule, bool all, - struct list_head *msg_list) -{ - struct lnet_msg *msg; - struct lnet_msg *tmp; - unsigned long now = jiffies; - - if (!all && rule->dl_msg_send > now) - return; - - spin_lock(&rule->dl_lock); - 
list_for_each_entry_safe(msg, tmp, &rule->dl_msg_list, msg_list) { - if (!all && msg->msg_delay_send > now) - break; - - msg->msg_delay_send = 0; - list_move_tail(&msg->msg_list, msg_list); - } - - if (list_empty(&rule->dl_msg_list)) { - del_timer(&rule->dl_timer); - rule->dl_msg_send = -1; - - } else if (!list_empty(msg_list)) { - /* - * dequeued some timedout messages, update timer for the - * next delayed message on rule - */ - msg = list_entry(rule->dl_msg_list.next, - struct lnet_msg, msg_list); - rule->dl_msg_send = msg->msg_delay_send; - mod_timer(&rule->dl_timer, rule->dl_msg_send); - } - spin_unlock(&rule->dl_lock); -} - -static void -delayed_msg_process(struct list_head *msg_list, bool drop) -{ - struct lnet_msg *msg; - - while (!list_empty(msg_list)) { - struct lnet_ni *ni; - int cpt; - int rc; - - msg = list_entry(msg_list->next, struct lnet_msg, msg_list); - LASSERT(msg->msg_rxpeer); - - ni = msg->msg_rxpeer->lp_ni; - cpt = msg->msg_rx_cpt; - - list_del_init(&msg->msg_list); - if (drop) { - rc = -ECANCELED; - - } else if (!msg->msg_routing) { - rc = lnet_parse_local(ni, msg); - if (!rc) - continue; - - } else { - lnet_net_lock(cpt); - rc = lnet_parse_forward_locked(ni, msg); - lnet_net_unlock(cpt); - - switch (rc) { - case LNET_CREDIT_OK: - lnet_ni_recv(ni, msg->msg_private, msg, 0, - 0, msg->msg_len, msg->msg_len); - /* fall through */ - case LNET_CREDIT_WAIT: - continue; - default: /* failures */ - break; - } - } - - lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len); - lnet_finalize(ni, msg, rc); - } -} - -/** - * Process delayed messages for scheduled rules - * This function can either be called by delay_rule_daemon, or by lnet_finalise - */ -void -lnet_delay_rule_check(void) -{ - struct lnet_delay_rule *rule; - struct list_head msgs; - - INIT_LIST_HEAD(&msgs); - while (1) { - if (list_empty(&delay_dd.dd_sched_rules)) - break; - - spin_lock_bh(&delay_dd.dd_lock); - if (list_empty(&delay_dd.dd_sched_rules)) { - 
spin_unlock_bh(&delay_dd.dd_lock); - break; - } - - rule = list_entry(delay_dd.dd_sched_rules.next, - struct lnet_delay_rule, dl_sched_link); - list_del_init(&rule->dl_sched_link); - spin_unlock_bh(&delay_dd.dd_lock); - - delayed_msg_check(rule, false, &msgs); - delay_rule_decref(rule); /* -1 for delay_dd.dd_sched_rules */ - } - - if (!list_empty(&msgs)) - delayed_msg_process(&msgs, false); -} - -/** daemon thread to handle delayed messages */ -static int -lnet_delay_rule_daemon(void *arg) -{ - delay_dd.dd_running = 1; - wake_up(&delay_dd.dd_ctl_waitq); - - while (delay_dd.dd_running) { - wait_event_interruptible(delay_dd.dd_waitq, - !delay_dd.dd_running || - !list_empty(&delay_dd.dd_sched_rules)); - lnet_delay_rule_check(); - } - - /* in case more rules have been enqueued after my last check */ - lnet_delay_rule_check(); - delay_dd.dd_stopped = 1; - wake_up(&delay_dd.dd_ctl_waitq); - - return 0; -} - -static void -delay_timer_cb(struct timer_list *t) -{ - struct lnet_delay_rule *rule = from_timer(rule, t, dl_timer); - - spin_lock_bh(&delay_dd.dd_lock); - if (list_empty(&rule->dl_sched_link) && delay_dd.dd_running) { - atomic_inc(&rule->dl_refcount); - list_add_tail(&rule->dl_sched_link, &delay_dd.dd_sched_rules); - wake_up(&delay_dd.dd_waitq); - } - spin_unlock_bh(&delay_dd.dd_lock); -} - -/** - * Add a new delay rule to LNet - * There is no check for duplicated delay rule, all rules will be checked for - * incoming message. 
- */ -int -lnet_delay_rule_add(struct lnet_fault_attr *attr) -{ - struct lnet_delay_rule *rule; - int rc = 0; - - if (attr->u.delay.la_rate & attr->u.delay.la_interval) { - CDEBUG(D_NET, "please provide either delay rate or delay interval, but not both at the same time %d/%d\n", - attr->u.delay.la_rate, attr->u.delay.la_interval); - return -EINVAL; - } - - if (!attr->u.delay.la_latency) { - CDEBUG(D_NET, "delay latency cannot be zero\n"); - return -EINVAL; - } - - if (lnet_fault_attr_validate(attr)) - return -EINVAL; - - rule = kzalloc(sizeof(*rule), GFP_NOFS); - if (!rule) - return -ENOMEM; - - mutex_lock(&delay_dd.dd_mutex); - if (!delay_dd.dd_running) { - struct task_struct *task; - - /** - * NB: although LND threads will process delayed message - * in lnet_finalize, but there is no guarantee that LND - * threads will be waken up if no other message needs to - * be handled. - * Only one daemon thread, performance is not the concern - * of this simualation module. - */ - task = kthread_run(lnet_delay_rule_daemon, NULL, "lnet_dd"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - goto failed; - } - wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_running); - } - - timer_setup(&rule->dl_timer, delay_timer_cb, 0); - - spin_lock_init(&rule->dl_lock); - INIT_LIST_HEAD(&rule->dl_msg_list); - INIT_LIST_HEAD(&rule->dl_sched_link); - - rule->dl_attr = *attr; - if (attr->u.delay.la_interval) { - rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ; - rule->dl_delay_time = jiffies + - prandom_u32_max(attr->u.delay.la_interval) * HZ; - } else { - rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate); - } - - rule->dl_msg_send = -1; - - lnet_net_lock(LNET_LOCK_EX); - atomic_set(&rule->dl_refcount, 1); - list_add(&rule->dl_link, &the_lnet.ln_delay_rules); - lnet_net_unlock(LNET_LOCK_EX); - - CDEBUG(D_NET, "Added delay rule: src %s, dst %s, rate %d\n", - libcfs_nid2str(attr->fa_src), libcfs_nid2str(attr->fa_src), - attr->u.delay.la_rate); - - 
mutex_unlock(&delay_dd.dd_mutex); - return 0; -failed: - mutex_unlock(&delay_dd.dd_mutex); - kfree(rule); - return rc; -} - -/** - * Remove matched Delay Rules from lnet, if \a shutdown is true or both \a src - * and \a dst are zero, all rules will be removed, otherwise only matched rules - * will be removed. - * If \a src is zero, then all rules have \a dst as destination will be remove - * If \a dst is zero, then all rules have \a src as source will be removed - * - * When a delay rule is removed, all delayed messages of this rule will be - * processed immediately. - */ -int -lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown) -{ - struct lnet_delay_rule *rule; - struct lnet_delay_rule *tmp; - struct list_head rule_list; - struct list_head msg_list; - int n = 0; - bool cleanup; - - INIT_LIST_HEAD(&rule_list); - INIT_LIST_HEAD(&msg_list); - - if (shutdown) { - src = 0; - dst = 0; - } - - mutex_lock(&delay_dd.dd_mutex); - lnet_net_lock(LNET_LOCK_EX); - - list_for_each_entry_safe(rule, tmp, &the_lnet.ln_delay_rules, dl_link) { - if (rule->dl_attr.fa_src != src && src) - continue; - - if (rule->dl_attr.fa_dst != dst && dst) - continue; - - CDEBUG(D_NET, "Remove delay rule: src %s->dst: %s (1/%d, %d)\n", - libcfs_nid2str(rule->dl_attr.fa_src), - libcfs_nid2str(rule->dl_attr.fa_dst), - rule->dl_attr.u.delay.la_rate, - rule->dl_attr.u.delay.la_interval); - /* refcount is taken over by rule_list */ - list_move(&rule->dl_link, &rule_list); - } - - /* check if we need to shutdown delay_daemon */ - cleanup = list_empty(&the_lnet.ln_delay_rules) && - !list_empty(&rule_list); - lnet_net_unlock(LNET_LOCK_EX); - - list_for_each_entry_safe(rule, tmp, &rule_list, dl_link) { - list_del_init(&rule->dl_link); - - del_timer_sync(&rule->dl_timer); - delayed_msg_check(rule, true, &msg_list); - delay_rule_decref(rule); /* -1 for the_lnet.ln_delay_rules */ - n++; - } - - if (cleanup) { /* no more delay rule, shutdown delay_daemon */ - LASSERT(delay_dd.dd_running); - 
delay_dd.dd_running = 0; - wake_up(&delay_dd.dd_waitq); - - while (!delay_dd.dd_stopped) - wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_stopped); - } - mutex_unlock(&delay_dd.dd_mutex); - - if (!list_empty(&msg_list)) - delayed_msg_process(&msg_list, shutdown); - - return n; -} - -/** - * List Delay Rule at position of \a pos - */ -int -lnet_delay_rule_list(int pos, struct lnet_fault_attr *attr, - struct lnet_fault_stat *stat) -{ - struct lnet_delay_rule *rule; - int cpt; - int i = 0; - int rc = -ENOENT; - - cpt = lnet_net_lock_current(); - list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) { - if (i++ < pos) - continue; - - spin_lock(&rule->dl_lock); - *attr = rule->dl_attr; - *stat = rule->dl_stat; - spin_unlock(&rule->dl_lock); - rc = 0; - break; - } - - lnet_net_unlock(cpt); - return rc; -} - -/** - * reset counters for all Delay Rules - */ -void -lnet_delay_rule_reset(void) -{ - struct lnet_delay_rule *rule; - int cpt; - - cpt = lnet_net_lock_current(); - - list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) { - struct lnet_fault_attr *attr = &rule->dl_attr; - - spin_lock(&rule->dl_lock); - - memset(&rule->dl_stat, 0, sizeof(rule->dl_stat)); - if (attr->u.delay.la_rate) { - rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate); - } else { - rule->dl_delay_time = - jiffies + prandom_u32_max( - attr->u.delay.la_interval) * HZ; - rule->dl_time_base = jiffies + attr->u.delay.la_interval * HZ; - } - spin_unlock(&rule->dl_lock); - } - - lnet_net_unlock(cpt); -} - -int -lnet_fault_ctl(int opc, struct libcfs_ioctl_data *data) -{ - struct lnet_fault_attr *attr; - struct lnet_fault_stat *stat; - - attr = (struct lnet_fault_attr *)data->ioc_inlbuf1; - - switch (opc) { - default: - return -EINVAL; - - case LNET_CTL_DROP_ADD: - if (!attr) - return -EINVAL; - - return lnet_drop_rule_add(attr); - - case LNET_CTL_DROP_DEL: - if (!attr) - return -EINVAL; - - data->ioc_count = lnet_drop_rule_del(attr->fa_src, - attr->fa_dst); - return 0; - - case 
LNET_CTL_DROP_RESET: - lnet_drop_rule_reset(); - return 0; - - case LNET_CTL_DROP_LIST: - stat = (struct lnet_fault_stat *)data->ioc_inlbuf2; - if (!attr || !stat) - return -EINVAL; - - return lnet_drop_rule_list(data->ioc_count, attr, stat); - - case LNET_CTL_DELAY_ADD: - if (!attr) - return -EINVAL; - - return lnet_delay_rule_add(attr); - - case LNET_CTL_DELAY_DEL: - if (!attr) - return -EINVAL; - - data->ioc_count = lnet_delay_rule_del(attr->fa_src, - attr->fa_dst, false); - return 0; - - case LNET_CTL_DELAY_RESET: - lnet_delay_rule_reset(); - return 0; - - case LNET_CTL_DELAY_LIST: - stat = (struct lnet_fault_stat *)data->ioc_inlbuf2; - if (!attr || !stat) - return -EINVAL; - - return lnet_delay_rule_list(data->ioc_count, attr, stat); - } -} - -int -lnet_fault_init(void) -{ - BUILD_BUG_ON(LNET_PUT_BIT != 1 << LNET_MSG_PUT); - BUILD_BUG_ON(LNET_ACK_BIT != 1 << LNET_MSG_ACK); - BUILD_BUG_ON(LNET_GET_BIT != 1 << LNET_MSG_GET); - BUILD_BUG_ON(LNET_REPLY_BIT != 1 << LNET_MSG_REPLY); - - mutex_init(&delay_dd.dd_mutex); - spin_lock_init(&delay_dd.dd_lock); - init_waitqueue_head(&delay_dd.dd_waitq); - init_waitqueue_head(&delay_dd.dd_ctl_waitq); - INIT_LIST_HEAD(&delay_dd.dd_sched_rules); - - return 0; -} - -void -lnet_fault_fini(void) -{ - lnet_drop_rule_del(0, 0); - lnet_delay_rule_del(0, 0, true); - - LASSERT(list_empty(&the_lnet.ln_drop_rules)); - LASSERT(list_empty(&the_lnet.ln_delay_rules)); - LASSERT(list_empty(&delay_dd.dd_sched_rules)); -} diff --git a/drivers/staging/lustre/lnet/lnet/nidstrings.c b/drivers/staging/lustre/lnet/lnet/nidstrings.c deleted file mode 100644 index 0f6c3fa16c65..000000000000 --- a/drivers/staging/lustre/lnet/lnet/nidstrings.c +++ /dev/null @@ -1,1261 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/lnet/nidstrings.c - * - * Author: Phil Schwan - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include -#include -#include -#include - -/* max value for numeric network address */ -#define MAX_NUMERIC_VALUE 0xffffffff - -#define IPSTRING_LENGTH 16 - -/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids - * consistent in all conversion functions. Some code fragments are copied - * around for the sake of clarity... - */ - -/* CAVEAT EMPTOR! Racey temporary buffer allocation! - * Choose the number of nidstrings to support the MAXIMUM expected number of - * concurrent users. If there are more, the returned string will be volatile. - * NB this number must allow for a process to be descheduled for a timeslice - * between getting its string and using it. 
- */ - -static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE]; -static int libcfs_nidstring_idx; - -static DEFINE_SPINLOCK(libcfs_nidstring_lock); - -static struct netstrfns *libcfs_namenum2netstrfns(const char *name); - -char * -libcfs_next_nidstring(void) -{ - char *str; - unsigned long flags; - - spin_lock_irqsave(&libcfs_nidstring_lock, flags); - - str = libcfs_nidstrings[libcfs_nidstring_idx++]; - if (libcfs_nidstring_idx == ARRAY_SIZE(libcfs_nidstrings)) - libcfs_nidstring_idx = 0; - - spin_unlock_irqrestore(&libcfs_nidstring_lock, flags); - return str; -} -EXPORT_SYMBOL(libcfs_next_nidstring); - -/** - * Nid range list syntax. - * \verbatim - * - * :== [ ' ' ] - * :== '@' - * :== '*' | - * | - * - * :== ... - * - * :== | - * - * :== '[' [ ',' ] ']' - * :== | - * '-' | - * '-' '/' - * :== | - * :== "lo" | "tcp" | "o2ib" | "cib" | "openib" | "iib" | - * "vib" | "ra" | "elan" | "mx" | "ptl" - * \endverbatim - */ - -/** - * Structure to represent \ token of the syntax. - * - * One of this is created for each \ parsed. - */ -struct nidrange { - /** - * Link to list of this structures which is built on nid range - * list parsing. - */ - struct list_head nr_link; - /** - * List head for addrrange::ar_link. - */ - struct list_head nr_addrranges; - /** - * Flag indicating that *@ is found. - */ - int nr_all; - /** - * Pointer to corresponding element of libcfs_netstrfns. - */ - struct netstrfns *nr_netstrfns; - /** - * Number of network. E.g. 5 if \ is "elan5". - */ - int nr_netnum; -}; - -/** - * Structure to represent \ token of the syntax. - */ -struct addrrange { - /** - * Link to nidrange::nr_addrranges. - */ - struct list_head ar_link; - /** - * List head for cfs_expr_list::el_list. - */ - struct list_head ar_numaddr_ranges; -}; - -/** - * Parses \ token on the syntax. 
- * - * Allocates struct addrrange and links to \a nidrange via - * (nidrange::nr_addrranges) - * - * \retval 0 if \a src parses to '*' | \ | \ - * \retval -errno otherwise - */ -static int -parse_addrange(const struct cfs_lstr *src, struct nidrange *nidrange) -{ - struct addrrange *addrrange; - - if (src->ls_len == 1 && src->ls_str[0] == '*') { - nidrange->nr_all = 1; - return 0; - } - - addrrange = kzalloc(sizeof(struct addrrange), GFP_NOFS); - if (!addrrange) - return -ENOMEM; - list_add_tail(&addrrange->ar_link, &nidrange->nr_addrranges); - INIT_LIST_HEAD(&addrrange->ar_numaddr_ranges); - - return nidrange->nr_netstrfns->nf_parse_addrlist(src->ls_str, - src->ls_len, - &addrrange->ar_numaddr_ranges); -} - -/** - * Finds or creates struct nidrange. - * - * Checks if \a src is a valid network name, looks for corresponding - * nidrange on the ist of nidranges (\a nidlist), creates new struct - * nidrange if it is not found. - * - * \retval pointer to struct nidrange matching network specified via \a src - * \retval NULL if \a src does not match any network - */ -static struct nidrange * -add_nidrange(const struct cfs_lstr *src, - struct list_head *nidlist) -{ - struct netstrfns *nf; - struct nidrange *nr; - int endlen; - unsigned int netnum; - - if (src->ls_len >= LNET_NIDSTR_SIZE) - return NULL; - - nf = libcfs_namenum2netstrfns(src->ls_str); - if (!nf) - return NULL; - endlen = src->ls_len - strlen(nf->nf_name); - if (!endlen) - /* network name only, e.g. "elan" or "tcp" */ - netnum = 0; - else { - /* - * e.g. 
"elan25" or "tcp23", refuse to parse if - * network name is not appended with decimal or - * hexadecimal number - */ - if (!cfs_str2num_check(src->ls_str + strlen(nf->nf_name), - endlen, &netnum, 0, MAX_NUMERIC_VALUE)) - return NULL; - } - - list_for_each_entry(nr, nidlist, nr_link) { - if (nr->nr_netstrfns != nf) - continue; - if (nr->nr_netnum != netnum) - continue; - return nr; - } - - nr = kzalloc(sizeof(struct nidrange), GFP_NOFS); - if (!nr) - return NULL; - list_add_tail(&nr->nr_link, nidlist); - INIT_LIST_HEAD(&nr->nr_addrranges); - nr->nr_netstrfns = nf; - nr->nr_all = 0; - nr->nr_netnum = netnum; - - return nr; -} - -/** - * Parses \ token of the syntax. - * - * \retval 1 if \a src parses to \ '@' \ - * \retval 0 otherwise - */ -static int -parse_nidrange(struct cfs_lstr *src, struct list_head *nidlist) -{ - struct cfs_lstr addrrange; - struct cfs_lstr net; - struct nidrange *nr; - - if (!cfs_gettok(src, '@', &addrrange)) - goto failed; - - if (!cfs_gettok(src, '@', &net) || src->ls_str) - goto failed; - - nr = add_nidrange(&net, nidlist); - if (!nr) - goto failed; - - if (parse_addrange(&addrrange, nr)) - goto failed; - - return 1; -failed: - return 0; -} - -/** - * Frees addrrange structures of \a list. - * - * For each struct addrrange structure found on \a list it frees - * cfs_expr_list list attached to it and frees the addrrange itself. - * - * \retval none - */ -static void -free_addrranges(struct list_head *list) -{ - while (!list_empty(list)) { - struct addrrange *ar; - - ar = list_entry(list->next, struct addrrange, ar_link); - - cfs_expr_list_free_list(&ar->ar_numaddr_ranges); - list_del(&ar->ar_link); - kfree(ar); - } -} - -/** - * Frees nidrange strutures of \a list. - * - * For each struct nidrange structure found on \a list it frees - * addrrange list attached to it and frees the nidrange itself. 
- * - * \retval none - */ -void -cfs_free_nidlist(struct list_head *list) -{ - struct list_head *pos, *next; - struct nidrange *nr; - - list_for_each_safe(pos, next, list) { - nr = list_entry(pos, struct nidrange, nr_link); - free_addrranges(&nr->nr_addrranges); - list_del(pos); - kfree(nr); - } -} -EXPORT_SYMBOL(cfs_free_nidlist); - -/** - * Parses nid range list. - * - * Parses with rigorous syntax and overflow checking \a str into - * \ [ ' ' \ ], compiles \a str into set of - * structures and links that structure to \a nidlist. The resulting - * list can be used to match a NID againts set of NIDS defined by \a - * str. - * \see cfs_match_nid - * - * \retval 1 on success - * \retval 0 otherwise - */ -int -cfs_parse_nidlist(char *str, int len, struct list_head *nidlist) -{ - struct cfs_lstr src; - struct cfs_lstr res; - int rc; - - src.ls_str = str; - src.ls_len = len; - INIT_LIST_HEAD(nidlist); - while (src.ls_str) { - rc = cfs_gettok(&src, ' ', &res); - if (!rc) { - cfs_free_nidlist(nidlist); - return 0; - } - rc = parse_nidrange(&res, nidlist); - if (!rc) { - cfs_free_nidlist(nidlist); - return 0; - } - } - return 1; -} -EXPORT_SYMBOL(cfs_parse_nidlist); - -/** - * Matches a nid (\a nid) against the compiled list of nidranges (\a nidlist). - * - * \see cfs_parse_nidlist() - * - * \retval 1 on match - * \retval 0 otherwises - */ -int cfs_match_nid(lnet_nid_t nid, struct list_head *nidlist) -{ - struct nidrange *nr; - struct addrrange *ar; - - list_for_each_entry(nr, nidlist, nr_link) { - if (nr->nr_netstrfns->nf_type != LNET_NETTYP(LNET_NIDNET(nid))) - continue; - if (nr->nr_netnum != LNET_NETNUM(LNET_NIDNET(nid))) - continue; - if (nr->nr_all) - return 1; - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) - if (nr->nr_netstrfns->nf_match_addr(LNET_NIDADDR(nid), - &ar->ar_numaddr_ranges)) - return 1; - } - return 0; -} -EXPORT_SYMBOL(cfs_match_nid); - -/** - * Print the network part of the nidrange \a nr into the specified \a buffer. 
- * - * \retval number of characters written - */ -static int -cfs_print_network(char *buffer, int count, struct nidrange *nr) -{ - struct netstrfns *nf = nr->nr_netstrfns; - - if (!nr->nr_netnum) - return scnprintf(buffer, count, "@%s", nf->nf_name); - else - return scnprintf(buffer, count, "@%s%u", - nf->nf_name, nr->nr_netnum); -} - -/** - * Print a list of addrrange (\a addrranges) into the specified \a buffer. - * At max \a count characters can be printed into \a buffer. - * - * \retval number of characters written - */ -static int -cfs_print_addrranges(char *buffer, int count, struct list_head *addrranges, - struct nidrange *nr) -{ - int i = 0; - struct addrrange *ar; - struct netstrfns *nf = nr->nr_netstrfns; - - list_for_each_entry(ar, addrranges, ar_link) { - if (i) - i += scnprintf(buffer + i, count - i, " "); - i += nf->nf_print_addrlist(buffer + i, count - i, - &ar->ar_numaddr_ranges); - i += cfs_print_network(buffer + i, count - i, nr); - } - return i; -} - -/** - * Print a list of nidranges (\a nidlist) into the specified \a buffer. - * At max \a count characters can be printed into \a buffer. - * Nidranges are separated by a space character. 
- * - * \retval number of characters written - */ -int cfs_print_nidlist(char *buffer, int count, struct list_head *nidlist) -{ - int i = 0; - struct nidrange *nr; - - if (count <= 0) - return 0; - - list_for_each_entry(nr, nidlist, nr_link) { - if (i) - i += scnprintf(buffer + i, count - i, " "); - - if (nr->nr_all) { - LASSERT(list_empty(&nr->nr_addrranges)); - i += scnprintf(buffer + i, count - i, "*"); - i += cfs_print_network(buffer + i, count - i, nr); - } else { - i += cfs_print_addrranges(buffer + i, count - i, - &nr->nr_addrranges, nr); - } - } - return i; -} -EXPORT_SYMBOL(cfs_print_nidlist); - -/** - * Determines minimum and maximum addresses for a single - * numeric address range - * - * \param ar - * \param min_nid - * \param max_nid - */ -static void cfs_ip_ar_min_max(struct addrrange *ar, __u32 *min_nid, - __u32 *max_nid) -{ - struct cfs_expr_list *el; - struct cfs_range_expr *re; - __u32 tmp_ip_addr = 0; - unsigned int min_ip[4] = {0}; - unsigned int max_ip[4] = {0}; - int re_count = 0; - - list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) { - list_for_each_entry(re, &el->el_exprs, re_link) { - min_ip[re_count] = re->re_lo; - max_ip[re_count] = re->re_hi; - re_count++; - } - } - - tmp_ip_addr = ((min_ip[0] << 24) | (min_ip[1] << 16) | - (min_ip[2] << 8) | min_ip[3]); - - if (min_nid) - *min_nid = tmp_ip_addr; - - tmp_ip_addr = ((max_ip[0] << 24) | (max_ip[1] << 16) | - (max_ip[2] << 8) | max_ip[3]); - - if (max_nid) - *max_nid = tmp_ip_addr; -} - -/** - * Determines minimum and maximum addresses for a single - * numeric address range - * - * \param ar - * \param min_nid - * \param max_nid - */ -static void cfs_num_ar_min_max(struct addrrange *ar, __u32 *min_nid, - __u32 *max_nid) -{ - struct cfs_expr_list *el; - struct cfs_range_expr *re; - unsigned int min_addr = 0; - unsigned int max_addr = 0; - - list_for_each_entry(el, &ar->ar_numaddr_ranges, el_link) { - list_for_each_entry(re, &el->el_exprs, re_link) { - if (re->re_lo < min_addr || 
!min_addr) - min_addr = re->re_lo; - if (re->re_hi > max_addr) - max_addr = re->re_hi; - } - } - - if (min_nid) - *min_nid = min_addr; - if (max_nid) - *max_nid = max_addr; -} - -/** - * Determines whether an expression list in an nidrange contains exactly - * one contiguous address range. Calls the correct netstrfns for the LND - * - * \param *nidlist - * - * \retval true if contiguous - * \retval false if not contiguous - */ -bool cfs_nidrange_is_contiguous(struct list_head *nidlist) -{ - struct nidrange *nr; - struct netstrfns *nf = NULL; - char *lndname = NULL; - int netnum = -1; - - list_for_each_entry(nr, nidlist, nr_link) { - nf = nr->nr_netstrfns; - if (!lndname) - lndname = nf->nf_name; - if (netnum == -1) - netnum = nr->nr_netnum; - - if (strcmp(lndname, nf->nf_name) || - netnum != nr->nr_netnum) - return false; - } - - if (!nf) - return false; - - if (!nf->nf_is_contiguous(nidlist)) - return false; - - return true; -} -EXPORT_SYMBOL(cfs_nidrange_is_contiguous); - -/** - * Determines whether an expression list in an num nidrange contains exactly - * one contiguous address range. 
- * - * \param *nidlist - * - * \retval true if contiguous - * \retval false if not contiguous - */ -static bool cfs_num_is_contiguous(struct list_head *nidlist) -{ - struct nidrange *nr; - struct addrrange *ar; - struct cfs_expr_list *el; - struct cfs_range_expr *re; - int last_hi = 0; - __u32 last_end_nid = 0; - __u32 current_start_nid = 0; - __u32 current_end_nid = 0; - - list_for_each_entry(nr, nidlist, nr_link) { - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { - cfs_num_ar_min_max(ar, ¤t_start_nid, - ¤t_end_nid); - if (last_end_nid && - (current_start_nid - last_end_nid != 1)) - return false; - last_end_nid = current_end_nid; - list_for_each_entry(el, &ar->ar_numaddr_ranges, - el_link) { - list_for_each_entry(re, &el->el_exprs, - re_link) { - if (re->re_stride > 1) - return false; - else if (last_hi && - re->re_hi - last_hi != 1) - return false; - last_hi = re->re_hi; - } - } - } - } - - return true; -} - -/** - * Determines whether an expression list in an ip nidrange contains exactly - * one contiguous address range. 
- * - * \param *nidlist - * - * \retval true if contiguous - * \retval false if not contiguous - */ -static bool cfs_ip_is_contiguous(struct list_head *nidlist) -{ - struct nidrange *nr; - struct addrrange *ar; - struct cfs_expr_list *el; - struct cfs_range_expr *re; - int expr_count; - int last_hi = 255; - int last_diff = 0; - __u32 last_end_nid = 0; - __u32 current_start_nid = 0; - __u32 current_end_nid = 0; - - list_for_each_entry(nr, nidlist, nr_link) { - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { - last_hi = 255; - last_diff = 0; - cfs_ip_ar_min_max(ar, ¤t_start_nid, - ¤t_end_nid); - if (last_end_nid && - (current_start_nid - last_end_nid != 1)) - return false; - last_end_nid = current_end_nid; - list_for_each_entry(el, &ar->ar_numaddr_ranges, - el_link) { - expr_count = 0; - list_for_each_entry(re, &el->el_exprs, - re_link) { - expr_count++; - if (re->re_stride > 1 || - (last_diff > 0 && last_hi != 255) || - (last_diff > 0 && last_hi == 255 && - re->re_lo > 0)) - return false; - last_hi = re->re_hi; - last_diff = re->re_hi - re->re_lo; - } - } - } - } - - return true; -} - -/** - * Takes a linked list of nidrange expressions, determines the minimum - * and maximum nid and creates appropriate nid structures - * - * \param *nidlist - * \param *min_nid - * \param *max_nid - */ -void cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid, - char *max_nid, size_t nidstr_length) -{ - struct nidrange *nr; - struct netstrfns *nf = NULL; - int netnum = -1; - __u32 min_addr; - __u32 max_addr; - char *lndname = NULL; - char min_addr_str[IPSTRING_LENGTH]; - char max_addr_str[IPSTRING_LENGTH]; - - list_for_each_entry(nr, nidlist, nr_link) { - nf = nr->nr_netstrfns; - lndname = nf->nf_name; - if (netnum == -1) - netnum = nr->nr_netnum; - - nf->nf_min_max(nidlist, &min_addr, &max_addr); - } - nf->nf_addr2str(min_addr, min_addr_str, sizeof(min_addr_str)); - nf->nf_addr2str(max_addr, max_addr_str, sizeof(max_addr_str)); - - snprintf(min_nid, 
nidstr_length, "%s@%s%d", min_addr_str, lndname, - netnum); - snprintf(max_nid, nidstr_length, "%s@%s%d", max_addr_str, lndname, - netnum); -} -EXPORT_SYMBOL(cfs_nidrange_find_min_max); - -/** - * Determines the min and max NID values for num LNDs - * - * \param *nidlist - * \param *min_nid - * \param *max_nid - */ -static void cfs_num_min_max(struct list_head *nidlist, __u32 *min_nid, - __u32 *max_nid) -{ - struct nidrange *nr; - struct addrrange *ar; - unsigned int tmp_min_addr = 0; - unsigned int tmp_max_addr = 0; - unsigned int min_addr = 0; - unsigned int max_addr = 0; - - list_for_each_entry(nr, nidlist, nr_link) { - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { - cfs_num_ar_min_max(ar, &tmp_min_addr, - &tmp_max_addr); - if (tmp_min_addr < min_addr || !min_addr) - min_addr = tmp_min_addr; - if (tmp_max_addr > max_addr) - max_addr = tmp_min_addr; - } - } - *max_nid = max_addr; - *min_nid = min_addr; -} - -/** - * Takes an nidlist and determines the minimum and maximum - * ip addresses. 
- * - * \param *nidlist - * \param *min_nid - * \param *max_nid - */ -static void cfs_ip_min_max(struct list_head *nidlist, __u32 *min_nid, - __u32 *max_nid) -{ - struct nidrange *nr; - struct addrrange *ar; - __u32 tmp_min_ip_addr = 0; - __u32 tmp_max_ip_addr = 0; - __u32 min_ip_addr = 0; - __u32 max_ip_addr = 0; - - list_for_each_entry(nr, nidlist, nr_link) { - list_for_each_entry(ar, &nr->nr_addrranges, ar_link) { - cfs_ip_ar_min_max(ar, &tmp_min_ip_addr, - &tmp_max_ip_addr); - if (tmp_min_ip_addr < min_ip_addr || !min_ip_addr) - min_ip_addr = tmp_min_ip_addr; - if (tmp_max_ip_addr > max_ip_addr) - max_ip_addr = tmp_max_ip_addr; - } - } - - if (min_nid) - *min_nid = min_ip_addr; - if (max_nid) - *max_nid = max_ip_addr; -} - -static int -libcfs_lo_str2addr(const char *str, int nob, __u32 *addr) -{ - *addr = 0; - return 1; -} - -static void -libcfs_ip_addr2str(__u32 addr, char *str, size_t size) -{ - snprintf(str, size, "%u.%u.%u.%u", - (addr >> 24) & 0xff, (addr >> 16) & 0xff, - (addr >> 8) & 0xff, addr & 0xff); -} - -/* - * CAVEAT EMPTOR XscanfX - * I use "%n" at the end of a sscanf format to detect trailing junk. However - * sscanf may return immediately if it sees the terminating '0' in a string, so - * I initialise the %n variable to the expected length. If sscanf sets it; - * fine, if it doesn't, then the scan ended at the end of the string, which is - * fine too :) - */ -static int -libcfs_ip_str2addr(const char *str, int nob, __u32 *addr) -{ - unsigned int a; - unsigned int b; - unsigned int c; - unsigned int d; - int n = nob; /* XscanfX */ - - /* numeric IP? 
*/ - if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 && - n == nob && - !(a & ~0xff) && !(b & ~0xff) && - !(c & ~0xff) && !(d & ~0xff)) { - *addr = ((a << 24) | (b << 16) | (c << 8) | d); - return 1; - } - - return 0; -} - -/* Used by lnet/config.c so it can't be static */ -int -cfs_ip_addr_parse(char *str, int len, struct list_head *list) -{ - struct cfs_expr_list *el; - struct cfs_lstr src; - int rc; - int i; - - src.ls_str = str; - src.ls_len = len; - i = 0; - - while (src.ls_str) { - struct cfs_lstr res; - - if (!cfs_gettok(&src, '.', &res)) { - rc = -EINVAL; - goto out; - } - - rc = cfs_expr_list_parse(res.ls_str, res.ls_len, 0, 255, &el); - if (rc) - goto out; - - list_add_tail(&el->el_link, list); - i++; - } - - if (i == 4) - return 0; - - rc = -EINVAL; -out: - cfs_expr_list_free_list(list); - - return rc; -} - -static int -libcfs_ip_addr_range_print(char *buffer, int count, struct list_head *list) -{ - int i = 0, j = 0; - struct cfs_expr_list *el; - - list_for_each_entry(el, list, el_link) { - LASSERT(j++ < 4); - if (i) - i += scnprintf(buffer + i, count - i, "."); - i += cfs_expr_list_print(buffer + i, count - i, el); - } - return i; -} - -/** - * Matches address (\a addr) against address set encoded in \a list. 
- * - * \retval 1 if \a addr matches - * \retval 0 otherwise - */ -int -cfs_ip_addr_match(__u32 addr, struct list_head *list) -{ - struct cfs_expr_list *el; - int i = 0; - - list_for_each_entry_reverse(el, list, el_link) { - if (!cfs_expr_list_match(addr & 0xff, el)) - return 0; - addr >>= 8; - i++; - } - - return i == 4; -} - -static void -libcfs_decnum_addr2str(__u32 addr, char *str, size_t size) -{ - snprintf(str, size, "%u", addr); -} - -static int -libcfs_num_str2addr(const char *str, int nob, __u32 *addr) -{ - int n; - - n = nob; - if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob) - return 1; - - n = nob; - if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob) - return 1; - - n = nob; - if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob) - return 1; - - return 0; -} - -/** - * Nf_parse_addrlist method for networks using numeric addresses. - * - * Examples of such networks are gm and elan. - * - * \retval 0 if \a str parsed to numeric address - * \retval errno otherwise - */ -static int -libcfs_num_parse(char *str, int len, struct list_head *list) -{ - struct cfs_expr_list *el; - int rc; - - rc = cfs_expr_list_parse(str, len, 0, MAX_NUMERIC_VALUE, &el); - if (!rc) - list_add_tail(&el->el_link, list); - - return rc; -} - -static int -libcfs_num_addr_range_print(char *buffer, int count, struct list_head *list) -{ - int i = 0, j = 0; - struct cfs_expr_list *el; - - list_for_each_entry(el, list, el_link) { - LASSERT(j++ < 1); - i += cfs_expr_list_print(buffer + i, count - i, el); - } - return i; -} - -/* - * Nf_match_addr method for networks using numeric addresses - * - * \retval 1 on match - * \retval 0 otherwise - */ -static int -libcfs_num_match(__u32 addr, struct list_head *numaddr) -{ - struct cfs_expr_list *el; - - LASSERT(!list_empty(numaddr)); - el = list_entry(numaddr->next, struct cfs_expr_list, el_link); - - return cfs_expr_list_match(addr, el); -} - -static struct netstrfns libcfs_netstrfns[] = { - { .nf_type = LOLND, - .nf_name = "lo", - 
.nf_modname = "klolnd", - .nf_addr2str = libcfs_decnum_addr2str, - .nf_str2addr = libcfs_lo_str2addr, - .nf_parse_addrlist = libcfs_num_parse, - .nf_print_addrlist = libcfs_num_addr_range_print, - .nf_match_addr = libcfs_num_match, - .nf_is_contiguous = cfs_num_is_contiguous, - .nf_min_max = cfs_num_min_max }, - { .nf_type = SOCKLND, - .nf_name = "tcp", - .nf_modname = "ksocklnd", - .nf_addr2str = libcfs_ip_addr2str, - .nf_str2addr = libcfs_ip_str2addr, - .nf_parse_addrlist = cfs_ip_addr_parse, - .nf_print_addrlist = libcfs_ip_addr_range_print, - .nf_match_addr = cfs_ip_addr_match, - .nf_is_contiguous = cfs_ip_is_contiguous, - .nf_min_max = cfs_ip_min_max }, - { .nf_type = O2IBLND, - .nf_name = "o2ib", - .nf_modname = "ko2iblnd", - .nf_addr2str = libcfs_ip_addr2str, - .nf_str2addr = libcfs_ip_str2addr, - .nf_parse_addrlist = cfs_ip_addr_parse, - .nf_print_addrlist = libcfs_ip_addr_range_print, - .nf_match_addr = cfs_ip_addr_match, - .nf_is_contiguous = cfs_ip_is_contiguous, - .nf_min_max = cfs_ip_min_max }, - { .nf_type = GNILND, - .nf_name = "gni", - .nf_modname = "kgnilnd", - .nf_addr2str = libcfs_decnum_addr2str, - .nf_str2addr = libcfs_num_str2addr, - .nf_parse_addrlist = libcfs_num_parse, - .nf_print_addrlist = libcfs_num_addr_range_print, - .nf_match_addr = libcfs_num_match, - .nf_is_contiguous = cfs_num_is_contiguous, - .nf_min_max = cfs_num_min_max }, - { .nf_type = GNIIPLND, - .nf_name = "gip", - .nf_modname = "kgnilnd", - .nf_addr2str = libcfs_ip_addr2str, - .nf_str2addr = libcfs_ip_str2addr, - .nf_parse_addrlist = cfs_ip_addr_parse, - .nf_print_addrlist = libcfs_ip_addr_range_print, - .nf_match_addr = cfs_ip_addr_match, - .nf_is_contiguous = cfs_ip_is_contiguous, - .nf_min_max = cfs_ip_min_max }, -}; - -static const size_t libcfs_nnetstrfns = ARRAY_SIZE(libcfs_netstrfns); - -static struct netstrfns * -libcfs_lnd2netstrfns(__u32 lnd) -{ - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) - if (lnd == libcfs_netstrfns[i].nf_type) - return 
&libcfs_netstrfns[i]; - - return NULL; -} - -static struct netstrfns * -libcfs_namenum2netstrfns(const char *name) -{ - struct netstrfns *nf; - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) { - nf = &libcfs_netstrfns[i]; - if (!strncmp(name, nf->nf_name, strlen(nf->nf_name))) - return nf; - } - return NULL; -} - -static struct netstrfns * -libcfs_name2netstrfns(const char *name) -{ - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) - if (!strcmp(libcfs_netstrfns[i].nf_name, name)) - return &libcfs_netstrfns[i]; - - return NULL; -} - -int -libcfs_isknown_lnd(__u32 lnd) -{ - return !!libcfs_lnd2netstrfns(lnd); -} -EXPORT_SYMBOL(libcfs_isknown_lnd); - -char * -libcfs_lnd2modname(__u32 lnd) -{ - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - - return nf ? nf->nf_modname : NULL; -} -EXPORT_SYMBOL(libcfs_lnd2modname); - -int -libcfs_str2lnd(const char *str) -{ - struct netstrfns *nf = libcfs_name2netstrfns(str); - - if (nf) - return nf->nf_type; - - return -ENXIO; -} -EXPORT_SYMBOL(libcfs_str2lnd); - -char * -libcfs_lnd2str_r(__u32 lnd, char *buf, size_t buf_size) -{ - struct netstrfns *nf; - - nf = libcfs_lnd2netstrfns(lnd); - if (!nf) - snprintf(buf, buf_size, "?%u?", lnd); - else - snprintf(buf, buf_size, "%s", nf->nf_name); - - return buf; -} -EXPORT_SYMBOL(libcfs_lnd2str_r); - -char * -libcfs_net2str_r(__u32 net, char *buf, size_t buf_size) -{ - __u32 nnum = LNET_NETNUM(net); - __u32 lnd = LNET_NETTYP(net); - struct netstrfns *nf; - - nf = libcfs_lnd2netstrfns(lnd); - if (!nf) - snprintf(buf, buf_size, "<%u:%u>", lnd, nnum); - else if (!nnum) - snprintf(buf, buf_size, "%s", nf->nf_name); - else - snprintf(buf, buf_size, "%s%u", nf->nf_name, nnum); - - return buf; -} -EXPORT_SYMBOL(libcfs_net2str_r); - -char * -libcfs_nid2str_r(lnet_nid_t nid, char *buf, size_t buf_size) -{ - __u32 addr = LNET_NIDADDR(nid); - __u32 net = LNET_NIDNET(nid); - __u32 nnum = LNET_NETNUM(net); - __u32 lnd = LNET_NETTYP(net); - struct netstrfns *nf; - - if (nid == LNET_NID_ANY) 
{ - strncpy(buf, "", buf_size); - buf[buf_size - 1] = '\0'; - return buf; - } - - nf = libcfs_lnd2netstrfns(lnd); - if (!nf) { - snprintf(buf, buf_size, "%x@<%u:%u>", addr, lnd, nnum); - } else { - size_t addr_len; - - nf->nf_addr2str(addr, buf, buf_size); - addr_len = strlen(buf); - if (!nnum) - snprintf(buf + addr_len, buf_size - addr_len, "@%s", - nf->nf_name); - else - snprintf(buf + addr_len, buf_size - addr_len, "@%s%u", - nf->nf_name, nnum); - } - - return buf; -} -EXPORT_SYMBOL(libcfs_nid2str_r); - -static struct netstrfns * -libcfs_str2net_internal(const char *str, __u32 *net) -{ - struct netstrfns *nf = NULL; - int nob; - unsigned int netnum; - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) { - nf = &libcfs_netstrfns[i]; - if (!strncmp(str, nf->nf_name, strlen(nf->nf_name))) - break; - } - - if (i == libcfs_nnetstrfns) - return NULL; - - nob = strlen(nf->nf_name); - - if (strlen(str) == (unsigned int)nob) { - netnum = 0; - } else { - if (nf->nf_type == LOLND) /* net number not allowed */ - return NULL; - - str += nob; - i = strlen(str); - if (sscanf(str, "%u%n", &netnum, &i) < 1 || - i != (int)strlen(str)) - return NULL; - } - - *net = LNET_MKNET(nf->nf_type, netnum); - return nf; -} - -__u32 -libcfs_str2net(const char *str) -{ - __u32 net; - - if (libcfs_str2net_internal(str, &net)) - return net; - - return LNET_NIDNET(LNET_NID_ANY); -} -EXPORT_SYMBOL(libcfs_str2net); - -lnet_nid_t -libcfs_str2nid(const char *str) -{ - const char *sep = strchr(str, '@'); - struct netstrfns *nf; - __u32 net; - __u32 addr; - - if (sep) { - nf = libcfs_str2net_internal(sep + 1, &net); - if (!nf) - return LNET_NID_ANY; - } else { - sep = str + strlen(str); - net = LNET_MKNET(SOCKLND, 0); - nf = libcfs_lnd2netstrfns(SOCKLND); - LASSERT(nf); - } - - if (!nf->nf_str2addr(str, (int)(sep - str), &addr)) - return LNET_NID_ANY; - - return LNET_MKNID(net, addr); -} -EXPORT_SYMBOL(libcfs_str2nid); - -char * -libcfs_id2str(struct lnet_process_id id) -{ - char *str = 
libcfs_next_nidstring(); - - if (id.pid == LNET_PID_ANY) { - snprintf(str, LNET_NIDSTR_SIZE, - "LNET_PID_ANY-%s", libcfs_nid2str(id.nid)); - return str; - } - - snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s", - id.pid & LNET_PID_USERFLAG ? "U" : "", - id.pid & ~LNET_PID_USERFLAG, libcfs_nid2str(id.nid)); - return str; -} -EXPORT_SYMBOL(libcfs_id2str); - -int -libcfs_str2anynid(lnet_nid_t *nidp, const char *str) -{ - if (!strcmp(str, "*")) { - *nidp = LNET_NID_ANY; - return 1; - } - - *nidp = libcfs_str2nid(str); - return *nidp != LNET_NID_ANY; -} -EXPORT_SYMBOL(libcfs_str2anynid); diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c deleted file mode 100644 index 58294149f7b2..000000000000 --- a/drivers/staging/lustre/lnet/lnet/peer.c +++ /dev/null @@ -1,456 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/lnet/peer.c - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include - -int -lnet_peer_tables_create(void) -{ - struct lnet_peer_table *ptable; - struct list_head *hash; - int i; - int j; - - the_lnet.ln_peer_tables = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*ptable)); - if (!the_lnet.ln_peer_tables) { - CERROR("Failed to allocate cpu-partition peer tables\n"); - return -ENOMEM; - } - - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - INIT_LIST_HEAD(&ptable->pt_deathrow); - - hash = kvmalloc_cpt(LNET_PEER_HASH_SIZE * sizeof(*hash), - GFP_KERNEL, i); - if (!hash) { - CERROR("Failed to create peer hash table\n"); - lnet_peer_tables_destroy(); - return -ENOMEM; - } - - for (j = 0; j < LNET_PEER_HASH_SIZE; j++) - INIT_LIST_HEAD(&hash[j]); - ptable->pt_hash = hash; /* sign of initialization */ - } - - return 0; -} - -void -lnet_peer_tables_destroy(void) -{ - struct lnet_peer_table *ptable; - struct list_head *hash; - int i; - int j; - - if (!the_lnet.ln_peer_tables) - return; - - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - hash = ptable->pt_hash; - if (!hash) /* not initialized */ - break; - - LASSERT(list_empty(&ptable->pt_deathrow)); - - ptable->pt_hash = NULL; - for (j = 0; j < LNET_PEER_HASH_SIZE; j++) - LASSERT(list_empty(&hash[j])); - - kvfree(hash); - } - - cfs_percpt_free(the_lnet.ln_peer_tables); - the_lnet.ln_peer_tables = NULL; -} - -static void -lnet_peer_table_cleanup_locked(struct lnet_ni *ni, - struct lnet_peer_table *ptable) -{ - int i; - struct lnet_peer *lp; - struct lnet_peer *tmp; - - for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { - list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], - lp_hashlist) { - if (ni && ni != lp->lp_ni) - continue; - list_del_init(&lp->lp_hashlist); - /* Lose hash table's ref */ - ptable->pt_zombies++; - lnet_peer_decref_locked(lp); - } - } -} - -static void -lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable, - int cpt_locked) -{ - int i; - - for (i = 3; 
ptable->pt_zombies; i++) { - lnet_net_unlock(cpt_locked); - - if (is_power_of_2(i)) { - CDEBUG(D_WARNING, - "Waiting for %d zombies on peer table\n", - ptable->pt_zombies); - } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ >> 1); - lnet_net_lock(cpt_locked); - } -} - -static void -lnet_peer_table_del_rtrs_locked(struct lnet_ni *ni, - struct lnet_peer_table *ptable, - int cpt_locked) -{ - struct lnet_peer *lp; - struct lnet_peer *tmp; - lnet_nid_t lp_nid; - int i; - - for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { - list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], - lp_hashlist) { - if (ni != lp->lp_ni) - continue; - - if (!lp->lp_rtr_refcount) - continue; - - lp_nid = lp->lp_nid; - - lnet_net_unlock(cpt_locked); - lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid); - lnet_net_lock(cpt_locked); - } - } -} - -void -lnet_peer_tables_cleanup(struct lnet_ni *ni) -{ - struct lnet_peer_table *ptable; - struct list_head deathrow; - struct lnet_peer *lp; - struct lnet_peer *temp; - int i; - - INIT_LIST_HEAD(&deathrow); - - LASSERT(the_lnet.ln_shutdown || ni); - /* - * If just deleting the peers for a NI, get rid of any routes these - * peers are gateways for. - */ - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - lnet_net_lock(i); - lnet_peer_table_del_rtrs_locked(ni, ptable, i); - lnet_net_unlock(i); - } - - /* - * Start the process of moving the applicable peers to - * deathrow. - */ - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - lnet_net_lock(i); - lnet_peer_table_cleanup_locked(ni, ptable); - lnet_net_unlock(i); - } - - /* Cleanup all entries on deathrow. 
*/ - cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - lnet_net_lock(i); - lnet_peer_table_deathrow_wait_locked(ptable, i); - list_splice_init(&ptable->pt_deathrow, &deathrow); - lnet_net_unlock(i); - } - - list_for_each_entry_safe(lp, temp, &deathrow, lp_hashlist) { - list_del(&lp->lp_hashlist); - kfree(lp); - } -} - -void -lnet_destroy_peer_locked(struct lnet_peer *lp) -{ - struct lnet_peer_table *ptable; - - LASSERT(!lp->lp_refcount); - LASSERT(!lp->lp_rtr_refcount); - LASSERT(list_empty(&lp->lp_txq)); - LASSERT(list_empty(&lp->lp_hashlist)); - LASSERT(!lp->lp_txqnob); - - ptable = the_lnet.ln_peer_tables[lp->lp_cpt]; - LASSERT(ptable->pt_number > 0); - ptable->pt_number--; - - lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt); - lp->lp_ni = NULL; - - list_add(&lp->lp_hashlist, &ptable->pt_deathrow); - LASSERT(ptable->pt_zombies > 0); - ptable->pt_zombies--; -} - -struct lnet_peer * -lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid) -{ - struct list_head *peers; - struct lnet_peer *lp; - - LASSERT(!the_lnet.ln_shutdown); - - peers = &ptable->pt_hash[lnet_nid2peerhash(nid)]; - list_for_each_entry(lp, peers, lp_hashlist) { - if (lp->lp_nid == nid) { - lnet_peer_addref_locked(lp); - return lp; - } - } - - return NULL; -} - -int -lnet_nid2peer_locked(struct lnet_peer **lpp, lnet_nid_t nid, int cpt) -{ - struct lnet_peer_table *ptable; - struct lnet_peer *lp = NULL; - struct lnet_peer *lp2; - int cpt2; - int rc = 0; - - *lpp = NULL; - if (the_lnet.ln_shutdown) /* it's shutting down */ - return -ESHUTDOWN; - - /* cpt can be LNET_LOCK_EX if it's called from router functions */ - cpt2 = cpt != LNET_LOCK_EX ? 
cpt : lnet_cpt_of_nid_locked(nid); - - ptable = the_lnet.ln_peer_tables[cpt2]; - lp = lnet_find_peer_locked(ptable, nid); - if (lp) { - *lpp = lp; - return 0; - } - - if (!list_empty(&ptable->pt_deathrow)) { - lp = list_entry(ptable->pt_deathrow.next, - struct lnet_peer, lp_hashlist); - list_del(&lp->lp_hashlist); - } - - /* - * take extra refcount in case another thread has shutdown LNet - * and destroyed locks and peer-table before I finish the allocation - */ - ptable->pt_number++; - lnet_net_unlock(cpt); - - if (lp) - memset(lp, 0, sizeof(*lp)); - else - lp = kzalloc_cpt(sizeof(*lp), GFP_NOFS, cpt2); - - if (!lp) { - rc = -ENOMEM; - lnet_net_lock(cpt); - goto out; - } - - INIT_LIST_HEAD(&lp->lp_txq); - INIT_LIST_HEAD(&lp->lp_rtrq); - INIT_LIST_HEAD(&lp->lp_routes); - - lp->lp_notify = 0; - lp->lp_notifylnd = 0; - lp->lp_notifying = 0; - lp->lp_alive_count = 0; - lp->lp_timestamp = 0; - lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */ - lp->lp_last_alive = jiffies; /* assumes alive */ - lp->lp_last_query = 0; /* haven't asked NI yet */ - lp->lp_ping_timestamp = 0; - lp->lp_ping_feats = LNET_PING_FEAT_INVAL; - lp->lp_nid = nid; - lp->lp_cpt = cpt2; - lp->lp_refcount = 2; /* 1 for caller; 1 for hash */ - lp->lp_rtr_refcount = 0; - - lnet_net_lock(cpt); - - if (the_lnet.ln_shutdown) { - rc = -ESHUTDOWN; - goto out; - } - - lp2 = lnet_find_peer_locked(ptable, nid); - if (lp2) { - *lpp = lp2; - goto out; - } - - lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2); - if (!lp->lp_ni) { - rc = -EHOSTUNREACH; - goto out; - } - - lp->lp_txcredits = lp->lp_ni->ni_peertxcredits; - lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; - lp->lp_rtrcredits = lnet_peer_buffer_credits(lp->lp_ni); - lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni); - - list_add_tail(&lp->lp_hashlist, - &ptable->pt_hash[lnet_nid2peerhash(nid)]); - ptable->pt_version++; - *lpp = lp; - - return 0; -out: - if (lp) - list_add(&lp->lp_hashlist, &ptable->pt_deathrow); - 
ptable->pt_number--; - return rc; -} - -void -lnet_debug_peer(lnet_nid_t nid) -{ - char *aliveness = "NA"; - struct lnet_peer *lp; - int rc; - int cpt; - - cpt = lnet_cpt_of_nid(nid); - lnet_net_lock(cpt); - - rc = lnet_nid2peer_locked(&lp, nid, cpt); - if (rc) { - lnet_net_unlock(cpt); - CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid)); - return; - } - - if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp)) - aliveness = lp->lp_alive ? "up" : "down"; - - CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n", - libcfs_nid2str(lp->lp_nid), lp->lp_refcount, - aliveness, lp->lp_ni->ni_peertxcredits, - lp->lp_rtrcredits, lp->lp_minrtrcredits, - lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob); - - lnet_peer_decref_locked(lp); - - lnet_net_unlock(cpt); -} - -int -lnet_get_peer_info(__u32 peer_index, __u64 *nid, - char aliveness[LNET_MAX_STR_LEN], - __u32 *cpt_iter, __u32 *refcount, - __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits, - __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits, - __u32 *peer_tx_qnob) -{ - struct lnet_peer_table *peer_table; - struct lnet_peer *lp; - bool found = false; - int lncpt, j; - - /* get the number of CPTs */ - lncpt = cfs_percpt_number(the_lnet.ln_peer_tables); - - /* - * if the cpt number to be examined is >= the number of cpts in - * the system then indicate that there are no more cpts to examin - */ - if (*cpt_iter >= lncpt) - return -ENOENT; - - /* get the current table */ - peer_table = the_lnet.ln_peer_tables[*cpt_iter]; - /* if the ptable is NULL then there are no more cpts to examine */ - if (!peer_table) - return -ENOENT; - - lnet_net_lock(*cpt_iter); - - for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) { - struct list_head *peers = &peer_table->pt_hash[j]; - - list_for_each_entry(lp, peers, lp_hashlist) { - if (peer_index-- > 0) - continue; - - snprintf(aliveness, LNET_MAX_STR_LEN, "NA"); - if (lnet_isrouter(lp) || - lnet_peer_aliveness_enabled(lp)) - snprintf(aliveness, LNET_MAX_STR_LEN, - 
lp->lp_alive ? "up" : "down"); - - *nid = lp->lp_nid; - *refcount = lp->lp_refcount; - *ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits; - *peer_tx_credits = lp->lp_txcredits; - *peer_rtr_credits = lp->lp_rtrcredits; - *peer_min_rtr_credits = lp->lp_mintxcredits; - *peer_tx_qnob = lp->lp_txqnob; - - found = true; - } - } - lnet_net_unlock(*cpt_iter); - - *cpt_iter = lncpt; - - return found ? 0 : -ENOENT; -} diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c deleted file mode 100644 index 6267d5e4bbd6..000000000000 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ /dev/null @@ -1,1799 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2015, Intel Corporation. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include -#include - -#define LNET_NRB_TINY_MIN 512 /* min value for each CPT */ -#define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4) -#define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */ -#define LNET_NRB_SMALL (LNET_NRB_SMALL_MIN * 4) -#define LNET_NRB_SMALL_PAGES 1 -#define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */ -#define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4) -#define LNET_NRB_LARGE_PAGES ((LNET_MTU + PAGE_SIZE - 1) >> \ - PAGE_SHIFT) - -static char *forwarding = ""; -module_param(forwarding, charp, 0444); -MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks"); - -static int tiny_router_buffers; -module_param(tiny_router_buffers, int, 0444); -MODULE_PARM_DESC(tiny_router_buffers, "# of 0 payload messages to buffer in the router"); -static int small_router_buffers; -module_param(small_router_buffers, int, 0444); -MODULE_PARM_DESC(small_router_buffers, "# of small (1 page) messages to buffer in the router"); -static int large_router_buffers; -module_param(large_router_buffers, int, 0444); -MODULE_PARM_DESC(large_router_buffers, "# of large messages to buffer in the router"); -static int peer_buffer_credits; -module_param(peer_buffer_credits, int, 0444); -MODULE_PARM_DESC(peer_buffer_credits, "# router buffer credits per peer"); - -static int auto_down = 1; -module_param(auto_down, int, 0444); -MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error"); - -int -lnet_peer_buffer_credits(struct lnet_ni *ni) -{ - /* NI option overrides LNet default */ - if (ni->ni_peerrtrcredits > 0) - return ni->ni_peerrtrcredits; - if (peer_buffer_credits > 0) - return peer_buffer_credits; - - /* - * As an approximation, allow this peer the same number of router - * buffers as it is allowed outstanding sends - */ - return ni->ni_peertxcredits; -} - -/* forward ref's */ -static int lnet_router_checker(void *); - -static int check_routers_before_use; 
-module_param(check_routers_before_use, int, 0444); -MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use"); - -int avoid_asym_router_failure = 1; -module_param(avoid_asym_router_failure, int, 0644); -MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)"); - -static int dead_router_check_interval = 60; -module_param(dead_router_check_interval, int, 0644); -MODULE_PARM_DESC(dead_router_check_interval, "Seconds between dead router health checks (<= 0 to disable)"); - -static int live_router_check_interval = 60; -module_param(live_router_check_interval, int, 0644); -MODULE_PARM_DESC(live_router_check_interval, "Seconds between live router health checks (<= 0 to disable)"); - -static int router_ping_timeout = 50; -module_param(router_ping_timeout, int, 0644); -MODULE_PARM_DESC(router_ping_timeout, "Seconds to wait for the reply to a router health query"); - -int -lnet_peers_start_down(void) -{ - return check_routers_before_use; -} - -void -lnet_notify_locked(struct lnet_peer *lp, int notifylnd, int alive, - unsigned long when) -{ - if (time_before(when, lp->lp_timestamp)) { /* out of date information */ - CDEBUG(D_NET, "Out of date\n"); - return; - } - - lp->lp_timestamp = when; /* update timestamp */ - lp->lp_ping_deadline = 0; /* disable ping timeout */ - - if (lp->lp_alive_count && /* got old news */ - (!lp->lp_alive) == (!alive)) { /* new date for old news */ - CDEBUG(D_NET, "Old news\n"); - return; - } - - /* Flag that notification is outstanding */ - - lp->lp_alive_count++; - lp->lp_alive = !(!alive); /* 1 bit! 
*/ - lp->lp_notify = 1; - lp->lp_notifylnd |= notifylnd; - if (lp->lp_alive) - lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */ - - CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive); -} - -static void -lnet_ni_notify_locked(struct lnet_ni *ni, struct lnet_peer *lp) -{ - int alive; - int notifylnd; - - /* - * Notify only in 1 thread at any time to ensure ordered notification. - * NB individual events can be missed; the only guarantee is that you - * always get the most recent news - */ - if (lp->lp_notifying || !ni) - return; - - lp->lp_notifying = 1; - - while (lp->lp_notify) { - alive = lp->lp_alive; - notifylnd = lp->lp_notifylnd; - - lp->lp_notifylnd = 0; - lp->lp_notify = 0; - - if (notifylnd && ni->ni_lnd->lnd_notify) { - lnet_net_unlock(lp->lp_cpt); - - /* - * A new notification could happen now; I'll handle it - * when control returns to me - */ - ni->ni_lnd->lnd_notify(ni, lp->lp_nid, alive); - - lnet_net_lock(lp->lp_cpt); - } - } - - lp->lp_notifying = 0; -} - -static void -lnet_rtr_addref_locked(struct lnet_peer *lp) -{ - LASSERT(lp->lp_refcount > 0); - LASSERT(lp->lp_rtr_refcount >= 0); - - /* lnet_net_lock must be exclusively locked */ - lp->lp_rtr_refcount++; - if (lp->lp_rtr_refcount == 1) { - struct list_head *pos; - - /* a simple insertion sort */ - list_for_each_prev(pos, &the_lnet.ln_routers) { - struct lnet_peer *rtr; - - rtr = list_entry(pos, struct lnet_peer, lp_rtr_list); - if (rtr->lp_nid < lp->lp_nid) - break; - } - - list_add(&lp->lp_rtr_list, pos); - /* addref for the_lnet.ln_routers */ - lnet_peer_addref_locked(lp); - the_lnet.ln_routers_version++; - } -} - -static void -lnet_rtr_decref_locked(struct lnet_peer *lp) -{ - LASSERT(lp->lp_refcount > 0); - LASSERT(lp->lp_rtr_refcount > 0); - - /* lnet_net_lock must be exclusively locked */ - lp->lp_rtr_refcount--; - if (!lp->lp_rtr_refcount) { - LASSERT(list_empty(&lp->lp_routes)); - - if (lp->lp_rcd) { - list_add(&lp->lp_rcd->rcd_list, - &the_lnet.ln_rcd_deathrow); - 
lp->lp_rcd = NULL; - } - - list_del(&lp->lp_rtr_list); - /* decref for the_lnet.ln_routers */ - lnet_peer_decref_locked(lp); - the_lnet.ln_routers_version++; - } -} - -struct lnet_remotenet * -lnet_find_net_locked(__u32 net) -{ - struct lnet_remotenet *rnet; - struct list_head *rn_list; - - LASSERT(!the_lnet.ln_shutdown); - - rn_list = lnet_net2rnethash(net); - list_for_each_entry(rnet, rn_list, lrn_list) { - if (rnet->lrn_net == net) - return rnet; - } - return NULL; -} - -static void lnet_shuffle_seed(void) -{ - static int seeded; - struct lnet_ni *ni; - - if (seeded) - return; - - /* - * Nodes with small feet have little entropy - * the NID for this node gives the most entropy in the low bits - */ - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - __u32 lnd_type, seed; - - lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); - if (lnd_type != LOLND) { - seed = (LNET_NIDADDR(ni->ni_nid) | lnd_type); - add_device_randomness(&seed, sizeof(seed)); - } - } - - seeded = 1; -} - -/* NB expects LNET_LOCK held */ -static void -lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route) -{ - unsigned int len = 0; - unsigned int offset = 0; - struct list_head *e; - - lnet_shuffle_seed(); - - list_for_each(e, &rnet->lrn_routes) { - len++; - } - - /* len+1 positions to add a new entry */ - offset = prandom_u32_max(len + 1); - list_for_each(e, &rnet->lrn_routes) { - if (!offset) - break; - offset--; - } - list_add(&route->lr_list, e); - list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes); - - the_lnet.ln_remote_nets_version++; - lnet_rtr_addref_locked(route->lr_gateway); -} - -int -lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, - unsigned int priority) -{ - struct list_head *e; - struct lnet_remotenet *rnet; - struct lnet_remotenet *rnet2; - struct lnet_route *route; - struct lnet_ni *ni; - int add_route; - int rc; - - CDEBUG(D_NET, "Add route: net %s hops %d priority %u gw %s\n", - libcfs_net2str(net), hops, priority, 
libcfs_nid2str(gateway)); - - if (gateway == LNET_NID_ANY || - LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND || - net == LNET_NIDNET(LNET_NID_ANY) || - LNET_NETTYP(net) == LOLND || - LNET_NIDNET(gateway) == net || - (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255))) - return -EINVAL; - - if (lnet_islocalnet(net)) /* it's a local network */ - return -EEXIST; - - /* Assume net, route, all new */ - route = kzalloc(sizeof(*route), GFP_NOFS); - rnet = kzalloc(sizeof(*rnet), GFP_NOFS); - if (!route || !rnet) { - CERROR("Out of memory creating route %s %d %s\n", - libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - kfree(route); - kfree(rnet); - return -ENOMEM; - } - - INIT_LIST_HEAD(&rnet->lrn_routes); - rnet->lrn_net = net; - route->lr_hops = hops; - route->lr_net = net; - route->lr_priority = priority; - - lnet_net_lock(LNET_LOCK_EX); - - rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX); - if (rc) { - lnet_net_unlock(LNET_LOCK_EX); - - kfree(route); - kfree(rnet); - - if (rc == -EHOSTUNREACH) /* gateway is not on a local net */ - return rc; /* ignore the route entry */ - CERROR("Error %d creating route %s %d %s\n", rc, - libcfs_net2str(net), hops, - libcfs_nid2str(gateway)); - return rc; - } - - LASSERT(!the_lnet.ln_shutdown); - - rnet2 = lnet_find_net_locked(net); - if (!rnet2) { - /* new network */ - list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net)); - rnet2 = rnet; - } - - /* Search for a duplicate route (it's a NOOP if it is) */ - add_route = 1; - list_for_each(e, &rnet2->lrn_routes) { - struct lnet_route *route2; - - route2 = list_entry(e, struct lnet_route, lr_list); - if (route2->lr_gateway == route->lr_gateway) { - add_route = 0; - break; - } - - /* our lookups must be true */ - LASSERT(route2->lr_gateway->lp_nid != gateway); - } - - if (add_route) { - lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */ - lnet_add_route_to_rnet(rnet2, route); - - ni = route->lr_gateway->lp_ni; - lnet_net_unlock(LNET_LOCK_EX); - - /* 
XXX Assume alive */ - if (ni->ni_lnd->lnd_notify) - ni->ni_lnd->lnd_notify(ni, gateway, 1); - - lnet_net_lock(LNET_LOCK_EX); - } - - /* -1 for notify or !add_route */ - lnet_peer_decref_locked(route->lr_gateway); - lnet_net_unlock(LNET_LOCK_EX); - rc = 0; - - if (!add_route) { - rc = -EEXIST; - kfree(route); - } - - if (rnet != rnet2) - kfree(rnet); - - /* indicate to startup the router checker if configured */ - wake_up(&the_lnet.ln_rc_waitq); - - return rc; -} - -int -lnet_check_routes(void) -{ - struct lnet_remotenet *rnet; - struct lnet_route *route; - struct lnet_route *route2; - struct list_head *e1; - struct list_head *e2; - int cpt; - struct list_head *rn_list; - int i; - - cpt = lnet_net_lock_current(); - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) { - rn_list = &the_lnet.ln_remote_nets_hash[i]; - list_for_each(e1, rn_list) { - rnet = list_entry(e1, struct lnet_remotenet, lrn_list); - - route2 = NULL; - list_for_each(e2, &rnet->lrn_routes) { - lnet_nid_t nid1; - lnet_nid_t nid2; - int net; - - route = list_entry(e2, struct lnet_route, lr_list); - - if (!route2) { - route2 = route; - continue; - } - - if (route->lr_gateway->lp_ni == - route2->lr_gateway->lp_ni) - continue; - - nid1 = route->lr_gateway->lp_nid; - nid2 = route2->lr_gateway->lp_nid; - net = rnet->lrn_net; - - lnet_net_unlock(cpt); - - CERROR("Routes to %s via %s and %s not supported\n", - libcfs_net2str(net), - libcfs_nid2str(nid1), - libcfs_nid2str(nid2)); - return -EINVAL; - } - } - } - - lnet_net_unlock(cpt); - return 0; -} - -int -lnet_del_route(__u32 net, lnet_nid_t gw_nid) -{ - struct lnet_peer *gateway; - struct lnet_remotenet *rnet; - struct lnet_route *route; - struct list_head *e1; - struct list_head *e2; - int rc = -ENOENT; - struct list_head *rn_list; - int idx = 0; - - CDEBUG(D_NET, "Del route: net %s : gw %s\n", - libcfs_net2str(net), libcfs_nid2str(gw_nid)); - - /* - * NB Caller may specify either all routes via the given gateway - * or a specific route entry actual NIDs) 
- */ - lnet_net_lock(LNET_LOCK_EX); - if (net == LNET_NIDNET(LNET_NID_ANY)) - rn_list = &the_lnet.ln_remote_nets_hash[0]; - else - rn_list = lnet_net2rnethash(net); - - again: - list_for_each(e1, rn_list) { - rnet = list_entry(e1, struct lnet_remotenet, lrn_list); - - if (!(net == LNET_NIDNET(LNET_NID_ANY) || - net == rnet->lrn_net)) - continue; - - list_for_each(e2, &rnet->lrn_routes) { - route = list_entry(e2, struct lnet_route, lr_list); - - gateway = route->lr_gateway; - if (!(gw_nid == LNET_NID_ANY || - gw_nid == gateway->lp_nid)) - continue; - - list_del(&route->lr_list); - list_del(&route->lr_gwlist); - the_lnet.ln_remote_nets_version++; - - if (list_empty(&rnet->lrn_routes)) - list_del(&rnet->lrn_list); - else - rnet = NULL; - - lnet_rtr_decref_locked(gateway); - lnet_peer_decref_locked(gateway); - - lnet_net_unlock(LNET_LOCK_EX); - - kfree(route); - kfree(rnet); - - rc = 0; - lnet_net_lock(LNET_LOCK_EX); - goto again; - } - } - - if (net == LNET_NIDNET(LNET_NID_ANY) && - ++idx < LNET_REMOTE_NETS_HASH_SIZE) { - rn_list = &the_lnet.ln_remote_nets_hash[idx]; - goto again; - } - lnet_net_unlock(LNET_LOCK_EX); - - return rc; -} - -void -lnet_destroy_routes(void) -{ - lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY); -} - -int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg) -{ - int i, rc = -ENOENT, j; - - if (!the_lnet.ln_rtrpools) - return rc; - - for (i = 0; i < LNET_NRBPOOLS; i++) { - struct lnet_rtrbufpool *rbp; - - lnet_net_lock(LNET_LOCK_EX); - cfs_percpt_for_each(rbp, j, the_lnet.ln_rtrpools) { - if (i++ != idx) - continue; - - pool_cfg->pl_pools[i].pl_npages = rbp[i].rbp_npages; - pool_cfg->pl_pools[i].pl_nbuffers = rbp[i].rbp_nbuffers; - pool_cfg->pl_pools[i].pl_credits = rbp[i].rbp_credits; - pool_cfg->pl_pools[i].pl_mincredits = rbp[i].rbp_mincredits; - rc = 0; - break; - } - lnet_net_unlock(LNET_LOCK_EX); - } - - lnet_net_lock(LNET_LOCK_EX); - pool_cfg->pl_routing = the_lnet.ln_routing; - lnet_net_unlock(LNET_LOCK_EX); - 
- return rc; -} - -int -lnet_get_route(int idx, __u32 *net, __u32 *hops, - lnet_nid_t *gateway, __u32 *alive, __u32 *priority) -{ - struct list_head *e1; - struct list_head *e2; - struct lnet_remotenet *rnet; - struct lnet_route *route; - int cpt; - int i; - struct list_head *rn_list; - - cpt = lnet_net_lock_current(); - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) { - rn_list = &the_lnet.ln_remote_nets_hash[i]; - list_for_each(e1, rn_list) { - rnet = list_entry(e1, struct lnet_remotenet, lrn_list); - - list_for_each(e2, &rnet->lrn_routes) { - route = list_entry(e2, struct lnet_route, - lr_list); - - if (!idx--) { - *net = rnet->lrn_net; - *hops = route->lr_hops; - *priority = route->lr_priority; - *gateway = route->lr_gateway->lp_nid; - *alive = lnet_is_route_alive(route); - lnet_net_unlock(cpt); - return 0; - } - } - } - } - - lnet_net_unlock(cpt); - return -ENOENT; -} - -void -lnet_swap_pinginfo(struct lnet_ping_info *info) -{ - int i; - struct lnet_ni_status *stat; - - __swab32s(&info->pi_magic); - __swab32s(&info->pi_features); - __swab32s(&info->pi_pid); - __swab32s(&info->pi_nnis); - for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) { - stat = &info->pi_ni[i]; - __swab64s(&stat->ns_nid); - __swab32s(&stat->ns_status); - } -} - -/** - * parse router-checker pinginfo, record number of down NIs for remote - * networks on that router. - */ -static void -lnet_parse_rc_info(struct lnet_rc_data *rcd) -{ - struct lnet_ping_info *info = rcd->rcd_pinginfo; - struct lnet_peer *gw = rcd->rcd_gateway; - struct lnet_route *rte; - - if (!gw->lp_alive) - return; - - if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) - lnet_swap_pinginfo(info); - - /* NB always racing with network! 
*/ - if (info->pi_magic != LNET_PROTO_PING_MAGIC) { - CDEBUG(D_NET, "%s: Unexpected magic %08x\n", - libcfs_nid2str(gw->lp_nid), info->pi_magic); - gw->lp_ping_feats = LNET_PING_FEAT_INVAL; - return; - } - - gw->lp_ping_feats = info->pi_features; - if (!(gw->lp_ping_feats & LNET_PING_FEAT_MASK)) { - CDEBUG(D_NET, "%s: Unexpected features 0x%x\n", - libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats); - return; /* nothing I can understand */ - } - - if (!(gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) - return; /* can't carry NI status info */ - - list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { - int down = 0; - int up = 0; - int i; - - if (gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) { - rte->lr_downis = 1; - continue; - } - - for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) { - struct lnet_ni_status *stat = &info->pi_ni[i]; - lnet_nid_t nid = stat->ns_nid; - - if (nid == LNET_NID_ANY) { - CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n", - libcfs_nid2str(gw->lp_nid)); - gw->lp_ping_feats = LNET_PING_FEAT_INVAL; - return; - } - - if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND) - continue; - - if (stat->ns_status == LNET_NI_STATUS_DOWN) { - down++; - continue; - } - - if (stat->ns_status == LNET_NI_STATUS_UP) { - if (LNET_NIDNET(nid) == rte->lr_net) { - up = 1; - break; - } - continue; - } - - CDEBUG(D_NET, "%s: Unexpected status 0x%x\n", - libcfs_nid2str(gw->lp_nid), stat->ns_status); - gw->lp_ping_feats = LNET_PING_FEAT_INVAL; - return; - } - - if (up) { /* ignore downed NIs if NI for dest network is up */ - rte->lr_downis = 0; - continue; - } - /** - * if @down is zero and this route is single-hop, it means - * we can't find NI for target network - */ - if (!down && rte->lr_hops == 1) - down = 1; - - rte->lr_downis = down; - } -} - -static void -lnet_router_checker_event(struct lnet_event *event) -{ - struct lnet_rc_data *rcd = event->md.user_ptr; - struct lnet_peer *lp; - - LASSERT(rcd); - - if (event->unlinked) { - 
LNetInvalidateMDHandle(&rcd->rcd_mdh); - return; - } - - LASSERT(event->type == LNET_EVENT_SEND || - event->type == LNET_EVENT_REPLY); - - lp = rcd->rcd_gateway; - LASSERT(lp); - - /* - * NB: it's called with holding lnet_res_lock, we have a few - * places need to hold both locks at the same time, please take - * care of lock ordering - */ - lnet_net_lock(lp->lp_cpt); - if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) { - /* ignore if no longer a router or rcd is replaced */ - goto out; - } - - if (event->type == LNET_EVENT_SEND) { - lp->lp_ping_notsent = 0; - if (!event->status) - goto out; - } - - /* LNET_EVENT_REPLY */ - /* - * A successful REPLY means the router is up. If _any_ comms - * to the router fail I assume it's down (this will happen if - * we ping alive routers to try to detect router death before - * apps get burned). - */ - lnet_notify_locked(lp, 1, !event->status, jiffies); - - /* - * The router checker will wake up very shortly and do the - * actual notification. - * XXX If 'lp' stops being a router before then, it will still - * have the notification pending!!! 
- */ - if (avoid_asym_router_failure && !event->status) - lnet_parse_rc_info(rcd); - - out: - lnet_net_unlock(lp->lp_cpt); -} - -static void -lnet_wait_known_routerstate(void) -{ - struct lnet_peer *rtr; - struct list_head *entry; - int all_known; - - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); - - for (;;) { - int cpt = lnet_net_lock_current(); - - all_known = 1; - list_for_each(entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, struct lnet_peer, lp_rtr_list); - - if (!rtr->lp_alive_count) { - all_known = 0; - break; - } - } - - lnet_net_unlock(cpt); - - if (all_known) - return; - - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - } -} - -void -lnet_router_ni_update_locked(struct lnet_peer *gw, __u32 net) -{ - struct lnet_route *rte; - - if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS)) { - list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { - if (rte->lr_net == net) { - rte->lr_downis = 0; - break; - } - } - } -} - -static void -lnet_update_ni_status_locked(void) -{ - struct lnet_ni *ni; - time64_t now; - int timeout; - - LASSERT(the_lnet.ln_routing); - - timeout = router_ping_timeout + - max(live_router_check_interval, dead_router_check_interval); - - now = ktime_get_real_seconds(); - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - if (ni->ni_lnd->lnd_type == LOLND) - continue; - - if (now < ni->ni_last_alive + timeout) - continue; - - lnet_ni_lock(ni); - /* re-check with lock */ - if (now < ni->ni_last_alive + timeout) { - lnet_ni_unlock(ni); - continue; - } - - LASSERT(ni->ni_status); - - if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) { - CDEBUG(D_NET, "NI(%s:%d) status changed to down\n", - libcfs_nid2str(ni->ni_nid), timeout); - /* - * NB: so far, this is the only place to set - * NI status to "down" - */ - ni->ni_status->ns_status = LNET_NI_STATUS_DOWN; - } - lnet_ni_unlock(ni); - } -} - -static void -lnet_destroy_rc_data(struct lnet_rc_data *rcd) -{ - LASSERT(list_empty(&rcd->rcd_list)); - /* detached 
from network */ - LASSERT(LNetMDHandleIsInvalid(rcd->rcd_mdh)); - - if (rcd->rcd_gateway) { - int cpt = rcd->rcd_gateway->lp_cpt; - - lnet_net_lock(cpt); - lnet_peer_decref_locked(rcd->rcd_gateway); - lnet_net_unlock(cpt); - } - - kfree(rcd->rcd_pinginfo); - - kfree(rcd); -} - -static struct lnet_rc_data * -lnet_create_rc_data_locked(struct lnet_peer *gateway) -{ - struct lnet_rc_data *rcd = NULL; - struct lnet_ping_info *pi; - struct lnet_md md; - int rc; - int i; - - lnet_net_unlock(gateway->lp_cpt); - - rcd = kzalloc(sizeof(*rcd), GFP_NOFS); - if (!rcd) - goto out; - - LNetInvalidateMDHandle(&rcd->rcd_mdh); - INIT_LIST_HEAD(&rcd->rcd_list); - - pi = kzalloc(LNET_PINGINFO_SIZE, GFP_NOFS); - if (!pi) - goto out; - - for (i = 0; i < LNET_MAX_RTR_NIS; i++) { - pi->pi_ni[i].ns_nid = LNET_NID_ANY; - pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID; - } - rcd->rcd_pinginfo = pi; - - md.start = pi; - md.user_ptr = rcd; - md.length = LNET_PINGINFO_SIZE; - md.threshold = LNET_MD_THRESH_INF; - md.options = LNET_MD_TRUNCATE; - md.eq_handle = the_lnet.ln_rc_eqh; - - LASSERT(!LNetEQHandleIsInvalid(the_lnet.ln_rc_eqh)); - rc = LNetMDBind(md, LNET_UNLINK, &rcd->rcd_mdh); - if (rc < 0) { - CERROR("Can't bind MD: %d\n", rc); - goto out; - } - LASSERT(!rc); - - lnet_net_lock(gateway->lp_cpt); - /* router table changed or someone has created rcd for this gateway */ - if (!lnet_isrouter(gateway) || gateway->lp_rcd) { - lnet_net_unlock(gateway->lp_cpt); - goto out; - } - - lnet_peer_addref_locked(gateway); - rcd->rcd_gateway = gateway; - gateway->lp_rcd = rcd; - gateway->lp_ping_notsent = 0; - - return rcd; - - out: - if (rcd) { - if (!LNetMDHandleIsInvalid(rcd->rcd_mdh)) { - rc = LNetMDUnlink(rcd->rcd_mdh); - LASSERT(!rc); - } - lnet_destroy_rc_data(rcd); - } - - lnet_net_lock(gateway->lp_cpt); - return gateway->lp_rcd; -} - -static int -lnet_router_check_interval(struct lnet_peer *rtr) -{ - int secs; - - secs = rtr->lp_alive ? 
live_router_check_interval : - dead_router_check_interval; - if (secs < 0) - secs = 0; - - return secs; -} - -static void -lnet_ping_router_locked(struct lnet_peer *rtr) -{ - struct lnet_rc_data *rcd = NULL; - unsigned long now = jiffies; - int secs; - - lnet_peer_addref_locked(rtr); - - if (rtr->lp_ping_deadline && /* ping timed out? */ - time_after(now, rtr->lp_ping_deadline)) - lnet_notify_locked(rtr, 1, 0, now); - - /* Run any outstanding notifications */ - lnet_ni_notify_locked(rtr->lp_ni, rtr); - - if (!lnet_isrouter(rtr) || - the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) { - /* router table changed or router checker is shutting down */ - lnet_peer_decref_locked(rtr); - return; - } - - rcd = rtr->lp_rcd ? - rtr->lp_rcd : lnet_create_rc_data_locked(rtr); - - if (!rcd) - return; - - secs = lnet_router_check_interval(rtr); - - CDEBUG(D_NET, - "rtr %s %d: deadline %lu ping_notsent %d alive %d alive_count %d lp_ping_timestamp %lu\n", - libcfs_nid2str(rtr->lp_nid), secs, - rtr->lp_ping_deadline, rtr->lp_ping_notsent, - rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp); - - if (secs && !rtr->lp_ping_notsent && - time_after(now, rtr->lp_ping_timestamp + secs * HZ)) { - int rc; - struct lnet_process_id id; - struct lnet_handle_md mdh; - - id.nid = rtr->lp_nid; - id.pid = LNET_PID_LUSTRE; - CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id)); - - rtr->lp_ping_notsent = 1; - rtr->lp_ping_timestamp = now; - - mdh = rcd->rcd_mdh; - - if (!rtr->lp_ping_deadline) { - rtr->lp_ping_deadline = - jiffies + router_ping_timeout * HZ; - } - - lnet_net_unlock(rtr->lp_cpt); - - rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL, - LNET_PROTO_PING_MATCHBITS, 0); - - lnet_net_lock(rtr->lp_cpt); - if (rc) - rtr->lp_ping_notsent = 0; /* no event pending */ - } - - lnet_peer_decref_locked(rtr); -} - -int -lnet_router_checker_start(void) -{ - struct task_struct *task; - int rc; - int eqsz = 0; - - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); - - if 
(check_routers_before_use && - dead_router_check_interval <= 0) { - LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n"); - return -EINVAL; - } - - init_completion(&the_lnet.ln_rc_signal); - - rc = LNetEQAlloc(0, lnet_router_checker_event, &the_lnet.ln_rc_eqh); - if (rc) { - CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc); - return -ENOMEM; - } - - the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING; - task = kthread_run(lnet_router_checker, NULL, "router_checker"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - CERROR("Can't start router checker thread: %d\n", rc); - /* block until event callback signals exit */ - wait_for_completion(&the_lnet.ln_rc_signal); - rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT(!rc); - the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; - return -ENOMEM; - } - - if (check_routers_before_use) { - /* - * Note that a helpful side-effect of pinging all known routers - * at startup is that it makes them drop stale connections they - * may have to a previous instance of me. 
- */ - lnet_wait_known_routerstate(); - } - - return 0; -} - -void -lnet_router_checker_stop(void) -{ - int rc; - - if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN) - return; - - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); - the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING; - /* wakeup the RC thread if it's sleeping */ - wake_up(&the_lnet.ln_rc_waitq); - - /* block until event callback signals exit */ - wait_for_completion(&the_lnet.ln_rc_signal); - LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); - - rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT(!rc); -} - -static void -lnet_prune_rc_data(int wait_unlink) -{ - struct lnet_rc_data *rcd; - struct lnet_rc_data *tmp; - struct lnet_peer *lp; - struct list_head head; - int i = 2; - - if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING && - list_empty(&the_lnet.ln_rcd_deathrow) && - list_empty(&the_lnet.ln_rcd_zombie))) - return; - - INIT_LIST_HEAD(&head); - - lnet_net_lock(LNET_LOCK_EX); - - if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) { - /* router checker is stopping, prune all */ - list_for_each_entry(lp, &the_lnet.ln_routers, - lp_rtr_list) { - if (!lp->lp_rcd) - continue; - - LASSERT(list_empty(&lp->lp_rcd->rcd_list)); - list_add(&lp->lp_rcd->rcd_list, - &the_lnet.ln_rcd_deathrow); - lp->lp_rcd = NULL; - } - } - - /* unlink all RCDs on deathrow list */ - list_splice_init(&the_lnet.ln_rcd_deathrow, &head); - - if (!list_empty(&head)) { - lnet_net_unlock(LNET_LOCK_EX); - - list_for_each_entry(rcd, &head, rcd_list) - LNetMDUnlink(rcd->rcd_mdh); - - lnet_net_lock(LNET_LOCK_EX); - } - - list_splice_init(&head, &the_lnet.ln_rcd_zombie); - - /* release all zombie RCDs */ - while (!list_empty(&the_lnet.ln_rcd_zombie)) { - list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie, - rcd_list) { - if (LNetMDHandleIsInvalid(rcd->rcd_mdh)) - list_move(&rcd->rcd_list, &head); - } - - wait_unlink = wait_unlink && - !list_empty(&the_lnet.ln_rcd_zombie); - - lnet_net_unlock(LNET_LOCK_EX); - - while 
(!list_empty(&head)) { - rcd = list_entry(head.next, - struct lnet_rc_data, rcd_list); - list_del_init(&rcd->rcd_list); - lnet_destroy_rc_data(rcd); - } - - if (!wait_unlink) - return; - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "Waiting for rc buffers to unlink\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ / 4); - - lnet_net_lock(LNET_LOCK_EX); - } - - lnet_net_unlock(LNET_LOCK_EX); -} - -/* - * This function is called to check if the RC should block indefinitely. - * It's called from lnet_router_checker() as well as being passed to - * wait_event_interruptible() to avoid the lost wake_up problem. - * - * When it's called from wait_event_interruptible() it is necessary to - * also not sleep if the rc state is not running to avoid a deadlock - * when the system is shutting down - */ -static inline bool -lnet_router_checker_active(void) -{ - if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) - return true; - - /* - * Router Checker thread needs to run when routing is enabled in - * order to call lnet_update_ni_status_locked() - */ - if (the_lnet.ln_routing) - return true; - - return !list_empty(&the_lnet.ln_routers) && - (live_router_check_interval > 0 || - dead_router_check_interval > 0); -} - -static int -lnet_router_checker(void *arg) -{ - struct lnet_peer *rtr; - struct list_head *entry; - - while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) { - __u64 version; - int cpt; - int cpt2; - - cpt = lnet_net_lock_current(); -rescan: - version = the_lnet.ln_routers_version; - - list_for_each(entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, struct lnet_peer, lp_rtr_list); - - cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid); - if (cpt != cpt2) { - lnet_net_unlock(cpt); - cpt = cpt2; - lnet_net_lock(cpt); - /* the routers list has changed */ - if (version != the_lnet.ln_routers_version) - goto rescan; - } - - lnet_ping_router_locked(rtr); - - /* NB dropped lock */ - if (version != the_lnet.ln_routers_version) { - /* the routers 
list has changed */ - goto rescan; - } - } - - if (the_lnet.ln_routing) - lnet_update_ni_status_locked(); - - lnet_net_unlock(cpt); - - lnet_prune_rc_data(0); /* don't wait for UNLINK */ - - /* - * Call schedule_timeout() here always adds 1 to load average - * because kernel counts # active tasks as nr_running - * + nr_uninterruptible. - */ - /* - * if there are any routes then wakeup every second. If - * there are no routes then sleep indefinitely until woken - * up by a user adding a route - */ - if (!lnet_router_checker_active()) - wait_event_interruptible(the_lnet.ln_rc_waitq, - lnet_router_checker_active()); - else - wait_event_interruptible_timeout(the_lnet.ln_rc_waitq, - false, - HZ); - } - - lnet_prune_rc_data(1); /* wait for UNLINK */ - - the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; - complete(&the_lnet.ln_rc_signal); - /* The unlink event callback will signal final completion */ - return 0; -} - -void -lnet_destroy_rtrbuf(struct lnet_rtrbuf *rb, int npages) -{ - while (--npages >= 0) - __free_page(rb->rb_kiov[npages].bv_page); - - kfree(rb); -} - -static struct lnet_rtrbuf * -lnet_new_rtrbuf(struct lnet_rtrbufpool *rbp, int cpt) -{ - int npages = rbp->rbp_npages; - int sz = offsetof(struct lnet_rtrbuf, rb_kiov[npages]); - struct page *page; - struct lnet_rtrbuf *rb; - int i; - - rb = kzalloc_cpt(sz, GFP_NOFS, cpt); - if (!rb) - return NULL; - - rb->rb_pool = rbp; - - for (i = 0; i < npages; i++) { - page = alloc_pages_node( - cfs_cpt_spread_node(lnet_cpt_table(), cpt), - GFP_KERNEL | __GFP_ZERO, 0); - if (!page) { - while (--i >= 0) - __free_page(rb->rb_kiov[i].bv_page); - - kfree(rb); - return NULL; - } - - rb->rb_kiov[i].bv_len = PAGE_SIZE; - rb->rb_kiov[i].bv_offset = 0; - rb->rb_kiov[i].bv_page = page; - } - - return rb; -} - -static void -lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt) -{ - int npages = rbp->rbp_npages; - struct list_head tmp; - struct lnet_rtrbuf *rb; - struct lnet_rtrbuf *temp; - - if (!rbp->rbp_nbuffers) /* not 
initialized or already freed */ - return; - - INIT_LIST_HEAD(&tmp); - - lnet_net_lock(cpt); - lnet_drop_routed_msgs_locked(&rbp->rbp_msgs, cpt); - list_splice_init(&rbp->rbp_bufs, &tmp); - rbp->rbp_req_nbuffers = 0; - rbp->rbp_nbuffers = 0; - rbp->rbp_credits = 0; - rbp->rbp_mincredits = 0; - lnet_net_unlock(cpt); - - /* Free buffers on the free list. */ - list_for_each_entry_safe(rb, temp, &tmp, rb_list) { - list_del(&rb->rb_list); - lnet_destroy_rtrbuf(rb, npages); - } -} - -static int -lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt) -{ - struct list_head rb_list; - struct lnet_rtrbuf *rb; - int num_rb; - int num_buffers = 0; - int old_req_nbufs; - int npages = rbp->rbp_npages; - - lnet_net_lock(cpt); - /* - * If we are called for less buffers than already in the pool, we - * just lower the req_nbuffers number and excess buffers will be - * thrown away as they are returned to the free list. Credits - * then get adjusted as well. - * If we already have enough buffers allocated to serve the - * increase requested, then we can treat that the same way as we - * do the decrease. - */ - num_rb = nbufs - rbp->rbp_nbuffers; - if (nbufs <= rbp->rbp_req_nbuffers || num_rb <= 0) { - rbp->rbp_req_nbuffers = nbufs; - lnet_net_unlock(cpt); - return 0; - } - /* - * store the older value of rbp_req_nbuffers and then set it to - * the new request to prevent lnet_return_rx_credits_locked() from - * freeing buffers that we need to keep around - */ - old_req_nbufs = rbp->rbp_req_nbuffers; - rbp->rbp_req_nbuffers = nbufs; - lnet_net_unlock(cpt); - - INIT_LIST_HEAD(&rb_list); - - /* - * allocate the buffers on a local list first. If all buffers are - * allocated successfully then join this list to the rbp buffer - * list. If not then free all allocated buffers. 
- */ - while (num_rb-- > 0) { - rb = lnet_new_rtrbuf(rbp, cpt); - if (!rb) { - CERROR("Failed to allocate %d route bufs of %d pages\n", - nbufs, npages); - - lnet_net_lock(cpt); - rbp->rbp_req_nbuffers = old_req_nbufs; - lnet_net_unlock(cpt); - - goto failed; - } - - list_add(&rb->rb_list, &rb_list); - num_buffers++; - } - - lnet_net_lock(cpt); - - list_splice_tail(&rb_list, &rbp->rbp_bufs); - rbp->rbp_nbuffers += num_buffers; - rbp->rbp_credits += num_buffers; - rbp->rbp_mincredits = rbp->rbp_credits; - /* - * We need to schedule blocked msg using the newly - * added buffers. - */ - while (!list_empty(&rbp->rbp_bufs) && - !list_empty(&rbp->rbp_msgs)) - lnet_schedule_blocked_locked(rbp); - - lnet_net_unlock(cpt); - - return 0; - -failed: - while (!list_empty(&rb_list)) { - rb = list_entry(rb_list.next, struct lnet_rtrbuf, rb_list); - list_del(&rb->rb_list); - lnet_destroy_rtrbuf(rb, npages); - } - - return -ENOMEM; -} - -static void -lnet_rtrpool_init(struct lnet_rtrbufpool *rbp, int npages) -{ - INIT_LIST_HEAD(&rbp->rbp_msgs); - INIT_LIST_HEAD(&rbp->rbp_bufs); - - rbp->rbp_npages = npages; - rbp->rbp_credits = 0; - rbp->rbp_mincredits = 0; -} - -void -lnet_rtrpools_free(int keep_pools) -{ - struct lnet_rtrbufpool *rtrp; - int i; - - if (!the_lnet.ln_rtrpools) /* uninitialized or freed */ - return; - - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - lnet_rtrpool_free_bufs(&rtrp[LNET_TINY_BUF_IDX], i); - lnet_rtrpool_free_bufs(&rtrp[LNET_SMALL_BUF_IDX], i); - lnet_rtrpool_free_bufs(&rtrp[LNET_LARGE_BUF_IDX], i); - } - - if (!keep_pools) { - cfs_percpt_free(the_lnet.ln_rtrpools); - the_lnet.ln_rtrpools = NULL; - } -} - -static int -lnet_nrb_tiny_calculate(void) -{ - int nrbs = LNET_NRB_TINY; - - if (tiny_router_buffers < 0) { - LCONSOLE_ERROR_MSG(0x10c, - "tiny_router_buffers=%d invalid when routing enabled\n", - tiny_router_buffers); - return -EINVAL; - } - - if (tiny_router_buffers > 0) - nrbs = tiny_router_buffers; - - nrbs /= LNET_CPT_NUMBER; - return 
max(nrbs, LNET_NRB_TINY_MIN); -} - -static int -lnet_nrb_small_calculate(void) -{ - int nrbs = LNET_NRB_SMALL; - - if (small_router_buffers < 0) { - LCONSOLE_ERROR_MSG(0x10c, - "small_router_buffers=%d invalid when routing enabled\n", - small_router_buffers); - return -EINVAL; - } - - if (small_router_buffers > 0) - nrbs = small_router_buffers; - - nrbs /= LNET_CPT_NUMBER; - return max(nrbs, LNET_NRB_SMALL_MIN); -} - -static int -lnet_nrb_large_calculate(void) -{ - int nrbs = LNET_NRB_LARGE; - - if (large_router_buffers < 0) { - LCONSOLE_ERROR_MSG(0x10c, - "large_router_buffers=%d invalid when routing enabled\n", - large_router_buffers); - return -EINVAL; - } - - if (large_router_buffers > 0) - nrbs = large_router_buffers; - - nrbs /= LNET_CPT_NUMBER; - return max(nrbs, LNET_NRB_LARGE_MIN); -} - -int -lnet_rtrpools_alloc(int im_a_router) -{ - struct lnet_rtrbufpool *rtrp; - int nrb_tiny; - int nrb_small; - int nrb_large; - int rc; - int i; - - if (!strcmp(forwarding, "")) { - /* not set either way */ - if (!im_a_router) - return 0; - } else if (!strcmp(forwarding, "disabled")) { - /* explicitly disabled */ - return 0; - } else if (!strcmp(forwarding, "enabled")) { - /* explicitly enabled */ - } else { - LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n"); - return -EINVAL; - } - - nrb_tiny = lnet_nrb_tiny_calculate(); - if (nrb_tiny < 0) - return -EINVAL; - - nrb_small = lnet_nrb_small_calculate(); - if (nrb_small < 0) - return -EINVAL; - - nrb_large = lnet_nrb_large_calculate(); - if (nrb_large < 0) - return -EINVAL; - - the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(), - LNET_NRBPOOLS * - sizeof(struct lnet_rtrbufpool)); - if (!the_lnet.ln_rtrpools) { - LCONSOLE_ERROR_MSG(0x10c, - "Failed to initialize router buffe pool\n"); - return -ENOMEM; - } - - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0); - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX], - nrb_tiny, 
i); - if (rc) - goto failed; - - lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX], - LNET_NRB_SMALL_PAGES); - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX], - nrb_small, i); - if (rc) - goto failed; - - lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX], - LNET_NRB_LARGE_PAGES); - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX], - nrb_large, i); - if (rc) - goto failed; - } - - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_routing = 1; - lnet_net_unlock(LNET_LOCK_EX); - - return 0; - - failed: - lnet_rtrpools_free(0); - return rc; -} - -static int -lnet_rtrpools_adjust_helper(int tiny, int small, int large) -{ - int nrb = 0; - int rc = 0; - int i; - struct lnet_rtrbufpool *rtrp; - - /* - * If the provided values for each buffer pool are different than the - * configured values, we need to take action. - */ - if (tiny >= 0) { - tiny_router_buffers = tiny; - nrb = lnet_nrb_tiny_calculate(); - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX], - nrb, i); - if (rc) - return rc; - } - } - if (small >= 0) { - small_router_buffers = small; - nrb = lnet_nrb_small_calculate(); - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX], - nrb, i); - if (rc) - return rc; - } - } - if (large >= 0) { - large_router_buffers = large; - nrb = lnet_nrb_large_calculate(); - cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) { - rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX], - nrb, i); - if (rc) - return rc; - } - } - - return 0; -} - -int -lnet_rtrpools_adjust(int tiny, int small, int large) -{ - /* - * this function doesn't revert the changes if adding new buffers - * failed. It's up to the user space caller to revert the - * changes. 
- */ - if (!the_lnet.ln_routing) - return 0; - - return lnet_rtrpools_adjust_helper(tiny, small, large); -} - -int -lnet_rtrpools_enable(void) -{ - int rc = 0; - - if (the_lnet.ln_routing) - return 0; - - if (!the_lnet.ln_rtrpools) - /* - * If routing is turned off, and we have never - * initialized the pools before, just call the - * standard buffer pool allocation routine as - * if we are just configuring this for the first - * time. - */ - rc = lnet_rtrpools_alloc(1); - else - rc = lnet_rtrpools_adjust_helper(0, 0, 0); - if (rc) - return rc; - - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_routing = 1; - - the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED; - lnet_net_unlock(LNET_LOCK_EX); - - return rc; -} - -void -lnet_rtrpools_disable(void) -{ - if (!the_lnet.ln_routing) - return; - - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_routing = 0; - the_lnet.ln_ping_info->pi_features |= LNET_PING_FEAT_RTE_DISABLED; - - tiny_router_buffers = 0; - small_router_buffers = 0; - large_router_buffers = 0; - lnet_net_unlock(LNET_LOCK_EX); - lnet_rtrpools_free(1); -} - -int -lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, int alive, unsigned long when) -{ - struct lnet_peer *lp = NULL; - unsigned long now = jiffies; - int cpt = lnet_cpt_of_nid(nid); - - LASSERT(!in_interrupt()); - - CDEBUG(D_NET, "%s notifying %s: %s\n", - !ni ? "userspace" : libcfs_nid2str(ni->ni_nid), - libcfs_nid2str(nid), - alive ? "up" : "down"); - - if (ni && - LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) { - CWARN("Ignoring notification of %s %s by %s (different net)\n", - libcfs_nid2str(nid), alive ? "birth" : "death", - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - } - - /* can't do predictions... */ - if (time_after(when, now)) { - CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n", - !ni ? "userspace" : libcfs_nid2str(ni->ni_nid), - libcfs_nid2str(nid), alive ? 
"up" : "down", - (when - now) / HZ); - return -EINVAL; - } - - if (ni && !alive && /* LND telling me she's down */ - !auto_down) { /* auto-down disabled */ - CDEBUG(D_NET, "Auto-down disabled\n"); - return 0; - } - - lnet_net_lock(cpt); - - if (the_lnet.ln_shutdown) { - lnet_net_unlock(cpt); - return -ESHUTDOWN; - } - - lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid); - if (!lp) { - /* nid not found */ - lnet_net_unlock(cpt); - CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid)); - return 0; - } - - /* - * We can't fully trust LND on reporting exact peer last_alive - * if he notifies us about dead peer. For example ksocklnd can - * call us with when == _time_when_the_node_was_booted_ if - * no connections were successfully established - */ - if (ni && !alive && when < lp->lp_last_alive) - when = lp->lp_last_alive; - - lnet_notify_locked(lp, !ni, alive, when); - - if (ni) - lnet_ni_notify_locked(ni, lp); - - lnet_peer_decref_locked(lp); - - lnet_net_unlock(cpt); - return 0; -} -EXPORT_SYMBOL(lnet_notify); diff --git a/drivers/staging/lustre/lnet/lnet/router_proc.c b/drivers/staging/lustre/lnet/lnet/router_proc.c deleted file mode 100644 index ae4b7f5953a0..000000000000 --- a/drivers/staging/lustre/lnet/lnet/router_proc.c +++ /dev/null @@ -1,907 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2012, Intel Corporation. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include - -/* - * This is really lnet_proc.c. You might need to update sanity test 215 - * if any file format is changed. - */ - -#define LNET_LOFFT_BITS (sizeof(loff_t) * 8) -/* - * NB: max allowed LNET_CPT_BITS is 8 on 64-bit system and 2 on 32-bit system - */ -#define LNET_PROC_CPT_BITS (LNET_CPT_BITS + 1) -/* change version, 16 bits or 8 bits */ -#define LNET_PROC_VER_BITS max_t(size_t, min_t(size_t, LNET_LOFFT_BITS, 64) / 4, 8) - -#define LNET_PROC_HASH_BITS LNET_PEER_HASH_BITS -/* - * bits for peer hash offset - * NB: we don't use the highest bit of *ppos because it's signed - */ -#define LNET_PROC_HOFF_BITS (LNET_LOFFT_BITS - \ - LNET_PROC_CPT_BITS - \ - LNET_PROC_VER_BITS - \ - LNET_PROC_HASH_BITS - 1) -/* bits for hash index + position */ -#define LNET_PROC_HPOS_BITS (LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS) -/* bits for peer hash table + hash version */ -#define LNET_PROC_VPOS_BITS (LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS) - -#define LNET_PROC_CPT_MASK ((1ULL << LNET_PROC_CPT_BITS) - 1) -#define LNET_PROC_VER_MASK ((1ULL << LNET_PROC_VER_BITS) - 1) -#define LNET_PROC_HASH_MASK ((1ULL << LNET_PROC_HASH_BITS) - 1) -#define LNET_PROC_HOFF_MASK ((1ULL << LNET_PROC_HOFF_BITS) - 1) - -#define LNET_PROC_CPT_GET(pos) \ - (int)(((pos) >> LNET_PROC_VPOS_BITS) & LNET_PROC_CPT_MASK) - -#define LNET_PROC_VER_GET(pos) \ - (int)(((pos) >> LNET_PROC_HPOS_BITS) & LNET_PROC_VER_MASK) - -#define LNET_PROC_HASH_GET(pos) \ - (int)(((pos) >> LNET_PROC_HOFF_BITS) & LNET_PROC_HASH_MASK) - -#define LNET_PROC_HOFF_GET(pos) \ - (int)((pos) & LNET_PROC_HOFF_MASK) - -#define LNET_PROC_POS_MAKE(cpt, ver, hash, off) \ - (((((loff_t)(cpt)) & LNET_PROC_CPT_MASK) << LNET_PROC_VPOS_BITS) | \ - ((((loff_t)(ver)) & LNET_PROC_VER_MASK) << LNET_PROC_HPOS_BITS) | \ - ((((loff_t)(hash)) & LNET_PROC_HASH_MASK) << LNET_PROC_HOFF_BITS) | \ - ((off) & LNET_PROC_HOFF_MASK)) - -#define LNET_PROC_VERSION(v) ((unsigned int)((v) & LNET_PROC_VER_MASK)) - 
-static int __proc_lnet_stats(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - int rc; - struct lnet_counters *ctrs; - int len; - char *tmpstr; - const int tmpsiz = 256; /* 7 %u and 4 %llu */ - - if (write) { - lnet_counters_reset(); - return 0; - } - - /* read */ - - ctrs = kzalloc(sizeof(*ctrs), GFP_NOFS); - if (!ctrs) - return -ENOMEM; - - tmpstr = kmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) { - kfree(ctrs); - return -ENOMEM; - } - - lnet_counters_get(ctrs); - - len = snprintf(tmpstr, tmpsiz, - "%u %u %u %u %u %u %u %llu %llu %llu %llu", - ctrs->msgs_alloc, ctrs->msgs_max, - ctrs->errors, - ctrs->send_count, ctrs->recv_count, - ctrs->route_count, ctrs->drop_count, - ctrs->send_length, ctrs->recv_length, - ctrs->route_length, ctrs->drop_length); - - if (pos >= min_t(int, len, strlen(tmpstr))) - rc = 0; - else - rc = cfs_trace_copyout_string(buffer, nob, - tmpstr + pos, "\n"); - - kfree(tmpstr); - kfree(ctrs); - return rc; -} - -static int proc_lnet_stats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_lnet_stats); -} - -static int proc_lnet_routes(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - const int tmpsiz = 256; - char *tmpstr; - char *s; - int rc = 0; - int len; - int ver; - int off; - - BUILD_BUG_ON(sizeof(loff_t) < 4); - - off = LNET_PROC_HOFF_GET(*ppos); - ver = LNET_PROC_VER_GET(*ppos); - - LASSERT(!write); - - if (!*lenp) - return 0; - - tmpstr = kmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - if (!*ppos) { - s += snprintf(s, tmpstr + tmpsiz - s, "Routing %s\n", - the_lnet.ln_routing ? 
"enabled" : "disabled"); - LASSERT(tmpstr + tmpsiz - s > 0); - - s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4s %8s %7s %s\n", - "net", "hops", "priority", "state", "router"); - LASSERT(tmpstr + tmpsiz - s > 0); - - lnet_net_lock(0); - ver = (unsigned int)the_lnet.ln_remote_nets_version; - lnet_net_unlock(0); - *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); - } else { - struct list_head *n; - struct list_head *r; - struct lnet_route *route = NULL; - struct lnet_remotenet *rnet = NULL; - int skip = off - 1; - struct list_head *rn_list; - int i; - - lnet_net_lock(0); - - if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) { - lnet_net_unlock(0); - kfree(tmpstr); - return -ESTALE; - } - - for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && !route; i++) { - rn_list = &the_lnet.ln_remote_nets_hash[i]; - - n = rn_list->next; - - while (n != rn_list && !route) { - rnet = list_entry(n, struct lnet_remotenet, - lrn_list); - - r = rnet->lrn_routes.next; - - while (r != &rnet->lrn_routes) { - struct lnet_route *re; - - re = list_entry(r, struct lnet_route, - lr_list); - if (!skip) { - route = re; - break; - } - - skip--; - r = r->next; - } - - n = n->next; - } - } - - if (route) { - __u32 net = rnet->lrn_net; - __u32 hops = route->lr_hops; - unsigned int priority = route->lr_priority; - lnet_nid_t nid = route->lr_gateway->lp_nid; - int alive = lnet_is_route_alive(route); - - s += snprintf(s, tmpstr + tmpsiz - s, - "%-8s %4u %8u %7s %s\n", - libcfs_net2str(net), hops, - priority, - alive ? 
"up" : "down", - libcfs_nid2str(nid)); - LASSERT(tmpstr + tmpsiz - s > 0); - } - - lnet_net_unlock(0); - } - - len = s - tmpstr; /* how many bytes was written */ - - if (len > *lenp) { /* linux-supplied buffer is too small */ - rc = -EINVAL; - } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) { - rc = -EFAULT; - } else { - off += 1; - *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); - } - } - - kfree(tmpstr); - - if (!rc) - *lenp = len; - - return rc; -} - -static int proc_lnet_routers(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int rc = 0; - char *tmpstr; - char *s; - const int tmpsiz = 256; - int len; - int ver; - int off; - - off = LNET_PROC_HOFF_GET(*ppos); - ver = LNET_PROC_VER_GET(*ppos); - - LASSERT(!write); - - if (!*lenp) - return 0; - - tmpstr = kmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - if (!*ppos) { - s += snprintf(s, tmpstr + tmpsiz - s, - "%-4s %7s %9s %6s %12s %9s %8s %7s %s\n", - "ref", "rtr_ref", "alive_cnt", "state", - "last_ping", "ping_sent", "deadline", - "down_ni", "router"); - LASSERT(tmpstr + tmpsiz - s > 0); - - lnet_net_lock(0); - ver = (unsigned int)the_lnet.ln_routers_version; - lnet_net_unlock(0); - *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); - } else { - struct list_head *r; - struct lnet_peer *peer = NULL; - int skip = off - 1; - - lnet_net_lock(0); - - if (ver != LNET_PROC_VERSION(the_lnet.ln_routers_version)) { - lnet_net_unlock(0); - - kfree(tmpstr); - return -ESTALE; - } - - r = the_lnet.ln_routers.next; - - while (r != &the_lnet.ln_routers) { - struct lnet_peer *lp; - - lp = list_entry(r, struct lnet_peer, lp_rtr_list); - if (!skip) { - peer = lp; - break; - } - - skip--; - r = r->next; - } - - if (peer) { - lnet_nid_t nid = peer->lp_nid; - unsigned long now = jiffies; - unsigned long deadline = peer->lp_ping_deadline; - int nrefs = peer->lp_refcount; - int nrtrrefs = 
peer->lp_rtr_refcount; - int alive_cnt = peer->lp_alive_count; - int alive = peer->lp_alive; - int pingsent = !peer->lp_ping_notsent; - int last_ping = (now - peer->lp_ping_timestamp) / HZ; - int down_ni = 0; - struct lnet_route *rtr; - - if ((peer->lp_ping_feats & - LNET_PING_FEAT_NI_STATUS)) { - list_for_each_entry(rtr, &peer->lp_routes, - lr_gwlist) { - /* - * downis on any route should be the - * number of downis on the gateway - */ - if (rtr->lr_downis) { - down_ni = rtr->lr_downis; - break; - } - } - } - - if (!deadline) - s += snprintf(s, tmpstr + tmpsiz - s, - "%-4d %7d %9d %6s %12d %9d %8s %7d %s\n", - nrefs, nrtrrefs, alive_cnt, - alive ? "up" : "down", last_ping, - pingsent, "NA", down_ni, - libcfs_nid2str(nid)); - else - s += snprintf(s, tmpstr + tmpsiz - s, - "%-4d %7d %9d %6s %12d %9d %8lu %7d %s\n", - nrefs, nrtrrefs, alive_cnt, - alive ? "up" : "down", last_ping, - pingsent, - (deadline - now) / HZ, - down_ni, libcfs_nid2str(nid)); - LASSERT(tmpstr + tmpsiz - s > 0); - } - - lnet_net_unlock(0); - } - - len = s - tmpstr; /* how many bytes was written */ - - if (len > *lenp) { /* linux-supplied buffer is too small */ - rc = -EINVAL; - } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) { - rc = -EFAULT; - } else { - off += 1; - *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); - } - } - - kfree(tmpstr); - - if (!rc) - *lenp = len; - - return rc; -} - -static int proc_lnet_peers(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - const int tmpsiz = 256; - struct lnet_peer_table *ptable; - char *tmpstr; - char *s; - int cpt = LNET_PROC_CPT_GET(*ppos); - int ver = LNET_PROC_VER_GET(*ppos); - int hash = LNET_PROC_HASH_GET(*ppos); - int hoff = LNET_PROC_HOFF_GET(*ppos); - int rc = 0; - int len; - - BUILD_BUG_ON(LNET_PROC_HASH_BITS < LNET_PEER_HASH_BITS); - LASSERT(!write); - - if (!*lenp) - return 0; - - if (cpt >= LNET_CPT_NUMBER) { - *lenp = 0; - return 0; - } - - tmpstr = kmalloc(tmpsiz, 
GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - if (!*ppos) { - s += snprintf(s, tmpstr + tmpsiz - s, - "%-24s %4s %5s %5s %5s %5s %5s %5s %5s %s\n", - "nid", "refs", "state", "last", "max", - "rtr", "min", "tx", "min", "queue"); - LASSERT(tmpstr + tmpsiz - s > 0); - - hoff++; - } else { - struct lnet_peer *peer; - struct list_head *p; - int skip; - again: - p = NULL; - peer = NULL; - skip = hoff - 1; - - lnet_net_lock(cpt); - ptable = the_lnet.ln_peer_tables[cpt]; - if (hoff == 1) - ver = LNET_PROC_VERSION(ptable->pt_version); - - if (ver != LNET_PROC_VERSION(ptable->pt_version)) { - lnet_net_unlock(cpt); - kfree(tmpstr); - return -ESTALE; - } - - while (hash < LNET_PEER_HASH_SIZE) { - if (!p) - p = ptable->pt_hash[hash].next; - - while (p != &ptable->pt_hash[hash]) { - struct lnet_peer *lp; - - lp = list_entry(p, struct lnet_peer, - lp_hashlist); - if (!skip) { - peer = lp; - - /* - * minor optimization: start from idx+1 - * on next iteration if we've just - * drained lp_hashlist - */ - if (lp->lp_hashlist.next == - &ptable->pt_hash[hash]) { - hoff = 1; - hash++; - } else { - hoff++; - } - - break; - } - - skip--; - p = lp->lp_hashlist.next; - } - - if (peer) - break; - - p = NULL; - hoff = 1; - hash++; - } - - if (peer) { - lnet_nid_t nid = peer->lp_nid; - int nrefs = peer->lp_refcount; - int lastalive = -1; - char *aliveness = "NA"; - int maxcr = peer->lp_ni->ni_peertxcredits; - int txcr = peer->lp_txcredits; - int mintxcr = peer->lp_mintxcredits; - int rtrcr = peer->lp_rtrcredits; - int minrtrcr = peer->lp_minrtrcredits; - int txqnob = peer->lp_txqnob; - - if (lnet_isrouter(peer) || - lnet_peer_aliveness_enabled(peer)) - aliveness = peer->lp_alive ? 
"up" : "down"; - - if (lnet_peer_aliveness_enabled(peer)) { - unsigned long now = jiffies; - long delta; - - delta = now - peer->lp_last_alive; - lastalive = (delta) / HZ; - - /* No need to mess up peers contents with - * arbitrarily long integers - it suffices to - * know that lastalive is more than 10000s old - */ - if (lastalive >= 10000) - lastalive = 9999; - } - - lnet_net_unlock(cpt); - - s += snprintf(s, tmpstr + tmpsiz - s, - "%-24s %4d %5s %5d %5d %5d %5d %5d %5d %d\n", - libcfs_nid2str(nid), nrefs, aliveness, - lastalive, maxcr, rtrcr, minrtrcr, txcr, - mintxcr, txqnob); - LASSERT(tmpstr + tmpsiz - s > 0); - - } else { /* peer is NULL */ - lnet_net_unlock(cpt); - } - - if (hash == LNET_PEER_HASH_SIZE) { - cpt++; - hash = 0; - hoff = 1; - if (!peer && cpt < LNET_CPT_NUMBER) - goto again; - } - } - - len = s - tmpstr; /* how many bytes was written */ - - if (len > *lenp) { /* linux-supplied buffer is too small */ - rc = -EINVAL; - } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) - rc = -EFAULT; - else - *ppos = LNET_PROC_POS_MAKE(cpt, ver, hash, hoff); - } - - kfree(tmpstr); - - if (!rc) - *lenp = len; - - return rc; -} - -static int __proc_lnet_buffers(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - char *s; - char *tmpstr; - int tmpsiz; - int idx; - int len; - int rc; - int i; - - LASSERT(!write); - - /* (4 %d) * 4 * LNET_CPT_NUMBER */ - tmpsiz = 64 * (LNET_NRBPOOLS + 1) * LNET_CPT_NUMBER; - tmpstr = kvmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - s += snprintf(s, tmpstr + tmpsiz - s, - "%5s %5s %7s %7s\n", - "pages", "count", "credits", "min"); - LASSERT(tmpstr + tmpsiz - s > 0); - - if (!the_lnet.ln_rtrpools) - goto out; /* I'm not a router */ - - for (idx = 0; idx < LNET_NRBPOOLS; idx++) { - struct lnet_rtrbufpool *rbp; - - lnet_net_lock(LNET_LOCK_EX); - cfs_percpt_for_each(rbp, i, the_lnet.ln_rtrpools) { - s += 
snprintf(s, tmpstr + tmpsiz - s, - "%5d %5d %7d %7d\n", - rbp[idx].rbp_npages, - rbp[idx].rbp_nbuffers, - rbp[idx].rbp_credits, - rbp[idx].rbp_mincredits); - LASSERT(tmpstr + tmpsiz - s > 0); - } - lnet_net_unlock(LNET_LOCK_EX); - } - - out: - len = s - tmpstr; - - if (pos >= min_t(int, len, strlen(tmpstr))) - rc = 0; - else - rc = cfs_trace_copyout_string(buffer, nob, - tmpstr + pos, NULL); - - kvfree(tmpstr); - return rc; -} - -static int proc_lnet_buffers(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_lnet_buffers); -} - -static int proc_lnet_nis(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int tmpsiz = 128 * LNET_CPT_NUMBER; - int rc = 0; - char *tmpstr; - char *s; - int len; - - LASSERT(!write); - - if (!*lenp) - return 0; - - tmpstr = kvmalloc(tmpsiz, GFP_KERNEL); - if (!tmpstr) - return -ENOMEM; - - s = tmpstr; /* points to current position in tmpstr[] */ - - if (!*ppos) { - s += snprintf(s, tmpstr + tmpsiz - s, - "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n", - "nid", "status", "alive", "refs", "peer", - "rtr", "max", "tx", "min"); - LASSERT(tmpstr + tmpsiz - s > 0); - } else { - struct list_head *n; - struct lnet_ni *ni = NULL; - int skip = *ppos - 1; - - lnet_net_lock(0); - - n = the_lnet.ln_nis.next; - - while (n != &the_lnet.ln_nis) { - struct lnet_ni *a_ni; - - a_ni = list_entry(n, struct lnet_ni, ni_list); - if (!skip) { - ni = a_ni; - break; - } - - skip--; - n = n->next; - } - - if (ni) { - struct lnet_tx_queue *tq; - char *stat; - time64_t now = ktime_get_real_seconds(); - int last_alive = -1; - int i; - int j; - - if (the_lnet.ln_routing) - last_alive = now - ni->ni_last_alive; - - /* @lo forever alive */ - if (ni->ni_lnd->lnd_type == LOLND) - last_alive = 0; - - lnet_ni_lock(ni); - LASSERT(ni->ni_status); - stat = (ni->ni_status->ns_status == - LNET_NI_STATUS_UP) ? 
"up" : "down"; - lnet_ni_unlock(ni); - - /* - * we actually output credits information for - * TX queue of each partition - */ - cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { - for (j = 0; ni->ni_cpts && - j < ni->ni_ncpts; j++) { - if (i == ni->ni_cpts[j]) - break; - } - - if (j == ni->ni_ncpts) - continue; - - if (i) - lnet_net_lock(i); - - s += snprintf(s, tmpstr + tmpsiz - s, - "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n", - libcfs_nid2str(ni->ni_nid), stat, - last_alive, *ni->ni_refs[i], - ni->ni_peertxcredits, - ni->ni_peerrtrcredits, - tq->tq_credits_max, - tq->tq_credits, - tq->tq_credits_min); - if (i) - lnet_net_unlock(i); - } - LASSERT(tmpstr + tmpsiz - s > 0); - } - - lnet_net_unlock(0); - } - - len = s - tmpstr; /* how many bytes was written */ - - if (len > *lenp) { /* linux-supplied buffer is too small */ - rc = -EINVAL; - } else if (len > 0) { /* wrote something */ - if (copy_to_user(buffer, tmpstr, len)) - rc = -EFAULT; - else - *ppos += 1; - } - - kvfree(tmpstr); - - if (!rc) - *lenp = len; - - return rc; -} - -struct lnet_portal_rotors { - int pr_value; - const char *pr_name; - const char *pr_desc; -}; - -static struct lnet_portal_rotors portal_rotors[] = { - { - .pr_value = LNET_PTL_ROTOR_OFF, - .pr_name = "OFF", - .pr_desc = "Turn off message rotor for wildcard portals" - }, - { - .pr_value = LNET_PTL_ROTOR_ON, - .pr_name = "ON", - .pr_desc = "round-robin dispatch all PUT messages for wildcard portals" - }, - { - .pr_value = LNET_PTL_ROTOR_RR_RT, - .pr_name = "RR_RT", - .pr_desc = "round-robin dispatch routed PUT message for wildcard portals" - }, - { - .pr_value = LNET_PTL_ROTOR_HASH_RT, - .pr_name = "HASH_RT", - .pr_desc = "dispatch routed PUT message by hashing source NID for wildcard portals" - }, - { - .pr_value = -1, - .pr_name = NULL, - .pr_desc = NULL - }, -}; - -static int __proc_lnet_portal_rotor(void *data, int write, - loff_t pos, void __user *buffer, int nob) -{ - const int buf_len = 128; - char *buf; - char *tmp; - int rc; - int i; 
- - buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (!write) { - lnet_res_lock(0); - - for (i = 0; portal_rotors[i].pr_value >= 0; i++) { - if (portal_rotors[i].pr_value == portal_rotor) - break; - } - - LASSERT(portal_rotors[i].pr_value == portal_rotor); - lnet_res_unlock(0); - - rc = snprintf(buf, buf_len, - "{\n\tportals: all\n" - "\trotor: %s\n\tdescription: %s\n}", - portal_rotors[i].pr_name, - portal_rotors[i].pr_desc); - - if (pos >= min_t(int, rc, buf_len)) { - rc = 0; - } else { - rc = cfs_trace_copyout_string(buffer, nob, - buf + pos, "\n"); - } - goto out; - } - - rc = cfs_trace_copyin_string(buf, buf_len, buffer, nob); - if (rc < 0) - goto out; - - tmp = strim(buf); - - rc = -EINVAL; - lnet_res_lock(0); - for (i = 0; portal_rotors[i].pr_name; i++) { - if (!strncasecmp(portal_rotors[i].pr_name, tmp, - strlen(portal_rotors[i].pr_name))) { - portal_rotor = portal_rotors[i].pr_value; - rc = 0; - break; - } - } - lnet_res_unlock(0); -out: - kfree(buf); - return rc; -} - -static int proc_lnet_portal_rotor(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) -{ - return lprocfs_call_handler(table->data, write, ppos, buffer, lenp, - __proc_lnet_portal_rotor); -} - -static struct ctl_table lnet_table[] = { - /* - * NB No .strategy entries have been provided since sysctl(8) prefers - * to go via /proc for portability. 
- */ - { - .procname = "stats", - .mode = 0644, - .proc_handler = &proc_lnet_stats, - }, - { - .procname = "routes", - .mode = 0444, - .proc_handler = &proc_lnet_routes, - }, - { - .procname = "routers", - .mode = 0444, - .proc_handler = &proc_lnet_routers, - }, - { - .procname = "peers", - .mode = 0444, - .proc_handler = &proc_lnet_peers, - }, - { - .procname = "buffers", - .mode = 0444, - .proc_handler = &proc_lnet_buffers, - }, - { - .procname = "nis", - .mode = 0444, - .proc_handler = &proc_lnet_nis, - }, - { - .procname = "portal_rotor", - .mode = 0644, - .proc_handler = &proc_lnet_portal_rotor, - }, - { - } -}; - -void lnet_router_debugfs_init(void) -{ - lustre_insert_debugfs(lnet_table); -} - -void lnet_router_debugfs_fini(void) -{ -} diff --git a/drivers/staging/lustre/lnet/selftest/Makefile b/drivers/staging/lustre/lnet/selftest/Makefile deleted file mode 100644 index 3ccc8966b566..000000000000 --- a/drivers/staging/lustre/lnet/selftest/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LNET_SELFTEST) := lnet_selftest.o - -lnet_selftest-y := console.o conrpc.o conctl.o framework.o timer.o rpc.o \ - module.o ping_test.o brw_test.o diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c deleted file mode 100644 index f1ee219bc8f3..000000000000 --- a/drivers/staging/lustre/lnet/selftest/brw_test.c +++ /dev/null @@ -1,526 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/brw_test.c - * - * Author: Isaac Huang - */ - -#include "selftest.h" - -static int brw_srv_workitems = SFW_TEST_WI_MAX; -module_param(brw_srv_workitems, int, 0644); -MODULE_PARM_DESC(brw_srv_workitems, "# BRW server workitems"); - -static int brw_inject_errors; -module_param(brw_inject_errors, int, 0644); -MODULE_PARM_DESC(brw_inject_errors, "# data errors to inject randomly, zero by default"); - -#define BRW_POISON 0xbeefbeefbeefbeefULL -#define BRW_MAGIC 0xeeb0eeb1eeb2eeb3ULL -#define BRW_MSIZE sizeof(u64) - -static void -brw_client_fini(struct sfw_test_instance *tsi) -{ - struct srpc_bulk *bulk; - struct sfw_test_unit *tsu; - - LASSERT(tsi->tsi_is_client); - - list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { - bulk = tsu->tsu_private; - if (!bulk) - continue; - - srpc_free_bulk(bulk); - tsu->tsu_private = NULL; - } -} - -static int -brw_client_init(struct sfw_test_instance *tsi) -{ - struct sfw_session *sn = tsi->tsi_batch->bat_session; - int flags; - int off; - int npg; - int len; - int opc; - struct srpc_bulk *bulk; - struct sfw_test_unit *tsu; - - LASSERT(sn); - LASSERT(tsi->tsi_is_client); - - if (!(sn->sn_features & 
LST_FEAT_BULK_LEN)) { - struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0; - - opc = breq->blk_opc; - flags = breq->blk_flags; - npg = breq->blk_npg; - /* - * NB: this is not going to work for variable page size, - * but we have to keep it for compatibility - */ - len = npg * PAGE_SIZE; - off = 0; - } else { - struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1; - - /* - * I should never get this step if it's unknown feature - * because make_session will reject unknown feature - */ - LASSERT(!(sn->sn_features & ~LST_FEATS_MASK)); - - opc = breq->blk_opc; - flags = breq->blk_flags; - len = breq->blk_len; - off = breq->blk_offset & ~PAGE_MASK; - npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - } - - if (off % BRW_MSIZE) - return -EINVAL; - - if (npg > LNET_MAX_IOV || npg <= 0) - return -EINVAL; - - if (opc != LST_BRW_READ && opc != LST_BRW_WRITE) - return -EINVAL; - - if (flags != LST_BRW_CHECK_NONE && - flags != LST_BRW_CHECK_FULL && flags != LST_BRW_CHECK_SIMPLE) - return -EINVAL; - - list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { - bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid), - off, npg, len, opc == LST_BRW_READ); - if (!bulk) { - brw_client_fini(tsi); - return -ENOMEM; - } - - tsu->tsu_private = bulk; - } - - return 0; -} - -static int brw_inject_one_error(void) -{ - struct timespec64 ts; - - if (brw_inject_errors <= 0) - return 0; - - ktime_get_ts64(&ts); - - if (!((ts.tv_nsec / NSEC_PER_USEC) & 1)) - return 0; - - return brw_inject_errors--; -} - -static void -brw_fill_page(struct page *pg, int off, int len, int pattern, __u64 magic) -{ - char *addr = page_address(pg) + off; - int i; - - LASSERT(addr); - LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE)); - - if (pattern == LST_BRW_CHECK_NONE) - return; - - if (magic == BRW_MAGIC) - magic += brw_inject_one_error(); - - if (pattern == LST_BRW_CHECK_SIMPLE) { - memcpy(addr, &magic, BRW_MSIZE); - if (len > BRW_MSIZE) { - addr += PAGE_SIZE - BRW_MSIZE; - memcpy(addr, &magic, BRW_MSIZE); - } - 
return; - } - - if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < len; i += BRW_MSIZE) - memcpy(addr + i, &magic, BRW_MSIZE); - return; - } - - LBUG(); -} - -static int -brw_check_page(struct page *pg, int off, int len, int pattern, __u64 magic) -{ - char *addr = page_address(pg) + off; - __u64 data = 0; /* make compiler happy */ - int i; - - LASSERT(addr); - LASSERT(!(off % BRW_MSIZE) && !(len % BRW_MSIZE)); - - if (pattern == LST_BRW_CHECK_NONE) - return 0; - - if (pattern == LST_BRW_CHECK_SIMPLE) { - data = *((__u64 *)addr); - if (data != magic) - goto bad_data; - - if (len > BRW_MSIZE) { - addr += PAGE_SIZE - BRW_MSIZE; - data = *((__u64 *)addr); - if (data != magic) - goto bad_data; - } - return 0; - } - - if (pattern == LST_BRW_CHECK_FULL) { - for (i = 0; i < len; i += BRW_MSIZE) { - data = *(u64 *)(addr + i); - if (data != magic) - goto bad_data; - } - return 0; - } - - LBUG(); - -bad_data: - CERROR("Bad data in page %p: %#llx, %#llx expected\n", - pg, data, magic); - return 1; -} - -static void -brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) -{ - int i; - struct page *pg; - - for (i = 0; i < bk->bk_niov; i++) { - int off, len; - - pg = bk->bk_iovs[i].bv_page; - off = bk->bk_iovs[i].bv_offset; - len = bk->bk_iovs[i].bv_len; - brw_fill_page(pg, off, len, pattern, magic); - } -} - -static int -brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic) -{ - int i; - struct page *pg; - - for (i = 0; i < bk->bk_niov; i++) { - int off, len; - - pg = bk->bk_iovs[i].bv_page; - off = bk->bk_iovs[i].bv_offset; - len = bk->bk_iovs[i].bv_len; - if (brw_check_page(pg, off, len, pattern, magic)) { - CERROR("Bulk page %p (%d/%d) is corrupted!\n", - pg, i, bk->bk_niov); - return 1; - } - } - - return 0; -} - -static int -brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest, - struct srpc_client_rpc **rpcpp) -{ - struct srpc_bulk *bulk = tsu->tsu_private; - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct sfw_session 
*sn = tsi->tsi_batch->bat_session; - struct srpc_client_rpc *rpc; - struct srpc_brw_reqst *req; - int flags; - int npg; - int len; - int opc; - int rc; - - LASSERT(sn); - LASSERT(bulk); - - if (!(sn->sn_features & LST_FEAT_BULK_LEN)) { - struct test_bulk_req *breq = &tsi->tsi_u.bulk_v0; - - opc = breq->blk_opc; - flags = breq->blk_flags; - npg = breq->blk_npg; - len = npg * PAGE_SIZE; - } else { - struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1; - int off; - - /* - * I should never get this step if it's unknown feature - * because make_session will reject unknown feature - */ - LASSERT(!(sn->sn_features & ~LST_FEATS_MASK)); - - opc = breq->blk_opc; - flags = breq->blk_flags; - len = breq->blk_len; - off = breq->blk_offset; - npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - } - - rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc); - if (rc) - return rc; - - memcpy(&rpc->crpc_bulk, bulk, offsetof(struct srpc_bulk, bk_iovs[npg])); - if (opc == LST_BRW_WRITE) - brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_MAGIC); - else - brw_fill_bulk(&rpc->crpc_bulk, flags, BRW_POISON); - - req = &rpc->crpc_reqstmsg.msg_body.brw_reqst; - req->brw_flags = flags; - req->brw_rw = opc; - req->brw_len = len; - - *rpcpp = rpc; - return 0; -} - -static void -brw_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc) -{ - __u64 magic = BRW_MAGIC; - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct sfw_session *sn = tsi->tsi_batch->bat_session; - struct srpc_msg *msg = &rpc->crpc_replymsg; - struct srpc_brw_reply *reply = &msg->msg_body.brw_reply; - struct srpc_brw_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.brw_reqst; - - LASSERT(sn); - - if (rpc->crpc_status) { - CERROR("BRW RPC to %s failed with %d\n", - libcfs_id2str(rpc->crpc_dest), rpc->crpc_status); - if (!tsi->tsi_stopping) /* rpc could have been aborted */ - atomic_inc(&sn->sn_brw_errors); - return; - } - - if (msg->msg_magic != SRPC_MSG_MAGIC) { - __swab64s(&magic); - 
__swab32s(&reply->brw_status); - } - - CDEBUG(reply->brw_status ? D_WARNING : D_NET, - "BRW RPC to %s finished with brw_status: %d\n", - libcfs_id2str(rpc->crpc_dest), reply->brw_status); - - if (reply->brw_status) { - atomic_inc(&sn->sn_brw_errors); - rpc->crpc_status = -(int)reply->brw_status; - return; - } - - if (reqst->brw_rw == LST_BRW_WRITE) - return; - - if (brw_check_bulk(&rpc->crpc_bulk, reqst->brw_flags, magic)) { - CERROR("Bulk data from %s is corrupted!\n", - libcfs_id2str(rpc->crpc_dest)); - atomic_inc(&sn->sn_brw_errors); - rpc->crpc_status = -EBADMSG; - } -} - -static void -brw_server_rpc_done(struct srpc_server_rpc *rpc) -{ - struct srpc_bulk *blk = rpc->srpc_bulk; - - if (!blk) - return; - - if (rpc->srpc_status) - CERROR("Bulk transfer %s %s has failed: %d\n", - blk->bk_sink ? "from" : "to", - libcfs_id2str(rpc->srpc_peer), rpc->srpc_status); - else - CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n", - blk->bk_niov, blk->bk_sink ? "from" : "to", - libcfs_id2str(rpc->srpc_peer)); - - sfw_free_pages(rpc); -} - -static int -brw_bulk_ready(struct srpc_server_rpc *rpc, int status) -{ - __u64 magic = BRW_MAGIC; - struct srpc_brw_reply *reply = &rpc->srpc_replymsg.msg_body.brw_reply; - struct srpc_brw_reqst *reqst; - struct srpc_msg *reqstmsg; - - LASSERT(rpc->srpc_bulk); - LASSERT(rpc->srpc_reqstbuf); - - reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - reqst = &reqstmsg->msg_body.brw_reqst; - - if (status) { - CERROR("BRW bulk %s failed for RPC from %s: %d\n", - reqst->brw_rw == LST_BRW_READ ? 
"READ" : "WRITE", - libcfs_id2str(rpc->srpc_peer), status); - return -EIO; - } - - if (reqst->brw_rw == LST_BRW_READ) - return 0; - - if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) - __swab64s(&magic); - - if (brw_check_bulk(rpc->srpc_bulk, reqst->brw_flags, magic)) { - CERROR("Bulk data from %s is corrupted!\n", - libcfs_id2str(rpc->srpc_peer)); - reply->brw_status = EBADMSG; - } - - return 0; -} - -static int -brw_server_handle(struct srpc_server_rpc *rpc) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - struct srpc_msg *replymsg = &rpc->srpc_replymsg; - struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply; - struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst; - int npg; - int rc; - - LASSERT(sv->sv_id == SRPC_SERVICE_BRW); - - if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) { - LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - __swab32s(&reqst->brw_rw); - __swab32s(&reqst->brw_len); - __swab32s(&reqst->brw_flags); - __swab64s(&reqst->brw_rpyid); - __swab64s(&reqst->brw_bulkid); - } - LASSERT(reqstmsg->msg_type == (__u32)srpc_service2request(sv->sv_id)); - - reply->brw_status = 0; - rpc->srpc_done = brw_server_rpc_done; - - if ((reqst->brw_rw != LST_BRW_READ && reqst->brw_rw != LST_BRW_WRITE) || - (reqst->brw_flags != LST_BRW_CHECK_NONE && - reqst->brw_flags != LST_BRW_CHECK_FULL && - reqst->brw_flags != LST_BRW_CHECK_SIMPLE)) { - reply->brw_status = EINVAL; - return 0; - } - - if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) { - replymsg->msg_ses_feats = LST_FEATS_MASK; - reply->brw_status = EPROTO; - return 0; - } - - if (!(reqstmsg->msg_ses_feats & LST_FEAT_BULK_LEN)) { - /* compat with old version */ - if (reqst->brw_len & ~PAGE_MASK) { - reply->brw_status = EINVAL; - return 0; - } - npg = reqst->brw_len >> PAGE_SHIFT; - - } else { - npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT; - } - - replymsg->msg_ses_feats = reqstmsg->msg_ses_feats; - - if (!reqst->brw_len || 
npg > LNET_MAX_IOV) { - reply->brw_status = EINVAL; - return 0; - } - - rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg, - reqst->brw_len, - reqst->brw_rw == LST_BRW_WRITE); - if (rc) - return rc; - - if (reqst->brw_rw == LST_BRW_READ) - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC); - else - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON); - - return 0; -} - -struct sfw_test_client_ops brw_test_client; - -void brw_init_test_client(void) -{ - brw_test_client.tso_init = brw_client_init; - brw_test_client.tso_fini = brw_client_fini; - brw_test_client.tso_prep_rpc = brw_client_prep_rpc; - brw_test_client.tso_done_rpc = brw_client_done_rpc; -}; - -struct srpc_service brw_test_service; - -void brw_init_test_service(void) -{ - brw_test_service.sv_id = SRPC_SERVICE_BRW; - brw_test_service.sv_name = "brw_test"; - brw_test_service.sv_handler = brw_server_handle; - brw_test_service.sv_bulk_ready = brw_bulk_ready; - brw_test_service.sv_wi_total = brw_srv_workitems; -} diff --git a/drivers/staging/lustre/lnet/selftest/conctl.c b/drivers/staging/lustre/lnet/selftest/conctl.c deleted file mode 100644 index 906d82d90c0c..000000000000 --- a/drivers/staging/lustre/lnet/selftest/conctl.c +++ /dev/null @@ -1,801 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/conctl.c - * - * IOC handle in kernel - * - * Author: Liang Zhen - */ - -#include -#include -#include "console.h" - -static int -lst_session_new_ioctl(struct lstio_session_new_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int rc; - - if (!args->lstio_ses_idp || /* address for output sid */ - !args->lstio_ses_key || /* no key is specified */ - !args->lstio_ses_namep || /* session name */ - args->lstio_ses_nmlen <= 0 || - args->lstio_ses_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_ses_namep, - args->lstio_ses_nmlen)) { - return -EFAULT; - } - - name[args->lstio_ses_nmlen] = 0; - - rc = lstcon_session_new(name, - args->lstio_ses_key, - args->lstio_ses_feats, - args->lstio_ses_timeout, - args->lstio_ses_force, - args->lstio_ses_idp); - - return rc; -} - -static int -lst_session_end_ioctl(struct lstio_session_end_args *args) -{ - if (args->lstio_ses_key != console_session.ses_key) - return -EACCES; - - return lstcon_session_end(); -} - -static int -lst_session_info_ioctl(struct lstio_session_info_args *args) -{ - /* no checking of key */ - - if (!args->lstio_ses_idp || /* address for output sid */ - !args->lstio_ses_keyp || /* address for output key */ - !args->lstio_ses_featp || /* address for output features */ - !args->lstio_ses_ndinfo || /* address for output ndinfo */ - !args->lstio_ses_namep || /* address for output name */ - args->lstio_ses_nmlen <= 0 || - args->lstio_ses_nmlen > LST_NAME_SIZE) - return -EINVAL; - - return 
lstcon_session_info(args->lstio_ses_idp, - args->lstio_ses_keyp, - args->lstio_ses_featp, - args->lstio_ses_ndinfo, - args->lstio_ses_namep, - args->lstio_ses_nmlen); -} - -static int -lst_debug_ioctl(struct lstio_debug_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int client = 1; - int rc; - - if (args->lstio_dbg_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_dbg_resultp) - return -EINVAL; - - if (args->lstio_dbg_namep && /* name of batch/group */ - (args->lstio_dbg_nmlen <= 0 || - args->lstio_dbg_nmlen > LST_NAME_SIZE)) - return -EINVAL; - - if (args->lstio_dbg_namep) { - - if (copy_from_user(name, args->lstio_dbg_namep, - args->lstio_dbg_nmlen)) - return -EFAULT; - - name[args->lstio_dbg_nmlen] = 0; - } - - rc = -EINVAL; - - switch (args->lstio_dbg_type) { - case LST_OPC_SESSION: - rc = lstcon_session_debug(args->lstio_dbg_timeout, - args->lstio_dbg_resultp); - break; - - case LST_OPC_BATCHSRV: - client = 0; - /* fall through */ - case LST_OPC_BATCHCLI: - if (!args->lstio_dbg_namep) - goto out; - - rc = lstcon_batch_debug(args->lstio_dbg_timeout, - name, client, args->lstio_dbg_resultp); - break; - - case LST_OPC_GROUP: - if (!args->lstio_dbg_namep) - goto out; - - rc = lstcon_group_debug(args->lstio_dbg_timeout, - name, args->lstio_dbg_resultp); - break; - - case LST_OPC_NODES: - if (args->lstio_dbg_count <= 0 || - !args->lstio_dbg_idsp) - goto out; - - rc = lstcon_nodes_debug(args->lstio_dbg_timeout, - args->lstio_dbg_count, - args->lstio_dbg_idsp, - args->lstio_dbg_resultp); - break; - - default: - break; - } - -out: - return rc; -} - -static int -lst_group_add_ioctl(struct lstio_group_add_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int rc; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return 
-EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_group_add(name); - - return rc; -} - -static int -lst_group_del_ioctl(struct lstio_group_del_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return -EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_group_del(name); - - return rc; -} - -static int -lst_group_update_ioctl(struct lstio_group_update_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_resultp || - !args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return -EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - switch (args->lstio_grp_opc) { - case LST_GROUP_CLEAN: - rc = lstcon_group_clean(name, args->lstio_grp_args); - break; - - case LST_GROUP_REFRESH: - rc = lstcon_group_refresh(name, args->lstio_grp_resultp); - break; - - case LST_GROUP_RMND: - if (args->lstio_grp_count <= 0 || - !args->lstio_grp_idsp) { - rc = -EINVAL; - break; - } - rc = lstcon_nodes_remove(name, args->lstio_grp_count, - args->lstio_grp_idsp, - args->lstio_grp_resultp); - break; - - default: - rc = -EINVAL; - break; - } - - return rc; -} - -static int -lst_nodes_add_ioctl(struct lstio_group_nodes_args *args) -{ - unsigned int feats; - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_idsp || /* array of ids */ - args->lstio_grp_count <= 0 || - !args->lstio_grp_resultp || - !args->lstio_grp_featp || - !args->lstio_grp_namep || - args->lstio_grp_nmlen 
<= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return -EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_nodes_add(name, args->lstio_grp_count, - args->lstio_grp_idsp, &feats, - args->lstio_grp_resultp); - - if (!rc && - copy_to_user(args->lstio_grp_featp, &feats, sizeof(feats))) { - return -EINVAL; - } - - return rc; -} - -static int -lst_group_list_ioctl(struct lstio_group_list_args *args) -{ - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_grp_idx < 0 || - !args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - return lstcon_group_list(args->lstio_grp_idx, - args->lstio_grp_nmlen, - args->lstio_grp_namep); -} - -static int -lst_group_info_ioctl(struct lstio_group_info_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int ndent; - int index; - int rc; - - if (args->lstio_grp_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_grp_namep || - args->lstio_grp_nmlen <= 0 || - args->lstio_grp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (!args->lstio_grp_entp && /* output: group entry */ - !args->lstio_grp_dentsp) /* output: node entry */ - return -EINVAL; - - if (args->lstio_grp_dentsp) { /* have node entry */ - if (!args->lstio_grp_idxp || /* node index */ - !args->lstio_grp_ndentp) /* # of node entry */ - return -EINVAL; - - if (copy_from_user(&ndent, args->lstio_grp_ndentp, - sizeof(ndent)) || - copy_from_user(&index, args->lstio_grp_idxp, - sizeof(index))) - return -EFAULT; - - if (ndent <= 0 || index < 0) - return -EINVAL; - } - - if (copy_from_user(name, args->lstio_grp_namep, - args->lstio_grp_nmlen)) - return -EFAULT; - - name[args->lstio_grp_nmlen] = 0; - - rc = lstcon_group_info(name, args->lstio_grp_entp, - &index, &ndent, args->lstio_grp_dentsp); - - if (rc) - return rc; - - if (args->lstio_grp_dentsp && - 
(copy_to_user(args->lstio_grp_idxp, &index, sizeof(index)) || - copy_to_user(args->lstio_grp_ndentp, &ndent, sizeof(ndent)))) - return -EFAULT; - - return 0; -} - -static int -lst_batch_add_ioctl(struct lstio_batch_add_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_add(name); - - return rc; -} - -static int -lst_batch_run_ioctl(struct lstio_batch_run_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_run(name, args->lstio_bat_timeout, - args->lstio_bat_resultp); - - return rc; -} - -static int -lst_batch_stop_ioctl(struct lstio_batch_stop_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_bat_resultp || - !args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_stop(name, args->lstio_bat_force, - args->lstio_bat_resultp); - - return rc; -} - -static int -lst_batch_query_ioctl(struct lstio_batch_query_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int rc; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if 
(!args->lstio_bat_resultp || - !args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (args->lstio_bat_testidx < 0) - return -EINVAL; - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_test_batch_query(name, - args->lstio_bat_testidx, - args->lstio_bat_client, - args->lstio_bat_timeout, - args->lstio_bat_resultp); - - return rc; -} - -static int -lst_batch_list_ioctl(struct lstio_batch_list_args *args) -{ - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (args->lstio_bat_idx < 0 || - !args->lstio_bat_namep || - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - return lstcon_batch_list(args->lstio_bat_idx, - args->lstio_bat_nmlen, - args->lstio_bat_namep); -} - -static int -lst_batch_info_ioctl(struct lstio_batch_info_args *args) -{ - char name[LST_NAME_SIZE + 1]; - int rc; - int index; - int ndent; - - if (args->lstio_bat_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_bat_namep || /* batch name */ - args->lstio_bat_nmlen <= 0 || - args->lstio_bat_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (!args->lstio_bat_entp && /* output: batch entry */ - !args->lstio_bat_dentsp) /* output: node entry */ - return -EINVAL; - - if (args->lstio_bat_dentsp) { /* have node entry */ - if (!args->lstio_bat_idxp || /* node index */ - !args->lstio_bat_ndentp) /* # of node entry */ - return -EINVAL; - - if (copy_from_user(&index, args->lstio_bat_idxp, - sizeof(index)) || - copy_from_user(&ndent, args->lstio_bat_ndentp, - sizeof(ndent))) - return -EFAULT; - - if (ndent <= 0 || index < 0) - return -EINVAL; - } - - if (copy_from_user(name, args->lstio_bat_namep, - args->lstio_bat_nmlen)) - return -EFAULT; - - name[args->lstio_bat_nmlen] = 0; - - rc = lstcon_batch_info(name, args->lstio_bat_entp, - args->lstio_bat_server, 
args->lstio_bat_testidx, - &index, &ndent, args->lstio_bat_dentsp); - - if (rc) - return rc; - - if (args->lstio_bat_dentsp && - (copy_to_user(args->lstio_bat_idxp, &index, sizeof(index)) || - copy_to_user(args->lstio_bat_ndentp, &ndent, sizeof(ndent)))) - rc = -EFAULT; - - return rc; -} - -static int -lst_stat_query_ioctl(struct lstio_stat_args *args) -{ - int rc; - char name[LST_NAME_SIZE + 1]; - - /* TODO: not finished */ - if (args->lstio_sta_key != console_session.ses_key) - return -EACCES; - - if (!args->lstio_sta_resultp) - return -EINVAL; - - if (args->lstio_sta_idsp) { - if (args->lstio_sta_count <= 0) - return -EINVAL; - - rc = lstcon_nodes_stat(args->lstio_sta_count, - args->lstio_sta_idsp, - args->lstio_sta_timeout, - args->lstio_sta_resultp); - } else if (args->lstio_sta_namep) { - if (args->lstio_sta_nmlen <= 0 || - args->lstio_sta_nmlen > LST_NAME_SIZE) - return -EINVAL; - - rc = copy_from_user(name, args->lstio_sta_namep, - args->lstio_sta_nmlen); - if (!rc) - rc = lstcon_group_stat(name, args->lstio_sta_timeout, - args->lstio_sta_resultp); - else - rc = -EFAULT; - } else { - rc = -EINVAL; - } - - return rc; -} - -static int lst_test_add_ioctl(struct lstio_test_args *args) -{ - char batch_name[LST_NAME_SIZE + 1]; - char src_name[LST_NAME_SIZE + 1]; - char dst_name[LST_NAME_SIZE + 1]; - void *param = NULL; - int ret = 0; - int rc = -ENOMEM; - - if (!args->lstio_tes_resultp || - !args->lstio_tes_retp || - !args->lstio_tes_bat_name || /* no specified batch */ - args->lstio_tes_bat_nmlen <= 0 || - args->lstio_tes_bat_nmlen > LST_NAME_SIZE || - !args->lstio_tes_sgrp_name || /* no source group */ - args->lstio_tes_sgrp_nmlen <= 0 || - args->lstio_tes_sgrp_nmlen > LST_NAME_SIZE || - !args->lstio_tes_dgrp_name || /* no target group */ - args->lstio_tes_dgrp_nmlen <= 0 || - args->lstio_tes_dgrp_nmlen > LST_NAME_SIZE) - return -EINVAL; - - if (!args->lstio_tes_loop || /* negative is infinite */ - args->lstio_tes_concur <= 0 || - args->lstio_tes_dist <= 0 || - 
args->lstio_tes_span <= 0) - return -EINVAL; - - /* have parameter, check if parameter length is valid */ - if (args->lstio_tes_param && - (args->lstio_tes_param_len <= 0 || - args->lstio_tes_param_len > - PAGE_SIZE - sizeof(struct lstcon_test))) - return -EINVAL; - - /* Enforce zero parameter length if there's no parameter */ - if (!args->lstio_tes_param && args->lstio_tes_param_len) - return -EINVAL; - - if (args->lstio_tes_param) { - param = memdup_user(args->lstio_tes_param, - args->lstio_tes_param_len); - if (IS_ERR(param)) - return PTR_ERR(param); - } - - rc = -EFAULT; - if (copy_from_user(batch_name, args->lstio_tes_bat_name, - args->lstio_tes_bat_nmlen) || - copy_from_user(src_name, args->lstio_tes_sgrp_name, - args->lstio_tes_sgrp_nmlen) || - copy_from_user(dst_name, args->lstio_tes_dgrp_name, - args->lstio_tes_dgrp_nmlen)) - goto out; - - rc = lstcon_test_add(batch_name, args->lstio_tes_type, - args->lstio_tes_loop, args->lstio_tes_concur, - args->lstio_tes_dist, args->lstio_tes_span, - src_name, dst_name, param, - args->lstio_tes_param_len, - &ret, args->lstio_tes_resultp); - - if (!rc && ret) - rc = (copy_to_user(args->lstio_tes_retp, &ret, - sizeof(ret))) ? 
-EFAULT : 0; -out: - kfree(param); - - return rc; -} - -int -lstcon_ioctl_entry(struct notifier_block *nb, - unsigned long cmd, void *vdata) -{ - struct libcfs_ioctl_hdr *hdr = vdata; - char *buf = NULL; - struct libcfs_ioctl_data *data; - int opc; - int rc = -EINVAL; - - if (cmd != IOC_LIBCFS_LNETST) - goto err; - - data = container_of(hdr, struct libcfs_ioctl_data, ioc_hdr); - - opc = data->ioc_u32[0]; - - if (data->ioc_plen1 > PAGE_SIZE) - goto err; - - buf = kmalloc(data->ioc_plen1, GFP_KERNEL); - rc = -ENOMEM; - if (!buf) - goto err; - - /* copy in parameter */ - rc = -EFAULT; - if (copy_from_user(buf, data->ioc_pbuf1, data->ioc_plen1)) - goto err; - - mutex_lock(&console_session.ses_mutex); - - console_session.ses_laststamp = ktime_get_real_seconds(); - - if (console_session.ses_shutdown) { - rc = -ESHUTDOWN; - goto out; - } - - if (console_session.ses_expired) - lstcon_session_end(); - - if (opc != LSTIO_SESSION_NEW && - console_session.ses_state == LST_SESSION_NONE) { - CDEBUG(D_NET, "LST no active session\n"); - rc = -ESRCH; - goto out; - } - - memset(&console_session.ses_trans_stat, 0, sizeof(struct lstcon_trans_stat)); - - switch (opc) { - case LSTIO_SESSION_NEW: - rc = lst_session_new_ioctl((struct lstio_session_new_args *)buf); - break; - case LSTIO_SESSION_END: - rc = lst_session_end_ioctl((struct lstio_session_end_args *)buf); - break; - case LSTIO_SESSION_INFO: - rc = lst_session_info_ioctl((struct lstio_session_info_args *)buf); - break; - case LSTIO_DEBUG: - rc = lst_debug_ioctl((struct lstio_debug_args *)buf); - break; - case LSTIO_GROUP_ADD: - rc = lst_group_add_ioctl((struct lstio_group_add_args *)buf); - break; - case LSTIO_GROUP_DEL: - rc = lst_group_del_ioctl((struct lstio_group_del_args *)buf); - break; - case LSTIO_GROUP_UPDATE: - rc = lst_group_update_ioctl((struct lstio_group_update_args *)buf); - break; - case LSTIO_NODES_ADD: - rc = lst_nodes_add_ioctl((struct lstio_group_nodes_args *)buf); - break; - case LSTIO_GROUP_LIST: - rc = 
lst_group_list_ioctl((struct lstio_group_list_args *)buf); - break; - case LSTIO_GROUP_INFO: - rc = lst_group_info_ioctl((struct lstio_group_info_args *)buf); - break; - case LSTIO_BATCH_ADD: - rc = lst_batch_add_ioctl((struct lstio_batch_add_args *)buf); - break; - case LSTIO_BATCH_START: - rc = lst_batch_run_ioctl((struct lstio_batch_run_args *)buf); - break; - case LSTIO_BATCH_STOP: - rc = lst_batch_stop_ioctl((struct lstio_batch_stop_args *)buf); - break; - case LSTIO_BATCH_QUERY: - rc = lst_batch_query_ioctl((struct lstio_batch_query_args *)buf); - break; - case LSTIO_BATCH_LIST: - rc = lst_batch_list_ioctl((struct lstio_batch_list_args *)buf); - break; - case LSTIO_BATCH_INFO: - rc = lst_batch_info_ioctl((struct lstio_batch_info_args *)buf); - break; - case LSTIO_TEST_ADD: - rc = lst_test_add_ioctl((struct lstio_test_args *)buf); - break; - case LSTIO_STAT_QUERY: - rc = lst_stat_query_ioctl((struct lstio_stat_args *)buf); - break; - default: - rc = -EINVAL; - goto out; - } - - if (copy_to_user(data->ioc_pbuf2, &console_session.ses_trans_stat, - sizeof(struct lstcon_trans_stat))) - rc = -EFAULT; -out: - mutex_unlock(&console_session.ses_mutex); -err: - kfree(buf); - - return notifier_from_ioctl_errno(rc); -} diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c deleted file mode 100644 index 0dabade3d091..000000000000 --- a/drivers/staging/lustre/lnet/selftest/conrpc.c +++ /dev/null @@ -1,1396 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/conctl.c - * - * Console framework rpcs - * - * Author: Liang Zhen - */ - -#include -#include "timer.h" -#include "conrpc.h" -#include "console.h" - -void lstcon_rpc_stat_reply(struct lstcon_rpc_trans *, struct srpc_msg *, - struct lstcon_node *, struct lstcon_trans_stat *); - -static void -lstcon_rpc_done(struct srpc_client_rpc *rpc) -{ - struct lstcon_rpc *crpc = (struct lstcon_rpc *)rpc->crpc_priv; - - LASSERT(crpc && rpc == crpc->crp_rpc); - LASSERT(crpc->crp_posted && !crpc->crp_finished); - - spin_lock(&rpc->crpc_lock); - - if (!crpc->crp_trans) { - /* - * Orphan RPC is not in any transaction, - * I'm just a poor body and nobody loves me - */ - spin_unlock(&rpc->crpc_lock); - - /* release it */ - lstcon_rpc_put(crpc); - return; - } - - /* not an orphan RPC */ - crpc->crp_finished = 1; - - if (!crpc->crp_stamp) { - /* not aborted */ - LASSERT(!crpc->crp_status); - - crpc->crp_stamp = jiffies; - crpc->crp_status = rpc->crpc_status; - } - - /* wakeup (transaction)thread if I'm the last RPC in the transaction */ - if (atomic_dec_and_test(&crpc->crp_trans->tas_remaining)) - wake_up(&crpc->crp_trans->tas_waitq); - - 
spin_unlock(&rpc->crpc_lock); -} - -static int -lstcon_rpc_init(struct lstcon_node *nd, int service, unsigned int feats, - int bulk_npg, int bulk_len, int embedded, - struct lstcon_rpc *crpc) -{ - crpc->crp_rpc = sfw_create_rpc(nd->nd_id, service, - feats, bulk_npg, bulk_len, - lstcon_rpc_done, (void *)crpc); - if (!crpc->crp_rpc) - return -ENOMEM; - - crpc->crp_trans = NULL; - crpc->crp_node = nd; - crpc->crp_posted = 0; - crpc->crp_finished = 0; - crpc->crp_unpacked = 0; - crpc->crp_status = 0; - crpc->crp_stamp = 0; - crpc->crp_embedded = embedded; - INIT_LIST_HEAD(&crpc->crp_link); - - atomic_inc(&console_session.ses_rpc_counter); - - return 0; -} - -static int -lstcon_rpc_prep(struct lstcon_node *nd, int service, unsigned int feats, - int bulk_npg, int bulk_len, struct lstcon_rpc **crpcpp) -{ - struct lstcon_rpc *crpc = NULL; - int rc; - - spin_lock(&console_session.ses_rpc_lock); - - crpc = list_first_entry_or_null(&console_session.ses_rpc_freelist, - struct lstcon_rpc, crp_link); - if (crpc) - list_del_init(&crpc->crp_link); - - spin_unlock(&console_session.ses_rpc_lock); - - if (!crpc) { - crpc = kzalloc(sizeof(*crpc), GFP_NOFS); - if (!crpc) - return -ENOMEM; - } - - rc = lstcon_rpc_init(nd, service, feats, bulk_npg, bulk_len, 0, crpc); - if (!rc) { - *crpcpp = crpc; - return 0; - } - - kfree(crpc); - - return rc; -} - -void -lstcon_rpc_put(struct lstcon_rpc *crpc) -{ - struct srpc_bulk *bulk = &crpc->crp_rpc->crpc_bulk; - int i; - - LASSERT(list_empty(&crpc->crp_link)); - - for (i = 0; i < bulk->bk_niov; i++) { - if (!bulk->bk_iovs[i].bv_page) - continue; - - __free_page(bulk->bk_iovs[i].bv_page); - } - - srpc_client_rpc_decref(crpc->crp_rpc); - - if (crpc->crp_embedded) { - /* embedded RPC, don't recycle it */ - memset(crpc, 0, sizeof(*crpc)); - crpc->crp_embedded = 1; - - } else { - spin_lock(&console_session.ses_rpc_lock); - - list_add(&crpc->crp_link, - &console_session.ses_rpc_freelist); - - spin_unlock(&console_session.ses_rpc_lock); - } - - /* RPC 
is not alive now */ - atomic_dec(&console_session.ses_rpc_counter); -} - -static void -lstcon_rpc_post(struct lstcon_rpc *crpc) -{ - struct lstcon_rpc_trans *trans = crpc->crp_trans; - - LASSERT(trans); - - atomic_inc(&trans->tas_remaining); - crpc->crp_posted = 1; - - sfw_post_rpc(crpc->crp_rpc); -} - -static char * -lstcon_rpc_trans_name(int transop) -{ - if (transop == LST_TRANS_SESNEW) - return "SESNEW"; - - if (transop == LST_TRANS_SESEND) - return "SESEND"; - - if (transop == LST_TRANS_SESQRY) - return "SESQRY"; - - if (transop == LST_TRANS_SESPING) - return "SESPING"; - - if (transop == LST_TRANS_TSBCLIADD) - return "TSBCLIADD"; - - if (transop == LST_TRANS_TSBSRVADD) - return "TSBSRVADD"; - - if (transop == LST_TRANS_TSBRUN) - return "TSBRUN"; - - if (transop == LST_TRANS_TSBSTOP) - return "TSBSTOP"; - - if (transop == LST_TRANS_TSBCLIQRY) - return "TSBCLIQRY"; - - if (transop == LST_TRANS_TSBSRVQRY) - return "TSBSRVQRY"; - - if (transop == LST_TRANS_STATQRY) - return "STATQRY"; - - return "Unknown"; -} - -int -lstcon_rpc_trans_prep(struct list_head *translist, int transop, - struct lstcon_rpc_trans **transpp) -{ - struct lstcon_rpc_trans *trans; - - if (translist) { - list_for_each_entry(trans, translist, tas_link) { - /* - * Can't enqueue two private transaction on - * the same object - */ - if ((trans->tas_opc & transop) == LST_TRANS_PRIVATE) - return -EPERM; - } - } - - /* create a trans group */ - trans = kzalloc(sizeof(*trans), GFP_NOFS); - if (!trans) - return -ENOMEM; - - trans->tas_opc = transop; - - if (!translist) - INIT_LIST_HEAD(&trans->tas_olink); - else - list_add_tail(&trans->tas_olink, translist); - - list_add_tail(&trans->tas_link, &console_session.ses_trans_list); - - INIT_LIST_HEAD(&trans->tas_rpcs_list); - atomic_set(&trans->tas_remaining, 0); - init_waitqueue_head(&trans->tas_waitq); - - spin_lock(&console_session.ses_rpc_lock); - trans->tas_features = console_session.ses_features; - spin_unlock(&console_session.ses_rpc_lock); - - 
*transpp = trans; - return 0; -} - -void -lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, struct lstcon_rpc *crpc) -{ - list_add_tail(&crpc->crp_link, &trans->tas_rpcs_list); - crpc->crp_trans = trans; -} - -void -lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error) -{ - struct srpc_client_rpc *rpc; - struct lstcon_rpc *crpc; - struct lstcon_node *nd; - - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - rpc = crpc->crp_rpc; - - spin_lock(&rpc->crpc_lock); - - if (!crpc->crp_posted || /* not posted */ - crpc->crp_stamp) { /* rpc done or aborted already */ - if (!crpc->crp_stamp) { - crpc->crp_stamp = jiffies; - crpc->crp_status = -EINTR; - } - spin_unlock(&rpc->crpc_lock); - continue; - } - - crpc->crp_stamp = jiffies; - crpc->crp_status = error; - - spin_unlock(&rpc->crpc_lock); - - sfw_abort_rpc(rpc); - - if (error != -ETIMEDOUT) - continue; - - nd = crpc->crp_node; - if (time_after(nd->nd_stamp, crpc->crp_stamp)) - continue; - - nd->nd_stamp = crpc->crp_stamp; - nd->nd_state = LST_NODE_DOWN; - } -} - -static int -lstcon_rpc_trans_check(struct lstcon_rpc_trans *trans) -{ - if (console_session.ses_shutdown && - !list_empty(&trans->tas_olink)) /* Not an end session RPC */ - return 1; - - return !atomic_read(&trans->tas_remaining) ? 1 : 0; -} - -int -lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout) -{ - struct lstcon_rpc *crpc; - int rc; - - if (list_empty(&trans->tas_rpcs_list)) - return 0; - - if (timeout < LST_TRANS_MIN_TIMEOUT) - timeout = LST_TRANS_MIN_TIMEOUT; - - CDEBUG(D_NET, "Transaction %s started\n", - lstcon_rpc_trans_name(trans->tas_opc)); - - /* post all requests */ - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - LASSERT(!crpc->crp_posted); - - lstcon_rpc_post(crpc); - } - - mutex_unlock(&console_session.ses_mutex); - - rc = wait_event_interruptible_timeout(trans->tas_waitq, - lstcon_rpc_trans_check(trans), - timeout * HZ); - rc = (rc > 0) ? 0 : ((rc < 0) ? 
-EINTR : -ETIMEDOUT); - - mutex_lock(&console_session.ses_mutex); - - if (console_session.ses_shutdown) - rc = -ESHUTDOWN; - - if (rc || atomic_read(&trans->tas_remaining)) { - /* treat short timeout as canceled */ - if (rc == -ETIMEDOUT && timeout < LST_TRANS_MIN_TIMEOUT * 2) - rc = -EINTR; - - lstcon_rpc_trans_abort(trans, rc); - } - - CDEBUG(D_NET, "Transaction %s stopped: %d\n", - lstcon_rpc_trans_name(trans->tas_opc), rc); - - lstcon_rpc_trans_stat(trans, lstcon_trans_stat()); - - return rc; -} - -static int -lstcon_rpc_get_reply(struct lstcon_rpc *crpc, struct srpc_msg **msgpp) -{ - struct lstcon_node *nd = crpc->crp_node; - struct srpc_client_rpc *rpc = crpc->crp_rpc; - struct srpc_generic_reply *rep; - - LASSERT(nd && rpc); - LASSERT(crpc->crp_stamp); - - if (crpc->crp_status) { - *msgpp = NULL; - return crpc->crp_status; - } - - *msgpp = &rpc->crpc_replymsg; - if (!crpc->crp_unpacked) { - sfw_unpack_message(*msgpp); - crpc->crp_unpacked = 1; - } - - if (time_after(nd->nd_stamp, crpc->crp_stamp)) - return 0; - - nd->nd_stamp = crpc->crp_stamp; - rep = &(*msgpp)->msg_body.reply; - - if (rep->sid.ses_nid == LNET_NID_ANY) - nd->nd_state = LST_NODE_UNKNOWN; - else if (lstcon_session_match(rep->sid)) - nd->nd_state = LST_NODE_ACTIVE; - else - nd->nd_state = LST_NODE_BUSY; - - return 0; -} - -void -lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, struct lstcon_trans_stat *stat) -{ - struct lstcon_rpc *crpc; - struct srpc_msg *rep; - int error; - - LASSERT(stat); - - memset(stat, 0, sizeof(*stat)); - - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - lstcon_rpc_stat_total(stat, 1); - - LASSERT(crpc->crp_stamp); - - error = lstcon_rpc_get_reply(crpc, &rep); - if (error) { - lstcon_rpc_stat_failure(stat, 1); - if (!stat->trs_rpc_errno) - stat->trs_rpc_errno = -error; - - continue; - } - - lstcon_rpc_stat_success(stat, 1); - - lstcon_rpc_stat_reply(trans, rep, crpc->crp_node, stat); - } - - if (trans->tas_opc == LST_TRANS_SESNEW && 
!stat->trs_fwk_errno) { - stat->trs_fwk_errno = - lstcon_session_feats_check(trans->tas_features); - } - - CDEBUG(D_NET, "transaction %s : success %d, failure %d, total %d, RPC error(%d), Framework error(%d)\n", - lstcon_rpc_trans_name(trans->tas_opc), - lstcon_rpc_stat_success(stat, 0), - lstcon_rpc_stat_failure(stat, 0), - lstcon_rpc_stat_total(stat, 0), - stat->trs_rpc_errno, stat->trs_fwk_errno); -} - -int -lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans, - struct list_head __user *head_up, - lstcon_rpc_readent_func_t readent) -{ - struct list_head tmp; - struct list_head __user *next; - struct lstcon_rpc_ent *ent; - struct srpc_generic_reply *rep; - struct lstcon_rpc *crpc; - struct srpc_msg *msg; - struct lstcon_node *nd; - long dur; - struct timeval tv; - int error; - - LASSERT(head_up); - - next = head_up; - - list_for_each_entry(crpc, &trans->tas_rpcs_list, crp_link) { - if (copy_from_user(&tmp, next, - sizeof(struct list_head))) - return -EFAULT; - - next = tmp.next; - if (next == head_up) - return 0; - - ent = list_entry(next, struct lstcon_rpc_ent, rpe_link); - - LASSERT(crpc->crp_stamp); - - error = lstcon_rpc_get_reply(crpc, &msg); - - nd = crpc->crp_node; - - dur = (long)(crpc->crp_stamp - - (unsigned long)console_session.ses_id.ses_stamp); - jiffies_to_timeval(dur, &tv); - - if (copy_to_user(&ent->rpe_peer, &nd->nd_id, - sizeof(struct lnet_process_id)) || - copy_to_user(&ent->rpe_stamp, &tv, sizeof(tv)) || - copy_to_user(&ent->rpe_state, &nd->nd_state, - sizeof(nd->nd_state)) || - copy_to_user(&ent->rpe_rpc_errno, &error, - sizeof(error))) - return -EFAULT; - - if (error) - continue; - - /* RPC is done */ - rep = (struct srpc_generic_reply *)&msg->msg_body.reply; - - if (copy_to_user(&ent->rpe_sid, &rep->sid, sizeof(rep->sid)) || - copy_to_user(&ent->rpe_fwk_errno, &rep->status, - sizeof(rep->status))) - return -EFAULT; - - if (!readent) - continue; - - error = readent(trans->tas_opc, msg, ent); - if (error) - return error; - } - - 
return 0; -} - -void -lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans) -{ - struct srpc_client_rpc *rpc; - struct lstcon_rpc *crpc; - struct lstcon_rpc *tmp; - int count = 0; - - list_for_each_entry_safe(crpc, tmp, &trans->tas_rpcs_list, crp_link) { - rpc = crpc->crp_rpc; - - spin_lock(&rpc->crpc_lock); - - /* free it if not posted or finished already */ - if (!crpc->crp_posted || crpc->crp_finished) { - spin_unlock(&rpc->crpc_lock); - - list_del_init(&crpc->crp_link); - lstcon_rpc_put(crpc); - - continue; - } - - /* - * rpcs can be still not callbacked (even LNetMDUnlink is - * called) because huge timeout for inaccessible network, - * don't make user wait for them, just abandon them, they - * will be recycled in callback - */ - LASSERT(crpc->crp_status); - - crpc->crp_node = NULL; - crpc->crp_trans = NULL; - list_del_init(&crpc->crp_link); - count++; - - spin_unlock(&rpc->crpc_lock); - - atomic_dec(&trans->tas_remaining); - } - - LASSERT(!atomic_read(&trans->tas_remaining)); - - list_del(&trans->tas_link); - if (!list_empty(&trans->tas_olink)) - list_del(&trans->tas_olink); - - CDEBUG(D_NET, "Transaction %s destroyed with %d pending RPCs\n", - lstcon_rpc_trans_name(trans->tas_opc), count); - - kfree(trans); -} - -int -lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, - unsigned int feats, struct lstcon_rpc **crpc) -{ - struct srpc_mksn_reqst *msrq; - struct srpc_rmsn_reqst *rsrq; - int rc; - - switch (transop) { - case LST_TRANS_SESNEW: - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_MAKE_SESSION, - feats, 0, 0, crpc); - if (rc) - return rc; - - msrq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.mksn_reqst; - msrq->mksn_sid = console_session.ses_id; - msrq->mksn_force = console_session.ses_force; - strlcpy(msrq->mksn_name, console_session.ses_name, - sizeof(msrq->mksn_name)); - break; - - case LST_TRANS_SESEND: - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_REMOVE_SESSION, - feats, 0, 0, crpc); - if (rc) - return rc; - - rsrq = 
&(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.rmsn_reqst; - rsrq->rmsn_sid = console_session.ses_id; - break; - - default: - LBUG(); - } - - return 0; -} - -int -lstcon_dbgrpc_prep(struct lstcon_node *nd, unsigned int feats, - struct lstcon_rpc **crpc) -{ - struct srpc_debug_reqst *drq; - int rc; - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_DEBUG, feats, 0, 0, crpc); - if (rc) - return rc; - - drq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst; - - drq->dbg_sid = console_session.ses_id; - drq->dbg_flags = 0; - - return rc; -} - -int -lstcon_batrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats, - struct lstcon_tsb_hdr *tsb, struct lstcon_rpc **crpc) -{ - struct lstcon_batch *batch; - struct srpc_batch_reqst *brq; - int rc; - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_BATCH, feats, 0, 0, crpc); - if (rc) - return rc; - - brq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.bat_reqst; - - brq->bar_sid = console_session.ses_id; - brq->bar_bid = tsb->tsb_id; - brq->bar_testidx = tsb->tsb_index; - brq->bar_opc = transop == LST_TRANS_TSBRUN ? SRPC_BATCH_OPC_RUN : - (transop == LST_TRANS_TSBSTOP ? 
SRPC_BATCH_OPC_STOP : - SRPC_BATCH_OPC_QUERY); - - if (transop != LST_TRANS_TSBRUN && - transop != LST_TRANS_TSBSTOP) - return 0; - - LASSERT(!tsb->tsb_index); - - batch = (struct lstcon_batch *)tsb; - brq->bar_arg = batch->bat_arg; - - return 0; -} - -int -lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int feats, - struct lstcon_rpc **crpc) -{ - struct srpc_stat_reqst *srq; - int rc; - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_QUERY_STAT, feats, 0, 0, crpc); - if (rc) - return rc; - - srq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.stat_reqst; - - srq->str_sid = console_session.ses_id; - srq->str_type = 0; /* XXX remove it */ - - return 0; -} - -static struct lnet_process_id_packed * -lstcon_next_id(int idx, int nkiov, struct bio_vec *kiov) -{ - struct lnet_process_id_packed *pid; - int i; - - i = idx / SFW_ID_PER_PAGE; - - LASSERT(i < nkiov); - - pid = (struct lnet_process_id_packed *)page_address(kiov[i].bv_page); - - return &pid[idx % SFW_ID_PER_PAGE]; -} - -static int -lstcon_dstnodes_prep(struct lstcon_group *grp, int idx, - int dist, int span, int nkiov, struct bio_vec *kiov) -{ - struct lnet_process_id_packed *pid; - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - int start; - int end; - int i = 0; - - LASSERT(dist >= 1); - LASSERT(span >= 1); - LASSERT(grp->grp_nnode >= 1); - - if (span > grp->grp_nnode) - return -EINVAL; - - start = ((idx / dist) * span) % grp->grp_nnode; - end = ((idx / dist) * span + span - 1) % grp->grp_nnode; - - list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) { - nd = ndl->ndl_node; - if (i < start) { - i++; - continue; - } - - if (i > (end >= start ? 
end : grp->grp_nnode)) - break; - - pid = lstcon_next_id((i - start), nkiov, kiov); - pid->nid = nd->nd_id.nid; - pid->pid = nd->nd_id.pid; - i++; - } - - if (start <= end) /* done */ - return 0; - - list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) { - if (i > grp->grp_nnode + end) - break; - - nd = ndl->ndl_node; - pid = lstcon_next_id((i - start), nkiov, kiov); - pid->nid = nd->nd_id.nid; - pid->pid = nd->nd_id.pid; - i++; - } - - return 0; -} - -static int -lstcon_pingrpc_prep(struct lst_test_ping_param *param, struct srpc_test_reqst *req) -{ - struct test_ping_req *prq = &req->tsr_u.ping; - - prq->png_size = param->png_size; - prq->png_flags = param->png_flags; - /* TODO dest */ - return 0; -} - -static int -lstcon_bulkrpc_v0_prep(struct lst_test_bulk_param *param, - struct srpc_test_reqst *req) -{ - struct test_bulk_req *brq = &req->tsr_u.bulk_v0; - - brq->blk_opc = param->blk_opc; - brq->blk_npg = DIV_ROUND_UP(param->blk_size, PAGE_SIZE); - brq->blk_flags = param->blk_flags; - - return 0; -} - -static int -lstcon_bulkrpc_v1_prep(struct lst_test_bulk_param *param, bool is_client, - struct srpc_test_reqst *req) -{ - struct test_bulk_req_v1 *brq = &req->tsr_u.bulk_v1; - - brq->blk_opc = param->blk_opc; - brq->blk_flags = param->blk_flags; - brq->blk_len = param->blk_size; - brq->blk_offset = is_client ? param->blk_cli_off : param->blk_srv_off; - - return 0; -} - -int -lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned int feats, - struct lstcon_test *test, struct lstcon_rpc **crpc) -{ - struct lstcon_group *sgrp = test->tes_src_grp; - struct lstcon_group *dgrp = test->tes_dst_grp; - struct srpc_test_reqst *trq; - struct srpc_bulk *bulk; - int i; - int npg = 0; - int nob = 0; - int rc = 0; - - if (transop == LST_TRANS_TSBCLIADD) { - npg = sfw_id_pages(test->tes_span); - nob = !(feats & LST_FEAT_BULK_LEN) ? 
- npg * PAGE_SIZE : - sizeof(struct lnet_process_id_packed) * test->tes_span; - } - - rc = lstcon_rpc_prep(nd, SRPC_SERVICE_TEST, feats, npg, nob, crpc); - if (rc) - return rc; - - trq = &(*crpc)->crp_rpc->crpc_reqstmsg.msg_body.tes_reqst; - - if (transop == LST_TRANS_TSBSRVADD) { - int ndist = DIV_ROUND_UP(sgrp->grp_nnode, test->tes_dist); - int nspan = DIV_ROUND_UP(dgrp->grp_nnode, test->tes_span); - int nmax = DIV_ROUND_UP(ndist, nspan); - - trq->tsr_ndest = 0; - trq->tsr_loop = nmax * test->tes_dist * test->tes_concur; - } else { - bulk = &(*crpc)->crp_rpc->crpc_bulk; - - for (i = 0; i < npg; i++) { - int len; - - LASSERT(nob > 0); - - len = !(feats & LST_FEAT_BULK_LEN) ? - PAGE_SIZE : - min_t(int, nob, PAGE_SIZE); - nob -= len; - - bulk->bk_iovs[i].bv_offset = 0; - bulk->bk_iovs[i].bv_len = len; - bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL); - - if (!bulk->bk_iovs[i].bv_page) { - lstcon_rpc_put(*crpc); - return -ENOMEM; - } - } - - bulk->bk_sink = 0; - - LASSERT(transop == LST_TRANS_TSBCLIADD); - - rc = lstcon_dstnodes_prep(test->tes_dst_grp, - test->tes_cliidx++, - test->tes_dist, - test->tes_span, - npg, &bulk->bk_iovs[0]); - if (rc) { - lstcon_rpc_put(*crpc); - return rc; - } - - trq->tsr_ndest = test->tes_span; - trq->tsr_loop = test->tes_loop; - } - - trq->tsr_sid = console_session.ses_id; - trq->tsr_bid = test->tes_hdr.tsb_id; - trq->tsr_concur = test->tes_concur; - trq->tsr_is_client = (transop == LST_TRANS_TSBCLIADD) ? 
1 : 0; - trq->tsr_stop_onerr = !!test->tes_stop_onerr; - - switch (test->tes_type) { - case LST_TEST_PING: - trq->tsr_service = SRPC_SERVICE_PING; - rc = lstcon_pingrpc_prep((struct lst_test_ping_param *) - &test->tes_param[0], trq); - break; - - case LST_TEST_BULK: - trq->tsr_service = SRPC_SERVICE_BRW; - if (!(feats & LST_FEAT_BULK_LEN)) { - rc = lstcon_bulkrpc_v0_prep((struct lst_test_bulk_param *) - &test->tes_param[0], trq); - } else { - rc = lstcon_bulkrpc_v1_prep((struct lst_test_bulk_param *) - &test->tes_param[0], - trq->tsr_is_client, trq); - } - - break; - default: - LBUG(); - break; - } - - return rc; -} - -static int -lstcon_sesnew_stat_reply(struct lstcon_rpc_trans *trans, - struct lstcon_node *nd, struct srpc_msg *reply) -{ - struct srpc_mksn_reply *mksn_rep = &reply->msg_body.mksn_reply; - int status = mksn_rep->mksn_status; - - if (!status && - (reply->msg_ses_feats & ~LST_FEATS_MASK)) { - mksn_rep->mksn_status = EPROTO; - status = EPROTO; - } - - if (status == EPROTO) { - CNETERR("session protocol error from %s: %u\n", - libcfs_nid2str(nd->nd_id.nid), - reply->msg_ses_feats); - } - - if (status) - return status; - - if (!trans->tas_feats_updated) { - spin_lock(&console_session.ses_rpc_lock); - if (!trans->tas_feats_updated) { /* recheck with lock */ - trans->tas_feats_updated = 1; - trans->tas_features = reply->msg_ses_feats; - } - spin_unlock(&console_session.ses_rpc_lock); - } - - if (reply->msg_ses_feats != trans->tas_features) { - CNETERR("Framework features %x from %s is different with features on this transaction: %x\n", - reply->msg_ses_feats, libcfs_nid2str(nd->nd_id.nid), - trans->tas_features); - mksn_rep->mksn_status = EPROTO; - status = EPROTO; - } - - if (!status) { - /* session timeout on remote node */ - nd->nd_timeout = mksn_rep->mksn_timeout; - } - - return status; -} - -void -lstcon_rpc_stat_reply(struct lstcon_rpc_trans *trans, struct srpc_msg *msg, - struct lstcon_node *nd, struct lstcon_trans_stat *stat) -{ - struct 
srpc_rmsn_reply *rmsn_rep; - struct srpc_debug_reply *dbg_rep; - struct srpc_batch_reply *bat_rep; - struct srpc_test_reply *test_rep; - struct srpc_stat_reply *stat_rep; - int rc = 0; - - switch (trans->tas_opc) { - case LST_TRANS_SESNEW: - rc = lstcon_sesnew_stat_reply(trans, nd, msg); - if (!rc) { - lstcon_sesop_stat_success(stat, 1); - return; - } - - lstcon_sesop_stat_failure(stat, 1); - break; - - case LST_TRANS_SESEND: - rmsn_rep = &msg->msg_body.rmsn_reply; - /* ESRCH is not an error for end session */ - if (!rmsn_rep->rmsn_status || - rmsn_rep->rmsn_status == ESRCH) { - lstcon_sesop_stat_success(stat, 1); - return; - } - - lstcon_sesop_stat_failure(stat, 1); - rc = rmsn_rep->rmsn_status; - break; - - case LST_TRANS_SESQRY: - case LST_TRANS_SESPING: - dbg_rep = &msg->msg_body.dbg_reply; - - if (dbg_rep->dbg_status == ESRCH) { - lstcon_sesqry_stat_unknown(stat, 1); - return; - } - - if (lstcon_session_match(dbg_rep->dbg_sid)) - lstcon_sesqry_stat_active(stat, 1); - else - lstcon_sesqry_stat_busy(stat, 1); - return; - - case LST_TRANS_TSBRUN: - case LST_TRANS_TSBSTOP: - bat_rep = &msg->msg_body.bat_reply; - - if (!bat_rep->bar_status) { - lstcon_tsbop_stat_success(stat, 1); - return; - } - - if (bat_rep->bar_status == EPERM && - trans->tas_opc == LST_TRANS_TSBSTOP) { - lstcon_tsbop_stat_success(stat, 1); - return; - } - - lstcon_tsbop_stat_failure(stat, 1); - rc = bat_rep->bar_status; - break; - - case LST_TRANS_TSBCLIQRY: - case LST_TRANS_TSBSRVQRY: - bat_rep = &msg->msg_body.bat_reply; - - if (bat_rep->bar_active) - lstcon_tsbqry_stat_run(stat, 1); - else - lstcon_tsbqry_stat_idle(stat, 1); - - if (!bat_rep->bar_status) - return; - - lstcon_tsbqry_stat_failure(stat, 1); - rc = bat_rep->bar_status; - break; - - case LST_TRANS_TSBCLIADD: - case LST_TRANS_TSBSRVADD: - test_rep = &msg->msg_body.tes_reply; - - if (!test_rep->tsr_status) { - lstcon_tsbop_stat_success(stat, 1); - return; - } - - lstcon_tsbop_stat_failure(stat, 1); - rc = test_rep->tsr_status; - 
break; - - case LST_TRANS_STATQRY: - stat_rep = &msg->msg_body.stat_reply; - - if (!stat_rep->str_status) { - lstcon_statqry_stat_success(stat, 1); - return; - } - - lstcon_statqry_stat_failure(stat, 1); - rc = stat_rep->str_status; - break; - - default: - LBUG(); - } - - if (!stat->trs_fwk_errno) - stat->trs_fwk_errno = rc; -} - -int -lstcon_rpc_trans_ndlist(struct list_head *ndlist, - struct list_head *translist, int transop, - void *arg, lstcon_rpc_cond_func_t condition, - struct lstcon_rpc_trans **transpp) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - struct lstcon_rpc *rpc; - unsigned int feats; - int rc; - - /* Creating session RPG for list of nodes */ - - rc = lstcon_rpc_trans_prep(translist, transop, &trans); - if (rc) { - CERROR("Can't create transaction %d: %d\n", transop, rc); - return rc; - } - - feats = trans->tas_features; - list_for_each_entry(ndl, ndlist, ndl_link) { - rc = !condition ? 1 : - condition(transop, ndl->ndl_node, arg); - - if (!rc) - continue; - - if (rc < 0) { - CDEBUG(D_NET, "Condition error while creating RPC for transaction %d: %d\n", - transop, rc); - break; - } - - nd = ndl->ndl_node; - - switch (transop) { - case LST_TRANS_SESNEW: - case LST_TRANS_SESEND: - rc = lstcon_sesrpc_prep(nd, transop, feats, &rpc); - break; - case LST_TRANS_SESQRY: - case LST_TRANS_SESPING: - rc = lstcon_dbgrpc_prep(nd, feats, &rpc); - break; - case LST_TRANS_TSBCLIADD: - case LST_TRANS_TSBSRVADD: - rc = lstcon_testrpc_prep(nd, transop, feats, - (struct lstcon_test *)arg, - &rpc); - break; - case LST_TRANS_TSBRUN: - case LST_TRANS_TSBSTOP: - case LST_TRANS_TSBCLIQRY: - case LST_TRANS_TSBSRVQRY: - rc = lstcon_batrpc_prep(nd, transop, feats, - (struct lstcon_tsb_hdr *)arg, - &rpc); - break; - case LST_TRANS_STATQRY: - rc = lstcon_statrpc_prep(nd, feats, &rpc); - break; - default: - rc = -EINVAL; - break; - } - - if (rc) { - CERROR("Failed to create RPC for transaction %s: %d\n", - 
lstcon_rpc_trans_name(transop), rc); - break; - } - - lstcon_rpc_trans_addreq(trans, rpc); - } - - if (!rc) { - *transpp = trans; - return 0; - } - - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -static void -lstcon_rpc_pinger(void *arg) -{ - struct stt_timer *ptimer = (struct stt_timer *)arg; - struct lstcon_rpc_trans *trans; - struct lstcon_rpc *crpc; - struct srpc_msg *rep; - struct srpc_debug_reqst *drq; - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - int intv; - int count = 0; - int rc; - - /* - * RPC pinger is a special case of transaction, - * it's called by timer at 8 seconds interval. - */ - mutex_lock(&console_session.ses_mutex); - - if (console_session.ses_shutdown || console_session.ses_expired) { - mutex_unlock(&console_session.ses_mutex); - return; - } - - if (!console_session.ses_expired && - ktime_get_real_seconds() - console_session.ses_laststamp > - (time64_t)console_session.ses_timeout) - console_session.ses_expired = 1; - - trans = console_session.ses_ping; - - LASSERT(trans); - - list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) { - nd = ndl->ndl_node; - - if (console_session.ses_expired) { - /* idle console, end session on all nodes */ - if (nd->nd_state != LST_NODE_ACTIVE) - continue; - - rc = lstcon_sesrpc_prep(nd, LST_TRANS_SESEND, - trans->tas_features, &crpc); - if (rc) { - CERROR("Out of memory\n"); - break; - } - - lstcon_rpc_trans_addreq(trans, crpc); - lstcon_rpc_post(crpc); - - continue; - } - - crpc = &nd->nd_ping; - - if (crpc->crp_rpc) { - LASSERT(crpc->crp_trans == trans); - LASSERT(!list_empty(&crpc->crp_link)); - - spin_lock(&crpc->crp_rpc->crpc_lock); - - LASSERT(crpc->crp_posted); - - if (!crpc->crp_finished) { - /* in flight */ - spin_unlock(&crpc->crp_rpc->crpc_lock); - continue; - } - - spin_unlock(&crpc->crp_rpc->crpc_lock); - - lstcon_rpc_get_reply(crpc, &rep); - - list_del_init(&crpc->crp_link); - - lstcon_rpc_put(crpc); - } - - if (nd->nd_state != LST_NODE_ACTIVE) - continue; - - intv = 
(jiffies - nd->nd_stamp) / msecs_to_jiffies(MSEC_PER_SEC); - if (intv < nd->nd_timeout / 2) - continue; - - rc = lstcon_rpc_init(nd, SRPC_SERVICE_DEBUG, - trans->tas_features, 0, 0, 1, crpc); - if (rc) { - CERROR("Out of memory\n"); - break; - } - - drq = &crpc->crp_rpc->crpc_reqstmsg.msg_body.dbg_reqst; - - drq->dbg_sid = console_session.ses_id; - drq->dbg_flags = 0; - - lstcon_rpc_trans_addreq(trans, crpc); - lstcon_rpc_post(crpc); - - count++; - } - - if (console_session.ses_expired) { - mutex_unlock(&console_session.ses_mutex); - return; - } - - CDEBUG(D_NET, "Ping %d nodes in session\n", count); - - ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL; - stt_add_timer(ptimer); - - mutex_unlock(&console_session.ses_mutex); -} - -int -lstcon_rpc_pinger_start(void) -{ - struct stt_timer *ptimer; - int rc; - - LASSERT(list_empty(&console_session.ses_rpc_freelist)); - LASSERT(!atomic_read(&console_session.ses_rpc_counter)); - - rc = lstcon_rpc_trans_prep(NULL, LST_TRANS_SESPING, - &console_session.ses_ping); - if (rc) { - CERROR("Failed to create console pinger\n"); - return rc; - } - - ptimer = &console_session.ses_ping_timer; - ptimer->stt_expires = ktime_get_real_seconds() + LST_PING_INTERVAL; - - stt_add_timer(ptimer); - - return 0; -} - -void -lstcon_rpc_pinger_stop(void) -{ - LASSERT(console_session.ses_shutdown); - - stt_del_timer(&console_session.ses_ping_timer); - - lstcon_rpc_trans_abort(console_session.ses_ping, -ESHUTDOWN); - lstcon_rpc_trans_stat(console_session.ses_ping, lstcon_trans_stat()); - lstcon_rpc_trans_destroy(console_session.ses_ping); - - memset(lstcon_trans_stat(), 0, sizeof(struct lstcon_trans_stat)); - - console_session.ses_ping = NULL; -} - -void -lstcon_rpc_cleanup_wait(void) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_rpc *crpc; - struct lstcon_rpc *temp; - struct list_head *pacer; - struct list_head zlist; - - /* Called with hold of global mutex */ - - LASSERT(console_session.ses_shutdown); - - while 
(!list_empty(&console_session.ses_trans_list)) { - list_for_each(pacer, &console_session.ses_trans_list) { - trans = list_entry(pacer, struct lstcon_rpc_trans, - tas_link); - - CDEBUG(D_NET, "Session closed, wakeup transaction %s\n", - lstcon_rpc_trans_name(trans->tas_opc)); - - wake_up(&trans->tas_waitq); - } - - mutex_unlock(&console_session.ses_mutex); - - CWARN("Session is shutting down, waiting for termination of transactions\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - - mutex_lock(&console_session.ses_mutex); - } - - spin_lock(&console_session.ses_rpc_lock); - - lst_wait_until(!atomic_read(&console_session.ses_rpc_counter), - console_session.ses_rpc_lock, - "Network is not accessible or target is down, waiting for %d console RPCs to being recycled\n", - atomic_read(&console_session.ses_rpc_counter)); - - list_add(&zlist, &console_session.ses_rpc_freelist); - list_del_init(&console_session.ses_rpc_freelist); - - spin_unlock(&console_session.ses_rpc_lock); - - list_for_each_entry_safe(crpc, temp, &zlist, crp_link) { - list_del(&crpc->crp_link); - kfree(crpc); - } -} - -int -lstcon_rpc_module_init(void) -{ - INIT_LIST_HEAD(&console_session.ses_ping_timer.stt_list); - console_session.ses_ping_timer.stt_func = lstcon_rpc_pinger; - console_session.ses_ping_timer.stt_data = &console_session.ses_ping_timer; - - console_session.ses_ping = NULL; - - spin_lock_init(&console_session.ses_rpc_lock); - atomic_set(&console_session.ses_rpc_counter, 0); - INIT_LIST_HEAD(&console_session.ses_rpc_freelist); - - return 0; -} - -void -lstcon_rpc_module_fini(void) -{ - LASSERT(list_empty(&console_session.ses_rpc_freelist)); - LASSERT(!atomic_read(&console_session.ses_rpc_counter)); -} diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.h b/drivers/staging/lustre/lnet/selftest/conrpc.h deleted file mode 100644 index ce2f92d04838..000000000000 --- a/drivers/staging/lustre/lnet/selftest/conrpc.h +++ /dev/null @@ -1,142 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * /lnet/selftest/conrpc.h - * - * Console rpc - * - * Author: Liang Zhen - */ - -#ifndef __LST_CONRPC_H__ -#define __LST_CONRPC_H__ - -#include -#include -#include "rpc.h" -#include "selftest.h" - -/* Console rpc and rpc transaction */ -#define LST_TRANS_TIMEOUT 30 -#define LST_TRANS_MIN_TIMEOUT 3 - -#define LST_VALIDATE_TIMEOUT(t) min(max(t, LST_TRANS_MIN_TIMEOUT), LST_TRANS_TIMEOUT) - -#define LST_PING_INTERVAL 8 - -struct lstcon_rpc_trans; -struct lstcon_tsb_hdr; -struct lstcon_test; -struct lstcon_node; - -struct lstcon_rpc { - struct list_head crp_link; /* chain on rpc transaction */ - struct srpc_client_rpc *crp_rpc; /* client rpc */ - struct lstcon_node *crp_node; /* destination node */ - struct lstcon_rpc_trans *crp_trans; /* conrpc transaction */ - - unsigned int crp_posted:1; /* rpc is posted */ - unsigned int crp_finished:1; /* rpc is finished */ - unsigned int crp_unpacked:1; /* reply is unpacked */ - /** RPC is embedded in other structure and can't free it */ - unsigned int crp_embedded:1; - int crp_status; /* console rpc errors */ - unsigned long crp_stamp; /* replied time stamp */ -}; - -struct lstcon_rpc_trans { - struct list_head tas_olink; /* link chain on owner list */ - struct list_head tas_link; /* link chain on global list */ - int tas_opc; /* operation code of transaction */ - unsigned int tas_feats_updated; /* features mask is uptodate */ - unsigned int tas_features; /* test features mask */ - wait_queue_head_t tas_waitq; /* wait queue head */ - atomic_t tas_remaining; /* # of un-scheduled rpcs */ - struct list_head tas_rpcs_list; /* queued requests */ -}; - -#define LST_TRANS_PRIVATE 0x1000 - -#define LST_TRANS_SESNEW (LST_TRANS_PRIVATE | 0x01) -#define LST_TRANS_SESEND (LST_TRANS_PRIVATE | 0x02) -#define LST_TRANS_SESQRY 0x03 -#define LST_TRANS_SESPING 0x04 - -#define LST_TRANS_TSBCLIADD (LST_TRANS_PRIVATE | 0x11) -#define LST_TRANS_TSBSRVADD (LST_TRANS_PRIVATE | 0x12) -#define LST_TRANS_TSBRUN (LST_TRANS_PRIVATE | 0x13) -#define 
LST_TRANS_TSBSTOP (LST_TRANS_PRIVATE | 0x14) -#define LST_TRANS_TSBCLIQRY 0x15 -#define LST_TRANS_TSBSRVQRY 0x16 - -#define LST_TRANS_STATQRY 0x21 - -typedef int (*lstcon_rpc_cond_func_t)(int, struct lstcon_node *, void *); -typedef int (*lstcon_rpc_readent_func_t)(int, struct srpc_msg *, - struct lstcon_rpc_ent __user *); - -int lstcon_sesrpc_prep(struct lstcon_node *nd, int transop, - unsigned int version, struct lstcon_rpc **crpc); -int lstcon_dbgrpc_prep(struct lstcon_node *nd, - unsigned int version, struct lstcon_rpc **crpc); -int lstcon_batrpc_prep(struct lstcon_node *nd, int transop, - unsigned int version, struct lstcon_tsb_hdr *tsb, - struct lstcon_rpc **crpc); -int lstcon_testrpc_prep(struct lstcon_node *nd, int transop, - unsigned int version, struct lstcon_test *test, - struct lstcon_rpc **crpc); -int lstcon_statrpc_prep(struct lstcon_node *nd, unsigned int version, - struct lstcon_rpc **crpc); -void lstcon_rpc_put(struct lstcon_rpc *crpc); -int lstcon_rpc_trans_prep(struct list_head *translist, - int transop, struct lstcon_rpc_trans **transpp); -int lstcon_rpc_trans_ndlist(struct list_head *ndlist, - struct list_head *translist, int transop, - void *arg, lstcon_rpc_cond_func_t condition, - struct lstcon_rpc_trans **transpp); -void lstcon_rpc_trans_stat(struct lstcon_rpc_trans *trans, - struct lstcon_trans_stat *stat); -int lstcon_rpc_trans_interpreter(struct lstcon_rpc_trans *trans, - struct list_head __user *head_up, - lstcon_rpc_readent_func_t readent); -void lstcon_rpc_trans_abort(struct lstcon_rpc_trans *trans, int error); -void lstcon_rpc_trans_destroy(struct lstcon_rpc_trans *trans); -void lstcon_rpc_trans_addreq(struct lstcon_rpc_trans *trans, - struct lstcon_rpc *req); -int lstcon_rpc_trans_postwait(struct lstcon_rpc_trans *trans, int timeout); -int lstcon_rpc_pinger_start(void); -void lstcon_rpc_pinger_stop(void); -void lstcon_rpc_cleanup_wait(void); -int lstcon_rpc_module_init(void); -void lstcon_rpc_module_fini(void); - -#endif diff --git 
a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c deleted file mode 100644 index 3c1c1b5997e0..000000000000 --- a/drivers/staging/lustre/lnet/selftest/console.c +++ /dev/null @@ -1,2104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/selftest/conctl.c - * - * Infrastructure of LST console - * - * Author: Liang Zhen - */ - -#include -#include "console.h" -#include "conrpc.h" - -#define LST_NODE_STATE_COUNTER(nd, p) \ -do { \ - if ((nd)->nd_state == LST_NODE_ACTIVE) \ - (p)->nle_nactive++; \ - else if ((nd)->nd_state == LST_NODE_BUSY) \ - (p)->nle_nbusy++; \ - else if ((nd)->nd_state == LST_NODE_DOWN) \ - (p)->nle_ndown++; \ - else \ - (p)->nle_nunknown++; \ - (p)->nle_nnode++; \ -} while (0) - -struct lstcon_session console_session; - -static void -lstcon_node_get(struct lstcon_node *nd) -{ - LASSERT(nd->nd_ref >= 1); - - nd->nd_ref++; -} - -static int -lstcon_node_find(struct lnet_process_id id, struct lstcon_node **ndpp, - int create) -{ - struct lstcon_ndlink *ndl; - unsigned int idx = LNET_NIDADDR(id.nid) % LST_GLOBAL_HASHSIZE; - - LASSERT(id.nid != LNET_NID_ANY); - - list_for_each_entry(ndl, &console_session.ses_ndl_hash[idx], - ndl_hlink) { - if (ndl->ndl_node->nd_id.nid != id.nid || - ndl->ndl_node->nd_id.pid != id.pid) - continue; - - lstcon_node_get(ndl->ndl_node); - *ndpp = ndl->ndl_node; - return 0; - } - - if (!create) - return -ENOENT; - - *ndpp = kzalloc(sizeof(**ndpp) + sizeof(*ndl), GFP_KERNEL); - if (!*ndpp) - return -ENOMEM; - - ndl = (struct lstcon_ndlink *)(*ndpp + 1); - - ndl->ndl_node = *ndpp; - - ndl->ndl_node->nd_ref = 1; - ndl->ndl_node->nd_id = id; - ndl->ndl_node->nd_stamp = jiffies; - ndl->ndl_node->nd_state = LST_NODE_UNKNOWN; - ndl->ndl_node->nd_timeout = 0; - memset(&ndl->ndl_node->nd_ping, 0, sizeof(struct lstcon_rpc)); - - /* - * queued in global hash & list, no refcount is taken by - * global hash & list, if caller release his refcount, - * node will be released - */ - list_add_tail(&ndl->ndl_hlink, &console_session.ses_ndl_hash[idx]); - list_add_tail(&ndl->ndl_link, &console_session.ses_ndl_list); - - return 0; -} - -static void -lstcon_node_put(struct lstcon_node *nd) -{ - struct lstcon_ndlink *ndl; - - LASSERT(nd->nd_ref > 0); - - if 
(--nd->nd_ref > 0) - return; - - ndl = (struct lstcon_ndlink *)(nd + 1); - - LASSERT(!list_empty(&ndl->ndl_link)); - LASSERT(!list_empty(&ndl->ndl_hlink)); - - /* remove from session */ - list_del(&ndl->ndl_link); - list_del(&ndl->ndl_hlink); - - kfree(nd); -} - -static int -lstcon_ndlink_find(struct list_head *hash, struct lnet_process_id id, - struct lstcon_ndlink **ndlpp, int create) -{ - unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE; - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - int rc; - - if (id.nid == LNET_NID_ANY) - return -EINVAL; - - /* search in hash */ - list_for_each_entry(ndl, &hash[idx], ndl_hlink) { - if (ndl->ndl_node->nd_id.nid != id.nid || - ndl->ndl_node->nd_id.pid != id.pid) - continue; - - *ndlpp = ndl; - return 0; - } - - if (!create) - return -ENOENT; - - /* find or create in session hash */ - rc = lstcon_node_find(id, &nd, (create == 1) ? 1 : 0); - if (rc) - return rc; - - ndl = kzalloc(sizeof(struct lstcon_ndlink), GFP_NOFS); - if (!ndl) { - lstcon_node_put(nd); - return -ENOMEM; - } - - *ndlpp = ndl; - - ndl->ndl_node = nd; - INIT_LIST_HEAD(&ndl->ndl_link); - list_add_tail(&ndl->ndl_hlink, &hash[idx]); - - return 0; -} - -static void -lstcon_ndlink_release(struct lstcon_ndlink *ndl) -{ - LASSERT(list_empty(&ndl->ndl_link)); - LASSERT(!list_empty(&ndl->ndl_hlink)); - - list_del(&ndl->ndl_hlink); /* delete from hash */ - lstcon_node_put(ndl->ndl_node); - - kfree(ndl); -} - -static int -lstcon_group_alloc(char *name, struct lstcon_group **grpp) -{ - struct lstcon_group *grp; - int i; - - grp = kmalloc(offsetof(struct lstcon_group, - grp_ndl_hash[LST_NODE_HASHSIZE]), - GFP_KERNEL); - if (!grp) - return -ENOMEM; - - grp->grp_ref = 1; - if (name) { - if (strlen(name) > sizeof(grp->grp_name) - 1) { - kfree(grp); - return -E2BIG; - } - strncpy(grp->grp_name, name, sizeof(grp->grp_name)); - } - - INIT_LIST_HEAD(&grp->grp_link); - INIT_LIST_HEAD(&grp->grp_ndl_list); - INIT_LIST_HEAD(&grp->grp_trans_list); - - for (i = 0; i 
< LST_NODE_HASHSIZE; i++) - INIT_LIST_HEAD(&grp->grp_ndl_hash[i]); - - *grpp = grp; - - return 0; -} - -static void -lstcon_group_addref(struct lstcon_group *grp) -{ - grp->grp_ref++; -} - -static void lstcon_group_ndlink_release(struct lstcon_group *, - struct lstcon_ndlink *); - -static void -lstcon_group_drain(struct lstcon_group *grp, int keep) -{ - struct lstcon_ndlink *ndl; - struct lstcon_ndlink *tmp; - - list_for_each_entry_safe(ndl, tmp, &grp->grp_ndl_list, ndl_link) { - if (!(ndl->ndl_node->nd_state & keep)) - lstcon_group_ndlink_release(grp, ndl); - } -} - -static void -lstcon_group_decref(struct lstcon_group *grp) -{ - int i; - - if (--grp->grp_ref > 0) - return; - - if (!list_empty(&grp->grp_link)) - list_del(&grp->grp_link); - - lstcon_group_drain(grp, 0); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) - LASSERT(list_empty(&grp->grp_ndl_hash[i])); - - kfree(grp); -} - -static int -lstcon_group_find(const char *name, struct lstcon_group **grpp) -{ - struct lstcon_group *grp; - - list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { - if (strncmp(grp->grp_name, name, LST_NAME_SIZE)) - continue; - - lstcon_group_addref(grp); /* +1 ref for caller */ - *grpp = grp; - return 0; - } - - return -ENOENT; -} - -static int -lstcon_group_ndlink_find(struct lstcon_group *grp, struct lnet_process_id id, - struct lstcon_ndlink **ndlpp, int create) -{ - int rc; - - rc = lstcon_ndlink_find(&grp->grp_ndl_hash[0], id, ndlpp, create); - if (rc) - return rc; - - if (!list_empty(&(*ndlpp)->ndl_link)) - return 0; - - list_add_tail(&(*ndlpp)->ndl_link, &grp->grp_ndl_list); - grp->grp_nnode++; - - return 0; -} - -static void -lstcon_group_ndlink_release(struct lstcon_group *grp, struct lstcon_ndlink *ndl) -{ - list_del_init(&ndl->ndl_link); - lstcon_ndlink_release(ndl); - grp->grp_nnode--; -} - -static void -lstcon_group_ndlink_move(struct lstcon_group *old, - struct lstcon_group *new, struct lstcon_ndlink *ndl) -{ - unsigned int idx = 
LNET_NIDADDR(ndl->ndl_node->nd_id.nid) % - LST_NODE_HASHSIZE; - - list_del(&ndl->ndl_hlink); - list_del(&ndl->ndl_link); - old->grp_nnode--; - - list_add_tail(&ndl->ndl_hlink, &new->grp_ndl_hash[idx]); - list_add_tail(&ndl->ndl_link, &new->grp_ndl_list); - new->grp_nnode++; -} - -static void -lstcon_group_move(struct lstcon_group *old, struct lstcon_group *new) -{ - struct lstcon_ndlink *ndl; - - while (!list_empty(&old->grp_ndl_list)) { - ndl = list_entry(old->grp_ndl_list.next, - struct lstcon_ndlink, ndl_link); - lstcon_group_ndlink_move(old, new, ndl); - } -} - -static int -lstcon_sesrpc_condition(int transop, struct lstcon_node *nd, void *arg) -{ - struct lstcon_group *grp = (struct lstcon_group *)arg; - - switch (transop) { - case LST_TRANS_SESNEW: - if (nd->nd_state == LST_NODE_ACTIVE) - return 0; - break; - - case LST_TRANS_SESEND: - if (nd->nd_state != LST_NODE_ACTIVE) - return 0; - - if (grp && nd->nd_ref > 1) - return 0; - break; - - case LST_TRANS_SESQRY: - break; - - default: - LBUG(); - } - - return 1; -} - -static int -lstcon_sesrpc_readent(int transop, struct srpc_msg *msg, - struct lstcon_rpc_ent __user *ent_up) -{ - struct srpc_debug_reply *rep; - - switch (transop) { - case LST_TRANS_SESNEW: - case LST_TRANS_SESEND: - return 0; - - case LST_TRANS_SESQRY: - rep = &msg->msg_body.dbg_reply; - - if (copy_to_user(&ent_up->rpe_priv[0], - &rep->dbg_timeout, sizeof(int)) || - copy_to_user(&ent_up->rpe_payload[0], - &rep->dbg_name, LST_NAME_SIZE)) - return -EFAULT; - - return 0; - - default: - LBUG(); - } - - return 0; -} - -static int -lstcon_group_nodes_add(struct lstcon_group *grp, - int count, struct lnet_process_id __user *ids_up, - unsigned int *featp, - struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_ndlink *ndl; - struct lstcon_group *tmp; - struct lnet_process_id id; - int i; - int rc; - - rc = lstcon_group_alloc(NULL, &tmp); - if (rc) { - CERROR("Out of memory\n"); - return -ENOMEM; - } - - for (i = 0 ; i 
< count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - break; - } - - /* skip if it's in this group already */ - rc = lstcon_group_ndlink_find(grp, id, &ndl, 0); - if (!rc) - continue; - - /* add to tmp group */ - rc = lstcon_group_ndlink_find(tmp, id, &ndl, 1); - if (rc) { - CERROR("Can't create ndlink, out of memory\n"); - break; - } - } - - if (rc) { - lstcon_group_decref(tmp); - return rc; - } - - rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list, - &tmp->grp_trans_list, LST_TRANS_SESNEW, - tmp, lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - lstcon_group_decref(tmp); - return rc; - } - - /* post all RPCs */ - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_sesrpc_readent); - *featp = trans->tas_features; - - /* destroy all RPGs */ - lstcon_rpc_trans_destroy(trans); - - lstcon_group_move(tmp, grp); - lstcon_group_decref(tmp); - - return rc; -} - -static int -lstcon_group_nodes_remove(struct lstcon_group *grp, - int count, struct lnet_process_id __user *ids_up, - struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_ndlink *ndl; - struct lstcon_group *tmp; - struct lnet_process_id id; - int rc; - int i; - - /* End session and remove node from the group */ - - rc = lstcon_group_alloc(NULL, &tmp); - if (rc) { - CERROR("Out of memory\n"); - return -ENOMEM; - } - - for (i = 0; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - goto error; - } - - /* move node to tmp group */ - if (!lstcon_group_ndlink_find(grp, id, &ndl, 0)) - lstcon_group_ndlink_move(grp, tmp, ndl); - } - - rc = lstcon_rpc_trans_ndlist(&tmp->grp_ndl_list, - &tmp->grp_trans_list, LST_TRANS_SESEND, - tmp, lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - goto error; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = 
lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - /* release nodes anyway, because we can't rollback status */ - lstcon_group_decref(tmp); - - return rc; -error: - lstcon_group_move(tmp, grp); - lstcon_group_decref(tmp); - - return rc; -} - -int -lstcon_group_add(char *name) -{ - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(name, &grp) ? 0 : -EEXIST; - if (rc) { - /* find a group with same name */ - lstcon_group_decref(grp); - return rc; - } - - rc = lstcon_group_alloc(name, &grp); - if (rc) { - CERROR("Can't allocate descriptor for group %s\n", name); - return -ENOMEM; - } - - list_add_tail(&grp->grp_link, &console_session.ses_grp_list); - - return rc; -} - -int -lstcon_nodes_add(char *name, int count, struct lnet_process_id __user *ids_up, - unsigned int *featp, struct list_head __user *result_up) -{ - struct lstcon_group *grp; - int rc; - - LASSERT(count > 0); - LASSERT(ids_up); - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by other threads or test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_decref(grp); - - return -EBUSY; - } - - rc = lstcon_group_nodes_add(grp, count, ids_up, featp, result_up); - - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_group_del(char *name) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group: %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by others threads or test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_decref(grp); - return -EBUSY; - } - - rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, - &grp->grp_trans_list, LST_TRANS_SESEND, - grp, lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - lstcon_group_decref(grp); - return rc; - } - - 
lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - lstcon_rpc_trans_destroy(trans); - - lstcon_group_decref(grp); - /* - * -ref for session, it's destroyed, - * status can't be rolled back, destroy group anyway - */ - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_group_clean(char *name, int args) -{ - struct lstcon_group *grp = NULL; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_decref(grp); - return -EBUSY; - } - - args = (LST_NODE_ACTIVE | LST_NODE_BUSY | - LST_NODE_DOWN | LST_NODE_UNKNOWN) & ~args; - - lstcon_group_drain(grp, args); - - lstcon_group_decref(grp); - /* release empty group */ - if (list_empty(&grp->grp_ndl_list)) - lstcon_group_decref(grp); - - return 0; -} - -int -lstcon_nodes_remove(char *name, int count, - struct lnet_process_id __user *ids_up, - struct list_head __user *result_up) -{ - struct lstcon_group *grp = NULL; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group: %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_decref(grp); - return -EBUSY; - } - - rc = lstcon_group_nodes_remove(grp, count, ids_up, result_up); - - lstcon_group_decref(grp); - /* release empty group */ - if (list_empty(&grp->grp_ndl_list)) - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_group_refresh(char *name, struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group: %s\n", name); - return rc; - } - - if (grp->grp_ref > 2) { - /* referred by test */ - CDEBUG(D_NET, "Group %s is busy\n", name); - lstcon_group_decref(grp); - return -EBUSY; - } - - /* re-invite all inactive 
nodes int the group */ - rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, - &grp->grp_trans_list, LST_TRANS_SESNEW, - grp, lstcon_sesrpc_condition, &trans); - if (rc) { - /* local error, return */ - CDEBUG(D_NET, "Can't create transaction: %d\n", rc); - lstcon_group_decref(grp); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - /* -ref for me */ - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_group_list(int index, int len, char __user *name_up) -{ - struct lstcon_group *grp; - - LASSERT(index >= 0); - LASSERT(name_up); - - list_for_each_entry(grp, &console_session.ses_grp_list, grp_link) { - if (!index--) { - return copy_to_user(name_up, grp->grp_name, len) ? - -EFAULT : 0; - } - } - - return -ENOENT; -} - -static int -lstcon_nodes_getent(struct list_head *head, int *index_p, - int *count_p, struct lstcon_node_ent __user *dents_up) -{ - struct lstcon_ndlink *ndl; - struct lstcon_node *nd; - int count = 0; - int index = 0; - - LASSERT(index_p && count_p); - LASSERT(dents_up); - LASSERT(*index_p >= 0); - LASSERT(*count_p > 0); - - list_for_each_entry(ndl, head, ndl_link) { - if (index++ < *index_p) - continue; - - if (count >= *count_p) - break; - - nd = ndl->ndl_node; - if (copy_to_user(&dents_up[count].nde_id, - &nd->nd_id, sizeof(nd->nd_id)) || - copy_to_user(&dents_up[count].nde_state, - &nd->nd_state, sizeof(nd->nd_state))) - return -EFAULT; - - count++; - } - - if (index <= *index_p) - return -ENOENT; - - *count_p = count; - *index_p = index; - - return 0; -} - -int -lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gents_p, - int *index_p, int *count_p, - struct lstcon_node_ent __user *dents_up) -{ - struct lstcon_ndlist_ent *gentp; - struct lstcon_group *grp; - struct lstcon_ndlink *ndl; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group %s\n", name); - return rc; - 
} - - if (dents_up) { - /* verbose query */ - rc = lstcon_nodes_getent(&grp->grp_ndl_list, - index_p, count_p, dents_up); - lstcon_group_decref(grp); - - return rc; - } - - /* non-verbose query */ - gentp = kzalloc(sizeof(struct lstcon_ndlist_ent), GFP_NOFS); - if (!gentp) { - CERROR("Can't allocate ndlist_ent\n"); - lstcon_group_decref(grp); - - return -ENOMEM; - } - - list_for_each_entry(ndl, &grp->grp_ndl_list, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, gentp); - - rc = copy_to_user(gents_p, gentp, - sizeof(struct lstcon_ndlist_ent)) ? -EFAULT : 0; - - kfree(gentp); - - lstcon_group_decref(grp); - - return rc; -} - -static int -lstcon_batch_find(const char *name, struct lstcon_batch **batpp) -{ - struct lstcon_batch *bat; - - list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) { - if (!strncmp(bat->bat_name, name, LST_NAME_SIZE)) { - *batpp = bat; - return 0; - } - } - - return -ENOENT; -} - -int -lstcon_batch_add(char *name) -{ - struct lstcon_batch *bat; - int i; - int rc; - - rc = !lstcon_batch_find(name, &bat) ? 
-EEXIST : 0; - if (rc) { - CDEBUG(D_NET, "Batch %s already exists\n", name); - return rc; - } - - bat = kzalloc(sizeof(struct lstcon_batch), GFP_NOFS); - if (!bat) { - CERROR("Can't allocate descriptor for batch %s\n", name); - return -ENOMEM; - } - - bat->bat_cli_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE, - GFP_KERNEL); - if (!bat->bat_cli_hash) { - CERROR("Can't allocate hash for batch %s\n", name); - kfree(bat); - - return -ENOMEM; - } - - bat->bat_srv_hash = kmalloc(sizeof(struct list_head) * LST_NODE_HASHSIZE, - GFP_KERNEL); - if (!bat->bat_srv_hash) { - CERROR("Can't allocate hash for batch %s\n", name); - kfree(bat->bat_cli_hash); - kfree(bat); - - return -ENOMEM; - } - - if (strlen(name) > sizeof(bat->bat_name) - 1) { - kfree(bat->bat_srv_hash); - kfree(bat->bat_cli_hash); - kfree(bat); - return -E2BIG; - } - strncpy(bat->bat_name, name, sizeof(bat->bat_name)); - bat->bat_hdr.tsb_index = 0; - bat->bat_hdr.tsb_id.bat_id = ++console_session.ses_id_cookie; - - bat->bat_ntest = 0; - bat->bat_state = LST_BATCH_IDLE; - - INIT_LIST_HEAD(&bat->bat_cli_list); - INIT_LIST_HEAD(&bat->bat_srv_list); - INIT_LIST_HEAD(&bat->bat_test_list); - INIT_LIST_HEAD(&bat->bat_trans_list); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) { - INIT_LIST_HEAD(&bat->bat_cli_hash[i]); - INIT_LIST_HEAD(&bat->bat_srv_hash[i]); - } - - list_add_tail(&bat->bat_link, &console_session.ses_bat_list); - - return rc; -} - -int -lstcon_batch_list(int index, int len, char __user *name_up) -{ - struct lstcon_batch *bat; - - LASSERT(name_up); - LASSERT(index >= 0); - - list_for_each_entry(bat, &console_session.ses_bat_list, bat_link) { - if (!index--) { - return copy_to_user(name_up, bat->bat_name, len) ? 
- -EFAULT : 0; - } - } - - return -ENOENT; -} - -int -lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up, - int server, int testidx, int *index_p, int *ndent_p, - struct lstcon_node_ent __user *dents_up) -{ - struct lstcon_test_batch_ent *entp; - struct list_head *clilst; - struct list_head *srvlst; - struct lstcon_test *test = NULL; - struct lstcon_batch *bat; - struct lstcon_ndlink *ndl; - int rc; - - rc = lstcon_batch_find(name, &bat); - if (rc) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return -ENOENT; - } - - if (testidx > 0) { - /* query test, test index start from 1 */ - list_for_each_entry(test, &bat->bat_test_list, tes_link) { - if (testidx-- == 1) - break; - } - - if (testidx > 0) { - CDEBUG(D_NET, "Can't find specified test in batch\n"); - return -ENOENT; - } - } - - clilst = !test ? &bat->bat_cli_list : - &test->tes_src_grp->grp_ndl_list; - srvlst = !test ? &bat->bat_srv_list : - &test->tes_dst_grp->grp_ndl_list; - - if (dents_up) { - rc = lstcon_nodes_getent((server ? srvlst : clilst), - index_p, ndent_p, dents_up); - return rc; - } - - /* non-verbose query */ - entp = kzalloc(sizeof(struct lstcon_test_batch_ent), GFP_NOFS); - if (!entp) - return -ENOMEM; - - if (!test) { - entp->u.tbe_batch.bae_ntest = bat->bat_ntest; - entp->u.tbe_batch.bae_state = bat->bat_state; - } else { - entp->u.tbe_test.tse_type = test->tes_type; - entp->u.tbe_test.tse_loop = test->tes_loop; - entp->u.tbe_test.tse_concur = test->tes_concur; - } - - list_for_each_entry(ndl, clilst, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_cli_nle); - - list_for_each_entry(ndl, srvlst, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, &entp->tbe_srv_nle); - - rc = copy_to_user(ent_up, entp, - sizeof(struct lstcon_test_batch_ent)) ? 
-EFAULT : 0; - - kfree(entp); - - return rc; -} - -static int -lstcon_batrpc_condition(int transop, struct lstcon_node *nd, void *arg) -{ - switch (transop) { - case LST_TRANS_TSBRUN: - if (nd->nd_state != LST_NODE_ACTIVE) - return -ENETDOWN; - break; - - case LST_TRANS_TSBSTOP: - if (nd->nd_state != LST_NODE_ACTIVE) - return 0; - break; - - case LST_TRANS_TSBCLIQRY: - case LST_TRANS_TSBSRVQRY: - break; - } - - return 1; -} - -static int -lstcon_batch_op(struct lstcon_batch *bat, int transop, - struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - int rc; - - rc = lstcon_rpc_trans_ndlist(&bat->bat_cli_list, - &bat->bat_trans_list, transop, - bat, lstcon_batrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -int -lstcon_batch_run(char *name, int timeout, struct list_head __user *result_up) -{ - struct lstcon_batch *bat; - int rc; - - if (lstcon_batch_find(name, &bat)) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return -ENOENT; - } - - bat->bat_arg = timeout; - - rc = lstcon_batch_op(bat, LST_TRANS_TSBRUN, result_up); - - /* mark batch as running if it's started in any node */ - if (lstcon_tsbop_stat_success(lstcon_trans_stat(), 0)) - bat->bat_state = LST_BATCH_RUNNING; - - return rc; -} - -int -lstcon_batch_stop(char *name, int force, struct list_head __user *result_up) -{ - struct lstcon_batch *bat; - int rc; - - if (lstcon_batch_find(name, &bat)) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return -ENOENT; - } - - bat->bat_arg = force; - - rc = lstcon_batch_op(bat, LST_TRANS_TSBSTOP, result_up); - - /* mark batch as stopped if all RPCs finished */ - if (!lstcon_tsbop_stat_failure(lstcon_trans_stat(), 0)) - bat->bat_state = LST_BATCH_IDLE; - - return rc; -} - -static void -lstcon_batch_destroy(struct 
lstcon_batch *bat) -{ - struct lstcon_ndlink *ndl; - struct lstcon_test *test; - int i; - - list_del(&bat->bat_link); - - while (!list_empty(&bat->bat_test_list)) { - test = list_entry(bat->bat_test_list.next, - struct lstcon_test, tes_link); - LASSERT(list_empty(&test->tes_trans_list)); - - list_del(&test->tes_link); - - lstcon_group_decref(test->tes_src_grp); - lstcon_group_decref(test->tes_dst_grp); - - kfree(test); - } - - LASSERT(list_empty(&bat->bat_trans_list)); - - while (!list_empty(&bat->bat_cli_list)) { - ndl = list_entry(bat->bat_cli_list.next, - struct lstcon_ndlink, ndl_link); - list_del_init(&ndl->ndl_link); - - lstcon_ndlink_release(ndl); - } - - while (!list_empty(&bat->bat_srv_list)) { - ndl = list_entry(bat->bat_srv_list.next, - struct lstcon_ndlink, ndl_link); - list_del_init(&ndl->ndl_link); - - lstcon_ndlink_release(ndl); - } - - for (i = 0; i < LST_NODE_HASHSIZE; i++) { - LASSERT(list_empty(&bat->bat_cli_hash[i])); - LASSERT(list_empty(&bat->bat_srv_hash[i])); - } - - kfree(bat->bat_cli_hash); - kfree(bat->bat_srv_hash); - kfree(bat); -} - -static int -lstcon_testrpc_condition(int transop, struct lstcon_node *nd, void *arg) -{ - struct lstcon_test *test; - struct lstcon_batch *batch; - struct lstcon_ndlink *ndl; - struct list_head *hash; - struct list_head *head; - - test = (struct lstcon_test *)arg; - LASSERT(test); - - batch = test->tes_batch; - LASSERT(batch); - - if (test->tes_oneside && - transop == LST_TRANS_TSBSRVADD) - return 0; - - if (nd->nd_state != LST_NODE_ACTIVE) - return -ENETDOWN; - - if (transop == LST_TRANS_TSBCLIADD) { - hash = batch->bat_cli_hash; - head = &batch->bat_cli_list; - - } else { - LASSERT(transop == LST_TRANS_TSBSRVADD); - - hash = batch->bat_srv_hash; - head = &batch->bat_srv_list; - } - - LASSERT(nd->nd_id.nid != LNET_NID_ANY); - - if (lstcon_ndlink_find(hash, nd->nd_id, &ndl, 1)) - return -ENOMEM; - - if (list_empty(&ndl->ndl_link)) - list_add_tail(&ndl->ndl_link, head); - - return 1; -} - -static int 
-lstcon_test_nodes_add(struct lstcon_test *test, - struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_group *grp; - int transop; - int rc; - - LASSERT(test->tes_src_grp); - LASSERT(test->tes_dst_grp); - - transop = LST_TRANS_TSBSRVADD; - grp = test->tes_dst_grp; -again: - rc = lstcon_rpc_trans_ndlist(&grp->grp_ndl_list, - &test->tes_trans_list, transop, - test, lstcon_testrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - if (lstcon_trans_stat()->trs_rpc_errno || - lstcon_trans_stat()->trs_fwk_errno) { - lstcon_rpc_trans_interpreter(trans, result_up, NULL); - - lstcon_rpc_trans_destroy(trans); - /* return if any error */ - CDEBUG(D_NET, "Failed to add test %s, RPC error %d, framework error %d\n", - transop == LST_TRANS_TSBCLIADD ? "client" : "server", - lstcon_trans_stat()->trs_rpc_errno, - lstcon_trans_stat()->trs_fwk_errno); - - return rc; - } - - lstcon_rpc_trans_destroy(trans); - - if (transop == LST_TRANS_TSBCLIADD) - return rc; - - transop = LST_TRANS_TSBCLIADD; - grp = test->tes_src_grp; - test->tes_cliidx = 0; - - /* requests to test clients */ - goto again; -} - -static int -lstcon_verify_batch(const char *name, struct lstcon_batch **batch) -{ - int rc; - - rc = lstcon_batch_find(name, batch); - if (rc) { - CDEBUG(D_NET, "Can't find batch %s\n", name); - return rc; - } - - if ((*batch)->bat_state != LST_BATCH_IDLE) { - CDEBUG(D_NET, "Can't change running batch %s\n", name); - return -EINVAL; - } - - return 0; -} - -static int -lstcon_verify_group(const char *name, struct lstcon_group **grp) -{ - int rc; - struct lstcon_ndlink *ndl; - - rc = lstcon_group_find(name, grp); - if (rc) { - CDEBUG(D_NET, "can't find group %s\n", name); - return rc; - } - - list_for_each_entry(ndl, &(*grp)->grp_ndl_list, ndl_link) { - if (ndl->ndl_node->nd_state == LST_NODE_ACTIVE) - return 0; - } - - CDEBUG(D_NET, "Group %s has no 
ACTIVE nodes\n", name); - - return -EINVAL; -} - -int -lstcon_test_add(char *batch_name, int type, int loop, - int concur, int dist, int span, - char *src_name, char *dst_name, - void *param, int paramlen, int *retp, - struct list_head __user *result_up) -{ - struct lstcon_test *test = NULL; - int rc; - struct lstcon_group *src_grp = NULL; - struct lstcon_group *dst_grp = NULL; - struct lstcon_batch *batch = NULL; - - /* - * verify that a batch of the given name exists, and the groups - * that will be part of the batch exist and have at least one - * active node - */ - rc = lstcon_verify_batch(batch_name, &batch); - if (rc) - goto out; - - rc = lstcon_verify_group(src_name, &src_grp); - if (rc) - goto out; - - rc = lstcon_verify_group(dst_name, &dst_grp); - if (rc) - goto out; - - if (dst_grp->grp_userland) - *retp = 1; - - test = kzalloc(offsetof(struct lstcon_test, tes_param[paramlen]), - GFP_KERNEL); - if (!test) { - CERROR("Can't allocate test descriptor\n"); - rc = -ENOMEM; - - goto out; - } - - test->tes_hdr.tsb_id = batch->bat_hdr.tsb_id; - test->tes_batch = batch; - test->tes_type = type; - test->tes_oneside = 0; /* TODO */ - test->tes_loop = loop; - test->tes_concur = concur; - test->tes_stop_onerr = 1; /* TODO */ - test->tes_span = span; - test->tes_dist = dist; - test->tes_cliidx = 0; /* just used for creating RPC */ - test->tes_src_grp = src_grp; - test->tes_dst_grp = dst_grp; - INIT_LIST_HEAD(&test->tes_trans_list); - - if (param) { - test->tes_paramlen = paramlen; - memcpy(&test->tes_param[0], param, paramlen); - } - - rc = lstcon_test_nodes_add(test, result_up); - - if (rc) - goto out; - - if (lstcon_trans_stat()->trs_rpc_errno || - lstcon_trans_stat()->trs_fwk_errno) - CDEBUG(D_NET, "Failed to add test %d to batch %s\n", type, - batch_name); - - /* add to test list anyway, so user can check what's going on */ - list_add_tail(&test->tes_link, &batch->bat_test_list); - - batch->bat_ntest++; - test->tes_hdr.tsb_index = batch->bat_ntest; - - /* hold 
groups so nobody can change them */ - return rc; -out: - kfree(test); - - if (dst_grp) - lstcon_group_decref(dst_grp); - - if (src_grp) - lstcon_group_decref(src_grp); - - return rc; -} - -static int -lstcon_test_find(struct lstcon_batch *batch, int idx, - struct lstcon_test **testpp) -{ - struct lstcon_test *test; - - list_for_each_entry(test, &batch->bat_test_list, tes_link) { - if (idx == test->tes_hdr.tsb_index) { - *testpp = test; - return 0; - } - } - - return -ENOENT; -} - -static int -lstcon_tsbrpc_readent(int transop, struct srpc_msg *msg, - struct lstcon_rpc_ent __user *ent_up) -{ - struct srpc_batch_reply *rep = &msg->msg_body.bat_reply; - - LASSERT(transop == LST_TRANS_TSBCLIQRY || - transop == LST_TRANS_TSBSRVQRY); - - /* positive errno, framework error code */ - if (copy_to_user(&ent_up->rpe_priv[0], &rep->bar_active, - sizeof(rep->bar_active))) - return -EFAULT; - - return 0; -} - -int -lstcon_test_batch_query(char *name, int testidx, int client, - int timeout, struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - struct list_head *translist; - struct list_head *ndlist; - struct lstcon_tsb_hdr *hdr; - struct lstcon_batch *batch; - struct lstcon_test *test = NULL; - int transop; - int rc; - - rc = lstcon_batch_find(name, &batch); - if (rc) { - CDEBUG(D_NET, "Can't find batch: %s\n", name); - return rc; - } - - if (!testidx) { - translist = &batch->bat_trans_list; - ndlist = &batch->bat_cli_list; - hdr = &batch->bat_hdr; - } else { - /* query specified test only */ - rc = lstcon_test_find(batch, testidx, &test); - if (rc) { - CDEBUG(D_NET, "Can't find test: %d\n", testidx); - return rc; - } - - translist = &test->tes_trans_list; - ndlist = &test->tes_src_grp->grp_ndl_list; - hdr = &test->tes_hdr; - } - - transop = client ? 
LST_TRANS_TSBCLIQRY : LST_TRANS_TSBSRVQRY; - - rc = lstcon_rpc_trans_ndlist(ndlist, translist, transop, hdr, - lstcon_batrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, timeout); - - /* query a batch, not a test */ - if (!testidx && - !lstcon_rpc_stat_failure(lstcon_trans_stat(), 0) && - !lstcon_tsbqry_stat_run(lstcon_trans_stat(), 0)) { - /* all RPCs finished, and no active test */ - batch->bat_state = LST_BATCH_IDLE; - } - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_tsbrpc_readent); - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -static int -lstcon_statrpc_readent(int transop, struct srpc_msg *msg, - struct lstcon_rpc_ent __user *ent_up) -{ - struct srpc_stat_reply *rep = &msg->msg_body.stat_reply; - struct sfw_counters __user *sfwk_stat; - struct srpc_counters __user *srpc_stat; - struct lnet_counters __user *lnet_stat; - - if (rep->str_status) - return 0; - - sfwk_stat = (struct sfw_counters __user *)&ent_up->rpe_payload[0]; - srpc_stat = (struct srpc_counters __user *)(sfwk_stat + 1); - lnet_stat = (struct lnet_counters __user *)(srpc_stat + 1); - - if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) || - copy_to_user(srpc_stat, &rep->str_rpc, sizeof(*srpc_stat)) || - copy_to_user(lnet_stat, &rep->str_lnet, sizeof(*lnet_stat))) - return -EFAULT; - - return 0; -} - -static int -lstcon_ndlist_stat(struct list_head *ndlist, - int timeout, struct list_head __user *result_up) -{ - struct list_head head; - struct lstcon_rpc_trans *trans; - int rc; - - INIT_LIST_HEAD(&head); - - rc = lstcon_rpc_trans_ndlist(ndlist, &head, - LST_TRANS_STATQRY, NULL, NULL, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout)); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_statrpc_readent); - lstcon_rpc_trans_destroy(trans); - - return rc; -} - 
-int -lstcon_group_stat(char *grp_name, int timeout, - struct list_head __user *result_up) -{ - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(grp_name, &grp); - if (rc) { - CDEBUG(D_NET, "Can't find group %s\n", grp_name); - return rc; - } - - rc = lstcon_ndlist_stat(&grp->grp_ndl_list, timeout, result_up); - - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up, - int timeout, struct list_head __user *result_up) -{ - struct lstcon_ndlink *ndl; - struct lstcon_group *tmp; - struct lnet_process_id id; - int i; - int rc; - - rc = lstcon_group_alloc(NULL, &tmp); - if (rc) { - CERROR("Out of memory\n"); - return -ENOMEM; - } - - for (i = 0 ; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - break; - } - - /* add to tmp group */ - rc = lstcon_group_ndlink_find(tmp, id, &ndl, 2); - if (rc) { - CDEBUG((rc == -ENOMEM) ? D_ERROR : D_NET, - "Failed to find or create %s: %d\n", - libcfs_id2str(id), rc); - break; - } - } - - if (rc) { - lstcon_group_decref(tmp); - return rc; - } - - rc = lstcon_ndlist_stat(&tmp->grp_ndl_list, timeout, result_up); - - lstcon_group_decref(tmp); - - return rc; -} - -static int -lstcon_debug_ndlist(struct list_head *ndlist, - struct list_head *translist, - int timeout, struct list_head __user *result_up) -{ - struct lstcon_rpc_trans *trans; - int rc; - - rc = lstcon_rpc_trans_ndlist(ndlist, translist, LST_TRANS_SESQRY, - NULL, lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - lstcon_rpc_trans_postwait(trans, LST_VALIDATE_TIMEOUT(timeout)); - - rc = lstcon_rpc_trans_interpreter(trans, result_up, - lstcon_sesrpc_readent); - lstcon_rpc_trans_destroy(trans); - - return rc; -} - -int -lstcon_session_debug(int timeout, struct list_head __user *result_up) -{ - return lstcon_debug_ndlist(&console_session.ses_ndl_list, - NULL, timeout, result_up); -} - -int 
-lstcon_batch_debug(int timeout, char *name, - int client, struct list_head __user *result_up) -{ - struct lstcon_batch *bat; - int rc; - - rc = lstcon_batch_find(name, &bat); - if (rc) - return -ENOENT; - - rc = lstcon_debug_ndlist(client ? &bat->bat_cli_list : - &bat->bat_srv_list, - NULL, timeout, result_up); - - return rc; -} - -int -lstcon_group_debug(int timeout, char *name, - struct list_head __user *result_up) -{ - struct lstcon_group *grp; - int rc; - - rc = lstcon_group_find(name, &grp); - if (rc) - return -ENOENT; - - rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL, - timeout, result_up); - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_nodes_debug(int timeout, int count, - struct lnet_process_id __user *ids_up, - struct list_head __user *result_up) -{ - struct lnet_process_id id; - struct lstcon_ndlink *ndl; - struct lstcon_group *grp; - int i; - int rc; - - rc = lstcon_group_alloc(NULL, &grp); - if (rc) { - CDEBUG(D_NET, "Out of memory\n"); - return rc; - } - - for (i = 0; i < count; i++) { - if (copy_from_user(&id, &ids_up[i], sizeof(id))) { - rc = -EFAULT; - break; - } - - /* node is added to tmp group */ - rc = lstcon_group_ndlink_find(grp, id, &ndl, 1); - if (rc) { - CERROR("Can't create node link\n"); - break; - } - } - - if (rc) { - lstcon_group_decref(grp); - return rc; - } - - rc = lstcon_debug_ndlist(&grp->grp_ndl_list, NULL, - timeout, result_up); - - lstcon_group_decref(grp); - - return rc; -} - -int -lstcon_session_match(struct lst_sid sid) -{ - return (console_session.ses_id.ses_nid == sid.ses_nid && - console_session.ses_id.ses_stamp == sid.ses_stamp) ? 
1 : 0; -} - -static void -lstcon_new_session_id(struct lst_sid *sid) -{ - struct lnet_process_id id; - - LASSERT(console_session.ses_state == LST_SESSION_NONE); - - LNetGetId(1, &id); - sid->ses_nid = id.nid; - sid->ses_stamp = jiffies; -} - -int -lstcon_session_new(char *name, int key, unsigned int feats, - int timeout, int force, struct lst_sid __user *sid_up) -{ - int rc = 0; - int i; - - if (console_session.ses_state != LST_SESSION_NONE) { - /* session exists */ - if (!force) { - CNETERR("Session %s already exists\n", - console_session.ses_name); - return -EEXIST; - } - - rc = lstcon_session_end(); - - /* lstcon_session_end() only return local error */ - if (rc) - return rc; - } - - if (feats & ~LST_FEATS_MASK) { - CNETERR("Unknown session features %x\n", - (feats & ~LST_FEATS_MASK)); - return -EINVAL; - } - - for (i = 0; i < LST_GLOBAL_HASHSIZE; i++) - LASSERT(list_empty(&console_session.ses_ndl_hash[i])); - - lstcon_new_session_id(&console_session.ses_id); - - console_session.ses_key = key; - console_session.ses_state = LST_SESSION_ACTIVE; - console_session.ses_force = !!force; - console_session.ses_features = feats; - console_session.ses_feats_updated = 0; - console_session.ses_timeout = (timeout <= 0) ? 
- LST_CONSOLE_TIMEOUT : timeout; - - if (strlen(name) > sizeof(console_session.ses_name) - 1) - return -E2BIG; - strlcpy(console_session.ses_name, name, - sizeof(console_session.ses_name)); - - rc = lstcon_batch_add(LST_DEFAULT_BATCH); - if (rc) - return rc; - - rc = lstcon_rpc_pinger_start(); - if (rc) { - struct lstcon_batch *bat = NULL; - - lstcon_batch_find(LST_DEFAULT_BATCH, &bat); - lstcon_batch_destroy(bat); - - return rc; - } - - if (!copy_to_user(sid_up, &console_session.ses_id, - sizeof(struct lst_sid))) - return rc; - - lstcon_session_end(); - - return -EFAULT; -} - -int -lstcon_session_info(struct lst_sid __user *sid_up, int __user *key_up, - unsigned __user *featp, - struct lstcon_ndlist_ent __user *ndinfo_up, - char __user *name_up, int len) -{ - struct lstcon_ndlist_ent *entp; - struct lstcon_ndlink *ndl; - int rc = 0; - - if (console_session.ses_state != LST_SESSION_ACTIVE) - return -ESRCH; - - entp = kzalloc(sizeof(*entp), GFP_NOFS); - if (!entp) - return -ENOMEM; - - list_for_each_entry(ndl, &console_session.ses_ndl_list, ndl_link) - LST_NODE_STATE_COUNTER(ndl->ndl_node, entp); - - if (copy_to_user(sid_up, &console_session.ses_id, - sizeof(*sid_up)) || - copy_to_user(key_up, &console_session.ses_key, - sizeof(*key_up)) || - copy_to_user(featp, &console_session.ses_features, - sizeof(*featp)) || - copy_to_user(ndinfo_up, entp, sizeof(*entp)) || - copy_to_user(name_up, console_session.ses_name, len)) - rc = -EFAULT; - - kfree(entp); - - return rc; -} - -int -lstcon_session_end(void) -{ - struct lstcon_rpc_trans *trans; - struct lstcon_group *grp; - struct lstcon_batch *bat; - int rc = 0; - - LASSERT(console_session.ses_state == LST_SESSION_ACTIVE); - - rc = lstcon_rpc_trans_ndlist(&console_session.ses_ndl_list, - NULL, LST_TRANS_SESEND, NULL, - lstcon_sesrpc_condition, &trans); - if (rc) { - CERROR("Can't create transaction: %d\n", rc); - return rc; - } - - console_session.ses_shutdown = 1; - - lstcon_rpc_pinger_stop(); - - 
lstcon_rpc_trans_postwait(trans, LST_TRANS_TIMEOUT); - - lstcon_rpc_trans_destroy(trans); - /* User can do nothing even rpc failed, so go on */ - - /* waiting for orphan rpcs to die */ - lstcon_rpc_cleanup_wait(); - - console_session.ses_id = LST_INVALID_SID; - console_session.ses_state = LST_SESSION_NONE; - console_session.ses_key = 0; - console_session.ses_force = 0; - console_session.ses_feats_updated = 0; - - /* destroy all batches */ - while (!list_empty(&console_session.ses_bat_list)) { - bat = list_entry(console_session.ses_bat_list.next, - struct lstcon_batch, bat_link); - - lstcon_batch_destroy(bat); - } - - /* destroy all groups */ - while (!list_empty(&console_session.ses_grp_list)) { - grp = list_entry(console_session.ses_grp_list.next, - struct lstcon_group, grp_link); - LASSERT(grp->grp_ref == 1); - - lstcon_group_decref(grp); - } - - /* all nodes should be released */ - LASSERT(list_empty(&console_session.ses_ndl_list)); - - console_session.ses_shutdown = 0; - console_session.ses_expired = 0; - - return rc; -} - -int -lstcon_session_feats_check(unsigned int feats) -{ - int rc = 0; - - if (feats & ~LST_FEATS_MASK) { - CERROR("Can't support these features: %x\n", - (feats & ~LST_FEATS_MASK)); - return -EPROTO; - } - - spin_lock(&console_session.ses_rpc_lock); - - if (!console_session.ses_feats_updated) { - console_session.ses_feats_updated = 1; - console_session.ses_features = feats; - } - - if (console_session.ses_features != feats) - rc = -EPROTO; - - spin_unlock(&console_session.ses_rpc_lock); - - if (rc) { - CERROR("remote features %x do not match with session features %x of console\n", - feats, console_session.ses_features); - } - - return rc; -} - -static int -lstcon_acceptor_handle(struct srpc_server_rpc *rpc) -{ - struct srpc_msg *rep = &rpc->srpc_replymsg; - struct srpc_msg *req = &rpc->srpc_reqstbuf->buf_msg; - struct srpc_join_reqst *jreq = &req->msg_body.join_reqst; - struct srpc_join_reply *jrep = &rep->msg_body.join_reply; - struct 
lstcon_group *grp = NULL; - struct lstcon_ndlink *ndl; - int rc = 0; - - sfw_unpack_message(req); - - mutex_lock(&console_session.ses_mutex); - - jrep->join_sid = console_session.ses_id; - - if (console_session.ses_id.ses_nid == LNET_NID_ANY) { - jrep->join_status = ESRCH; - goto out; - } - - if (lstcon_session_feats_check(req->msg_ses_feats)) { - jrep->join_status = EPROTO; - goto out; - } - - if (jreq->join_sid.ses_nid != LNET_NID_ANY && - !lstcon_session_match(jreq->join_sid)) { - jrep->join_status = EBUSY; - goto out; - } - - if (lstcon_group_find(jreq->join_group, &grp)) { - rc = lstcon_group_alloc(jreq->join_group, &grp); - if (rc) { - CERROR("Out of memory\n"); - goto out; - } - - list_add_tail(&grp->grp_link, - &console_session.ses_grp_list); - lstcon_group_addref(grp); - } - - if (grp->grp_ref > 2) { - /* Group in using */ - jrep->join_status = EBUSY; - goto out; - } - - rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 0); - if (!rc) { - jrep->join_status = EEXIST; - goto out; - } - - rc = lstcon_group_ndlink_find(grp, rpc->srpc_peer, &ndl, 1); - if (rc) { - CERROR("Out of memory\n"); - goto out; - } - - ndl->ndl_node->nd_state = LST_NODE_ACTIVE; - ndl->ndl_node->nd_timeout = console_session.ses_timeout; - - if (!grp->grp_userland) - grp->grp_userland = 1; - - strlcpy(jrep->join_session, console_session.ses_name, - sizeof(jrep->join_session)); - jrep->join_timeout = console_session.ses_timeout; - jrep->join_status = 0; - -out: - rep->msg_ses_feats = console_session.ses_features; - if (grp) - lstcon_group_decref(grp); - - mutex_unlock(&console_session.ses_mutex); - - return rc; -} - -static struct srpc_service lstcon_acceptor_service; - -static void lstcon_init_acceptor_service(void) -{ - /* initialize selftest console acceptor service table */ - lstcon_acceptor_service.sv_name = "join session"; - lstcon_acceptor_service.sv_handler = lstcon_acceptor_handle; - lstcon_acceptor_service.sv_id = SRPC_SERVICE_JOIN; - lstcon_acceptor_service.sv_wi_total = 
SFW_FRWK_WI_MAX; -} - -static struct notifier_block lstcon_ioctl_handler = { - .notifier_call = lstcon_ioctl_entry, -}; - -/* initialize console */ -int -lstcon_console_init(void) -{ - int i; - int rc; - - memset(&console_session, 0, sizeof(struct lstcon_session)); - - console_session.ses_id = LST_INVALID_SID; - console_session.ses_state = LST_SESSION_NONE; - console_session.ses_timeout = 0; - console_session.ses_force = 0; - console_session.ses_expired = 0; - console_session.ses_feats_updated = 0; - console_session.ses_features = LST_FEATS_MASK; - console_session.ses_laststamp = ktime_get_real_seconds(); - - mutex_init(&console_session.ses_mutex); - - INIT_LIST_HEAD(&console_session.ses_ndl_list); - INIT_LIST_HEAD(&console_session.ses_grp_list); - INIT_LIST_HEAD(&console_session.ses_bat_list); - INIT_LIST_HEAD(&console_session.ses_trans_list); - - console_session.ses_ndl_hash = - kmalloc(sizeof(struct list_head) * LST_GLOBAL_HASHSIZE, GFP_KERNEL); - if (!console_session.ses_ndl_hash) - return -ENOMEM; - - for (i = 0; i < LST_GLOBAL_HASHSIZE; i++) - INIT_LIST_HEAD(&console_session.ses_ndl_hash[i]); - - /* initialize acceptor service table */ - lstcon_init_acceptor_service(); - - rc = srpc_add_service(&lstcon_acceptor_service); - LASSERT(rc != -EBUSY); - if (rc) { - kfree(console_session.ses_ndl_hash); - return rc; - } - - rc = srpc_service_add_buffers(&lstcon_acceptor_service, - lstcon_acceptor_service.sv_wi_total); - if (rc) { - rc = -ENOMEM; - goto out; - } - - rc = blocking_notifier_chain_register(&libcfs_ioctl_list, - &lstcon_ioctl_handler); - - if (!rc) { - lstcon_rpc_module_init(); - return 0; - } - -out: - srpc_shutdown_service(&lstcon_acceptor_service); - srpc_remove_service(&lstcon_acceptor_service); - - kfree(console_session.ses_ndl_hash); - - srpc_wait_service_shutdown(&lstcon_acceptor_service); - - return rc; -} - -int -lstcon_console_fini(void) -{ - int i; - - blocking_notifier_chain_unregister(&libcfs_ioctl_list, - &lstcon_ioctl_handler); - - 
mutex_lock(&console_session.ses_mutex); - - srpc_shutdown_service(&lstcon_acceptor_service); - srpc_remove_service(&lstcon_acceptor_service); - - if (console_session.ses_state != LST_SESSION_NONE) - lstcon_session_end(); - - lstcon_rpc_module_fini(); - - mutex_unlock(&console_session.ses_mutex); - - LASSERT(list_empty(&console_session.ses_ndl_list)); - LASSERT(list_empty(&console_session.ses_grp_list)); - LASSERT(list_empty(&console_session.ses_bat_list)); - LASSERT(list_empty(&console_session.ses_trans_list)); - - for (i = 0; i < LST_NODE_HASHSIZE; i++) - LASSERT(list_empty(&console_session.ses_ndl_hash[i])); - - kfree(console_session.ses_ndl_hash); - - srpc_wait_service_shutdown(&lstcon_acceptor_service); - - return 0; -} diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h deleted file mode 100644 index 2826205e36a1..000000000000 --- a/drivers/staging/lustre/lnet/selftest/console.h +++ /dev/null @@ -1,244 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/console.h - * - * kernel structure for LST console - * - * Author: Liang Zhen - */ - -#ifndef __LST_CONSOLE_H__ -#define __LST_CONSOLE_H__ - -#include -#include -#include "selftest.h" -#include "conrpc.h" - -/* node descriptor */ -struct lstcon_node { - struct lnet_process_id nd_id; /* id of the node */ - int nd_ref; /* reference count */ - int nd_state; /* state of the node */ - int nd_timeout; /* session timeout */ - unsigned long nd_stamp; /* timestamp of last replied RPC */ - struct lstcon_rpc nd_ping; /* ping rpc */ -}; - -/* node link descriptor */ -struct lstcon_ndlink { - struct list_head ndl_link; /* chain on list */ - struct list_head ndl_hlink; /* chain on hash */ - struct lstcon_node *ndl_node; /* pointer to node */ -}; - -/* (alias of nodes) group descriptor */ -struct lstcon_group { - struct list_head grp_link; /* chain on global group list - */ - int grp_ref; /* reference count */ - int grp_userland; /* has userland nodes */ - int grp_nnode; /* # of nodes */ - char grp_name[LST_NAME_SIZE]; /* group name */ - - struct list_head grp_trans_list; /* transaction list */ - struct list_head grp_ndl_list; /* nodes list */ - struct list_head grp_ndl_hash[0]; /* hash table for nodes */ -}; - -#define LST_BATCH_IDLE 0xB0 /* idle batch */ -#define LST_BATCH_RUNNING 0xB1 /* running batch */ - -struct lstcon_tsb_hdr { - struct lst_bid tsb_id; /* batch ID */ - int tsb_index; /* test index */ -}; - -/* (tests ) batch descriptor */ -struct lstcon_batch { - struct lstcon_tsb_hdr bat_hdr; /* test_batch header */ - struct list_head bat_link; /* chain on session's batches list */ - int bat_ntest; /* # of test */ - int bat_state; /* state of the batch */ - int bat_arg; /* parameter for run|stop, timeout - * for run, force for stop - */ - char bat_name[LST_NAME_SIZE];/* name of batch 
*/ - - struct list_head bat_test_list; /* list head of tests (struct lstcon_test) - */ - struct list_head bat_trans_list; /* list head of transaction */ - struct list_head bat_cli_list; /* list head of client nodes - * (struct lstcon_node) - */ - struct list_head *bat_cli_hash; /* hash table of client nodes */ - struct list_head bat_srv_list; /* list head of server nodes */ - struct list_head *bat_srv_hash; /* hash table of server nodes */ -}; - -/* a single test descriptor */ -struct lstcon_test { - struct lstcon_tsb_hdr tes_hdr; /* test batch header */ - struct list_head tes_link; /* chain on batch's tests list */ - struct lstcon_batch *tes_batch; /* pointer to batch */ - - int tes_type; /* type of the test, i.e: bulk, ping */ - int tes_stop_onerr; /* stop on error */ - int tes_oneside; /* one-sided test */ - int tes_concur; /* concurrency */ - int tes_loop; /* loop count */ - int tes_dist; /* nodes distribution of target group */ - int tes_span; /* nodes span of target group */ - int tes_cliidx; /* client index, used for RPC creating */ - - struct list_head tes_trans_list; /* transaction list */ - struct lstcon_group *tes_src_grp; /* group run the test */ - struct lstcon_group *tes_dst_grp; /* target group */ - - int tes_paramlen; /* test parameter length */ - char tes_param[0]; /* test parameter */ -}; - -#define LST_GLOBAL_HASHSIZE 503 /* global nodes hash table size */ -#define LST_NODE_HASHSIZE 239 /* node hash table (for batch or group) */ - -#define LST_SESSION_NONE 0x0 /* no session */ -#define LST_SESSION_ACTIVE 0x1 /* working session */ - -#define LST_CONSOLE_TIMEOUT 300 /* default console timeout */ - -struct lstcon_session { - struct mutex ses_mutex; /* only 1 thread in session */ - struct lst_sid ses_id; /* global session id */ - int ses_key; /* local session key */ - int ses_state; /* state of session */ - int ses_timeout; /* timeout in seconds */ - time64_t ses_laststamp; /* last operation stamp (seconds) - */ - unsigned int ses_features; /* tests 
features of the session - */ - unsigned int ses_feats_updated:1; /* features are synced with - * remote test nodes - */ - unsigned int ses_force:1; /* force creating */ - unsigned int ses_shutdown:1; /* session is shutting down */ - unsigned int ses_expired:1; /* console is timedout */ - __u64 ses_id_cookie; /* batch id cookie */ - char ses_name[LST_NAME_SIZE];/* session name */ - struct lstcon_rpc_trans *ses_ping; /* session pinger */ - struct stt_timer ses_ping_timer; /* timer for pinger */ - struct lstcon_trans_stat ses_trans_stat; /* transaction stats */ - - struct list_head ses_trans_list; /* global list of transaction */ - struct list_head ses_grp_list; /* global list of groups */ - struct list_head ses_bat_list; /* global list of batches */ - struct list_head ses_ndl_list; /* global list of nodes */ - struct list_head *ses_ndl_hash; /* hash table of nodes */ - - spinlock_t ses_rpc_lock; /* serialize */ - atomic_t ses_rpc_counter; /* # of initialized RPCs */ - struct list_head ses_rpc_freelist; /* idle console rpc */ -}; /* session descriptor */ - -extern struct lstcon_session console_session; - -static inline struct lstcon_trans_stat * -lstcon_trans_stat(void) -{ - return &console_session.ses_trans_stat; -} - -static inline struct list_head * -lstcon_id2hash(struct lnet_process_id id, struct list_head *hash) -{ - unsigned int idx = LNET_NIDADDR(id.nid) % LST_NODE_HASHSIZE; - - return &hash[idx]; -} - -int lstcon_ioctl_entry(struct notifier_block *nb, - unsigned long cmd, void *vdata); -int lstcon_console_init(void); -int lstcon_console_fini(void); -int lstcon_session_match(struct lst_sid sid); -int lstcon_session_new(char *name, int key, unsigned int version, - int timeout, int flags, struct lst_sid __user *sid_up); -int lstcon_session_info(struct lst_sid __user *sid_up, int __user *key, - unsigned __user *verp, struct lstcon_ndlist_ent __user *entp, - char __user *name_up, int len); -int lstcon_session_end(void); -int lstcon_session_debug(int timeout, 
struct list_head __user *result_up); -int lstcon_session_feats_check(unsigned int feats); -int lstcon_batch_debug(int timeout, char *name, - int client, struct list_head __user *result_up); -int lstcon_group_debug(int timeout, char *name, - struct list_head __user *result_up); -int lstcon_nodes_debug(int timeout, int nnd, - struct lnet_process_id __user *nds_up, - struct list_head __user *result_up); -int lstcon_group_add(char *name); -int lstcon_group_del(char *name); -int lstcon_group_clean(char *name, int args); -int lstcon_group_refresh(char *name, struct list_head __user *result_up); -int lstcon_nodes_add(char *name, int nnd, struct lnet_process_id __user *nds_up, - unsigned int *featp, struct list_head __user *result_up); -int lstcon_nodes_remove(char *name, int nnd, - struct lnet_process_id __user *nds_up, - struct list_head __user *result_up); -int lstcon_group_info(char *name, struct lstcon_ndlist_ent __user *gent_up, - int *index_p, int *ndent_p, - struct lstcon_node_ent __user *ndents_up); -int lstcon_group_list(int idx, int len, char __user *name_up); -int lstcon_batch_add(char *name); -int lstcon_batch_run(char *name, int timeout, - struct list_head __user *result_up); -int lstcon_batch_stop(char *name, int force, - struct list_head __user *result_up); -int lstcon_test_batch_query(char *name, int testidx, - int client, int timeout, - struct list_head __user *result_up); -int lstcon_batch_del(char *name); -int lstcon_batch_list(int idx, int namelen, char __user *name_up); -int lstcon_batch_info(char *name, struct lstcon_test_batch_ent __user *ent_up, - int server, int testidx, int *index_p, - int *ndent_p, struct lstcon_node_ent __user *dents_up); -int lstcon_group_stat(char *grp_name, int timeout, - struct list_head __user *result_up); -int lstcon_nodes_stat(int count, struct lnet_process_id __user *ids_up, - int timeout, struct list_head __user *result_up); -int lstcon_test_add(char *batch_name, int type, int loop, - int concur, int dist, int span, - 
char *src_name, char *dst_name, - void *param, int paramlen, int *retp, - struct list_head __user *result_up); -#endif diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c deleted file mode 100644 index 741af10560ad..000000000000 --- a/drivers/staging/lustre/lnet/selftest/framework.c +++ /dev/null @@ -1,1786 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/selftest/framework.c - * - * Author: Isaac Huang - * Author: Liang Zhen - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - -struct lst_sid LST_INVALID_SID = {LNET_NID_ANY, -1}; - -static int session_timeout = 100; -module_param(session_timeout, int, 0444); -MODULE_PARM_DESC(session_timeout, "test session timeout in seconds (100 by default, 0 == never)"); - -static int rpc_timeout = 64; -module_param(rpc_timeout, int, 0644); -MODULE_PARM_DESC(rpc_timeout, "rpc timeout in seconds (64 by default, 0 == never)"); - -#define sfw_unpack_id(id) \ -do { \ - __swab64s(&(id).nid); \ - __swab32s(&(id).pid); \ -} while (0) - -#define sfw_unpack_sid(sid) \ -do { \ - __swab64s(&(sid).ses_nid); \ - __swab64s(&(sid).ses_stamp); \ -} while (0) - -#define sfw_unpack_fw_counters(fc) \ -do { \ - __swab32s(&(fc).running_ms); \ - __swab32s(&(fc).active_batches); \ - __swab32s(&(fc).zombie_sessions); \ - __swab32s(&(fc).brw_errors); \ - __swab32s(&(fc).ping_errors); \ -} while (0) - -#define sfw_unpack_rpc_counters(rc) \ -do { \ - __swab32s(&(rc).errors); \ - __swab32s(&(rc).rpcs_sent); \ - __swab32s(&(rc).rpcs_rcvd); \ - __swab32s(&(rc).rpcs_dropped); \ - __swab32s(&(rc).rpcs_expired); \ - __swab64s(&(rc).bulk_get); \ - __swab64s(&(rc).bulk_put); \ -} while (0) - -#define sfw_unpack_lnet_counters(lc) \ -do { \ - __swab32s(&(lc).errors); \ - __swab32s(&(lc).msgs_max); \ - __swab32s(&(lc).msgs_alloc); \ - __swab32s(&(lc).send_count); \ - __swab32s(&(lc).recv_count); \ - __swab32s(&(lc).drop_count); \ - __swab32s(&(lc).route_count); \ - __swab64s(&(lc).send_length); \ - __swab64s(&(lc).recv_length); \ - __swab64s(&(lc).drop_length); \ - __swab64s(&(lc).route_length); \ -} while (0) - -#define sfw_test_active(t) (atomic_read(&(t)->tsi_nactive)) -#define sfw_batch_active(b) (atomic_read(&(b)->bat_nactive)) - -static struct smoketest_framework { - struct list_head fw_zombie_rpcs; /* RPCs to be recycled */ - struct list_head fw_zombie_sessions; /* stopping sessions 
*/ - struct list_head fw_tests; /* registered test cases */ - atomic_t fw_nzombies; /* # zombie sessions */ - spinlock_t fw_lock; /* serialise */ - struct sfw_session *fw_session; /* _the_ session */ - int fw_shuttingdown; /* shutdown in progress */ - struct srpc_server_rpc *fw_active_srpc;/* running RPC */ -} sfw_data; - -/* forward ref's */ -int sfw_stop_batch(struct sfw_batch *tsb, int force); -void sfw_destroy_session(struct sfw_session *sn); - -static inline struct sfw_test_case * -sfw_find_test_case(int id) -{ - struct sfw_test_case *tsc; - - LASSERT(id <= SRPC_SERVICE_MAX_ID); - LASSERT(id > SRPC_FRAMEWORK_SERVICE_MAX_ID); - - list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) { - if (tsc->tsc_srv_service->sv_id == id) - return tsc; - } - - return NULL; -} - -static int -sfw_register_test(struct srpc_service *service, - struct sfw_test_client_ops *cliops) -{ - struct sfw_test_case *tsc; - - if (sfw_find_test_case(service->sv_id)) { - CERROR("Failed to register test %s (%d)\n", - service->sv_name, service->sv_id); - return -EEXIST; - } - - tsc = kzalloc(sizeof(struct sfw_test_case), GFP_NOFS); - if (!tsc) - return -ENOMEM; - - tsc->tsc_cli_ops = cliops; - tsc->tsc_srv_service = service; - - list_add_tail(&tsc->tsc_list, &sfw_data.fw_tests); - return 0; -} - -static void -sfw_add_session_timer(void) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct stt_timer *timer = &sn->sn_timer; - - LASSERT(!sfw_data.fw_shuttingdown); - - if (!sn || !sn->sn_timeout) - return; - - LASSERT(!sn->sn_timer_active); - - sn->sn_timer_active = 1; - timer->stt_expires = ktime_get_real_seconds() + sn->sn_timeout; - stt_add_timer(timer); -} - -static int -sfw_del_session_timer(void) -{ - struct sfw_session *sn = sfw_data.fw_session; - - if (!sn || !sn->sn_timer_active) - return 0; - - LASSERT(sn->sn_timeout); - - if (stt_del_timer(&sn->sn_timer)) { /* timer defused */ - sn->sn_timer_active = 0; - return 0; - } - - return -EBUSY; /* racing with sfw_session_expired() */ -} 
- -static void -sfw_deactivate_session(void) -__must_hold(&sfw_data.fw_lock) -{ - struct sfw_session *sn = sfw_data.fw_session; - int nactive = 0; - struct sfw_batch *tsb; - struct sfw_test_case *tsc; - - if (!sn) - return; - - LASSERT(!sn->sn_timer_active); - - sfw_data.fw_session = NULL; - atomic_inc(&sfw_data.fw_nzombies); - list_add(&sn->sn_list, &sfw_data.fw_zombie_sessions); - - spin_unlock(&sfw_data.fw_lock); - - list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) { - srpc_abort_service(tsc->tsc_srv_service); - } - - spin_lock(&sfw_data.fw_lock); - - list_for_each_entry(tsb, &sn->sn_batches, bat_list) { - if (sfw_batch_active(tsb)) { - nactive++; - sfw_stop_batch(tsb, 1); - } - } - - if (nactive) - return; /* wait for active batches to stop */ - - list_del_init(&sn->sn_list); - spin_unlock(&sfw_data.fw_lock); - - sfw_destroy_session(sn); - - spin_lock(&sfw_data.fw_lock); -} - -static void -sfw_session_expired(void *data) -{ - struct sfw_session *sn = data; - - spin_lock(&sfw_data.fw_lock); - - LASSERT(sn->sn_timer_active); - LASSERT(sn == sfw_data.fw_session); - - CWARN("Session expired! 
sid: %s-%llu, name: %s\n", - libcfs_nid2str(sn->sn_id.ses_nid), - sn->sn_id.ses_stamp, &sn->sn_name[0]); - - sn->sn_timer_active = 0; - sfw_deactivate_session(); - - spin_unlock(&sfw_data.fw_lock); -} - -static inline void -sfw_init_session(struct sfw_session *sn, struct lst_sid sid, - unsigned int features, const char *name) -{ - struct stt_timer *timer = &sn->sn_timer; - - memset(sn, 0, sizeof(struct sfw_session)); - INIT_LIST_HEAD(&sn->sn_list); - INIT_LIST_HEAD(&sn->sn_batches); - atomic_set(&sn->sn_refcount, 1); /* +1 for caller */ - atomic_set(&sn->sn_brw_errors, 0); - atomic_set(&sn->sn_ping_errors, 0); - strlcpy(&sn->sn_name[0], name, sizeof(sn->sn_name)); - - sn->sn_timer_active = 0; - sn->sn_id = sid; - sn->sn_features = features; - sn->sn_timeout = session_timeout; - sn->sn_started = jiffies; - - timer->stt_data = sn; - timer->stt_func = sfw_session_expired; - INIT_LIST_HEAD(&timer->stt_list); -} - -/* completion handler for incoming framework RPCs */ -static void -sfw_server_rpc_done(struct srpc_server_rpc *rpc) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - int status = rpc->srpc_status; - - CDEBUG(D_NET, "Incoming framework RPC done: service %s, peer %s, status %s:%d\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.swi_state), - status); - - if (rpc->srpc_bulk) - sfw_free_pages(rpc); -} - -static void -sfw_client_rpc_fini(struct srpc_client_rpc *rpc) -{ - LASSERT(!rpc->crpc_bulk.bk_niov); - LASSERT(list_empty(&rpc->crpc_list)); - LASSERT(!atomic_read(&rpc->crpc_refcount)); - - CDEBUG(D_NET, "Outgoing framework RPC done: service %d, peer %s, status %s:%d:%d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(rpc->crpc_wi.swi_state), - rpc->crpc_aborted, rpc->crpc_status); - - spin_lock(&sfw_data.fw_lock); - - /* my callers must finish all RPCs before shutting me down */ - LASSERT(!sfw_data.fw_shuttingdown); - list_add(&rpc->crpc_list, &sfw_data.fw_zombie_rpcs); - - 
spin_unlock(&sfw_data.fw_lock); -} - -static struct sfw_batch * -sfw_find_batch(struct lst_bid bid) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct sfw_batch *bat; - - LASSERT(sn); - - list_for_each_entry(bat, &sn->sn_batches, bat_list) { - if (bat->bat_id.bat_id == bid.bat_id) - return bat; - } - - return NULL; -} - -static struct sfw_batch * -sfw_bid2batch(struct lst_bid bid) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct sfw_batch *bat; - - LASSERT(sn); - - bat = sfw_find_batch(bid); - if (bat) - return bat; - - bat = kzalloc(sizeof(struct sfw_batch), GFP_NOFS); - if (!bat) - return NULL; - - bat->bat_error = 0; - bat->bat_session = sn; - bat->bat_id = bid; - atomic_set(&bat->bat_nactive, 0); - INIT_LIST_HEAD(&bat->bat_tests); - - list_add_tail(&bat->bat_list, &sn->sn_batches); - return bat; -} - -static int -sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct sfw_counters *cnt = &reply->str_fw; - struct sfw_batch *bat; - - reply->str_sid = !sn ? 
LST_INVALID_SID : sn->sn_id; - - if (request->str_sid.ses_nid == LNET_NID_ANY) { - reply->str_status = EINVAL; - return 0; - } - - if (!sn || !sfw_sid_equal(request->str_sid, sn->sn_id)) { - reply->str_status = ESRCH; - return 0; - } - - lnet_counters_get(&reply->str_lnet); - srpc_get_counters(&reply->str_rpc); - - /* - * send over the msecs since the session was started - * with 32 bits to send, this is ~49 days - */ - cnt->running_ms = jiffies_to_msecs(jiffies - sn->sn_started); - cnt->brw_errors = atomic_read(&sn->sn_brw_errors); - cnt->ping_errors = atomic_read(&sn->sn_ping_errors); - cnt->zombie_sessions = atomic_read(&sfw_data.fw_nzombies); - - cnt->active_batches = 0; - list_for_each_entry(bat, &sn->sn_batches, bat_list) { - if (atomic_read(&bat->bat_nactive) > 0) - cnt->active_batches++; - } - - reply->str_status = 0; - return 0; -} - -int -sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct srpc_msg *msg = container_of(request, struct srpc_msg, - msg_body.mksn_reqst); - int cplen = 0; - - if (request->mksn_sid.ses_nid == LNET_NID_ANY) { - reply->mksn_sid = !sn ? 
LST_INVALID_SID : sn->sn_id; - reply->mksn_status = EINVAL; - return 0; - } - - if (sn) { - reply->mksn_status = 0; - reply->mksn_sid = sn->sn_id; - reply->mksn_timeout = sn->sn_timeout; - - if (sfw_sid_equal(request->mksn_sid, sn->sn_id)) { - atomic_inc(&sn->sn_refcount); - return 0; - } - - if (!request->mksn_force) { - reply->mksn_status = EBUSY; - cplen = strlcpy(&reply->mksn_name[0], &sn->sn_name[0], - sizeof(reply->mksn_name)); - if (cplen >= sizeof(reply->mksn_name)) - return -E2BIG; - return 0; - } - } - - /* - * reject the request if it requires unknown features - * NB: old version will always accept all features because it's not - * aware of srpc_msg::msg_ses_feats, it's a defect but it's also - * harmless because it will return zero feature to console, and it's - * console's responsibility to make sure all nodes in a session have - * same feature mask. - */ - if (msg->msg_ses_feats & ~LST_FEATS_MASK) { - reply->mksn_status = EPROTO; - return 0; - } - - /* brand new or create by force */ - sn = kzalloc(sizeof(struct sfw_session), GFP_NOFS); - if (!sn) { - CERROR("dropping RPC mksn under memory pressure\n"); - return -ENOMEM; - } - - sfw_init_session(sn, request->mksn_sid, - msg->msg_ses_feats, &request->mksn_name[0]); - - spin_lock(&sfw_data.fw_lock); - - sfw_deactivate_session(); - LASSERT(!sfw_data.fw_session); - sfw_data.fw_session = sn; - - spin_unlock(&sfw_data.fw_lock); - - reply->mksn_status = 0; - reply->mksn_sid = sn->sn_id; - reply->mksn_timeout = sn->sn_timeout; - return 0; -} - -static int -sfw_remove_session(struct srpc_rmsn_reqst *request, - struct srpc_rmsn_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - - reply->rmsn_sid = !sn ? LST_INVALID_SID : sn->sn_id; - - if (request->rmsn_sid.ses_nid == LNET_NID_ANY) { - reply->rmsn_status = EINVAL; - return 0; - } - - if (!sn || !sfw_sid_equal(request->rmsn_sid, sn->sn_id)) { - reply->rmsn_status = !sn ? 
ESRCH : EBUSY; - return 0; - } - - if (!atomic_dec_and_test(&sn->sn_refcount)) { - reply->rmsn_status = 0; - return 0; - } - - spin_lock(&sfw_data.fw_lock); - sfw_deactivate_session(); - spin_unlock(&sfw_data.fw_lock); - - reply->rmsn_status = 0; - reply->rmsn_sid = LST_INVALID_SID; - LASSERT(!sfw_data.fw_session); - return 0; -} - -static int -sfw_debug_session(struct srpc_debug_reqst *request, - struct srpc_debug_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - - if (!sn) { - reply->dbg_status = ESRCH; - reply->dbg_sid = LST_INVALID_SID; - return 0; - } - - reply->dbg_status = 0; - reply->dbg_sid = sn->sn_id; - reply->dbg_timeout = sn->sn_timeout; - if (strlcpy(reply->dbg_name, &sn->sn_name[0], sizeof(reply->dbg_name)) - >= sizeof(reply->dbg_name)) - return -E2BIG; - - return 0; -} - -static void -sfw_test_rpc_fini(struct srpc_client_rpc *rpc) -{ - struct sfw_test_unit *tsu = rpc->crpc_priv; - struct sfw_test_instance *tsi = tsu->tsu_instance; - - /* Called with hold of tsi->tsi_lock */ - LASSERT(list_empty(&rpc->crpc_list)); - list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs); -} - -static inline int -sfw_test_buffers(struct sfw_test_instance *tsi) -{ - struct sfw_test_case *tsc; - struct srpc_service *svc; - int nbuf; - - LASSERT(tsi); - tsc = sfw_find_test_case(tsi->tsi_service); - LASSERT(tsc); - svc = tsc->tsc_srv_service; - LASSERT(svc); - - nbuf = min(svc->sv_wi_total, tsi->tsi_loop) / svc->sv_ncpts; - return max(SFW_TEST_WI_MIN, nbuf + SFW_TEST_WI_EXTRA); -} - -static int -sfw_load_test(struct sfw_test_instance *tsi) -{ - struct sfw_test_case *tsc; - struct srpc_service *svc; - int nbuf; - int rc; - - LASSERT(tsi); - tsc = sfw_find_test_case(tsi->tsi_service); - nbuf = sfw_test_buffers(tsi); - LASSERT(tsc); - svc = tsc->tsc_srv_service; - - if (tsi->tsi_is_client) { - tsi->tsi_ops = tsc->tsc_cli_ops; - return 0; - } - - rc = srpc_service_add_buffers(svc, nbuf); - if (rc) { - CWARN("Failed to reserve enough buffers: service %s, %d needed: 
%d\n", - svc->sv_name, nbuf, rc); - /* - * NB: this error handler is not strictly correct, because - * it may release more buffers than already allocated, - * but it doesn't matter because request portal should - * be lazy portal and will grow buffers if necessary. - */ - srpc_service_remove_buffers(svc, nbuf); - return -ENOMEM; - } - - CDEBUG(D_NET, "Reserved %d buffers for test %s\n", - nbuf * (srpc_serv_is_framework(svc) ? - 2 : cfs_cpt_number(cfs_cpt_tab)), svc->sv_name); - return 0; -} - -static void -sfw_unload_test(struct sfw_test_instance *tsi) -{ - struct sfw_test_case *tsc; - - LASSERT(tsi); - tsc = sfw_find_test_case(tsi->tsi_service); - LASSERT(tsc); - - if (tsi->tsi_is_client) - return; - - /* - * shrink buffers, because request portal is lazy portal - * which can grow buffers at runtime so we may leave - * some buffers behind, but never mind... - */ - srpc_service_remove_buffers(tsc->tsc_srv_service, - sfw_test_buffers(tsi)); -} - -static void -sfw_destroy_test_instance(struct sfw_test_instance *tsi) -{ - struct srpc_client_rpc *rpc; - struct sfw_test_unit *tsu; - - if (!tsi->tsi_is_client) - goto clean; - - tsi->tsi_ops->tso_fini(tsi); - - LASSERT(!tsi->tsi_stopping); - LASSERT(list_empty(&tsi->tsi_active_rpcs)); - LASSERT(!sfw_test_active(tsi)); - - while (!list_empty(&tsi->tsi_units)) { - tsu = list_entry(tsi->tsi_units.next, - struct sfw_test_unit, tsu_list); - list_del(&tsu->tsu_list); - kfree(tsu); - } - - while (!list_empty(&tsi->tsi_free_rpcs)) { - rpc = list_entry(tsi->tsi_free_rpcs.next, - struct srpc_client_rpc, crpc_list); - list_del(&rpc->crpc_list); - kfree(rpc); - } - -clean: - sfw_unload_test(tsi); - kfree(tsi); -} - -static void -sfw_destroy_batch(struct sfw_batch *tsb) -{ - struct sfw_test_instance *tsi; - - LASSERT(!sfw_batch_active(tsb)); - LASSERT(list_empty(&tsb->bat_list)); - - while (!list_empty(&tsb->bat_tests)) { - tsi = list_entry(tsb->bat_tests.next, - struct sfw_test_instance, tsi_list); - list_del_init(&tsi->tsi_list); - 
sfw_destroy_test_instance(tsi); - } - - kfree(tsb); -} - -void -sfw_destroy_session(struct sfw_session *sn) -{ - struct sfw_batch *batch; - - LASSERT(list_empty(&sn->sn_list)); - LASSERT(sn != sfw_data.fw_session); - - while (!list_empty(&sn->sn_batches)) { - batch = list_entry(sn->sn_batches.next, - struct sfw_batch, bat_list); - list_del_init(&batch->bat_list); - sfw_destroy_batch(batch); - } - - kfree(sn); - atomic_dec(&sfw_data.fw_nzombies); -} - -static void -sfw_unpack_addtest_req(struct srpc_msg *msg) -{ - struct srpc_test_reqst *req = &msg->msg_body.tes_reqst; - - LASSERT(msg->msg_type == SRPC_MSG_TEST_REQST); - LASSERT(req->tsr_is_client); - - if (msg->msg_magic == SRPC_MSG_MAGIC) - return; /* no flipping needed */ - - LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - if (req->tsr_service == SRPC_SERVICE_BRW) { - if (!(msg->msg_ses_feats & LST_FEAT_BULK_LEN)) { - struct test_bulk_req *bulk = &req->tsr_u.bulk_v0; - - __swab32s(&bulk->blk_opc); - __swab32s(&bulk->blk_npg); - __swab32s(&bulk->blk_flags); - - } else { - struct test_bulk_req_v1 *bulk = &req->tsr_u.bulk_v1; - - __swab16s(&bulk->blk_opc); - __swab16s(&bulk->blk_flags); - __swab32s(&bulk->blk_offset); - __swab32s(&bulk->blk_len); - } - - return; - } - - if (req->tsr_service == SRPC_SERVICE_PING) { - struct test_ping_req *ping = &req->tsr_u.ping; - - __swab32s(&ping->png_size); - __swab32s(&ping->png_flags); - return; - } - - LBUG(); -} - -static int -sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc) -{ - struct srpc_msg *msg = &rpc->srpc_reqstbuf->buf_msg; - struct srpc_test_reqst *req = &msg->msg_body.tes_reqst; - struct srpc_bulk *bk = rpc->srpc_bulk; - int ndest = req->tsr_ndest; - struct sfw_test_unit *tsu; - struct sfw_test_instance *tsi; - int i; - int rc; - - tsi = kzalloc(sizeof(*tsi), GFP_NOFS); - if (!tsi) { - CERROR("Can't allocate test instance for batch: %llu\n", - tsb->bat_id.bat_id); - return -ENOMEM; - } - - spin_lock_init(&tsi->tsi_lock); - 
atomic_set(&tsi->tsi_nactive, 0); - INIT_LIST_HEAD(&tsi->tsi_units); - INIT_LIST_HEAD(&tsi->tsi_free_rpcs); - INIT_LIST_HEAD(&tsi->tsi_active_rpcs); - - tsi->tsi_stopping = 0; - tsi->tsi_batch = tsb; - tsi->tsi_loop = req->tsr_loop; - tsi->tsi_concur = req->tsr_concur; - tsi->tsi_service = req->tsr_service; - tsi->tsi_is_client = !!(req->tsr_is_client); - tsi->tsi_stoptsu_onerr = !!(req->tsr_stop_onerr); - - rc = sfw_load_test(tsi); - if (rc) { - kfree(tsi); - return rc; - } - - LASSERT(!sfw_batch_active(tsb)); - - if (!tsi->tsi_is_client) { - /* it's test server, just add it to tsb */ - list_add_tail(&tsi->tsi_list, &tsb->bat_tests); - return 0; - } - - LASSERT(bk); - LASSERT(bk->bk_niov * SFW_ID_PER_PAGE >= (unsigned int)ndest); - LASSERT((unsigned int)bk->bk_len >= - sizeof(struct lnet_process_id_packed) * ndest); - - sfw_unpack_addtest_req(msg); - memcpy(&tsi->tsi_u, &req->tsr_u, sizeof(tsi->tsi_u)); - - for (i = 0; i < ndest; i++) { - struct lnet_process_id_packed *dests; - struct lnet_process_id_packed id; - int j; - - dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page); - LASSERT(dests); /* my pages are within KVM always */ - id = dests[i % SFW_ID_PER_PAGE]; - if (msg->msg_magic != SRPC_MSG_MAGIC) - sfw_unpack_id(id); - - for (j = 0; j < tsi->tsi_concur; j++) { - tsu = kzalloc(sizeof(struct sfw_test_unit), GFP_NOFS); - if (!tsu) { - rc = -ENOMEM; - CERROR("Can't allocate tsu for %d\n", - tsi->tsi_service); - goto error; - } - - tsu->tsu_dest.nid = id.nid; - tsu->tsu_dest.pid = id.pid; - tsu->tsu_instance = tsi; - tsu->tsu_private = NULL; - list_add_tail(&tsu->tsu_list, &tsi->tsi_units); - } - } - - rc = tsi->tsi_ops->tso_init(tsi); - if (!rc) { - list_add_tail(&tsi->tsi_list, &tsb->bat_tests); - return 0; - } - -error: - LASSERT(rc); - sfw_destroy_test_instance(tsi); - return rc; -} - -static void -sfw_test_unit_done(struct sfw_test_unit *tsu) -{ - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct sfw_batch *tsb = tsi->tsi_batch; - 
struct sfw_session *sn = tsb->bat_session; - - LASSERT(sfw_test_active(tsi)); - - if (!atomic_dec_and_test(&tsi->tsi_nactive)) - return; - - /* the test instance is done */ - spin_lock(&tsi->tsi_lock); - - tsi->tsi_stopping = 0; - - spin_unlock(&tsi->tsi_lock); - - spin_lock(&sfw_data.fw_lock); - - if (!atomic_dec_and_test(&tsb->bat_nactive) || /* tsb still active */ - sn == sfw_data.fw_session) { /* sn also active */ - spin_unlock(&sfw_data.fw_lock); - return; - } - - LASSERT(!list_empty(&sn->sn_list)); /* I'm a zombie! */ - - list_for_each_entry(tsb, &sn->sn_batches, bat_list) { - if (sfw_batch_active(tsb)) { - spin_unlock(&sfw_data.fw_lock); - return; - } - } - - list_del_init(&sn->sn_list); - spin_unlock(&sfw_data.fw_lock); - - sfw_destroy_session(sn); -} - -static void -sfw_test_rpc_done(struct srpc_client_rpc *rpc) -{ - struct sfw_test_unit *tsu = rpc->crpc_priv; - struct sfw_test_instance *tsi = tsu->tsu_instance; - int done = 0; - - tsi->tsi_ops->tso_done_rpc(tsu, rpc); - - spin_lock(&tsi->tsi_lock); - - LASSERT(sfw_test_active(tsi)); - LASSERT(!list_empty(&rpc->crpc_list)); - - list_del_init(&rpc->crpc_list); - - /* batch is stopping or loop is done or get error */ - if (tsi->tsi_stopping || !tsu->tsu_loop || - (rpc->crpc_status && tsi->tsi_stoptsu_onerr)) - done = 1; - - /* dec ref for poster */ - srpc_client_rpc_decref(rpc); - - spin_unlock(&tsi->tsi_lock); - - if (!done) { - swi_schedule_workitem(&tsu->tsu_worker); - return; - } - - sfw_test_unit_done(tsu); -} - -int -sfw_create_test_rpc(struct sfw_test_unit *tsu, struct lnet_process_id peer, - unsigned int features, int nblk, int blklen, - struct srpc_client_rpc **rpcpp) -{ - struct srpc_client_rpc *rpc = NULL; - struct sfw_test_instance *tsi = tsu->tsu_instance; - - spin_lock(&tsi->tsi_lock); - - LASSERT(sfw_test_active(tsi)); - /* pick request from buffer */ - rpc = list_first_entry_or_null(&tsi->tsi_free_rpcs, - struct srpc_client_rpc, crpc_list); - if (rpc) { - LASSERT(nblk == 
rpc->crpc_bulk.bk_niov); - list_del_init(&rpc->crpc_list); - } - - spin_unlock(&tsi->tsi_lock); - - if (!rpc) { - rpc = srpc_create_client_rpc(peer, tsi->tsi_service, nblk, - blklen, sfw_test_rpc_done, - sfw_test_rpc_fini, tsu); - } else { - srpc_init_client_rpc(rpc, peer, tsi->tsi_service, nblk, - blklen, sfw_test_rpc_done, - sfw_test_rpc_fini, tsu); - } - - if (!rpc) { - CERROR("Can't create rpc for test %d\n", tsi->tsi_service); - return -ENOMEM; - } - - rpc->crpc_reqstmsg.msg_ses_feats = features; - *rpcpp = rpc; - - return 0; -} - -static void -sfw_run_test(struct swi_workitem *wi) -{ - struct sfw_test_unit *tsu = container_of(wi, struct sfw_test_unit, tsu_worker); - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct srpc_client_rpc *rpc = NULL; - - if (tsi->tsi_ops->tso_prep_rpc(tsu, tsu->tsu_dest, &rpc)) { - LASSERT(!rpc); - goto test_done; - } - - LASSERT(rpc); - - spin_lock(&tsi->tsi_lock); - - if (tsi->tsi_stopping) { - list_add(&rpc->crpc_list, &tsi->tsi_free_rpcs); - spin_unlock(&tsi->tsi_lock); - goto test_done; - } - - if (tsu->tsu_loop > 0) - tsu->tsu_loop--; - - list_add_tail(&rpc->crpc_list, &tsi->tsi_active_rpcs); - spin_unlock(&tsi->tsi_lock); - - spin_lock(&rpc->crpc_lock); - rpc->crpc_timeout = rpc_timeout; - srpc_post_rpc(rpc); - spin_unlock(&rpc->crpc_lock); - return; - -test_done: - /* - * No one can schedule me now since: - * - previous RPC, if any, has done and - * - no new RPC is initiated. - * - my batch is still active; no one can run it again now. 
- * Cancel pending schedules and prevent future schedule attempts: - */ - sfw_test_unit_done(tsu); -} - -static int -sfw_run_batch(struct sfw_batch *tsb) -{ - struct swi_workitem *wi; - struct sfw_test_unit *tsu; - struct sfw_test_instance *tsi; - - if (sfw_batch_active(tsb)) { - CDEBUG(D_NET, "Batch already active: %llu (%d)\n", - tsb->bat_id.bat_id, atomic_read(&tsb->bat_nactive)); - return 0; - } - - list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) { - if (!tsi->tsi_is_client) /* skip server instances */ - continue; - - LASSERT(!tsi->tsi_stopping); - LASSERT(!sfw_test_active(tsi)); - - atomic_inc(&tsb->bat_nactive); - - list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { - atomic_inc(&tsi->tsi_nactive); - tsu->tsu_loop = tsi->tsi_loop; - wi = &tsu->tsu_worker; - swi_init_workitem(wi, sfw_run_test, - lst_test_wq[lnet_cpt_of_nid(tsu->tsu_dest.nid)]); - swi_schedule_workitem(wi); - } - } - - return 0; -} - -int -sfw_stop_batch(struct sfw_batch *tsb, int force) -{ - struct sfw_test_instance *tsi; - struct srpc_client_rpc *rpc; - - if (!sfw_batch_active(tsb)) { - CDEBUG(D_NET, "Batch %llu inactive\n", tsb->bat_id.bat_id); - return 0; - } - - list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) { - spin_lock(&tsi->tsi_lock); - - if (!tsi->tsi_is_client || - !sfw_test_active(tsi) || tsi->tsi_stopping) { - spin_unlock(&tsi->tsi_lock); - continue; - } - - tsi->tsi_stopping = 1; - - if (!force) { - spin_unlock(&tsi->tsi_lock); - continue; - } - - /* abort launched rpcs in the test */ - list_for_each_entry(rpc, &tsi->tsi_active_rpcs, crpc_list) { - spin_lock(&rpc->crpc_lock); - - srpc_abort_rpc(rpc, -EINTR); - - spin_unlock(&rpc->crpc_lock); - } - - spin_unlock(&tsi->tsi_lock); - } - - return 0; -} - -static int -sfw_query_batch(struct sfw_batch *tsb, int testidx, - struct srpc_batch_reply *reply) -{ - struct sfw_test_instance *tsi; - - if (testidx < 0) - return -EINVAL; - - if (!testidx) { - reply->bar_active = atomic_read(&tsb->bat_nactive); - return 0; - } - - 
list_for_each_entry(tsi, &tsb->bat_tests, tsi_list) { - if (testidx-- > 1) - continue; - - reply->bar_active = atomic_read(&tsi->tsi_nactive); - return 0; - } - - return -ENOENT; -} - -void -sfw_free_pages(struct srpc_server_rpc *rpc) -{ - srpc_free_bulk(rpc->srpc_bulk); - rpc->srpc_bulk = NULL; -} - -int -sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, - int sink) -{ - LASSERT(!rpc->srpc_bulk); - LASSERT(npages > 0 && npages <= LNET_MAX_IOV); - - rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink); - if (!rpc->srpc_bulk) - return -ENOMEM; - - return 0; -} - -static int -sfw_add_test(struct srpc_server_rpc *rpc) -{ - struct sfw_session *sn = sfw_data.fw_session; - struct srpc_test_reply *reply = &rpc->srpc_replymsg.msg_body.tes_reply; - struct srpc_test_reqst *request; - int rc; - struct sfw_batch *bat; - - request = &rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst; - reply->tsr_sid = !sn ? LST_INVALID_SID : sn->sn_id; - - if (!request->tsr_loop || - !request->tsr_concur || - request->tsr_sid.ses_nid == LNET_NID_ANY || - request->tsr_ndest > SFW_MAX_NDESTS || - (request->tsr_is_client && !request->tsr_ndest) || - request->tsr_concur > SFW_MAX_CONCUR || - request->tsr_service > SRPC_SERVICE_MAX_ID || - request->tsr_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID) { - reply->tsr_status = EINVAL; - return 0; - } - - if (!sn || !sfw_sid_equal(request->tsr_sid, sn->sn_id) || - !sfw_find_test_case(request->tsr_service)) { - reply->tsr_status = ENOENT; - return 0; - } - - bat = sfw_bid2batch(request->tsr_bid); - if (!bat) { - CERROR("dropping RPC %s from %s under memory pressure\n", - rpc->srpc_scd->scd_svc->sv_name, - libcfs_id2str(rpc->srpc_peer)); - return -ENOMEM; - } - - if (sfw_batch_active(bat)) { - reply->tsr_status = EBUSY; - return 0; - } - - if (request->tsr_is_client && !rpc->srpc_bulk) { - /* rpc will be resumed later in sfw_bulk_ready */ - int npg = sfw_id_pages(request->tsr_ndest); - int len; - - if (!(sn->sn_features & 
LST_FEAT_BULK_LEN)) { - len = npg * PAGE_SIZE; - - } else { - len = sizeof(struct lnet_process_id_packed) * - request->tsr_ndest; - } - - return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1); - } - - rc = sfw_add_test_instance(bat, rpc); - CDEBUG(!rc ? D_NET : D_WARNING, - "%s test: sv %d %s, loop %d, concur %d, ndest %d\n", - !rc ? "Added" : "Failed to add", request->tsr_service, - request->tsr_is_client ? "client" : "server", - request->tsr_loop, request->tsr_concur, request->tsr_ndest); - - reply->tsr_status = (rc < 0) ? -rc : rc; - return 0; -} - -static int -sfw_control_batch(struct srpc_batch_reqst *request, - struct srpc_batch_reply *reply) -{ - struct sfw_session *sn = sfw_data.fw_session; - int rc = 0; - struct sfw_batch *bat; - - reply->bar_sid = !sn ? LST_INVALID_SID : sn->sn_id; - - if (!sn || !sfw_sid_equal(request->bar_sid, sn->sn_id)) { - reply->bar_status = ESRCH; - return 0; - } - - bat = sfw_find_batch(request->bar_bid); - if (!bat) { - reply->bar_status = ENOENT; - return 0; - } - - switch (request->bar_opc) { - case SRPC_BATCH_OPC_RUN: - rc = sfw_run_batch(bat); - break; - - case SRPC_BATCH_OPC_STOP: - rc = sfw_stop_batch(bat, request->bar_arg); - break; - - case SRPC_BATCH_OPC_QUERY: - rc = sfw_query_batch(bat, request->bar_testidx, reply); - break; - - default: - return -EINVAL; /* drop it */ - } - - reply->bar_status = (rc < 0) ? 
-rc : rc; - return 0; -} - -static int -sfw_handle_server_rpc(struct srpc_server_rpc *rpc) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - struct srpc_msg *reply = &rpc->srpc_replymsg; - struct srpc_msg *request = &rpc->srpc_reqstbuf->buf_msg; - unsigned int features = LST_FEATS_MASK; - int rc = 0; - - LASSERT(!sfw_data.fw_active_srpc); - LASSERT(sv->sv_id <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - - spin_lock(&sfw_data.fw_lock); - - if (sfw_data.fw_shuttingdown) { - spin_unlock(&sfw_data.fw_lock); - return -ESHUTDOWN; - } - - /* Remove timer to avoid racing with it or expiring active session */ - if (sfw_del_session_timer()) { - CERROR("dropping RPC %s from %s: racing with expiry timer\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer)); - spin_unlock(&sfw_data.fw_lock); - return -EAGAIN; - } - - sfw_data.fw_active_srpc = rpc; - spin_unlock(&sfw_data.fw_lock); - - sfw_unpack_message(request); - LASSERT(request->msg_type == srpc_service2request(sv->sv_id)); - - /* rpc module should have checked this */ - LASSERT(request->msg_version == SRPC_MSG_VERSION); - - if (sv->sv_id != SRPC_SERVICE_MAKE_SESSION && - sv->sv_id != SRPC_SERVICE_DEBUG) { - struct sfw_session *sn = sfw_data.fw_session; - - if (sn && - sn->sn_features != request->msg_ses_feats) { - CNETERR("Features of framework RPC don't match features of current session: %x/%x\n", - request->msg_ses_feats, sn->sn_features); - reply->msg_body.reply.status = EPROTO; - reply->msg_body.reply.sid = sn->sn_id; - goto out; - } - - } else if (request->msg_ses_feats & ~LST_FEATS_MASK) { - /* - * NB: at this point, old version will ignore features and - * create new session anyway, so console should be able - * to handle this - */ - reply->msg_body.reply.status = EPROTO; - goto out; - } - - switch (sv->sv_id) { - default: - LBUG(); - case SRPC_SERVICE_TEST: - rc = sfw_add_test(rpc); - break; - - case SRPC_SERVICE_BATCH: - rc = sfw_control_batch(&request->msg_body.bat_reqst, - &reply->msg_body.bat_reply); - break; - - case 
SRPC_SERVICE_QUERY_STAT: - rc = sfw_get_stats(&request->msg_body.stat_reqst, - &reply->msg_body.stat_reply); - break; - - case SRPC_SERVICE_DEBUG: - rc = sfw_debug_session(&request->msg_body.dbg_reqst, - &reply->msg_body.dbg_reply); - break; - - case SRPC_SERVICE_MAKE_SESSION: - rc = sfw_make_session(&request->msg_body.mksn_reqst, - &reply->msg_body.mksn_reply); - break; - - case SRPC_SERVICE_REMOVE_SESSION: - rc = sfw_remove_session(&request->msg_body.rmsn_reqst, - &reply->msg_body.rmsn_reply); - break; - } - - if (sfw_data.fw_session) - features = sfw_data.fw_session->sn_features; - out: - reply->msg_ses_feats = features; - rpc->srpc_done = sfw_server_rpc_done; - spin_lock(&sfw_data.fw_lock); - - if (!sfw_data.fw_shuttingdown) - sfw_add_session_timer(); - - sfw_data.fw_active_srpc = NULL; - spin_unlock(&sfw_data.fw_lock); - return rc; -} - -static int -sfw_bulk_ready(struct srpc_server_rpc *rpc, int status) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - int rc; - - LASSERT(rpc->srpc_bulk); - LASSERT(sv->sv_id == SRPC_SERVICE_TEST); - LASSERT(!sfw_data.fw_active_srpc); - LASSERT(rpc->srpc_reqstbuf->buf_msg.msg_body.tes_reqst.tsr_is_client); - - spin_lock(&sfw_data.fw_lock); - - if (status) { - CERROR("Bulk transfer failed for RPC: service %s, peer %s, status %d\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer), status); - spin_unlock(&sfw_data.fw_lock); - return -EIO; - } - - if (sfw_data.fw_shuttingdown) { - spin_unlock(&sfw_data.fw_lock); - return -ESHUTDOWN; - } - - if (sfw_del_session_timer()) { - CERROR("dropping RPC %s from %s: racing with expiry timer\n", - sv->sv_name, libcfs_id2str(rpc->srpc_peer)); - spin_unlock(&sfw_data.fw_lock); - return -EAGAIN; - } - - sfw_data.fw_active_srpc = rpc; - spin_unlock(&sfw_data.fw_lock); - - rc = sfw_add_test(rpc); - - spin_lock(&sfw_data.fw_lock); - - if (!sfw_data.fw_shuttingdown) - sfw_add_session_timer(); - - sfw_data.fw_active_srpc = NULL; - spin_unlock(&sfw_data.fw_lock); - return rc; -} - -struct 
srpc_client_rpc * -sfw_create_rpc(struct lnet_process_id peer, int service, - unsigned int features, int nbulkiov, int bulklen, - void (*done)(struct srpc_client_rpc *), void *priv) -{ - struct srpc_client_rpc *rpc = NULL; - - spin_lock(&sfw_data.fw_lock); - - LASSERT(!sfw_data.fw_shuttingdown); - LASSERT(service <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - - if (!nbulkiov && !list_empty(&sfw_data.fw_zombie_rpcs)) { - rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - struct srpc_client_rpc, crpc_list); - list_del(&rpc->crpc_list); - - srpc_init_client_rpc(rpc, peer, service, 0, 0, - done, sfw_client_rpc_fini, priv); - } - - spin_unlock(&sfw_data.fw_lock); - - if (!rpc) { - rpc = srpc_create_client_rpc(peer, service, - nbulkiov, bulklen, done, - nbulkiov ? NULL : - sfw_client_rpc_fini, - priv); - } - - if (rpc) /* "session" is concept in framework */ - rpc->crpc_reqstmsg.msg_ses_feats = features; - - return rpc; -} - -void -sfw_unpack_message(struct srpc_msg *msg) -{ - if (msg->msg_magic == SRPC_MSG_MAGIC) - return; /* no flipping needed */ - - /* srpc module should guarantee I wouldn't get crap */ - LASSERT(msg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - if (msg->msg_type == SRPC_MSG_STAT_REQST) { - struct srpc_stat_reqst *req = &msg->msg_body.stat_reqst; - - __swab32s(&req->str_type); - __swab64s(&req->str_rpyid); - sfw_unpack_sid(req->str_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_STAT_REPLY) { - struct srpc_stat_reply *rep = &msg->msg_body.stat_reply; - - __swab32s(&rep->str_status); - sfw_unpack_sid(rep->str_sid); - sfw_unpack_fw_counters(rep->str_fw); - sfw_unpack_rpc_counters(rep->str_rpc); - sfw_unpack_lnet_counters(rep->str_lnet); - return; - } - - if (msg->msg_type == SRPC_MSG_MKSN_REQST) { - struct srpc_mksn_reqst *req = &msg->msg_body.mksn_reqst; - - __swab64s(&req->mksn_rpyid); - __swab32s(&req->mksn_force); - sfw_unpack_sid(req->mksn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_MKSN_REPLY) { - struct srpc_mksn_reply *rep = 
&msg->msg_body.mksn_reply; - - __swab32s(&rep->mksn_status); - __swab32s(&rep->mksn_timeout); - sfw_unpack_sid(rep->mksn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_RMSN_REQST) { - struct srpc_rmsn_reqst *req = &msg->msg_body.rmsn_reqst; - - __swab64s(&req->rmsn_rpyid); - sfw_unpack_sid(req->rmsn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_RMSN_REPLY) { - struct srpc_rmsn_reply *rep = &msg->msg_body.rmsn_reply; - - __swab32s(&rep->rmsn_status); - sfw_unpack_sid(rep->rmsn_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_DEBUG_REQST) { - struct srpc_debug_reqst *req = &msg->msg_body.dbg_reqst; - - __swab64s(&req->dbg_rpyid); - __swab32s(&req->dbg_flags); - sfw_unpack_sid(req->dbg_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_DEBUG_REPLY) { - struct srpc_debug_reply *rep = &msg->msg_body.dbg_reply; - - __swab32s(&rep->dbg_nbatch); - __swab32s(&rep->dbg_timeout); - sfw_unpack_sid(rep->dbg_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_BATCH_REQST) { - struct srpc_batch_reqst *req = &msg->msg_body.bat_reqst; - - __swab32s(&req->bar_opc); - __swab64s(&req->bar_rpyid); - __swab32s(&req->bar_testidx); - __swab32s(&req->bar_arg); - sfw_unpack_sid(req->bar_sid); - __swab64s(&req->bar_bid.bat_id); - return; - } - - if (msg->msg_type == SRPC_MSG_BATCH_REPLY) { - struct srpc_batch_reply *rep = &msg->msg_body.bat_reply; - - __swab32s(&rep->bar_status); - sfw_unpack_sid(rep->bar_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_TEST_REQST) { - struct srpc_test_reqst *req = &msg->msg_body.tes_reqst; - - __swab64s(&req->tsr_rpyid); - __swab64s(&req->tsr_bulkid); - __swab32s(&req->tsr_loop); - __swab32s(&req->tsr_ndest); - __swab32s(&req->tsr_concur); - __swab32s(&req->tsr_service); - sfw_unpack_sid(req->tsr_sid); - __swab64s(&req->tsr_bid.bat_id); - return; - } - - if (msg->msg_type == SRPC_MSG_TEST_REPLY) { - struct srpc_test_reply *rep = &msg->msg_body.tes_reply; - - __swab32s(&rep->tsr_status); - sfw_unpack_sid(rep->tsr_sid); - return; 
- } - - if (msg->msg_type == SRPC_MSG_JOIN_REQST) { - struct srpc_join_reqst *req = &msg->msg_body.join_reqst; - - __swab64s(&req->join_rpyid); - sfw_unpack_sid(req->join_sid); - return; - } - - if (msg->msg_type == SRPC_MSG_JOIN_REPLY) { - struct srpc_join_reply *rep = &msg->msg_body.join_reply; - - __swab32s(&rep->join_status); - __swab32s(&rep->join_timeout); - sfw_unpack_sid(rep->join_sid); - return; - } - - LBUG(); -} - -void -sfw_abort_rpc(struct srpc_client_rpc *rpc) -{ - LASSERT(atomic_read(&rpc->crpc_refcount) > 0); - LASSERT(rpc->crpc_service <= SRPC_FRAMEWORK_SERVICE_MAX_ID); - - spin_lock(&rpc->crpc_lock); - srpc_abort_rpc(rpc, -EINTR); - spin_unlock(&rpc->crpc_lock); -} - -void -sfw_post_rpc(struct srpc_client_rpc *rpc) -{ - spin_lock(&rpc->crpc_lock); - - LASSERT(!rpc->crpc_closed); - LASSERT(!rpc->crpc_aborted); - LASSERT(list_empty(&rpc->crpc_list)); - LASSERT(!sfw_data.fw_shuttingdown); - - rpc->crpc_timeout = rpc_timeout; - srpc_post_rpc(rpc); - - spin_unlock(&rpc->crpc_lock); -} - -static struct srpc_service sfw_services[] = { - { - /* sv_id */ SRPC_SERVICE_DEBUG, - /* sv_name */ "debug", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_QUERY_STAT, - /* sv_name */ "query stats", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_MAKE_SESSION, - /* sv_name */ "make session", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_REMOVE_SESSION, - /* sv_name */ "remove session", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_BATCH, - /* sv_name */ "batch service", - 0 - }, - { - /* sv_id */ SRPC_SERVICE_TEST, - /* sv_name */ "test service", - 0 - }, - { - /* sv_id */ 0, - /* sv_name */ NULL, - 0 - } -}; - -int -sfw_startup(void) -{ - int i; - int rc; - int error; - struct srpc_service *sv; - struct sfw_test_case *tsc; - - if (session_timeout < 0) { - CERROR("Session timeout must be non-negative: %d\n", - session_timeout); - return -EINVAL; - } - - if (rpc_timeout < 0) { - CERROR("RPC timeout must be non-negative: %d\n", - rpc_timeout); - return -EINVAL; - } - - if (!session_timeout) - 
CWARN("Zero session_timeout specified - test sessions never expire.\n"); - - if (!rpc_timeout) - CWARN("Zero rpc_timeout specified - test RPC never expire.\n"); - - memset(&sfw_data, 0, sizeof(struct smoketest_framework)); - - sfw_data.fw_session = NULL; - sfw_data.fw_active_srpc = NULL; - spin_lock_init(&sfw_data.fw_lock); - atomic_set(&sfw_data.fw_nzombies, 0); - INIT_LIST_HEAD(&sfw_data.fw_tests); - INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs); - INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions); - - brw_init_test_client(); - brw_init_test_service(); - rc = sfw_register_test(&brw_test_service, &brw_test_client); - LASSERT(!rc); - - ping_init_test_client(); - ping_init_test_service(); - rc = sfw_register_test(&ping_test_service, &ping_test_client); - LASSERT(!rc); - - error = 0; - list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) { - sv = tsc->tsc_srv_service; - - rc = srpc_add_service(sv); - LASSERT(rc != -EBUSY); - if (rc) { - CWARN("Failed to add %s service: %d\n", - sv->sv_name, rc); - error = rc; - } - } - - for (i = 0; ; i++) { - sv = &sfw_services[i]; - if (!sv->sv_name) - break; - - sv->sv_bulk_ready = NULL; - sv->sv_handler = sfw_handle_server_rpc; - sv->sv_wi_total = SFW_FRWK_WI_MAX; - if (sv->sv_id == SRPC_SERVICE_TEST) - sv->sv_bulk_ready = sfw_bulk_ready; - - rc = srpc_add_service(sv); - LASSERT(rc != -EBUSY); - if (rc) { - CWARN("Failed to add %s service: %d\n", - sv->sv_name, rc); - error = rc; - } - - /* about to sfw_shutdown, no need to add buffer */ - if (error) - continue; - - rc = srpc_service_add_buffers(sv, sv->sv_wi_total); - if (rc) { - CWARN("Failed to reserve enough buffers: service %s, %d needed: %d\n", - sv->sv_name, sv->sv_wi_total, rc); - error = -ENOMEM; - } - } - - if (error) - sfw_shutdown(); - return error; -} - -void -sfw_shutdown(void) -{ - struct srpc_service *sv; - struct sfw_test_case *tsc; - int i; - - spin_lock(&sfw_data.fw_lock); - - sfw_data.fw_shuttingdown = 1; - lst_wait_until(!sfw_data.fw_active_srpc, sfw_data.fw_lock, - 
"waiting for active RPC to finish.\n"); - - if (sfw_del_session_timer()) - lst_wait_until(!sfw_data.fw_session, sfw_data.fw_lock, - "waiting for session timer to explode.\n"); - - sfw_deactivate_session(); - lst_wait_until(!atomic_read(&sfw_data.fw_nzombies), - sfw_data.fw_lock, - "waiting for %d zombie sessions to die.\n", - atomic_read(&sfw_data.fw_nzombies)); - - spin_unlock(&sfw_data.fw_lock); - - for (i = 0; ; i++) { - sv = &sfw_services[i]; - if (!sv->sv_name) - break; - - srpc_shutdown_service(sv); - srpc_remove_service(sv); - } - - list_for_each_entry(tsc, &sfw_data.fw_tests, tsc_list) { - sv = tsc->tsc_srv_service; - srpc_shutdown_service(sv); - srpc_remove_service(sv); - } - - while (!list_empty(&sfw_data.fw_zombie_rpcs)) { - struct srpc_client_rpc *rpc; - - rpc = list_entry(sfw_data.fw_zombie_rpcs.next, - struct srpc_client_rpc, crpc_list); - list_del(&rpc->crpc_list); - - kfree(rpc); - } - - for (i = 0; ; i++) { - sv = &sfw_services[i]; - if (!sv->sv_name) - break; - - srpc_wait_service_shutdown(sv); - } - - while (!list_empty(&sfw_data.fw_tests)) { - tsc = list_entry(sfw_data.fw_tests.next, - struct sfw_test_case, tsc_list); - - srpc_wait_service_shutdown(tsc->tsc_srv_service); - - list_del(&tsc->tsc_list); - kfree(tsc); - } -} diff --git a/drivers/staging/lustre/lnet/selftest/module.c b/drivers/staging/lustre/lnet/selftest/module.c deleted file mode 100644 index 9ba65320f748..000000000000 --- a/drivers/staging/lustre/lnet/selftest/module.c +++ /dev/null @@ -1,169 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" -#include "console.h" - -enum { - LST_INIT_NONE = 0, - LST_INIT_WI_SERIAL, - LST_INIT_WI_TEST, - LST_INIT_RPC, - LST_INIT_FW, - LST_INIT_CONSOLE -}; - -static int lst_init_step = LST_INIT_NONE; - -struct workqueue_struct *lst_serial_wq; -struct workqueue_struct **lst_test_wq; - -static void -lnet_selftest_exit(void) -{ - int i; - - switch (lst_init_step) { - case LST_INIT_CONSOLE: - lstcon_console_fini(); - /* fall through */ - case LST_INIT_FW: - sfw_shutdown(); - /* fall through */ - case LST_INIT_RPC: - srpc_shutdown(); - /* fall through */ - case LST_INIT_WI_TEST: - for (i = 0; - i < cfs_cpt_number(lnet_cpt_table()); i++) { - if (!lst_test_wq[i]) - continue; - destroy_workqueue(lst_test_wq[i]); - } - kvfree(lst_test_wq); - lst_test_wq = NULL; - /* fall through */ - case LST_INIT_WI_SERIAL: - destroy_workqueue(lst_serial_wq); - lst_serial_wq = NULL; - case LST_INIT_NONE: - break; - default: - LBUG(); - } -} - -static int -lnet_selftest_init(void) -{ - int nscheds; - int rc; - int i; - - rc = libcfs_setup(); - if (rc) - return rc; - - lst_serial_wq = alloc_ordered_workqueue("lst_s", 0); - if 
(!lst_serial_wq) { - CERROR("Failed to create serial WI scheduler for LST\n"); - return -ENOMEM; - } - lst_init_step = LST_INIT_WI_SERIAL; - - nscheds = cfs_cpt_number(lnet_cpt_table()); - lst_test_wq = kvmalloc_array(nscheds, sizeof(lst_test_wq[0]), - GFP_KERNEL | __GFP_ZERO); - if (!lst_test_wq) { - rc = -ENOMEM; - goto error; - } - - lst_init_step = LST_INIT_WI_TEST; - for (i = 0; i < nscheds; i++) { - int nthrs = cfs_cpt_weight(lnet_cpt_table(), i); - struct workqueue_attrs attrs = {0}; - cpumask_var_t *mask = cfs_cpt_cpumask(lnet_cpt_table(), i); - - /* reserve at least one CPU for LND */ - nthrs = max(nthrs - 1, 1); - lst_test_wq[i] = alloc_workqueue("lst_t", WQ_UNBOUND, nthrs); - if (!lst_test_wq[i]) { - CWARN("Failed to create CPU partition affinity WI scheduler %d for LST\n", - i); - rc = -ENOMEM; - goto error; - } - - if (mask && alloc_cpumask_var(&attrs.cpumask, GFP_KERNEL)) { - cpumask_copy(attrs.cpumask, *mask); - apply_workqueue_attrs(lst_test_wq[i], &attrs); - free_cpumask_var(attrs.cpumask); - } - } - - rc = srpc_startup(); - if (rc) { - CERROR("LST can't startup rpc\n"); - goto error; - } - lst_init_step = LST_INIT_RPC; - - rc = sfw_startup(); - if (rc) { - CERROR("LST can't startup framework\n"); - goto error; - } - lst_init_step = LST_INIT_FW; - - rc = lstcon_console_init(); - if (rc) { - CERROR("LST can't startup console\n"); - goto error; - } - lst_init_step = LST_INIT_CONSOLE; - return 0; -error: - lnet_selftest_exit(); - return rc; -} - -MODULE_AUTHOR("OpenSFS, Inc. 
"); -MODULE_DESCRIPTION("LNet Selftest"); -MODULE_VERSION("2.7.0"); -MODULE_LICENSE("GPL"); - -module_init(lnet_selftest_init); -module_exit(lnet_selftest_exit); diff --git a/drivers/staging/lustre/lnet/selftest/ping_test.c b/drivers/staging/lustre/lnet/selftest/ping_test.c deleted file mode 100644 index f54bd630dbf8..000000000000 --- a/drivers/staging/lustre/lnet/selftest/ping_test.c +++ /dev/null @@ -1,228 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lnet/selftest/conctl.c - * - * Test client & Server - * - * Author: Liang Zhen - */ - -#include "selftest.h" - -#define LST_PING_TEST_MAGIC 0xbabeface - -static int ping_srv_workitems = SFW_TEST_WI_MAX; -module_param(ping_srv_workitems, int, 0644); -MODULE_PARM_DESC(ping_srv_workitems, "# PING server workitems"); - -struct lst_ping_data { - spinlock_t pnd_lock; /* serialize */ - int pnd_counter; /* sequence counter */ -}; - -static struct lst_ping_data lst_ping_data; - -static int -ping_client_init(struct sfw_test_instance *tsi) -{ - struct sfw_session *sn = tsi->tsi_batch->bat_session; - - LASSERT(tsi->tsi_is_client); - LASSERT(sn && !(sn->sn_features & ~LST_FEATS_MASK)); - - spin_lock_init(&lst_ping_data.pnd_lock); - lst_ping_data.pnd_counter = 0; - - return 0; -} - -static void -ping_client_fini(struct sfw_test_instance *tsi) -{ - struct sfw_session *sn = tsi->tsi_batch->bat_session; - int errors; - - LASSERT(sn); - LASSERT(tsi->tsi_is_client); - - errors = atomic_read(&sn->sn_ping_errors); - if (errors) - CWARN("%d pings have failed.\n", errors); - else - CDEBUG(D_NET, "Ping test finished OK.\n"); -} - -static int -ping_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest, - struct srpc_client_rpc **rpc) -{ - struct srpc_ping_reqst *req; - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct sfw_session *sn = tsi->tsi_batch->bat_session; - struct timespec64 ts; - int rc; - - LASSERT(sn); - LASSERT(!(sn->sn_features & ~LST_FEATS_MASK)); - - rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, 0, 0, rpc); - if (rc) - return rc; - - req = &(*rpc)->crpc_reqstmsg.msg_body.ping_reqst; - - req->pnr_magic = LST_PING_TEST_MAGIC; - - spin_lock(&lst_ping_data.pnd_lock); - req->pnr_seq = lst_ping_data.pnd_counter++; - spin_unlock(&lst_ping_data.pnd_lock); - - ktime_get_real_ts64(&ts); - req->pnr_time_sec = ts.tv_sec; - req->pnr_time_usec = ts.tv_nsec / NSEC_PER_USEC; - - return rc; -} - -static void -ping_client_done_rpc(struct 
sfw_test_unit *tsu, struct srpc_client_rpc *rpc) -{ - struct sfw_test_instance *tsi = tsu->tsu_instance; - struct sfw_session *sn = tsi->tsi_batch->bat_session; - struct srpc_ping_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst; - struct srpc_ping_reply *reply = &rpc->crpc_replymsg.msg_body.ping_reply; - struct timespec64 ts; - - LASSERT(sn); - - if (rpc->crpc_status) { - if (!tsi->tsi_stopping) /* rpc could have been aborted */ - atomic_inc(&sn->sn_ping_errors); - CERROR("Unable to ping %s (%d): %d\n", - libcfs_id2str(rpc->crpc_dest), - reqst->pnr_seq, rpc->crpc_status); - return; - } - - if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) { - __swab32s(&reply->pnr_seq); - __swab32s(&reply->pnr_magic); - __swab32s(&reply->pnr_status); - } - - if (reply->pnr_magic != LST_PING_TEST_MAGIC) { - rpc->crpc_status = -EBADMSG; - atomic_inc(&sn->sn_ping_errors); - CERROR("Bad magic %u from %s, %u expected.\n", - reply->pnr_magic, libcfs_id2str(rpc->crpc_dest), - LST_PING_TEST_MAGIC); - return; - } - - if (reply->pnr_seq != reqst->pnr_seq) { - rpc->crpc_status = -EBADMSG; - atomic_inc(&sn->sn_ping_errors); - CERROR("Bad seq %u from %s, %u expected.\n", - reply->pnr_seq, libcfs_id2str(rpc->crpc_dest), - reqst->pnr_seq); - return; - } - - ktime_get_real_ts64(&ts); - CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq, - (unsigned int)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 + - (ts.tv_nsec / NSEC_PER_USEC - reqst->pnr_time_usec))); -} - -static int -ping_server_handle(struct srpc_server_rpc *rpc) -{ - struct srpc_service *sv = rpc->srpc_scd->scd_svc; - struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; - struct srpc_msg *replymsg = &rpc->srpc_replymsg; - struct srpc_ping_reqst *req = &reqstmsg->msg_body.ping_reqst; - struct srpc_ping_reply *rep = &rpc->srpc_replymsg.msg_body.ping_reply; - - LASSERT(sv->sv_id == SRPC_SERVICE_PING); - - if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) { - LASSERT(reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC)); - - 
__swab32s(&req->pnr_seq); - __swab32s(&req->pnr_magic); - __swab64s(&req->pnr_time_sec); - __swab64s(&req->pnr_time_usec); - } - LASSERT(reqstmsg->msg_type == srpc_service2request(sv->sv_id)); - - if (req->pnr_magic != LST_PING_TEST_MAGIC) { - CERROR("Unexpected magic %08x from %s\n", - req->pnr_magic, libcfs_id2str(rpc->srpc_peer)); - return -EINVAL; - } - - rep->pnr_seq = req->pnr_seq; - rep->pnr_magic = LST_PING_TEST_MAGIC; - - if (reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) { - replymsg->msg_ses_feats = LST_FEATS_MASK; - rep->pnr_status = EPROTO; - return 0; - } - - replymsg->msg_ses_feats = reqstmsg->msg_ses_feats; - - CDEBUG(D_NET, "Get ping %d from %s\n", - req->pnr_seq, libcfs_id2str(rpc->srpc_peer)); - return 0; -} - -struct sfw_test_client_ops ping_test_client; - -void ping_init_test_client(void) -{ - ping_test_client.tso_init = ping_client_init; - ping_test_client.tso_fini = ping_client_fini; - ping_test_client.tso_prep_rpc = ping_client_prep_rpc; - ping_test_client.tso_done_rpc = ping_client_done_rpc; -} - -struct srpc_service ping_test_service; - -void ping_init_test_service(void) -{ - ping_test_service.sv_id = SRPC_SERVICE_PING; - ping_test_service.sv_name = "ping_test"; - ping_test_service.sv_handler = ping_server_handle; - ping_test_service.sv_wi_total = ping_srv_workitems; -} diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c deleted file mode 100644 index 9613b0a77007..000000000000 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ /dev/null @@ -1,1682 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/rpc.c - * - * Author: Isaac Huang - * - * 2012-05-13: Liang Zhen - * - percpt data for service to improve smp performance - * - code cleanup - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - -enum srpc_state { - SRPC_STATE_NONE, - SRPC_STATE_NI_INIT, - SRPC_STATE_EQ_INIT, - SRPC_STATE_RUNNING, - SRPC_STATE_STOPPING, -}; - -static struct smoketest_rpc { - spinlock_t rpc_glock; /* global lock */ - struct srpc_service *rpc_services[SRPC_SERVICE_MAX_ID + 1]; - struct lnet_handle_eq rpc_lnet_eq; /* _the_ LNet event queue */ - enum srpc_state rpc_state; - struct srpc_counters rpc_counters; - __u64 rpc_matchbits; /* matchbits counter */ -} srpc_data; - -static inline int -srpc_serv_portal(int svc_id) -{ - return svc_id < SRPC_FRAMEWORK_SERVICE_MAX_ID ? 
- SRPC_FRAMEWORK_REQUEST_PORTAL : SRPC_REQUEST_PORTAL; -} - -/* forward ref's */ -void srpc_handle_rpc(struct swi_workitem *wi); - -void srpc_get_counters(struct srpc_counters *cnt) -{ - spin_lock(&srpc_data.rpc_glock); - *cnt = srpc_data.rpc_counters; - spin_unlock(&srpc_data.rpc_glock); -} - -void srpc_set_counters(const struct srpc_counters *cnt) -{ - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters = *cnt; - spin_unlock(&srpc_data.rpc_glock); -} - -static int -srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off, - int nob) -{ - LASSERT(off < PAGE_SIZE); - LASSERT(nob > 0 && nob <= PAGE_SIZE); - - bk->bk_iovs[i].bv_offset = off; - bk->bk_iovs[i].bv_page = pg; - bk->bk_iovs[i].bv_len = nob; - return nob; -} - -void -srpc_free_bulk(struct srpc_bulk *bk) -{ - int i; - struct page *pg; - - LASSERT(bk); - - for (i = 0; i < bk->bk_niov; i++) { - pg = bk->bk_iovs[i].bv_page; - if (!pg) - break; - - __free_page(pg); - } - - kfree(bk); -} - -struct srpc_bulk * -srpc_alloc_bulk(int cpt, unsigned int bulk_off, unsigned int bulk_npg, - unsigned int bulk_len, int sink) -{ - struct srpc_bulk *bk; - int i; - - LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV); - - bk = kzalloc_cpt(offsetof(struct srpc_bulk, bk_iovs[bulk_npg]), - GFP_KERNEL, cpt); - if (!bk) { - CERROR("Can't allocate descriptor for %d pages\n", bulk_npg); - return NULL; - } - - memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg])); - bk->bk_sink = sink; - bk->bk_len = bulk_len; - bk->bk_niov = bulk_npg; - - for (i = 0; i < bulk_npg; i++) { - struct page *pg; - int nob; - - pg = alloc_pages_node(cfs_cpt_spread_node(lnet_cpt_table(), cpt), - GFP_KERNEL, 0); - if (!pg) { - CERROR("Can't allocate page %d of %d\n", i, bulk_npg); - srpc_free_bulk(bk); - return NULL; - } - - nob = min_t(unsigned int, bulk_off + bulk_len, PAGE_SIZE) - - bulk_off; - srpc_add_bulk_page(bk, pg, i, bulk_off, nob); - bulk_len -= nob; - bulk_off = 0; - } - - return bk; -} - -static inline __u64 
-srpc_next_id(void) -{ - __u64 id; - - spin_lock(&srpc_data.rpc_glock); - id = srpc_data.rpc_matchbits++; - spin_unlock(&srpc_data.rpc_glock); - return id; -} - -static void -srpc_init_server_rpc(struct srpc_server_rpc *rpc, - struct srpc_service_cd *scd, - struct srpc_buffer *buffer) -{ - memset(rpc, 0, sizeof(*rpc)); - swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc, - srpc_serv_is_framework(scd->scd_svc) ? - lst_serial_wq : lst_test_wq[scd->scd_cpt]); - - rpc->srpc_ev.ev_fired = 1; /* no event expected now */ - - rpc->srpc_scd = scd; - rpc->srpc_reqstbuf = buffer; - rpc->srpc_peer = buffer->buf_peer; - rpc->srpc_self = buffer->buf_self; - LNetInvalidateMDHandle(&rpc->srpc_replymdh); -} - -static void -srpc_service_fini(struct srpc_service *svc) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - struct srpc_buffer *buf; - struct list_head *q; - int i; - - if (!svc->sv_cpt_data) - return; - - cfs_percpt_for_each(scd, i, svc->sv_cpt_data) { - while (1) { - if (!list_empty(&scd->scd_buf_posted)) - q = &scd->scd_buf_posted; - else if (!list_empty(&scd->scd_buf_blocked)) - q = &scd->scd_buf_blocked; - else - break; - - while (!list_empty(q)) { - buf = list_entry(q->next, struct srpc_buffer, - buf_list); - list_del(&buf->buf_list); - kfree(buf); - } - } - - LASSERT(list_empty(&scd->scd_rpc_active)); - - while (!list_empty(&scd->scd_rpc_free)) { - rpc = list_entry(scd->scd_rpc_free.next, - struct srpc_server_rpc, - srpc_list); - list_del(&rpc->srpc_list); - kfree(rpc); - } - } - - cfs_percpt_free(svc->sv_cpt_data); - svc->sv_cpt_data = NULL; -} - -static int -srpc_service_nrpcs(struct srpc_service *svc) -{ - int nrpcs = svc->sv_wi_total / svc->sv_ncpts; - - return srpc_serv_is_framework(svc) ? 
- max(nrpcs, SFW_FRWK_WI_MIN) : max(nrpcs, SFW_TEST_WI_MIN); -} - -void srpc_add_buffer(struct swi_workitem *wi); - -static int -srpc_service_init(struct srpc_service *svc) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - int nrpcs; - int i; - int j; - - svc->sv_shuttingdown = 0; - - svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(**svc->sv_cpt_data)); - if (!svc->sv_cpt_data) - return -ENOMEM; - - svc->sv_ncpts = srpc_serv_is_framework(svc) ? - 1 : cfs_cpt_number(lnet_cpt_table()); - nrpcs = srpc_service_nrpcs(svc); - - cfs_percpt_for_each(scd, i, svc->sv_cpt_data) { - scd->scd_cpt = i; - scd->scd_svc = svc; - spin_lock_init(&scd->scd_lock); - INIT_LIST_HEAD(&scd->scd_rpc_free); - INIT_LIST_HEAD(&scd->scd_rpc_active); - INIT_LIST_HEAD(&scd->scd_buf_posted); - INIT_LIST_HEAD(&scd->scd_buf_blocked); - - scd->scd_ev.ev_data = scd; - scd->scd_ev.ev_type = SRPC_REQUEST_RCVD; - - /* - * NB: don't use lst_serial_wq for adding buffer, - * see details in srpc_service_add_buffers() - */ - swi_init_workitem(&scd->scd_buf_wi, - srpc_add_buffer, lst_test_wq[i]); - - if (i && srpc_serv_is_framework(svc)) { - /* - * NB: framework service only needs srpc_service_cd for - * one partition, but we allocate for all to make - * it easier to implement, it will waste a little - * memory but nobody should care about this - */ - continue; - } - - for (j = 0; j < nrpcs; j++) { - rpc = kzalloc_cpt(sizeof(*rpc), GFP_NOFS, i); - if (!rpc) { - srpc_service_fini(svc); - return -ENOMEM; - } - list_add(&rpc->srpc_list, &scd->scd_rpc_free); - } - } - - return 0; -} - -int -srpc_add_service(struct srpc_service *sv) -{ - int id = sv->sv_id; - - LASSERT(0 <= id && id <= SRPC_SERVICE_MAX_ID); - - if (srpc_service_init(sv)) - return -ENOMEM; - - spin_lock(&srpc_data.rpc_glock); - - LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING); - - if (srpc_data.rpc_services[id]) { - spin_unlock(&srpc_data.rpc_glock); - goto failed; - } - - srpc_data.rpc_services[id] = sv; - 
spin_unlock(&srpc_data.rpc_glock); - - CDEBUG(D_NET, "Adding service: id %d, name %s\n", id, sv->sv_name); - return 0; - - failed: - srpc_service_fini(sv); - return -EBUSY; -} - -int -srpc_remove_service(struct srpc_service *sv) -{ - int id = sv->sv_id; - - spin_lock(&srpc_data.rpc_glock); - - if (srpc_data.rpc_services[id] != sv) { - spin_unlock(&srpc_data.rpc_glock); - return -ENOENT; - } - - srpc_data.rpc_services[id] = NULL; - spin_unlock(&srpc_data.rpc_glock); - return 0; -} - -static int -srpc_post_passive_rdma(int portal, int local, __u64 matchbits, void *buf, - int len, int options, struct lnet_process_id peer, - struct lnet_handle_md *mdh, struct srpc_event *ev) -{ - int rc; - struct lnet_md md; - struct lnet_handle_me meh; - - rc = LNetMEAttach(portal, peer, matchbits, 0, LNET_UNLINK, - local ? LNET_INS_LOCAL : LNET_INS_AFTER, &meh); - if (rc) { - CERROR("LNetMEAttach failed: %d\n", rc); - LASSERT(rc == -ENOMEM); - return -ENOMEM; - } - - md.threshold = 1; - md.user_ptr = ev; - md.start = buf; - md.length = len; - md.options = options; - md.eq_handle = srpc_data.rpc_lnet_eq; - - rc = LNetMDAttach(meh, md, LNET_UNLINK, mdh); - if (rc) { - CERROR("LNetMDAttach failed: %d\n", rc); - LASSERT(rc == -ENOMEM); - - rc = LNetMEUnlink(meh); - LASSERT(!rc); - return -ENOMEM; - } - - CDEBUG(D_NET, "Posted passive RDMA: peer %s, portal %d, matchbits %#llx\n", - libcfs_id2str(peer), portal, matchbits); - return 0; -} - -static int -srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len, - int options, struct lnet_process_id peer, - lnet_nid_t self, struct lnet_handle_md *mdh, - struct srpc_event *ev) -{ - int rc; - struct lnet_md md; - - md.user_ptr = ev; - md.start = buf; - md.length = len; - md.eq_handle = srpc_data.rpc_lnet_eq; - md.threshold = options & LNET_MD_OP_GET ? 
2 : 1; - md.options = options & ~(LNET_MD_OP_PUT | LNET_MD_OP_GET); - - rc = LNetMDBind(md, LNET_UNLINK, mdh); - if (rc) { - CERROR("LNetMDBind failed: %d\n", rc); - LASSERT(rc == -ENOMEM); - return -ENOMEM; - } - - /* - * this is kind of an abuse of the LNET_MD_OP_{PUT,GET} options. - * they're only meaningful for MDs attached to an ME (i.e. passive - * buffers... - */ - if (options & LNET_MD_OP_PUT) { - rc = LNetPut(self, *mdh, LNET_NOACK_REQ, peer, - portal, matchbits, 0, 0); - } else { - LASSERT(options & LNET_MD_OP_GET); - - rc = LNetGet(self, *mdh, peer, portal, matchbits, 0); - } - - if (rc) { - CERROR("LNet%s(%s, %d, %lld) failed: %d\n", - options & LNET_MD_OP_PUT ? "Put" : "Get", - libcfs_id2str(peer), portal, matchbits, rc); - - /* - * The forthcoming unlink event will complete this operation - * with failure, so fall through and return success here. - */ - rc = LNetMDUnlink(*mdh); - LASSERT(!rc); - } else { - CDEBUG(D_NET, "Posted active RDMA: peer %s, portal %u, matchbits %#llx\n", - libcfs_id2str(peer), portal, matchbits); - } - return 0; -} - -static int -srpc_post_passive_rqtbuf(int service, int local, void *buf, int len, - struct lnet_handle_md *mdh, struct srpc_event *ev) -{ - struct lnet_process_id any = { 0 }; - - any.nid = LNET_NID_ANY; - any.pid = LNET_PID_ANY; - - return srpc_post_passive_rdma(srpc_serv_portal(service), - local, service, buf, len, - LNET_MD_OP_PUT, any, mdh, ev); -} - -static int -srpc_service_post_buffer(struct srpc_service_cd *scd, struct srpc_buffer *buf) -__must_hold(&scd->scd_lock) -{ - struct srpc_service *sv = scd->scd_svc; - struct srpc_msg *msg = &buf->buf_msg; - int rc; - - LNetInvalidateMDHandle(&buf->buf_mdh); - list_add(&buf->buf_list, &scd->scd_buf_posted); - scd->scd_buf_nposted++; - spin_unlock(&scd->scd_lock); - - rc = srpc_post_passive_rqtbuf(sv->sv_id, - !srpc_serv_is_framework(sv), - msg, sizeof(*msg), &buf->buf_mdh, - &scd->scd_ev); - - /* - * At this point, a RPC (new or delayed) may have arrived in - * 
msg and its event handler has been called. So we must add - * buf to scd_buf_posted _before_ dropping scd_lock - */ - spin_lock(&scd->scd_lock); - - if (!rc) { - if (!sv->sv_shuttingdown) - return 0; - - spin_unlock(&scd->scd_lock); - /* - * srpc_shutdown_service might have tried to unlink me - * when my buf_mdh was still invalid - */ - LNetMDUnlink(buf->buf_mdh); - spin_lock(&scd->scd_lock); - return 0; - } - - scd->scd_buf_nposted--; - if (sv->sv_shuttingdown) - return rc; /* don't allow to change scd_buf_posted */ - - list_del(&buf->buf_list); - spin_unlock(&scd->scd_lock); - - kfree(buf); - - spin_lock(&scd->scd_lock); - return rc; -} - -void -srpc_add_buffer(struct swi_workitem *wi) -{ - struct srpc_service_cd *scd = container_of(wi, struct srpc_service_cd, scd_buf_wi); - struct srpc_buffer *buf; - int rc = 0; - - /* - * it's called by workitem scheduler threads, these threads - * should have been set CPT affinity, so buffers will be posted - * on CPT local list of Portal - */ - spin_lock(&scd->scd_lock); - - while (scd->scd_buf_adjust > 0 && - !scd->scd_svc->sv_shuttingdown) { - scd->scd_buf_adjust--; /* consume it */ - scd->scd_buf_posting++; - - spin_unlock(&scd->scd_lock); - - buf = kzalloc(sizeof(*buf), GFP_NOFS); - if (!buf) { - CERROR("Failed to add new buf to service: %s\n", - scd->scd_svc->sv_name); - spin_lock(&scd->scd_lock); - rc = -ENOMEM; - break; - } - - spin_lock(&scd->scd_lock); - if (scd->scd_svc->sv_shuttingdown) { - spin_unlock(&scd->scd_lock); - kfree(buf); - - spin_lock(&scd->scd_lock); - rc = -ESHUTDOWN; - break; - } - - rc = srpc_service_post_buffer(scd, buf); - if (rc) - break; /* buf has been freed inside */ - - LASSERT(scd->scd_buf_posting > 0); - scd->scd_buf_posting--; - scd->scd_buf_total++; - scd->scd_buf_low = max(2, scd->scd_buf_total / 4); - } - - if (rc) { - scd->scd_buf_err_stamp = ktime_get_real_seconds(); - scd->scd_buf_err = rc; - - LASSERT(scd->scd_buf_posting > 0); - scd->scd_buf_posting--; - } - - 
spin_unlock(&scd->scd_lock); -} - -int -srpc_service_add_buffers(struct srpc_service *sv, int nbuffer) -{ - struct srpc_service_cd *scd; - int rc = 0; - int i; - - LASSERTF(nbuffer > 0, "nbuffer must be positive: %d\n", nbuffer); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - - scd->scd_buf_err = 0; - scd->scd_buf_err_stamp = 0; - scd->scd_buf_posting = 0; - scd->scd_buf_adjust = nbuffer; - /* start to post buffers */ - swi_schedule_workitem(&scd->scd_buf_wi); - spin_unlock(&scd->scd_lock); - - /* framework service only post buffer for one partition */ - if (srpc_serv_is_framework(sv)) - break; - } - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - /* - * NB: srpc_service_add_buffers() can be called inside - * thread context of lst_serial_wq, and we don't normally - * allow to sleep inside thread context of WI scheduler - * because it will block current scheduler thread from doing - * anything else, even worse, it could deadlock if it's - * waiting on result from another WI of the same scheduler. - * However, it's safe at here because scd_buf_wi is scheduled - * by thread in a different WI scheduler (lst_test_wq), - * so we don't have any risk of deadlock, though this could - * block all WIs pending on lst_serial_wq for a moment - * which is not good but not fatal. 
- */ - lst_wait_until(scd->scd_buf_err || - (!scd->scd_buf_adjust && - !scd->scd_buf_posting), - scd->scd_lock, "waiting for adding buffer\n"); - - if (scd->scd_buf_err && !rc) - rc = scd->scd_buf_err; - - spin_unlock(&scd->scd_lock); - } - - return rc; -} - -void -srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer) -{ - struct srpc_service_cd *scd; - int num; - int i; - - LASSERT(!sv->sv_shuttingdown); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - - num = scd->scd_buf_total + scd->scd_buf_posting; - scd->scd_buf_adjust -= min(nbuffer, num); - - spin_unlock(&scd->scd_lock); - } -} - -/* returns 1 if sv has finished, otherwise 0 */ -int -srpc_finish_service(struct srpc_service *sv) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - int i; - - LASSERT(sv->sv_shuttingdown); /* srpc_shutdown_service called */ - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - swi_cancel_workitem(&scd->scd_buf_wi); - - spin_lock(&scd->scd_lock); - - if (scd->scd_buf_nposted > 0) { - CDEBUG(D_NET, "waiting for %d posted buffers to unlink\n", - scd->scd_buf_nposted); - spin_unlock(&scd->scd_lock); - return 0; - } - - if (list_empty(&scd->scd_rpc_active)) { - spin_unlock(&scd->scd_lock); - continue; - } - - rpc = list_entry(scd->scd_rpc_active.next, - struct srpc_server_rpc, srpc_list); - CNETERR("Active RPC %p on shutdown: sv %s, peer %s, wi %s, ev fired %d type %d status %d lnet %d\n", - rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.swi_state), - rpc->srpc_ev.ev_fired, rpc->srpc_ev.ev_type, - rpc->srpc_ev.ev_status, rpc->srpc_ev.ev_lnet); - spin_unlock(&scd->scd_lock); - return 0; - } - - /* no lock needed from now on */ - srpc_service_fini(sv); - return 1; -} - -/* called with sv->sv_lock held */ -static void -srpc_service_recycle_buffer(struct srpc_service_cd *scd, - struct srpc_buffer *buf) -__must_hold(&scd->scd_lock) -{ - if (!scd->scd_svc->sv_shuttingdown && scd->scd_buf_adjust >= 0) 
{ - if (srpc_service_post_buffer(scd, buf)) { - CWARN("Failed to post %s buffer\n", - scd->scd_svc->sv_name); - } - return; - } - - /* service is shutting down, or we want to recycle some buffers */ - scd->scd_buf_total--; - - if (scd->scd_buf_adjust < 0) { - scd->scd_buf_adjust++; - if (scd->scd_buf_adjust < 0 && - !scd->scd_buf_total && !scd->scd_buf_posting) { - CDEBUG(D_INFO, - "Try to recycle %d buffers but nothing left\n", - scd->scd_buf_adjust); - scd->scd_buf_adjust = 0; - } - } - - spin_unlock(&scd->scd_lock); - kfree(buf); - spin_lock(&scd->scd_lock); -} - -void -srpc_abort_service(struct srpc_service *sv) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - int i; - - CDEBUG(D_NET, "Aborting service: id %d, name %s\n", - sv->sv_id, sv->sv_name); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - - /* - * schedule in-flight RPCs to notice the abort, NB: - * racing with incoming RPCs; complete fix should make test - * RPCs carry session ID in its headers - */ - list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) { - rpc->srpc_aborted = 1; - swi_schedule_workitem(&rpc->srpc_wi); - } - - spin_unlock(&scd->scd_lock); - } -} - -void -srpc_shutdown_service(struct srpc_service *sv) -{ - struct srpc_service_cd *scd; - struct srpc_server_rpc *rpc; - struct srpc_buffer *buf; - int i; - - CDEBUG(D_NET, "Shutting down service: id %d, name %s\n", - sv->sv_id, sv->sv_name); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) - spin_lock(&scd->scd_lock); - - sv->sv_shuttingdown = 1; /* i.e. 
no new active RPC */ - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) - spin_unlock(&scd->scd_lock); - - cfs_percpt_for_each(scd, i, sv->sv_cpt_data) { - spin_lock(&scd->scd_lock); - - /* schedule in-flight RPCs to notice the shutdown */ - list_for_each_entry(rpc, &scd->scd_rpc_active, srpc_list) - swi_schedule_workitem(&rpc->srpc_wi); - - spin_unlock(&scd->scd_lock); - - /* - * OK to traverse scd_buf_posted without lock, since no one - * touches scd_buf_posted now - */ - list_for_each_entry(buf, &scd->scd_buf_posted, buf_list) - LNetMDUnlink(buf->buf_mdh); - } -} - -static int -srpc_send_request(struct srpc_client_rpc *rpc) -{ - struct srpc_event *ev = &rpc->crpc_reqstev; - int rc; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REQUEST_SENT; - - rc = srpc_post_active_rdma(srpc_serv_portal(rpc->crpc_service), - rpc->crpc_service, &rpc->crpc_reqstmsg, - sizeof(struct srpc_msg), LNET_MD_OP_PUT, - rpc->crpc_dest, LNET_NID_ANY, - &rpc->crpc_reqstmdh, ev); - if (rc) { - LASSERT(rc == -ENOMEM); - ev->ev_fired = 1; /* no more event expected */ - } - return rc; -} - -static int -srpc_prepare_reply(struct srpc_client_rpc *rpc) -{ - struct srpc_event *ev = &rpc->crpc_replyev; - __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.rpyid; - int rc; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REPLY_RCVD; - - *id = srpc_next_id(); - - rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id, - &rpc->crpc_replymsg, - sizeof(struct srpc_msg), - LNET_MD_OP_PUT, rpc->crpc_dest, - &rpc->crpc_replymdh, ev); - if (rc) { - LASSERT(rc == -ENOMEM); - ev->ev_fired = 1; /* no more event expected */ - } - return rc; -} - -static int -srpc_prepare_bulk(struct srpc_client_rpc *rpc) -{ - struct srpc_bulk *bk = &rpc->crpc_bulk; - struct srpc_event *ev = &rpc->crpc_bulkev; - __u64 *id = &rpc->crpc_reqstmsg.msg_body.reqst.bulkid; - int rc; - int opt; - - LASSERT(bk->bk_niov <= LNET_MAX_IOV); - - if (!bk->bk_niov) - return 0; /* nothing to do */ - - opt = bk->bk_sink ? 
LNET_MD_OP_PUT : LNET_MD_OP_GET; - opt |= LNET_MD_KIOV; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_BULK_REQ_RCVD; - - *id = srpc_next_id(); - - rc = srpc_post_passive_rdma(SRPC_RDMA_PORTAL, 0, *id, - &bk->bk_iovs[0], bk->bk_niov, opt, - rpc->crpc_dest, &bk->bk_mdh, ev); - if (rc) { - LASSERT(rc == -ENOMEM); - ev->ev_fired = 1; /* no more event expected */ - } - return rc; -} - -static int -srpc_do_bulk(struct srpc_server_rpc *rpc) -{ - struct srpc_event *ev = &rpc->srpc_ev; - struct srpc_bulk *bk = rpc->srpc_bulk; - __u64 id = rpc->srpc_reqstbuf->buf_msg.msg_body.reqst.bulkid; - int rc; - int opt; - - LASSERT(bk); - - opt = bk->bk_sink ? LNET_MD_OP_GET : LNET_MD_OP_PUT; - opt |= LNET_MD_KIOV; - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = bk->bk_sink ? SRPC_BULK_GET_RPLD : SRPC_BULK_PUT_SENT; - - rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, id, - &bk->bk_iovs[0], bk->bk_niov, opt, - rpc->srpc_peer, rpc->srpc_self, - &bk->bk_mdh, ev); - if (rc) - ev->ev_fired = 1; /* no more event expected */ - return rc; -} - -/* only called from srpc_handle_rpc */ -static void -srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) -{ - struct srpc_service_cd *scd = rpc->srpc_scd; - struct srpc_service *sv = scd->scd_svc; - struct srpc_buffer *buffer; - - LASSERT(status || rpc->srpc_wi.swi_state == SWI_STATE_DONE); - - rpc->srpc_status = status; - - CDEBUG_LIMIT(!status ? 
D_NET : D_NETERROR, - "Server RPC %p done: service %s, peer %s, status %s:%d\n", - rpc, sv->sv_name, libcfs_id2str(rpc->srpc_peer), - swi_state2str(rpc->srpc_wi.swi_state), status); - - if (status) { - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_dropped++; - spin_unlock(&srpc_data.rpc_glock); - } - - if (rpc->srpc_done) - (*rpc->srpc_done) (rpc); - LASSERT(!rpc->srpc_bulk); - - spin_lock(&scd->scd_lock); - - if (rpc->srpc_reqstbuf) { - /* - * NB might drop sv_lock in srpc_service_recycle_buffer, but - * sv won't go away for scd_rpc_active must not be empty - */ - srpc_service_recycle_buffer(scd, rpc->srpc_reqstbuf); - rpc->srpc_reqstbuf = NULL; - } - - list_del(&rpc->srpc_list); /* from scd->scd_rpc_active */ - - /* - * No one can schedule me now since: - * - I'm not on scd_rpc_active. - * - all LNet events have been fired. - * Cancel pending schedules and prevent future schedule attempts: - */ - LASSERT(rpc->srpc_ev.ev_fired); - - if (!sv->sv_shuttingdown && !list_empty(&scd->scd_buf_blocked)) { - buffer = list_entry(scd->scd_buf_blocked.next, - struct srpc_buffer, buf_list); - list_del(&buffer->buf_list); - - srpc_init_server_rpc(rpc, scd, buffer); - list_add_tail(&rpc->srpc_list, &scd->scd_rpc_active); - swi_schedule_workitem(&rpc->srpc_wi); - } else { - list_add(&rpc->srpc_list, &scd->scd_rpc_free); - } - - spin_unlock(&scd->scd_lock); -} - -/* handles an incoming RPC */ -void -srpc_handle_rpc(struct swi_workitem *wi) -{ - struct srpc_server_rpc *rpc = container_of(wi, struct srpc_server_rpc, srpc_wi); - struct srpc_service_cd *scd = rpc->srpc_scd; - struct srpc_service *sv = scd->scd_svc; - struct srpc_event *ev = &rpc->srpc_ev; - int rc = 0; - - LASSERT(wi == &rpc->srpc_wi); - - spin_lock(&scd->scd_lock); - - if (sv->sv_shuttingdown || rpc->srpc_aborted) { - spin_unlock(&scd->scd_lock); - - if (rpc->srpc_bulk) - LNetMDUnlink(rpc->srpc_bulk->bk_mdh); - LNetMDUnlink(rpc->srpc_replymdh); - - if (ev->ev_fired) { /* no more event, OK to finish 
*/ - srpc_server_rpc_done(rpc, -ESHUTDOWN); - } - return; - } - - spin_unlock(&scd->scd_lock); - - switch (wi->swi_state) { - default: - LBUG(); - case SWI_STATE_NEWBORN: { - struct srpc_msg *msg; - struct srpc_generic_reply *reply; - - msg = &rpc->srpc_reqstbuf->buf_msg; - reply = &rpc->srpc_replymsg.msg_body.reply; - - if (!msg->msg_magic) { - /* moaned already in srpc_lnet_ev_handler */ - srpc_server_rpc_done(rpc, EBADMSG); - return; - } - - srpc_unpack_msg_hdr(msg); - if (msg->msg_version != SRPC_MSG_VERSION) { - CWARN("Version mismatch: %u, %u expected, from %s\n", - msg->msg_version, SRPC_MSG_VERSION, - libcfs_id2str(rpc->srpc_peer)); - reply->status = EPROTO; - /* drop through and send reply */ - } else { - reply->status = 0; - rc = (*sv->sv_handler)(rpc); - LASSERT(!reply->status || !rpc->srpc_bulk); - if (rc) { - srpc_server_rpc_done(rpc, rc); - return; - } - } - - wi->swi_state = SWI_STATE_BULK_STARTED; - - if (rpc->srpc_bulk) { - rc = srpc_do_bulk(rpc); - if (!rc) - return; /* wait for bulk */ - - LASSERT(ev->ev_fired); - ev->ev_status = rc; - } - } - /* fall through */ - case SWI_STATE_BULK_STARTED: - LASSERT(!rpc->srpc_bulk || ev->ev_fired); - - if (rpc->srpc_bulk) { - rc = ev->ev_status; - - if (sv->sv_bulk_ready) - rc = (*sv->sv_bulk_ready) (rpc, rc); - - if (rc) { - srpc_server_rpc_done(rpc, rc); - return; - } - } - - wi->swi_state = SWI_STATE_REPLY_SUBMITTED; - rc = srpc_send_reply(rpc); - if (!rc) - return; /* wait for reply */ - srpc_server_rpc_done(rpc, rc); - return; - - case SWI_STATE_REPLY_SUBMITTED: - if (!ev->ev_fired) { - CERROR("RPC %p: bulk %p, service %d\n", - rpc, rpc->srpc_bulk, sv->sv_id); - CERROR("Event: status %d, type %d, lnet %d\n", - ev->ev_status, ev->ev_type, ev->ev_lnet); - LASSERT(ev->ev_fired); - } - - wi->swi_state = SWI_STATE_DONE; - srpc_server_rpc_done(rpc, ev->ev_status); - return; - } -} - -static void -srpc_client_rpc_expired(void *data) -{ - struct srpc_client_rpc *rpc = data; - - CWARN("Client RPC expired: service 
%d, peer %s, timeout %d.\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - rpc->crpc_timeout); - - spin_lock(&rpc->crpc_lock); - - rpc->crpc_timeout = 0; - srpc_abort_rpc(rpc, -ETIMEDOUT); - - spin_unlock(&rpc->crpc_lock); - - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_expired++; - spin_unlock(&srpc_data.rpc_glock); -} - -static void -srpc_add_client_rpc_timer(struct srpc_client_rpc *rpc) -{ - struct stt_timer *timer = &rpc->crpc_timer; - - if (!rpc->crpc_timeout) - return; - - INIT_LIST_HEAD(&timer->stt_list); - timer->stt_data = rpc; - timer->stt_func = srpc_client_rpc_expired; - timer->stt_expires = ktime_get_real_seconds() + rpc->crpc_timeout; - stt_add_timer(timer); -} - -/* - * Called with rpc->crpc_lock held. - * - * Upon exit the RPC expiry timer is not queued and the handler is not - * running on any CPU. - */ -static void -srpc_del_client_rpc_timer(struct srpc_client_rpc *rpc) -{ - /* timer not planted or already exploded */ - if (!rpc->crpc_timeout) - return; - - /* timer successfully defused */ - if (stt_del_timer(&rpc->crpc_timer)) - return; - - /* timer detonated, wait for it to explode */ - while (rpc->crpc_timeout) { - spin_unlock(&rpc->crpc_lock); - - schedule(); - - spin_lock(&rpc->crpc_lock); - } -} - -static void -srpc_client_rpc_done(struct srpc_client_rpc *rpc, int status) -{ - struct swi_workitem *wi = &rpc->crpc_wi; - - LASSERT(status || wi->swi_state == SWI_STATE_DONE); - - spin_lock(&rpc->crpc_lock); - - rpc->crpc_closed = 1; - if (!rpc->crpc_status) - rpc->crpc_status = status; - - srpc_del_client_rpc_timer(rpc); - - CDEBUG_LIMIT(!status ? D_NET : D_NETERROR, - "Client RPC done: service %d, peer %s, status %s:%d:%d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(wi->swi_state), rpc->crpc_aborted, status); - - /* - * No one can schedule me now since: - * - RPC timer has been defused. - * - all LNet events have been fired. 
- * - crpc_closed has been set, preventing srpc_abort_rpc from - * scheduling me. - * Cancel pending schedules and prevent future schedule attempts: - */ - LASSERT(!srpc_event_pending(rpc)); - - spin_unlock(&rpc->crpc_lock); - - (*rpc->crpc_done)(rpc); -} - -/* sends an outgoing RPC */ -void -srpc_send_rpc(struct swi_workitem *wi) -{ - int rc = 0; - struct srpc_client_rpc *rpc; - struct srpc_msg *reply; - int do_bulk; - - LASSERT(wi); - - rpc = container_of(wi, struct srpc_client_rpc, crpc_wi); - - LASSERT(rpc); - LASSERT(wi == &rpc->crpc_wi); - - reply = &rpc->crpc_replymsg; - do_bulk = rpc->crpc_bulk.bk_niov > 0; - - spin_lock(&rpc->crpc_lock); - - if (rpc->crpc_aborted) { - spin_unlock(&rpc->crpc_lock); - goto abort; - } - - spin_unlock(&rpc->crpc_lock); - - switch (wi->swi_state) { - default: - LBUG(); - case SWI_STATE_NEWBORN: - LASSERT(!srpc_event_pending(rpc)); - - rc = srpc_prepare_reply(rpc); - if (rc) { - srpc_client_rpc_done(rpc, rc); - return; - } - - rc = srpc_prepare_bulk(rpc); - if (rc) - break; - - wi->swi_state = SWI_STATE_REQUEST_SUBMITTED; - rc = srpc_send_request(rpc); - break; - - case SWI_STATE_REQUEST_SUBMITTED: - /* - * CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any - * order; however, they're processed in a strict order: - * rqt, rpy, and bulk. 
- */ - if (!rpc->crpc_reqstev.ev_fired) - break; - - rc = rpc->crpc_reqstev.ev_status; - if (rc) - break; - - wi->swi_state = SWI_STATE_REQUEST_SENT; - /* perhaps more events */ - /* fall through */ - case SWI_STATE_REQUEST_SENT: { - enum srpc_msg_type type = srpc_service2reply(rpc->crpc_service); - - if (!rpc->crpc_replyev.ev_fired) - break; - - rc = rpc->crpc_replyev.ev_status; - if (rc) - break; - - srpc_unpack_msg_hdr(reply); - if (reply->msg_type != type || - (reply->msg_magic != SRPC_MSG_MAGIC && - reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) { - CWARN("Bad message from %s: type %u (%d expected), magic %u (%d expected).\n", - libcfs_id2str(rpc->crpc_dest), - reply->msg_type, type, - reply->msg_magic, SRPC_MSG_MAGIC); - rc = -EBADMSG; - break; - } - - if (do_bulk && reply->msg_body.reply.status) { - CWARN("Remote error %d at %s, unlink bulk buffer in case peer didn't initiate bulk transfer\n", - reply->msg_body.reply.status, - libcfs_id2str(rpc->crpc_dest)); - LNetMDUnlink(rpc->crpc_bulk.bk_mdh); - } - - wi->swi_state = SWI_STATE_REPLY_RECEIVED; - } - /* fall through */ - case SWI_STATE_REPLY_RECEIVED: - if (do_bulk && !rpc->crpc_bulkev.ev_fired) - break; - - rc = do_bulk ? rpc->crpc_bulkev.ev_status : 0; - - /* - * Bulk buffer was unlinked due to remote error. Clear error - * since reply buffer still contains valid data. - * NB rpc->crpc_done shouldn't look into bulk data in case of - * remote error. 
- */ - if (do_bulk && rpc->crpc_bulkev.ev_lnet == LNET_EVENT_UNLINK && - !rpc->crpc_status && reply->msg_body.reply.status) - rc = 0; - - wi->swi_state = SWI_STATE_DONE; - srpc_client_rpc_done(rpc, rc); - return; - } - - if (rc) { - spin_lock(&rpc->crpc_lock); - srpc_abort_rpc(rpc, rc); - spin_unlock(&rpc->crpc_lock); - } - -abort: - if (rpc->crpc_aborted) { - LNetMDUnlink(rpc->crpc_reqstmdh); - LNetMDUnlink(rpc->crpc_replymdh); - LNetMDUnlink(rpc->crpc_bulk.bk_mdh); - - if (!srpc_event_pending(rpc)) { - srpc_client_rpc_done(rpc, -EINTR); - return; - } - } -} - -struct srpc_client_rpc * -srpc_create_client_rpc(struct lnet_process_id peer, int service, - int nbulkiov, int bulklen, - void (*rpc_done)(struct srpc_client_rpc *), - void (*rpc_fini)(struct srpc_client_rpc *), void *priv) -{ - struct srpc_client_rpc *rpc; - - rpc = kzalloc(offsetof(struct srpc_client_rpc, - crpc_bulk.bk_iovs[nbulkiov]), GFP_KERNEL); - if (!rpc) - return NULL; - - srpc_init_client_rpc(rpc, peer, service, nbulkiov, - bulklen, rpc_done, rpc_fini, priv); - return rpc; -} - -/* called with rpc->crpc_lock held */ -void -srpc_abort_rpc(struct srpc_client_rpc *rpc, int why) -{ - LASSERT(why); - - if (rpc->crpc_aborted || /* already aborted */ - rpc->crpc_closed) /* callback imminent */ - return; - - CDEBUG(D_NET, "Aborting RPC: service %d, peer %s, state %s, why %d\n", - rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), - swi_state2str(rpc->crpc_wi.swi_state), why); - - rpc->crpc_aborted = 1; - rpc->crpc_status = why; - swi_schedule_workitem(&rpc->crpc_wi); -} - -/* called with rpc->crpc_lock held */ -void -srpc_post_rpc(struct srpc_client_rpc *rpc) -{ - LASSERT(!rpc->crpc_aborted); - LASSERT(srpc_data.rpc_state == SRPC_STATE_RUNNING); - - CDEBUG(D_NET, "Posting RPC: peer %s, service %d, timeout %d\n", - libcfs_id2str(rpc->crpc_dest), rpc->crpc_service, - rpc->crpc_timeout); - - srpc_add_client_rpc_timer(rpc); - swi_schedule_workitem(&rpc->crpc_wi); -} - -int -srpc_send_reply(struct 
srpc_server_rpc *rpc) -{ - struct srpc_event *ev = &rpc->srpc_ev; - struct srpc_msg *msg = &rpc->srpc_replymsg; - struct srpc_buffer *buffer = rpc->srpc_reqstbuf; - struct srpc_service_cd *scd = rpc->srpc_scd; - struct srpc_service *sv = scd->scd_svc; - __u64 rpyid; - int rc; - - LASSERT(buffer); - rpyid = buffer->buf_msg.msg_body.reqst.rpyid; - - spin_lock(&scd->scd_lock); - - if (!sv->sv_shuttingdown && !srpc_serv_is_framework(sv)) { - /* - * Repost buffer before replying since test client - * might send me another RPC once it gets the reply - */ - if (srpc_service_post_buffer(scd, buffer)) - CWARN("Failed to repost %s buffer\n", sv->sv_name); - rpc->srpc_reqstbuf = NULL; - } - - spin_unlock(&scd->scd_lock); - - ev->ev_fired = 0; - ev->ev_data = rpc; - ev->ev_type = SRPC_REPLY_SENT; - - msg->msg_magic = SRPC_MSG_MAGIC; - msg->msg_version = SRPC_MSG_VERSION; - msg->msg_type = srpc_service2reply(sv->sv_id); - - rc = srpc_post_active_rdma(SRPC_RDMA_PORTAL, rpyid, msg, - sizeof(*msg), LNET_MD_OP_PUT, - rpc->srpc_peer, rpc->srpc_self, - &rpc->srpc_replymdh, ev); - if (rc) - ev->ev_fired = 1; /* no more event expected */ - return rc; -} - -/* when in kernel always called with LNET_LOCK() held, and in thread context */ -static void -srpc_lnet_ev_handler(struct lnet_event *ev) -{ - struct srpc_service_cd *scd; - struct srpc_event *rpcev = ev->md.user_ptr; - struct srpc_client_rpc *crpc; - struct srpc_server_rpc *srpc; - struct srpc_buffer *buffer; - struct srpc_service *sv; - struct srpc_msg *msg; - enum srpc_msg_type type; - - LASSERT(!in_interrupt()); - - if (ev->status) { - __u32 errors; - - spin_lock(&srpc_data.rpc_glock); - if (ev->status != -ECANCELED) /* cancellation is not error */ - srpc_data.rpc_counters.errors++; - errors = srpc_data.rpc_counters.errors; - spin_unlock(&srpc_data.rpc_glock); - - CNETERR("LNet event status %d type %d, RPC errors %u\n", - ev->status, ev->type, errors); - } - - rpcev->ev_lnet = ev->type; - - switch (rpcev->ev_type) { - default: - 
CERROR("Unknown event: status %d, type %d, lnet %d\n", - rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet); - LBUG(); - case SRPC_REQUEST_SENT: - if (!ev->status && ev->type != LNET_EVENT_UNLINK) { - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_sent++; - spin_unlock(&srpc_data.rpc_glock); - } - /* fall through */ - case SRPC_REPLY_RCVD: - case SRPC_BULK_REQ_RCVD: - crpc = rpcev->ev_data; - - if (rpcev != &crpc->crpc_reqstev && - rpcev != &crpc->crpc_replyev && - rpcev != &crpc->crpc_bulkev) { - CERROR("rpcev %p, crpc %p, reqstev %p, replyev %p, bulkev %p\n", - rpcev, crpc, &crpc->crpc_reqstev, - &crpc->crpc_replyev, &crpc->crpc_bulkev); - CERROR("Bad event: status %d, type %d, lnet %d\n", - rpcev->ev_status, rpcev->ev_type, rpcev->ev_lnet); - LBUG(); - } - - spin_lock(&crpc->crpc_lock); - - LASSERT(!rpcev->ev_fired); - rpcev->ev_fired = 1; - rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? - -EINTR : ev->status; - swi_schedule_workitem(&crpc->crpc_wi); - - spin_unlock(&crpc->crpc_lock); - break; - - case SRPC_REQUEST_RCVD: - scd = rpcev->ev_data; - sv = scd->scd_svc; - - LASSERT(rpcev == &scd->scd_ev); - - spin_lock(&scd->scd_lock); - - LASSERT(ev->unlinked); - LASSERT(ev->type == LNET_EVENT_PUT || - ev->type == LNET_EVENT_UNLINK); - LASSERT(ev->type != LNET_EVENT_UNLINK || - sv->sv_shuttingdown); - - buffer = container_of(ev->md.start, struct srpc_buffer, buf_msg); - buffer->buf_peer = ev->initiator; - buffer->buf_self = ev->target.nid; - - LASSERT(scd->scd_buf_nposted > 0); - scd->scd_buf_nposted--; - - if (sv->sv_shuttingdown) { - /* - * Leave buffer on scd->scd_buf_nposted since - * srpc_finish_service needs to traverse it. 
- */ - spin_unlock(&scd->scd_lock); - break; - } - - if (scd->scd_buf_err_stamp && - scd->scd_buf_err_stamp < ktime_get_real_seconds()) { - /* re-enable adding buffer */ - scd->scd_buf_err_stamp = 0; - scd->scd_buf_err = 0; - } - - if (!scd->scd_buf_err && /* adding buffer is enabled */ - !scd->scd_buf_adjust && - scd->scd_buf_nposted < scd->scd_buf_low) { - scd->scd_buf_adjust = max(scd->scd_buf_total / 2, - SFW_TEST_WI_MIN); - swi_schedule_workitem(&scd->scd_buf_wi); - } - - list_del(&buffer->buf_list); /* from scd->scd_buf_posted */ - msg = &buffer->buf_msg; - type = srpc_service2request(sv->sv_id); - - if (ev->status || ev->mlength != sizeof(*msg) || - (msg->msg_type != type && - msg->msg_type != __swab32(type)) || - (msg->msg_magic != SRPC_MSG_MAGIC && - msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) { - CERROR("Dropping RPC (%s) from %s: status %d mlength %d type %u magic %u.\n", - sv->sv_name, libcfs_id2str(ev->initiator), - ev->status, ev->mlength, - msg->msg_type, msg->msg_magic); - - /* - * NB can't call srpc_service_recycle_buffer here since - * it may call LNetM[DE]Attach. 
The invalid magic tells - * srpc_handle_rpc to drop this RPC - */ - msg->msg_magic = 0; - } - - if (!list_empty(&scd->scd_rpc_free)) { - srpc = list_entry(scd->scd_rpc_free.next, - struct srpc_server_rpc, - srpc_list); - list_del(&srpc->srpc_list); - - srpc_init_server_rpc(srpc, scd, buffer); - list_add_tail(&srpc->srpc_list, - &scd->scd_rpc_active); - swi_schedule_workitem(&srpc->srpc_wi); - } else { - list_add_tail(&buffer->buf_list, - &scd->scd_buf_blocked); - } - - spin_unlock(&scd->scd_lock); - - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.rpcs_rcvd++; - spin_unlock(&srpc_data.rpc_glock); - break; - - case SRPC_BULK_GET_RPLD: - LASSERT(ev->type == LNET_EVENT_SEND || - ev->type == LNET_EVENT_REPLY || - ev->type == LNET_EVENT_UNLINK); - - if (!ev->unlinked) - break; /* wait for final event */ - /* fall through */ - case SRPC_BULK_PUT_SENT: - if (!ev->status && ev->type != LNET_EVENT_UNLINK) { - spin_lock(&srpc_data.rpc_glock); - - if (rpcev->ev_type == SRPC_BULK_GET_RPLD) - srpc_data.rpc_counters.bulk_get += ev->mlength; - else - srpc_data.rpc_counters.bulk_put += ev->mlength; - - spin_unlock(&srpc_data.rpc_glock); - } - /* fall through */ - case SRPC_REPLY_SENT: - srpc = rpcev->ev_data; - scd = srpc->srpc_scd; - - LASSERT(rpcev == &srpc->srpc_ev); - - spin_lock(&scd->scd_lock); - - rpcev->ev_fired = 1; - rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? 
- -EINTR : ev->status; - swi_schedule_workitem(&srpc->srpc_wi); - - spin_unlock(&scd->scd_lock); - break; - } -} - -int -srpc_startup(void) -{ - int rc; - - memset(&srpc_data, 0, sizeof(struct smoketest_rpc)); - spin_lock_init(&srpc_data.rpc_glock); - - /* 1 second pause to avoid timestamp reuse */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - srpc_data.rpc_matchbits = ((__u64)ktime_get_real_seconds()) << 48; - - srpc_data.rpc_state = SRPC_STATE_NONE; - - rc = LNetNIInit(LNET_PID_LUSTRE); - if (rc < 0) { - CERROR("LNetNIInit() has failed: %d\n", rc); - return rc; - } - - srpc_data.rpc_state = SRPC_STATE_NI_INIT; - - LNetInvalidateEQHandle(&srpc_data.rpc_lnet_eq); - rc = LNetEQAlloc(0, srpc_lnet_ev_handler, &srpc_data.rpc_lnet_eq); - if (rc) { - CERROR("LNetEQAlloc() has failed: %d\n", rc); - goto bail; - } - - rc = LNetSetLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL); - LASSERT(!rc); - rc = LNetSetLazyPortal(SRPC_REQUEST_PORTAL); - LASSERT(!rc); - - srpc_data.rpc_state = SRPC_STATE_EQ_INIT; - - rc = stt_startup(); - -bail: - if (rc) - srpc_shutdown(); - else - srpc_data.rpc_state = SRPC_STATE_RUNNING; - - return rc; -} - -void -srpc_shutdown(void) -{ - int i; - int rc; - int state; - - state = srpc_data.rpc_state; - srpc_data.rpc_state = SRPC_STATE_STOPPING; - - switch (state) { - default: - LBUG(); - case SRPC_STATE_RUNNING: - spin_lock(&srpc_data.rpc_glock); - - for (i = 0; i <= SRPC_SERVICE_MAX_ID; i++) { - struct srpc_service *sv = srpc_data.rpc_services[i]; - - LASSERTF(!sv, "service not empty: id %d, name %s\n", - i, sv->sv_name); - } - - spin_unlock(&srpc_data.rpc_glock); - - stt_shutdown(); - /* fall through */ - case SRPC_STATE_EQ_INIT: - rc = LNetClearLazyPortal(SRPC_FRAMEWORK_REQUEST_PORTAL); - rc = LNetClearLazyPortal(SRPC_REQUEST_PORTAL); - LASSERT(!rc); - rc = LNetEQFree(srpc_data.rpc_lnet_eq); - LASSERT(!rc); /* the EQ should have no user by now */ - /* fall through */ - case SRPC_STATE_NI_INIT: - LNetNIFini(); - } -} diff --git 
a/drivers/staging/lustre/lnet/selftest/rpc.h b/drivers/staging/lustre/lnet/selftest/rpc.h deleted file mode 100644 index 465b5b534423..000000000000 --- a/drivers/staging/lustre/lnet/selftest/rpc.h +++ /dev/null @@ -1,295 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef __SELFTEST_RPC_H__ -#define __SELFTEST_RPC_H__ - -#include - -/* - * LST wired structures - * - * XXX: *REPLY == *REQST + 1 - */ -enum srpc_msg_type { - SRPC_MSG_MKSN_REQST = 0, - SRPC_MSG_MKSN_REPLY = 1, - SRPC_MSG_RMSN_REQST = 2, - SRPC_MSG_RMSN_REPLY = 3, - SRPC_MSG_BATCH_REQST = 4, - SRPC_MSG_BATCH_REPLY = 5, - SRPC_MSG_STAT_REQST = 6, - SRPC_MSG_STAT_REPLY = 7, - SRPC_MSG_TEST_REQST = 8, - SRPC_MSG_TEST_REPLY = 9, - SRPC_MSG_DEBUG_REQST = 10, - SRPC_MSG_DEBUG_REPLY = 11, - SRPC_MSG_BRW_REQST = 12, - SRPC_MSG_BRW_REPLY = 13, - SRPC_MSG_PING_REQST = 14, - SRPC_MSG_PING_REPLY = 15, - SRPC_MSG_JOIN_REQST = 16, - SRPC_MSG_JOIN_REPLY = 17, -}; - -/* CAVEAT EMPTOR: - * All srpc_*_reqst_t's 1st field must be matchbits of reply buffer, - * and 2nd field matchbits of bulk buffer if any. - * - * All srpc_*_reply_t's 1st field must be a __u32 status, and 2nd field - * session id if needed. - */ -struct srpc_generic_reqst { - __u64 rpyid; /* reply buffer matchbits */ - __u64 bulkid; /* bulk buffer matchbits */ -} WIRE_ATTR; - -struct srpc_generic_reply { - __u32 status; - struct lst_sid sid; -} WIRE_ATTR; - -/* FRAMEWORK RPCs */ -struct srpc_mksn_reqst { - __u64 mksn_rpyid; /* reply buffer matchbits */ - struct lst_sid mksn_sid; /* session id */ - __u32 mksn_force; /* use brute force */ - char mksn_name[LST_NAME_SIZE]; -} WIRE_ATTR; /* make session request */ - -struct srpc_mksn_reply { - __u32 mksn_status; /* session status */ - struct lst_sid mksn_sid; /* session id */ - __u32 mksn_timeout; /* session timeout */ - char mksn_name[LST_NAME_SIZE]; -} WIRE_ATTR; /* make session reply */ - -struct srpc_rmsn_reqst { - __u64 rmsn_rpyid; /* reply buffer matchbits */ - struct lst_sid rmsn_sid; /* session id */ -} WIRE_ATTR; /* remove session request */ - -struct srpc_rmsn_reply { - __u32 rmsn_status; - struct lst_sid rmsn_sid; /* session id */ -} WIRE_ATTR; /* remove session reply */ - -struct srpc_join_reqst { - __u64 join_rpyid; /* reply buffer matchbits */ - 
struct lst_sid join_sid; /* session id to join */ - char join_group[LST_NAME_SIZE]; /* group name */ -} WIRE_ATTR; - -struct srpc_join_reply { - __u32 join_status; /* returned status */ - struct lst_sid join_sid; /* session id */ - __u32 join_timeout; /* # seconds' inactivity to - * expire - */ - char join_session[LST_NAME_SIZE]; /* session name */ -} WIRE_ATTR; - -struct srpc_debug_reqst { - __u64 dbg_rpyid; /* reply buffer matchbits */ - struct lst_sid dbg_sid; /* session id */ - __u32 dbg_flags; /* bitmap of debug */ -} WIRE_ATTR; - -struct srpc_debug_reply { - __u32 dbg_status; /* returned code */ - struct lst_sid dbg_sid; /* session id */ - __u32 dbg_timeout; /* session timeout */ - __u32 dbg_nbatch; /* # of batches in the node */ - char dbg_name[LST_NAME_SIZE]; /* session name */ -} WIRE_ATTR; - -#define SRPC_BATCH_OPC_RUN 1 -#define SRPC_BATCH_OPC_STOP 2 -#define SRPC_BATCH_OPC_QUERY 3 - -struct srpc_batch_reqst { - __u64 bar_rpyid; /* reply buffer matchbits */ - struct lst_sid bar_sid; /* session id */ - struct lst_bid bar_bid; /* batch id */ - __u32 bar_opc; /* create/start/stop batch */ - __u32 bar_testidx; /* index of test */ - __u32 bar_arg; /* parameters */ -} WIRE_ATTR; - -struct srpc_batch_reply { - __u32 bar_status; /* status of request */ - struct lst_sid bar_sid; /* session id */ - __u32 bar_active; /* # of active tests in batch/test */ - __u32 bar_time; /* remained time */ -} WIRE_ATTR; - -struct srpc_stat_reqst { - __u64 str_rpyid; /* reply buffer matchbits */ - struct lst_sid str_sid; /* session id */ - __u32 str_type; /* type of stat */ -} WIRE_ATTR; - -struct srpc_stat_reply { - __u32 str_status; - struct lst_sid str_sid; - struct sfw_counters str_fw; - struct srpc_counters str_rpc; - struct lnet_counters str_lnet; -} WIRE_ATTR; - -struct test_bulk_req { - __u32 blk_opc; /* bulk operation code */ - __u32 blk_npg; /* # of pages */ - __u32 blk_flags; /* reserved flags */ -} WIRE_ATTR; - -struct test_bulk_req_v1 { - __u16 blk_opc; /* bulk 
operation code */ - __u16 blk_flags; /* data check flags */ - __u32 blk_len; /* data length */ - __u32 blk_offset; /* offset */ -} WIRE_ATTR; - -struct test_ping_req { - __u32 png_size; /* size of ping message */ - __u32 png_flags; /* reserved flags */ -} WIRE_ATTR; - -struct srpc_test_reqst { - __u64 tsr_rpyid; /* reply buffer matchbits */ - __u64 tsr_bulkid; /* bulk buffer matchbits */ - struct lst_sid tsr_sid; /* session id */ - struct lst_bid tsr_bid; /* batch id */ - __u32 tsr_service; /* test type: bulk|ping|... */ - __u32 tsr_loop; /* test client loop count or - * # server buffers needed - */ - __u32 tsr_concur; /* concurrency of test */ - __u8 tsr_is_client; /* is test client or not */ - __u8 tsr_stop_onerr; /* stop on error */ - __u32 tsr_ndest; /* # of dest nodes */ - - union { - struct test_ping_req ping; - struct test_bulk_req bulk_v0; - struct test_bulk_req_v1 bulk_v1; - } tsr_u; -} WIRE_ATTR; - -struct srpc_test_reply { - __u32 tsr_status; /* returned code */ - struct lst_sid tsr_sid; -} WIRE_ATTR; - -/* TEST RPCs */ -struct srpc_ping_reqst { - __u64 pnr_rpyid; - __u32 pnr_magic; - __u32 pnr_seq; - __u64 pnr_time_sec; - __u64 pnr_time_usec; -} WIRE_ATTR; - -struct srpc_ping_reply { - __u32 pnr_status; - __u32 pnr_magic; - __u32 pnr_seq; -} WIRE_ATTR; - -struct srpc_brw_reqst { - __u64 brw_rpyid; /* reply buffer matchbits */ - __u64 brw_bulkid; /* bulk buffer matchbits */ - __u32 brw_rw; /* read or write */ - __u32 brw_len; /* bulk data len */ - __u32 brw_flags; /* bulk data patterns */ -} WIRE_ATTR; /* bulk r/w request */ - -struct srpc_brw_reply { - __u32 brw_status; -} WIRE_ATTR; /* bulk r/w reply */ - -#define SRPC_MSG_MAGIC 0xeeb0f00d -#define SRPC_MSG_VERSION 1 - -struct srpc_msg { - __u32 msg_magic; /* magic number */ - __u32 msg_version; /* message version number */ - __u32 msg_type; /* type of message body: srpc_msg_type */ - __u32 msg_reserved0; - __u32 msg_reserved1; - __u32 msg_ses_feats; /* test session features */ - union { - struct 
srpc_generic_reqst reqst; - struct srpc_generic_reply reply; - - struct srpc_mksn_reqst mksn_reqst; - struct srpc_mksn_reply mksn_reply; - struct srpc_rmsn_reqst rmsn_reqst; - struct srpc_rmsn_reply rmsn_reply; - struct srpc_debug_reqst dbg_reqst; - struct srpc_debug_reply dbg_reply; - struct srpc_batch_reqst bat_reqst; - struct srpc_batch_reply bat_reply; - struct srpc_stat_reqst stat_reqst; - struct srpc_stat_reply stat_reply; - struct srpc_test_reqst tes_reqst; - struct srpc_test_reply tes_reply; - struct srpc_join_reqst join_reqst; - struct srpc_join_reply join_reply; - - struct srpc_ping_reqst ping_reqst; - struct srpc_ping_reply ping_reply; - struct srpc_brw_reqst brw_reqst; - struct srpc_brw_reply brw_reply; - } msg_body; -} WIRE_ATTR; - -static inline void -srpc_unpack_msg_hdr(struct srpc_msg *msg) -{ - if (msg->msg_magic == SRPC_MSG_MAGIC) - return; /* no flipping needed */ - - /* - * We do not swap the magic number here as it is needed to - * determine whether the body needs to be swapped. - */ - /* __swab32s(&msg->msg_magic); */ - __swab32s(&msg->msg_type); - __swab32s(&msg->msg_version); - __swab32s(&msg->msg_ses_feats); - __swab32s(&msg->msg_reserved0); - __swab32s(&msg->msg_reserved1); -} - -#endif /* __SELFTEST_RPC_H__ */ diff --git a/drivers/staging/lustre/lnet/selftest/selftest.h b/drivers/staging/lustre/lnet/selftest/selftest.h deleted file mode 100644 index 8737fa96b192..000000000000 --- a/drivers/staging/lustre/lnet/selftest/selftest.h +++ /dev/null @@ -1,622 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/selftest.h - * - * Author: Isaac Huang - */ -#ifndef __SELFTEST_SELFTEST_H__ -#define __SELFTEST_SELFTEST_H__ - -#define LNET_ONLY - -#include -#include -#include - -#include "rpc.h" -#include "timer.h" - -#ifndef MADE_WITHOUT_COMPROMISE -#define MADE_WITHOUT_COMPROMISE -#endif - -#define SWI_STATE_NEWBORN 0 -#define SWI_STATE_REPLY_SUBMITTED 1 -#define SWI_STATE_REPLY_SENT 2 -#define SWI_STATE_REQUEST_SUBMITTED 3 -#define SWI_STATE_REQUEST_SENT 4 -#define SWI_STATE_REPLY_RECEIVED 5 -#define SWI_STATE_BULK_STARTED 6 -#define SWI_STATE_DONE 10 - -/* forward refs */ -struct srpc_service; -struct srpc_service_cd; -struct sfw_test_unit; -struct sfw_test_instance; - -/* services below SRPC_FRAMEWORK_SERVICE_MAX_ID are framework - * services, e.g. create/modify session. 
- */ -#define SRPC_SERVICE_DEBUG 0 -#define SRPC_SERVICE_MAKE_SESSION 1 -#define SRPC_SERVICE_REMOVE_SESSION 2 -#define SRPC_SERVICE_BATCH 3 -#define SRPC_SERVICE_TEST 4 -#define SRPC_SERVICE_QUERY_STAT 5 -#define SRPC_SERVICE_JOIN 6 -#define SRPC_FRAMEWORK_SERVICE_MAX_ID 10 -/* other services start from SRPC_FRAMEWORK_SERVICE_MAX_ID+1 */ -#define SRPC_SERVICE_BRW 11 -#define SRPC_SERVICE_PING 12 -#define SRPC_SERVICE_MAX_ID 12 - -#define SRPC_REQUEST_PORTAL 50 -/* a lazy portal for framework RPC requests */ -#define SRPC_FRAMEWORK_REQUEST_PORTAL 51 -/* all reply/bulk RDMAs go to this portal */ -#define SRPC_RDMA_PORTAL 52 - -static inline enum srpc_msg_type -srpc_service2request(int service) -{ - switch (service) { - default: - LBUG(); - case SRPC_SERVICE_DEBUG: - return SRPC_MSG_DEBUG_REQST; - - case SRPC_SERVICE_MAKE_SESSION: - return SRPC_MSG_MKSN_REQST; - - case SRPC_SERVICE_REMOVE_SESSION: - return SRPC_MSG_RMSN_REQST; - - case SRPC_SERVICE_BATCH: - return SRPC_MSG_BATCH_REQST; - - case SRPC_SERVICE_TEST: - return SRPC_MSG_TEST_REQST; - - case SRPC_SERVICE_QUERY_STAT: - return SRPC_MSG_STAT_REQST; - - case SRPC_SERVICE_BRW: - return SRPC_MSG_BRW_REQST; - - case SRPC_SERVICE_PING: - return SRPC_MSG_PING_REQST; - - case SRPC_SERVICE_JOIN: - return SRPC_MSG_JOIN_REQST; - } -} - -static inline enum srpc_msg_type -srpc_service2reply(int service) -{ - return srpc_service2request(service) + 1; -} - -enum srpc_event_type { - SRPC_BULK_REQ_RCVD = 1, /* passive bulk request(PUT sink/GET source) - * received - */ - SRPC_BULK_PUT_SENT = 2, /* active bulk PUT sent (source) */ - SRPC_BULK_GET_RPLD = 3, /* active bulk GET replied (sink) */ - SRPC_REPLY_RCVD = 4, /* incoming reply received */ - SRPC_REPLY_SENT = 5, /* outgoing reply sent */ - SRPC_REQUEST_RCVD = 6, /* incoming request received */ - SRPC_REQUEST_SENT = 7, /* outgoing request sent */ -}; - -/* RPC event */ -struct srpc_event { - enum srpc_event_type ev_type; /* what's up */ - enum lnet_event_kind ev_lnet; /* 
LNet event type */ - int ev_fired; /* LNet event fired? */ - int ev_status; /* LNet event status */ - void *ev_data; /* owning server/client RPC */ -}; - -/* bulk descriptor */ -struct srpc_bulk { - int bk_len; /* len of bulk data */ - struct lnet_handle_md bk_mdh; - int bk_sink; /* sink/source */ - int bk_niov; /* # iov in bk_iovs */ - struct bio_vec bk_iovs[0]; -}; - -/* message buffer descriptor */ -struct srpc_buffer { - struct list_head buf_list; /* chain on srpc_service::*_msgq */ - struct srpc_msg buf_msg; - struct lnet_handle_md buf_mdh; - lnet_nid_t buf_self; - struct lnet_process_id buf_peer; -}; - -struct swi_workitem; -typedef void (*swi_action_t) (struct swi_workitem *); - -struct swi_workitem { - struct workqueue_struct *swi_wq; - struct work_struct swi_work; - swi_action_t swi_action; - int swi_state; -}; - -/* server-side state of a RPC */ -struct srpc_server_rpc { - /* chain on srpc_service::*_rpcq */ - struct list_head srpc_list; - struct srpc_service_cd *srpc_scd; - struct swi_workitem srpc_wi; - struct srpc_event srpc_ev; /* bulk/reply event */ - lnet_nid_t srpc_self; - struct lnet_process_id srpc_peer; - struct srpc_msg srpc_replymsg; - struct lnet_handle_md srpc_replymdh; - struct srpc_buffer *srpc_reqstbuf; - struct srpc_bulk *srpc_bulk; - - unsigned int srpc_aborted; /* being given up */ - int srpc_status; - void (*srpc_done)(struct srpc_server_rpc *); -}; - -/* client-side state of a RPC */ -struct srpc_client_rpc { - struct list_head crpc_list; /* chain on user's lists */ - spinlock_t crpc_lock; /* serialize */ - int crpc_service; - atomic_t crpc_refcount; - int crpc_timeout; /* # seconds to wait for reply */ - struct stt_timer crpc_timer; - struct swi_workitem crpc_wi; - struct lnet_process_id crpc_dest; - - void (*crpc_done)(struct srpc_client_rpc *); - void (*crpc_fini)(struct srpc_client_rpc *); - int crpc_status; /* completion status */ - void *crpc_priv; /* caller data */ - - /* state flags */ - unsigned int crpc_aborted:1; /* being 
given up */ - unsigned int crpc_closed:1; /* completed */ - - /* RPC events */ - struct srpc_event crpc_bulkev; /* bulk event */ - struct srpc_event crpc_reqstev; /* request event */ - struct srpc_event crpc_replyev; /* reply event */ - - /* bulk, request(reqst), and reply exchanged on wire */ - struct srpc_msg crpc_reqstmsg; - struct srpc_msg crpc_replymsg; - struct lnet_handle_md crpc_reqstmdh; - struct lnet_handle_md crpc_replymdh; - struct srpc_bulk crpc_bulk; -}; - -#define srpc_client_rpc_size(rpc) \ -offsetof(struct srpc_client_rpc, crpc_bulk.bk_iovs[(rpc)->crpc_bulk.bk_niov]) - -#define srpc_client_rpc_addref(rpc) \ -do { \ - CDEBUG(D_NET, "RPC[%p] -> %s (%d)++\n", \ - (rpc), libcfs_id2str((rpc)->crpc_dest), \ - atomic_read(&(rpc)->crpc_refcount)); \ - LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ - atomic_inc(&(rpc)->crpc_refcount); \ -} while (0) - -#define srpc_client_rpc_decref(rpc) \ -do { \ - CDEBUG(D_NET, "RPC[%p] -> %s (%d)--\n", \ - (rpc), libcfs_id2str((rpc)->crpc_dest), \ - atomic_read(&(rpc)->crpc_refcount)); \ - LASSERT(atomic_read(&(rpc)->crpc_refcount) > 0); \ - if (atomic_dec_and_test(&(rpc)->crpc_refcount)) \ - srpc_destroy_client_rpc(rpc); \ -} while (0) - -#define srpc_event_pending(rpc) (!(rpc)->crpc_bulkev.ev_fired || \ - !(rpc)->crpc_reqstev.ev_fired || \ - !(rpc)->crpc_replyev.ev_fired) - -/* CPU partition data of srpc service */ -struct srpc_service_cd { - /** serialize */ - spinlock_t scd_lock; - /** backref to service */ - struct srpc_service *scd_svc; - /** event buffer */ - struct srpc_event scd_ev; - /** free RPC descriptors */ - struct list_head scd_rpc_free; - /** in-flight RPCs */ - struct list_head scd_rpc_active; - /** workitem for posting buffer */ - struct swi_workitem scd_buf_wi; - /** CPT id */ - int scd_cpt; - /** error code for scd_buf_wi */ - int scd_buf_err; - /** timestamp for scd_buf_err */ - time64_t scd_buf_err_stamp; - /** total # request buffers */ - int scd_buf_total; - /** # posted request buffers */ - 
int scd_buf_nposted; - /** in progress of buffer posting */ - int scd_buf_posting; - /** allocate more buffers if scd_buf_nposted < scd_buf_low */ - int scd_buf_low; - /** increase/decrease some buffers */ - int scd_buf_adjust; - /** posted message buffers */ - struct list_head scd_buf_posted; - /** blocked for RPC descriptor */ - struct list_head scd_buf_blocked; -}; - -/* number of server workitems (mini-thread) for testing service */ -#define SFW_TEST_WI_MIN 256 -#define SFW_TEST_WI_MAX 2048 -/* extra buffers for tolerating buggy peers, or unbalanced number - * of peers between partitions - */ -#define SFW_TEST_WI_EXTRA 64 - -/* number of server workitems (mini-thread) for framework service */ -#define SFW_FRWK_WI_MIN 16 -#define SFW_FRWK_WI_MAX 256 - -struct srpc_service { - int sv_id; /* service id */ - const char *sv_name; /* human readable name */ - int sv_wi_total; /* total server workitems */ - int sv_shuttingdown; - int sv_ncpts; - /* percpt data for srpc_service */ - struct srpc_service_cd **sv_cpt_data; - /* Service callbacks: - * - sv_handler: process incoming RPC request - * - sv_bulk_ready: notify bulk data - */ - int (*sv_handler)(struct srpc_server_rpc *); - int (*sv_bulk_ready)(struct srpc_server_rpc *, int); -}; - -struct sfw_session { - struct list_head sn_list; /* chain on fw_zombie_sessions */ - struct lst_sid sn_id; /* unique identifier */ - unsigned int sn_timeout; /* # seconds' inactivity to expire */ - int sn_timer_active; - unsigned int sn_features; - struct stt_timer sn_timer; - struct list_head sn_batches; /* list of batches */ - char sn_name[LST_NAME_SIZE]; - atomic_t sn_refcount; - atomic_t sn_brw_errors; - atomic_t sn_ping_errors; - unsigned long sn_started; -}; - -#define sfw_sid_equal(sid0, sid1) ((sid0).ses_nid == (sid1).ses_nid && \ - (sid0).ses_stamp == (sid1).ses_stamp) - -struct sfw_batch { - struct list_head bat_list; /* chain on sn_batches */ - struct lst_bid bat_id; /* batch id */ - int bat_error; /* error code of batch */ 
- struct sfw_session *bat_session; /* batch's session */ - atomic_t bat_nactive; /* # of active tests */ - struct list_head bat_tests; /* test instances */ -}; - -struct sfw_test_client_ops { - int (*tso_init)(struct sfw_test_instance *tsi); /* initialize test - * client - */ - void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test - * client - */ - int (*tso_prep_rpc)(struct sfw_test_unit *tsu, - struct lnet_process_id dest, - struct srpc_client_rpc **rpc); /* prep a tests rpc */ - void (*tso_done_rpc)(struct sfw_test_unit *tsu, - struct srpc_client_rpc *rpc); /* done a test rpc */ -}; - -struct sfw_test_instance { - struct list_head tsi_list; /* chain on batch */ - int tsi_service; /* test type */ - struct sfw_batch *tsi_batch; /* batch */ - struct sfw_test_client_ops *tsi_ops; /* test client operation - */ - - /* public parameter for all test units */ - unsigned int tsi_is_client:1; /* is test client */ - unsigned int tsi_stoptsu_onerr:1; /* stop tsu on error */ - int tsi_concur; /* concurrency */ - int tsi_loop; /* loop count */ - - /* status of test instance */ - spinlock_t tsi_lock; /* serialize */ - unsigned int tsi_stopping:1; /* test is stopping */ - atomic_t tsi_nactive; /* # of active test - * unit - */ - struct list_head tsi_units; /* test units */ - struct list_head tsi_free_rpcs; /* free rpcs */ - struct list_head tsi_active_rpcs; /* active rpcs */ - - union { - struct test_ping_req ping; /* ping parameter */ - struct test_bulk_req bulk_v0; /* bulk parameter */ - struct test_bulk_req_v1 bulk_v1; /* bulk v1 parameter */ - } tsi_u; -}; - -/* - * XXX: trailing (PAGE_SIZE % sizeof(struct lnet_process_id)) bytes at the end - * of pages are not used - */ -#define SFW_MAX_CONCUR LST_MAX_CONCUR -#define SFW_ID_PER_PAGE (PAGE_SIZE / sizeof(struct lnet_process_id_packed)) -#define SFW_MAX_NDESTS (LNET_MAX_IOV * SFW_ID_PER_PAGE) -#define sfw_id_pages(n) (((n) + SFW_ID_PER_PAGE - 1) / SFW_ID_PER_PAGE) - -struct sfw_test_unit { - struct list_head 
tsu_list; /* chain on lst_test_instance */ - struct lnet_process_id tsu_dest; /* id of dest node */ - int tsu_loop; /* loop count of the test */ - struct sfw_test_instance *tsu_instance; /* pointer to test instance */ - void *tsu_private; /* private data */ - struct swi_workitem tsu_worker; /* workitem of the test unit */ -}; - -struct sfw_test_case { - struct list_head tsc_list; /* chain on fw_tests */ - struct srpc_service *tsc_srv_service; /* test service */ - struct sfw_test_client_ops *tsc_cli_ops; /* ops of test client */ -}; - -struct srpc_client_rpc * -sfw_create_rpc(struct lnet_process_id peer, int service, - unsigned int features, int nbulkiov, int bulklen, - void (*done)(struct srpc_client_rpc *), void *priv); -int sfw_create_test_rpc(struct sfw_test_unit *tsu, - struct lnet_process_id peer, unsigned int features, - int nblk, int blklen, struct srpc_client_rpc **rpc); -void sfw_abort_rpc(struct srpc_client_rpc *rpc); -void sfw_post_rpc(struct srpc_client_rpc *rpc); -void sfw_client_rpc_done(struct srpc_client_rpc *rpc); -void sfw_unpack_message(struct srpc_msg *msg); -void sfw_free_pages(struct srpc_server_rpc *rpc); -void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i); -int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, - int sink); -int sfw_make_session(struct srpc_mksn_reqst *request, - struct srpc_mksn_reply *reply); - -struct srpc_client_rpc * -srpc_create_client_rpc(struct lnet_process_id peer, int service, - int nbulkiov, int bulklen, - void (*rpc_done)(struct srpc_client_rpc *), - void (*rpc_fini)(struct srpc_client_rpc *), void *priv); -void srpc_post_rpc(struct srpc_client_rpc *rpc); -void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why); -void srpc_free_bulk(struct srpc_bulk *bk); -struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off, - unsigned int bulk_npg, unsigned int bulk_len, - int sink); -void srpc_send_rpc(struct swi_workitem *wi); -int srpc_send_reply(struct srpc_server_rpc 
*rpc); -int srpc_add_service(struct srpc_service *sv); -int srpc_remove_service(struct srpc_service *sv); -void srpc_shutdown_service(struct srpc_service *sv); -void srpc_abort_service(struct srpc_service *sv); -int srpc_finish_service(struct srpc_service *sv); -int srpc_service_add_buffers(struct srpc_service *sv, int nbuffer); -void srpc_service_remove_buffers(struct srpc_service *sv, int nbuffer); -void srpc_get_counters(struct srpc_counters *cnt); -void srpc_set_counters(const struct srpc_counters *cnt); - -extern struct workqueue_struct *lst_serial_wq; -extern struct workqueue_struct **lst_test_wq; - -static inline int -srpc_serv_is_framework(struct srpc_service *svc) -{ - return svc->sv_id < SRPC_FRAMEWORK_SERVICE_MAX_ID; -} - -static void -swi_wi_action(struct work_struct *wi) -{ - struct swi_workitem *swi; - - swi = container_of(wi, struct swi_workitem, swi_work); - - swi->swi_action(swi); -} - -static inline void -swi_init_workitem(struct swi_workitem *swi, - swi_action_t action, struct workqueue_struct *wq) -{ - swi->swi_wq = wq; - swi->swi_action = action; - swi->swi_state = SWI_STATE_NEWBORN; - INIT_WORK(&swi->swi_work, swi_wi_action); -} - -static inline void -swi_schedule_workitem(struct swi_workitem *wi) -{ - queue_work(wi->swi_wq, &wi->swi_work); -} - -static inline int -swi_cancel_workitem(struct swi_workitem *swi) -{ - return cancel_work_sync(&swi->swi_work); -} - -int sfw_startup(void); -int srpc_startup(void); -void sfw_shutdown(void); -void srpc_shutdown(void); - -static inline void -srpc_destroy_client_rpc(struct srpc_client_rpc *rpc) -{ - LASSERT(rpc); - LASSERT(!srpc_event_pending(rpc)); - LASSERT(!atomic_read(&rpc->crpc_refcount)); - - if (!rpc->crpc_fini) - kfree(rpc); - else - (*rpc->crpc_fini)(rpc); -} - -static inline void -srpc_init_client_rpc(struct srpc_client_rpc *rpc, struct lnet_process_id peer, - int service, int nbulkiov, int bulklen, - void (*rpc_done)(struct srpc_client_rpc *), - void (*rpc_fini)(struct srpc_client_rpc *), 
void *priv) -{ - LASSERT(nbulkiov <= LNET_MAX_IOV); - - memset(rpc, 0, offsetof(struct srpc_client_rpc, - crpc_bulk.bk_iovs[nbulkiov])); - - INIT_LIST_HEAD(&rpc->crpc_list); - swi_init_workitem(&rpc->crpc_wi, srpc_send_rpc, - lst_test_wq[lnet_cpt_of_nid(peer.nid)]); - spin_lock_init(&rpc->crpc_lock); - atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */ - - rpc->crpc_dest = peer; - rpc->crpc_priv = priv; - rpc->crpc_service = service; - rpc->crpc_bulk.bk_len = bulklen; - rpc->crpc_bulk.bk_niov = nbulkiov; - rpc->crpc_done = rpc_done; - rpc->crpc_fini = rpc_fini; - LNetInvalidateMDHandle(&rpc->crpc_reqstmdh); - LNetInvalidateMDHandle(&rpc->crpc_replymdh); - LNetInvalidateMDHandle(&rpc->crpc_bulk.bk_mdh); - - /* no event is expected at this point */ - rpc->crpc_bulkev.ev_fired = 1; - rpc->crpc_reqstev.ev_fired = 1; - rpc->crpc_replyev.ev_fired = 1; - - rpc->crpc_reqstmsg.msg_magic = SRPC_MSG_MAGIC; - rpc->crpc_reqstmsg.msg_version = SRPC_MSG_VERSION; - rpc->crpc_reqstmsg.msg_type = srpc_service2request(service); -} - -static inline const char * -swi_state2str(int state) -{ -#define STATE2STR(x) case x: return #x - switch (state) { - default: - LBUG(); - STATE2STR(SWI_STATE_NEWBORN); - STATE2STR(SWI_STATE_REPLY_SUBMITTED); - STATE2STR(SWI_STATE_REPLY_SENT); - STATE2STR(SWI_STATE_REQUEST_SUBMITTED); - STATE2STR(SWI_STATE_REQUEST_SENT); - STATE2STR(SWI_STATE_REPLY_RECEIVED); - STATE2STR(SWI_STATE_BULK_STARTED); - STATE2STR(SWI_STATE_DONE); - } -#undef STATE2STR -} - -#define selftest_wait_events() \ - do { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - schedule_timeout(HZ / 10); \ - } while (0) - -#define lst_wait_until(cond, lock, fmt, ...) \ -do { \ - int __I = 2; \ - while (!(cond)) { \ - CDEBUG(is_power_of_2(++__I) ? 
D_WARNING : D_NET, \ - fmt, ## __VA_ARGS__); \ - spin_unlock(&(lock)); \ - \ - selftest_wait_events(); \ - \ - spin_lock(&(lock)); \ - } \ -} while (0) - -static inline void -srpc_wait_service_shutdown(struct srpc_service *sv) -{ - int i = 2; - - LASSERT(sv->sv_shuttingdown); - - while (!srpc_finish_service(sv)) { - i++; - CDEBUG(((i & -i) == i) ? D_WARNING : D_NET, - "Waiting for %s service to shutdown...\n", - sv->sv_name); - selftest_wait_events(); - } -} - -extern struct sfw_test_client_ops brw_test_client; -void brw_init_test_client(void); - -extern struct srpc_service brw_test_service; -void brw_init_test_service(void); - -extern struct sfw_test_client_ops ping_test_client; -void ping_init_test_client(void); - -extern struct srpc_service ping_test_service; -void ping_init_test_service(void); - -#endif /* __SELFTEST_SELFTEST_H__ */ diff --git a/drivers/staging/lustre/lnet/selftest/timer.c b/drivers/staging/lustre/lnet/selftest/timer.c deleted file mode 100644 index 582f252b3e12..000000000000 --- a/drivers/staging/lustre/lnet/selftest/timer.c +++ /dev/null @@ -1,244 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/timer.c - * - * Author: Isaac Huang - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include "selftest.h" - -/* - * Timers are implemented as a sorted queue of expiry times. The queue - * is slotted, with each slot holding timers which expire in a - * 2**STTIMER_MINPOLL (8) second period. The timers in each slot are - * sorted by increasing expiry time. The number of slots is 2**7 (128), - * to cover a time period of 1024 seconds into the future before wrapping. - */ -#define STTIMER_MINPOLL 3 /* log2 min poll interval (8 s) */ -#define STTIMER_SLOTTIME BIT(STTIMER_MINPOLL) -#define STTIMER_SLOTTIMEMASK (~(STTIMER_SLOTTIME - 1)) -#define STTIMER_NSLOTS BIT(7) -#define STTIMER_SLOT(t) (&stt_data.stt_hash[(((t) >> STTIMER_MINPOLL) & \ - (STTIMER_NSLOTS - 1))]) - -static struct st_timer_data { - spinlock_t stt_lock; - unsigned long stt_prev_slot; /* start time of the slot processed - * previously - */ - struct list_head stt_hash[STTIMER_NSLOTS]; - int stt_shuttingdown; - wait_queue_head_t stt_waitq; - int stt_nthreads; -} stt_data; - -void -stt_add_timer(struct stt_timer *timer) -{ - struct list_head *pos; - - spin_lock(&stt_data.stt_lock); - - LASSERT(stt_data.stt_nthreads > 0); - LASSERT(!stt_data.stt_shuttingdown); - LASSERT(timer->stt_func); - LASSERT(list_empty(&timer->stt_list)); - LASSERT(timer->stt_expires > ktime_get_real_seconds()); - - /* a simple insertion sort */ - list_for_each_prev(pos, STTIMER_SLOT(timer->stt_expires)) { - struct stt_timer *old = 
list_entry(pos, struct stt_timer, - stt_list); - - if (timer->stt_expires >= old->stt_expires) - break; - } - list_add(&timer->stt_list, pos); - - spin_unlock(&stt_data.stt_lock); -} - -/* - * The function returns whether it has deactivated a pending timer or not. - * (ie. del_timer() of an inactive timer returns 0, del_timer() of an - * active timer returns 1.) - * - * CAVEAT EMPTOR: - * When 0 is returned, it is possible that timer->stt_func _is_ running on - * another CPU. - */ -int -stt_del_timer(struct stt_timer *timer) -{ - int ret = 0; - - spin_lock(&stt_data.stt_lock); - - LASSERT(stt_data.stt_nthreads > 0); - LASSERT(!stt_data.stt_shuttingdown); - - if (!list_empty(&timer->stt_list)) { - ret = 1; - list_del_init(&timer->stt_list); - } - - spin_unlock(&stt_data.stt_lock); - return ret; -} - -/* called with stt_data.stt_lock held */ -static int -stt_expire_list(struct list_head *slot, time64_t now) -{ - int expired = 0; - struct stt_timer *timer; - - while (!list_empty(slot)) { - timer = list_entry(slot->next, struct stt_timer, stt_list); - - if (timer->stt_expires > now) - break; - - list_del_init(&timer->stt_list); - spin_unlock(&stt_data.stt_lock); - - expired++; - (*timer->stt_func) (timer->stt_data); - - spin_lock(&stt_data.stt_lock); - } - - return expired; -} - -static int -stt_check_timers(unsigned long *last) -{ - int expired = 0; - time64_t now; - unsigned long this_slot; - - now = ktime_get_real_seconds(); - this_slot = now & STTIMER_SLOTTIMEMASK; - - spin_lock(&stt_data.stt_lock); - - while (time_after_eq(this_slot, *last)) { - expired += stt_expire_list(STTIMER_SLOT(this_slot), now); - this_slot = this_slot - STTIMER_SLOTTIME; - } - - *last = now & STTIMER_SLOTTIMEMASK; - spin_unlock(&stt_data.stt_lock); - return expired; -} - -static int -stt_timer_main(void *arg) -{ - int rc = 0; - - while (!stt_data.stt_shuttingdown) { - stt_check_timers(&stt_data.stt_prev_slot); - - rc = wait_event_timeout(stt_data.stt_waitq, - stt_data.stt_shuttingdown, - 
STTIMER_SLOTTIME * HZ); - } - - spin_lock(&stt_data.stt_lock); - stt_data.stt_nthreads--; - spin_unlock(&stt_data.stt_lock); - return rc; -} - -static int -stt_start_timer_thread(void) -{ - struct task_struct *task; - - LASSERT(!stt_data.stt_shuttingdown); - - task = kthread_run(stt_timer_main, NULL, "st_timer"); - if (IS_ERR(task)) - return PTR_ERR(task); - - spin_lock(&stt_data.stt_lock); - stt_data.stt_nthreads++; - spin_unlock(&stt_data.stt_lock); - return 0; -} - -int -stt_startup(void) -{ - int rc = 0; - int i; - - stt_data.stt_shuttingdown = 0; - stt_data.stt_prev_slot = ktime_get_real_seconds() & STTIMER_SLOTTIMEMASK; - - spin_lock_init(&stt_data.stt_lock); - for (i = 0; i < STTIMER_NSLOTS; i++) - INIT_LIST_HEAD(&stt_data.stt_hash[i]); - - stt_data.stt_nthreads = 0; - init_waitqueue_head(&stt_data.stt_waitq); - rc = stt_start_timer_thread(); - if (rc) - CERROR("Can't spawn timer thread: %d\n", rc); - - return rc; -} - -void -stt_shutdown(void) -{ - int i; - - spin_lock(&stt_data.stt_lock); - - for (i = 0; i < STTIMER_NSLOTS; i++) - LASSERT(list_empty(&stt_data.stt_hash[i])); - - stt_data.stt_shuttingdown = 1; - - wake_up(&stt_data.stt_waitq); - lst_wait_until(!stt_data.stt_nthreads, stt_data.stt_lock, - "waiting for %d threads to terminate\n", - stt_data.stt_nthreads); - - spin_unlock(&stt_data.stt_lock); -} diff --git a/drivers/staging/lustre/lnet/selftest/timer.h b/drivers/staging/lustre/lnet/selftest/timer.h deleted file mode 100644 index 7f0ef9bd0cda..000000000000 --- a/drivers/staging/lustre/lnet/selftest/timer.h +++ /dev/null @@ -1,50 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/selftest/timer.h - * - * Author: Isaac Huang - */ -#ifndef __SELFTEST_TIMER_H__ -#define __SELFTEST_TIMER_H__ - -struct stt_timer { - struct list_head stt_list; - time64_t stt_expires; - void (*stt_func)(void *); - void *stt_data; -}; - -void stt_add_timer(struct stt_timer *timer); -int stt_del_timer(struct stt_timer *timer); -int stt_startup(void); -void stt_shutdown(void); - -#endif /* __SELFTEST_TIMER_H__ */ diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig deleted file mode 100644 index ccb78a945995..000000000000 --- a/drivers/staging/lustre/lustre/Kconfig +++ /dev/null @@ -1,45 +0,0 @@ -config LUSTRE_FS - tristate "Lustre file system client support" - depends on LNET - select CRYPTO - select CRYPTO_CRC32 - select CRYPTO_CRC32_PCLMUL if X86 - select CRYPTO_CRC32C - select CRYPTO_MD5 - select CRYPTO_SHA1 - select CRYPTO_SHA256 - select CRYPTO_SHA512 - depends on MULTIUSER - help - This option enables Lustre file system client support. Choose Y - here if you want to access a Lustre file system cluster. To compile - this file system support as a module, choose M here: the module will - be called lustre. 
- - To mount Lustre file systems, you also need to install the user space - mount.lustre and other user space commands which can be found in the - lustre-client package, available from - http://downloads.whamcloud.com/public/lustre/ - - Lustre file system is the most popular cluster file system in high - performance computing. Source code of both kernel space and user space - Lustre components can also be found at - http://git.whamcloud.com/?p=fs/lustre-release.git;a=summary - - If unsure, say N. - - See also http://wiki.lustre.org/ - -config LUSTRE_DEBUG_EXPENSIVE_CHECK - bool "Enable Lustre DEBUG checks" - depends on LUSTRE_FS - help - This option is mainly for debug purpose. It enables Lustre code to do - expensive checks that may have a performance impact. - - Use with caution. If unsure, say N. - -config LUSTRE_TRANSLATE_ERRNOS - bool - depends on LUSTRE_FS && !X86 - default y diff --git a/drivers/staging/lustre/lustre/Makefile b/drivers/staging/lustre/lustre/Makefile deleted file mode 100644 index 331e4fcdd5a2..000000000000 --- a/drivers/staging/lustre/lustre/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -obj-$(CONFIG_LUSTRE_FS) += obdclass/ ptlrpc/ fld/ osc/ mgc/ \ - fid/ lov/ mdc/ lmv/ llite/ obdecho/ diff --git a/drivers/staging/lustre/lustre/fid/Makefile b/drivers/staging/lustre/lustre/fid/Makefile deleted file mode 100644 index 77b65b92667d..000000000000 --- a/drivers/staging/lustre/lustre/fid/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include/ - -obj-$(CONFIG_LUSTRE_FS) += fid.o -fid-y := fid_request.o fid_lib.o lproc_fid.o diff --git a/drivers/staging/lustre/lustre/fid/fid_internal.h b/drivers/staging/lustre/lustre/fid/fid_internal.h deleted file mode 100644 index 14569e969a31..000000000000 --- a/drivers/staging/lustre/lustre/fid/fid_internal.h +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START 
- * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/fid/fid_internal.h - * - * Author: Yury Umanets - */ -#ifndef __FID_INTERNAL_H -#define __FID_INTERNAL_H - -#include - -/* Functions used internally in module. */ - -extern struct lprocfs_vars seq_client_debugfs_list[]; - -#endif /* __FID_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/fid/fid_lib.c b/drivers/staging/lustre/lustre/fid/fid_lib.c deleted file mode 100644 index ac52b378c155..000000000000 --- a/drivers/staging/lustre/lustre/fid/fid_lib.c +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/fid/fid_lib.c - * - * Miscellaneous fid functions. - * - * Author: Nikita Danilov - * Author: Yury Umanets - */ - -#define DEBUG_SUBSYSTEM S_FID - -#include -#include - -/** - * A cluster-wide range from which fid-sequences are granted to servers and - * then clients. - * - * Fid namespace: - *
- * Normal FID:        seq:64 [2^33,2^64-1]      oid:32          ver:32
- * IGIF      :        0:32, ino:32              gen:32          0:32
- * IDIF      :        0:31, 1:1, ost-index:16,  objd:48         0:32
- * 
- * - * The first 0x400 sequences of normal FID are reserved for special purpose. - * FID_SEQ_START + 1 is for local file id generation. - * FID_SEQ_START + 2 is for .lustre directory and its objects - */ -const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE = { - .lsr_start = FID_SEQ_NORMAL, - .lsr_end = (__u64)~0ULL, -}; - -/* Zero range, used for init and other purposes. */ -const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE = { - .lsr_start = 0, -}; - -/* Lustre Big Fs Lock fid. */ -const struct lu_fid LUSTRE_BFL_FID = { .f_seq = FID_SEQ_SPECIAL, - .f_oid = FID_OID_SPECIAL_BFL, - .f_ver = 0x0000000000000000 }; -EXPORT_SYMBOL(LUSTRE_BFL_FID); - -/** Special fid for ".lustre" directory */ -const struct lu_fid LU_DOT_LUSTRE_FID = { .f_seq = FID_SEQ_DOT_LUSTRE, - .f_oid = FID_OID_DOT_LUSTRE, - .f_ver = 0x0000000000000000 }; -EXPORT_SYMBOL(LU_DOT_LUSTRE_FID); - -/** Special fid for "fid" special object in .lustre */ -const struct lu_fid LU_OBF_FID = { .f_seq = FID_SEQ_DOT_LUSTRE, - .f_oid = FID_OID_DOT_LUSTRE_OBF, - .f_ver = 0x0000000000000000 }; -EXPORT_SYMBOL(LU_OBF_FID); diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c deleted file mode 100644 index a34fd90ca5e5..000000000000 --- a/drivers/staging/lustre/lustre/fid/fid_request.c +++ /dev/null @@ -1,410 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/fid/fid_request.c - * - * Lustre Sequence Manager - * - * Author: Yury Umanets - */ - -#define DEBUG_SUBSYSTEM S_FID - -#include - -#include -#include -#include -#include -/* mdc RPC locks */ -#include -#include "fid_internal.h" - -static struct dentry *seq_debugfs_dir; - -static int seq_client_rpc(struct lu_client_seq *seq, - struct lu_seq_range *output, __u32 opc, - const char *opcname) -{ - struct obd_export *exp = seq->lcs_exp; - struct ptlrpc_request *req; - struct lu_seq_range *out, *in; - __u32 *op; - unsigned int debug_mask; - int rc; - - LASSERT(exp && !IS_ERR(exp)); - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_SEQ_QUERY, - LUSTRE_MDS_VERSION, SEQ_QUERY); - if (!req) - return -ENOMEM; - - /* Init operation code */ - op = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_OPC); - *op = opc; - - /* Zero out input range, this is not recovery yet. 
*/ - in = req_capsule_client_get(&req->rq_pill, &RMF_SEQ_RANGE); - lu_seq_range_init(in); - - ptlrpc_request_set_replen(req); - - in->lsr_index = seq->lcs_space.lsr_index; - if (seq->lcs_type == LUSTRE_SEQ_METADATA) - fld_range_set_mdt(in); - else - fld_range_set_ost(in); - - if (opc == SEQ_ALLOC_SUPER) { - req->rq_request_portal = SEQ_CONTROLLER_PORTAL; - req->rq_reply_portal = MDC_REPLY_PORTAL; - /* During allocating super sequence for data object, - * the current thread might hold the export of MDT0(MDT0 - * precreating objects on this OST), and it will send the - * request to MDT0 here, so we can not keep resending the - * request here, otherwise if MDT0 is failed(umounted), - * it can not release the export of MDT0 - */ - if (seq->lcs_type == LUSTRE_SEQ_DATA) { - req->rq_no_delay = 1; - req->rq_no_resend = 1; - } - debug_mask = D_CONSOLE; - } else { - if (seq->lcs_type == LUSTRE_SEQ_METADATA) { - req->rq_reply_portal = MDC_REPLY_PORTAL; - req->rq_request_portal = SEQ_METADATA_PORTAL; - } else { - req->rq_reply_portal = OSC_REPLY_PORTAL; - req->rq_request_portal = SEQ_DATA_PORTAL; - } - debug_mask = D_INFO; - } - - ptlrpc_at_set_req_timeout(req); - - rc = ptlrpc_queue_wait(req); - if (rc) - goto out_req; - - out = req_capsule_server_get(&req->rq_pill, &RMF_SEQ_RANGE); - - if (!lu_seq_range_is_sane(out)) { - CERROR("%s: Invalid range received from server: " - DRANGE "\n", seq->lcs_name, PRANGE(out)); - rc = -EINVAL; - goto out_req; - } - - if (lu_seq_range_is_exhausted(out)) { - CERROR("%s: Range received from server is exhausted: " - DRANGE "]\n", seq->lcs_name, PRANGE(out)); - rc = -EINVAL; - goto out_req; - } - - *output = *out; - CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence " DRANGE "]\n", - seq->lcs_name, opcname, PRANGE(output)); - -out_req: - ptlrpc_req_finished(req); - return rc; -} - -/* Request sequence-controller node to allocate new meta-sequence. 
*/ -static int seq_client_alloc_meta(const struct lu_env *env, - struct lu_client_seq *seq) -{ - int rc; - - do { - /* If meta server return -EINPROGRESS or EAGAIN, - * it means meta server might not be ready to - * allocate super sequence from sequence controller - * (MDT0)yet - */ - rc = seq_client_rpc(seq, &seq->lcs_space, - SEQ_ALLOC_META, "meta"); - } while (rc == -EINPROGRESS || rc == -EAGAIN); - - return rc; -} - -/* Allocate new sequence for client. */ -static int seq_client_alloc_seq(const struct lu_env *env, - struct lu_client_seq *seq, u64 *seqnr) -{ - int rc; - - LASSERT(lu_seq_range_is_sane(&seq->lcs_space)); - - if (lu_seq_range_is_exhausted(&seq->lcs_space)) { - rc = seq_client_alloc_meta(env, seq); - if (rc) { - CERROR("%s: Can't allocate new meta-sequence, rc %d\n", - seq->lcs_name, rc); - *seqnr = U64_MAX; - return rc; - } - CDEBUG(D_INFO, "%s: New range - " DRANGE "\n", - seq->lcs_name, PRANGE(&seq->lcs_space)); - } else { - rc = 0; - } - - LASSERT(!lu_seq_range_is_exhausted(&seq->lcs_space)); - *seqnr = seq->lcs_space.lsr_start; - seq->lcs_space.lsr_start += 1; - - CDEBUG(D_INFO, "%s: Allocated sequence [%#llx]\n", seq->lcs_name, - *seqnr); - - return rc; -} - -/* Allocate new fid on passed client @seq and save it to @fid. */ -int seq_client_alloc_fid(const struct lu_env *env, - struct lu_client_seq *seq, struct lu_fid *fid) -{ - int rc; - - LASSERT(seq); - LASSERT(fid); - - spin_lock(&seq->lcs_lock); - - if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST)) - seq->lcs_fid.f_oid = seq->lcs_width; - - wait_event_cmd(seq->lcs_waitq, - (!fid_is_zero(&seq->lcs_fid) && - fid_oid(&seq->lcs_fid) < seq->lcs_width) || - !seq->lcs_update, - spin_unlock(&seq->lcs_lock), - spin_lock(&seq->lcs_lock)); - - if (!fid_is_zero(&seq->lcs_fid) && - fid_oid(&seq->lcs_fid) < seq->lcs_width) { - /* Just bump last allocated fid and return to caller. 
*/ - seq->lcs_fid.f_oid += 1; - rc = 0; - } else { - u64 seqnr; - - LASSERT(seq->lcs_update == 0); - seq->lcs_update = 1; - spin_unlock(&seq->lcs_lock); - - rc = seq_client_alloc_seq(env, seq, &seqnr); - - spin_lock(&seq->lcs_lock); - seq->lcs_update = 0; - wake_up(&seq->lcs_waitq); - - if (rc) { - CERROR("%s: Can't allocate new sequence, rc %d\n", - seq->lcs_name, rc); - spin_unlock(&seq->lcs_lock); - return rc; - } - - CDEBUG(D_INFO, "%s: Switch to sequence [0x%16.16llx]\n", - seq->lcs_name, seqnr); - - seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID; - seq->lcs_fid.f_seq = seqnr; - seq->lcs_fid.f_ver = 0; - - /* - * Inform caller that sequence switch is performed to allow it - * to setup FLD for it. - */ - rc = 1; - } - - *fid = seq->lcs_fid; - spin_unlock(&seq->lcs_lock); - - CDEBUG(D_INFO, - "%s: Allocated FID " DFID "\n", seq->lcs_name, PFID(fid)); - return rc; -} -EXPORT_SYMBOL(seq_client_alloc_fid); - -/* - * Finish the current sequence due to disconnect. - * See mdc_import_event() - */ -void seq_client_flush(struct lu_client_seq *seq) -{ - - LASSERT(seq); - spin_lock(&seq->lcs_lock); - - wait_event_cmd(seq->lcs_waitq, - !seq->lcs_update, - spin_unlock(&seq->lcs_lock), - spin_lock(&seq->lcs_lock)); - - fid_zero(&seq->lcs_fid); - /** - * this id shld not be used for seq range allocation. - * set to -1 for dgb check. 
- */ - - seq->lcs_space.lsr_index = -1; - - lu_seq_range_init(&seq->lcs_space); - spin_unlock(&seq->lcs_lock); -} -EXPORT_SYMBOL(seq_client_flush); - -static void seq_client_debugfs_fini(struct lu_client_seq *seq) -{ - debugfs_remove_recursive(seq->lcs_debugfs_entry); -} - -static void seq_client_debugfs_init(struct lu_client_seq *seq) -{ - seq->lcs_debugfs_entry = debugfs_create_dir(seq->lcs_name, - seq_debugfs_dir); - - ldebugfs_add_vars(seq->lcs_debugfs_entry, seq_client_debugfs_list, seq); -} - -static void seq_client_fini(struct lu_client_seq *seq) -{ - seq_client_debugfs_fini(seq); - - if (seq->lcs_exp) { - class_export_put(seq->lcs_exp); - seq->lcs_exp = NULL; - } -} - -static void seq_client_init(struct lu_client_seq *seq, struct obd_export *exp, - enum lu_cli_type type, const char *prefix) -{ - LASSERT(seq); - LASSERT(prefix); - - seq->lcs_type = type; - - spin_lock_init(&seq->lcs_lock); - if (type == LUSTRE_SEQ_METADATA) - seq->lcs_width = LUSTRE_METADATA_SEQ_MAX_WIDTH; - else - seq->lcs_width = LUSTRE_DATA_SEQ_MAX_WIDTH; - - init_waitqueue_head(&seq->lcs_waitq); - /* Make sure that things are clear before work is started. 
*/ - seq_client_flush(seq); - - seq->lcs_exp = class_export_get(exp); - - snprintf(seq->lcs_name, sizeof(seq->lcs_name), - "cli-%s", prefix); - - seq_client_debugfs_init(seq); -} - -int client_fid_init(struct obd_device *obd, - struct obd_export *exp, enum lu_cli_type type) -{ - struct client_obd *cli = &obd->u.cli; - char *prefix; - int rc; - - cli->cl_seq = kzalloc(sizeof(*cli->cl_seq), GFP_NOFS); - if (!cli->cl_seq) - return -ENOMEM; - - prefix = kzalloc(MAX_OBD_NAME + 5, GFP_NOFS); - if (!prefix) { - rc = -ENOMEM; - goto out_free_seq; - } - - snprintf(prefix, MAX_OBD_NAME + 5, "cli-%s", obd->obd_name); - - /* Init client side sequence-manager */ - seq_client_init(cli->cl_seq, exp, type, prefix); - kfree(prefix); - - return 0; -out_free_seq: - kfree(cli->cl_seq); - cli->cl_seq = NULL; - return rc; -} -EXPORT_SYMBOL(client_fid_init); - -int client_fid_fini(struct obd_device *obd) -{ - struct client_obd *cli = &obd->u.cli; - - if (cli->cl_seq) { - seq_client_fini(cli->cl_seq); - kfree(cli->cl_seq); - cli->cl_seq = NULL; - } - - return 0; -} -EXPORT_SYMBOL(client_fid_fini); - -static int __init fid_init(void) -{ - int rc; - - rc = libcfs_setup(); - if (rc) - return rc; - - seq_debugfs_dir = debugfs_create_dir(LUSTRE_SEQ_NAME, - debugfs_lustre_root); - return 0; -} - -static void __exit fid_exit(void) -{ - debugfs_remove_recursive(seq_debugfs_dir); -} - -MODULE_AUTHOR("OpenSFS, Inc. "); -MODULE_DESCRIPTION("Lustre File IDentifier"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(fid_init); -module_exit(fid_exit); diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c deleted file mode 100644 index 0aabf473c9bd..000000000000 --- a/drivers/staging/lustre/lustre/fid/lproc_fid.c +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/fid/lproc_fid.c - * - * Lustre Sequence Manager - * - * Author: Yury Umanets - */ - -#define DEBUG_SUBSYSTEM S_FID - -#include - -#include -#include -#include -#include -#include -#include "fid_internal.h" - -/* Format: [0x64BIT_INT - 0x64BIT_INT] + 32 bytes just in case */ -#define MAX_FID_RANGE_STRLEN (32 + 2 * 2 * sizeof(__u64)) -/* - * Note: this function is only used for testing, it is no safe for production - * use. 
- */ -static int -ldebugfs_fid_write_common(const char __user *buffer, size_t count, - struct lu_seq_range *range) -{ - struct lu_seq_range tmp; - int rc; - char kernbuf[MAX_FID_RANGE_STRLEN]; - - LASSERT(range); - - if (count >= sizeof(kernbuf)) - return -EINVAL; - - if (copy_from_user(kernbuf, buffer, count)) - return -EFAULT; - - kernbuf[count] = 0; - - if (count == 5 && strcmp(kernbuf, "clear") == 0) { - memset(range, 0, sizeof(*range)); - return count; - } - - /* of the form "[0x0000000240000400 - 0x000000028000400]" */ - rc = sscanf(kernbuf, "[%llx - %llx]\n", - (unsigned long long *)&tmp.lsr_start, - (unsigned long long *)&tmp.lsr_end); - if (rc != 2) - return -EINVAL; - if (!lu_seq_range_is_sane(&tmp) || lu_seq_range_is_zero(&tmp) || - tmp.lsr_start < range->lsr_start || tmp.lsr_end > range->lsr_end) - return -EINVAL; - *range = tmp; - return count; -} - -/* Client side debugfs stuff */ -static ssize_t -ldebugfs_fid_space_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct lu_client_seq *seq; - struct lu_seq_range range; - int rc; - - seq = ((struct seq_file *)file->private_data)->private; - - rc = ldebugfs_fid_write_common(buffer, count, &range); - - spin_lock(&seq->lcs_lock); - if (seq->lcs_update) - /* An RPC call is active to update lcs_space */ - rc = -EBUSY; - if (rc > 0) - seq->lcs_space = range; - spin_unlock(&seq->lcs_lock); - - if (rc > 0) { - CDEBUG(D_INFO, "%s: Space: " DRANGE "\n", - seq->lcs_name, PRANGE(&range)); - } - - return rc; -} - -static int -ldebugfs_fid_space_seq_show(struct seq_file *m, void *unused) -{ - struct lu_client_seq *seq = (struct lu_client_seq *)m->private; - int rc = 0; - - spin_lock(&seq->lcs_lock); - if (seq->lcs_update) - rc = -EBUSY; - else - seq_printf(m, "[%#llx - %#llx]:%x:%s\n", PRANGE(&seq->lcs_space)); - spin_unlock(&seq->lcs_lock); - - return rc; -} - -static ssize_t -ldebugfs_fid_width_seq_write(struct file *file, - const char __user *buffer, - size_t count, 
loff_t *off) -{ - struct lu_client_seq *seq; - __u64 max; - int rc, val; - - seq = ((struct seq_file *)file->private_data)->private; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - spin_lock(&seq->lcs_lock); - if (seq->lcs_type == LUSTRE_SEQ_DATA) - max = LUSTRE_DATA_SEQ_MAX_WIDTH; - else - max = LUSTRE_METADATA_SEQ_MAX_WIDTH; - - if (val <= max && val > 0) { - seq->lcs_width = val; - - CDEBUG(D_INFO, "%s: Sequence size: %llu\n", seq->lcs_name, - seq->lcs_width); - } - - spin_unlock(&seq->lcs_lock); - - return count; -} - -static int -ldebugfs_fid_width_seq_show(struct seq_file *m, void *unused) -{ - struct lu_client_seq *seq = (struct lu_client_seq *)m->private; - - spin_lock(&seq->lcs_lock); - seq_printf(m, "%llu\n", seq->lcs_width); - spin_unlock(&seq->lcs_lock); - - return 0; -} - -static int -ldebugfs_fid_fid_seq_show(struct seq_file *m, void *unused) -{ - struct lu_client_seq *seq = (struct lu_client_seq *)m->private; - - spin_lock(&seq->lcs_lock); - seq_printf(m, DFID "\n", PFID(&seq->lcs_fid)); - spin_unlock(&seq->lcs_lock); - - return 0; -} - -static int -ldebugfs_fid_server_seq_show(struct seq_file *m, void *unused) -{ - struct lu_client_seq *seq = (struct lu_client_seq *)m->private; - struct client_obd *cli; - - if (seq->lcs_exp) { - cli = &seq->lcs_exp->exp_obd->u.cli; - seq_printf(m, "%s\n", cli->cl_target_uuid.uuid); - } - - return 0; -} - -LPROC_SEQ_FOPS(ldebugfs_fid_space); -LPROC_SEQ_FOPS(ldebugfs_fid_width); -LPROC_SEQ_FOPS_RO(ldebugfs_fid_server); -LPROC_SEQ_FOPS_RO(ldebugfs_fid_fid); - -struct lprocfs_vars seq_client_debugfs_list[] = { - { .name = "space", - .fops = &ldebugfs_fid_space_fops }, - { .name = "width", - .fops = &ldebugfs_fid_width_fops }, - { .name = "server", - .fops = &ldebugfs_fid_server_fops }, - { .name = "fid", - .fops = &ldebugfs_fid_fid_fops }, - { NULL } -}; diff --git a/drivers/staging/lustre/lustre/fld/Makefile b/drivers/staging/lustre/lustre/fld/Makefile deleted file mode 100644 index 
426deba8b815..000000000000 --- a/drivers/staging/lustre/lustre/fld/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include/ - -obj-$(CONFIG_LUSTRE_FS) += fld.o -fld-y := fld_request.o fld_cache.o lproc_fld.o diff --git a/drivers/staging/lustre/lustre/fld/fld_cache.c b/drivers/staging/lustre/lustre/fld/fld_cache.c deleted file mode 100644 index a7415c9a1c28..000000000000 --- a/drivers/staging/lustre/lustre/fld/fld_cache.c +++ /dev/null @@ -1,516 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2013, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/fld/fld_cache.c - * - * FLD (Fids Location Database) - * - * Author: Pravin Shelar - * Author: Yury Umanets - */ - -#define DEBUG_SUBSYSTEM S_FLD - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include "fld_internal.h" - -/** - * create fld cache. - */ -struct fld_cache *fld_cache_init(const char *name, - int cache_size, int cache_threshold) -{ - struct fld_cache *cache; - - LASSERT(name); - LASSERT(cache_threshold < cache_size); - - cache = kzalloc(sizeof(*cache), GFP_NOFS); - if (!cache) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD(&cache->fci_entries_head); - INIT_LIST_HEAD(&cache->fci_lru); - - cache->fci_cache_count = 0; - rwlock_init(&cache->fci_lock); - - strlcpy(cache->fci_name, name, - sizeof(cache->fci_name)); - - cache->fci_cache_size = cache_size; - cache->fci_threshold = cache_threshold; - - /* Init fld cache info. */ - memset(&cache->fci_stat, 0, sizeof(cache->fci_stat)); - - CDEBUG(D_INFO, "%s: FLD cache - Size: %d, Threshold: %d\n", - cache->fci_name, cache_size, cache_threshold); - - return cache; -} - -/** - * destroy fld cache. - */ -void fld_cache_fini(struct fld_cache *cache) -{ - __u64 pct; - - LASSERT(cache); - fld_cache_flush(cache); - - if (cache->fci_stat.fst_count > 0) { - pct = cache->fci_stat.fst_cache * 100; - do_div(pct, cache->fci_stat.fst_count); - } else { - pct = 0; - } - - CDEBUG(D_INFO, "FLD cache statistics (%s):\n", cache->fci_name); - CDEBUG(D_INFO, " Total reqs: %llu\n", cache->fci_stat.fst_count); - CDEBUG(D_INFO, " Cache reqs: %llu\n", cache->fci_stat.fst_cache); - CDEBUG(D_INFO, " Cache hits: %llu%%\n", pct); - - kfree(cache); -} - -/** - * delete given node from list. - */ -static void fld_cache_entry_delete(struct fld_cache *cache, - struct fld_cache_entry *node) -{ - list_del(&node->fce_list); - list_del(&node->fce_lru); - cache->fci_cache_count--; - kfree(node); -} - -/** - * fix list by checking new entry with NEXT entry in order. 
- */ -static void fld_fix_new_list(struct fld_cache *cache) -{ - struct fld_cache_entry *f_curr; - struct fld_cache_entry *f_next; - struct lu_seq_range *c_range; - struct lu_seq_range *n_range; - struct list_head *head = &cache->fci_entries_head; - -restart_fixup: - - list_for_each_entry_safe(f_curr, f_next, head, fce_list) { - c_range = &f_curr->fce_range; - n_range = &f_next->fce_range; - - LASSERT(lu_seq_range_is_sane(c_range)); - if (&f_next->fce_list == head) - break; - - if (c_range->lsr_flags != n_range->lsr_flags) - continue; - - LASSERTF(c_range->lsr_start <= n_range->lsr_start, - "cur lsr_start " DRANGE " next lsr_start " DRANGE "\n", - PRANGE(c_range), PRANGE(n_range)); - - /* check merge possibility with next range */ - if (c_range->lsr_end == n_range->lsr_start) { - if (c_range->lsr_index != n_range->lsr_index) - continue; - n_range->lsr_start = c_range->lsr_start; - fld_cache_entry_delete(cache, f_curr); - continue; - } - - /* check if current range overlaps with next range. */ - if (n_range->lsr_start < c_range->lsr_end) { - if (c_range->lsr_index == n_range->lsr_index) { - n_range->lsr_start = c_range->lsr_start; - n_range->lsr_end = max(c_range->lsr_end, - n_range->lsr_end); - fld_cache_entry_delete(cache, f_curr); - } else { - if (n_range->lsr_end <= c_range->lsr_end) { - *n_range = *c_range; - fld_cache_entry_delete(cache, f_curr); - } else { - n_range->lsr_start = c_range->lsr_end; - } - } - - /* we could have overlap over next - * range too. better restart. 
- */ - goto restart_fixup; - } - - /* kill duplicates */ - if (c_range->lsr_start == n_range->lsr_start && - c_range->lsr_end == n_range->lsr_end) - fld_cache_entry_delete(cache, f_curr); - } -} - -/** - * add node to fld cache - */ -static inline void fld_cache_entry_add(struct fld_cache *cache, - struct fld_cache_entry *f_new, - struct list_head *pos) -{ - list_add(&f_new->fce_list, pos); - list_add(&f_new->fce_lru, &cache->fci_lru); - - cache->fci_cache_count++; - fld_fix_new_list(cache); -} - -/** - * Check if cache needs to be shrunk. If so - do it. - * Remove one entry in list and so on until cache is shrunk enough. - */ -static int fld_cache_shrink(struct fld_cache *cache) -{ - int num = 0; - - if (cache->fci_cache_count < cache->fci_cache_size) - return 0; - - while (cache->fci_cache_count + cache->fci_threshold > - cache->fci_cache_size && - !list_empty(&cache->fci_lru)) { - struct fld_cache_entry *flde = - list_last_entry(&cache->fci_lru, - struct fld_cache_entry, fce_lru); - - fld_cache_entry_delete(cache, flde); - num++; - } - - CDEBUG(D_INFO, "%s: FLD cache - Shrunk by %d entries\n", - cache->fci_name, num); - - return 0; -} - -/** - * kill all fld cache entries. - */ -void fld_cache_flush(struct fld_cache *cache) -{ - write_lock(&cache->fci_lock); - cache->fci_cache_size = 0; - fld_cache_shrink(cache); - write_unlock(&cache->fci_lock); -} - -/** - * punch hole in existing range. divide this range and add new - * entry accordingly. - */ - -static void fld_cache_punch_hole(struct fld_cache *cache, - struct fld_cache_entry *f_curr, - struct fld_cache_entry *f_new) -{ - const struct lu_seq_range *range = &f_new->fce_range; - const u64 new_start = range->lsr_start; - const u64 new_end = range->lsr_end; - struct fld_cache_entry *fldt; - - fldt = kzalloc(sizeof(*fldt), GFP_ATOMIC); - if (!fldt) { - kfree(f_new); - /* overlap is not allowed, so don't mess up list. 
*/ - return; - } - /* break f_curr RANGE into three RANGES: - * f_curr, f_new , fldt - */ - - /* f_new = *range */ - - /* fldt */ - fldt->fce_range.lsr_start = new_end; - fldt->fce_range.lsr_end = f_curr->fce_range.lsr_end; - fldt->fce_range.lsr_index = f_curr->fce_range.lsr_index; - - /* f_curr */ - f_curr->fce_range.lsr_end = new_start; - - /* add these two entries to list */ - fld_cache_entry_add(cache, f_new, &f_curr->fce_list); - fld_cache_entry_add(cache, fldt, &f_new->fce_list); - - /* no need to fixup */ -} - -/** - * handle range overlap in fld cache. - */ -static void fld_cache_overlap_handle(struct fld_cache *cache, - struct fld_cache_entry *f_curr, - struct fld_cache_entry *f_new) -{ - const struct lu_seq_range *range = &f_new->fce_range; - const u64 new_start = range->lsr_start; - const u64 new_end = range->lsr_end; - const u32 mdt = range->lsr_index; - - /* this is overlap case, these case are checking overlapping with - * prev range only. fixup will handle overlapping with next range. - */ - - if (f_curr->fce_range.lsr_index == mdt) { - f_curr->fce_range.lsr_start = min(f_curr->fce_range.lsr_start, - new_start); - - f_curr->fce_range.lsr_end = max(f_curr->fce_range.lsr_end, - new_end); - - kfree(f_new); - fld_fix_new_list(cache); - - } else if (new_start <= f_curr->fce_range.lsr_start && - f_curr->fce_range.lsr_end <= new_end) { - /* case 1: new range completely overshadowed existing range. - * e.g. whole range migrated. update fld cache entry - */ - - f_curr->fce_range = *range; - kfree(f_new); - fld_fix_new_list(cache); - - } else if (f_curr->fce_range.lsr_start < new_start && - new_end < f_curr->fce_range.lsr_end) { - /* case 2: new range fit within existing range. 
*/ - - fld_cache_punch_hole(cache, f_curr, f_new); - - } else if (new_end <= f_curr->fce_range.lsr_end) { - /* case 3: overlap: - * [new_start [c_start new_end) c_end) - */ - - LASSERT(new_start <= f_curr->fce_range.lsr_start); - - f_curr->fce_range.lsr_start = new_end; - fld_cache_entry_add(cache, f_new, f_curr->fce_list.prev); - - } else if (f_curr->fce_range.lsr_start <= new_start) { - /* case 4: overlap: - * [c_start [new_start c_end) new_end) - */ - - LASSERT(f_curr->fce_range.lsr_end <= new_end); - - f_curr->fce_range.lsr_end = new_start; - fld_cache_entry_add(cache, f_new, &f_curr->fce_list); - } else { - CERROR("NEW range =" DRANGE " curr = " DRANGE "\n", - PRANGE(range), PRANGE(&f_curr->fce_range)); - } -} - -struct fld_cache_entry -*fld_cache_entry_create(const struct lu_seq_range *range) -{ - struct fld_cache_entry *f_new; - - LASSERT(lu_seq_range_is_sane(range)); - - f_new = kzalloc(sizeof(*f_new), GFP_NOFS); - if (!f_new) - return ERR_PTR(-ENOMEM); - - f_new->fce_range = *range; - return f_new; -} - -/** - * Insert FLD entry in FLD cache. - * - * This function handles all cases of merging and breaking up of - * ranges. - */ -static int fld_cache_insert_nolock(struct fld_cache *cache, - struct fld_cache_entry *f_new) -{ - struct fld_cache_entry *f_curr; - struct fld_cache_entry *n; - struct list_head *head; - struct list_head *prev = NULL; - const u64 new_start = f_new->fce_range.lsr_start; - const u64 new_end = f_new->fce_range.lsr_end; - __u32 new_flags = f_new->fce_range.lsr_flags; - - /* - * Duplicate entries are eliminated in insert op. - * So we don't need to search new entry before starting - * insertion loop. 
- */ - - if (!cache->fci_no_shrink) - fld_cache_shrink(cache); - - head = &cache->fci_entries_head; - - list_for_each_entry_safe(f_curr, n, head, fce_list) { - /* add list if next is end of list */ - if (new_end < f_curr->fce_range.lsr_start || - (new_end == f_curr->fce_range.lsr_start && - new_flags != f_curr->fce_range.lsr_flags)) - break; - - prev = &f_curr->fce_list; - /* check if this range is to left of new range. */ - if (new_start < f_curr->fce_range.lsr_end && - new_flags == f_curr->fce_range.lsr_flags) { - fld_cache_overlap_handle(cache, f_curr, f_new); - goto out; - } - } - - if (!prev) - prev = head; - - CDEBUG(D_INFO, "insert range " DRANGE "\n", PRANGE(&f_new->fce_range)); - /* Add new entry to cache and lru list. */ - fld_cache_entry_add(cache, f_new, prev); -out: - return 0; -} - -int fld_cache_insert(struct fld_cache *cache, - const struct lu_seq_range *range) -{ - struct fld_cache_entry *flde; - int rc; - - flde = fld_cache_entry_create(range); - if (IS_ERR(flde)) - return PTR_ERR(flde); - - write_lock(&cache->fci_lock); - rc = fld_cache_insert_nolock(cache, flde); - write_unlock(&cache->fci_lock); - if (rc) - kfree(flde); - - return rc; -} - -/** - * Delete FLD entry in FLD cache. - * - */ - -struct fld_cache_entry -*fld_cache_entry_lookup_nolock(struct fld_cache *cache, - struct lu_seq_range *range) -{ - struct fld_cache_entry *flde; - struct fld_cache_entry *got = NULL; - struct list_head *head; - - head = &cache->fci_entries_head; - list_for_each_entry(flde, head, fce_list) { - if (range->lsr_start == flde->fce_range.lsr_start || - (range->lsr_end == flde->fce_range.lsr_end && - range->lsr_flags == flde->fce_range.lsr_flags)) { - got = flde; - break; - } - } - - return got; -} - -/** - * lookup \a seq sequence for range in fld cache. 
- */ -struct fld_cache_entry -*fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range) -{ - struct fld_cache_entry *got = NULL; - - read_lock(&cache->fci_lock); - got = fld_cache_entry_lookup_nolock(cache, range); - read_unlock(&cache->fci_lock); - return got; -} - -/** - * lookup \a seq sequence for range in fld cache. - */ -int fld_cache_lookup(struct fld_cache *cache, - const u64 seq, struct lu_seq_range *range) -{ - struct fld_cache_entry *flde; - struct fld_cache_entry *prev = NULL; - struct list_head *head; - - read_lock(&cache->fci_lock); - head = &cache->fci_entries_head; - - cache->fci_stat.fst_count++; - list_for_each_entry(flde, head, fce_list) { - if (flde->fce_range.lsr_start > seq) { - if (prev) - *range = prev->fce_range; - break; - } - - prev = flde; - if (lu_seq_range_within(&flde->fce_range, seq)) { - *range = flde->fce_range; - - cache->fci_stat.fst_cache++; - read_unlock(&cache->fci_lock); - return 0; - } - } - read_unlock(&cache->fci_lock); - return -ENOENT; -} diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h deleted file mode 100644 index e1d6aaa5c2b4..000000000000 --- a/drivers/staging/lustre/lustre/fld/fld_internal.h +++ /dev/null @@ -1,170 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/fld/fld_internal.h - * - * Subsystem Description: - * FLD is FID Location Database, which stores where (IE, on which MDT) - * FIDs are located. - * The database is basically a record file, each record consists of a FID - * sequence range, MDT/OST index, and flags. The FLD for the whole FS - * is only stored on the sequence controller(MDT0) right now, but each target - * also has its local FLD, which only stores the local sequence. - * - * The FLD subsystem usually has two tasks: - * 1. maintain the database, i.e. when the sequence controller allocates - * new sequence ranges to some nodes, it will call the FLD API to insert the - * location information in FLDB. - * - * 2. Handle requests from other nodes, i.e. if client needs to know where - * the FID is located, if it can not find the information in the local cache, - * it will send a FLD lookup RPC to the FLD service, and the FLD service will - * look up the FLDB entry and return the location information to client. 
- * - * - * Author: Yury Umanets - * Author: Tom WangDi - */ -#ifndef __FLD_INTERNAL_H -#define __FLD_INTERNAL_H - -#include - -#include -#include - -struct fld_stats { - __u64 fst_count; - __u64 fst_cache; - __u64 fst_inflight; -}; - -struct lu_fld_hash { - const char *fh_name; - int (*fh_hash_func)(struct lu_client_fld *, __u64); - struct lu_fld_target *(*fh_scan_func)(struct lu_client_fld *, __u64); -}; - -struct fld_cache_entry { - struct list_head fce_lru; - struct list_head fce_list; - /** fld cache entries are sorted on range->lsr_start field. */ - struct lu_seq_range fce_range; -}; - -struct fld_cache { - /** - * Cache guard, protects fci_hash mostly because others immutable after - * init is finished. - */ - rwlock_t fci_lock; - - /** Cache shrink threshold */ - int fci_threshold; - - /** Preferred number of cached entries */ - int fci_cache_size; - - /** Current number of cached entries. Protected by \a fci_lock */ - int fci_cache_count; - - /** LRU list fld entries. */ - struct list_head fci_lru; - - /** sorted fld entries. */ - struct list_head fci_entries_head; - - /** Cache statistics. */ - struct fld_stats fci_stat; - - /** Cache name used for debug and messages. */ - char fci_name[LUSTRE_MDT_MAXNAMELEN]; - unsigned int fci_no_shrink:1; -}; - -enum { - /* 4M of FLD cache will not hurt client a lot. */ - FLD_SERVER_CACHE_SIZE = (4 * 0x100000), - - /* 1M of FLD cache will not hurt client a lot. */ - FLD_CLIENT_CACHE_SIZE = (1 * 0x100000) -}; - -enum { - /* Cache threshold is 10 percent of size. */ - FLD_SERVER_CACHE_THRESHOLD = 10, - - /* Cache threshold is 10 percent of size. 
*/ - FLD_CLIENT_CACHE_THRESHOLD = 10 -}; - -extern struct lu_fld_hash fld_hash[]; - -int fld_client_rpc(struct obd_export *exp, - struct lu_seq_range *range, __u32 fld_op, - struct ptlrpc_request **reqp); - -extern struct lprocfs_vars fld_client_debugfs_list[]; - -struct fld_cache *fld_cache_init(const char *name, - int cache_size, int cache_threshold); - -void fld_cache_fini(struct fld_cache *cache); - -void fld_cache_flush(struct fld_cache *cache); - -int fld_cache_insert(struct fld_cache *cache, - const struct lu_seq_range *range); - -struct fld_cache_entry -*fld_cache_entry_create(const struct lu_seq_range *range); - -int fld_cache_lookup(struct fld_cache *cache, - const u64 seq, struct lu_seq_range *range); - -struct fld_cache_entry* -fld_cache_entry_lookup(struct fld_cache *cache, struct lu_seq_range *range); - -struct fld_cache_entry -*fld_cache_entry_lookup_nolock(struct fld_cache *cache, - struct lu_seq_range *range); - -static inline const char * -fld_target_name(struct lu_fld_target *tar) -{ - if (tar->ft_srv) - return tar->ft_srv->lsf_name; - - return (const char *)tar->ft_exp->exp_obd->obd_name; -} - -#endif /* __FLD_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c deleted file mode 100644 index 97f7ea632346..000000000000 --- a/drivers/staging/lustre/lustre/fld/fld_request.c +++ /dev/null @@ -1,446 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/fld/fld_request.c - * - * FLD (Fids Location Database) - * - * Author: Yury Umanets - */ - -#define DEBUG_SUBSYSTEM S_FLD - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include "fld_internal.h" - -static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq) -{ - LASSERT(fld->lcf_count > 0); - return do_div(seq, fld->lcf_count); -} - -static struct lu_fld_target * -fld_rrb_scan(struct lu_client_fld *fld, u64 seq) -{ - struct lu_fld_target *target; - int hash; - - /* Because almost all of special sequence located in MDT0, - * it should go to index 0 directly, instead of calculating - * hash again, and also if other MDTs is not being connected, - * the fld lookup requests(for seq on MDT0) should not be - * blocked because of other MDTs - */ - if (fid_seq_is_norm(seq)) - hash = fld_rrb_hash(fld, seq); - else - hash = 0; - -again: - list_for_each_entry(target, &fld->lcf_targets, ft_chain) { - if (target->ft_idx == hash) - return target; - } - - if (hash != 0) { - /* It is possible the remote target(MDT) are not connected to - * with client yet, so we will refer this to MDT0, which should - * be connected during mount - */ - hash = 0; - goto again; - } - - CERROR("%s: Can't find target by hash %d (seq %#llx). 
Targets (%d):\n", - fld->lcf_name, hash, seq, fld->lcf_count); - - list_for_each_entry(target, &fld->lcf_targets, ft_chain) { - const char *srv_name = target->ft_srv ? - target->ft_srv->lsf_name : ""; - const char *exp_name = target->ft_exp ? - (char *)target->ft_exp->exp_obd->obd_uuid.uuid : - ""; - - CERROR(" exp: 0x%p (%s), srv: 0x%p (%s), idx: %llu\n", - target->ft_exp, exp_name, target->ft_srv, - srv_name, target->ft_idx); - } - - /* - * If target is not found, there is logical error anyway, so here is - * LBUG() to catch this situation. - */ - LBUG(); - return NULL; -} - -struct lu_fld_hash fld_hash[] = { - { - .fh_name = "RRB", - .fh_hash_func = fld_rrb_hash, - .fh_scan_func = fld_rrb_scan - }, - { - NULL, - } -}; - -static struct lu_fld_target * -fld_client_get_target(struct lu_client_fld *fld, u64 seq) -{ - struct lu_fld_target *target; - - LASSERT(fld->lcf_hash); - - spin_lock(&fld->lcf_lock); - target = fld->lcf_hash->fh_scan_func(fld, seq); - spin_unlock(&fld->lcf_lock); - - if (target) { - CDEBUG(D_INFO, "%s: Found target (idx %llu) by seq %#llx\n", - fld->lcf_name, target->ft_idx, seq); - } - - return target; -} - -/* - * Add export to FLD. This is usually done by CMM and LMV as they are main users - * of FLD module. 
- */ -int fld_client_add_target(struct lu_client_fld *fld, - struct lu_fld_target *tar) -{ - const char *name; - struct lu_fld_target *target, *tmp; - - LASSERT(tar); - name = fld_target_name(tar); - LASSERT(name); - LASSERT(tar->ft_srv || tar->ft_exp); - - CDEBUG(D_INFO, "%s: Adding target %s (idx %llu)\n", - fld->lcf_name, name, tar->ft_idx); - - target = kzalloc(sizeof(*target), GFP_NOFS); - if (!target) - return -ENOMEM; - - spin_lock(&fld->lcf_lock); - list_for_each_entry(tmp, &fld->lcf_targets, ft_chain) { - if (tmp->ft_idx == tar->ft_idx) { - spin_unlock(&fld->lcf_lock); - kfree(target); - CERROR("Target %s exists in FLD and known as %s:#%llu\n", - name, fld_target_name(tmp), tmp->ft_idx); - return -EEXIST; - } - } - - target->ft_exp = tar->ft_exp; - if (target->ft_exp) - class_export_get(target->ft_exp); - target->ft_srv = tar->ft_srv; - target->ft_idx = tar->ft_idx; - - list_add_tail(&target->ft_chain, &fld->lcf_targets); - - fld->lcf_count++; - spin_unlock(&fld->lcf_lock); - - return 0; -} -EXPORT_SYMBOL(fld_client_add_target); - -/* Remove export from FLD */ -int fld_client_del_target(struct lu_client_fld *fld, __u64 idx) -{ - struct lu_fld_target *target, *tmp; - - spin_lock(&fld->lcf_lock); - list_for_each_entry_safe(target, tmp, &fld->lcf_targets, ft_chain) { - if (target->ft_idx == idx) { - fld->lcf_count--; - list_del(&target->ft_chain); - spin_unlock(&fld->lcf_lock); - - if (target->ft_exp) - class_export_put(target->ft_exp); - - kfree(target); - return 0; - } - } - spin_unlock(&fld->lcf_lock); - return -ENOENT; -} - -static struct dentry *fld_debugfs_dir; - -static void fld_client_debugfs_init(struct lu_client_fld *fld) -{ - fld->lcf_debugfs_entry = debugfs_create_dir(fld->lcf_name, - fld_debugfs_dir); - - ldebugfs_add_vars(fld->lcf_debugfs_entry, fld_client_debugfs_list, fld); -} - -void fld_client_debugfs_fini(struct lu_client_fld *fld) -{ - debugfs_remove_recursive(fld->lcf_debugfs_entry); -} -EXPORT_SYMBOL(fld_client_debugfs_fini); - -static 
inline int hash_is_sane(int hash) -{ - return (hash >= 0 && hash < ARRAY_SIZE(fld_hash)); -} - -int fld_client_init(struct lu_client_fld *fld, - const char *prefix, int hash) -{ - int cache_size, cache_threshold; - int rc = 0; - - snprintf(fld->lcf_name, sizeof(fld->lcf_name), - "cli-%s", prefix); - - if (!hash_is_sane(hash)) { - CERROR("%s: Wrong hash function %#x\n", - fld->lcf_name, hash); - return -EINVAL; - } - - fld->lcf_count = 0; - spin_lock_init(&fld->lcf_lock); - fld->lcf_hash = &fld_hash[hash]; - INIT_LIST_HEAD(&fld->lcf_targets); - - cache_size = FLD_CLIENT_CACHE_SIZE / - sizeof(struct fld_cache_entry); - - cache_threshold = cache_size * - FLD_CLIENT_CACHE_THRESHOLD / 100; - - fld->lcf_cache = fld_cache_init(fld->lcf_name, - cache_size, cache_threshold); - if (IS_ERR(fld->lcf_cache)) { - rc = PTR_ERR(fld->lcf_cache); - fld->lcf_cache = NULL; - goto out; - } - - fld_client_debugfs_init(fld); -out: - CDEBUG(D_INFO, "%s: Using \"%s\" hash\n", - fld->lcf_name, fld->lcf_hash->fh_name); - return rc; -} -EXPORT_SYMBOL(fld_client_init); - -void fld_client_fini(struct lu_client_fld *fld) -{ - struct lu_fld_target *target, *tmp; - - spin_lock(&fld->lcf_lock); - list_for_each_entry_safe(target, tmp, &fld->lcf_targets, ft_chain) { - fld->lcf_count--; - list_del(&target->ft_chain); - if (target->ft_exp) - class_export_put(target->ft_exp); - kfree(target); - } - spin_unlock(&fld->lcf_lock); - - if (fld->lcf_cache) { - if (!IS_ERR(fld->lcf_cache)) - fld_cache_fini(fld->lcf_cache); - fld->lcf_cache = NULL; - } -} -EXPORT_SYMBOL(fld_client_fini); - -int fld_client_rpc(struct obd_export *exp, - struct lu_seq_range *range, __u32 fld_op, - struct ptlrpc_request **reqp) -{ - struct ptlrpc_request *req = NULL; - struct lu_seq_range *prange; - __u32 *op; - int rc = 0; - struct obd_import *imp; - - LASSERT(exp); - - imp = class_exp2cliimp(exp); - switch (fld_op) { - case FLD_QUERY: - req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY, - LUSTRE_MDS_VERSION, FLD_QUERY); - if 
(!req) - return -ENOMEM; - - /* - * XXX: only needed when talking to old server(< 2.6), it should - * be removed when < 2.6 server is not supported - */ - op = req_capsule_client_get(&req->rq_pill, &RMF_FLD_OPC); - *op = FLD_LOOKUP; - - if (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) - req->rq_allow_replay = 1; - break; - case FLD_READ: - req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_READ, - LUSTRE_MDS_VERSION, FLD_READ); - if (!req) - return -ENOMEM; - - req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA, - RCL_SERVER, PAGE_SIZE); - break; - default: - rc = -EINVAL; - break; - } - if (rc) - return rc; - - prange = req_capsule_client_get(&req->rq_pill, &RMF_FLD_MDFLD); - *prange = *range; - ptlrpc_request_set_replen(req); - req->rq_request_portal = FLD_REQUEST_PORTAL; - req->rq_reply_portal = MDC_REPLY_PORTAL; - ptlrpc_at_set_req_timeout(req); - - obd_get_request_slot(&exp->exp_obd->u.cli); - rc = ptlrpc_queue_wait(req); - obd_put_request_slot(&exp->exp_obd->u.cli); - if (rc) - goto out_req; - - if (fld_op == FLD_QUERY) { - prange = req_capsule_server_get(&req->rq_pill, &RMF_FLD_MDFLD); - if (!prange) { - rc = -EFAULT; - goto out_req; - } - *range = *prange; - } - -out_req: - if (rc || !reqp) { - ptlrpc_req_finished(req); - req = NULL; - } - - if (reqp) - *reqp = req; - - return rc; -} - -int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds, - __u32 flags, const struct lu_env *env) -{ - struct lu_seq_range res = { 0 }; - struct lu_fld_target *target; - int rc; - - rc = fld_cache_lookup(fld->lcf_cache, seq, &res); - if (rc == 0) { - *mds = res.lsr_index; - return 0; - } - - /* Can not find it in the cache */ - target = fld_client_get_target(fld, seq); - LASSERT(target); - - CDEBUG(D_INFO, - "%s: Lookup fld entry (seq: %#llx) on target %s (idx %llu)\n", - fld->lcf_name, seq, fld_target_name(target), target->ft_idx); - - res.lsr_start = seq; - fld_range_set_type(&res, flags); - rc = fld_client_rpc(target->ft_exp, &res, FLD_QUERY, NULL); - - if 
(rc == 0) { - *mds = res.lsr_index; - - fld_cache_insert(fld->lcf_cache, &res); - } - return rc; -} -EXPORT_SYMBOL(fld_client_lookup); - -void fld_client_flush(struct lu_client_fld *fld) -{ - fld_cache_flush(fld->lcf_cache); -} - -static int __init fld_init(void) -{ - int rc; - - rc = libcfs_setup(); - if (rc) - return rc; - - fld_debugfs_dir = debugfs_create_dir(LUSTRE_FLD_NAME, - debugfs_lustre_root); - return 0; -} - -static void __exit fld_exit(void) -{ - debugfs_remove_recursive(fld_debugfs_dir); -} - -MODULE_AUTHOR("OpenSFS, Inc. "); -MODULE_DESCRIPTION("Lustre FID Location Database"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(fld_init) -module_exit(fld_exit) diff --git a/drivers/staging/lustre/lustre/fld/lproc_fld.c b/drivers/staging/lustre/lustre/fld/lproc_fld.c deleted file mode 100644 index 0bcfb26ef8aa..000000000000 --- a/drivers/staging/lustre/lustre/fld/lproc_fld.c +++ /dev/null @@ -1,154 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/fld/lproc_fld.c - * - * FLD (FIDs Location Database) - * - * Author: Yury Umanets - * Di Wang - */ - -#define DEBUG_SUBSYSTEM S_FLD - -#include - -#include -#include -#include -#include -#include -#include -#include "fld_internal.h" - -static int -fld_debugfs_targets_seq_show(struct seq_file *m, void *unused) -{ - struct lu_client_fld *fld = (struct lu_client_fld *)m->private; - struct lu_fld_target *target; - - spin_lock(&fld->lcf_lock); - list_for_each_entry(target, &fld->lcf_targets, ft_chain) - seq_printf(m, "%s\n", fld_target_name(target)); - spin_unlock(&fld->lcf_lock); - - return 0; -} - -static int -fld_debugfs_hash_seq_show(struct seq_file *m, void *unused) -{ - struct lu_client_fld *fld = (struct lu_client_fld *)m->private; - - spin_lock(&fld->lcf_lock); - seq_printf(m, "%s\n", fld->lcf_hash->fh_name); - spin_unlock(&fld->lcf_lock); - - return 0; -} - -static ssize_t -fld_debugfs_hash_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct lu_client_fld *fld; - struct lu_fld_hash *hash = NULL; - char fh_name[8]; - int i; - - if (count > sizeof(fh_name)) - return -ENAMETOOLONG; - - if (copy_from_user(fh_name, buffer, count) != 0) - return -EFAULT; - - fld = ((struct seq_file *)file->private_data)->private; - - for (i = 0; fld_hash[i].fh_name; i++) { - if (count != strlen(fld_hash[i].fh_name)) - continue; - - if (!strncmp(fld_hash[i].fh_name, fh_name, count)) { - hash = &fld_hash[i]; - break; - } - } - - if (hash) { - spin_lock(&fld->lcf_lock); - fld->lcf_hash = hash; - spin_unlock(&fld->lcf_lock); - - CDEBUG(D_INFO, "%s: Changed hash to \"%s\"\n", - fld->lcf_name, hash->fh_name); - } - - return count; -} - -static ssize_t -fld_debugfs_cache_flush_write(struct file *file, const char __user *buffer, - size_t count, loff_t *pos) -{ - struct lu_client_fld *fld = file->private_data; - - 
fld_cache_flush(fld->lcf_cache); - - CDEBUG(D_INFO, "%s: Lookup cache is flushed\n", fld->lcf_name); - - return count; -} - -static int -fld_debugfs_cache_flush_release(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return 0; -} - -static const struct file_operations fld_debugfs_cache_flush_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .write = fld_debugfs_cache_flush_write, - .release = fld_debugfs_cache_flush_release, -}; - -LPROC_SEQ_FOPS_RO(fld_debugfs_targets); -LPROC_SEQ_FOPS(fld_debugfs_hash); - -struct lprocfs_vars fld_client_debugfs_list[] = { - { "targets", &fld_debugfs_targets_fops }, - { "hash", &fld_debugfs_hash_fops }, - { "cache_flush", &fld_debugfs_cache_flush_fops }, - { NULL } -}; diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h deleted file mode 100644 index 6f7b991be809..000000000000 --- a/drivers/staging/lustre/lustre/include/cl_object.h +++ /dev/null @@ -1,2463 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#ifndef _LUSTRE_CL_OBJECT_H -#define _LUSTRE_CL_OBJECT_H - -/** \defgroup clio clio - * - * Client objects implement io operations and cache pages. - * - * Examples: lov and osc are implementations of cl interface. - * - * Big Theory Statement. - * - * Layered objects. - * - * Client implementation is based on the following data-types: - * - * - cl_object - * - * - cl_page - * - * - cl_lock represents an extent lock on an object. - * - * - cl_io represents high-level i/o activity such as whole read/write - * system call, or write-out of pages from under the lock being - * canceled. cl_io has sub-ios that can be stopped and resumed - * independently, thus achieving high degree of transfer - * parallelism. Single cl_io can be advanced forward by - * the multiple threads (although in the most usual case of - * read/write system call it is associated with the single user - * thread, that issued the system call). - * - * Terminology - * - * - to avoid confusion high-level I/O operation like read or write system - * call is referred to as "an io", whereas low-level I/O operation, like - * RPC, is referred to as "a transfer" - * - * - "generic code" means generic (not file system specific) code in the - * hosting environment. "cl-code" means code (mostly in cl_*.c files) that - * is not layer specific. - * - * Locking. - * - * - i_mutex - * - PG_locked - * - cl_object_header::coh_page_guard - * - lu_site::ls_guard - * - * See the top comment in cl_object.c for the description of overall locking and - * reference-counting design. - * - * See comments below for the description of i/o, page, and dlm-locking - * design. - * - * @{ - */ - -/* - * super-class definitions. 
- */ -#include -#include -#include -#include -#include -#include -#include - -struct inode; - -struct cl_device; - -struct cl_object; - -struct cl_page; -struct cl_page_slice; -struct cl_lock; -struct cl_lock_slice; - -struct cl_lock_operations; -struct cl_page_operations; - -struct cl_io; -struct cl_io_slice; - -struct cl_req_attr; - -/** - * Device in the client stack. - * - * \see vvp_device, lov_device, lovsub_device, osc_device - */ -struct cl_device { - /** Super-class. */ - struct lu_device cd_lu_dev; -}; - -/** \addtogroup cl_object cl_object - * @{ - */ -/** - * "Data attributes" of cl_object. Data attributes can be updated - * independently for a sub-object, and top-object's attributes are calculated - * from sub-objects' ones. - */ -struct cl_attr { - /** Object size, in bytes */ - loff_t cat_size; - /** - * Known minimal size, in bytes. - * - * This is only valid when at least one DLM lock is held. - */ - loff_t cat_kms; - /** Modification time. Measured in seconds since epoch. */ - time64_t cat_mtime; - /** Access time. Measured in seconds since epoch. */ - time64_t cat_atime; - /** Change time. Measured in seconds since epoch. */ - time64_t cat_ctime; - /** - * Blocks allocated to this cl_object on the server file system. - * - * \todo XXX An interface for block size is needed. - */ - __u64 cat_blocks; - /** - * User identifier for quota purposes. - */ - uid_t cat_uid; - /** - * Group identifier for quota purposes. - */ - gid_t cat_gid; - - /* nlink of the directory */ - __u64 cat_nlink; -}; - -/** - * Fields in cl_attr that are being set. - */ -enum cl_attr_valid { - CAT_SIZE = 1 << 0, - CAT_KMS = 1 << 1, - CAT_MTIME = 1 << 3, - CAT_ATIME = 1 << 4, - CAT_CTIME = 1 << 5, - CAT_BLOCKS = 1 << 6, - CAT_UID = 1 << 7, - CAT_GID = 1 << 8 -}; - -/** - * Sub-class of lu_object with methods common for objects on the client - * stacks. - * - * cl_object: represents a regular file system object, both a file and a - * stripe. 
cl_object is based on lu_object: it is identified by a fid, - * layered, cached, hashed, and lrued. Important distinction with the server - * side, where md_object and dt_object are used, is that cl_object "fans out" - * at the lov/sns level: depending on the file layout, single file is - * represented as a set of "sub-objects" (stripes). At the implementation - * level, struct lov_object contains an array of cl_objects. Each sub-object - * is a full-fledged cl_object, having its fid, living in the lru and hash - * table. - * - * This leads to the next important difference with the server side: on the - * client, it's quite usual to have objects with the different sequence of - * layers. For example, typical top-object is composed of the following - * layers: - * - * - vvp - * - lov - * - * whereas its sub-objects are composed of - * - * - lovsub - * - osc - * - * layers. Here "lovsub" is a mostly dummy layer, whose purpose is to keep - * track of the object-subobject relationship. - * - * Sub-objects are not cached independently: when top-object is about to - * be discarded from the memory, all its sub-objects are torn-down and - * destroyed too. - * - * \see vvp_object, lov_object, lovsub_object, osc_object - */ -struct cl_object { - /** super class */ - struct lu_object co_lu; - /** per-object-layer operations */ - const struct cl_object_operations *co_ops; - /** offset of page slice in cl_page buffer */ - int co_slice_off; -}; - -/** - * Description of the client object configuration. This is used for the - * creation of a new client object that is identified by a more state than - * fid. - */ -struct cl_object_conf { - /** Super-class. */ - struct lu_object_conf coc_lu; - union { - /** - * Object layout. This is consumed by lov. - */ - struct lu_buf coc_layout; - /** - * Description of particular stripe location in the - * cluster. This is consumed by osc. - */ - struct lov_oinfo *coc_oinfo; - } u; - /** - * VFS inode. This is consumed by vvp. 
- */ - struct inode *coc_inode; - /** - * Layout lock handle. - */ - struct ldlm_lock *coc_lock; - /** - * Operation to handle layout, OBJECT_CONF_XYZ. - */ - int coc_opc; -}; - -enum { - /** configure layout, set up a new stripe, must be called while - * holding layout lock. - */ - OBJECT_CONF_SET = 0, - /** invalidate the current stripe configuration due to losing - * layout lock. - */ - OBJECT_CONF_INVALIDATE = 1, - /** wait for old layout to go away so that new layout can be set up. */ - OBJECT_CONF_WAIT = 2 -}; - -enum { - CL_LAYOUT_GEN_NONE = (u32)-2, /* layout lock was cancelled */ - CL_LAYOUT_GEN_EMPTY = (u32)-1, /* for empty layout */ -}; - -struct cl_layout { - /** the buffer to return the layout in lov_mds_md format. */ - struct lu_buf cl_buf; - /** size of layout in lov_mds_md format. */ - size_t cl_size; - /** Layout generation. */ - u32 cl_layout_gen; -}; - -/** - * Operations implemented for each cl object layer. - * - * \see vvp_ops, lov_ops, lovsub_ops, osc_ops - */ -struct cl_object_operations { - /** - * Initialize page slice for this layer. Called top-to-bottom through - * every object layer when a new cl_page is instantiated. Layer - * keeping private per-page data, or requiring its own page operations - * vector should allocate these data here, and attach then to the page - * by calling cl_page_slice_add(). \a vmpage is locked (in the VM - * sense). Optional. - * - * \retval NULL success. - * - * \retval ERR_PTR(errno) failure code. - * - * \retval valid-pointer pointer to already existing referenced page - * to be used instead of newly created. - */ - int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); - /** - * Initialize lock slice for this layer. Called top-to-bottom through - * every object layer when a new cl_lock is instantiated. 
Layer - * keeping private per-lock data, or requiring its own lock operations - * vector should allocate these data here, and attach then to the lock - * by calling cl_lock_slice_add(). Mandatory. - */ - int (*coo_lock_init)(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *io); - /** - * Initialize io state for a given layer. - * - * called top-to-bottom once per io existence to initialize io - * state. If layer wants to keep some state for this type of io, it - * has to embed struct cl_io_slice in lu_env::le_ses, and register - * slice with cl_io_slice_add(). It is guaranteed that all threads - * participating in this io share the same session. - */ - int (*coo_io_init)(const struct lu_env *env, - struct cl_object *obj, struct cl_io *io); - /** - * Fill portion of \a attr that this layer controls. This method is - * called top-to-bottom through all object layers. - * - * \pre cl_object_header::coh_attr_guard of the top-object is locked. - * - * \return 0: to continue - * \return +ve: to stop iterating through layers (but 0 is returned - * from enclosing cl_object_attr_get()) - * \return -ve: to signal error - */ - int (*coo_attr_get)(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr); - /** - * Update attributes. - * - * \a valid is a bitmask composed from enum #cl_attr_valid, and - * indicating what attributes are to be set. - * - * \pre cl_object_header::coh_attr_guard of the top-object is locked. - * - * \return the same convention as for - * cl_object_operations::coo_attr_get() is used. - */ - int (*coo_attr_update)(const struct lu_env *env, struct cl_object *obj, - const struct cl_attr *attr, unsigned int valid); - /** - * Update object configuration. Called top-to-bottom to modify object - * configuration. - * - * XXX error conditions and handling. - */ - int (*coo_conf_set)(const struct lu_env *env, struct cl_object *obj, - const struct cl_object_conf *conf); - /** - * Glimpse ast. 
Executed when glimpse ast arrives for a lock on this - * object. Layers are supposed to fill parts of \a lvb that will be - * shipped to the glimpse originator as a glimpse result. - * - * \see vvp_object_glimpse(), lovsub_object_glimpse(), - * \see osc_object_glimpse() - */ - int (*coo_glimpse)(const struct lu_env *env, - const struct cl_object *obj, struct ost_lvb *lvb); - /** - * Object prune method. Called when the layout is going to change on - * this object, therefore each layer has to clean up their cache, - * mainly pages and locks. - */ - int (*coo_prune)(const struct lu_env *env, struct cl_object *obj); - /** - * Object getstripe method. - */ - int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj, - struct lov_user_md __user *lum); - /** - * Get FIEMAP mapping from the object. - */ - int (*coo_fiemap)(const struct lu_env *env, struct cl_object *obj, - struct ll_fiemap_info_key *fmkey, - struct fiemap *fiemap, size_t *buflen); - /** - * Get layout and generation of the object. - */ - int (*coo_layout_get)(const struct lu_env *env, struct cl_object *obj, - struct cl_layout *layout); - /** - * Get maximum size of the object. - */ - loff_t (*coo_maxbytes)(struct cl_object *obj); - /** - * Set request attributes. - */ - void (*coo_req_attr_set)(const struct lu_env *env, - struct cl_object *obj, - struct cl_req_attr *attr); -}; - -/** - * Extended header for client object. - */ -struct cl_object_header { - /** Standard lu_object_header. cl_object::co_lu::lo_header points - * here. - */ - struct lu_object_header coh_lu; - - /** - * Parent object. It is assumed that an object has a well-defined - * parent, but not a well-defined child (there may be multiple - * sub-objects, for the same top-object). cl_object_header::coh_parent - * field allows certain code to be written generically, without - * limiting possible cl_object layouts unduly. 
- */ - struct cl_object_header *coh_parent; - /** - * Protects consistency between cl_attr of parent object and - * attributes of sub-objects, that the former is calculated ("merged") - * from. - * - * \todo XXX this can be read/write lock if needed. - */ - spinlock_t coh_attr_guard; - /** - * Size of cl_page + page slices - */ - unsigned short coh_page_bufsize; - /** - * Number of objects above this one: 0 for a top-object, 1 for its - * sub-object, etc. - */ - unsigned char coh_nesting; -}; - -/** - * Helper macro: iterate over all layers of the object \a obj, assigning every - * layer top-to-bottom to \a slice. - */ -#define cl_object_for_each(slice, obj) \ - list_for_each_entry((slice), \ - &(obj)->co_lu.lo_header->loh_layers, \ - co_lu.lo_linkage) -/** - * Helper macro: iterate over all layers of the object \a obj, assigning every - * layer bottom-to-top to \a slice. - */ -#define cl_object_for_each_reverse(slice, obj) \ - list_for_each_entry_reverse((slice), \ - &(obj)->co_lu.lo_header->loh_layers, \ - co_lu.lo_linkage) -/** @} cl_object */ - -#define CL_PAGE_EOF ((pgoff_t)~0ull) - -/** \addtogroup cl_page cl_page - * @{ - */ - -/** \struct cl_page - * Layered client page. - * - * cl_page: represents a portion of a file, cached in the memory. All pages - * of the given file are of the same size, and are kept in the radix tree - * hanging off the cl_object. cl_page doesn't fan out, but as sub-objects - * of the top-level file object are first class cl_objects, they have their - * own radix trees of pages and hence page is implemented as a sequence of - * struct cl_pages's, linked into double-linked list through - * cl_page::cp_parent and cl_page::cp_child pointers, each residing in the - * corresponding radix tree at the corresponding logical offset. - * - * cl_page is associated with VM page of the hosting environment (struct - * page in Linux kernel, for example), struct page. 
It is assumed, that this - * association is implemented by one of cl_page layers (top layer in the - * current design) that - * - * - intercepts per-VM-page call-backs made by the environment (e.g., - * memory pressure), - * - * - translates state (page flag bits) and locking between lustre and - * environment. - * - * The association between cl_page and struct page is immutable and - * established when cl_page is created. - * - * cl_page can be "owned" by a particular cl_io (see below), guaranteeing - * this io an exclusive access to this page w.r.t. other io attempts and - * various events changing page state (such as transfer completion, or - * eviction of the page from the memory). Note, that in general cl_io - * cannot be identified with a particular thread, and page ownership is not - * exactly equal to the current thread holding a lock on the page. Layer - * implementing association between cl_page and struct page has to implement - * ownership on top of available synchronization mechanisms. - * - * While lustre client maintains the notion of an page ownership by io, - * hosting MM/VM usually has its own page concurrency control - * mechanisms. For example, in Linux, page access is synchronized by the - * per-page PG_locked bit-lock, and generic kernel code (generic_file_*()) - * takes care to acquire and release such locks as necessary around the - * calls to the file system methods (->readpage(), ->prepare_write(), - * ->commit_write(), etc.). This leads to the situation when there are two - * different ways to own a page in the client: - * - * - client code explicitly and voluntary owns the page (cl_page_own()); - * - * - VM locks a page and then calls the client, that has "to assume" - * the ownership from the VM (cl_page_assume()). - * - * Dual methods to release ownership are cl_page_disown() and - * cl_page_unassume(). - * - * cl_page is reference counted (cl_page::cp_ref). 
When reference counter - * drops to 0, the page is returned to the cache, unless it is in - * cl_page_state::CPS_FREEING state, in which case it is immediately - * destroyed. - * - * The general logic guaranteeing the absence of "existential races" for - * pages is the following: - * - * - there are fixed known ways for a thread to obtain a new reference - * to a page: - * - * - by doing a lookup in the cl_object radix tree, protected by the - * spin-lock; - * - * - by starting from VM-locked struct page and following some - * hosting environment method (e.g., following ->private pointer in - * the case of Linux kernel), see cl_vmpage_page(); - * - * - when the page enters cl_page_state::CPS_FREEING state, all these - * ways are severed with the proper synchronization - * (cl_page_delete()); - * - * - entry into cl_page_state::CPS_FREEING is serialized by the VM page - * lock; - * - * - no new references to the page in cl_page_state::CPS_FREEING state - * are allowed (checked in cl_page_get()). - * - * Together this guarantees that when last reference to a - * cl_page_state::CPS_FREEING page is released, it is safe to destroy the - * page, as neither references to it can be acquired at that point, nor - * ones exist. - * - * cl_page is a state machine. States are enumerated in enum - * cl_page_state. Possible state transitions are enumerated in - * cl_page_state_set(). State transition process (i.e., actual changing of - * cl_page::cp_state field) is protected by the lock on the underlying VM - * page. - * - * Linux Kernel implementation. - * - * Binding between cl_page and struct page (which is a typedef for - * struct page) is implemented in the vvp layer. cl_page is attached to the - * ->private pointer of the struct page, together with the setting of - * PG_private bit in page->flags, and acquiring additional reference on the - * struct page (much like struct buffer_head, or any similar file system - * private data structures). 
- * - * PG_locked lock is used to implement both ownership and transfer - * synchronization, that is, page is VM-locked in CPS_{OWNED,PAGE{IN,OUT}} - * states. No additional references are acquired for the duration of the - * transfer. - * - * \warning *THIS IS NOT* the behavior expected by the Linux kernel, where - * write-out is "protected" by the special PG_writeback bit. - */ - -/** - * States of cl_page. cl_page.c assumes particular order here. - * - * The page state machine is rather crude, as it doesn't recognize finer page - * states like "dirty" or "up to date". This is because such states are not - * always well defined for the whole stack (see, for example, the - * implementation of the read-ahead, that hides page up-to-dateness to track - * cache hits accurately). Such sub-states are maintained by the layers that - * are interested in them. - */ -enum cl_page_state { - /** - * Page is in the cache, un-owned. Page leaves cached state in the - * following cases: - * - * - [cl_page_state::CPS_OWNED] io comes across the page and - * owns it; - * - * - [cl_page_state::CPS_PAGEOUT] page is dirty, the - * req-formation engine decides that it wants to include this page - * into an RPC being constructed, and yanks it from the cache; - * - * - [cl_page_state::CPS_FREEING] VM callback is executed to - * evict the page form the memory; - * - * \invariant cl_page::cp_owner == NULL && cl_page::cp_req == NULL - */ - CPS_CACHED, - /** - * Page is exclusively owned by some cl_io. 
Page may end up in this - * state as a result of - * - * - io creating new page and immediately owning it; - * - * - [cl_page_state::CPS_CACHED] io finding existing cached page - * and owning it; - * - * - [cl_page_state::CPS_OWNED] io finding existing owned page - * and waiting for owner to release the page; - * - * Page leaves owned state in the following cases: - * - * - [cl_page_state::CPS_CACHED] io decides to leave the page in - * the cache, doing nothing; - * - * - [cl_page_state::CPS_PAGEIN] io starts read transfer for - * this page; - * - * - [cl_page_state::CPS_PAGEOUT] io starts immediate write - * transfer for this page; - * - * - [cl_page_state::CPS_FREEING] io decides to destroy this - * page (e.g., as part of truncate or extent lock cancellation). - * - * \invariant cl_page::cp_owner != NULL && cl_page::cp_req == NULL - */ - CPS_OWNED, - /** - * Page is being written out, as a part of a transfer. This state is - * entered when req-formation logic decided that it wants this page to - * be sent through the wire _now_. Specifically, it means that once - * this state is achieved, transfer completion handler (with either - * success or failure indication) is guaranteed to be executed against - * this page independently of any locks and any scheduling decisions - * made by the hosting environment (that effectively means that the - * page is never put into cl_page_state::CPS_PAGEOUT state "in - * advance". This property is mentioned, because it is important when - * reasoning about possible dead-locks in the system). The page can - * enter this state as a result of - * - * - [cl_page_state::CPS_OWNED] an io requesting an immediate - * write-out of this page, or - * - * - [cl_page_state::CPS_CACHED] req-forming engine deciding - * that it has enough dirty pages cached to issue a "good" - * transfer. - * - * The page leaves cl_page_state::CPS_PAGEOUT state when the transfer - * is completed---it is moved into cl_page_state::CPS_CACHED state. 
- * - * Underlying VM page is locked for the duration of transfer. - * - * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL - */ - CPS_PAGEOUT, - /** - * Page is being read in, as a part of a transfer. This is quite - * similar to the cl_page_state::CPS_PAGEOUT state, except that - * read-in is always "immediate"---there is no such thing a sudden - * construction of read request from cached, presumably not up to date, - * pages. - * - * Underlying VM page is locked for the duration of transfer. - * - * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req != NULL - */ - CPS_PAGEIN, - /** - * Page is being destroyed. This state is entered when client decides - * that page has to be deleted from its host object, as, e.g., a part - * of truncate. - * - * Once this state is reached, there is no way to escape it. - * - * \invariant: cl_page::cp_owner == NULL && cl_page::cp_req == NULL - */ - CPS_FREEING, - CPS_NR -}; - -enum cl_page_type { - /** Host page, the page is from the host inode which the cl_page - * belongs to. - */ - CPT_CACHEABLE = 1, - - /** Transient page, the transient cl_page is used to bind a cl_page - * to vmpage which is not belonging to the same object of cl_page. - * it is used in DirectIO and lockless IO. - */ - CPT_TRANSIENT, -}; - -/** - * Fields are protected by the lock on struct page, except for atomics and - * immutables. - * - * \invariant Data type invariants are in cl_page_invariant(). Basically: - * cl_page::cp_parent and cl_page::cp_child are a well-formed double-linked - * list, consistent with the parent/child pointers in the cl_page::cp_obj and - * cl_page::cp_owner (when set). - */ -struct cl_page { - /** Reference counter. */ - atomic_t cp_ref; - /** An object this page is a part of. Immutable after creation. */ - struct cl_object *cp_obj; - /** vmpage */ - struct page *cp_vmpage; - /** Linkage of pages within group. Pages must be owned */ - struct list_head cp_batch; - /** List of slices. 
Immutable after creation. */ - struct list_head cp_layers; - /** - * Page state. This field is const to avoid accidental update, it is - * modified only internally within cl_page.c. Protected by a VM lock. - */ - const enum cl_page_state cp_state; - /** - * Page type. Only CPT_TRANSIENT is used so far. Immutable after - * creation. - */ - enum cl_page_type cp_type; - - /** - * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned - * by sub-io. Protected by a VM lock. - */ - struct cl_io *cp_owner; - /** List of references to this page, for debugging. */ - struct lu_ref cp_reference; - /** Link to an object, for debugging. */ - struct lu_ref_link cp_obj_ref; - /** Link to a queue, for debugging. */ - struct lu_ref_link cp_queue_ref; - /** Assigned if doing a sync_io */ - struct cl_sync_io *cp_sync_io; -}; - -/** - * Per-layer part of cl_page. - * - * \see vvp_page, lov_page, osc_page - */ -struct cl_page_slice { - struct cl_page *cpl_page; - pgoff_t cpl_index; - /** - * Object slice corresponding to this page slice. Immutable after - * creation. - */ - struct cl_object *cpl_obj; - const struct cl_page_operations *cpl_ops; - /** Linkage into cl_page::cp_layers. Immutable after creation. */ - struct list_head cpl_linkage; -}; - -/** - * Lock mode. For the client extent locks. - * - * \ingroup cl_lock - */ -enum cl_lock_mode { - CLM_READ, - CLM_WRITE, - CLM_GROUP -}; - -/** - * Requested transfer type. - */ -enum cl_req_type { - CRT_READ, - CRT_WRITE, - CRT_NR -}; - -/** - * Per-layer page operations. - * - * Methods taking an \a io argument are for the activity happening in the - * context of given \a io. Page is assumed to be owned by that io, except for - * the obvious cases (like cl_page_operations::cpo_own()). - * - * \see vvp_page_ops, lov_page_ops, osc_page_ops - */ -struct cl_page_operations { - /** - * cl_page<->struct page methods. Only one layer in the stack has to - * implement these. 
Current code assumes that this functionality is - * provided by the topmost layer, see cl_page_disown0() as an example. - */ - - /** - * Called when \a io acquires this page into the exclusive - * ownership. When this method returns, it is guaranteed that the is - * not owned by other io, and no transfer is going on against - * it. Optional. - * - * \see cl_page_own() - * \see vvp_page_own(), lov_page_own() - */ - int (*cpo_own)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io, int nonblock); - /** Called when ownership it yielded. Optional. - * - * \see cl_page_disown() - * \see vvp_page_disown() - */ - void (*cpo_disown)(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); - /** - * Called for a page that is already "owned" by \a io from VM point of - * view. Optional. - * - * \see cl_page_assume() - * \see vvp_page_assume(), lov_page_assume() - */ - void (*cpo_assume)(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); - /** Dual to cl_page_operations::cpo_assume(). Optional. Called - * bottom-to-top when IO releases a page without actually unlocking - * it. - * - * \see cl_page_unassume() - * \see vvp_page_unassume() - */ - void (*cpo_unassume)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); - /** - * Announces whether the page contains valid data or not by \a uptodate. - * - * \see cl_page_export() - * \see vvp_page_export() - */ - void (*cpo_export)(const struct lu_env *env, - const struct cl_page_slice *slice, int uptodate); - /** - * Checks whether underlying VM page is locked (in the suitable - * sense). Used for assertions. - * - * \retval -EBUSY: page is protected by a lock of a given mode; - * \retval -ENODATA: page is not protected by a lock; - * \retval 0: this layer cannot decide. (Should never happen.) - */ - int (*cpo_is_vmlocked)(const struct lu_env *env, - const struct cl_page_slice *slice); - /** - * Page destruction. 
- */ - - /** - * Called when page is truncated from the object. Optional. - * - * \see cl_page_discard() - * \see vvp_page_discard(), osc_page_discard() - */ - void (*cpo_discard)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); - /** - * Called when page is removed from the cache, and is about to being - * destroyed. Optional. - * - * \see cl_page_delete() - * \see vvp_page_delete(), osc_page_delete() - */ - void (*cpo_delete)(const struct lu_env *env, - const struct cl_page_slice *slice); - /** Destructor. Frees resources and slice itself. */ - void (*cpo_fini)(const struct lu_env *env, - struct cl_page_slice *slice); - /** - * Optional debugging helper. Prints given page slice. - * - * \see cl_page_print() - */ - int (*cpo_print)(const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t p); - /** - * \name transfer - * - * Transfer methods. - * - * @{ - */ - /** - * Request type dependent vector of operations. - * - * Transfer operations depend on transfer mode (cl_req_type). To avoid - * passing transfer mode to each and every of these methods, and to - * avoid branching on request type inside of the methods, separate - * methods for cl_req_type:CRT_READ and cl_req_type:CRT_WRITE are - * provided. That is, method invocation usually looks like - * - * slice->cp_ops.io[req->crq_type].cpo_method(env, slice, ...); - */ - struct { - /** - * Called when a page is submitted for a transfer as a part of - * cl_page_list. - * - * \return 0 : page is eligible for submission; - * \return -EALREADY : skip this page; - * \return -ve : error. - * - * \see cl_page_prep() - */ - int (*cpo_prep)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); - /** - * Completion handler. This is guaranteed to be eventually - * fired after cl_page_operations::cpo_prep() or - * cl_page_operations::cpo_make_ready() call. - * - * This method can be called in a non-blocking context. 
It is - * guaranteed however, that the page involved and its object - * are pinned in memory (and, hence, calling cl_page_put() is - * safe). - * - * \see cl_page_completion() - */ - void (*cpo_completion)(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret); - /** - * Called when cached page is about to be added to the - * ptlrpc request as a part of req formation. - * - * \return 0 : proceed with this page; - * \return -EAGAIN : skip this page; - * \return -ve : error. - * - * \see cl_page_make_ready() - */ - int (*cpo_make_ready)(const struct lu_env *env, - const struct cl_page_slice *slice); - } io[CRT_NR]; - /** - * Tell transfer engine that only [to, from] part of a page should be - * transmitted. - * - * This is used for immediate transfers. - * - * \todo XXX this is not very good interface. It would be much better - * if all transfer parameters were supplied as arguments to - * cl_io_operations::cio_submit() call, but it is not clear how to do - * this for page queues. - * - * \see cl_page_clip() - */ - void (*cpo_clip)(const struct lu_env *env, - const struct cl_page_slice *slice, - int from, int to); - /** - * \pre the page was queued for transferring. - * \post page is removed from client's pending list, or -EBUSY - * is returned if it has already been in transferring. - * - * This is one of seldom page operation which is: - * 0. called from top level; - * 1. don't have vmpage locked; - * 2. every layer should synchronize execution of its ->cpo_cancel() - * with completion handlers. Osc uses client obd lock for this - * purpose. Based on there is no vvp_page_cancel and - * lov_page_cancel(), cpo_cancel is defacto protected by client lock. - * - * \see osc_page_cancel(). - */ - int (*cpo_cancel)(const struct lu_env *env, - const struct cl_page_slice *slice); - /** - * Write out a page by kernel. This is only called by ll_writepage - * right now. 
- * - * \see cl_page_flush() - */ - int (*cpo_flush)(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io); - /** @} transfer */ -}; - -/** - * Helper macro, dumping detailed information about \a page into a log. - */ -#define CL_PAGE_DEBUG(mask, env, page, format, ...) \ -do { \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - cl_page_print(env, &msgdata, lu_cdebug_printer, page); \ - CDEBUG(mask, format, ## __VA_ARGS__); \ - } \ -} while (0) - -/** - * Helper macro, dumping shorter information about \a page into a log. - */ -#define CL_PAGE_HEADER(mask, env, page, format, ...) \ -do { \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - cl_page_header_print(env, &msgdata, lu_cdebug_printer, page); \ - CDEBUG(mask, format, ## __VA_ARGS__); \ - } \ -} while (0) - -static inline struct page *cl_page_vmpage(struct cl_page *page) -{ - LASSERT(page->cp_vmpage); - return page->cp_vmpage; -} - -/** - * Check if a cl_page is in use. - * - * Client cache holds a refcount, this refcount will be dropped when - * the page is taken out of cache, see vvp_page_delete(). - */ -static inline bool __page_in_use(const struct cl_page *page, int refc) -{ - return (atomic_read(&page->cp_ref) > refc + 1); -} - -/** - * Caller itself holds a refcount of cl_page. - */ -#define cl_page_in_use(pg) __page_in_use(pg, 1) -/** - * Caller doesn't hold a refcount. - */ -#define cl_page_in_use_noref(pg) __page_in_use(pg, 0) - -/** @} cl_page */ - -/** \addtogroup cl_lock cl_lock - * @{ - */ -/** \struct cl_lock - * - * Extent locking on the client. - * - * LAYERING - * - * The locking model of the new client code is built around - * - * struct cl_lock - * - * data-type representing an extent lock on a regular file. 
cl_lock is a - * layered object (much like cl_object and cl_page), it consists of a header - * (struct cl_lock) and a list of layers (struct cl_lock_slice), linked to - * cl_lock::cll_layers list through cl_lock_slice::cls_linkage. - * - * Typical cl_lock consists of the two layers: - * - * - vvp_lock (vvp specific data), and - * - lov_lock (lov specific data). - * - * lov_lock contains an array of sub-locks. Each of these sub-locks is a - * normal cl_lock: it has a header (struct cl_lock) and a list of layers: - * - * - lovsub_lock, and - * - osc_lock - * - * Each sub-lock is associated with a cl_object (representing stripe - * sub-object or the file to which top-level cl_lock is associated to), and is - * linked into that cl_object::coh_locks. In this respect cl_lock is similar to - * cl_object (that at lov layer also fans out into multiple sub-objects), and - * is different from cl_page, that doesn't fan out (there is usually exactly - * one osc_page for every vvp_page). We shall call vvp-lov portion of the lock - * a "top-lock" and its lovsub-osc portion a "sub-lock". - * - * LIFE CYCLE - * - * cl_lock is a cacheless data container for the requirements of locks to - * complete the IO. cl_lock is created before I/O starts and destroyed when the - * I/O is complete. - * - * cl_lock depends on LDLM lock to fulfill lock semantics. LDLM lock is attached - * to cl_lock at OSC layer. LDLM lock is still cacheable. - * - * INTERFACE AND USAGE - * - * Two major methods are supported for cl_lock: clo_enqueue and clo_cancel. A - * cl_lock is enqueued by cl_lock_request(), which will call clo_enqueue() - * methods for each layer to enqueue the lock. At the LOV layer, if a cl_lock - * consists of multiple sub cl_locks, each sub locks will be enqueued - * correspondingly. At OSC layer, the lock enqueue request will tend to reuse - * cached LDLM lock; otherwise a new LDLM lock will have to be requested from - * OST side. 
- * - * cl_lock_cancel() must be called to release a cl_lock after use. clo_cancel() - * method will be called for each layer to release the resource held by this - * lock. At OSC layer, the reference count of LDLM lock, which is held at - * clo_enqueue time, is released. - * - * LDLM lock can only be canceled if there is no cl_lock using it. - * - * Overall process of the locking during IO operation is as following: - * - * - once parameters for IO are setup in cl_io, cl_io_operations::cio_lock() - * is called on each layer. Responsibility of this method is to add locks, - * needed by a given layer into cl_io.ci_lockset. - * - * - once locks for all layers were collected, they are sorted to avoid - * dead-locks (cl_io_locks_sort()), and enqueued. - * - * - when all locks are acquired, IO is performed; - * - * - locks are released after IO is complete. - * - * Striping introduces major additional complexity into locking. The - * fundamental problem is that it is generally unsafe to actively use (hold) - * two locks on the different OST servers at the same time, as this introduces - * inter-server dependency and can lead to cascading evictions. - * - * Basic solution is to sub-divide large read/write IOs into smaller pieces so - * that no multi-stripe locks are taken (note that this design abandons POSIX - * read/write semantics). Such pieces ideally can be executed concurrently. At - * the same time, certain types of IO cannot be sub-divived, without - * sacrificing correctness. This includes: - * - * - O_APPEND write, where [0, EOF] lock has to be taken, to guarantee - * atomicity; - * - * - ftruncate(fd, offset), where [offset, EOF] lock has to be taken. - * - * Also, in the case of read(fd, buf, count) or write(fd, buf, count), where - * buf is a part of memory mapped Lustre file, a lock or locks protecting buf - * has to be held together with the usual lock on [offset, offset + count]. 
- * - * Interaction with DLM - * - * In the expected setup, cl_lock is ultimately backed up by a collection of - * DLM locks (struct ldlm_lock). Association between cl_lock and DLM lock is - * implemented in osc layer, that also matches DLM events (ASTs, cancellation, - * etc.) into cl_lock_operation calls. See struct osc_lock for a more detailed - * description of interaction with DLM. - */ - -/** - * Lock description. - */ -struct cl_lock_descr { - /** Object this lock is granted for. */ - struct cl_object *cld_obj; - /** Index of the first page protected by this lock. */ - pgoff_t cld_start; - /** Index of the last page (inclusive) protected by this lock. */ - pgoff_t cld_end; - /** Group ID, for group lock */ - __u64 cld_gid; - /** Lock mode. */ - enum cl_lock_mode cld_mode; - /** - * flags to enqueue lock. A combination of bit-flags from - * enum cl_enq_flags. - */ - __u32 cld_enq_flags; -}; - -#define DDESCR "%s(%d):[%lu, %lu]:%x" -#define PDESCR(descr) \ - cl_lock_mode_name((descr)->cld_mode), (descr)->cld_mode, \ - (descr)->cld_start, (descr)->cld_end, (descr)->cld_enq_flags - -const char *cl_lock_mode_name(const enum cl_lock_mode mode); - -/** - * Layered client lock. - */ -struct cl_lock { - /** List of slices. Immutable after creation. */ - struct list_head cll_layers; - /** lock attribute, extent, cl_object, etc. */ - struct cl_lock_descr cll_descr; -}; - -/** - * Per-layer part of cl_lock - * - * \see vvp_lock, lov_lock, lovsub_lock, osc_lock - */ -struct cl_lock_slice { - struct cl_lock *cls_lock; - /** Object slice corresponding to this lock slice. Immutable after - * creation. - */ - struct cl_object *cls_obj; - const struct cl_lock_operations *cls_ops; - /** Linkage into cl_lock::cll_layers. Immutable after creation. */ - struct list_head cls_linkage; -}; - -/** - * - * \see vvp_lock_ops, lov_lock_ops, lovsub_lock_ops, osc_lock_ops - */ -struct cl_lock_operations { - /** @{ */ - /** - * Attempts to enqueue the lock. Called top-to-bottom. 
- * - * \retval 0 this layer has enqueued the lock successfully - * \retval >0 this layer has enqueued the lock, but need to wait on - * @anchor for resources - * \retval -ve failure - * - * \see vvp_lock_enqueue(), lov_lock_enqueue(), lovsub_lock_enqueue(), - * \see osc_lock_enqueue() - */ - int (*clo_enqueue)(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *io, struct cl_sync_io *anchor); - /** - * Cancel a lock, release its DLM lock ref, while does not cancel the - * DLM lock - */ - void (*clo_cancel)(const struct lu_env *env, - const struct cl_lock_slice *slice); - /** @} */ - /** - * Destructor. Frees resources and the slice. - * - * \see vvp_lock_fini(), lov_lock_fini(), lovsub_lock_fini(), - * \see osc_lock_fini() - */ - void (*clo_fini)(const struct lu_env *env, struct cl_lock_slice *slice); - /** - * Optional debugging helper. Prints given lock slice. - */ - int (*clo_print)(const struct lu_env *env, - void *cookie, lu_printer_t p, - const struct cl_lock_slice *slice); -}; - -#define CL_LOCK_DEBUG(mask, env, lock, format, ...) \ -do { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - cl_lock_print(env, &msgdata, lu_cdebug_printer, lock); \ - CDEBUG(mask, format, ## __VA_ARGS__); \ - } \ -} while (0) - -#define CL_LOCK_ASSERT(expr, env, lock) do { \ - if (likely(expr)) \ - break; \ - \ - CL_LOCK_DEBUG(D_ERROR, env, lock, "failed at %s.\n", #expr); \ - LBUG(); \ -} while (0) - -/** @} cl_lock */ - -/** \addtogroup cl_page_list cl_page_list - * Page list used to perform collective operations on a group of pages. - * - * Pages are added to the list one by one. cl_page_list acquires a reference - * for every page in it. Page list is used to perform collective operations on - * pages: - * - * - submit pages for an immediate transfer, - * - * - own pages on behalf of certain io (waiting for each page in turn), - * - * - discard pages. 
- * - * When list is finalized, it releases references on all pages it still has. - * - * \todo XXX concurrency control. - * - * @{ - */ -struct cl_page_list { - unsigned int pl_nr; - struct list_head pl_pages; - struct task_struct *pl_owner; -}; - -/** - * A 2-queue of pages. A convenience data-type for common use case, 2-queue - * contains an incoming page list and an outgoing page list. - */ -struct cl_2queue { - struct cl_page_list c2_qin; - struct cl_page_list c2_qout; -}; - -/** @} cl_page_list */ - -/** \addtogroup cl_io cl_io - * @{ - */ -/** \struct cl_io - * I/O - * - * cl_io represents a high level I/O activity like - * read(2)/write(2)/truncate(2) system call, or cancellation of an extent - * lock. - * - * cl_io is a layered object, much like cl_{object,page,lock} but with one - * important distinction. We want to minimize number of calls to the allocator - * in the fast path, e.g., in the case of read(2) when everything is cached: - * client already owns the lock over region being read, and data are cached - * due to read-ahead. To avoid allocation of cl_io layers in such situations, - * per-layer io state is stored in the session, associated with the io, see - * struct {vvp,lov,osc}_io for example. Sessions allocation is amortized - * by using free-lists, see cl_env_get(). - * - * There is a small predefined number of possible io types, enumerated in enum - * cl_io_type. - * - * cl_io is a state machine, that can be advanced concurrently by the multiple - * threads. It is up to these threads to control the concurrency and, - * specifically, to detect when io is done, and its state can be safely - * released. 
- * - * For read/write io overall execution plan is as following: - * - * (0) initialize io state through all layers; - * - * (1) loop: prepare chunk of work to do - * - * (2) call all layers to collect locks they need to process current chunk - * - * (3) sort all locks to avoid dead-locks, and acquire them - * - * (4) process the chunk: call per-page methods - * cl_io_operations::cio_prepare_write(), - * cl_io_operations::cio_commit_write() for write) - * - * (5) release locks - * - * (6) repeat loop. - * - * To implement the "parallel IO mode", lov layer creates sub-io's (lazily to - * address allocation efficiency issues mentioned above), and returns with the - * special error condition from per-page method when current sub-io has to - * block. This causes io loop to be repeated, and lov switches to the next - * sub-io in its cl_io_operations::cio_iter_init() implementation. - */ - -/** IO types */ -enum cl_io_type { - /** read system call */ - CIT_READ = 1, - /** write system call */ - CIT_WRITE, - /** truncate, utime system calls */ - CIT_SETATTR, - /** get data version */ - CIT_DATA_VERSION, - /** - * page fault handling - */ - CIT_FAULT, - /** - * fsync system call handling - * To write out a range of file - */ - CIT_FSYNC, - /** - * Miscellaneous io. This is used for occasional io activity that - * doesn't fit into other types. Currently this is used for: - * - * - cancellation of an extent lock. This io exists as a context - * to write dirty pages from under the lock being canceled back - * to the server; - * - * - VM induced page write-out. An io context for writing page out - * for memory cleansing; - * - * - glimpse. An io context to acquire glimpse lock. - * - * - grouplock. An io context to acquire group lock. - * - * CIT_MISC io is used simply as a context in which locks and pages - * are manipulated. Such io has no internal "process", that is, - * cl_io_loop() is never called for it. 
- */ - CIT_MISC, - CIT_OP_NR -}; - -/** - * States of cl_io state machine - */ -enum cl_io_state { - /** Not initialized. */ - CIS_ZERO, - /** Initialized. */ - CIS_INIT, - /** IO iteration started. */ - CIS_IT_STARTED, - /** Locks taken. */ - CIS_LOCKED, - /** Actual IO is in progress. */ - CIS_IO_GOING, - /** IO for the current iteration finished. */ - CIS_IO_FINISHED, - /** Locks released. */ - CIS_UNLOCKED, - /** Iteration completed. */ - CIS_IT_ENDED, - /** cl_io finalized. */ - CIS_FINI -}; - -/** - * IO state private for a layer. - * - * This is usually embedded into layer session data, rather than allocated - * dynamically. - * - * \see vvp_io, lov_io, osc_io - */ -struct cl_io_slice { - struct cl_io *cis_io; - /** corresponding object slice. Immutable after creation. */ - struct cl_object *cis_obj; - /** io operations. Immutable after creation. */ - const struct cl_io_operations *cis_iop; - /** - * linkage into a list of all slices for a given cl_io, hanging off - * cl_io::ci_layers. Immutable after creation. - */ - struct list_head cis_linkage; -}; - -typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *, - struct cl_page *); - -struct cl_read_ahead { - /* - * Maximum page index the readahead window will end. - * This is determined DLM lock coverage, RPC and stripe boundary. - * cra_end is included. - */ - pgoff_t cra_end; - /* optimal RPC size for this read, by pages */ - unsigned long cra_rpc_size; - /* - * Release callback. If readahead holds resources underneath, this - * function should be called to release it. - */ - void (*cra_release)(const struct lu_env *env, void *cbdata); - /* Callback data for cra_release routine */ - void *cra_cbdata; -}; - -static inline void cl_read_ahead_release(const struct lu_env *env, - struct cl_read_ahead *ra) -{ - if (ra->cra_release) - ra->cra_release(env, ra->cra_cbdata); - memset(ra, 0, sizeof(*ra)); -} - -/** - * Per-layer io operations. 
- * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops - */ -struct cl_io_operations { - /** - * Vector of io state transition methods for every io type. - * - * \see cl_page_operations::io - */ - struct { - /** - * Prepare io iteration at a given layer. - * - * Called top-to-bottom at the beginning of each iteration of - * "io loop" (if it makes sense for this type of io). Here - * layer selects what work it will do during this iteration. - * - * \see cl_io_operations::cio_iter_fini() - */ - int (*cio_iter_init)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Finalize io iteration. - * - * Called bottom-to-top at the end of each iteration of "io - * loop". Here layers can decide whether IO has to be - * continued. - * - * \see cl_io_operations::cio_iter_init() - */ - void (*cio_iter_fini)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Collect locks for the current iteration of io. - * - * Called top-to-bottom to collect all locks necessary for - * this iteration. This methods shouldn't actually enqueue - * anything, instead it should post a lock through - * cl_io_lock_add(). Once all locks are collected, they are - * sorted and enqueued in the proper order. - */ - int (*cio_lock)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Finalize unlocking. - * - * Called bottom-to-top to finish layer specific unlocking - * functionality, after generic code released all locks - * acquired by cl_io_operations::cio_lock(). - */ - void (*cio_unlock)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Start io iteration. - * - * Once all locks are acquired, called top-to-bottom to - * commence actual IO. In the current implementation, - * top-level vvp_io_{read,write}_start() does all the work - * synchronously by calling generic_file_*(), so other layers - * are called when everything is done. 
- */ - int (*cio_start)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Called top-to-bottom at the end of io loop. Here layer - * might wait for an unfinished asynchronous io. - */ - void (*cio_end)(const struct lu_env *env, - const struct cl_io_slice *slice); - /** - * Called bottom-to-top to notify layers that read/write IO - * iteration finished, with \a nob bytes transferred. - */ - void (*cio_advance)(const struct lu_env *env, - const struct cl_io_slice *slice, - size_t nob); - /** - * Called once per io, bottom-to-top to release io resources. - */ - void (*cio_fini)(const struct lu_env *env, - const struct cl_io_slice *slice); - } op[CIT_OP_NR]; - - /** - * Submit pages from \a queue->c2_qin for IO, and move - * successfully submitted pages into \a queue->c2_qout. Return - * non-zero if failed to submit even the single page. If - * submission failed after some pages were moved into \a - * queue->c2_qout, completion callback with non-zero ioret is - * executed on them. - */ - int (*cio_submit)(const struct lu_env *env, - const struct cl_io_slice *slice, - enum cl_req_type crt, - struct cl_2queue *queue); - /** - * Queue async page for write. - * The difference between cio_submit and cio_queue is that - * cio_submit is for urgent request. - */ - int (*cio_commit_async)(const struct lu_env *env, - const struct cl_io_slice *slice, - struct cl_page_list *queue, int from, int to, - cl_commit_cbt cb); - /** - * Decide maximum read ahead extent - * - * \pre io->ci_type == CIT_READ - */ - int (*cio_read_ahead)(const struct lu_env *env, - const struct cl_io_slice *slice, - pgoff_t start, struct cl_read_ahead *ra); - /** - * Optional debugging helper. Print given io slice. - */ - int (*cio_print)(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct cl_io_slice *slice); -}; - -/** - * Flags to lock enqueue procedure. 
- * \ingroup cl_lock - */ -enum cl_enq_flags { - /** - * instruct server to not block, if conflicting lock is found. Instead - * -EWOULDBLOCK is returned immediately. - */ - CEF_NONBLOCK = 0x00000001, - /** - * take lock asynchronously (out of order), as it cannot - * deadlock. This is for LDLM_FL_HAS_INTENT locks used for glimpsing. - */ - CEF_ASYNC = 0x00000002, - /** - * tell the server to instruct (though a flag in the blocking ast) an - * owner of the conflicting lock, that it can drop dirty pages - * protected by this lock, without sending them to the server. - */ - CEF_DISCARD_DATA = 0x00000004, - /** - * tell the sub layers that it must be a `real' lock. This is used for - * mmapped-buffer locks and glimpse locks that must be never converted - * into lockless mode. - * - * \see vvp_mmap_locks(), cl_glimpse_lock(). - */ - CEF_MUST = 0x00000008, - /** - * tell the sub layers that never request a `real' lock. This flag is - * not used currently. - * - * cl_io::ci_lockreq and CEF_{MUST,NEVER} flags specify lockless - * conversion policy: ci_lockreq describes generic information of lock - * requirement for this IO, especially for locks which belong to the - * object doing IO; however, lock itself may have precise requirements - * that are described by the enqueue flags. - */ - CEF_NEVER = 0x00000010, - /** - * for async glimpse lock. - */ - CEF_AGL = 0x00000020, - /** - * enqueue a lock to test DLM lock existence. - */ - CEF_PEEK = 0x00000040, - /** - * Lock match only. Used by group lock in I/O as group lock - * is known to exist. - */ - CEF_LOCK_MATCH = BIT(7), - /** - * mask of enq_flags. - */ - CEF_MASK = 0x000000ff, -}; - -/** - * Link between lock and io. Intermediate structure is needed, because the - * same lock can be part of multiple io's simultaneously. - */ -struct cl_io_lock_link { - /** linkage into one of cl_lockset lists. 
*/ - struct list_head cill_linkage; - struct cl_lock cill_lock; - /** optional destructor */ - void (*cill_fini)(const struct lu_env *env, - struct cl_io_lock_link *link); -}; -#define cill_descr cill_lock.cll_descr - -/** - * Lock-set represents a collection of locks, that io needs at a - * time. Generally speaking, client tries to avoid holding multiple locks when - * possible, because - * - * - holding extent locks over multiple ost's introduces the danger of - * "cascading timeouts"; - * - * - holding multiple locks over the same ost is still dead-lock prone, - * see comment in osc_lock_enqueue(), - * - * but there are certain situations where this is unavoidable: - * - * - O_APPEND writes have to take [0, EOF] lock for correctness; - * - * - truncate has to take [new-size, EOF] lock for correctness; - * - * - SNS has to take locks across full stripe for correctness; - * - * - in the case when user level buffer, supplied to {read,write}(file0), - * is a part of a memory mapped lustre file, client has to take a dlm - * locks on file0, and all files that back up the buffer (or a part of - * the buffer, that is being processed in the current chunk, in any - * case, there are situations where at least 2 locks are necessary). - * - * In such cases we at least try to take locks in the same consistent - * order. To this end, all locks are first collected, then sorted, and then - * enqueued. - */ -struct cl_lockset { - /** locks to be acquired. */ - struct list_head cls_todo; - /** locks acquired. */ - struct list_head cls_done; -}; - -/** - * Lock requirements(demand) for IO. It should be cl_io_lock_req, - * but 'req' is always to be thought as 'request' :-) - */ -enum cl_io_lock_dmd { - /** Always lock data (e.g., O_APPEND). */ - CILR_MANDATORY = 0, - /** Layers are free to decide between local and global locking. */ - CILR_MAYBE, - /** Never lock: there is no cache (e.g., lockless IO). 
*/ - CILR_NEVER -}; - -enum cl_fsync_mode { - /** start writeback, do not wait for them to finish */ - CL_FSYNC_NONE = 0, - /** start writeback and wait for them to finish */ - CL_FSYNC_LOCAL = 1, - /** discard all of dirty pages in a specific file range */ - CL_FSYNC_DISCARD = 2, - /** start writeback and make sure they have reached storage before - * return. OST_SYNC RPC must be issued and finished - */ - CL_FSYNC_ALL = 3 -}; - -struct cl_io_rw_common { - loff_t crw_pos; - size_t crw_count; - int crw_nonblock; -}; - -/** - * State for io. - * - * cl_io is shared by all threads participating in this IO (in current - * implementation only one thread advances IO, but parallel IO design and - * concurrent copy_*_user() require multiple threads acting on the same IO. It - * is up to these threads to serialize their activities, including updates to - * mutable cl_io fields. - */ -struct cl_io { - /** type of this IO. Immutable after creation. */ - enum cl_io_type ci_type; - /** current state of cl_io state machine. */ - enum cl_io_state ci_state; - /** main object this io is against. Immutable after creation. */ - struct cl_object *ci_obj; - /** - * Upper layer io, of which this io is a part of. Immutable after - * creation. - */ - struct cl_io *ci_parent; - /** List of slices. Immutable after creation. */ - struct list_head ci_layers; - /** list of locks (to be) acquired by this io. */ - struct cl_lockset ci_lockset; - /** lock requirements, this is just a help info for sublayers. 
*/ - enum cl_io_lock_dmd ci_lockreq; - union { - struct cl_rd_io { - struct cl_io_rw_common rd; - } ci_rd; - struct cl_wr_io { - struct cl_io_rw_common wr; - int wr_append; - int wr_sync; - } ci_wr; - struct cl_io_rw_common ci_rw; - struct cl_setattr_io { - struct ost_lvb sa_attr; - unsigned int sa_attr_flags; - unsigned int sa_valid; - int sa_stripe_index; - const struct lu_fid *sa_parent_fid; - } ci_setattr; - struct cl_data_version_io { - u64 dv_data_version; - int dv_flags; - } ci_data_version; - struct cl_fault_io { - /** page index within file. */ - pgoff_t ft_index; - /** bytes valid byte on a faulted page. */ - size_t ft_nob; - /** writable page? for nopage() only */ - int ft_writable; - /** page of an executable? */ - int ft_executable; - /** page_mkwrite() */ - int ft_mkwrite; - /** resulting page */ - struct cl_page *ft_page; - } ci_fault; - struct cl_fsync_io { - loff_t fi_start; - loff_t fi_end; - /** file system level fid */ - struct lu_fid *fi_fid; - enum cl_fsync_mode fi_mode; - /* how many pages were written/discarded */ - unsigned int fi_nr_written; - } ci_fsync; - } u; - struct cl_2queue ci_queue; - size_t ci_nob; - int ci_result; - unsigned int ci_continue:1, - /** - * This io has held grouplock, to inform sublayers that - * don't do lockless i/o. - */ - ci_no_srvlock:1, - /** - * The whole IO need to be restarted because layout has been changed - */ - ci_need_restart:1, - /** - * to not refresh layout - the IO issuer knows that the layout won't - * change(page operations, layout change causes all page to be - * discarded), or it doesn't matter if it changes(sync). - */ - ci_ignore_layout:1, - /** - * Check if layout changed after the IO finishes. Mainly for HSM - * requirement. If IO occurs to openning files, it doesn't need to - * verify layout because HSM won't release openning files. - * Right now, only two operations need to verify layout: glimpse - * and setattr. 
- */ - ci_verify_layout:1, - /** - * file is released, restore has to be triggered by vvp layer - */ - ci_restore_needed:1, - /** - * O_NOATIME - */ - ci_noatime:1; - /** - * Number of pages owned by this IO. For invariant checking. - */ - unsigned int ci_owned_nr; -}; - -/** @} cl_io */ - -/** - * Per-transfer attributes. - */ -struct cl_req_attr { - enum cl_req_type cra_type; - u64 cra_flags; - struct cl_page *cra_page; - - /** Generic attributes for the server consumption. */ - struct obdo *cra_oa; - /** Jobid */ - char cra_jobid[LUSTRE_JOBID_SIZE]; -}; - -enum cache_stats_item { - /** how many cache lookups were performed */ - CS_lookup = 0, - /** how many times cache lookup resulted in a hit */ - CS_hit, - /** how many entities are in the cache right now */ - CS_total, - /** how many entities in the cache are actively used (and cannot be - * evicted) right now - */ - CS_busy, - /** how many entities were created at all */ - CS_create, - CS_NR -}; - -#define CS_NAMES { "lookup", "hit", "total", "busy", "create" } - -/** - * Stats for a generic cache (similar to inode, lu_object, etc. caches). - */ -struct cache_stats { - const char *cs_name; - atomic_t cs_stats[CS_NR]; -}; - -/** These are not exported so far */ -void cache_stats_init(struct cache_stats *cs, const char *name); - -/** - * Client-side site. This represents particular client stack. "Global" - * variables should (directly or indirectly) be added here to allow multiple - * clients to co-exist in the single address space. - */ -struct cl_site { - struct lu_site cs_lu; - /** - * Statistical counters. Atomics do not scale, something better like - * per-cpu counters is needed. - * - * These are exported as /sys/kernel/debug/lustre/llite/.../site - * - * When interpreting keep in mind that both sub-locks (and sub-pages) - * and top-locks (and top-pages) are accounted here. 
- */ - struct cache_stats cs_pages; - atomic_t cs_pages_state[CPS_NR]; -}; - -int cl_site_init(struct cl_site *s, struct cl_device *top); -void cl_site_fini(struct cl_site *s); -void cl_stack_fini(const struct lu_env *env, struct cl_device *cl); - -/** - * Output client site statistical counters into a buffer. Suitable for - * ll_rd_*()-style functions. - */ -int cl_site_stats_print(const struct cl_site *site, struct seq_file *m); - -/** - * \name helpers - * - * Type conversion and accessory functions. - */ -/** @{ */ - -static inline struct cl_site *lu2cl_site(const struct lu_site *site) -{ - return container_of(site, struct cl_site, cs_lu); -} - -static inline int lu_device_is_cl(const struct lu_device *d) -{ - return d->ld_type->ldt_tags & LU_DEVICE_CL; -} - -static inline struct cl_device *lu2cl_dev(const struct lu_device *d) -{ - LASSERT(!d || IS_ERR(d) || lu_device_is_cl(d)); - return container_of_safe(d, struct cl_device, cd_lu_dev); -} - -static inline struct lu_device *cl2lu_dev(struct cl_device *d) -{ - return &d->cd_lu_dev; -} - -static inline struct cl_object *lu2cl(const struct lu_object *o) -{ - LASSERT(!o || IS_ERR(o) || lu_device_is_cl(o->lo_dev)); - return container_of_safe(o, struct cl_object, co_lu); -} - -static inline const struct cl_object_conf * -lu2cl_conf(const struct lu_object_conf *conf) -{ - return container_of_safe(conf, struct cl_object_conf, coc_lu); -} - -static inline struct cl_object *cl_object_next(const struct cl_object *obj) -{ - return obj ? 
lu2cl(lu_object_next(&obj->co_lu)) : NULL; -} - -static inline struct cl_device *cl_object_device(const struct cl_object *o) -{ - LASSERT(!o || IS_ERR(o) || lu_device_is_cl(o->co_lu.lo_dev)); - return container_of_safe(o->co_lu.lo_dev, struct cl_device, cd_lu_dev); -} - -static inline struct cl_object_header *luh2coh(const struct lu_object_header *h) -{ - return container_of_safe(h, struct cl_object_header, coh_lu); -} - -static inline struct cl_site *cl_object_site(const struct cl_object *obj) -{ - return lu2cl_site(obj->co_lu.lo_dev->ld_site); -} - -static inline -struct cl_object_header *cl_object_header(const struct cl_object *obj) -{ - return luh2coh(obj->co_lu.lo_header); -} - -static inline int cl_device_init(struct cl_device *d, struct lu_device_type *t) -{ - return lu_device_init(&d->cd_lu_dev, t); -} - -static inline void cl_device_fini(struct cl_device *d) -{ - lu_device_fini(&d->cd_lu_dev); -} - -void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice, - struct cl_object *obj, pgoff_t index, - const struct cl_page_operations *ops); -void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, - struct cl_object *obj, - const struct cl_lock_operations *ops); -void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice, - struct cl_object *obj, const struct cl_io_operations *ops); -/** @} helpers */ - -/** \defgroup cl_object cl_object - * @{ - */ -struct cl_object *cl_object_top(struct cl_object *o); -struct cl_object *cl_object_find(const struct lu_env *env, struct cl_device *cd, - const struct lu_fid *fid, - const struct cl_object_conf *c); - -int cl_object_header_init(struct cl_object_header *h); -void cl_object_put(const struct lu_env *env, struct cl_object *o); -void cl_object_get(struct cl_object *o); -void cl_object_attr_lock(struct cl_object *o); -void cl_object_attr_unlock(struct cl_object *o); -int cl_object_attr_get(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr); -int 
cl_object_attr_update(const struct lu_env *env, struct cl_object *obj, - const struct cl_attr *attr, unsigned int valid); -int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj, - struct ost_lvb *lvb); -int cl_conf_set(const struct lu_env *env, struct cl_object *obj, - const struct cl_object_conf *conf); -int cl_object_prune(const struct lu_env *env, struct cl_object *obj); -void cl_object_kill(const struct lu_env *env, struct cl_object *obj); -int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj, - struct lov_user_md __user *lum); -int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj, - struct ll_fiemap_info_key *fmkey, struct fiemap *fiemap, - size_t *buflen); -int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj, - struct cl_layout *cl); -loff_t cl_object_maxbytes(struct cl_object *obj); - -/** - * Returns true, iff \a o0 and \a o1 are slices of the same object. - */ -static inline int cl_object_same(struct cl_object *o0, struct cl_object *o1) -{ - return cl_object_header(o0) == cl_object_header(o1); -} - -static inline void cl_object_page_init(struct cl_object *clob, int size) -{ - clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize; - cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size); - WARN_ON(cl_object_header(clob)->coh_page_bufsize > 512); -} - -static inline void *cl_object_page_slice(struct cl_object *clob, - struct cl_page *page) -{ - return (void *)((char *)page + clob->co_slice_off); -} - -/** - * Return refcount of cl_object. 
- */ -static inline int cl_object_refc(struct cl_object *clob) -{ - struct lu_object_header *header = clob->co_lu.lo_header; - - return atomic_read(&header->loh_ref); -} - -/** @} cl_object */ - -/** \defgroup cl_page cl_page - * @{ - */ -enum { - CLP_GANG_OKAY = 0, - CLP_GANG_RESCHED, - CLP_GANG_AGAIN, - CLP_GANG_ABORT -}; - -/* callback of cl_page_gang_lookup() */ -struct cl_page *cl_page_find(const struct lu_env *env, struct cl_object *obj, - pgoff_t idx, struct page *vmpage, - enum cl_page_type type); -struct cl_page *cl_page_alloc(const struct lu_env *env, - struct cl_object *o, pgoff_t ind, - struct page *vmpage, - enum cl_page_type type); -void cl_page_get(struct cl_page *page); -void cl_page_put(const struct lu_env *env, struct cl_page *page); -void cl_page_print(const struct lu_env *env, void *cookie, lu_printer_t printer, - const struct cl_page *pg); -void cl_page_header_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct cl_page *pg); -struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj); - -const struct cl_page_slice *cl_page_at(const struct cl_page *page, - const struct lu_device_type *dtype); - -/** - * \name ownership - * - * Functions dealing with the ownership of page by io. 
- */ -/** @{ */ - -int cl_page_own(const struct lu_env *env, - struct cl_io *io, struct cl_page *page); -int cl_page_own_try(const struct lu_env *env, - struct cl_io *io, struct cl_page *page); -void cl_page_assume(const struct lu_env *env, - struct cl_io *io, struct cl_page *page); -void cl_page_unassume(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg); -void cl_page_disown(const struct lu_env *env, - struct cl_io *io, struct cl_page *page); -void cl_page_disown0(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg); -int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io); - -/** @} ownership */ - -/** - * \name transfer - * - * Functions dealing with the preparation of a page for a transfer, and - * tracking transfer state. - */ -/** @{ */ -int cl_page_prep(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg, enum cl_req_type crt); -void cl_page_completion(const struct lu_env *env, - struct cl_page *pg, enum cl_req_type crt, int ioret); -int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg, - enum cl_req_type crt); -int cl_page_cache_add(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg, enum cl_req_type crt); -void cl_page_clip(const struct lu_env *env, struct cl_page *pg, - int from, int to); -int cl_page_cancel(const struct lu_env *env, struct cl_page *page); -int cl_page_flush(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg); - -/** @} transfer */ - -/** - * \name helper routines - * Functions to discard, delete and export a cl_page. 
- */ -/** @{ */ -void cl_page_discard(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg); -void cl_page_delete(const struct lu_env *env, struct cl_page *pg); -int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg); -void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate); -loff_t cl_offset(const struct cl_object *obj, pgoff_t idx); -pgoff_t cl_index(const struct cl_object *obj, loff_t offset); -size_t cl_page_size(const struct cl_object *obj); -int cl_pages_prune(const struct lu_env *env, struct cl_object *obj); - -void cl_lock_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct cl_lock *lock); -void cl_lock_descr_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, - const struct cl_lock_descr *descr); -/* @} helper */ - -/** - * Data structure managing a client's cached pages. A count of - * "unstable" pages is maintained, and an LRU of clean pages is - * maintained. "unstable" pages are pages pinned by the ptlrpc - * layer for recovery purposes. - */ -struct cl_client_cache { - /** - * # of client cache refcount - * # of users (OSCs) + 2 (held by llite and lov) - */ - atomic_t ccc_users; - /** - * # of threads are doing shrinking - */ - unsigned int ccc_lru_shrinkers; - /** - * # of LRU entries available - */ - atomic_long_t ccc_lru_left; - /** - * List of entities(OSCs) for this LRU cache - */ - struct list_head ccc_lru; - /** - * Max # of LRU entries - */ - unsigned long ccc_lru_max; - /** - * Lock to protect ccc_lru list - */ - spinlock_t ccc_lru_lock; - /** - * Set if unstable check is enabled - */ - unsigned int ccc_unstable_check:1; - /** - * # of unstable pages for this mount point - */ - atomic_long_t ccc_unstable_nr; - /** - * Waitq for awaiting unstable pages to reach zero. 
- * Used at umounting time and signaled on BRW commit - */ - wait_queue_head_t ccc_unstable_waitq; - -}; - -/** - * cl_cache functions - */ -struct cl_client_cache *cl_cache_init(unsigned long lru_page_max); -void cl_cache_incref(struct cl_client_cache *cache); -void cl_cache_decref(struct cl_client_cache *cache); - -/** @} cl_page */ - -/** \defgroup cl_lock cl_lock - * @{ - */ - -int cl_lock_request(const struct lu_env *env, struct cl_io *io, - struct cl_lock *lock); -int cl_lock_init(const struct lu_env *env, struct cl_lock *lock, - const struct cl_io *io); -void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock); -const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, - const struct lu_device_type *dtype); -void cl_lock_release(const struct lu_env *env, struct cl_lock *lock); -int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io, - struct cl_lock *lock, struct cl_sync_io *anchor); -void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock); - -/** @} cl_lock */ - -/** \defgroup cl_io cl_io - * @{ - */ - -int cl_io_init(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, struct cl_object *obj); -int cl_io_sub_init(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, struct cl_object *obj); -int cl_io_rw_init(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, loff_t pos, size_t count); -int cl_io_loop(const struct lu_env *env, struct cl_io *io); - -void cl_io_fini(const struct lu_env *env, struct cl_io *io); -int cl_io_iter_init(const struct lu_env *env, struct cl_io *io); -void cl_io_iter_fini(const struct lu_env *env, struct cl_io *io); -int cl_io_lock(const struct lu_env *env, struct cl_io *io); -void cl_io_unlock(const struct lu_env *env, struct cl_io *io); -int cl_io_start(const struct lu_env *env, struct cl_io *io); -void cl_io_end(const struct lu_env *env, struct cl_io *io); -int cl_io_lock_add(const struct lu_env *env, struct cl_io *io, - struct cl_io_lock_link *link); 
-int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, - struct cl_lock_descr *descr); -int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, - enum cl_req_type iot, struct cl_2queue *queue); -int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, - enum cl_req_type iot, struct cl_2queue *queue, - long timeout); -int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, int from, int to, - cl_commit_cbt cb); -int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io, - pgoff_t start, struct cl_read_ahead *ra); -int cl_io_is_going(const struct lu_env *env); - -/** - * True, iff \a io is an O_APPEND write(2). - */ -static inline int cl_io_is_append(const struct cl_io *io) -{ - return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_append; -} - -static inline int cl_io_is_sync_write(const struct cl_io *io) -{ - return io->ci_type == CIT_WRITE && io->u.ci_wr.wr_sync; -} - -static inline int cl_io_is_mkwrite(const struct cl_io *io) -{ - return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite; -} - -/** - * True, iff \a io is a truncate(2). - */ -static inline int cl_io_is_trunc(const struct cl_io *io) -{ - return io->ci_type == CIT_SETATTR && - (io->u.ci_setattr.sa_valid & ATTR_SIZE); -} - -struct cl_io *cl_io_top(struct cl_io *io); - -#define CL_IO_SLICE_CLEAN(foo_io, base) \ -do { \ - typeof(foo_io) __foo_io = (foo_io); \ - \ - BUILD_BUG_ON(offsetof(typeof(*__foo_io), base) != 0); \ - memset(&__foo_io->base + 1, 0, \ - sizeof(*__foo_io) - sizeof(__foo_io->base)); \ -} while (0) - -/** @} cl_io */ - -/** \defgroup cl_page_list cl_page_list - * @{ - */ - -/** - * Last page in the page list. 
- */ -static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist) -{ - LASSERT(plist->pl_nr > 0); - return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch); -} - -static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist) -{ - LASSERT(plist->pl_nr > 0); - return list_entry(plist->pl_pages.next, struct cl_page, cp_batch); -} - -/** - * Iterate over pages in a page list. - */ -#define cl_page_list_for_each(page, list) \ - list_for_each_entry((page), &(list)->pl_pages, cp_batch) - -/** - * Iterate over pages in a page list, taking possible removals into account. - */ -#define cl_page_list_for_each_safe(page, temp, list) \ - list_for_each_entry_safe((page), (temp), &(list)->pl_pages, cp_batch) - -void cl_page_list_init(struct cl_page_list *plist); -void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page); -void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src, - struct cl_page *page); -void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src, - struct cl_page *page); -void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head); -void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist, - struct cl_page *page); -void cl_page_list_disown(const struct lu_env *env, - struct cl_io *io, struct cl_page_list *plist); -void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist); - -void cl_2queue_init(struct cl_2queue *queue); -void cl_2queue_disown(const struct lu_env *env, - struct cl_io *io, struct cl_2queue *queue); -void cl_2queue_discard(const struct lu_env *env, - struct cl_io *io, struct cl_2queue *queue); -void cl_2queue_fini(const struct lu_env *env, struct cl_2queue *queue); -void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page); - -/** @} cl_page_list */ - -void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj, - struct cl_req_attr *attr); - -/** \defgroup cl_sync_io cl_sync_io - * 
@{ - */ - -/** - * Anchor for synchronous transfer. This is allocated on a stack by thread - * doing synchronous transfer, and a pointer to this structure is set up in - * every page submitted for transfer. Transfer completion routine updates - * anchor and wakes up waiting thread when transfer is complete. - */ -struct cl_sync_io { - /** number of pages yet to be transferred. */ - atomic_t csi_sync_nr; - /** error code. */ - int csi_sync_rc; - /** barrier of destroy this structure */ - atomic_t csi_barrier; - /** completion to be signaled when transfer is complete. */ - wait_queue_head_t csi_waitq; - /** callback to invoke when this IO is finished */ - void (*csi_end_io)(const struct lu_env *, - struct cl_sync_io *); -}; - -void cl_sync_io_init(struct cl_sync_io *anchor, int nr, - void (*end)(const struct lu_env *, struct cl_sync_io *)); -int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor, - long timeout); -void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor, - int ioret); -void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor); - -/** @} cl_sync_io */ - -/** \defgroup cl_env cl_env - * - * lu_env handling for a client. - * - * lu_env is an environment within which lustre code executes. Its major part - * is lu_context---a fast memory allocation mechanism that is used to conserve - * precious kernel stack space. Originally lu_env was designed for a server, - * where - * - * - there is a (mostly) fixed number of threads, and - * - * - call chains have no non-lustre portions inserted between lustre code. - * - * On a client both these assumption fails, because every user thread can - * potentially execute lustre code as part of a system call, and lustre calls - * into VFS or MM that call back into lustre. 
- * - * To deal with that, cl_env wrapper functions implement the following - * optimizations: - * - * - allocation and destruction of environment is amortized by caching no - * longer used environments instead of destroying them; - * - * \see lu_env, lu_context, lu_context_key - * @{ - */ - -struct lu_env *cl_env_get(u16 *refcheck); -struct lu_env *cl_env_alloc(u16 *refcheck, __u32 tags); -void cl_env_put(struct lu_env *env, u16 *refcheck); -unsigned int cl_env_cache_purge(unsigned int nr); -struct lu_env *cl_env_percpu_get(void); -void cl_env_percpu_put(struct lu_env *env); - -/** @} cl_env */ - -/* - * Misc - */ -void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb); - -struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site, - struct lu_device_type *ldt, - struct lu_device *next); -/** @} clio */ - -int cl_global_init(void); -void cl_global_fini(void); - -#endif /* _LINUX_CL_OBJECT_H */ diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h deleted file mode 100644 index 7d119c1a0469..000000000000 --- a/drivers/staging/lustre/lustre/include/interval_tree.h +++ /dev/null @@ -1,119 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/interval_tree.h - * - * Author: Huang Wei - * Author: Jay Xiong - */ - -#ifndef _INTERVAL_H__ -#define _INTERVAL_H__ - -#include -#include -#include - -struct interval_node { - struct interval_node *in_left; - struct interval_node *in_right; - struct interval_node *in_parent; - unsigned in_color:1, - in_intree:1, /** set if the node is in tree */ - in_res1:30; - __u8 in_res2[4]; /** tags, 8-bytes aligned */ - __u64 in_max_high; - struct interval_node_extent { - __u64 start; - __u64 end; - } in_extent; -}; - -enum interval_iter { - INTERVAL_ITER_CONT = 1, - INTERVAL_ITER_STOP = 2 -}; - -static inline int interval_is_intree(struct interval_node *node) -{ - return node->in_intree == 1; -} - -static inline __u64 interval_low(struct interval_node *node) -{ - return node->in_extent.start; -} - -static inline __u64 interval_high(struct interval_node *node) -{ - return node->in_extent.end; -} - -static inline int interval_set(struct interval_node *node, - __u64 start, __u64 end) -{ - if (start > end) - return -ERANGE; - node->in_extent.start = start; - node->in_extent.end = end; - node->in_max_high = end; - return 0; -} - -/* - * Rules to write an interval callback. - * - the callback returns INTERVAL_ITER_STOP when it thinks the iteration - * should be stopped. It will then cause the iteration function to return - * immediately with return value INTERVAL_ITER_STOP. 
- * - callbacks for interval_iterate and interval_iterate_reverse: Every - * nodes in the tree will be set to @node before the callback being called - * - callback for interval_search: Only overlapped node will be set to @node - * before the callback being called. - */ -typedef enum interval_iter (*interval_callback_t)(struct interval_node *node, - void *args); - -struct interval_node *interval_insert(struct interval_node *node, - struct interval_node **root); -void interval_erase(struct interval_node *node, struct interval_node **root); - -/* - * Search the extents in the tree and call @func for each overlapped - * extents. - */ -enum interval_iter interval_search(struct interval_node *root, - struct interval_node_extent *ex, - interval_callback_t func, void *data); - -enum interval_iter interval_iterate_reverse(struct interval_node *root, - interval_callback_t func, - void *data); - -#endif diff --git a/drivers/staging/lustre/lustre/include/llog_swab.h b/drivers/staging/lustre/lustre/include/llog_swab.h deleted file mode 100644 index 0433b79efdcb..000000000000 --- a/drivers/staging/lustre/lustre/include/llog_swab.h +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - * - * Copyright 2015 Cray Inc, all rights reserved. - * Author: Ben Evans. - * - * We assume all nodes are either little-endian or big-endian, and we - * always send messages in the sender's native format. The receiver - * detects the message format by checking the 'magic' field of the message - * (see lustre_msg_swabbed() below). - * - * Each type has corresponding 'lustre_swab_xxxtypexxx()' routines - * are implemented in ptlrpc/pack_generic.c. These 'swabbers' convert the - * type from "other" endian, in-place in the message buffer. - * - * A swabber takes a single pointer argument. The caller must already have - * verified that the length of the message buffer >= sizeof (type). - * - * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine - * may be defined that swabs just the variable part, after the caller has - * verified that the message buffer is large enough. 
- */ - -#ifndef _LLOG_SWAB_H_ -#define _LLOG_SWAB_H_ - -#include - -struct lustre_cfg; - -void lustre_swab_lu_fid(struct lu_fid *fid); -void lustre_swab_ost_id(struct ost_id *oid); -void lustre_swab_llogd_body(struct llogd_body *d); -void lustre_swab_llog_hdr(struct llog_log_hdr *h); -void lustre_swab_llogd_conn_body(struct llogd_conn_body *d); -void lustre_swab_llog_rec(struct llog_rec_hdr *rec); -void lustre_swab_lu_seq_range(struct lu_seq_range *range); -void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); -void lustre_swab_cfg_marker(struct cfg_marker *marker, - int swab, int size); - -#endif diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h deleted file mode 100644 index 495e6f5f676b..000000000000 --- a/drivers/staging/lustre/lustre/include/lprocfs_status.h +++ /dev/null @@ -1,646 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lprocfs_status.h - * - * Top level header file for LProc SNMP - * - * Author: Hariharan Thantry thantry@users.sourceforge.net - */ -#ifndef _LPROCFS_SNMP_H -#define _LPROCFS_SNMP_H - -#include -#include -#include -#include -#include - -#include -#include - -struct lprocfs_vars { - const char *name; - const struct file_operations *fops; - void *data; - /** - * sysfs file mode. - */ - umode_t proc_mode; -}; - -struct lprocfs_static_vars { - struct lprocfs_vars *obd_vars; - const struct attribute_group *sysfs_vars; -}; - -/* if we find more consumers this could be generalized */ -#define OBD_HIST_MAX 32 -struct obd_histogram { - spinlock_t oh_lock; - unsigned long oh_buckets[OBD_HIST_MAX]; -}; - -enum { - BRW_R_PAGES = 0, - BRW_W_PAGES, - BRW_R_RPC_HIST, - BRW_W_RPC_HIST, - BRW_R_IO_TIME, - BRW_W_IO_TIME, - BRW_R_DISCONT_PAGES, - BRW_W_DISCONT_PAGES, - BRW_R_DISCONT_BLOCKS, - BRW_W_DISCONT_BLOCKS, - BRW_R_DISK_IOSIZE, - BRW_W_DISK_IOSIZE, - BRW_R_DIO_FRAGS, - BRW_W_DIO_FRAGS, - BRW_LAST, -}; - -struct brw_stats { - struct obd_histogram hist[BRW_LAST]; -}; - -enum { - RENAME_SAMEDIR_SIZE = 0, - RENAME_CROSSDIR_SRC_SIZE, - RENAME_CROSSDIR_TGT_SIZE, - RENAME_LAST, -}; - -struct rename_stats { - struct obd_histogram hist[RENAME_LAST]; -}; - -/* An lprocfs counter can be configured using the enum bit masks below. - * - * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already - * protects this counter from concurrent updates. If not specified, - * lprocfs an internal per-counter lock variable. External locks are - * not used to protect counter increments, but are used to protect - * counter readout and resets. - * - * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter samples, - * (i.e. counter can be incremented by more than "1"). 
When specified, - * the counter maintains min, max and sum in addition to a simple - * invocation count. This allows averages to be computed. - * If not specified, the counter is an increment-by-1 counter. - * min, max, sum, etc. are not maintained. - * - * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of - * squares (for multi-valued counter samples only). This allows - * external computation of standard deviation, but involves a 64-bit - * multiply per counter increment. - */ - -enum { - LPROCFS_CNTR_EXTERNALLOCK = 0x0001, - LPROCFS_CNTR_AVGMINMAX = 0x0002, - LPROCFS_CNTR_STDDEV = 0x0004, - - /* counter data type */ - LPROCFS_TYPE_REGS = 0x0100, - LPROCFS_TYPE_BYTES = 0x0200, - LPROCFS_TYPE_PAGES = 0x0400, - LPROCFS_TYPE_CYCLE = 0x0800, -}; - -#define LC_MIN_INIT ((~(__u64)0) >> 1) - -struct lprocfs_counter_header { - unsigned int lc_config; - const char *lc_name; /* must be static */ - const char *lc_units; /* must be static */ -}; - -struct lprocfs_counter { - __s64 lc_count; - __s64 lc_min; - __s64 lc_max; - __s64 lc_sumsquare; - /* - * Every counter has lc_array_sum[0], while lc_array_sum[1] is only - * for irq context counter, i.e. 
stats with - * LPROCFS_STATS_FLAG_IRQ_SAFE flag, its counter need - * lc_array_sum[1] - */ - __s64 lc_array_sum[1]; -}; - -#define lc_sum lc_array_sum[0] -#define lc_sum_irq lc_array_sum[1] - -struct lprocfs_percpu { -#ifndef __GNUC__ - __s64 pad; -#endif - struct lprocfs_counter lp_cntr[0]; -}; - -enum lprocfs_stats_lock_ops { - LPROCFS_GET_NUM_CPU = 0x0001, /* number allocated per-CPU stats */ - LPROCFS_GET_SMP_ID = 0x0002, /* current stat to be updated */ -}; - -enum lprocfs_stats_flags { - LPROCFS_STATS_FLAG_NONE = 0x0000, /* per cpu counter */ - LPROCFS_STATS_FLAG_NOPERCPU = 0x0001, /* stats have no percpu - * area and need locking - */ - LPROCFS_STATS_FLAG_IRQ_SAFE = 0x0002, /* alloc need irq safe */ -}; - -enum lprocfs_fields_flags { - LPROCFS_FIELDS_FLAGS_CONFIG = 0x0001, - LPROCFS_FIELDS_FLAGS_SUM = 0x0002, - LPROCFS_FIELDS_FLAGS_MIN = 0x0003, - LPROCFS_FIELDS_FLAGS_MAX = 0x0004, - LPROCFS_FIELDS_FLAGS_AVG = 0x0005, - LPROCFS_FIELDS_FLAGS_SUMSQUARE = 0x0006, - LPROCFS_FIELDS_FLAGS_COUNT = 0x0007, -}; - -struct lprocfs_stats { - /* # of counters */ - unsigned short ls_num; - /* 1 + the biggest cpu # whose ls_percpu slot has been allocated */ - unsigned short ls_biggest_alloc_num; - enum lprocfs_stats_flags ls_flags; - /* Lock used when there are no percpu stats areas; For percpu stats, - * it is used to protect ls_biggest_alloc_num change - */ - spinlock_t ls_lock; - - /* has ls_num of counter headers */ - struct lprocfs_counter_header *ls_cnt_header; - struct lprocfs_percpu *ls_percpu[0]; -}; - -#define OPC_RANGE(seg) (seg ## _LAST_OPC - seg ## _FIRST_OPC) - -/* Pack all opcodes down into a single monotonically increasing index */ -static inline int opcode_offset(__u32 opc) -{ - if (opc < OST_LAST_OPC) { - /* OST opcode */ - return (opc - OST_FIRST_OPC); - } else if (opc < MDS_LAST_OPC) { - /* MDS opcode */ - return (opc - MDS_FIRST_OPC + - OPC_RANGE(OST)); - } else if (opc < LDLM_LAST_OPC) { - /* LDLM Opcode */ - return (opc - LDLM_FIRST_OPC + - 
OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < MGS_LAST_OPC) { - /* MGS Opcode */ - return (opc - MGS_FIRST_OPC + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < OBD_LAST_OPC) { - /* OBD Ping */ - return (opc - OBD_FIRST_OPC + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < LLOG_LAST_OPC) { - /* LLOG Opcode */ - return (opc - LLOG_FIRST_OPC + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < QUOTA_LAST_OPC) { - /* LQUOTA Opcode */ - return (opc - QUOTA_FIRST_OPC + - OPC_RANGE(LLOG) + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < SEQ_LAST_OPC) { - /* SEQ opcode */ - return (opc - SEQ_FIRST_OPC + - OPC_RANGE(QUOTA) + - OPC_RANGE(LLOG) + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < SEC_LAST_OPC) { - /* SEC opcode */ - return (opc - SEC_FIRST_OPC + - OPC_RANGE(SEQ) + - OPC_RANGE(QUOTA) + - OPC_RANGE(LLOG) + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else if (opc < FLD_LAST_OPC) { - /* FLD opcode */ - return (opc - FLD_FIRST_OPC + - OPC_RANGE(SEC) + - OPC_RANGE(SEQ) + - OPC_RANGE(QUOTA) + - OPC_RANGE(LLOG) + - OPC_RANGE(OBD) + - OPC_RANGE(MGS) + - OPC_RANGE(LDLM) + - OPC_RANGE(MDS) + - OPC_RANGE(OST)); - } else { - /* Unknown Opcode */ - return -1; - } -} - -#define LUSTRE_MAX_OPCODES (OPC_RANGE(OST) + \ - OPC_RANGE(MDS) + \ - OPC_RANGE(LDLM) + \ - OPC_RANGE(MGS) + \ - OPC_RANGE(OBD) + \ - OPC_RANGE(LLOG) + \ - OPC_RANGE(SEC) + \ - OPC_RANGE(SEQ) + \ - OPC_RANGE(SEC) + \ - OPC_RANGE(FLD)) - -#define EXTRA_MAX_OPCODES ((PTLRPC_LAST_CNTR - PTLRPC_FIRST_CNTR) + \ - OPC_RANGE(EXTRA)) - -enum { - PTLRPC_REQWAIT_CNTR = 0, - PTLRPC_REQQDEPTH_CNTR, - PTLRPC_REQACTIVE_CNTR, - PTLRPC_TIMEOUT, - PTLRPC_REQBUF_AVAIL_CNTR, - PTLRPC_LAST_CNTR 
-}; - -#define PTLRPC_FIRST_CNTR PTLRPC_REQWAIT_CNTR - -enum { - LDLM_GLIMPSE_ENQUEUE = 0, - LDLM_PLAIN_ENQUEUE, - LDLM_EXTENT_ENQUEUE, - LDLM_FLOCK_ENQUEUE, - LDLM_IBITS_ENQUEUE, - MDS_REINT_SETATTR, - MDS_REINT_CREATE, - MDS_REINT_LINK, - MDS_REINT_UNLINK, - MDS_REINT_RENAME, - MDS_REINT_OPEN, - MDS_REINT_SETXATTR, - BRW_READ_BYTES, - BRW_WRITE_BYTES, - EXTRA_LAST_OPC -}; - -#define EXTRA_FIRST_OPC LDLM_GLIMPSE_ENQUEUE -/* class_obd.c */ -extern struct dentry *debugfs_lustre_root; -extern struct kobject *lustre_kobj; - -struct obd_device; -struct obd_histogram; - -/* Days / hours / mins / seconds format */ -struct dhms { - int d, h, m, s; -}; - -static inline void s2dhms(struct dhms *ts, time64_t secs64) -{ - unsigned int secs; - - ts->d = div_u64_rem(secs64, 86400, &secs); - ts->h = secs / 3600; - secs = secs % 3600; - ts->m = secs / 60; - ts->s = secs % 60; -} - -#define DHMS_FMT "%dd%dh%02dm%02ds" -#define DHMS_VARS(x) (x)->d, (x)->h, (x)->m, (x)->s - -#define JOBSTATS_JOBID_VAR_MAX_LEN 20 -#define JOBSTATS_DISABLE "disable" -#define JOBSTATS_PROCNAME_UID "procname_uid" -#define JOBSTATS_NODELOCAL "nodelocal" - -/* obd_config.c */ -void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg)); - -int lprocfs_write_frac_helper(const char __user *buffer, - unsigned long count, int *val, int mult); -int lprocfs_read_frac_helper(char *buffer, unsigned long count, - long val, int mult); - -int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, - unsigned int cpuid); -int lprocfs_stats_lock(struct lprocfs_stats *stats, - enum lprocfs_stats_lock_ops opc, - unsigned long *flags); -void lprocfs_stats_unlock(struct lprocfs_stats *stats, - enum lprocfs_stats_lock_ops opc, - unsigned long *flags); - -static inline unsigned int -lprocfs_stats_counter_size(struct lprocfs_stats *stats) -{ - unsigned int percpusize; - - percpusize = offsetof(struct lprocfs_percpu, lp_cntr[stats->ls_num]); - - /* irq safe stats need lc_array_sum[1] */ - if ((stats->ls_flags 
& LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpusize += stats->ls_num * sizeof(__s64); - - if ((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0) - percpusize = L1_CACHE_ALIGN(percpusize); - - return percpusize; -} - -static inline struct lprocfs_counter * -lprocfs_stats_counter_get(struct lprocfs_stats *stats, unsigned int cpuid, - int index) -{ - struct lprocfs_counter *cntr; - - cntr = &stats->ls_percpu[cpuid]->lp_cntr[index]; - - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - cntr = (void *)cntr + index * sizeof(__s64); - - return cntr; -} - -/* Two optimized LPROCFS counter increment functions are provided: - * lprocfs_counter_incr(cntr, value) - optimized for by-one counters - * lprocfs_counter_add(cntr) - use for multi-valued counters - * Counter data layout allows config flag, counter lock and the - * count itself to reside within a single cache line. - */ - -void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount); -void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount); - -#define lprocfs_counter_incr(stats, idx) \ - lprocfs_counter_add(stats, idx, 1) -#define lprocfs_counter_decr(stats, idx) \ - lprocfs_counter_sub(stats, idx, 1) - -__s64 lprocfs_read_helper(struct lprocfs_counter *lc, - struct lprocfs_counter_header *header, - enum lprocfs_stats_flags flags, - enum lprocfs_fields_flags field); -__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx, - enum lprocfs_fields_flags field); - -extern struct lprocfs_stats * -lprocfs_alloc_stats(unsigned int num, enum lprocfs_stats_flags flags); -void lprocfs_clear_stats(struct lprocfs_stats *stats); -void lprocfs_free_stats(struct lprocfs_stats **stats); -void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned int conf, const char *name, - const char *units); -struct obd_export; -int lprocfs_exp_cleanup(struct obd_export *exp); -extern const struct file_operations lprocfs_stats_seq_fops; - -/* lprocfs_status.c */ -void 
ldebugfs_add_vars(struct dentry *parent, struct lprocfs_vars *var, - void *data); - -int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list, - const struct attribute_group *attrs); -int lprocfs_obd_cleanup(struct obd_device *obd); - -/* Generic callbacks */ - -int lprocfs_rd_uint(struct seq_file *m, void *data); -int lprocfs_wr_uint(struct file *file, const char __user *buffer, - unsigned long count, void *data); -int lprocfs_rd_server_uuid(struct seq_file *m, void *data); -int lprocfs_rd_conn_uuid(struct seq_file *m, void *data); -int lprocfs_rd_import(struct seq_file *m, void *data); -int lprocfs_rd_state(struct seq_file *m, void *data); -int lprocfs_rd_connect_flags(struct seq_file *m, void *data); - -struct adaptive_timeout; -int lprocfs_at_hist_helper(struct seq_file *m, struct adaptive_timeout *at); -int lprocfs_rd_timeouts(struct seq_file *m, void *data); -int lprocfs_wr_ping(struct file *file, const char __user *buffer, - size_t count, loff_t *off); -int lprocfs_wr_import(struct file *file, const char __user *buffer, - size_t count, loff_t *off); -int lprocfs_rd_pinger_recov(struct seq_file *m, void *n); -int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer, - size_t count, loff_t *off); - -/* Statfs helpers */ - -int lprocfs_write_helper(const char __user *buffer, unsigned long count, - int *val); -int lprocfs_write_u64_helper(const char __user *buffer, - unsigned long count, __u64 *val); -int lprocfs_write_frac_u64_helper(const char __user *buffer, - unsigned long count, - __u64 *val, int mult); -char *lprocfs_find_named_value(const char *buffer, const char *name, - size_t *count); -void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value); -void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value); -void lprocfs_oh_clear(struct obd_histogram *oh); -unsigned long lprocfs_oh_sum(struct obd_histogram *oh); - -void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx, - struct 
lprocfs_counter *cnt); - -int lprocfs_single_release(struct inode *inode, struct file *file); -int lprocfs_seq_release(struct inode *inode, struct file *file); - -/* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only - * proc entries; otherwise, you will define name##_seq_write function also for - * a read-write proc entry, and then call LPROC_SEQ_SEQ instead. Finally, - * call ldebugfs_obd_seq_create(obd, filename, 0444, &name#_fops, data); - */ -#define __LPROC_SEQ_FOPS(name, custom_seq_write) \ -static int name##_single_open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, name##_seq_show, inode->i_private); \ -} \ -static const struct file_operations name##_fops = { \ - .owner = THIS_MODULE, \ - .open = name##_single_open, \ - .read = seq_read, \ - .write = custom_seq_write, \ - .llseek = seq_lseek, \ - .release = lprocfs_single_release, \ -} - -#define LPROC_SEQ_FOPS_RO(name) __LPROC_SEQ_FOPS(name, NULL) -#define LPROC_SEQ_FOPS(name) __LPROC_SEQ_FOPS(name, name##_seq_write) - -#define LPROC_SEQ_FOPS_RO_TYPE(name, type) \ - static int name##_##type##_seq_show(struct seq_file *m, void *v)\ - { \ - return lprocfs_rd_##type(m, m->private); \ - } \ - LPROC_SEQ_FOPS_RO(name##_##type) - -#define LPROC_SEQ_FOPS_RW_TYPE(name, type) \ - static int name##_##type##_seq_show(struct seq_file *m, void *v)\ - { \ - return lprocfs_rd_##type(m, m->private); \ - } \ - static ssize_t name##_##type##_seq_write(struct file *file, \ - const char __user *buffer, size_t count, \ - loff_t *off) \ - { \ - struct seq_file *seq = file->private_data; \ - return lprocfs_wr_##type(file, buffer, \ - count, seq->private); \ - } \ - LPROC_SEQ_FOPS(name##_##type) - -#define LPROC_SEQ_FOPS_WR_ONLY(name, type) \ - static ssize_t name##_##type##_write(struct file *file, \ - const char __user *buffer, size_t count, \ - loff_t *off) \ - { \ - return lprocfs_wr_##type(file, buffer, count, off); \ - } \ - static int name##_##type##_open(struct inode 
*inode, struct file *file) \ - { \ - return single_open(file, NULL, inode->i_private); \ - } \ - static const struct file_operations name##_##type##_fops = { \ - .open = name##_##type##_open, \ - .write = name##_##type##_write, \ - .release = lprocfs_single_release, \ - } - -struct lustre_attr { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, struct attribute *attr, - char *buf); - ssize_t (*store)(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len); -}; - -#define LUSTRE_ATTR(name, mode, show, store) \ -static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store) - -#define LUSTRE_RO_ATTR(name) LUSTRE_ATTR(name, 0444, name##_show, NULL) -#define LUSTRE_RW_ATTR(name) LUSTRE_ATTR(name, 0644, name##_show, name##_store) - -extern const struct sysfs_ops lustre_sysfs_ops; - -struct root_squash_info; -int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count, - struct root_squash_info *squash, char *name); -int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count, - struct root_squash_info *squash, char *name); - -/* all quota proc functions */ -int lprocfs_quota_rd_bunit(char *page, char **start, - loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_bunit(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_btune(char *page, char **start, - loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_btune(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_iunit(char *page, char **start, - loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_iunit(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_itune(char *page, char **start, - loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_itune(struct file *file, const char *buffer, - unsigned long count, void *data); -int 
lprocfs_quota_rd_type(char *page, char **start, loff_t off, int count, - int *eof, void *data); -int lprocfs_quota_wr_type(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_switch_seconds(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_switch_seconds(struct file *file, - const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_sync_blk(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_sync_blk(struct file *file, const char *buffer, - unsigned long count, void *data); -int lprocfs_quota_rd_switch_qs(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_switch_qs(struct file *file, - const char *buffer, unsigned long count, - void *data); -int lprocfs_quota_rd_boundary_factor(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_boundary_factor(struct file *file, - const char *buffer, unsigned long count, - void *data); -int lprocfs_quota_rd_least_bunit(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_least_bunit(struct file *file, - const char *buffer, unsigned long count, - void *data); -int lprocfs_quota_rd_least_iunit(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_least_iunit(struct file *file, - const char *buffer, unsigned long count, - void *data); -int lprocfs_quota_rd_qs_factor(char *page, char **start, loff_t off, - int count, int *eof, void *data); -int lprocfs_quota_wr_qs_factor(struct file *file, - const char *buffer, unsigned long count, - void *data); -#endif /* LPROCFS_SNMP_H */ diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h deleted file mode 100644 index f29bbca5af65..000000000000 --- a/drivers/staging/lustre/lustre/include/lu_object.h +++ 
/dev/null @@ -1,1305 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LUSTRE_LU_OBJECT_H -#define __LUSTRE_LU_OBJECT_H - -#include -#include -#include -#include -#include - -struct seq_file; -struct lustre_cfg; -struct lprocfs_stats; - -/** \defgroup lu lu - * lu_* data-types represent server-side entities shared by data and meta-data - * stacks. - * - * Design goals: - * - * -# support for layering. - * - * Server side object is split into layers, one per device in the - * corresponding device stack. Individual layer is represented by struct - * lu_object. Compound layered object --- by struct lu_object_header. Most - * interface functions take lu_object as an argument and operate on the - * whole compound object. 
This decision was made due to the following - * reasons: - * - * - it's envisaged that lu_object will be used much more often than - * lu_object_header; - * - * - we want lower (non-top) layers to be able to initiate operations - * on the whole object. - * - * Generic code supports layering more complex than simple stacking, e.g., - * it is possible that at some layer object "spawns" multiple sub-objects - * on the lower layer. - * - * -# fid-based identification. - * - * Compound object is uniquely identified by its fid. Objects are indexed - * by their fids (hash table is used for index). - * - * -# caching and life-cycle management. - * - * Object's life-time is controlled by reference counting. When reference - * count drops to 0, object is returned to cache. Cached objects still - * retain their identity (i.e., fid), and can be recovered from cache. - * - * Objects are kept in the global LRU list, and lu_site_purge() function - * can be used to reclaim given number of unused objects from the tail of - * the LRU. - * - * -# avoiding recursion. - * - * Generic code tries to replace recursion through layers by iterations - * where possible. Additionally to the end of reducing stack consumption, - * data, when practically possible, are allocated through lu_context_key - * interface rather than on stack. - * @{ - */ - -struct lu_site; -struct lu_object; -struct lu_device; -struct lu_object_header; -struct lu_context; -struct lu_env; - -/** - * Operations common for data and meta-data devices. - */ -struct lu_device_operations { - /** - * Allocate object for the given device (without lower-layer - * parts). This is called by lu_object_operations::loo_object_init() - * from the parent layer, and should setup at least lu_object::lo_dev - * and lu_object::lo_ops fields of resulting lu_object. - * - * Object creation protocol. 
- * - * Due to design goal of avoiding recursion, object creation (see - * lu_object_alloc()) is somewhat involved: - * - * - first, lu_device_operations::ldo_object_alloc() method of the - * top-level device in the stack is called. It should allocate top - * level object (including lu_object_header), but without any - * lower-layer sub-object(s). - * - * - then lu_object_alloc() sets fid in the header of newly created - * object. - * - * - then lu_object_operations::loo_object_init() is called. It has - * to allocate lower-layer object(s). To do this, - * lu_object_operations::loo_object_init() calls ldo_object_alloc() - * of the lower-layer device(s). - * - * - for all new objects allocated by - * lu_object_operations::loo_object_init() (and inserted into object - * stack), lu_object_operations::loo_object_init() is called again - * repeatedly, until no new objects are created. - * - * \post ergo(!IS_ERR(result), result->lo_dev == d && - * result->lo_ops != NULL); - */ - struct lu_object *(*ldo_object_alloc)(const struct lu_env *env, - const struct lu_object_header *h, - struct lu_device *d); - /** - * process config specific for device. - */ - int (*ldo_process_config)(const struct lu_env *env, - struct lu_device *, struct lustre_cfg *); - int (*ldo_recovery_complete)(const struct lu_env *, - struct lu_device *); - - /** - * initialize local objects for device. this method called after layer - * has been initialized (after LCFG_SETUP stage) and before it starts - * serving user requests. - */ - - int (*ldo_prepare)(const struct lu_env *, - struct lu_device *parent, - struct lu_device *dev); - -}; - -/** - * For lu_object_conf flags - */ -enum loc_flags { - /* This is a new object to be allocated, or the file - * corresponding to the object does not exists. - */ - LOC_F_NEW = 0x00000001, -}; - -/** - * Object configuration, describing particulars of object being created. On - * server this is not used, as server objects are full identified by fid. 
On - * client configuration contains struct lustre_md. - */ -struct lu_object_conf { - /** - * Some hints for obj find and alloc. - */ - enum loc_flags loc_flags; -}; - -/** - * Type of "printer" function used by lu_object_operations::loo_object_print() - * method. - * - * Printer function is needed to provide some flexibility in (semi-)debugging - * output: possible implementations: printk, CDEBUG, sysfs/seq_file - */ -typedef int (*lu_printer_t)(const struct lu_env *env, - void *cookie, const char *format, ...) - __printf(3, 4); - -/** - * Operations specific for particular lu_object. - */ -struct lu_object_operations { - /** - * Allocate lower-layer parts of the object by calling - * lu_device_operations::ldo_object_alloc() of the corresponding - * underlying device. - * - * This method is called once for each object inserted into object - * stack. It's responsibility of this method to insert lower-layer - * object(s) it create into appropriate places of object stack. - */ - int (*loo_object_init)(const struct lu_env *env, - struct lu_object *o, - const struct lu_object_conf *conf); - /** - * Called (in top-to-bottom order) during object allocation after all - * layers were allocated and initialized. Can be used to perform - * initialization depending on lower layers. - */ - int (*loo_object_start)(const struct lu_env *env, - struct lu_object *o); - /** - * Called before lu_object_operations::loo_object_free() to signal - * that object is being destroyed. Dual to - * lu_object_operations::loo_object_init(). - */ - void (*loo_object_delete)(const struct lu_env *env, - struct lu_object *o); - /** - * Dual to lu_device_operations::ldo_object_alloc(). Called when - * object is removed from memory. - */ - void (*loo_object_free)(const struct lu_env *env, - struct lu_object *o); - /** - * Called when last active reference to the object is released (and - * object returns to the cache). This method is optional. 
- */ - void (*loo_object_release)(const struct lu_env *env, - struct lu_object *o); - /** - * Optional debugging helper. Print given object. - */ - int (*loo_object_print)(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o); - /** - * Optional debugging method. Returns true iff method is internally - * consistent. - */ - int (*loo_object_invariant)(const struct lu_object *o); -}; - -/** - * Type of lu_device. - */ -struct lu_device_type; - -/** - * Device: a layer in the server side abstraction stacking. - */ -struct lu_device { - /** - * reference count. This is incremented, in particular, on each object - * created at this layer. - * - * \todo XXX which means that atomic_t is probably too small. - */ - atomic_t ld_ref; - /** - * Pointer to device type. Never modified once set. - */ - struct lu_device_type *ld_type; - /** - * Operation vector for this device. - */ - const struct lu_device_operations *ld_ops; - /** - * Stack this device belongs to. - */ - struct lu_site *ld_site; - - /** \todo XXX: temporary back pointer into obd. */ - struct obd_device *ld_obd; - /** - * A list of references to this object, for debugging. - */ - struct lu_ref ld_reference; - /** - * Link the device to the site. - **/ - struct list_head ld_linkage; -}; - -struct lu_device_type_operations; - -/** - * Tag bits for device type. They are used to distinguish certain groups of - * device types. - */ -enum lu_device_tag { - /** this is meta-data device */ - LU_DEVICE_MD = (1 << 0), - /** this is data device */ - LU_DEVICE_DT = (1 << 1), - /** data device in the client stack */ - LU_DEVICE_CL = (1 << 2) -}; - -/** - * Type of device. - */ -struct lu_device_type { - /** - * Tag bits. Taken from enum lu_device_tag. Never modified once set. - */ - __u32 ldt_tags; - /** - * Name of this class. Unique system-wide. Never modified once set. - */ - char *ldt_name; - /** - * Operations for this type. 
- */ - const struct lu_device_type_operations *ldt_ops; - /** - * \todo XXX: temporary pointer to associated obd_type. - */ - struct obd_type *ldt_obd_type; - /** - * \todo XXX: temporary: context tags used by obd_*() calls. - */ - __u32 ldt_ctx_tags; - /** - * Number of existing device type instances. - */ - atomic_t ldt_device_nr; - /** - * Linkage into a global list of all device types. - * - * \see lu_device_types. - */ - struct list_head ldt_linkage; -}; - -/** - * Operations on a device type. - */ -struct lu_device_type_operations { - /** - * Allocate new device. - */ - struct lu_device *(*ldto_device_alloc)(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *lcfg); - /** - * Free device. Dual to - * lu_device_type_operations::ldto_device_alloc(). Returns pointer to - * the next device in the stack. - */ - struct lu_device *(*ldto_device_free)(const struct lu_env *, - struct lu_device *); - - /** - * Initialize the devices after allocation - */ - int (*ldto_device_init)(const struct lu_env *env, - struct lu_device *, const char *, - struct lu_device *); - /** - * Finalize device. Dual to - * lu_device_type_operations::ldto_device_init(). Returns pointer to - * the next device in the stack. - */ - struct lu_device *(*ldto_device_fini)(const struct lu_env *env, - struct lu_device *); - /** - * Initialize device type. This is called on module load. - */ - int (*ldto_init)(struct lu_device_type *t); - /** - * Finalize device type. Dual to - * lu_device_type_operations::ldto_init(). Called on module unload. - */ - void (*ldto_fini)(struct lu_device_type *t); - /** - * Called when the first device is created. - */ - void (*ldto_start)(struct lu_device_type *t); - /** - * Called when number of devices drops to 0. - */ - void (*ldto_stop)(struct lu_device_type *t); -}; - -static inline int lu_device_is_md(const struct lu_device *d) -{ - return ergo(d, d->ld_type->ldt_tags & LU_DEVICE_MD); -} - -/** - * Common object attributes. 
- */ -struct lu_attr { - /** size in bytes */ - __u64 la_size; - /** modification time in seconds since Epoch */ - s64 la_mtime; - /** access time in seconds since Epoch */ - s64 la_atime; - /** change time in seconds since Epoch */ - s64 la_ctime; - /** 512-byte blocks allocated to object */ - __u64 la_blocks; - /** permission bits and file type */ - __u32 la_mode; - /** owner id */ - __u32 la_uid; - /** group id */ - __u32 la_gid; - /** object flags */ - __u32 la_flags; - /** number of persistent references to this object */ - __u32 la_nlink; - /** blk bits of the object*/ - __u32 la_blkbits; - /** blk size of the object*/ - __u32 la_blksize; - /** real device */ - __u32 la_rdev; - /** - * valid bits - * - * \see enum la_valid - */ - __u64 la_valid; -}; - -/** Bit-mask of valid attributes */ -enum la_valid { - LA_ATIME = 1 << 0, - LA_MTIME = 1 << 1, - LA_CTIME = 1 << 2, - LA_SIZE = 1 << 3, - LA_MODE = 1 << 4, - LA_UID = 1 << 5, - LA_GID = 1 << 6, - LA_BLOCKS = 1 << 7, - LA_TYPE = 1 << 8, - LA_FLAGS = 1 << 9, - LA_NLINK = 1 << 10, - LA_RDEV = 1 << 11, - LA_BLKSIZE = 1 << 12, - LA_KILL_SUID = 1 << 13, - LA_KILL_SGID = 1 << 14, -}; - -/** - * Layer in the layered object. - */ -struct lu_object { - /** - * Header for this object. - */ - struct lu_object_header *lo_header; - /** - * Device for this layer. - */ - struct lu_device *lo_dev; - /** - * Operations for this object. - */ - const struct lu_object_operations *lo_ops; - /** - * Linkage into list of all layers. - */ - struct list_head lo_linkage; - /** - * Link to the device, for debugging. - */ - struct lu_ref_link lo_dev_ref; -}; - -enum lu_object_header_flags { - /** - * Don't keep this object in cache. Object will be destroyed as soon - * as last reference to it is released. This flag cannot be cleared - * once set. - */ - LU_OBJECT_HEARD_BANSHEE = 0, - /** - * Mark this object has already been taken out of cache. 
- */ - LU_OBJECT_UNHASHED = 1, -}; - -enum lu_object_header_attr { - LOHA_EXISTS = 1 << 0, - LOHA_REMOTE = 1 << 1, - /** - * UNIX file type is stored in S_IFMT bits. - */ - LOHA_FT_START = 001 << 12, /**< S_IFIFO */ - LOHA_FT_END = 017 << 12, /**< S_IFMT */ -}; - -/** - * "Compound" object, consisting of multiple layers. - * - * Compound object with given fid is unique with given lu_site. - * - * Note, that object does *not* necessary correspond to the real object in the - * persistent storage: object is an anchor for locking and method calling, so - * it is created for things like not-yet-existing child created by mkdir or - * create calls. lu_object_operations::loo_exists() can be used to check - * whether object is backed by persistent storage entity. - */ -struct lu_object_header { - /** - * Fid, uniquely identifying this object. - */ - struct lu_fid loh_fid; - /** - * Object flags from enum lu_object_header_flags. Set and checked - * atomically. - */ - unsigned long loh_flags; - /** - * Object reference count. Protected by lu_site::ls_guard. - */ - atomic_t loh_ref; - /** - * Common object attributes, cached for efficiency. From enum - * lu_object_header_attr. - */ - __u32 loh_attr; - /** - * Linkage into per-site hash table. Protected by lu_site::ls_guard. - */ - struct hlist_node loh_hash; - /** - * Linkage into per-site LRU list. Protected by lu_site::ls_guard. - */ - struct list_head loh_lru; - /** - * Linkage into list of layers. Never modified once set (except lately - * during object destruction). No locking is necessary. - */ - struct list_head loh_layers; - /** - * A list of references to this object, for debugging. 
- */ - struct lu_ref loh_reference; -}; - -struct fld; -struct lu_site_bkt_data; - -enum { - LU_SS_CREATED = 0, - LU_SS_CACHE_HIT, - LU_SS_CACHE_MISS, - LU_SS_CACHE_RACE, - LU_SS_CACHE_DEATH_RACE, - LU_SS_LRU_PURGED, - LU_SS_LAST_STAT -}; - -/** - * lu_site is a "compartment" within which objects are unique, and LRU - * discipline is maintained. - * - * lu_site exists so that multiple layered stacks can co-exist in the same - * address space. - * - * lu_site has the same relation to lu_device as lu_object_header to - * lu_object. - */ -struct lu_site { - /** - * objects hash table - */ - struct cfs_hash *ls_obj_hash; - /** - * index of bucket on hash table while purging - */ - unsigned int ls_purge_start; - /** - * Top-level device for this stack. - */ - struct lu_device *ls_top_dev; - /** - * Bottom-level device for this stack - */ - struct lu_device *ls_bottom_dev; - /** - * Linkage into global list of sites. - */ - struct list_head ls_linkage; - /** - * List for lu device for this site, protected - * by ls_ld_lock. - **/ - struct list_head ls_ld_linkage; - spinlock_t ls_ld_lock; - - /** - * Lock to serialize site purge. - */ - struct mutex ls_purge_mutex; - - /** - * lu_site stats - */ - struct lprocfs_stats *ls_stats; - /** - * XXX: a hack! fld has to find md_site via site, remove when possible - */ - struct seq_server_site *ld_seq_site; - /** - * Number of objects in lsb_lru_lists - used for shrinking - */ - struct percpu_counter ls_lru_len_counter; -}; - -wait_queue_head_t * -lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid); - -static inline struct seq_server_site *lu_site2seq(const struct lu_site *s) -{ - return s->ld_seq_site; -} - -/** \name ctors - * Constructors/destructors. 
- * @{ - */ - -int lu_site_init(struct lu_site *s, struct lu_device *d); -void lu_site_fini(struct lu_site *s); -int lu_site_init_finish(struct lu_site *s); -void lu_stack_fini(const struct lu_env *env, struct lu_device *top); -void lu_device_get(struct lu_device *d); -void lu_device_put(struct lu_device *d); -int lu_device_init(struct lu_device *d, struct lu_device_type *t); -void lu_device_fini(struct lu_device *d); -int lu_object_header_init(struct lu_object_header *h); -void lu_object_header_fini(struct lu_object_header *h); -int lu_object_init(struct lu_object *o, - struct lu_object_header *h, struct lu_device *d); -void lu_object_fini(struct lu_object *o); -void lu_object_add_top(struct lu_object_header *h, struct lu_object *o); -void lu_object_add(struct lu_object *before, struct lu_object *o); - -/** - * Helpers to initialize and finalize device types. - */ - -int lu_device_type_init(struct lu_device_type *ldt); -void lu_device_type_fini(struct lu_device_type *ldt); - -/** @} ctors */ - -/** \name caching - * Caching and reference counting. - * @{ - */ - -/** - * Acquire additional reference to the given object. This function is used to - * attain additional reference. To acquire initial reference use - * lu_object_find(). - */ -static inline void lu_object_get(struct lu_object *o) -{ - LASSERT(atomic_read(&o->lo_header->loh_ref) > 0); - atomic_inc(&o->lo_header->loh_ref); -} - -/** - * Return true of object will not be cached after last reference to it is - * released. 
- */ -static inline int lu_object_is_dying(const struct lu_object_header *h) -{ - return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags); -} - -void lu_object_put(const struct lu_env *env, struct lu_object *o); -void lu_object_unhash(const struct lu_env *env, struct lu_object *o); -int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s, int nr, - bool canblock); - -static inline int lu_site_purge(const struct lu_env *env, struct lu_site *s, - int nr) -{ - return lu_site_purge_objects(env, s, nr, true); -} - -void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie, - lu_printer_t printer); -struct lu_object *lu_object_find_at(const struct lu_env *env, - struct lu_device *dev, - const struct lu_fid *f, - const struct lu_object_conf *conf); -struct lu_object *lu_object_find_slice(const struct lu_env *env, - struct lu_device *dev, - const struct lu_fid *f, - const struct lu_object_conf *conf); -/** @} caching */ - -/** \name helpers - * Helpers. - * @{ - */ - -/** - * First (topmost) sub-object of given compound object - */ -static inline struct lu_object *lu_object_top(struct lu_object_header *h) -{ - LASSERT(!list_empty(&h->loh_layers)); - return list_first_entry(&h->loh_layers, struct lu_object, lo_linkage); -} - -/** - * Next sub-object in the layering - */ -static inline const struct lu_object *lu_object_next(const struct lu_object *o) -{ - return list_next_entry(o, lo_linkage); -} - -/** - * Pointer to the fid of this object. - */ -static inline const struct lu_fid *lu_object_fid(const struct lu_object *o) -{ - return &o->lo_header->loh_fid; -} - -/** - * return device operations vector for this object - */ -static inline const struct lu_device_operations * -lu_object_ops(const struct lu_object *o) -{ - return o->lo_dev->ld_ops; -} - -/** - * Given a compound object, find its slice, corresponding to the device type - * \a dtype. 
- */ -struct lu_object *lu_object_locate(struct lu_object_header *h, - const struct lu_device_type *dtype); - -/** - * Printer function emitting messages through libcfs_debug_msg(). - */ -int lu_cdebug_printer(const struct lu_env *env, - void *cookie, const char *format, ...); - -/** - * Print object description followed by a user-supplied message. - */ -#define LU_OBJECT_DEBUG(mask, env, object, format, ...) \ -do { \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - lu_object_print(env, &msgdata, lu_cdebug_printer, object);\ - CDEBUG(mask, format "\n", ## __VA_ARGS__); \ - } \ -} while (0) - -/** - * Print short object description followed by a user-supplied message. - */ -#define LU_OBJECT_HEADER(mask, env, object, format, ...) \ -do { \ - if (cfs_cdebug_show(mask, DEBUG_SUBSYSTEM)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, NULL); \ - lu_object_header_print(env, &msgdata, lu_cdebug_printer,\ - (object)->lo_header); \ - lu_cdebug_printer(env, &msgdata, "\n"); \ - CDEBUG(mask, format, ## __VA_ARGS__); \ - } \ -} while (0) - -void lu_object_print (const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct lu_object *o); -void lu_object_header_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, - const struct lu_object_header *hdr); - -/** - * Check object consistency. - */ -int lu_object_invariant(const struct lu_object *o); - -/** - * Check whether object exists, no matter on local or remote storage. - * Note: LOHA_EXISTS will be set once some one created the object, - * and it does not needs to be committed to storage. - */ -#define lu_object_exists(o) ((o)->lo_header->loh_attr & LOHA_EXISTS) - -/** - * Check whether object on the remote storage. 
- */ -#define lu_object_remote(o) unlikely((o)->lo_header->loh_attr & LOHA_REMOTE) - -static inline int lu_object_assert_exists(const struct lu_object *o) -{ - return lu_object_exists(o); -} - -static inline int lu_object_assert_not_exists(const struct lu_object *o) -{ - return !lu_object_exists(o); -} - -/** - * Attr of this object. - */ -static inline __u32 lu_object_attr(const struct lu_object *o) -{ - LASSERT(lu_object_exists(o) != 0); - return o->lo_header->loh_attr; -} - -static inline void lu_object_ref_add(struct lu_object *o, - const char *scope, - const void *source) -{ - lu_ref_add(&o->lo_header->loh_reference, scope, source); -} - -static inline void lu_object_ref_add_at(struct lu_object *o, - struct lu_ref_link *link, - const char *scope, - const void *source) -{ - lu_ref_add_at(&o->lo_header->loh_reference, link, scope, source); -} - -static inline void lu_object_ref_del(struct lu_object *o, - const char *scope, const void *source) -{ - lu_ref_del(&o->lo_header->loh_reference, scope, source); -} - -static inline void lu_object_ref_del_at(struct lu_object *o, - struct lu_ref_link *link, - const char *scope, const void *source) -{ - lu_ref_del_at(&o->lo_header->loh_reference, link, scope, source); -} - -/** input params, should be filled out by mdt */ -struct lu_rdpg { - /** hash */ - __u64 rp_hash; - /** count in bytes */ - unsigned int rp_count; - /** number of pages */ - unsigned int rp_npages; - /** requested attr */ - __u32 rp_attrs; - /** pointers to pages */ - struct page **rp_pages; -}; - -enum lu_xattr_flags { - LU_XATTR_REPLACE = (1 << 0), - LU_XATTR_CREATE = (1 << 1) -}; - -/** @} helpers */ - -/** \name lu_context - * @{ - */ - -/** For lu_context health-checks */ -enum lu_context_state { - LCS_INITIALIZED = 1, - LCS_ENTERED, - LCS_LEFT, - LCS_FINALIZED -}; - -/** - * lu_context. Execution context for lu_object methods. Currently associated - * with thread. 
- * - * All lu_object methods, except device and device type methods (called during - * system initialization and shutdown) are executed "within" some - * lu_context. This means, that pointer to some "current" lu_context is passed - * as an argument to all methods. - * - * All service ptlrpc threads create lu_context as part of their - * initialization. It is possible to create "stand-alone" context for other - * execution environments (like system calls). - * - * lu_object methods mainly use lu_context through lu_context_key interface - * that allows each layer to associate arbitrary pieces of data with each - * context (see pthread_key_create(3) for similar interface). - * - * On a client, lu_context is bound to a thread, see cl_env_get(). - * - * \see lu_context_key - */ -struct lu_context { - /** - * lu_context is used on the client side too. Yet we don't want to - * allocate values of server-side keys for the client contexts and - * vice versa. - * - * To achieve this, set of tags in introduced. Contexts and keys are - * marked with tags. Key value are created only for context whose set - * of tags has non-empty intersection with one for key. Tags are taken - * from enum lu_context_tag. - */ - __u32 lc_tags; - enum lu_context_state lc_state; - /** - * Pointer to the home service thread. NULL for other execution - * contexts. - */ - struct ptlrpc_thread *lc_thread; - /** - * Pointer to an array with key values. Internal implementation - * detail. - */ - void **lc_value; - /** - * Linkage into a list of all remembered contexts. Only - * `non-transient' contexts, i.e., ones created for service threads - * are placed here. - */ - struct list_head lc_remember; - /** - * Version counter used to skip calls to lu_context_refill() when no - * keys were registered. - */ - unsigned int lc_version; - /** - * Debugging cookie. - */ - unsigned int lc_cookie; -}; - -/** - * lu_context_key interface. Similar to pthread_key. 
- */ - -enum lu_context_tag { - /** - * Thread on md server - */ - LCT_MD_THREAD = 1 << 0, - /** - * Thread on dt server - */ - LCT_DT_THREAD = 1 << 1, - /** - * Context for transaction handle - */ - LCT_TX_HANDLE = 1 << 2, - /** - * Thread on client - */ - LCT_CL_THREAD = 1 << 3, - /** - * A per-request session on a server, and a per-system-call session on - * a client. - */ - LCT_SESSION = 1 << 4, - /** - * A per-request data on OSP device - */ - LCT_OSP_THREAD = 1 << 5, - /** - * MGS device thread - */ - LCT_MG_THREAD = 1 << 6, - /** - * Context for local operations - */ - LCT_LOCAL = 1 << 7, - /** - * session for server thread - **/ - LCT_SERVER_SESSION = BIT(8), - /** - * Set when at least one of keys, having values in this context has - * non-NULL lu_context_key::lct_exit() method. This is used to - * optimize lu_context_exit() call. - */ - LCT_HAS_EXIT = 1 << 28, - /** - * Don't add references for modules creating key values in that context. - * This is only for contexts used internally by lu_object framework. - */ - LCT_NOREF = 1 << 29, - /** - * Key is being prepared for retiring, don't create new values for it. - */ - LCT_QUIESCENT = 1 << 30, - /** - * Context should be remembered. - */ - LCT_REMEMBER = 1 << 31, - /** - * Contexts usable in cache shrinker thread. - */ - LCT_SHRINKER = LCT_MD_THREAD | LCT_DT_THREAD | LCT_CL_THREAD | - LCT_NOREF -}; - -/** - * Key. Represents per-context value slot. - * - * Keys are usually registered when module owning the key is initialized, and - * de-registered when module is unloaded. Once key is registered, all new - * contexts with matching tags, will get key value. "Old" contexts, already - * initialized at the time of key registration, can be forced to get key value - * by calling lu_context_refill(). - * - * Every key value is counted in lu_context_key::lct_used and acquires a - * reference on an owning module. This means, that all key values have to be - * destroyed before module can be unloaded. 
This is usually achieved by - * stopping threads started by the module, that created contexts in their - * entry functions. Situation is complicated by the threads shared by multiple - * modules, like ptlrpcd daemon on a client. To work around this problem, - * contexts, created in such threads, are `remembered' (see - * LCT_REMEMBER)---i.e., added into a global list. When module is preparing - * for unloading it does the following: - * - * - marks its keys as `quiescent' (lu_context_tag::LCT_QUIESCENT) - * preventing new key values from being allocated in the new contexts, - * and - * - * - scans a list of remembered contexts, destroying values of module - * keys, thus releasing references to the module. - * - * This is done by lu_context_key_quiesce(). If module is re-activated - * before key has been de-registered, lu_context_key_revive() call clears - * `quiescent' marker. - * - * lu_context code doesn't provide any internal synchronization for these - * activities---it's assumed that startup (including threads start-up) and - * shutdown are serialized by some external means. - * - * \see lu_context - */ -struct lu_context_key { - /** - * Set of tags for which values of this key are to be instantiated. - */ - __u32 lct_tags; - /** - * Value constructor. This is called when new value is created for a - * context. Returns pointer to new value of error pointer. - */ - void *(*lct_init)(const struct lu_context *ctx, - struct lu_context_key *key); - /** - * Value destructor. Called when context with previously allocated - * value of this slot is destroyed. \a data is a value that was returned - * by a matching call to lu_context_key::lct_init(). - */ - void (*lct_fini)(const struct lu_context *ctx, - struct lu_context_key *key, void *data); - /** - * Optional method called on lu_context_exit() for all allocated - * keys. Can be used by debugging code checking that locks are - * released, etc. 
- */ - void (*lct_exit)(const struct lu_context *ctx, - struct lu_context_key *key, void *data); - /** - * Internal implementation detail: index within lu_context::lc_value[] - * reserved for this key. - */ - int lct_index; - /** - * Internal implementation detail: number of values created for this - * key. - */ - atomic_t lct_used; - /** - * Internal implementation detail: module for this key. - */ - struct module *lct_owner; - /** - * References to this key. For debugging. - */ - struct lu_ref lct_reference; -}; - -#define LU_KEY_INIT(mod, type) \ - static void *mod##_key_init(const struct lu_context *ctx, \ - struct lu_context_key *key) \ - { \ - type *value; \ - \ - BUILD_BUG_ON(sizeof(*value) > PAGE_SIZE); \ - \ - value = kzalloc(sizeof(*value), GFP_NOFS); \ - if (!value) \ - value = ERR_PTR(-ENOMEM); \ - \ - return value; \ - } \ - struct __##mod##__dummy_init {; } /* semicolon catcher */ - -#define LU_KEY_FINI(mod, type) \ - static void mod##_key_fini(const struct lu_context *ctx, \ - struct lu_context_key *key, void *data) \ - { \ - type *info = data; \ - \ - kfree(info); \ - } \ - struct __##mod##__dummy_fini {; } /* semicolon catcher */ - -#define LU_KEY_INIT_FINI(mod, type) \ - LU_KEY_INIT(mod, type); \ - LU_KEY_FINI(mod, type) - -#define LU_CONTEXT_KEY_DEFINE(mod, tags) \ - struct lu_context_key mod##_thread_key = { \ - .lct_tags = tags, \ - .lct_init = mod##_key_init, \ - .lct_fini = mod##_key_fini \ - } - -#define LU_CONTEXT_KEY_INIT(key) \ -do { \ - (key)->lct_owner = THIS_MODULE; \ -} while (0) - -int lu_context_key_register(struct lu_context_key *key); -void lu_context_key_degister(struct lu_context_key *key); -void *lu_context_key_get(const struct lu_context *ctx, - const struct lu_context_key *key); -void lu_context_key_quiesce(struct lu_context_key *key); -void lu_context_key_revive(struct lu_context_key *key); - -/* - * LU_KEY_INIT_GENERIC() has to be a macro to correctly determine an - * owning module. 
- */ - -#define LU_KEY_INIT_GENERIC(mod) \ - static void mod##_key_init_generic(struct lu_context_key *k, ...) \ - { \ - struct lu_context_key *key = k; \ - va_list args; \ - \ - va_start(args, k); \ - do { \ - LU_CONTEXT_KEY_INIT(key); \ - key = va_arg(args, struct lu_context_key *); \ - } while (key); \ - va_end(args); \ - } - -#define LU_TYPE_INIT(mod, ...) \ - LU_KEY_INIT_GENERIC(mod) \ - static int mod##_type_init(struct lu_device_type *t) \ - { \ - mod##_key_init_generic(__VA_ARGS__, NULL); \ - return lu_context_key_register_many(__VA_ARGS__, NULL); \ - } \ - struct __##mod##_dummy_type_init {; } - -#define LU_TYPE_FINI(mod, ...) \ - static void mod##_type_fini(struct lu_device_type *t) \ - { \ - lu_context_key_degister_many(__VA_ARGS__, NULL); \ - } \ - struct __##mod##_dummy_type_fini {; } - -#define LU_TYPE_START(mod, ...) \ - static void mod##_type_start(struct lu_device_type *t) \ - { \ - lu_context_key_revive_many(__VA_ARGS__, NULL); \ - } \ - struct __##mod##_dummy_type_start {; } - -#define LU_TYPE_STOP(mod, ...) \ - static void mod##_type_stop(struct lu_device_type *t) \ - { \ - lu_context_key_quiesce_many(__VA_ARGS__, NULL); \ - } \ - struct __##mod##_dummy_type_stop {; } - -#define LU_TYPE_INIT_FINI(mod, ...) \ - LU_TYPE_INIT(mod, __VA_ARGS__); \ - LU_TYPE_FINI(mod, __VA_ARGS__); \ - LU_TYPE_START(mod, __VA_ARGS__); \ - LU_TYPE_STOP(mod, __VA_ARGS__) - -int lu_context_init(struct lu_context *ctx, __u32 tags); -void lu_context_fini(struct lu_context *ctx); -void lu_context_enter(struct lu_context *ctx); -void lu_context_exit(struct lu_context *ctx); -int lu_context_refill(struct lu_context *ctx); - -/* - * Helper functions to operate on multiple keys. These are used by the default - * device type operations, defined by LU_TYPE_INIT_FINI(). 
- */ - -int lu_context_key_register_many(struct lu_context_key *k, ...); -void lu_context_key_degister_many(struct lu_context_key *k, ...); -void lu_context_key_revive_many(struct lu_context_key *k, ...); -void lu_context_key_quiesce_many(struct lu_context_key *k, ...); - -/** - * Environment. - */ -struct lu_env { - /** - * "Local" context, used to store data instead of stack. - */ - struct lu_context le_ctx; - /** - * "Session" context for per-request data. - */ - struct lu_context *le_ses; -}; - -int lu_env_init(struct lu_env *env, __u32 tags); -void lu_env_fini(struct lu_env *env); -int lu_env_refill(struct lu_env *env); - -/** @} lu_context */ - -/** - * Output site statistical counters into a buffer. Suitable for - * ll_rd_*()-style functions. - */ -int lu_site_stats_print(const struct lu_site *s, struct seq_file *m); - -/** - * Common name structure to be passed around for various name related methods. - */ -struct lu_name { - const char *ln_name; - int ln_namelen; -}; - -/** - * Validate names (path components) - * - * To be valid \a name must be non-empty, '\0' terminated of length \a - * name_len, and not contain '/'. The maximum length of a name (before - * say -ENAMETOOLONG will be returned) is really controlled by llite - * and the server. We only check for something insane coming from bad - * integer handling here. - */ -static inline bool lu_name_is_valid_2(const char *name, size_t name_len) -{ - return name && name_len > 0 && name_len < INT_MAX && - name[name_len] == '\0' && strlen(name) == name_len && - !memchr(name, '/', name_len); -} - -/** - * Common buffer structure to be passed around for various xattr_{s,g}et() - * methods. - */ -struct lu_buf { - void *lb_buf; - size_t lb_len; -}; - -/** - * One-time initializers, called at obdclass module initialization, not - * exported. - */ - -/** - * Initialization of global lu_* data. - */ -int lu_global_init(void); - -/** - * Dual to lu_global_init(). 
- */ -void lu_global_fini(void); - -struct lu_kmem_descr { - struct kmem_cache **ckd_cache; - const char *ckd_name; - const size_t ckd_size; -}; - -int lu_kmem_init(struct lu_kmem_descr *caches); -void lu_kmem_fini(struct lu_kmem_descr *caches); - -extern __u32 lu_context_tags_default; -extern __u32 lu_session_tags_default; - -/** @} lu */ -#endif /* __LUSTRE_LU_OBJECT_H */ diff --git a/drivers/staging/lustre/lustre/include/lu_ref.h b/drivers/staging/lustre/lustre/include/lu_ref.h deleted file mode 100644 index ad0c24d29ffa..000000000000 --- a/drivers/staging/lustre/lustre/include/lu_ref.h +++ /dev/null @@ -1,178 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - * - * Author: Nikita Danilov - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __LUSTRE_LU_REF_H -#define __LUSTRE_LU_REF_H - -#include - -/** \defgroup lu_ref lu_ref - * - * An interface to track references between objects. Mostly for debugging. - * - * Suppose there is a reference counted data-structure struct foo. To track - * who acquired references to instance of struct foo, add lu_ref field to it: - * - * \code - * struct foo { - * atomic_t foo_refcount; - * struct lu_ref foo_reference; - * ... - * }; - * \endcode - * - * foo::foo_reference has to be initialized by calling - * lu_ref_init(). 
Typically there will be functions or macros to increment and - * decrement foo::foo_refcount, let's say they are foo_get(struct foo *foo) - * and foo_put(struct foo *foo), respectively. - * - * Whenever foo_get() is called to acquire a reference on a foo, lu_ref_add() - * has to be called to insert into foo::foo_reference a record, describing - * acquired reference. Dually, lu_ref_del() removes matching record. Typical - * usages are: - * - * \code - * struct bar *bar; - * - * // bar owns a reference to foo. - * bar->bar_foo = foo_get(foo); - * lu_ref_add(&foo->foo_reference, "bar", bar); - * - * ... - * - * // reference from bar to foo is released. - * lu_ref_del(&foo->foo_reference, "bar", bar); - * foo_put(bar->bar_foo); - * - * - * // current thread acquired a temporary reference to foo. - * foo_get(foo); - * lu_ref_add(&foo->reference, __func__, current); - * - * ... - * - * // temporary reference is released. - * lu_ref_del(&foo->reference, __func__, current); - * foo_put(foo); - * \endcode - * - * \e Et \e cetera. Often it makes sense to include lu_ref_add() and - * lu_ref_del() calls into foo_get() and foo_put(). When an instance of struct - * foo is destroyed, lu_ref_fini() has to be called that checks that no - * pending references remain. lu_ref_print() can be used to dump a list of - * pending references, while hunting down a leak. - * - * For objects to which a large number of references can be acquired, - * lu_ref_del() can become cpu consuming, as it has to scan the list of - * references. To work around this, remember result of lu_ref_add() (usually - * in the same place where pointer to struct foo is stored), and use - * lu_ref_del_at(): - * - * \code - * // There is a large number of bar's for a single foo. - * bar->bar_foo = foo_get(foo); - * bar->bar_foo_ref = lu_ref_add(&foo->foo_reference, "bar", bar); - * - * ... - * - * // reference from bar to foo is released. 
- * lu_ref_del_at(&foo->foo_reference, bar->bar_foo_ref, "bar", bar); - * foo_put(bar->bar_foo); - * \endcode - * - * lu_ref interface degrades gracefully in case of memory shortages. - * - * @{ - */ - -/* - * dummy data structures/functions to pass compile for now. - * We need to reimplement them with kref. - */ -struct lu_ref {}; -struct lu_ref_link {}; - -static inline void lu_ref_init(struct lu_ref *ref) -{ -} - -static inline void lu_ref_fini(struct lu_ref *ref) -{ -} - -static inline struct lu_ref_link *lu_ref_add(struct lu_ref *ref, - const char *scope, - const void *source) -{ - return NULL; -} - -static inline struct lu_ref_link *lu_ref_add_atomic(struct lu_ref *ref, - const char *scope, - const void *source) -{ - return NULL; -} - -static inline void lu_ref_add_at(struct lu_ref *ref, - struct lu_ref_link *link, - const char *scope, - const void *source) -{ -} - -static inline void lu_ref_del(struct lu_ref *ref, const char *scope, - const void *source) -{ -} - -static inline void lu_ref_set_at(struct lu_ref *ref, struct lu_ref_link *link, - const char *scope, const void *source0, - const void *source1) -{ -} - -static inline void lu_ref_del_at(struct lu_ref *ref, struct lu_ref_link *link, - const char *scope, const void *source) -{ -} - -static inline int lu_ref_global_init(void) -{ - return 0; -} - -static inline void lu_ref_global_fini(void) -{ -} - -static inline void lu_ref_print(const struct lu_ref *ref) -{ -} - -static inline void lu_ref_print_all(void) -{ -} - -/** @} lu */ - -#endif /* __LUSTRE_LU_REF_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_acl.h b/drivers/staging/lustre/lustre/include/lustre_acl.h deleted file mode 100644 index e7575a172b5f..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_acl.h +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_acl.h - */ - -#ifndef _LUSTRE_ACL_H -#define _LUSTRE_ACL_H - -#include -#include -#ifdef CONFIG_FS_POSIX_ACL -#include - -#define LUSTRE_POSIX_ACL_MAX_ENTRIES 32 -#define LUSTRE_POSIX_ACL_MAX_SIZE_OLD \ - (sizeof(struct posix_acl_xattr_header) + \ - LUSTRE_POSIX_ACL_MAX_ENTRIES * sizeof(struct posix_acl_xattr_entry)) - -#else /* ! CONFIG_FS_POSIX_ACL */ -#define LUSTRE_POSIX_ACL_MAX_SIZE_OLD 0 -#endif /* CONFIG_FS_POSIX_ACL */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h deleted file mode 100644 index 3c6db0d632dc..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_compat.h +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef _LUSTRE_COMPAT_H -#define _LUSTRE_COMPAT_H - -#include -#include -#include -#include - -#include - -/* - * set ATTR_BLOCKS to a high value to avoid any risk of collision with other - * ATTR_* attributes (see bug 13828) - */ -#define ATTR_BLOCKS (1 << 27) - -#define current_ngroups current_cred()->group_info->ngroups -#define current_groups current_cred()->group_info->small_block - -/* - * OBD need working random driver, thus all our - * initialization routines must be called after device - * driver initialization - */ -#ifndef MODULE -#undef module_init -#define module_init(a) late_initcall(a) -#endif - -#define LTIME_S(time) (time.tv_sec) - -#ifndef QUOTA_OK -# define QUOTA_OK 0 -#endif -#ifndef NO_QUOTA -# define NO_QUOTA (-EDQUOT) -#endif - -#if !defined(_ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_) && !defined(ext2_set_bit) -# define ext2_set_bit __test_and_set_bit_le -# define ext2_clear_bit __test_and_clear_bit_le -# define ext2_test_bit test_bit_le -# define ext2_find_first_zero_bit find_first_zero_bit_le -# define ext2_find_next_zero_bit find_next_zero_bit_le -#endif - -#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) - -#endif /* _LUSTRE_COMPAT_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_debug.h b/drivers/staging/lustre/lustre/include/lustre_debug.h deleted file mode 100644 index 721a81f923e3..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_debug.h +++ /dev/null @@ -1,52 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef _LUSTRE_DEBUG_H -#define _LUSTRE_DEBUG_H - -/** \defgroup debug debug - * - * @{ - */ - -#include -#include - -/* lib/debug.c */ -int dump_req(struct ptlrpc_request *req); -int block_debug_setup(void *addr, int len, __u64 off, __u64 id); -int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id); - -/** @} debug */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_disk.h b/drivers/staging/lustre/lustre/include/lustre_disk.h deleted file mode 100644 index 886e817644d6..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_disk.h +++ /dev/null @@ -1,152 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_disk.h - * - * Lustre disk format definitions. - * - * Author: Nathan Rutman - */ - -#ifndef _LUSTRE_DISK_H -#define _LUSTRE_DISK_H - -/** \defgroup disk disk - * - * @{ - */ - -#include -#include -#include - -/****************** persistent mount data *********************/ - -#define LDD_F_SV_TYPE_MDT 0x0001 -#define LDD_F_SV_TYPE_OST 0x0002 -#define LDD_F_SV_TYPE_MGS 0x0004 -#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT | \ - LDD_F_SV_TYPE_OST | \ - LDD_F_SV_TYPE_MGS) -#define LDD_F_SV_ALL 0x0008 - -/****************** mount command *********************/ - -/* The lmd is only used internally by Lustre; mount simply passes - * everything as string options - */ - -#define LMD_MAGIC 0xbdacbd03 -#define LMD_PARAMS_MAXLEN 4096 - -/* gleaned from the mount command - no persistent info here */ -struct lustre_mount_data { - __u32 lmd_magic; - __u32 lmd_flags; /* lustre mount flags */ - int lmd_mgs_failnodes; /* mgs failover node count */ - int lmd_exclude_count; - int lmd_recovery_time_soft; - int lmd_recovery_time_hard; - char *lmd_dev; /* device name */ - char *lmd_profile; /* client only */ - char *lmd_mgssec; /* sptlrpc flavor to mgs */ - char *lmd_opts; /* lustre mount options (as opposed to - * _device_ mount options) - */ - char *lmd_params; /* lustre params */ - __u32 *lmd_exclude; /* array of OSTs to ignore */ - 
char *lmd_mgs; /* MGS nid */ - char *lmd_osd_type; /* OSD type */ -}; - -#define LMD_FLG_SERVER 0x0001 /* Mounting a server */ -#define LMD_FLG_CLIENT 0x0002 /* Mounting a client */ -#define LMD_FLG_ABORT_RECOV 0x0008 /* Abort recovery */ -#define LMD_FLG_NOSVC 0x0010 /* Only start MGS/MGC for servers, - * no other services - */ -#define LMD_FLG_NOMGS 0x0020 /* Only start target for servers, - * reusing existing MGS services - */ -#define LMD_FLG_WRITECONF 0x0040 /* Rewrite config log */ -#define LMD_FLG_NOIR 0x0080 /* NO imperative recovery */ -#define LMD_FLG_NOSCRUB 0x0100 /* Do not trigger scrub automatically */ -#define LMD_FLG_MGS 0x0200 /* Also start MGS along with server */ -#define LMD_FLG_IAM 0x0400 /* IAM dir */ -#define LMD_FLG_NO_PRIMNODE 0x0800 /* all nodes are service nodes */ -#define LMD_FLG_VIRGIN 0x1000 /* the service registers first time */ -#define LMD_FLG_UPDATE 0x2000 /* update parameters */ -#define LMD_FLG_HSM 0x4000 /* Start coordinator */ - -#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) - -/****************** superblock additional info *********************/ - -struct ll_sb_info; - -struct lustre_sb_info { - int lsi_flags; - struct obd_device *lsi_mgc; /* mgc obd */ - struct lustre_mount_data *lsi_lmd; /* mount command info */ - struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */ - struct dt_device *lsi_dt_dev; /* dt device to access disk fs*/ - atomic_t lsi_mounts; /* references to the srv_mnt */ - char lsi_svname[MTI_NAME_MAXLEN]; - char lsi_osd_obdname[64]; - char lsi_osd_uuid[64]; - struct obd_export *lsi_osd_exp; - char lsi_osd_type[16]; - char lsi_fstype[16]; -}; - -#define LSI_UMOUNT_FAILOVER 0x00200000 - -#define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info)) -#define s2lsi_nocast(sb) ((sb)->s_fs_info) - -#define get_profile_name(sb) (s2lsi(sb)->lsi_lmd->lmd_profile) - -/****************** prototypes *********************/ - -/* obd_mount.c */ - -int lustre_start_mgc(struct super_block *sb); -void 
lustre_register_super_ops(struct module *mod, - int (*cfs)(struct super_block *sb), - void (*ksc)(struct super_block *sb)); -int lustre_common_put_super(struct super_block *sb); - -int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type); - -/** @} disk */ - -#endif /* _LUSTRE_DISK_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h deleted file mode 100644 index 2c55241258cc..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_dlm.h +++ /dev/null @@ -1,1346 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -/** \defgroup LDLM Lustre Distributed Lock Manager - * - * Lustre DLM is based on VAX DLM. - * Its two main roles are: - * - To provide locking assuring consistency of data on all Lustre nodes. 
- * - To allow clients to cache state protected by a lock by holding the - * lock until a conflicting lock is requested or it is expired by the LRU. - * - * @{ - */ - -#ifndef _LUSTRE_DLM_H__ -#define _LUSTRE_DLM_H__ - -#include -#include -#include -#include -#include /* for interval_node{}, ldlm_extent */ -#include - -#include "lustre_dlm_flags.h" - -struct obd_ops; -struct obd_device; - -#define OBD_LDLM_DEVICENAME "ldlm" - -#define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) -#define LDLM_DEFAULT_MAX_ALIVE (65 * 60 * HZ) /* 65 min */ -#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024 - -/** - * LDLM non-error return states - */ -enum ldlm_error { - ELDLM_OK = 0, - ELDLM_LOCK_MATCHED = 1, - - ELDLM_LOCK_CHANGED = 300, - ELDLM_LOCK_ABORTED = 301, - ELDLM_LOCK_REPLACED = 302, - ELDLM_NO_LOCK_DATA = 303, - ELDLM_LOCK_WOULDBLOCK = 304, - - ELDLM_NAMESPACE_EXISTS = 400, - ELDLM_BAD_NAMESPACE = 401 -}; - -/** - * LDLM namespace type. - * The "client" type is actually an indication that this is a narrow local view - * into complete namespace on the server. Such namespaces cannot make any - * decisions about lack of conflicts or do any autonomous lock granting without - * first speaking to a server. - */ -enum ldlm_side { - LDLM_NAMESPACE_SERVER = 1 << 0, - LDLM_NAMESPACE_CLIENT = 1 << 1 -}; - -/** - * The blocking callback is overloaded to perform two functions. These flags - * indicate which operation should be performed. - */ -#define LDLM_CB_BLOCKING 1 -#define LDLM_CB_CANCELING 2 - -/** - * \name Lock Compatibility Matrix. - * - * A lock has both a type (extent, flock, inode bits, or plain) and a mode. - * Lock types are described in their respective implementation files: - * ldlm_{extent,flock,inodebits,plain}.c. - * - * There are six lock modes along with a compatibility matrix to indicate if - * two locks are compatible. - * - * - EX: Exclusive mode. Before a new file is created, MDS requests EX lock - * on the parent. 
- * - PW: Protective Write (normal write) mode. When a client requests a write - * lock from an OST, a lock with PW mode will be issued. - * - PR: Protective Read (normal read) mode. When a client requests a read from - * an OST, a lock with PR mode will be issued. Also, if the client opens a - * file for execution, it is granted a lock with PR mode. - * - CW: Concurrent Write mode. The type of lock that the MDS grants if a client - * requests a write lock during a file open operation. - * - CR Concurrent Read mode. When a client performs a path lookup, MDS grants - * an inodebit lock with the CR mode on the intermediate path component. - * - NL Null mode. - * - *
- *       NL  CR  CW  PR  PW  EX
- *  NL    1   1   1   1   1   1
- *  CR    1   1   1   1   1   0
- *  CW    1   1   1   0   0   0
- *  PR    1   1   0   1   0   0
- *  PW    1   1   0   0   0   0
- *  EX    1   0   0   0   0   0
- * 
- */ -/** @{ */ -#define LCK_COMPAT_EX LCK_NL -#define LCK_COMPAT_PW (LCK_COMPAT_EX | LCK_CR) -#define LCK_COMPAT_PR (LCK_COMPAT_PW | LCK_PR) -#define LCK_COMPAT_CW (LCK_COMPAT_PW | LCK_CW) -#define LCK_COMPAT_CR (LCK_COMPAT_CW | LCK_PR | LCK_PW) -#define LCK_COMPAT_NL (LCK_COMPAT_CR | LCK_EX | LCK_GROUP) -#define LCK_COMPAT_GROUP (LCK_GROUP | LCK_NL) -#define LCK_COMPAT_COS (LCK_COS) -/** @} Lock Compatibility Matrix */ - -extern enum ldlm_mode lck_compat_array[]; - -static inline void lockmode_verify(enum ldlm_mode mode) -{ - LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE); -} - -static inline int lockmode_compat(enum ldlm_mode exist_mode, - enum ldlm_mode new_mode) -{ - return (lck_compat_array[exist_mode] & new_mode); -} - -/* - * - * cluster name spaces - * - */ - -#define DLM_OST_NAMESPACE 1 -#define DLM_MDS_NAMESPACE 2 - -/* XXX - - do we just separate this by security domains and use a prefix for - multiple namespaces in the same domain? - - -*/ - -/** - * Locking rules for LDLM: - * - * lr_lock - * - * lr_lock - * waiting_locks_spinlock - * - * lr_lock - * led_lock - * - * lr_lock - * ns_lock - * - * lr_lvb_mutex - * lr_lock - * - */ - -struct ldlm_pool; -struct ldlm_lock; -struct ldlm_resource; -struct ldlm_namespace; - -/** - * Operations on LDLM pools. - * LDLM pool is a pool of locks in the namespace without any implicitly - * specified limits. - * Locks in the pool are organized in LRU. - * Local memory pressure or server instructions (e.g. mempressure on server) - * can trigger freeing of locks from the pool - */ -struct ldlm_pool_ops { - /** Recalculate pool \a pl usage */ - int (*po_recalc)(struct ldlm_pool *pl); - /** Cancel at least \a nr locks from pool \a pl */ - int (*po_shrink)(struct ldlm_pool *pl, int nr, - gfp_t gfp_mask); -}; - -/** One second for pools thread check interval. Each pool has own period. */ -#define LDLM_POOLS_THREAD_PERIOD (1) - -/** ~6% margin for modest pools. See ldlm_pool.c for details. 
*/ -#define LDLM_POOLS_MODEST_MARGIN_SHIFT (4) - -/** Default recalc period for server side pools in sec. */ -#define LDLM_POOL_SRV_DEF_RECALC_PERIOD (1) - -/** Default recalc period for client side pools in sec. */ -#define LDLM_POOL_CLI_DEF_RECALC_PERIOD (10) - -/** - * LDLM pool structure to track granted locks. - * For purposes of determining when to release locks on e.g. memory pressure. - * This feature is commonly referred to as lru_resize. - */ -struct ldlm_pool { - /** Pool debugfs directory. */ - struct dentry *pl_debugfs_entry; - /** Pool name, must be long enough to hold compound proc entry name. */ - char pl_name[100]; - /** Lock for protecting SLV/CLV updates. */ - spinlock_t pl_lock; - /** Number of allowed locks in in pool, both, client and server side. */ - atomic_t pl_limit; - /** Number of granted locks in */ - atomic_t pl_granted; - /** Grant rate per T. */ - atomic_t pl_grant_rate; - /** Cancel rate per T. */ - atomic_t pl_cancel_rate; - /** Server lock volume (SLV). Protected by pl_lock. */ - __u64 pl_server_lock_volume; - /** Current biggest client lock volume. Protected by pl_lock. */ - __u64 pl_client_lock_volume; - /** Lock volume factor. SLV on client is calculated as following: - * server_slv * lock_volume_factor. - */ - atomic_t pl_lock_volume_factor; - /** Time when last SLV from server was obtained. */ - time64_t pl_recalc_time; - /** Recalculation period for pool. */ - time64_t pl_recalc_period; - /** Recalculation and shrink operations. */ - const struct ldlm_pool_ops *pl_ops; - /** Number of planned locks for next period. */ - int pl_grant_plan; - /** Pool statistics. */ - struct lprocfs_stats *pl_stats; - - /* sysfs object */ - struct kobject pl_kobj; - struct completion pl_kobj_unregister; -}; - -typedef int (*ldlm_cancel_cbt)(struct ldlm_lock *lock); - -/** - * LVB operations. - * LVB is Lock Value Block. 
This is a special opaque (to LDLM) value that could - * be associated with an LDLM lock and transferred from client to server and - * back. - * - * Currently LVBs are used by: - * - OSC-OST code to maintain current object size/times - * - layout lock code to return the layout when the layout lock is granted - */ -struct ldlm_valblock_ops { - int (*lvbo_init)(struct ldlm_resource *res); - int (*lvbo_update)(struct ldlm_resource *res, - struct ptlrpc_request *r, - int increase); - int (*lvbo_free)(struct ldlm_resource *res); - /* Return size of lvb data appropriate RPC size can be reserved */ - int (*lvbo_size)(struct ldlm_lock *lock); - /* Called to fill in lvb data to RPC buffer @buf */ - int (*lvbo_fill)(struct ldlm_lock *lock, void *buf, int buflen); -}; - -/** - * LDLM pools related, type of lock pool in the namespace. - * Greedy means release cached locks aggressively - */ -enum ldlm_appetite { - LDLM_NAMESPACE_GREEDY = 1 << 0, - LDLM_NAMESPACE_MODEST = 1 << 1 -}; - -struct ldlm_ns_bucket { - /** back pointer to namespace */ - struct ldlm_namespace *nsb_namespace; - /** - * Estimated lock callback time. Used by adaptive timeout code to - * avoid spurious client evictions due to unresponsiveness when in - * fact the network or overall system load is at fault - */ - struct adaptive_timeout nsb_at_estimate; -}; - -enum { - /** LDLM namespace lock stats */ - LDLM_NSS_LOCKS = 0, - LDLM_NSS_LAST -}; - -enum ldlm_ns_type { - /** invalid type */ - LDLM_NS_TYPE_UNKNOWN = 0, - /** mdc namespace */ - LDLM_NS_TYPE_MDC, - /** mds namespace */ - LDLM_NS_TYPE_MDT, - /** osc namespace */ - LDLM_NS_TYPE_OSC, - /** ost namespace */ - LDLM_NS_TYPE_OST, - /** mgc namespace */ - LDLM_NS_TYPE_MGC, - /** mgs namespace */ - LDLM_NS_TYPE_MGT, -}; - -/** - * LDLM Namespace. - * - * Namespace serves to contain locks related to a particular service. 
- * There are two kinds of namespaces: - * - Server namespace has knowledge of all locks and is therefore authoritative - * to make decisions like what locks could be granted and what conflicts - * exist during new lock enqueue. - * - Client namespace only has limited knowledge about locks in the namespace, - * only seeing locks held by the client. - * - * Every Lustre service has one server namespace present on the server serving - * that service. Every client connected to the service has a client namespace - * for it. - * Every lock obtained by client in that namespace is actually represented by - * two in-memory locks. One on the server and one on the client. The locks are - * linked by a special cookie by which one node can tell to the other which lock - * it actually means during communications. Such locks are called remote locks. - * The locks held by server only without any reference to a client are called - * local locks. - */ -struct ldlm_namespace { - /** Backward link to OBD, required for LDLM pool to store new SLV. */ - struct obd_device *ns_obd; - - /** Flag indicating if namespace is on client instead of server */ - enum ldlm_side ns_client; - - /** name of this namespace */ - char *ns_name; - - /** Resource hash table for namespace. */ - struct cfs_hash *ns_rs_hash; - - /** serialize */ - spinlock_t ns_lock; - - /** big refcount (by bucket) */ - atomic_t ns_bref; - - /** - * Namespace connect flags supported by server (may be changed via - * sysfs, LRU resize may be disabled/enabled). - */ - __u64 ns_connect_flags; - - /** Client side original connect flags supported by server. */ - __u64 ns_orig_connect_flags; - - /* namespace debugfs dir entry */ - struct dentry *ns_debugfs_entry; - - /** - * Position in global namespace list linking all namespaces on - * the node. - */ - struct list_head ns_list_chain; - - /** - * List of unused locks for this namespace. This list is also called - * LRU lock list. 
- * Unused locks are locks with zero reader/writer reference counts. - * This list is only used on clients for lock caching purposes. - * When we want to release some locks voluntarily or if server wants - * us to release some locks due to e.g. memory pressure, we take locks - * to release from the head of this list. - * Locks are linked via l_lru field in \see struct ldlm_lock. - */ - struct list_head ns_unused_list; - /** Number of locks in the LRU list above */ - int ns_nr_unused; - - /** - * Maximum number of locks permitted in the LRU. If 0, means locks - * are managed by pools and there is no preset limit, rather it is all - * controlled by available memory on this client and on server. - */ - unsigned int ns_max_unused; - /** Maximum allowed age (last used time) for locks in the LRU */ - unsigned int ns_max_age; - - /** - * Used to rate-limit ldlm_namespace_dump calls. - * \see ldlm_namespace_dump. Increased by 10 seconds every time - * it is called. - */ - unsigned long ns_next_dump; - - /** - * LVB operations for this namespace. - * \see struct ldlm_valblock_ops - */ - struct ldlm_valblock_ops *ns_lvbo; - - /** - * Used by filter code to store pointer to OBD of the service. - * Should be dropped in favor of \a ns_obd - */ - void *ns_lvbp; - - /** - * Wait queue used by __ldlm_namespace_free. Gets woken up every time - * a resource is removed. - */ - wait_queue_head_t ns_waitq; - /** LDLM pool structure for this namespace */ - struct ldlm_pool ns_pool; - /** Definition of how eagerly unused locks will be released from LRU */ - enum ldlm_appetite ns_appetite; - - /** Limit of parallel AST RPC count. */ - unsigned ns_max_parallel_ast; - - /** - * Callback to check if a lock is good to be canceled by ELC or - * during recovery. - */ - ldlm_cancel_cbt ns_cancel; - - /** LDLM lock stats */ - struct lprocfs_stats *ns_stats; - - /** - * Flag to indicate namespace is being freed. Used to determine if - * recalculation of LDLM pool statistics should be skipped. 
- */ - unsigned ns_stopping:1; - - struct kobject ns_kobj; /* sysfs object */ - struct completion ns_kobj_unregister; -}; - -/** - * Returns 1 if namespace \a ns supports early lock cancel (ELC). - */ -static inline int ns_connect_cancelset(struct ldlm_namespace *ns) -{ - return !!(ns->ns_connect_flags & OBD_CONNECT_CANCELSET); -} - -/** - * Returns 1 if this namespace supports lru_resize. - */ -static inline int ns_connect_lru_resize(struct ldlm_namespace *ns) -{ - return !!(ns->ns_connect_flags & OBD_CONNECT_LRU_RESIZE); -} - -static inline void ns_register_cancel(struct ldlm_namespace *ns, - ldlm_cancel_cbt arg) -{ - ns->ns_cancel = arg; -} - -struct ldlm_lock; - -/** Type for blocking callback function of a lock. */ -typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock, - struct ldlm_lock_desc *new, void *data, - int flag); -/** Type for completion callback function of a lock. */ -typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, __u64 flags, - void *data); -/** Type for glimpse callback function of a lock. */ -typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data); - -/** Work list for sending GL ASTs to multiple locks. */ -struct ldlm_glimpse_work { - struct ldlm_lock *gl_lock; /* lock to glimpse */ - struct list_head gl_list; /* linkage to other gl work structs */ - __u32 gl_flags;/* see LDLM_GL_WORK_* below */ - union ldlm_gl_desc *gl_desc; /* glimpse descriptor to be packed in - * glimpse callback request - */ -}; - -/** The ldlm_glimpse_work is allocated on the stack and should not be freed. */ -#define LDLM_GL_WORK_NOFREE 0x1 - -/** Interval node data for each LDLM_EXTENT lock. */ -struct ldlm_interval { - struct interval_node li_node; /* node for tree management */ - struct list_head li_group; /* the locks which have the same - * policy - group of the policy - */ -}; - -#define to_ldlm_interval(n) container_of(n, struct ldlm_interval, li_node) - -/** - * Interval tree for extent locks. 
- * The interval tree must be accessed under the resource lock. - * Interval trees are used for granted extent locks to speed up conflicts - * lookup. See ldlm/interval_tree.c for more details. - */ -struct ldlm_interval_tree { - /** Tree size. */ - int lit_size; - enum ldlm_mode lit_mode; /* lock mode */ - struct interval_node *lit_root; /* actual ldlm_interval */ -}; - -/** Whether to track references to exports by LDLM locks. */ -#define LUSTRE_TRACKS_LOCK_EXP_REFS (0) - -/** Cancel flags. */ -enum ldlm_cancel_flags { - LCF_ASYNC = 0x1, /* Cancel locks asynchronously. */ - LCF_LOCAL = 0x2, /* Cancel locks locally, not notifing server */ - LCF_BL_AST = 0x4, /* Cancel locks marked as LDLM_FL_BL_AST - * in the same RPC - */ -}; - -struct ldlm_flock { - __u64 start; - __u64 end; - __u64 owner; - __u64 blocking_owner; - struct obd_export *blocking_export; - __u32 pid; -}; - -union ldlm_policy_data { - struct ldlm_extent l_extent; - struct ldlm_flock l_flock; - struct ldlm_inodebits l_inodebits; -}; - -void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type, - const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); - -enum lvb_type { - LVB_T_NONE = 0, - LVB_T_OST = 1, - LVB_T_LQUOTA = 2, - LVB_T_LAYOUT = 3, -}; - -/** - * LDLM_GID_ANY is used to match any group id in ldlm_lock_match(). - */ -#define LDLM_GID_ANY ((__u64)-1) - -/** - * LDLM lock structure - * - * Represents a single LDLM lock and its state in memory. Each lock is - * associated with a single ldlm_resource, the object which is being - * locked. There may be multiple ldlm_locks on a single resource, - * depending on the lock type and whether the locks are conflicting or - * not. - */ -struct ldlm_lock { - /** - * Local lock handle. - * When remote side wants to tell us about a lock, they address - * it by this opaque handle. The handle does not hold a - * reference on the ldlm_lock, so it can be safely passed to - * other threads or nodes. 
When the lock needs to be accessed - * from the handle, it is looked up again in the lock table, and - * may no longer exist. - * - * Must be first in the structure. - */ - struct portals_handle l_handle; - /** - * Lock reference count. - * This is how many users have pointers to actual structure, so that - * we do not accidentally free lock structure that is in use. - */ - atomic_t l_refc; - /** - * Internal spinlock protects l_resource. We should hold this lock - * first before taking res_lock. - */ - spinlock_t l_lock; - /** - * Pointer to actual resource this lock is in. - * ldlm_lock_change_resource() can change this. - */ - struct ldlm_resource *l_resource; - /** - * List item for client side LRU list. - * Protected by ns_lock in struct ldlm_namespace. - */ - struct list_head l_lru; - /** - * Linkage to resource's lock queues according to current lock state. - * (could be granted, waiting or converting) - * Protected by lr_lock in struct ldlm_resource. - */ - struct list_head l_res_link; - /** - * Tree node for ldlm_extent. - */ - struct ldlm_interval *l_tree_node; - /** - * Requested mode. - * Protected by lr_lock. - */ - enum ldlm_mode l_req_mode; - /** - * Granted mode, also protected by lr_lock. - */ - enum ldlm_mode l_granted_mode; - /** Lock completion handler pointer. Called when lock is granted. */ - ldlm_completion_callback l_completion_ast; - /** - * Lock blocking AST handler pointer. - * It plays two roles: - * - as a notification of an attempt to queue a conflicting lock (once) - * - as a notification when the lock is being cancelled. - * - * As such it's typically called twice: once for the initial conflict - * and then once more when the last user went away and the lock is - * cancelled (could happen recursively). - */ - ldlm_blocking_callback l_blocking_ast; - /** - * Lock glimpse handler. - * Glimpse handler is used to obtain LVB updates from a client by - * server - */ - ldlm_glimpse_callback l_glimpse_ast; - - /** - * Lock export. 
- * This is a pointer to actual client export for locks that were granted - * to clients. Used server-side. - */ - struct obd_export *l_export; - /** - * Lock connection export. - * Pointer to server export on a client. - */ - struct obd_export *l_conn_export; - - /** - * Remote lock handle. - * If the lock is remote, this is the handle of the other side lock - * (l_handle) - */ - struct lustre_handle l_remote_handle; - - /** - * Representation of private data specific for a lock type. - * Examples are: extent range for extent lock or bitmask for ibits locks - */ - union ldlm_policy_data l_policy_data; - - /** - * Lock state flags. Protected by lr_lock. - * \see lustre_dlm_flags.h where the bits are defined. - */ - __u64 l_flags; - - /** - * Lock r/w usage counters. - * Protected by lr_lock. - */ - __u32 l_readers; - __u32 l_writers; - /** - * If the lock is granted, a process sleeps on this waitq to learn when - * it's no longer in use. If the lock is not granted, a process sleeps - * on this waitq to learn when it becomes granted. - */ - wait_queue_head_t l_waitq; - - /** - * Seconds. It will be updated if there is any activity related to - * the lock, e.g. enqueue the lock or send blocking AST. - */ - time64_t l_last_activity; - - /** - * Time last used by e.g. being matched by lock match. - * Jiffies. Should be converted to time if needed. - */ - unsigned long l_last_used; - - /** Originally requested extent for the extent lock. */ - struct ldlm_extent l_req_extent; - - /* - * Client-side-only members. - */ - - enum lvb_type l_lvb_type; - - /** - * Temporary storage for a LVB received during an enqueue operation. - */ - __u32 l_lvb_len; - void *l_lvb_data; - - /** Private storage for lock user. Opaque to LDLM. */ - void *l_ast_data; - - /* - * Server-side-only members. - */ - - /** - * Connection cookie for the client originating the operation. - * Used by Commit on Share (COS) code. Currently only used for - * inodebits locks on MDS. 
- */ - __u64 l_client_cookie; - - /** - * List item for locks waiting for cancellation from clients. - * The lists this could be linked into are: - * waiting_locks_list (protected by waiting_locks_spinlock), - * then if the lock timed out, it is moved to - * expired_lock_thread.elt_expired_locks for further processing. - * Protected by elt_lock. - */ - struct list_head l_pending_chain; - - /** - * Set when lock is sent a blocking AST. Time in seconds when timeout - * is reached and client holding this lock could be evicted. - * This timeout could be further extended by e.g. certain IO activity - * under this lock. - * \see ost_rw_prolong_locks - */ - unsigned long l_callback_timeout; - - /** Local PID of process which created this lock. */ - __u32 l_pid; - - /** - * Number of times blocking AST was sent for this lock. - * This is for debugging. Valid values are 0 and 1, if there is an - * attempt to send blocking AST more than once, an assertion would be - * hit. \see ldlm_work_bl_ast_lock - */ - int l_bl_ast_run; - /** List item ldlm_add_ast_work_item() for case of blocking ASTs. */ - struct list_head l_bl_ast; - /** List item ldlm_add_ast_work_item() for case of completion ASTs. */ - struct list_head l_cp_ast; - /** For ldlm_add_ast_work_item() for "revoke" AST used in COS. */ - struct list_head l_rk_ast; - - /** - * Pointer to a conflicting lock that caused blocking AST to be sent - * for this lock - */ - struct ldlm_lock *l_blocking_lock; - - /** - * Protected by lr_lock, linkages to "skip lists". - * For more explanations of skip lists see ldlm/ldlm_inodebits.c - */ - struct list_head l_sl_mode; - struct list_head l_sl_policy; - - /** Reference tracking structure to debug leaked locks. */ - struct lu_ref l_reference; -#if LUSTRE_TRACKS_LOCK_EXP_REFS - /* Debugging stuff for bug 20498, for tracking export references. 
*/ - /** number of export references taken */ - int l_exp_refs_nr; - /** link all locks referencing one export */ - struct list_head l_exp_refs_link; - /** referenced export object */ - struct obd_export *l_exp_refs_target; -#endif -}; - -/** - * LDLM resource description. - * Basically, resource is a representation for a single object. - * Object has a name which is currently 4 64-bit integers. LDLM user is - * responsible for creation of a mapping between objects it wants to be - * protected and resource names. - * - * A resource can only hold locks of a single lock type, though there may be - * multiple ldlm_locks on a single resource, depending on the lock type and - * whether the locks are conflicting or not. - */ -struct ldlm_resource { - struct ldlm_ns_bucket *lr_ns_bucket; - - /** - * List item for list in namespace hash. - * protected by ns_lock - */ - struct hlist_node lr_hash; - - /** Spinlock to protect locks under this resource. */ - spinlock_t lr_lock; - - /** - * protected by lr_lock - * @{ - */ - /** List of locks in granted state */ - struct list_head lr_granted; - /** - * List of locks that could not be granted due to conflicts and - * that are waiting for conflicts to go away - */ - struct list_head lr_waiting; - /** @} */ - - /** Type of locks this resource can hold. Only one type per resource. */ - enum ldlm_type lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK,IBITS} */ - - /** Resource name */ - struct ldlm_res_id lr_name; - /** Reference count for this resource */ - atomic_t lr_refcount; - - /** - * Interval trees (only for extent locks) for all modes of this resource - */ - struct ldlm_interval_tree lr_itree[LCK_MODE_NUM]; - - /** - * Server-side-only lock value block elements. - * To serialize lvbo_init. - */ - struct mutex lr_lvb_mutex; - int lr_lvb_len; - - /** When the resource was considered as contended. */ - unsigned long lr_contention_time; - /** List of references to this resource. For debugging. 
*/ - struct lu_ref lr_reference; - - struct inode *lr_lvb_inode; -}; - -static inline bool ldlm_has_layout(struct ldlm_lock *lock) -{ - return lock->l_resource->lr_type == LDLM_IBITS && - lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_LAYOUT; -} - -static inline char * -ldlm_ns_name(struct ldlm_namespace *ns) -{ - return ns->ns_name; -} - -static inline struct ldlm_namespace * -ldlm_res_to_ns(struct ldlm_resource *res) -{ - return res->lr_ns_bucket->nsb_namespace; -} - -static inline struct ldlm_namespace * -ldlm_lock_to_ns(struct ldlm_lock *lock) -{ - return ldlm_res_to_ns(lock->l_resource); -} - -static inline char * -ldlm_lock_to_ns_name(struct ldlm_lock *lock) -{ - return ldlm_ns_name(ldlm_lock_to_ns(lock)); -} - -static inline struct adaptive_timeout * -ldlm_lock_to_ns_at(struct ldlm_lock *lock) -{ - return &lock->l_resource->lr_ns_bucket->nsb_at_estimate; -} - -static inline int ldlm_lvbo_init(struct ldlm_resource *res) -{ - struct ldlm_namespace *ns = ldlm_res_to_ns(res); - - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) - return ns->ns_lvbo->lvbo_init(res); - - return 0; -} - -static inline int ldlm_lvbo_size(struct ldlm_lock *lock) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_size) - return ns->ns_lvbo->lvbo_size(lock); - - return 0; -} - -static inline int ldlm_lvbo_fill(struct ldlm_lock *lock, void *buf, int len) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - if (ns->ns_lvbo) - return ns->ns_lvbo->lvbo_fill(lock, buf, len); - - return 0; -} - -struct ldlm_ast_work { - struct ldlm_lock *w_lock; - int w_blocking; - struct ldlm_lock_desc w_desc; - struct list_head w_list; - int w_flags; - void *w_data; - int w_datalen; -}; - -/** - * Common ldlm_enqueue parameters - */ -struct ldlm_enqueue_info { - enum ldlm_type ei_type; /** Type of the lock being enqueued. */ - enum ldlm_mode ei_mode; /** Mode of the lock being enqueued. 
*/ - void *ei_cb_bl; /** blocking lock callback */ - void *ei_cb_cp; /** lock completion callback */ - void *ei_cb_gl; /** lock glimpse callback */ - void *ei_cbdata; /** Data to be passed into callbacks. */ - unsigned int ei_enq_slave:1; /* whether enqueue slave stripes */ -}; - -extern struct obd_ops ldlm_obd_ops; - -extern char *ldlm_lockname[]; -const char *ldlm_it2str(enum ldlm_intent_flags it); - -/** - * Just a fancy CDEBUG call with log level preset to LDLM_DEBUG. - * For the cases where we do not have actual lock to print along - * with a debugging message that is ldlm-related - */ -#define LDLM_DEBUG_NOLOCK(format, a...) \ - CDEBUG(D_DLMTRACE, "### " format "\n", ##a) - -/** - * Support function for lock information printing into debug logs. - * \see LDLM_DEBUG - */ -#define ldlm_lock_debug(msgdata, mask, cdls, lock, fmt, a...) do { \ - CFS_CHECK_STACK(msgdata, mask, cdls); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - _ldlm_lock_debug(lock, msgdata, fmt, ##a); \ -} while (0) - -void _ldlm_lock_debug(struct ldlm_lock *lock, - struct libcfs_debug_msg_data *data, - const char *fmt, ...) - __printf(3, 4); - -/** - * Rate-limited version of lock printing function. - */ -#define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do { \ - static struct cfs_debug_limit_state _ldlm_cdls; \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, mask, &_ldlm_cdls); \ - ldlm_lock_debug(&msgdata, mask, &_ldlm_cdls, lock, "### " fmt, ##a);\ -} while (0) - -#define LDLM_ERROR(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_ERROR, lock, fmt, ## a) -#define LDLM_WARN(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_WARNING, lock, fmt, ## a) - -/** Non-rate-limited lock printing function for debugging purposes. */ -#define LDLM_DEBUG(lock, fmt, a...) 
do { \ - if (likely(lock)) { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_DLMTRACE, NULL); \ - ldlm_lock_debug(&msgdata, D_DLMTRACE, NULL, lock, \ - "### " fmt, ##a); \ - } else { \ - LDLM_DEBUG_NOLOCK("no dlm lock: " fmt, ##a); \ - } \ -} while (0) - -typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, __u64 *flags, - int first_enq, enum ldlm_error *err, - struct list_head *work_list); - -/** - * Return values for lock iterators. - * Also used during deciding of lock grants and cancellations. - */ -#define LDLM_ITER_CONTINUE 1 /* keep iterating */ -#define LDLM_ITER_STOP 2 /* stop iterating */ - -typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *); -typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *); - -/** \defgroup ldlm_iterator Lock iterators - * - * LDLM provides for a way to iterate through every lock on a resource or - * namespace or every resource in a namespace. - * @{ - */ -int ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *, - ldlm_iterator_t iter, void *data); -/** @} ldlm_iterator */ - -int ldlm_replay_locks(struct obd_import *imp); - -/* ldlm_flock.c */ -int ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data); - -/* ldlm_extent.c */ -__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms); - -struct ldlm_callback_suite { - ldlm_completion_callback lcs_completion; - ldlm_blocking_callback lcs_blocking; - ldlm_glimpse_callback lcs_glimpse; -}; - -/* ldlm_lockd.c */ -int ldlm_get_ref(void); -void ldlm_put_ref(void); -struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req); - -/* ldlm_lock.c */ -void ldlm_lock2handle(const struct ldlm_lock *lock, - struct lustre_handle *lockh); -struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *, __u64 flags); -void ldlm_cancel_callback(struct ldlm_lock *); -int ldlm_lock_remove_from_lru(struct ldlm_lock *); -int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data); - -/** - * Obtain a lock 
reference by its handle. - */ -static inline struct ldlm_lock *ldlm_handle2lock(const struct lustre_handle *h) -{ - return __ldlm_handle2lock(h, 0); -} - -#define LDLM_LOCK_REF_DEL(lock) \ - lu_ref_del(&lock->l_reference, "handle", current) - -static inline struct ldlm_lock * -ldlm_handle2lock_long(const struct lustre_handle *h, __u64 flags) -{ - struct ldlm_lock *lock; - - lock = __ldlm_handle2lock(h, flags); - if (lock) - LDLM_LOCK_REF_DEL(lock); - return lock; -} - -/** - * Update Lock Value Block Operations (LVBO) on a resource taking into account - * data from request \a r - */ -static inline int ldlm_res_lvbo_update(struct ldlm_resource *res, - struct ptlrpc_request *r, int increase) -{ - if (ldlm_res_to_ns(res)->ns_lvbo && - ldlm_res_to_ns(res)->ns_lvbo->lvbo_update) { - return ldlm_res_to_ns(res)->ns_lvbo->lvbo_update(res, r, - increase); - } - return 0; -} - -int ldlm_error2errno(enum ldlm_error error); - -#if LUSTRE_TRACKS_LOCK_EXP_REFS -void ldlm_dump_export_locks(struct obd_export *exp); -#endif - -/** - * Release a temporary lock reference obtained by ldlm_handle2lock() or - * __ldlm_handle2lock(). - */ -#define LDLM_LOCK_PUT(lock) \ -do { \ - LDLM_LOCK_REF_DEL(lock); \ - /*LDLM_DEBUG((lock), "put");*/ \ - ldlm_lock_put(lock); \ -} while (0) - -/** - * Release a lock reference obtained by some other means (see - * LDLM_LOCK_PUT()). 
- */ -#define LDLM_LOCK_RELEASE(lock) \ -do { \ - /*LDLM_DEBUG((lock), "put");*/ \ - ldlm_lock_put(lock); \ -} while (0) - -#define LDLM_LOCK_GET(lock) \ -({ \ - ldlm_lock_get(lock); \ - /*LDLM_DEBUG((lock), "get");*/ \ - lock; \ -}) - -#define ldlm_lock_list_put(head, member, count) \ -({ \ - struct ldlm_lock *_lock, *_next; \ - int c = count; \ - list_for_each_entry_safe(_lock, _next, head, member) { \ - if (c-- == 0) \ - break; \ - list_del_init(&_lock->member); \ - LDLM_LOCK_RELEASE(_lock); \ - } \ - LASSERT(c <= 0); \ -}) - -struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); -void ldlm_lock_put(struct ldlm_lock *lock); -void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); -void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode); -int ldlm_lock_addref_try(const struct lustre_handle *lockh, - enum ldlm_mode mode); -void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode); -void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, - enum ldlm_mode mode); -void ldlm_lock_fail_match_locked(struct ldlm_lock *lock); -void ldlm_lock_allow_match(struct ldlm_lock *lock); -void ldlm_lock_allow_match_locked(struct ldlm_lock *lock); -enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, - const struct ldlm_res_id *, - enum ldlm_type type, union ldlm_policy_data *, - enum ldlm_mode mode, struct lustre_handle *, - int unref); -enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, - __u64 *bits); -void ldlm_lock_cancel(struct ldlm_lock *lock); -void ldlm_lock_dump_handle(int level, const struct lustre_handle *); -void ldlm_unlink_lock_skiplist(struct ldlm_lock *req); - -/* resource.c */ -struct ldlm_namespace * -ldlm_namespace_new(struct obd_device *obd, char *name, - enum ldlm_side client, enum ldlm_appetite apt, - enum ldlm_ns_type ns_type); -int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags); -void ldlm_namespace_free_prior(struct 
ldlm_namespace *ns, - struct obd_import *imp, - int force); -void ldlm_namespace_free_post(struct ldlm_namespace *ns); -void ldlm_namespace_get(struct ldlm_namespace *ns); -void ldlm_namespace_put(struct ldlm_namespace *ns); -void ldlm_debugfs_setup(void); -void ldlm_debugfs_cleanup(void); - -/* resource.c - internal */ -struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, - struct ldlm_resource *parent, - const struct ldlm_res_id *, - enum ldlm_type type, int create); -void ldlm_resource_putref(struct ldlm_resource *res); -void ldlm_resource_add_lock(struct ldlm_resource *res, - struct list_head *head, - struct ldlm_lock *lock); -void ldlm_resource_unlink_lock(struct ldlm_lock *lock); -void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); -void ldlm_dump_all_namespaces(enum ldlm_side client, int level); -void ldlm_namespace_dump(int level, struct ldlm_namespace *); -void ldlm_resource_dump(int level, struct ldlm_resource *); -int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, - const struct ldlm_res_id *); - -#define LDLM_RESOURCE_ADDREF(res) do { \ - lu_ref_add_atomic(&(res)->lr_reference, __func__, current); \ -} while (0) - -#define LDLM_RESOURCE_DELREF(res) do { \ - lu_ref_del(&(res)->lr_reference, __func__, current); \ -} while (0) - -/* ldlm_request.c */ -/** \defgroup ldlm_local_ast Default AST handlers for local locks - * These AST handlers are typically used for server-side local locks and are - * also used by client-side lock handlers to perform minimum level base - * processing. - * @{ - */ -int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data); -/** @} ldlm_local_ast */ - -/** \defgroup ldlm_cli_api API to operate on locks from actual LDLM users. - * These are typically used by client and server (*_local versions) - * to obtain and release locks. 
- * @{ - */ -int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, - struct ldlm_enqueue_info *einfo, - const struct ldlm_res_id *res_id, - union ldlm_policy_data const *policy, __u64 *flags, - void *lvb, __u32 lvb_len, enum lvb_type lvb_type, - struct lustre_handle *lockh, int async); -int ldlm_prep_enqueue_req(struct obd_export *exp, - struct ptlrpc_request *req, - struct list_head *cancels, - int count); -int ldlm_prep_elc_req(struct obd_export *exp, - struct ptlrpc_request *req, - int version, int opc, int canceloff, - struct list_head *cancels, int count); - -int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, - enum ldlm_type type, __u8 with_policy, - enum ldlm_mode mode, - __u64 *flags, void *lvb, __u32 lvb_len, - const struct lustre_handle *lockh, int rc); -int ldlm_cli_update_pool(struct ptlrpc_request *req); -int ldlm_cli_cancel(const struct lustre_handle *lockh, - enum ldlm_cancel_flags cancel_flags); -int ldlm_cli_cancel_unused(struct ldlm_namespace *, const struct ldlm_res_id *, - enum ldlm_cancel_flags flags, void *opaque); -int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - enum ldlm_cancel_flags flags, - void *opaque); -int ldlm_cancel_resource_local(struct ldlm_resource *res, - struct list_head *cancels, - union ldlm_policy_data *policy, - enum ldlm_mode mode, __u64 lock_flags, - enum ldlm_cancel_flags cancel_flags, - void *opaque); -int ldlm_cli_cancel_list_local(struct list_head *cancels, int count, - enum ldlm_cancel_flags flags); -int ldlm_cli_cancel_list(struct list_head *head, int count, - struct ptlrpc_request *req, - enum ldlm_cancel_flags flags); -/** @} ldlm_cli_api */ - -/* mds/handler.c */ -/* This has to be here because recursive inclusion sucks. 
*/ -int intent_disposition(struct ldlm_reply *rep, int flag); -void intent_set_disposition(struct ldlm_reply *rep, int flag); - -/** - * "Modes" of acquiring lock_res, necessary to tell lockdep that taking more - * than one lock_res is dead-lock safe. - */ -enum lock_res_type { - LRT_NORMAL, - LRT_NEW -}; - -/** Lock resource. */ -static inline void lock_res(struct ldlm_resource *res) -{ - spin_lock(&res->lr_lock); -} - -/** Lock resource with a way to instruct lockdep code about nestedness-safe. */ -static inline void lock_res_nested(struct ldlm_resource *res, - enum lock_res_type mode) -{ - spin_lock_nested(&res->lr_lock, mode); -} - -/** Unlock resource. */ -static inline void unlock_res(struct ldlm_resource *res) -{ - spin_unlock(&res->lr_lock); -} - -/** Check if resource is already locked, assert if not. */ -static inline void check_res_locked(struct ldlm_resource *res) -{ - assert_spin_locked(&res->lr_lock); -} - -struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock); -void unlock_res_and_lock(struct ldlm_lock *lock); - -/* ldlm_pool.c */ -/** \defgroup ldlm_pools Various LDLM pool related functions - * There are not used outside of ldlm. 
- * @{ - */ -int ldlm_pools_init(void); -void ldlm_pools_fini(void); - -int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, - int idx, enum ldlm_side client); -void ldlm_pool_fini(struct ldlm_pool *pl); -void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock); -void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock); -/** @} */ - -static inline int ldlm_extent_overlap(const struct ldlm_extent *ex1, - const struct ldlm_extent *ex2) -{ - return ex1->start <= ex2->end && ex2->start <= ex1->end; -} - -/* check if @ex1 contains @ex2 */ -static inline int ldlm_extent_contain(const struct ldlm_extent *ex1, - const struct ldlm_extent *ex2) -{ - return ex1->start <= ex2->start && ex1->end >= ex2->end; -} - -#endif -/** @} LDLM */ diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h deleted file mode 100644 index 53db031c4c8c..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h +++ /dev/null @@ -1,402 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* -*- buffer-read-only: t -*- vi: set ro: - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program. If not, see . - */ -/** - * \file lustre_dlm_flags.h - * The flags and collections of flags (masks) for \see struct ldlm_lock. - * - * \addtogroup LDLM Lustre Distributed Lock Manager - * @{ - * - * \name flags - * The flags and collections of flags (masks) for \see struct ldlm_lock. 
- * @{ - */ -#ifndef LDLM_ALL_FLAGS_MASK - -/** l_flags bits marked as "all_flags" bits */ -#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F932FULL - -/** extent, mode, or resource changed */ -#define LDLM_FL_LOCK_CHANGED 0x0000000000000001ULL /* bit 0 */ -#define ldlm_is_lock_changed(_l) LDLM_TEST_FLAG((_l), 1ULL << 0) -#define ldlm_set_lock_changed(_l) LDLM_SET_FLAG((_l), 1ULL << 0) -#define ldlm_clear_lock_changed(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 0) - -/** - * Server placed lock on granted list, or a recovering client wants the - * lock added to the granted list, no questions asked. - */ -#define LDLM_FL_BLOCK_GRANTED 0x0000000000000002ULL /* bit 1 */ -#define ldlm_is_block_granted(_l) LDLM_TEST_FLAG((_l), 1ULL << 1) -#define ldlm_set_block_granted(_l) LDLM_SET_FLAG((_l), 1ULL << 1) -#define ldlm_clear_block_granted(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 1) - -/** - * Server placed lock on conv list, or a recovering client wants the lock - * added to the conv list, no questions asked. - */ -#define LDLM_FL_BLOCK_CONV 0x0000000000000004ULL /* bit 2 */ -#define ldlm_is_block_conv(_l) LDLM_TEST_FLAG((_l), 1ULL << 2) -#define ldlm_set_block_conv(_l) LDLM_SET_FLAG((_l), 1ULL << 2) -#define ldlm_clear_block_conv(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 2) - -/** - * Server placed lock on wait list, or a recovering client wants the lock - * added to the wait list, no questions asked. - */ -#define LDLM_FL_BLOCK_WAIT 0x0000000000000008ULL /* bit 3 */ -#define ldlm_is_block_wait(_l) LDLM_TEST_FLAG((_l), 1ULL << 3) -#define ldlm_set_block_wait(_l) LDLM_SET_FLAG((_l), 1ULL << 3) -#define ldlm_clear_block_wait(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 3) - -/** blocking or cancel packet was queued for sending. */ -#define LDLM_FL_AST_SENT 0x0000000000000020ULL /* bit 5 */ -#define ldlm_is_ast_sent(_l) LDLM_TEST_FLAG((_l), 1ULL << 5) -#define ldlm_set_ast_sent(_l) LDLM_SET_FLAG((_l), 1ULL << 5) -#define ldlm_clear_ast_sent(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 5) - -/** - * Lock is being replayed. 
This could probably be implied by the fact that - * one of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous. - */ -#define LDLM_FL_REPLAY 0x0000000000000100ULL /* bit 8 */ -#define ldlm_is_replay(_l) LDLM_TEST_FLAG((_l), 1ULL << 8) -#define ldlm_set_replay(_l) LDLM_SET_FLAG((_l), 1ULL << 8) -#define ldlm_clear_replay(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 8) - -/** Don't grant lock, just do intent. */ -#define LDLM_FL_INTENT_ONLY 0x0000000000000200ULL /* bit 9 */ -#define ldlm_is_intent_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 9) -#define ldlm_set_intent_only(_l) LDLM_SET_FLAG((_l), 1ULL << 9) -#define ldlm_clear_intent_only(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 9) - -/** lock request has intent */ -#define LDLM_FL_HAS_INTENT 0x0000000000001000ULL /* bit 12 */ -#define ldlm_is_has_intent(_l) LDLM_TEST_FLAG((_l), 1ULL << 12) -#define ldlm_set_has_intent(_l) LDLM_SET_FLAG((_l), 1ULL << 12) -#define ldlm_clear_has_intent(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 12) - -/** flock deadlock detected */ -#define LDLM_FL_FLOCK_DEADLOCK 0x0000000000008000ULL /* bit 15 */ -#define ldlm_is_flock_deadlock(_l) LDLM_TEST_FLAG((_l), 1ULL << 15) -#define ldlm_set_flock_deadlock(_l) LDLM_SET_FLAG((_l), 1ULL << 15) -#define ldlm_clear_flock_deadlock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 15) - -/** discard (no writeback) on cancel */ -#define LDLM_FL_DISCARD_DATA 0x0000000000010000ULL /* bit 16 */ -#define ldlm_is_discard_data(_l) LDLM_TEST_FLAG((_l), 1ULL << 16) -#define ldlm_set_discard_data(_l) LDLM_SET_FLAG((_l), 1ULL << 16) -#define ldlm_clear_discard_data(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 16) - -/** Blocked by group lock - wait indefinitely */ -#define LDLM_FL_NO_TIMEOUT 0x0000000000020000ULL /* bit 17 */ -#define ldlm_is_no_timeout(_l) LDLM_TEST_FLAG((_l), 1ULL << 17) -#define ldlm_set_no_timeout(_l) LDLM_SET_FLAG((_l), 1ULL << 17) -#define ldlm_clear_no_timeout(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 17) - -/** - * Server told not to wait if blocked. 
For AGL, OST will not send glimpse - * callback. - */ -#define LDLM_FL_BLOCK_NOWAIT 0x0000000000040000ULL /* bit 18 */ -#define ldlm_is_block_nowait(_l) LDLM_TEST_FLAG((_l), 1ULL << 18) -#define ldlm_set_block_nowait(_l) LDLM_SET_FLAG((_l), 1ULL << 18) -#define ldlm_clear_block_nowait(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 18) - -/** return blocking lock */ -#define LDLM_FL_TEST_LOCK 0x0000000000080000ULL /* bit 19 */ -#define ldlm_is_test_lock(_l) LDLM_TEST_FLAG((_l), 1ULL << 19) -#define ldlm_set_test_lock(_l) LDLM_SET_FLAG((_l), 1ULL << 19) -#define ldlm_clear_test_lock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 19) - -/** match lock only */ -#define LDLM_FL_MATCH_LOCK 0x0000000000100000ULL /* bit 20 */ - -/** - * Immediately cancel such locks when they block some other locks. Send - * cancel notification to original lock holder, but expect no reply. This - * is for clients (like liblustre) that cannot be expected to reliably - * response to blocking AST. - */ -#define LDLM_FL_CANCEL_ON_BLOCK 0x0000000000800000ULL /* bit 23 */ -#define ldlm_is_cancel_on_block(_l) LDLM_TEST_FLAG((_l), 1ULL << 23) -#define ldlm_set_cancel_on_block(_l) LDLM_SET_FLAG((_l), 1ULL << 23) -#define ldlm_clear_cancel_on_block(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 23) - -/** - * measure lock contention and return -EUSERS if locking contention is high - */ -#define LDLM_FL_DENY_ON_CONTENTION 0x0000000040000000ULL /* bit 30 */ -#define ldlm_is_deny_on_contention(_l) LDLM_TEST_FLAG((_l), 1ULL << 30) -#define ldlm_set_deny_on_contention(_l) LDLM_SET_FLAG((_l), 1ULL << 30) -#define ldlm_clear_deny_on_contention(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 30) - -/** - * These are flags that are mapped into the flags and ASTs of blocking - * locks Add FL_DISCARD to blocking ASTs - */ -#define LDLM_FL_AST_DISCARD_DATA 0x0000000080000000ULL /* bit 31 */ -#define ldlm_is_ast_discard_data(_l) LDLM_TEST_FLAG((_l), 1ULL << 31) -#define ldlm_set_ast_discard_data(_l) LDLM_SET_FLAG((_l), 1ULL << 31) -#define 
ldlm_clear_ast_discard_data(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 31) - -/** - * Used for marking lock as a target for -EINTR while cp_ast sleep emulation - * + race with upcoming bl_ast. - */ -#define LDLM_FL_FAIL_LOC 0x0000000100000000ULL /* bit 32 */ -#define ldlm_is_fail_loc(_l) LDLM_TEST_FLAG((_l), 1ULL << 32) -#define ldlm_set_fail_loc(_l) LDLM_SET_FLAG((_l), 1ULL << 32) -#define ldlm_clear_fail_loc(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 32) - -/** - * Used while processing the unused list to know that we have already - * handled this lock and decided to skip it. - */ -#define LDLM_FL_SKIPPED 0x0000000200000000ULL /* bit 33 */ -#define ldlm_is_skipped(_l) LDLM_TEST_FLAG((_l), 1ULL << 33) -#define ldlm_set_skipped(_l) LDLM_SET_FLAG((_l), 1ULL << 33) -#define ldlm_clear_skipped(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 33) - -/** this lock is being destroyed */ -#define LDLM_FL_CBPENDING 0x0000000400000000ULL /* bit 34 */ -#define ldlm_is_cbpending(_l) LDLM_TEST_FLAG((_l), 1ULL << 34) -#define ldlm_set_cbpending(_l) LDLM_SET_FLAG((_l), 1ULL << 34) -#define ldlm_clear_cbpending(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 34) - -/** not a real flag, not saved in lock */ -#define LDLM_FL_WAIT_NOREPROC 0x0000000800000000ULL /* bit 35 */ -#define ldlm_is_wait_noreproc(_l) LDLM_TEST_FLAG((_l), 1ULL << 35) -#define ldlm_set_wait_noreproc(_l) LDLM_SET_FLAG((_l), 1ULL << 35) -#define ldlm_clear_wait_noreproc(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 35) - -/** cancellation callback already run */ -#define LDLM_FL_CANCEL 0x0000001000000000ULL /* bit 36 */ -#define ldlm_is_cancel(_l) LDLM_TEST_FLAG((_l), 1ULL << 36) -#define ldlm_set_cancel(_l) LDLM_SET_FLAG((_l), 1ULL << 36) -#define ldlm_clear_cancel(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 36) - -/** whatever it might mean -- never transmitted? 
*/ -#define LDLM_FL_LOCAL_ONLY 0x0000002000000000ULL /* bit 37 */ -#define ldlm_is_local_only(_l) LDLM_TEST_FLAG((_l), 1ULL << 37) -#define ldlm_set_local_only(_l) LDLM_SET_FLAG((_l), 1ULL << 37) -#define ldlm_clear_local_only(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 37) - -/** don't run the cancel callback under ldlm_cli_cancel_unused */ -#define LDLM_FL_FAILED 0x0000004000000000ULL /* bit 38 */ -#define ldlm_is_failed(_l) LDLM_TEST_FLAG((_l), 1ULL << 38) -#define ldlm_set_failed(_l) LDLM_SET_FLAG((_l), 1ULL << 38) -#define ldlm_clear_failed(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 38) - -/** lock cancel has already been sent */ -#define LDLM_FL_CANCELING 0x0000008000000000ULL /* bit 39 */ -#define ldlm_is_canceling(_l) LDLM_TEST_FLAG((_l), 1ULL << 39) -#define ldlm_set_canceling(_l) LDLM_SET_FLAG((_l), 1ULL << 39) -#define ldlm_clear_canceling(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 39) - -/** local lock (ie, no srv/cli split) */ -#define LDLM_FL_LOCAL 0x0000010000000000ULL /* bit 40 */ -#define ldlm_is_local(_l) LDLM_TEST_FLAG((_l), 1ULL << 40) -#define ldlm_set_local(_l) LDLM_SET_FLAG((_l), 1ULL << 40) -#define ldlm_clear_local(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 40) - -/** - * XXX FIXME: This is being added to b_size as a low-risk fix to the - * fact that the LVB filling happens _after_ the lock has been granted, - * so another thread can match it before the LVB has been updated. As a - * dirty hack, we set LDLM_FL_LVB_READY only after we've done the LVB poop. - * this is only needed on LOV/OSC now, where LVB is actually used and - * callers must set it in input flags. - * - * The proper fix is to do the granting inside of the completion AST, - * which can be replaced with a LVB-aware wrapping function for OSC locks. - * That change is pretty high-risk, though, and would need a lot more - * testing. 
- */ -#define LDLM_FL_LVB_READY 0x0000020000000000ULL /* bit 41 */ -#define ldlm_is_lvb_ready(_l) LDLM_TEST_FLAG((_l), 1ULL << 41) -#define ldlm_set_lvb_ready(_l) LDLM_SET_FLAG((_l), 1ULL << 41) -#define ldlm_clear_lvb_ready(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 41) - -/** - * A lock contributes to the known minimum size (KMS) calculation until it - * has finished the part of its cancellation that performs write back on its - * dirty pages. It can remain on the granted list during this whole time. - * Threads racing to update the KMS after performing their writeback need - * to know to exclude each other's locks from the calculation as they walk - * the granted list. - */ -#define LDLM_FL_KMS_IGNORE 0x0000040000000000ULL /* bit 42 */ -#define ldlm_is_kms_ignore(_l) LDLM_TEST_FLAG((_l), 1ULL << 42) -#define ldlm_set_kms_ignore(_l) LDLM_SET_FLAG((_l), 1ULL << 42) -#define ldlm_clear_kms_ignore(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 42) - -/** completion AST to be executed */ -#define LDLM_FL_CP_REQD 0x0000080000000000ULL /* bit 43 */ -#define ldlm_is_cp_reqd(_l) LDLM_TEST_FLAG((_l), 1ULL << 43) -#define ldlm_set_cp_reqd(_l) LDLM_SET_FLAG((_l), 1ULL << 43) -#define ldlm_clear_cp_reqd(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 43) - -/** cleanup_resource has already handled the lock */ -#define LDLM_FL_CLEANED 0x0000100000000000ULL /* bit 44 */ -#define ldlm_is_cleaned(_l) LDLM_TEST_FLAG((_l), 1ULL << 44) -#define ldlm_set_cleaned(_l) LDLM_SET_FLAG((_l), 1ULL << 44) -#define ldlm_clear_cleaned(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 44) - -/** - * optimization hint: LDLM can run blocking callback from current context - * w/o involving separate thread. 
in order to decrease cs rate - */ -#define LDLM_FL_ATOMIC_CB 0x0000200000000000ULL /* bit 45 */ -#define ldlm_is_atomic_cb(_l) LDLM_TEST_FLAG((_l), 1ULL << 45) -#define ldlm_set_atomic_cb(_l) LDLM_SET_FLAG((_l), 1ULL << 45) -#define ldlm_clear_atomic_cb(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 45) - -/** - * It may happen that a client initiates two operations, e.g. unlink and - * mkdir, such that the server sends a blocking AST for conflicting locks - * to this client for the first operation, whereas the second operation - * has canceled this lock and is waiting for rpc_lock which is taken by - * the first operation. LDLM_FL_BL_AST is set by ldlm_callback_handler() in - * the lock to prevent the Early Lock Cancel (ELC) code from cancelling it. - */ -#define LDLM_FL_BL_AST 0x0000400000000000ULL /* bit 46 */ -#define ldlm_is_bl_ast(_l) LDLM_TEST_FLAG((_l), 1ULL << 46) -#define ldlm_set_bl_ast(_l) LDLM_SET_FLAG((_l), 1ULL << 46) -#define ldlm_clear_bl_ast(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 46) - -/** - * Set by ldlm_cancel_callback() when lock cache is dropped to let - * ldlm_callback_handler() return EINVAL to the server. It is used when - * ELC RPC is already prepared and is waiting for rpc_lock, too late to - * send a separate CANCEL RPC. - */ -#define LDLM_FL_BL_DONE 0x0000800000000000ULL /* bit 47 */ -#define ldlm_is_bl_done(_l) LDLM_TEST_FLAG((_l), 1ULL << 47) -#define ldlm_set_bl_done(_l) LDLM_SET_FLAG((_l), 1ULL << 47) -#define ldlm_clear_bl_done(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 47) - -/** - * Don't put lock into the LRU list, so that it is not canceled due - * to aging. Used by MGC locks, they are cancelled only at unmount or - * by callback. - */ -#define LDLM_FL_NO_LRU 0x0001000000000000ULL /* bit 48 */ -#define ldlm_is_no_lru(_l) LDLM_TEST_FLAG((_l), 1ULL << 48) -#define ldlm_set_no_lru(_l) LDLM_SET_FLAG((_l), 1ULL << 48) -#define ldlm_clear_no_lru(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 48) - -/** - * Set for locks that failed and where the server has been notified. 
- * - * Protected by lock and resource locks. - */ -#define LDLM_FL_FAIL_NOTIFIED 0x0002000000000000ULL /* bit 49 */ -#define ldlm_is_fail_notified(_l) LDLM_TEST_FLAG((_l), 1ULL << 49) -#define ldlm_set_fail_notified(_l) LDLM_SET_FLAG((_l), 1ULL << 49) -#define ldlm_clear_fail_notified(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 49) - -/** - * Set for locks that were removed from class hash table and will - * be destroyed when last reference to them is released. Set by - * ldlm_lock_destroy_internal(). - * - * Protected by lock and resource locks. - */ -#define LDLM_FL_DESTROYED 0x0004000000000000ULL /* bit 50 */ -#define ldlm_is_destroyed(_l) LDLM_TEST_FLAG((_l), 1ULL << 50) -#define ldlm_set_destroyed(_l) LDLM_SET_FLAG((_l), 1ULL << 50) -#define ldlm_clear_destroyed(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 50) - -/** flag whether this is a server namespace lock */ -#define LDLM_FL_SERVER_LOCK 0x0008000000000000ULL /* bit 51 */ -#define ldlm_is_server_lock(_l) LDLM_TEST_FLAG((_l), 1ULL << 51) -#define ldlm_set_server_lock(_l) LDLM_SET_FLAG((_l), 1ULL << 51) -#define ldlm_clear_server_lock(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 51) - -/** - * It's set in lock_res_and_lock() and unset in unlock_res_and_lock(). - * - * NB: compared with check_res_locked(), checking this bit is cheaper. - * Also, spin_is_locked() is deprecated for kernel code; one reason is - * because it works only for SMP so user needs to add extra macros like - * LASSERT_SPIN_LOCKED for uniprocessor kernels. - */ -#define LDLM_FL_RES_LOCKED 0x0010000000000000ULL /* bit 52 */ -#define ldlm_is_res_locked(_l) LDLM_TEST_FLAG((_l), 1ULL << 52) -#define ldlm_set_res_locked(_l) LDLM_SET_FLAG((_l), 1ULL << 52) -#define ldlm_clear_res_locked(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 52) - -/** - * It's set once we call ldlm_add_waiting_lock_res_locked() to start the - * lock-timeout timer and it will never be reset. - * - * Protected by lock and resource locks. 
- */ -#define LDLM_FL_WAITED 0x0020000000000000ULL /* bit 53 */ -#define ldlm_is_waited(_l) LDLM_TEST_FLAG((_l), 1ULL << 53) -#define ldlm_set_waited(_l) LDLM_SET_FLAG((_l), 1ULL << 53) -#define ldlm_clear_waited(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 53) - -/** Flag whether this is a server namespace lock. */ -#define LDLM_FL_NS_SRV 0x0040000000000000ULL /* bit 54 */ -#define ldlm_is_ns_srv(_l) LDLM_TEST_FLAG((_l), 1ULL << 54) -#define ldlm_set_ns_srv(_l) LDLM_SET_FLAG((_l), 1ULL << 54) -#define ldlm_clear_ns_srv(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 54) - -/** Flag whether this lock can be reused. Used by exclusive open. */ -#define LDLM_FL_EXCL 0x0080000000000000ULL /* bit 55 */ -#define ldlm_is_excl(_l) LDLM_TEST_FLAG((_l), 1ULL << 55) -#define ldlm_set_excl(_l) LDLM_SET_FLAG((_l), 1ULL << 55) -#define ldlm_clear_excl(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 55) - -/** l_flags bits marked as "ast" bits */ -#define LDLM_FL_AST_MASK (LDLM_FL_FLOCK_DEADLOCK |\ - LDLM_FL_AST_DISCARD_DATA) - -/** l_flags bits marked as "blocked" bits */ -#define LDLM_FL_BLOCKED_MASK (LDLM_FL_BLOCK_GRANTED |\ - LDLM_FL_BLOCK_CONV |\ - LDLM_FL_BLOCK_WAIT) - -/** l_flags bits marked as "gone" bits */ -#define LDLM_FL_GONE_MASK (LDLM_FL_DESTROYED |\ - LDLM_FL_FAILED) - -/** l_flags bits marked as "inherit" bits */ -/* Flags inherited from wire on enqueue/reply between client/server. */ -/* NO_TIMEOUT flag to force ldlm_lock_match() to wait with no timeout. */ -/* TEST_LOCK flag to not let TEST lock to be granted. */ -#define LDLM_FL_INHERIT_MASK (LDLM_FL_CANCEL_ON_BLOCK |\ - LDLM_FL_NO_TIMEOUT |\ - LDLM_FL_TEST_LOCK) - -/** test for ldlm_lock flag bit set */ -#define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0) - -/** multi-bit test: are any of mask bits set? 
*/ -#define LDLM_HAVE_MASK(_l, _m) ((_l)->l_flags & LDLM_FL_##_m##_MASK) - -/** set a ldlm_lock flag bit */ -#define LDLM_SET_FLAG(_l, _b) ((_l)->l_flags |= (_b)) - -/** clear a ldlm_lock flag bit */ -#define LDLM_CLEAR_FLAG(_l, _b) ((_l)->l_flags &= ~(_b)) - -/** @} subgroup */ -/** @} group */ - -#endif /* LDLM_ALL_FLAGS_MASK */ diff --git a/drivers/staging/lustre/lustre/include/lustre_errno.h b/drivers/staging/lustre/lustre/include/lustre_errno.h deleted file mode 100644 index 59fbb9f47ff1..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_errno.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.txt - * - * GPL HEADER END - */ -/* - * Copyright (C) 2011 FUJITSU LIMITED. All rights reserved. - * - * Copyright (c) 2013, Intel Corporation. - */ - -#ifndef LUSTRE_ERRNO_H -#define LUSTRE_ERRNO_H - -/* - * Only "network" errnos, which are defined below, are allowed on wire (or on - * disk). Generic routines exist to help translate between these and a subset - * of the "host" errnos. Some host errnos (e.g., EDEADLOCK) are intentionally - * left out. See also the comment on lustre_errno_hton_mapping[]. 
- * - * To maintain compatibility with existing x86 clients and servers, each of - * these network errnos has the same numerical value as its corresponding host - * errno on x86. - */ -#define LUSTRE_EPERM 1 /* Operation not permitted */ -#define LUSTRE_ENOENT 2 /* No such file or directory */ -#define LUSTRE_ESRCH 3 /* No such process */ -#define LUSTRE_EINTR 4 /* Interrupted system call */ -#define LUSTRE_EIO 5 /* I/O error */ -#define LUSTRE_ENXIO 6 /* No such device or address */ -#define LUSTRE_E2BIG 7 /* Argument list too long */ -#define LUSTRE_ENOEXEC 8 /* Exec format error */ -#define LUSTRE_EBADF 9 /* Bad file number */ -#define LUSTRE_ECHILD 10 /* No child processes */ -#define LUSTRE_EAGAIN 11 /* Try again */ -#define LUSTRE_ENOMEM 12 /* Out of memory */ -#define LUSTRE_EACCES 13 /* Permission denied */ -#define LUSTRE_EFAULT 14 /* Bad address */ -#define LUSTRE_ENOTBLK 15 /* Block device required */ -#define LUSTRE_EBUSY 16 /* Device or resource busy */ -#define LUSTRE_EEXIST 17 /* File exists */ -#define LUSTRE_EXDEV 18 /* Cross-device link */ -#define LUSTRE_ENODEV 19 /* No such device */ -#define LUSTRE_ENOTDIR 20 /* Not a directory */ -#define LUSTRE_EISDIR 21 /* Is a directory */ -#define LUSTRE_EINVAL 22 /* Invalid argument */ -#define LUSTRE_ENFILE 23 /* File table overflow */ -#define LUSTRE_EMFILE 24 /* Too many open files */ -#define LUSTRE_ENOTTY 25 /* Not a typewriter */ -#define LUSTRE_ETXTBSY 26 /* Text file busy */ -#define LUSTRE_EFBIG 27 /* File too large */ -#define LUSTRE_ENOSPC 28 /* No space left on device */ -#define LUSTRE_ESPIPE 29 /* Illegal seek */ -#define LUSTRE_EROFS 30 /* Read-only file system */ -#define LUSTRE_EMLINK 31 /* Too many links */ -#define LUSTRE_EPIPE 32 /* Broken pipe */ -#define LUSTRE_EDOM 33 /* Math argument out of func domain */ -#define LUSTRE_ERANGE 34 /* Math result not representable */ -#define LUSTRE_EDEADLK 35 /* Resource deadlock would occur */ -#define LUSTRE_ENAMETOOLONG 36 /* File name too long 
*/ -#define LUSTRE_ENOLCK 37 /* No record locks available */ -#define LUSTRE_ENOSYS 38 /* Function not implemented */ -#define LUSTRE_ENOTEMPTY 39 /* Directory not empty */ -#define LUSTRE_ELOOP 40 /* Too many symbolic links found */ -#define LUSTRE_ENOMSG 42 /* No message of desired type */ -#define LUSTRE_EIDRM 43 /* Identifier removed */ -#define LUSTRE_ECHRNG 44 /* Channel number out of range */ -#define LUSTRE_EL2NSYNC 45 /* Level 2 not synchronized */ -#define LUSTRE_EL3HLT 46 /* Level 3 halted */ -#define LUSTRE_EL3RST 47 /* Level 3 reset */ -#define LUSTRE_ELNRNG 48 /* Link number out of range */ -#define LUSTRE_EUNATCH 49 /* Protocol driver not attached */ -#define LUSTRE_ENOCSI 50 /* No CSI structure available */ -#define LUSTRE_EL2HLT 51 /* Level 2 halted */ -#define LUSTRE_EBADE 52 /* Invalid exchange */ -#define LUSTRE_EBADR 53 /* Invalid request descriptor */ -#define LUSTRE_EXFULL 54 /* Exchange full */ -#define LUSTRE_ENOANO 55 /* No anode */ -#define LUSTRE_EBADRQC 56 /* Invalid request code */ -#define LUSTRE_EBADSLT 57 /* Invalid slot */ -#define LUSTRE_EBFONT 59 /* Bad font file format */ -#define LUSTRE_ENOSTR 60 /* Device not a stream */ -#define LUSTRE_ENODATA 61 /* No data available */ -#define LUSTRE_ETIME 62 /* Timer expired */ -#define LUSTRE_ENOSR 63 /* Out of streams resources */ -#define LUSTRE_ENONET 64 /* Machine is not on the network */ -#define LUSTRE_ENOPKG 65 /* Package not installed */ -#define LUSTRE_EREMOTE 66 /* Object is remote */ -#define LUSTRE_ENOLINK 67 /* Link has been severed */ -#define LUSTRE_EADV 68 /* Advertise error */ -#define LUSTRE_ESRMNT 69 /* Srmount error */ -#define LUSTRE_ECOMM 70 /* Communication error on send */ -#define LUSTRE_EPROTO 71 /* Protocol error */ -#define LUSTRE_EMULTIHOP 72 /* Multihop attempted */ -#define LUSTRE_EDOTDOT 73 /* RFS specific error */ -#define LUSTRE_EBADMSG 74 /* Not a data message */ -#define LUSTRE_EOVERFLOW 75 /* Value too large for data type */ -#define LUSTRE_ENOTUNIQ 76 
/* Name not unique on network */ -#define LUSTRE_EBADFD 77 /* File descriptor in bad state */ -#define LUSTRE_EREMCHG 78 /* Remote address changed */ -#define LUSTRE_ELIBACC 79 /* Can't access needed shared library */ -#define LUSTRE_ELIBBAD 80 /* Access corrupted shared library */ -#define LUSTRE_ELIBSCN 81 /* .lib section in a.out corrupted */ -#define LUSTRE_ELIBMAX 82 /* Trying to link too many libraries */ -#define LUSTRE_ELIBEXEC 83 /* Cannot exec a shared lib directly */ -#define LUSTRE_EILSEQ 84 /* Illegal byte sequence */ -#define LUSTRE_ERESTART 85 /* Restart interrupted system call */ -#define LUSTRE_ESTRPIPE 86 /* Streams pipe error */ -#define LUSTRE_EUSERS 87 /* Too many users */ -#define LUSTRE_ENOTSOCK 88 /* Socket operation on non-socket */ -#define LUSTRE_EDESTADDRREQ 89 /* Destination address required */ -#define LUSTRE_EMSGSIZE 90 /* Message too long */ -#define LUSTRE_EPROTOTYPE 91 /* Protocol wrong type for socket */ -#define LUSTRE_ENOPROTOOPT 92 /* Protocol not available */ -#define LUSTRE_EPROTONOSUPPORT 93 /* Protocol not supported */ -#define LUSTRE_ESOCKTNOSUPPORT 94 /* Socket type not supported */ -#define LUSTRE_EOPNOTSUPP 95 /* Operation not supported */ -#define LUSTRE_EPFNOSUPPORT 96 /* Protocol family not supported */ -#define LUSTRE_EAFNOSUPPORT 97 /* Address family not supported */ -#define LUSTRE_EADDRINUSE 98 /* Address already in use */ -#define LUSTRE_EADDRNOTAVAIL 99 /* Cannot assign requested address */ -#define LUSTRE_ENETDOWN 100 /* Network is down */ -#define LUSTRE_ENETUNREACH 101 /* Network is unreachable */ -#define LUSTRE_ENETRESET 102 /* Network connection drop for reset */ -#define LUSTRE_ECONNABORTED 103 /* Software caused connection abort */ -#define LUSTRE_ECONNRESET 104 /* Connection reset by peer */ -#define LUSTRE_ENOBUFS 105 /* No buffer space available */ -#define LUSTRE_EISCONN 106 /* Transport endpoint is connected */ -#define LUSTRE_ENOTCONN 107 /* Transport endpoint not connected */ -#define 
LUSTRE_ESHUTDOWN 108 /* Cannot send after shutdown */ -#define LUSTRE_ETOOMANYREFS 109 /* Too many references: cannot splice */ -#define LUSTRE_ETIMEDOUT 110 /* Connection timed out */ -#define LUSTRE_ECONNREFUSED 111 /* Connection refused */ -#define LUSTRE_EHOSTDOWN 112 /* Host is down */ -#define LUSTRE_EHOSTUNREACH 113 /* No route to host */ -#define LUSTRE_EALREADY 114 /* Operation already in progress */ -#define LUSTRE_EINPROGRESS 115 /* Operation now in progress */ -#define LUSTRE_ESTALE 116 /* Stale file handle */ -#define LUSTRE_EUCLEAN 117 /* Structure needs cleaning */ -#define LUSTRE_ENOTNAM 118 /* Not a XENIX named type file */ -#define LUSTRE_ENAVAIL 119 /* No XENIX semaphores available */ -#define LUSTRE_EISNAM 120 /* Is a named type file */ -#define LUSTRE_EREMOTEIO 121 /* Remote I/O error */ -#define LUSTRE_EDQUOT 122 /* Quota exceeded */ -#define LUSTRE_ENOMEDIUM 123 /* No medium found */ -#define LUSTRE_EMEDIUMTYPE 124 /* Wrong medium type */ -#define LUSTRE_ECANCELED 125 /* Operation Canceled */ -#define LUSTRE_ENOKEY 126 /* Required key not available */ -#define LUSTRE_EKEYEXPIRED 127 /* Key has expired */ -#define LUSTRE_EKEYREVOKED 128 /* Key has been revoked */ -#define LUSTRE_EKEYREJECTED 129 /* Key was rejected by service */ -#define LUSTRE_EOWNERDEAD 130 /* Owner died */ -#define LUSTRE_ENOTRECOVERABLE 131 /* State not recoverable */ -#define LUSTRE_ERESTARTSYS 512 -#define LUSTRE_ERESTARTNOINTR 513 -#define LUSTRE_ERESTARTNOHAND 514 /* restart if no handler.. 
*/ -#define LUSTRE_ENOIOCTLCMD 515 /* No ioctl command */ -#define LUSTRE_ERESTART_RESTARTBLOCK 516 /* restart via sys_restart_syscall */ -#define LUSTRE_EBADHANDLE 521 /* Illegal NFS file handle */ -#define LUSTRE_ENOTSYNC 522 /* Update synchronization mismatch */ -#define LUSTRE_EBADCOOKIE 523 /* Cookie is stale */ -#define LUSTRE_ENOTSUPP 524 /* Operation is not supported */ -#define LUSTRE_ETOOSMALL 525 /* Buffer or request is too small */ -#define LUSTRE_ESERVERFAULT 526 /* An untranslatable error occurred */ -#define LUSTRE_EBADTYPE 527 /* Type not supported by server */ -#define LUSTRE_EJUKEBOX 528 /* Request won't finish until timeout */ -#define LUSTRE_EIOCBQUEUED 529 /* iocb queued await completion event */ -#define LUSTRE_EIOCBRETRY 530 /* iocb queued, will trigger a retry */ - -/* - * Translations are optimized away on x86. Host errnos that shouldn't be put - * on wire could leak through as a result. Do not count on this side effect. - */ -#ifdef CONFIG_LUSTRE_TRANSLATE_ERRNOS -unsigned int lustre_errno_hton(unsigned int h); -unsigned int lustre_errno_ntoh(unsigned int n); -#else -#define lustre_errno_hton(h) (h) -#define lustre_errno_ntoh(n) (n) -#endif - -#endif /* LUSTRE_ERRNO_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_export.h b/drivers/staging/lustre/lustre/include/lustre_export.h deleted file mode 100644 index 79ad5aae86b9..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_export.h +++ /dev/null @@ -1,250 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -/** \defgroup obd_export PortalRPC export definitions - * - * @{ - */ - -#ifndef __EXPORT_H -#define __EXPORT_H - -/** \defgroup export export - * - * @{ - */ - -#include -#include -#include - -enum obd_option { - OBD_OPT_FORCE = 0x0001, - OBD_OPT_FAILOVER = 0x0002, - OBD_OPT_ABORT_RECOV = 0x0004, -}; - -/** - * Export structure. Represents target-side of connection in portals. - * Also used in Lustre to connect between layers on the same node when - * there is no network-connection in-between. - * For every connected client there is an export structure on the server - * attached to the same obd device. - */ -struct obd_export { - /** - * Export handle, it's id is provided to client on connect - * Subsequent client RPCs contain this handle id to identify - * what export they are talking to. - */ - struct portals_handle exp_handle; - atomic_t exp_refcount; - /** - * Set of counters below is to track where export references are - * kept. The exp_rpc_count is used for reconnect handling also, - * the cb_count and locks_count are for debug purposes only for now. 
- * The sum of them should be less than exp_refcount by 3 - */ - atomic_t exp_rpc_count; /* RPC references */ - atomic_t exp_cb_count; /* Commit callback references */ - /** Number of queued replay requests to be processes */ - atomic_t exp_replay_count; - atomic_t exp_locks_count; /** Lock references */ -#if LUSTRE_TRACKS_LOCK_EXP_REFS - struct list_head exp_locks_list; - spinlock_t exp_locks_list_guard; -#endif - /** UUID of client connected to this export */ - struct obd_uuid exp_client_uuid; - /** To link all exports on an obd device */ - struct list_head exp_obd_chain; - /** work_struct for destruction of export */ - struct work_struct exp_zombie_work; - struct rhash_head exp_uuid_hash; /** uuid-export hash*/ - /** Obd device of this export */ - struct obd_device *exp_obd; - /** - * "reverse" import to send requests (e.g. from ldlm) back to client - * exp_lock protect its change - */ - struct obd_import *exp_imp_reverse; - struct lprocfs_stats *exp_md_stats; - /** Active connection */ - struct ptlrpc_connection *exp_connection; - /** Connection count value from last successful reconnect rpc */ - __u32 exp_conn_cnt; - struct list_head exp_outstanding_replies; - struct list_head exp_uncommitted_replies; - spinlock_t exp_uncommitted_replies_lock; - /** Last committed transno for this export */ - __u64 exp_last_committed; - /** On replay all requests waiting for replay are linked here */ - struct list_head exp_req_replay_queue; - /** - * protects exp_flags, exp_outstanding_replies and the change - * of exp_imp_reverse - */ - spinlock_t exp_lock; - /** Compatibility flags for this export are embedded into - * exp_connect_data - */ - struct obd_connect_data exp_connect_data; - enum obd_option exp_flags; - unsigned long exp_failed:1, - exp_disconnected:1, - exp_connecting:1, - exp_flvr_changed:1, - exp_flvr_adapt:1; - /* also protected by exp_lock */ - enum lustre_sec_part exp_sp_peer; - struct sptlrpc_flavor exp_flvr; /* current */ - struct sptlrpc_flavor 
exp_flvr_old[2]; /* about-to-expire */ - time64_t exp_flvr_expire[2]; /* seconds */ - - /** protects exp_hp_rpcs */ - spinlock_t exp_rpc_lock; - struct list_head exp_hp_rpcs; /* (potential) HP RPCs */ - - /** blocking dlm lock list, protected by exp_bl_list_lock */ - struct list_head exp_bl_list; - spinlock_t exp_bl_list_lock; -}; - -static inline __u64 *exp_connect_flags_ptr(struct obd_export *exp) -{ - return &exp->exp_connect_data.ocd_connect_flags; -} - -static inline __u64 exp_connect_flags(struct obd_export *exp) -{ - return *exp_connect_flags_ptr(exp); -} - -static inline int exp_max_brw_size(struct obd_export *exp) -{ - if (exp_connect_flags(exp) & OBD_CONNECT_BRW_SIZE) - return exp->exp_connect_data.ocd_brw_size; - - return ONE_MB_BRW_SIZE; -} - -static inline int exp_connect_multibulk(struct obd_export *exp) -{ - return exp_max_brw_size(exp) > ONE_MB_BRW_SIZE; -} - -static inline int exp_connect_cancelset(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_CANCELSET); -} - -static inline int exp_connect_lru_resize(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE); -} - -static inline int exp_connect_vbr(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR); -} - -static inline int exp_connect_som(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_SOM); -} - -static inline int exp_connect_umask(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_UMASK); -} - -static inline int imp_connect_lru_resize(struct obd_import *imp) -{ - struct obd_connect_data *ocd; - - ocd = &imp->imp_connect_data; - return !!(ocd->ocd_connect_flags & OBD_CONNECT_LRU_RESIZE); -} - -static inline int exp_connect_layout(struct obd_export *exp) -{ - return !!(exp_connect_flags(exp) & OBD_CONNECT_LAYOUTLOCK); -} - -static inline bool exp_connect_lvb_type(struct obd_export *exp) -{ - if (exp_connect_flags(exp) & OBD_CONNECT_LVB_TYPE) - return true; - 
else - return false; -} - -static inline bool imp_connect_lvb_type(struct obd_import *imp) -{ - struct obd_connect_data *ocd; - - ocd = &imp->imp_connect_data; - if (ocd->ocd_connect_flags & OBD_CONNECT_LVB_TYPE) - return true; - else - return false; -} - -static inline __u64 exp_connect_ibits(struct obd_export *exp) -{ - struct obd_connect_data *ocd; - - ocd = &exp->exp_connect_data; - return ocd->ocd_ibits_known; -} - -static inline bool imp_connect_disp_stripe(struct obd_import *imp) -{ - struct obd_connect_data *ocd; - - ocd = &imp->imp_connect_data; - return ocd->ocd_connect_flags & OBD_CONNECT_DISP_STRIPE; -} - -struct obd_export *class_conn2export(struct lustre_handle *conn); - -#define KKUC_CT_DATA_MAGIC 0x092013cea -struct kkuc_ct_data { - __u32 kcd_magic; - struct obd_uuid kcd_uuid; - __u32 kcd_archive; -}; - -/** @} export */ - -#endif /* __EXPORT_H */ -/** @} obd_export */ diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h deleted file mode 100644 index 094ad282de2c..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_fid.h +++ /dev/null @@ -1,676 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_fid.h - * - * Author: Yury Umanets - */ - -#ifndef __LUSTRE_FID_H -#define __LUSTRE_FID_H - -/** \defgroup fid fid - * - * @{ - * - * http://wiki.lustre.org/index.php/Architecture_-_Interoperability_fids_zfs - * describes the FID namespace and interoperability requirements for FIDs. - * The important parts of that document are included here for reference. - * - * FID - * File IDentifier generated by client from range allocated by the SEQuence - * service and stored in struct lu_fid. The FID is composed of three parts: - * SEQuence, ObjectID, and VERsion. The SEQ component is a filesystem - * unique 64-bit integer, and only one client is ever assigned any SEQ value. - * The first 0x400 FID_SEQ_NORMAL [2^33, 2^33 + 0x400] values are reserved - * for system use. The OID component is a 32-bit value generated by the - * client on a per-SEQ basis to allow creating many unique FIDs without - * communication with the server. The VER component is a 32-bit value that - * distinguishes between different FID instantiations, such as snapshots or - * separate subtrees within the filesystem. FIDs with the same VER field - * are considered part of the same namespace. - * - * OLD filesystems are those upgraded from Lustre 1.x that predate FIDs, and - * MDTs use 32-bit ldiskfs internal inode/generation numbers (IGIFs), while - * OSTs use 64-bit Lustre object IDs and generation numbers. - * - * NEW filesystems are those formatted since the introduction of FIDs. 
- * - * IGIF - * Inode and Generation In FID, a surrogate FID used to globally identify - * an existing object on OLD formatted MDT file system. This would only be - * used on MDT0 in a DNE filesystem, because there cannot be more than one - * MDT in an OLD formatted filesystem. Belongs to sequence in [12, 2^32 - 1] - * range, where inode number is stored in SEQ, and inode generation is in OID. - * NOTE: This assumes no more than 2^32-1 inodes exist in the MDT filesystem, - * which is the maximum possible for an ldiskfs backend. It also assumes - * that the reserved ext3/ext4/ldiskfs inode numbers [0-11] are never visible - * to clients, which has always been true. - * - * IDIF - * object ID In FID, a surrogate FID used to globally identify an existing - * OST object on OLD formatted OST file system. Belongs to a sequence in - * [2^32, 2^33 - 1]. Sequence number is calculated as: - * - * 1 << 32 | (ost_index << 16) | ((objid >> 32) & 0xffff) - * - * that is, SEQ consists of 16-bit OST index, and higher 16 bits of object - * ID. The generation of unique SEQ values per OST allows the IDIF FIDs to - * be identified in the FLD correctly. The OID field is calculated as: - * - * objid & 0xffffffff - * - * that is, it consists of lower 32 bits of object ID. For objects within - * the IDIF range, object ID extraction will be: - * - * o_id = (fid->f_seq & 0x7fff) << 16 | fid->f_oid; - * o_seq = 0; // formerly group number - * - * NOTE: This assumes that no more than 2^48-1 objects have ever been created - * on any OST, and that no more than 65535 OSTs are in use. Both are very - * reasonable assumptions, i.e. an IDIF can uniquely map all objects assuming - * a maximum creation rate of 1M objects per second for a maximum of 9 years, - * or combinations thereof. - * - * OST_MDT0 - * Surrogate FID used to identify an existing object on OLD formatted OST - * filesystem. 
Belongs to the reserved SEQuence 0, and is used prior to - * the introduction of FID-on-OST, at which point IDIF will be used to - * identify objects as residing on a specific OST. - * - * LLOG - * For Lustre Log objects the object sequence 1 is used. This is compatible - * with both OLD and NEW namespaces, as this SEQ number is in the - * ext3/ldiskfs reserved inode range and does not conflict with IGIF - * sequence numbers. - * - * ECHO - * For testing OST IO performance the object sequence 2 is used. This is - * compatible with both OLD and NEW namespaces, as this SEQ number is in - * the ext3/ldiskfs reserved inode range and does not conflict with IGIF - * sequence numbers. - * - * OST_MDT1 .. OST_MAX - * For testing with multiple MDTs the object sequence 3 through 9 is used, - * allowing direct mapping of MDTs 1 through 7 respectively, for a total - * of 8 MDTs including OST_MDT0. This matches the legacy CMD project "group" - * mappings. However, this SEQ range is only for testing prior to any - * production DNE release, as the objects in this range conflict across all - * OSTs, as the OST index is not part of the FID. For production DNE usage, - * OST objects created by MDT1+ will use FID_SEQ_NORMAL FIDs. - * - * DLM OST objid to IDIF mapping - * For compatibility with existing OLD OST network protocol structures, the - * FID must map onto the o_id and o_seq in a manner that ensures existing - * objects are identified consistently for IO, as well as onto the LDLM - * namespace to ensure IDIFs there is only a single resource name for any - * object in the DLM. 
The OLD OST object DLM resource mapping is: - * - * resource[] = {o_id, o_seq, 0, 0}; // o_seq == 0 for production releases - * - * The NEW OST object DLM resource mapping is the same for both MDT and OST: - * - * resource[] = {SEQ, OID, VER, HASH}; - * - * NOTE: for mapping IDIF values to DLM resource names the o_id may be - * larger than the 2^33 reserved sequence numbers for IDIF, so it is possible - * for the o_id numbers to overlap FID SEQ numbers in the resource. However, - * in all production releases the OLD o_seq field is always zero, and all - * valid FID OID values are non-zero, so the lock resources will not collide. - * Even so, the MDT and OST resources are also in different LDLM namespaces. - */ - -#include -#include -#include -#include - -struct lu_env; -struct lu_site; -struct lu_context; -struct obd_device; -struct obd_export; - -/* Whole sequences space range and zero range definitions */ -extern const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE; -extern const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE; -extern const struct lu_fid LUSTRE_BFL_FID; -extern const struct lu_fid LU_OBF_FID; -extern const struct lu_fid LU_DOT_LUSTRE_FID; - -enum { - /* - * This is how may metadata FIDs may be allocated in one sequence(128k) - */ - LUSTRE_METADATA_SEQ_MAX_WIDTH = 0x0000000000020000ULL, - - /* - * This is how many data FIDs could be allocated in one sequence(4B - 1) - */ - LUSTRE_DATA_SEQ_MAX_WIDTH = 0x00000000FFFFFFFFULL, - - /* - * How many sequences to allocate to a client at once. - */ - LUSTRE_SEQ_META_WIDTH = 0x0000000000000001ULL, - - /* - * seq allocation pool size. - */ - LUSTRE_SEQ_BATCH_WIDTH = LUSTRE_SEQ_META_WIDTH * 1000, - - /* - * This is how many sequences may be in one super-sequence allocated to - * MDTs. 
- */ - LUSTRE_SEQ_SUPER_WIDTH = ((1ULL << 30ULL) * LUSTRE_SEQ_META_WIDTH) -}; - -enum { - /** 2^6 FIDs for OI containers */ - OSD_OI_FID_OID_BITS = 6, - /** reserve enough FIDs in case we want more in the future */ - OSD_OI_FID_OID_BITS_MAX = 10, -}; - -/** special OID for local objects */ -enum local_oid { - /** \see fld_mod_init */ - FLD_INDEX_OID = 3UL, - /** \see fid_mod_init */ - FID_SEQ_CTL_OID = 4UL, - FID_SEQ_SRV_OID = 5UL, - /** \see mdd_mod_init */ - MDD_ROOT_INDEX_OID = 6UL, /* deprecated in 2.4 */ - MDD_ORPHAN_OID = 7UL, /* deprecated in 2.4 */ - MDD_LOV_OBJ_OID = 8UL, - MDD_CAPA_KEYS_OID = 9UL, - /** \see mdt_mod_init */ - LAST_RECV_OID = 11UL, - OSD_FS_ROOT_OID = 13UL, - ACCT_USER_OID = 15UL, - ACCT_GROUP_OID = 16UL, - LFSCK_BOOKMARK_OID = 17UL, - OTABLE_IT_OID = 18UL, - /* These two definitions are obsolete - * OFD_GROUP0_LAST_OID = 20UL, - * OFD_GROUP4K_LAST_OID = 20UL+4096, - */ - OFD_LAST_GROUP_OID = 4117UL, - LLOG_CATALOGS_OID = 4118UL, - MGS_CONFIGS_OID = 4119UL, - OFD_HEALTH_CHECK_OID = 4120UL, - MDD_LOV_OBJ_OSEQ = 4121UL, - LFSCK_NAMESPACE_OID = 4122UL, - REMOTE_PARENT_DIR_OID = 4123UL, - SLAVE_LLOG_CATALOGS_OID = 4124UL, -}; - -static inline void lu_local_obj_fid(struct lu_fid *fid, __u32 oid) -{ - fid->f_seq = FID_SEQ_LOCAL_FILE; - fid->f_oid = oid; - fid->f_ver = 0; -} - -static inline void lu_local_name_obj_fid(struct lu_fid *fid, __u32 oid) -{ - fid->f_seq = FID_SEQ_LOCAL_NAME; - fid->f_oid = oid; - fid->f_ver = 0; -} - -/* For new FS (>= 2.4), the root FID will be changed to - * [FID_SEQ_ROOT:1:0], for existing FS, (upgraded to 2.4), - * the root FID will still be IGIF - */ -static inline int fid_is_root(const struct lu_fid *fid) -{ - return unlikely((fid_seq(fid) == FID_SEQ_ROOT && - fid_oid(fid) == 1)); -} - -static inline int fid_is_dot_lustre(const struct lu_fid *fid) -{ - return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE && - fid_oid(fid) == FID_OID_DOT_LUSTRE); -} - -static inline int fid_is_obf(const struct lu_fid *fid) -{ - 
return unlikely(fid_seq(fid) == FID_SEQ_DOT_LUSTRE && - fid_oid(fid) == FID_OID_DOT_LUSTRE_OBF); -} - -static inline int fid_is_otable_it(const struct lu_fid *fid) -{ - return unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE && - fid_oid(fid) == OTABLE_IT_OID); -} - -static inline int fid_is_acct(const struct lu_fid *fid) -{ - return fid_seq(fid) == FID_SEQ_LOCAL_FILE && - (fid_oid(fid) == ACCT_USER_OID || - fid_oid(fid) == ACCT_GROUP_OID); -} - -static inline int fid_is_quota(const struct lu_fid *fid) -{ - return fid_seq(fid) == FID_SEQ_QUOTA || - fid_seq(fid) == FID_SEQ_QUOTA_GLB; -} - -static inline int fid_seq_in_fldb(__u64 seq) -{ - return fid_seq_is_igif(seq) || fid_seq_is_norm(seq) || - fid_seq_is_root(seq) || fid_seq_is_dot(seq); -} - -static inline void lu_last_id_fid(struct lu_fid *fid, __u64 seq, __u32 ost_idx) -{ - if (fid_seq_is_mdt0(seq)) { - fid->f_seq = fid_idif_seq(0, ost_idx); - } else { - LASSERTF(fid_seq_is_norm(seq) || fid_seq_is_echo(seq) || - fid_seq_is_idif(seq), "%#llx\n", seq); - fid->f_seq = seq; - } - fid->f_oid = 0; - fid->f_ver = 0; -} - -/* seq client type */ -enum lu_cli_type { - LUSTRE_SEQ_METADATA = 1, - LUSTRE_SEQ_DATA -}; - -enum lu_mgr_type { - LUSTRE_SEQ_SERVER, - LUSTRE_SEQ_CONTROLLER -}; - -/* Client sequence manager interface. */ -struct lu_client_seq { - /* Sequence-controller export. */ - struct obd_export *lcs_exp; - spinlock_t lcs_lock; - - /* - * Range of allowed for allocation sequences. When using lu_client_seq on - * clients, this contains meta-sequence range. And for servers this - * contains super-sequence range. - */ - struct lu_seq_range lcs_space; - - /* Seq related proc */ - struct dentry *lcs_debugfs_entry; - - /* This holds last allocated fid in last obtained seq */ - struct lu_fid lcs_fid; - - /* LUSTRE_SEQ_METADATA or LUSTRE_SEQ_DATA */ - enum lu_cli_type lcs_type; - - /* - * Service uuid, passed from MDT + seq name to form unique seq name to - * use it with procfs. 
- */ - char lcs_name[LUSTRE_MDT_MAXNAMELEN]; - - /* - * Sequence width, that is how many objects may be allocated in one - * sequence. Default value for it is LUSTRE_SEQ_MAX_WIDTH. - */ - __u64 lcs_width; - - /* wait queue for fid allocation and update indicator */ - wait_queue_head_t lcs_waitq; - int lcs_update; -}; - -/* Client methods */ -void seq_client_flush(struct lu_client_seq *seq); - -int seq_client_alloc_fid(const struct lu_env *env, struct lu_client_seq *seq, - struct lu_fid *fid); -/* Fids common stuff */ -int fid_is_local(const struct lu_env *env, - struct lu_site *site, const struct lu_fid *fid); - -enum lu_cli_type; -int client_fid_init(struct obd_device *obd, struct obd_export *exp, - enum lu_cli_type type); -int client_fid_fini(struct obd_device *obd); - -/* fid locking */ - -struct ldlm_namespace; - -/* - * Build (DLM) resource name from FID. - * - * NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2], - * but was moved into name[1] along with the OID to avoid consuming the - * renaming name[2,3] fields that need to be used for the quota identifier. - */ -static inline void -fid_build_reg_res_name(const struct lu_fid *fid, struct ldlm_res_id *res) -{ - memset(res, 0, sizeof(*res)); - res->name[LUSTRE_RES_ID_SEQ_OFF] = fid_seq(fid); - res->name[LUSTRE_RES_ID_VER_OID_OFF] = fid_ver_oid(fid); -} - -/* - * Return true if resource is for object identified by FID. - */ -static inline bool fid_res_name_eq(const struct lu_fid *fid, - const struct ldlm_res_id *res) -{ - return res->name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(fid) && - res->name[LUSTRE_RES_ID_VER_OID_OFF] == fid_ver_oid(fid); -} - -/* - * Extract FID from LDLM resource. Reverse of fid_build_reg_res_name(). 
- */ -static inline void -fid_extract_from_res_name(struct lu_fid *fid, const struct ldlm_res_id *res) -{ - fid->f_seq = res->name[LUSTRE_RES_ID_SEQ_OFF]; - fid->f_oid = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF]); - fid->f_ver = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32); - LASSERT(fid_res_name_eq(fid, res)); -} - -/* - * Build (DLM) resource identifier from global quota FID and quota ID. - */ -static inline void -fid_build_quota_res_name(const struct lu_fid *glb_fid, union lquota_id *qid, - struct ldlm_res_id *res) -{ - fid_build_reg_res_name(glb_fid, res); - res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] = fid_seq(&qid->qid_fid); - res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] = fid_ver_oid(&qid->qid_fid); -} - -/* - * Extract global FID and quota ID from resource name - */ -static inline void fid_extract_from_quota_res(struct lu_fid *glb_fid, - union lquota_id *qid, - const struct ldlm_res_id *res) -{ - fid_extract_from_res_name(glb_fid, res); - qid->qid_fid.f_seq = res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF]; - qid->qid_fid.f_oid = (__u32)res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF]; - qid->qid_fid.f_ver = - (__u32)(res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] >> 32); -} - -static inline void -fid_build_pdo_res_name(const struct lu_fid *fid, unsigned int hash, - struct ldlm_res_id *res) -{ - fid_build_reg_res_name(fid, res); - res->name[LUSTRE_RES_ID_HSH_OFF] = hash; -} - -/** - * Build DLM resource name from object id & seq, which will be removed - * finally, when we replace ost_id with FID in data stack. - * - * Currently, resid from the old client, whose res[0] = object_id, - * res[1] = object_seq, is just opposite with Metatdata - * resid, where, res[0] = fid->f_seq, res[1] = fid->f_oid. - * To unify the resid identification, we will reverse the data - * resid to keep it same with Metadata resid, i.e. - * - * For resid from the old client, - * res[0] = objid, res[1] = 0, still keep the original order, - * for compatibility. 
- * - * For new resid - * res will be built from normal FID directly, i.e. res[0] = f_seq, - * res[1] = f_oid + f_ver. - */ -static inline void ostid_build_res_name(const struct ost_id *oi, - struct ldlm_res_id *name) -{ - memset(name, 0, sizeof(*name)); - if (fid_seq_is_mdt0(ostid_seq(oi))) { - name->name[LUSTRE_RES_ID_SEQ_OFF] = ostid_id(oi); - name->name[LUSTRE_RES_ID_VER_OID_OFF] = ostid_seq(oi); - } else { - fid_build_reg_res_name(&oi->oi_fid, name); - } -} - -/** - * Return true if the resource is for the object identified by this id & group. - */ -static inline int ostid_res_name_eq(const struct ost_id *oi, - const struct ldlm_res_id *name) -{ - /* Note: it is just a trick here to save some effort, probably the - * correct way would be turn them into the FID and compare - */ - if (fid_seq_is_mdt0(ostid_seq(oi))) { - return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_id(oi) && - name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_seq(oi); - } else { - return name->name[LUSTRE_RES_ID_SEQ_OFF] == ostid_seq(oi) && - name->name[LUSTRE_RES_ID_VER_OID_OFF] == ostid_id(oi); - } -} - -/** - * Note: we need check oi_seq to decide where to set oi_id, - * so oi_seq should always be set ahead of oi_id. 
- */ -static inline int ostid_set_id(struct ost_id *oi, __u64 oid) -{ - if (fid_seq_is_mdt0(oi->oi.oi_seq)) { - if (oid >= IDIF_MAX_OID) - return -E2BIG; - oi->oi.oi_id = oid; - } else if (fid_is_idif(&oi->oi_fid)) { - if (oid >= IDIF_MAX_OID) - return -E2BIG; - oi->oi_fid.f_seq = fid_idif_seq(oid, - fid_idif_ost_idx(&oi->oi_fid)); - oi->oi_fid.f_oid = oid; - oi->oi_fid.f_ver = oid >> 48; - } else { - if (oid >= OBIF_MAX_OID) - return -E2BIG; - oi->oi_fid.f_oid = oid; - } - return 0; -} - -/* pack any OST FID into an ostid (id/seq) for the wire/disk */ -static inline int fid_to_ostid(const struct lu_fid *fid, struct ost_id *ostid) -{ - int rc = 0; - - if (fid_seq_is_igif(fid->f_seq)) - return -EBADF; - - if (fid_is_idif(fid)) { - u64 objid = fid_idif_id(fid_seq(fid), fid_oid(fid), - fid_ver(fid)); - - ostid_set_seq_mdt0(ostid); - rc = ostid_set_id(ostid, objid); - } else { - ostid->oi_fid = *fid; - } - - return rc; -} - -/* The same as osc_build_res_name() */ -static inline void ost_fid_build_resid(const struct lu_fid *fid, - struct ldlm_res_id *resname) -{ - if (fid_is_mdt0(fid) || fid_is_idif(fid)) { - struct ost_id oi; - - oi.oi.oi_id = 0; /* gcc 4.7.2 complains otherwise */ - if (fid_to_ostid(fid, &oi) != 0) - return; - ostid_build_res_name(&oi, resname); - } else { - fid_build_reg_res_name(fid, resname); - } -} - -/** - * Flatten 128-bit FID values into a 64-bit value for use as an inode number. - * For non-IGIF FIDs this starts just over 2^32, and continues without - * conflict until 2^64, at which point we wrap the high 24 bits of the SEQ - * into the range where there may not be many OID values in use, to minimize - * the risk of conflict. - * - * Suppose LUSTRE_SEQ_MAX_WIDTH less than (1 << 24) which is currently true, - * the time between re-used inode numbers is very long - 2^40 SEQ numbers, - * or about 2^40 client mounts, if clients create less than 2^24 files/mount. 
- */ -static inline __u64 fid_flatten(const struct lu_fid *fid) -{ - __u64 ino; - __u64 seq; - - if (fid_is_igif(fid)) { - ino = lu_igif_ino(fid); - return ino; - } - - seq = fid_seq(fid); - - ino = (seq << 24) + ((seq >> 24) & 0xffffff0000ULL) + fid_oid(fid); - - return ino ? ino : fid_oid(fid); -} - -static inline __u32 fid_hash(const struct lu_fid *f, int bits) -{ - /* all objects with same id and different versions will belong to same - * collisions list. - */ - return hash_long(fid_flatten(f), bits); -} - -/** - * map fid to 32 bit value for ino on 32bit systems. - */ -static inline __u32 fid_flatten32(const struct lu_fid *fid) -{ - __u32 ino; - __u64 seq; - - if (fid_is_igif(fid)) { - ino = lu_igif_ino(fid); - return ino; - } - - seq = fid_seq(fid) - FID_SEQ_START; - - /* Map the high bits of the OID into higher bits of the inode number so - * that inodes generated at about the same time have a reduced chance - * of collisions. This will give a period of 2^12 = 1024 unique clients - * (from SEQ) and up to min(LUSTRE_SEQ_MAX_WIDTH, 2^20) = 128k objects - * (from OID), or up to 128M inodes without collisions for new files. - */ - ino = ((seq & 0x000fffffULL) << 12) + ((seq >> 8) & 0xfffff000) + - (seq >> (64 - (40 - 8)) & 0xffffff00) + - (fid_oid(fid) & 0xff000fff) + ((fid_oid(fid) & 0x00fff000) << 8); - - return ino ? 
ino : fid_oid(fid); -} - -static inline int lu_fid_diff(const struct lu_fid *fid1, - const struct lu_fid *fid2) -{ - LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:" DFID ", fid2:" DFID "\n", - PFID(fid1), PFID(fid2)); - - if (fid_is_idif(fid1) && fid_is_idif(fid2)) - return fid_idif_id(fid1->f_seq, fid1->f_oid, fid1->f_ver) - - fid_idif_id(fid2->f_seq, fid2->f_oid, fid2->f_ver); - - return fid_oid(fid1) - fid_oid(fid2); -} - -#define LUSTRE_SEQ_SRV_NAME "seq_srv" -#define LUSTRE_SEQ_CTL_NAME "seq_ctl" - -/* Range common stuff */ -static inline void range_cpu_to_le(struct lu_seq_range *dst, const struct lu_seq_range *src) -{ - dst->lsr_start = cpu_to_le64(src->lsr_start); - dst->lsr_end = cpu_to_le64(src->lsr_end); - dst->lsr_index = cpu_to_le32(src->lsr_index); - dst->lsr_flags = cpu_to_le32(src->lsr_flags); -} - -static inline void range_le_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src) -{ - dst->lsr_start = le64_to_cpu(src->lsr_start); - dst->lsr_end = le64_to_cpu(src->lsr_end); - dst->lsr_index = le32_to_cpu(src->lsr_index); - dst->lsr_flags = le32_to_cpu(src->lsr_flags); -} - -static inline void range_cpu_to_be(struct lu_seq_range *dst, const struct lu_seq_range *src) -{ - dst->lsr_start = cpu_to_be64(src->lsr_start); - dst->lsr_end = cpu_to_be64(src->lsr_end); - dst->lsr_index = cpu_to_be32(src->lsr_index); - dst->lsr_flags = cpu_to_be32(src->lsr_flags); -} - -static inline void range_be_to_cpu(struct lu_seq_range *dst, const struct lu_seq_range *src) -{ - dst->lsr_start = be64_to_cpu(src->lsr_start); - dst->lsr_end = be64_to_cpu(src->lsr_end); - dst->lsr_index = be32_to_cpu(src->lsr_index); - dst->lsr_flags = be32_to_cpu(src->lsr_flags); -} - -/** @} fid */ - -#endif /* __LUSTRE_FID_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_fld.h b/drivers/staging/lustre/lustre/include/lustre_fld.h deleted file mode 100644 index f42122a4dfaa..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_fld.h +++ /dev/null @@ -1,137 
+0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LINUX_FLD_H -#define __LINUX_FLD_H - -/** \defgroup fld fld - * - * @{ - */ - -#include -#include - -struct lu_client_fld; -struct lu_server_fld; -struct lu_fld_hash; -struct fld_cache; - -extern const struct dt_index_features fld_index_features; -extern const char fld_index_name[]; - -/* - * FLD (Fid Location Database) interface. - */ -enum { - LUSTRE_CLI_FLD_HASH_DHT = 0, - LUSTRE_CLI_FLD_HASH_RRB -}; - -struct lu_fld_target { - struct list_head ft_chain; - struct obd_export *ft_exp; - struct lu_server_fld *ft_srv; - __u64 ft_idx; -}; - -struct lu_server_fld { - /** - * super sequence controller export, needed to forward fld - * lookup request. - */ - struct obd_export *lsf_control_exp; - - /** Client FLD cache. 
*/ - struct fld_cache *lsf_cache; - - /** Protect index modifications */ - struct mutex lsf_lock; - - /** Fld service name in form "fld-srv-lustre-MDTXXX" */ - char lsf_name[LUSTRE_MDT_MAXNAMELEN]; - -}; - -struct lu_client_fld { - /** Client side debugfs entry. */ - struct dentry *lcf_debugfs_entry; - - /** List of exports client FLD knows about. */ - struct list_head lcf_targets; - - /** Current hash to be used to chose an export. */ - struct lu_fld_hash *lcf_hash; - - /** Exports count. */ - int lcf_count; - - /** Lock protecting exports list and fld_hash. */ - spinlock_t lcf_lock; - - /** Client FLD cache. */ - struct fld_cache *lcf_cache; - - /** Client fld debugfs entry name. */ - char lcf_name[LUSTRE_MDT_MAXNAMELEN]; -}; - -/* Client methods */ -int fld_client_init(struct lu_client_fld *fld, - const char *prefix, int hash); - -void fld_client_fini(struct lu_client_fld *fld); - -void fld_client_flush(struct lu_client_fld *fld); - -int fld_client_lookup(struct lu_client_fld *fld, u64 seq, u32 *mds, - __u32 flags, const struct lu_env *env); - -int fld_client_create(struct lu_client_fld *fld, - struct lu_seq_range *range, - const struct lu_env *env); - -int fld_client_delete(struct lu_client_fld *fld, u64 seq, - const struct lu_env *env); - -int fld_client_add_target(struct lu_client_fld *fld, - struct lu_fld_target *tar); - -int fld_client_del_target(struct lu_client_fld *fld, - __u64 idx); - -void fld_client_debugfs_fini(struct lu_client_fld *fld); - -/** @} fld */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_ha.h b/drivers/staging/lustre/lustre/include/lustre_ha.h deleted file mode 100644 index cbd68985ada9..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_ha.h +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef _LUSTRE_HA_H -#define _LUSTRE_HA_H - -/** \defgroup ha ha - * - * @{ - */ - -struct obd_import; -struct obd_export; -struct obd_device; -struct ptlrpc_request; - -int ptlrpc_replay(struct obd_import *imp); -int ptlrpc_resend(struct obd_import *imp); -void ptlrpc_free_committed(struct obd_import *imp); -void ptlrpc_wake_delayed(struct obd_import *imp); -int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async); -int ptlrpc_set_import_active(struct obd_import *imp, int active); -void ptlrpc_activate_import(struct obd_import *imp); -void ptlrpc_deactivate_import(struct obd_import *imp); -void ptlrpc_invalidate_import(struct obd_import *imp); -void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt); -void ptlrpc_pinger_force(struct obd_import *imp); - -/** @} ha */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h deleted file mode 100644 index 3556ce8d94e8..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_handles.h +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __LUSTRE_HANDLES_H_ -#define __LUSTRE_HANDLES_H_ - -/** \defgroup handles handles - * - * @{ - */ - -#include -#include -#include -#include -#include - -struct portals_handle_ops { - void (*hop_addref)(void *object); - void (*hop_free)(void *object, int size); -}; - -/* These handles are most easily used by having them appear at the very top of - * whatever object that you want to make handles for. ie: - * - * struct ldlm_lock { - * struct portals_handle handle; - * ... - * }; - * - * Now you're able to assign the results of cookie2handle directly to an - * ldlm_lock. If it's not at the top, you'll want to use container_of() - * to compute the start of the structure based on the handle field. - */ -struct portals_handle { - struct list_head h_link; - __u64 h_cookie; - const void *h_owner; - struct portals_handle_ops *h_ops; - - /* newly added fields to handle the RCU issue. 
-jxiong */ - struct rcu_head h_rcu; - spinlock_t h_lock; - unsigned int h_size:31; - unsigned int h_in:1; -}; - -/* handles.c */ - -/* Add a handle to the hash table */ -void class_handle_hash(struct portals_handle *, - struct portals_handle_ops *ops); -void class_handle_unhash(struct portals_handle *); -void *class_handle2object(__u64 cookie, const void *owner); -void class_handle_free_cb(struct rcu_head *rcu); -int class_handle_init(void); -void class_handle_cleanup(void); - -/** @} handles */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h deleted file mode 100644 index ac3805ead620..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_import.h +++ /dev/null @@ -1,369 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ -/** \defgroup obd_import PtlRPC import definitions - * Imports are client-side representation of remote obd target. - * - * @{ - */ - -#ifndef __IMPORT_H -#define __IMPORT_H - -/** \defgroup export export - * - * @{ - */ - -#include -#include -#include - -/** - * Adaptive Timeout stuff - * - * @{ - */ -#define D_ADAPTTO D_OTHER -#define AT_BINS 4 /* "bin" means "N seconds of history" */ -#define AT_FLG_NOHIST 0x1 /* use last reported value only */ - -struct adaptive_timeout { - time64_t at_binstart; /* bin start time */ - unsigned int at_hist[AT_BINS]; /* timeout history bins */ - unsigned int at_flags; - unsigned int at_current; /* current timeout value */ - unsigned int at_worst_ever; /* worst-ever timeout value */ - time64_t at_worst_time; /* worst-ever timeout timestamp */ - spinlock_t at_lock; -}; - -struct ptlrpc_at_array { - struct list_head *paa_reqs_array; /** array to hold requests */ - __u32 paa_size; /** the size of array */ - __u32 paa_count; /** the total count of reqs */ - time64_t paa_deadline; /** the earliest deadline of reqs */ - __u32 *paa_reqs_count; /** the count of reqs in each entry */ -}; - -#define IMP_AT_MAX_PORTALS 8 -struct imp_at { - int iat_portal[IMP_AT_MAX_PORTALS]; - struct adaptive_timeout iat_net_latency; - struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS]; -}; - -/** @} */ - -/** Possible import states */ -enum lustre_imp_state { - LUSTRE_IMP_CLOSED = 1, - LUSTRE_IMP_NEW = 2, - LUSTRE_IMP_DISCON = 3, - LUSTRE_IMP_CONNECTING = 4, - LUSTRE_IMP_REPLAY = 5, - LUSTRE_IMP_REPLAY_LOCKS = 6, - LUSTRE_IMP_REPLAY_WAIT = 7, - LUSTRE_IMP_RECOVER = 8, - LUSTRE_IMP_FULL = 9, - LUSTRE_IMP_EVICTED = 10, -}; - -/** Returns test string representation of numeric import state \a state */ -static inline char *ptlrpc_import_state_name(enum lustre_imp_state state) -{ - static char *import_state_names[] = { - "", "CLOSED", "NEW", "DISCONN", - "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", - "RECOVER", "FULL", "EVICTED", - 
}; - - LASSERT(state <= LUSTRE_IMP_EVICTED); - return import_state_names[state]; -} - -/** - * List of import event types - */ -enum obd_import_event { - IMP_EVENT_DISCON = 0x808001, - IMP_EVENT_INACTIVE = 0x808002, - IMP_EVENT_INVALIDATE = 0x808003, - IMP_EVENT_ACTIVE = 0x808004, - IMP_EVENT_OCD = 0x808005, - IMP_EVENT_DEACTIVATE = 0x808006, - IMP_EVENT_ACTIVATE = 0x808007, -}; - -/** - * Definition of import connection structure - */ -struct obd_import_conn { - /** Item for linking connections together */ - struct list_head oic_item; - /** Pointer to actual PortalRPC connection */ - struct ptlrpc_connection *oic_conn; - /** uuid of remote side */ - struct obd_uuid oic_uuid; - /** - * Time (64 bit jiffies) of last connection attempt on this connection - */ - __u64 oic_last_attempt; -}; - -/* state history */ -#define IMP_STATE_HIST_LEN 16 -struct import_state_hist { - enum lustre_imp_state ish_state; - time64_t ish_time; -}; - -/** - * Definition of PortalRPC import structure. - * Imports are representing client-side view to remote target. - */ -struct obd_import { - /** Local handle (== id) for this import. */ - struct portals_handle imp_handle; - /** Reference counter */ - atomic_t imp_refcount; - struct lustre_handle imp_dlm_handle; /* client's ldlm export */ - /** Currently active connection */ - struct ptlrpc_connection *imp_connection; - /** PortalRPC client structure for this import */ - struct ptlrpc_client *imp_client; - /** List element for linking into pinger chain */ - struct list_head imp_pinger_chain; - /** work struct for destruction of import */ - struct work_struct imp_zombie_work; - - /** - * Lists of requests that are retained for replay, waiting for a reply, - * or waiting for recovery to complete, respectively. - * @{ - */ - struct list_head imp_replay_list; - struct list_head imp_sending_list; - struct list_head imp_delayed_list; - /** @} */ - - /** - * List of requests that are retained for committed open replay. 
Once - * open is committed, open replay request will be moved from the - * imp_replay_list into the imp_committed_list. - * The imp_replay_cursor is for accelerating searching during replay. - * @{ - */ - struct list_head imp_committed_list; - struct list_head *imp_replay_cursor; - /** @} */ - - /** List of not replied requests */ - struct list_head imp_unreplied_list; - /** Known maximal replied XID */ - __u64 imp_known_replied_xid; - - /** obd device for this import */ - struct obd_device *imp_obd; - - /** - * some seciruty-related fields - * @{ - */ - struct ptlrpc_sec *imp_sec; - struct mutex imp_sec_mutex; - time64_t imp_sec_expire; - /** @} */ - - /** Wait queue for those who need to wait for recovery completion */ - wait_queue_head_t imp_recovery_waitq; - - /** Number of requests currently in-flight */ - atomic_t imp_inflight; - /** Number of requests currently unregistering */ - atomic_t imp_unregistering; - /** Number of replay requests inflight */ - atomic_t imp_replay_inflight; - /** Number of currently happening import invalidations */ - atomic_t imp_inval_count; - /** Numbner of request timeouts */ - atomic_t imp_timeouts; - /** Current import state */ - enum lustre_imp_state imp_state; - /** Last replay state */ - enum lustre_imp_state imp_replay_state; - /** History of import states */ - struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN]; - int imp_state_hist_idx; - /** Current import generation. 
Incremented on every reconnect */ - int imp_generation; - /** Incremented every time we send reconnection request */ - __u32 imp_conn_cnt; - /** - * \see ptlrpc_free_committed remembers imp_generation value here - * after a check to save on unnecessary replay list iterations - */ - int imp_last_generation_checked; - /** Last transno we replayed */ - __u64 imp_last_replay_transno; - /** Last transno committed on remote side */ - __u64 imp_peer_committed_transno; - /** - * \see ptlrpc_free_committed remembers last_transno since its last - * check here and if last_transno did not change since last run of - * ptlrpc_free_committed and import generation is the same, we can - * skip looking for requests to remove from replay list as optimisation - */ - __u64 imp_last_transno_checked; - /** - * Remote export handle. This is how remote side knows what export - * we are talking to. Filled from response to connect request - */ - struct lustre_handle imp_remote_handle; - /** When to perform next ping. time in jiffies. */ - unsigned long imp_next_ping; - /** When we last successfully connected. time in 64bit jiffies */ - __u64 imp_last_success_conn; - - /** List of all possible connection for import. */ - struct list_head imp_conn_list; - /** - * Current connection. 
\a imp_connection is imp_conn_current->oic_conn - */ - struct obd_import_conn *imp_conn_current; - - /** Protects flags, level, generation, conn_cnt, *_list */ - spinlock_t imp_lock; - - /* flags */ - unsigned long imp_no_timeout:1, /* timeouts are disabled */ - imp_invalid:1, /* evicted */ - /* administratively disabled */ - imp_deactive:1, - /* try to recover the import */ - imp_replayable:1, - /* don't run recovery (timeout instead) */ - imp_dlm_fake:1, - /* use 1/2 timeout on MDS' OSCs */ - imp_server_timeout:1, - /* VBR: imp in delayed recovery */ - imp_delayed_recovery:1, - /* VBR: if gap was found then no lock replays - */ - imp_no_lock_replay:1, - /* recovery by versions was failed */ - imp_vbr_failed:1, - /* force an immediate ping */ - imp_force_verify:1, - /* force a scheduled ping */ - imp_force_next_verify:1, - /* pingable */ - imp_pingable:1, - /* resend for replay */ - imp_resend_replay:1, - /* disable normal recovery, for test only. */ - imp_no_pinger_recover:1, -#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE - /* need IR MNE swab */ - imp_need_mne_swab:1, -#endif - /* import must be reconnected instead of - * chosing new connection - */ - imp_force_reconnect:1, - /* import has tried to connect with server */ - imp_connect_tried:1, - /* connected but not FULL yet */ - imp_connected:1; - __u32 imp_connect_op; - struct obd_connect_data imp_connect_data; - __u64 imp_connect_flags_orig; - int imp_connect_error; - - __u32 imp_msg_magic; - __u32 imp_msghdr_flags; /* adjusted based on server capability */ - - struct imp_at imp_at; /* adaptive timeout data */ - time64_t imp_last_reply_time; /* for health check */ -}; - -/* import.c */ -static inline unsigned int at_est2timeout(unsigned int val) -{ - /* add an arbitrary minimum: 125% +5 sec */ - return (val + (val >> 2) + 5); -} - -static inline unsigned int at_timeout2est(unsigned int val) -{ - /* restore estimate value from timeout: e=4/5(t-5) */ - LASSERT(val); - return (max((val << 2) / 5, 5U) - 
4); -} - -static inline void at_reset(struct adaptive_timeout *at, int val) -{ - spin_lock(&at->at_lock); - at->at_current = val; - at->at_worst_ever = val; - at->at_worst_time = ktime_get_real_seconds(); - spin_unlock(&at->at_lock); -} - -static inline void at_init(struct adaptive_timeout *at, int val, int flags) -{ - memset(at, 0, sizeof(*at)); - spin_lock_init(&at->at_lock); - at->at_flags = flags; - at_reset(at, val); -} - -extern unsigned int at_min; -static inline int at_get(struct adaptive_timeout *at) -{ - return (at->at_current > at_min) ? at->at_current : at_min; -} - -int at_measured(struct adaptive_timeout *at, unsigned int val); -int import_at_get_index(struct obd_import *imp, int portal); -extern unsigned int at_max; -#define AT_OFF (at_max == 0) - -/* genops.c */ -struct obd_export; -struct obd_import *class_exp2cliimp(struct obd_export *); - -/** @} import */ - -#endif /* __IMPORT_H */ - -/** @} obd_import */ diff --git a/drivers/staging/lustre/lustre/include/lustre_intent.h b/drivers/staging/lustre/lustre/include/lustre_intent.h deleted file mode 100644 index 51e5c0e03872..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_intent.h +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef LUSTRE_INTENT_H -#define LUSTRE_INTENT_H - -#include - -/* intent IT_XXX are defined in lustre/include/obd.h */ - -struct lookup_intent { - int it_op; - int it_create_mode; - __u64 it_flags; - int it_disposition; - int it_status; - __u64 it_lock_handle; - __u64 it_lock_bits; - int it_lock_mode; - int it_remote_lock_mode; - __u64 it_remote_lock_handle; - struct ptlrpc_request *it_request; - unsigned int it_lock_set:1; -}; - -static inline int it_disposition(struct lookup_intent *it, int flag) -{ - return it->it_disposition & flag; -} - -static inline void it_set_disposition(struct lookup_intent *it, int flag) -{ - it->it_disposition |= flag; -} - -static inline void it_clear_disposition(struct lookup_intent *it, int flag) -{ - it->it_disposition &= ~flag; -} - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_kernelcomm.h b/drivers/staging/lustre/lustre/include/lustre_kernelcomm.h deleted file mode 100644 index 2b3fa8430185..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_kernelcomm.h +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2013 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * - * Author: Nathan Rutman - * - * Kernel <-> userspace communication routines. - * The definitions below are used in the kernel and userspace. - */ - -#ifndef __LUSTRE_KERNELCOMM_H__ -#define __LUSTRE_KERNELCOMM_H__ - -/* For declarations shared with userspace */ -#include - -/* prototype for callback function on kuc groups */ -typedef int (*libcfs_kkuc_cb_t)(void *data, void *cb_arg); - -/* Kernel methods */ -int libcfs_kkuc_msg_put(struct file *fp, void *payload); -int libcfs_kkuc_group_put(unsigned int group, void *payload); -int libcfs_kkuc_group_add(struct file *fp, int uid, unsigned int group, - void *data, size_t data_len); -int libcfs_kkuc_group_rem(int uid, unsigned int group); -int libcfs_kkuc_group_foreach(unsigned int group, libcfs_kkuc_cb_t cb_func, - void *cb_arg); - -#endif /* __LUSTRE_KERNELCOMM_H__ */ diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h deleted file mode 100644 index 87748e9902a7..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_lib.h - * - * Basic Lustre library routines. 
- */ - -#ifndef _LUSTRE_LIB_H -#define _LUSTRE_LIB_H - -/** \defgroup lib lib - * - * @{ - */ - -#include -#include -#include -#include -#include -#include - -/* target.c */ -struct ptlrpc_request; -struct obd_export; -struct lu_target; -struct l_wait_info; -#include -#include - -#define LI_POISON 0x5a5a5a5a -#if BITS_PER_LONG > 32 -# define LL_POISON 0x5a5a5a5a5a5a5a5aL -#else -# define LL_POISON 0x5a5a5a5aL -#endif -#define LP_POISON ((void *)LL_POISON) - -int target_pack_pool_reply(struct ptlrpc_request *req); -int do_set_info_async(struct obd_import *imp, - int opcode, int version, - u32 keylen, void *key, - u32 vallen, void *val, - struct ptlrpc_request_set *set); - -void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); - -#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \ - sigmask(SIGTERM) | sigmask(SIGQUIT) | \ - sigmask(SIGALRM)) -static inline int l_fatal_signal_pending(struct task_struct *p) -{ - return signal_pending(p) && sigtestsetmask(&p->pending.signal, LUSTRE_FATAL_SIGS); -} - -/** @} lib */ - - - -/* l_wait_event_abortable() is a bit like wait_event_killable() - * except there is a fixed set of signals which will abort: - * LUSTRE_FATAL_SIGS - */ -#define l_wait_event_abortable(wq, condition) \ -({ \ - sigset_t __new_blocked, __old_blocked; \ - int __ret = 0; \ - siginitset(&__new_blocked, LUSTRE_FATAL_SIGS); \ - sigprocmask(SIG_BLOCK, &__new_blocked, &__old_blocked); \ - __ret = wait_event_interruptible(wq, condition); \ - sigprocmask(SIG_SETMASK, &__old_blocked, NULL); \ - __ret; \ -}) - -#define l_wait_event_abortable_timeout(wq, condition, timeout) \ -({ \ - sigset_t __new_blocked, __old_blocked; \ - int __ret = 0; \ - siginitset(&__new_blocked, LUSTRE_FATAL_SIGS); \ - sigprocmask(SIG_BLOCK, &__new_blocked, &__old_blocked); \ - __ret = wait_event_interruptible_timeout(wq, condition, timeout);\ - sigprocmask(SIG_SETMASK, &__old_blocked, NULL); \ - __ret; \ -}) - -#define l_wait_event_abortable_exclusive(wq, 
condition) \ -({ \ - sigset_t __new_blocked, __old_blocked; \ - int __ret = 0; \ - siginitset(&__new_blocked, LUSTRE_FATAL_SIGS); \ - sigprocmask(SIG_BLOCK, &__new_blocked, &__old_blocked); \ - __ret = wait_event_interruptible_exclusive(wq, condition); \ - sigprocmask(SIG_SETMASK, &__old_blocked, NULL); \ - __ret; \ -}) -#endif /* _LUSTRE_LIB_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_linkea.h b/drivers/staging/lustre/lustre/include/lustre_linkea.h deleted file mode 100644 index 03db1511bfd3..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_linkea.h +++ /dev/null @@ -1,93 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2013, 2014, Intel Corporation. - * Use is subject to license terms. - * - * Author: di wang - */ - -/* There are several reasons to restrict the linkEA size: - * - * 1. Under DNE mode, if we do not restrict the linkEA size, and if there - * are too many cross-MDTs hard links to the same object, then it will - * casue the llog overflow. - * - * 2. Some backend has limited size for EA. For example, if without large - * EA enabled, the ldiskfs will make all EAs to share one (4K) EA block. 
- * - * 3. Too many entries in linkEA will seriously affect linkEA performance - * because we only support to locate linkEA entry consecutively. - */ -#define MAX_LINKEA_SIZE 4096 - -struct linkea_data { - /** - * Buffer to keep link EA body. - */ - struct lu_buf *ld_buf; - /** - * The matched header, entry and its length in the EA - */ - struct link_ea_header *ld_leh; - struct link_ea_entry *ld_lee; - int ld_reclen; -}; - -int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf); -int linkea_init(struct linkea_data *ldata); -int linkea_init_with_rec(struct linkea_data *ldata); -void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen, - struct lu_name *lname, struct lu_fid *pfid); -int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname, - const struct lu_fid *pfid); -int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname, - const struct lu_fid *pfid); -void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname); -int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname, - const struct lu_fid *pfid); - -static inline void linkea_first_entry(struct linkea_data *ldata) -{ - LASSERT(ldata); - LASSERT(ldata->ld_leh); - - if (ldata->ld_leh->leh_reccount == 0) - ldata->ld_lee = NULL; - else - ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1); -} - -static inline void linkea_next_entry(struct linkea_data *ldata) -{ - LASSERT(ldata); - LASSERT(ldata->ld_leh); - - if (ldata->ld_lee) { - ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee + - ldata->ld_reclen); - if ((char *)ldata->ld_lee >= ((char *)ldata->ld_leh + - ldata->ld_leh->leh_len)) - ldata->ld_lee = NULL; - } -} diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h deleted file mode 100644 index 080ec1f8e19f..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_lmv.h +++ /dev/null @@ -1,174 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. A copy is - * included in the COPYING file that accompanied this code. - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * GPL HEADER END - */ -/* - * Copyright (c) 2013, Intel Corporation. - */ -/* - * lustre/include/lustre_lmv.h - * - * Lustre LMV structures and functions. 
- * - * Author: Di Wang - */ - -#ifndef _LUSTRE_LMV_H -#define _LUSTRE_LMV_H -#include - -struct lmv_oinfo { - struct lu_fid lmo_fid; - u32 lmo_mds; - struct inode *lmo_root; -}; - -struct lmv_stripe_md { - __u32 lsm_md_magic; - __u32 lsm_md_stripe_count; - __u32 lsm_md_master_mdt_index; - __u32 lsm_md_hash_type; - __u32 lsm_md_layout_version; - __u32 lsm_md_default_count; - __u32 lsm_md_default_index; - char lsm_md_pool_name[LOV_MAXPOOLNAME + 1]; - struct lmv_oinfo lsm_md_oinfo[0]; -}; - -static inline bool -lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2) -{ - __u32 idx; - - if (lsm1->lsm_md_magic != lsm2->lsm_md_magic || - lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count || - lsm1->lsm_md_master_mdt_index != lsm2->lsm_md_master_mdt_index || - lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type || - lsm1->lsm_md_layout_version != lsm2->lsm_md_layout_version || - strcmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name) != 0) - return false; - - for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) { - if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid, - &lsm2->lsm_md_oinfo[idx].lmo_fid)) - return false; - } - - return true; -} - -union lmv_mds_md; - -void lmv_free_memmd(struct lmv_stripe_md *lsm); - -static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst, - const struct lmv_mds_md_v1 *lmv_src) -{ - __u32 i; - - lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic); - lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count); - lmv_dst->lmv_master_mdt_index = - le32_to_cpu(lmv_src->lmv_master_mdt_index); - lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type); - lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version); - - for (i = 0; i < lmv_src->lmv_stripe_count; i++) - fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i], - &lmv_src->lmv_stripe_fids[i]); -} - -static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst, - const union lmv_mds_md *lmv_src) -{ - switch (le32_to_cpu(lmv_src->lmv_magic)) { - case 
LMV_MAGIC_V1: - lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1); - break; - default: - break; - } -} - -/* This hash is only for testing purpose */ -static inline unsigned int -lmv_hash_all_chars(unsigned int count, const char *name, int namelen) -{ - const unsigned char *p = (const unsigned char *)name; - unsigned int c = 0; - - while (--namelen >= 0) - c += p[namelen]; - - c = c % count; - - return c; -} - -static inline unsigned int -lmv_hash_fnv1a(unsigned int count, const char *name, int namelen) -{ - __u64 hash; - - hash = lustre_hash_fnv_1a_64(name, namelen); - - return do_div(hash, count); -} - -static inline int lmv_name_to_stripe_index(__u32 lmv_hash_type, - unsigned int stripe_count, - const char *name, int namelen) -{ - __u32 hash_type = lmv_hash_type & LMV_HASH_TYPE_MASK; - int idx; - - LASSERT(namelen > 0); - if (stripe_count <= 1) - return 0; - - /* for migrating object, always start from 0 stripe */ - if (lmv_hash_type & LMV_HASH_FLAG_MIGRATION) - return 0; - - switch (hash_type) { - case LMV_HASH_TYPE_ALL_CHARS: - idx = lmv_hash_all_chars(stripe_count, name, namelen); - break; - case LMV_HASH_TYPE_FNV_1A_64: - idx = lmv_hash_fnv1a(stripe_count, name, namelen); - break; - default: - idx = -EBADFD; - break; - } - CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name, - hash_type, idx); - - return idx; -} - -static inline bool lmv_is_known_hash_type(__u32 type) -{ - return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 || - (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS; -} - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h deleted file mode 100644 index 07f4e600386b..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_log.h +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_log.h - * - * Generic infrastructure for managing a collection of logs. 
- * These logs are used for: - * - * - orphan recovery: OST adds record on create - * - mtime/size consistency: the OST adds a record on first write - * - open/unlinked objects: OST adds a record on destroy - * - * - mds unlink log: the MDS adds an entry upon delete - * - * - raid1 replication log between OST's - * - MDS replication logs - */ - -#ifndef _LUSTRE_LOG_H -#define _LUSTRE_LOG_H - -/** \defgroup log log - * - * @{ - */ - -#include -#include - -#define LOG_NAME_LIMIT(logname, name) \ - snprintf(logname, sizeof(logname), "LOGS/%s", name) -#define LLOG_EEMPTY 4711 - -enum llog_open_param { - LLOG_OPEN_EXISTS = 0x0000, - LLOG_OPEN_NEW = 0x0001, -}; - -struct plain_handle_data { - struct list_head phd_entry; - struct llog_handle *phd_cat_handle; - struct llog_cookie phd_cookie; /* cookie of this log in its cat */ -}; - -struct cat_handle_data { - struct list_head chd_head; - struct llog_handle *chd_current_log; /* currently open log */ - struct llog_handle *chd_next_log; /* llog to be used next */ -}; - -struct llog_handle; - -/* llog.c - general API */ -int llog_init_handle(const struct lu_env *env, struct llog_handle *handle, - int flags, struct obd_uuid *uuid); -int llog_process(const struct lu_env *env, struct llog_handle *loghandle, - llog_cb_t cb, void *data, void *catdata); -int llog_process_or_fork(const struct lu_env *env, - struct llog_handle *loghandle, - llog_cb_t cb, void *data, void *catdata, bool fork); -int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt, - struct llog_handle **lgh, struct llog_logid *logid, - char *name, enum llog_open_param open_param); -int llog_close(const struct lu_env *env, struct llog_handle *cathandle); - -/* llog_process flags */ -#define LLOG_FLAG_NODEAMON 0x0001 - -/* llog_cat.c - catalog api */ -struct llog_process_data { - /** - * Any useful data needed while processing catalog. This is - * passed later to process callback. 
- */ - void *lpd_data; - /** - * Catalog process callback function, called for each record - * in catalog. - */ - llog_cb_t lpd_cb; - /** - * Start processing the catalog from startcat/startidx - */ - int lpd_startcat; - int lpd_startidx; -}; - -struct llog_process_cat_data { - /** - * Temporary stored first_idx while scanning log. - */ - int lpcd_first_idx; - /** - * Temporary stored last_idx while scanning log. - */ - int lpcd_last_idx; -}; - -struct thandle; - -int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle); -int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh, - llog_cb_t cb, void *data, int startcat, int startidx); - -/* llog_obd.c */ -int llog_setup(const struct lu_env *env, struct obd_device *obd, - struct obd_llog_group *olg, int index, - struct obd_device *disk_obd, struct llog_operations *op); -int __llog_ctxt_put(const struct lu_env *env, struct llog_ctxt *ctxt); -int llog_cleanup(const struct lu_env *env, struct llog_ctxt *); - -/* llog_net.c */ -int llog_initiator_connect(struct llog_ctxt *ctxt); - -struct llog_operations { - int (*lop_next_block)(const struct lu_env *env, struct llog_handle *h, - int *curr_idx, int next_idx, __u64 *offset, - void *buf, int len); - int (*lop_prev_block)(const struct lu_env *env, struct llog_handle *h, - int prev_idx, void *buf, int len); - int (*lop_read_header)(const struct lu_env *env, - struct llog_handle *handle); - int (*lop_setup)(const struct lu_env *env, struct obd_device *obd, - struct obd_llog_group *olg, int ctxt_idx, - struct obd_device *disk_obd); - int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp, - int flags); - int (*lop_cleanup)(const struct lu_env *env, struct llog_ctxt *ctxt); - int (*lop_cancel)(const struct lu_env *env, struct llog_ctxt *ctxt, - struct llog_cookie *cookies, int flags); - int (*lop_connect)(struct llog_ctxt *ctxt, struct llog_logid *logid, - struct llog_gen *gen, struct obd_uuid *uuid); - /** - * Any llog file must be 
opened first using llog_open(). Llog can be - * opened by name, logid or without both, in last case the new logid - * will be generated. - */ - int (*lop_open)(const struct lu_env *env, struct llog_handle *lgh, - struct llog_logid *logid, char *name, - enum llog_open_param); - /** - * Opened llog may not exist and this must be checked where needed using - * the llog_exist() call. - */ - int (*lop_exist)(struct llog_handle *lgh); - /** - * Close llog file and calls llog_free_handle() implicitly. - * Any opened llog must be closed by llog_close() call. - */ - int (*lop_close)(const struct lu_env *env, struct llog_handle *handle); - /** - * Create new llog file. The llog must be opened. - * Must be used only for local llog operations. - */ - int (*lop_declare_create)(const struct lu_env *env, - struct llog_handle *handle, - struct thandle *th); - /** - * write new record in llog. It appends records usually but can edit - * existing records too. - */ - int (*lop_declare_write_rec)(const struct lu_env *env, - struct llog_handle *lgh, - struct llog_rec_hdr *rec, - int idx, struct thandle *th); - int (*lop_write_rec)(const struct lu_env *env, - struct llog_handle *loghandle, - struct llog_rec_hdr *rec, - struct llog_cookie *cookie, int cookiecount, - void *buf, int idx, struct thandle *th); - /** - * Add new record in llog catalog. Does the same as llog_write_rec() - * but using llog catalog. 
- */ - int (*lop_declare_add)(const struct lu_env *env, - struct llog_handle *lgh, - struct llog_rec_hdr *rec, struct thandle *th); - int (*lop_add)(const struct lu_env *env, struct llog_handle *lgh, - struct llog_rec_hdr *rec, struct llog_cookie *cookie, - void *buf, struct thandle *th); -}; - -/* In-memory descriptor for a log object or log catalog */ -struct llog_handle { - struct rw_semaphore lgh_lock; - spinlock_t lgh_hdr_lock; /* protect lgh_hdr data */ - struct llog_logid lgh_id; /* id of this log */ - struct llog_log_hdr *lgh_hdr; - size_t lgh_hdr_size; - int lgh_last_idx; - int lgh_cur_idx; /* used during llog_process */ - __u64 lgh_cur_offset; /* used during llog_process */ - struct llog_ctxt *lgh_ctxt; - union { - struct plain_handle_data phd; - struct cat_handle_data chd; - } u; - char *lgh_name; - void *private_data; - struct llog_operations *lgh_logops; - atomic_t lgh_refcount; -}; - -#define LLOG_CTXT_FLAG_UNINITIALIZED 0x00000001 -#define LLOG_CTXT_FLAG_STOP 0x00000002 - -struct llog_ctxt { - int loc_idx; /* my index the obd array of ctxt's */ - struct obd_device *loc_obd; /* points back to the containing obd*/ - struct obd_llog_group *loc_olg; /* group containing that ctxt */ - struct obd_export *loc_exp; /* parent "disk" export (e.g. 
MDS) */ - struct obd_import *loc_imp; /* to use in RPC's: can be backward - * pointing import - */ - struct llog_operations *loc_logops; - struct llog_handle *loc_handle; - struct mutex loc_mutex; /* protect loc_imp */ - atomic_t loc_refcount; - long loc_flags; /* flags, see above defines */ - /* - * llog chunk size, and llog record size can not be bigger than - * loc_chunk_size - */ - __u32 loc_chunk_size; -}; - -#define LLOG_PROC_BREAK 0x0001 -#define LLOG_DEL_RECORD 0x0002 - -static inline int llog_handle2ops(struct llog_handle *loghandle, - struct llog_operations **lop) -{ - if (!loghandle || !loghandle->lgh_logops) - return -EINVAL; - - *lop = loghandle->lgh_logops; - return 0; -} - -static inline struct llog_ctxt *llog_ctxt_get(struct llog_ctxt *ctxt) -{ - atomic_inc(&ctxt->loc_refcount); - CDEBUG(D_INFO, "GETting ctxt %p : new refcount %d\n", ctxt, - atomic_read(&ctxt->loc_refcount)); - return ctxt; -} - -static inline void llog_ctxt_put(struct llog_ctxt *ctxt) -{ - if (!ctxt) - return; - LASSERT_ATOMIC_GT_LT(&ctxt->loc_refcount, 0, LI_POISON); - CDEBUG(D_INFO, "PUTting ctxt %p : new refcount %d\n", ctxt, - atomic_read(&ctxt->loc_refcount) - 1); - __llog_ctxt_put(NULL, ctxt); -} - -static inline void llog_group_init(struct obd_llog_group *olg) -{ - init_waitqueue_head(&olg->olg_waitq); - spin_lock_init(&olg->olg_lock); - mutex_init(&olg->olg_cat_processing); -} - -static inline int llog_group_set_ctxt(struct obd_llog_group *olg, - struct llog_ctxt *ctxt, int index) -{ - LASSERT(index >= 0 && index < LLOG_MAX_CTXTS); - - spin_lock(&olg->olg_lock); - if (olg->olg_ctxts[index]) { - spin_unlock(&olg->olg_lock); - return -EEXIST; - } - olg->olg_ctxts[index] = ctxt; - spin_unlock(&olg->olg_lock); - return 0; -} - -static inline struct llog_ctxt *llog_group_get_ctxt(struct obd_llog_group *olg, - int index) -{ - struct llog_ctxt *ctxt; - - LASSERT(index >= 0 && index < LLOG_MAX_CTXTS); - - spin_lock(&olg->olg_lock); - if (!olg->olg_ctxts[index]) - ctxt = NULL; - 
else - ctxt = llog_ctxt_get(olg->olg_ctxts[index]); - spin_unlock(&olg->olg_lock); - return ctxt; -} - -static inline void llog_group_clear_ctxt(struct obd_llog_group *olg, int index) -{ - LASSERT(index >= 0 && index < LLOG_MAX_CTXTS); - spin_lock(&olg->olg_lock); - olg->olg_ctxts[index] = NULL; - spin_unlock(&olg->olg_lock); -} - -static inline struct llog_ctxt *llog_get_context(struct obd_device *obd, - int index) -{ - return llog_group_get_ctxt(&obd->obd_olg, index); -} - -static inline int llog_group_ctxt_null(struct obd_llog_group *olg, int index) -{ - return (!olg->olg_ctxts[index]); -} - -static inline int llog_ctxt_null(struct obd_device *obd, int index) -{ - return llog_group_ctxt_null(&obd->obd_olg, index); -} - -static inline int llog_next_block(const struct lu_env *env, - struct llog_handle *loghandle, int *cur_idx, - int next_idx, __u64 *cur_offset, void *buf, - int len) -{ - struct llog_operations *lop; - int rc; - - rc = llog_handle2ops(loghandle, &lop); - if (rc) - return rc; - if (!lop->lop_next_block) - return -EOPNOTSUPP; - - rc = lop->lop_next_block(env, loghandle, cur_idx, next_idx, - cur_offset, buf, len); - return rc; -} - -/* llog.c */ -int llog_declare_write_rec(const struct lu_env *env, - struct llog_handle *handle, - struct llog_rec_hdr *rec, int idx, - struct thandle *th); -int llog_write_rec(const struct lu_env *env, struct llog_handle *handle, - struct llog_rec_hdr *rec, struct llog_cookie *logcookies, - int numcookies, void *buf, int idx, struct thandle *th); -int lustre_process_log(struct super_block *sb, char *logname, - struct config_llog_instance *cfg); -int lustre_end_log(struct super_block *sb, char *logname, - struct config_llog_instance *cfg); -/** @} log */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h deleted file mode 100644 index a9c9992a2502..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_mdc.h +++ /dev/null @@ -1,229 +0,0 @@ 
-// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_mdc.h - * - * MDS data structures. - * See also lustre_idl.h for wire formats of requests. - */ - -#ifndef _LUSTRE_MDC_H -#define _LUSTRE_MDC_H - -/** \defgroup mdc mdc - * - * @{ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct ptlrpc_client; -struct obd_export; -struct ptlrpc_request; -struct obd_device; - -/** - * Serializes in-flight MDT-modifying RPC requests to preserve idempotency. - * - * This mutex is used to implement execute-once semantics on the MDT. - * The MDT stores the last transaction ID and result for every client in - * its last_rcvd file. If the client doesn't get a reply, it can safely - * resend the request and the MDT will reconstruct the reply being aware - * that the request has already been executed. 
Without this lock, - * execution status of concurrent in-flight requests would be - * overwritten. - * - * This design limits the extent to which we can keep a full pipeline of - * in-flight requests from a single client. This limitation could be - * overcome by allowing multiple slots per client in the last_rcvd file. - */ -struct mdc_rpc_lock { - /** Lock protecting in-flight RPC concurrency. */ - struct mutex rpcl_mutex; - /** Intent associated with currently executing request. */ - struct lookup_intent *rpcl_it; - /** Used for MDS/RPC load testing purposes. */ - int rpcl_fakes; -}; - -#define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL) - -static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck) -{ - mutex_init(&lck->rpcl_mutex); - lck->rpcl_it = NULL; -} - -static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, - struct lookup_intent *it) -{ - if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || - it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) - return; - - /* This would normally block until the existing request finishes. - * If fail_loc is set it will block until the regular request is - * done, then set rpcl_it to MDC_FAKE_RPCL_IT. Once that is set - * it will only be cleared when all fake requests are finished. - * Only when all fake requests are finished can normal requests - * be sent, to ensure they are recoverable again. - */ - again: - mutex_lock(&lck->rpcl_mutex); - - if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) { - lck->rpcl_it = MDC_FAKE_RPCL_IT; - lck->rpcl_fakes++; - mutex_unlock(&lck->rpcl_mutex); - return; - } - - /* This will only happen when the CFS_FAIL_CHECK() was - * just turned off but there are still requests in progress. - * Wait until they finish. It doesn't need to be efficient - * in this extremely rare case, just have low overhead in - * the common case when it isn't true. 
- */ - while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) { - mutex_unlock(&lck->rpcl_mutex); - schedule_timeout(HZ / 4); - goto again; - } - - LASSERT(!lck->rpcl_it); - lck->rpcl_it = it; -} - -static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, - struct lookup_intent *it) -{ - if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || - it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) - return; - - if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */ - mutex_lock(&lck->rpcl_mutex); - - LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes); - lck->rpcl_fakes--; - - if (lck->rpcl_fakes == 0) - lck->rpcl_it = NULL; - - } else { - LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it); - lck->rpcl_it = NULL; - } - - mutex_unlock(&lck->rpcl_mutex); -} - -static inline void mdc_get_mod_rpc_slot(struct ptlrpc_request *req, - struct lookup_intent *it) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - u32 opc; - u16 tag; - - opc = lustre_msg_get_opc(req->rq_reqmsg); - tag = obd_get_mod_rpc_slot(cli, opc, it); - lustre_msg_set_tag(req->rq_reqmsg, tag); -} - -static inline void mdc_put_mod_rpc_slot(struct ptlrpc_request *req, - struct lookup_intent *it) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - u32 opc; - u16 tag; - - opc = lustre_msg_get_opc(req->rq_reqmsg); - tag = lustre_msg_get_tag(req->rq_reqmsg); - obd_put_mod_rpc_slot(cli, opc, it, tag); -} - -/** - * Update the maximum possible easize. - * - * This value is learned from ptlrpc replies sent by the MDT. The - * default easize is initialized to the minimum value but allowed - * to grow up to a single page in size if required to handle the - * common case. 
- * - * \see client_obd::cl_default_mds_easize - * - * \param[in] exp export for MDC device - * \param[in] body body of ptlrpc reply from MDT - * - */ -static inline void mdc_update_max_ea_from_body(struct obd_export *exp, - struct mdt_body *body) -{ - if (body->mbo_valid & OBD_MD_FLMODEASIZE) { - struct client_obd *cli = &exp->exp_obd->u.cli; - u32 def_easize; - - if (cli->cl_max_mds_easize < body->mbo_max_mdsize) - cli->cl_max_mds_easize = body->mbo_max_mdsize; - - def_easize = min_t(__u32, body->mbo_max_mdsize, - OBD_MAX_DEFAULT_EA_SIZE); - cli->cl_default_mds_easize = def_easize; - } -} - -/* mdc/mdc_locks.c */ -int it_open_error(int phase, struct lookup_intent *it); - -static inline bool cl_is_lov_delay_create(unsigned int flags) -{ - return (flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE; -} - -static inline void cl_lov_delay_create_clear(unsigned int *flags) -{ - if ((*flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE) - *flags &= ~O_LOV_DELAY_CREATE; -} - -/** @} mdc */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h deleted file mode 100644 index f665556556ec..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_mds.h +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_mds.h - * - * MDS data structures. - * See also lustre_idl.h for wire formats of requests. - */ - -#ifndef _LUSTRE_MDS_H -#define _LUSTRE_MDS_H - -/** \defgroup mds mds - * - * @{ - */ - -#include -#include -#include -#include - -struct mds_group_info { - struct obd_uuid *uuid; - int group; -}; - -#define MDD_OBD_NAME "mdd_obd" -#define MDD_OBD_UUID "mdd_obd_uuid" - -/** @} mds */ - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h deleted file mode 100644 index 35b43a77eb18..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ /dev/null @@ -1,2360 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -/** \defgroup PtlRPC Portal RPC and networking module. - * - * PortalRPC is the layer used by rest of lustre code to achieve network - * communications: establish connections with corresponding export and import - * states, listen for a service, send and receive RPCs. - * PortalRPC also includes base recovery framework: packet resending and - * replaying, reconnections, pinger. - * - * PortalRPC utilizes LNet as its transport layer. - * - * @{ - */ - -#ifndef _LUSTRE_NET_H -#define _LUSTRE_NET_H - -/** \defgroup net net - * - * @{ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -/* MD flags we _always_ use */ -#define PTLRPC_MD_OPTIONS 0 - -/** - * log2 max # of bulk operations in one request: 2=4MB/RPC, 5=32MB/RPC, ... - * In order for the client and server to properly negotiate the maximum - * possible transfer size, PTLRPC_BULK_OPS_COUNT must be a power-of-two - * value. The client is free to limit the actual RPC size for any bulk - * transfer via cl_max_pages_per_rpc to some non-power-of-two value. - * NOTE: This is limited to 16 (=64GB RPCs) by IOOBJ_MAX_BRW_BITS. - */ -#define PTLRPC_BULK_OPS_BITS 4 -#if PTLRPC_BULK_OPS_BITS > 16 -#error "More than 65536 BRW RPCs not allowed by IOOBJ_MAX_BRW_BITS." 
-#endif -#define PTLRPC_BULK_OPS_COUNT (1U << PTLRPC_BULK_OPS_BITS) -/** - * PTLRPC_BULK_OPS_MASK is for the convenience of the client only, and - * should not be used on the server at all. Otherwise, it imposes a - * protocol limitation on the maximum RPC size that can be used by any - * RPC sent to that server in the future. Instead, the server should - * use the negotiated per-client ocd_brw_size to determine the bulk - * RPC count. - */ -#define PTLRPC_BULK_OPS_MASK (~((__u64)PTLRPC_BULK_OPS_COUNT - 1)) - -/** - * Define maxima for bulk I/O. - * - * A single PTLRPC BRW request is sent via up to PTLRPC_BULK_OPS_COUNT - * of LNET_MTU sized RDMA transfers. Clients and servers negotiate the - * currently supported maximum between peers at connect via ocd_brw_size. - */ -#define PTLRPC_MAX_BRW_BITS (LNET_MTU_BITS + PTLRPC_BULK_OPS_BITS) -#define PTLRPC_MAX_BRW_SIZE (1 << PTLRPC_MAX_BRW_BITS) -#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE >> PAGE_SHIFT) - -#define ONE_MB_BRW_SIZE (1 << LNET_MTU_BITS) -#define MD_MAX_BRW_SIZE (1 << LNET_MTU_BITS) -#define MD_MAX_BRW_PAGES (MD_MAX_BRW_SIZE >> PAGE_SHIFT) -#define DT_MAX_BRW_SIZE PTLRPC_MAX_BRW_SIZE -#define DT_MAX_BRW_PAGES (DT_MAX_BRW_SIZE >> PAGE_SHIFT) -#define OFD_MAX_BRW_SIZE (1 << LNET_MTU_BITS) - -/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */ -# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0) -# error "PTLRPC_MAX_BRW_PAGES isn't a power of two" -# endif -# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE)) -# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE" -# endif -# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU * PTLRPC_BULK_OPS_COUNT) -# error "PTLRPC_MAX_BRW_SIZE too big" -# endif -# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV * PTLRPC_BULK_OPS_COUNT) -# error "PTLRPC_MAX_BRW_PAGES too big" -# endif - -#define PTLRPC_NTHRS_INIT 2 - -/** - * Buffer Constants - * - * Constants determine how memory is used to buffer incoming service requests. 
- * - * ?_NBUFS # buffers to allocate when growing the pool - * ?_BUFSIZE # bytes in a single request buffer - * ?_MAXREQSIZE # maximum request service will receive - * - * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk - * of ?_NBUFS is added to the pool. - * - * Messages larger than ?_MAXREQSIZE are dropped. Request buffers are - * considered full when less than ?_MAXREQSIZE is left in them. - */ -/** - * Thread Constants - * - * Constants determine how threads are created for ptlrpc service. - * - * ?_NTHRS_INIT # threads to create for each service partition on - * initializing. If it's non-affinity service and - * there is only one partition, it's the overall # - * threads for the service while initializing. - * ?_NTHRS_BASE # threads should be created at least for each - * ptlrpc partition to keep the service healthy. - * It's the low-water mark of threads upper-limit - * for each partition. - * ?_THR_FACTOR # threads can be added on threads upper-limit for - * each CPU core. This factor is only for reference, - * we might decrease value of factor if number of cores - * per CPT is above a limit. - * ?_NTHRS_MAX # overall threads can be created for a service, - * it's a soft limit because if service is running - * on machine with hundreds of cores and tens of - * CPU partitions, we need to guarantee each partition - * has ?_NTHRS_BASE threads, which means total threads - * will be ?_NTHRS_BASE * number_of_cpts which can - * exceed ?_NTHRS_MAX. 
- * - * Examples - * - * #define MDS_NTHRS_INIT 2 - * #define MDS_NTHRS_BASE 64 - * #define MDS_NTHRS_FACTOR 8 - * #define MDS_NTHRS_MAX 1024 - * - * Example 1): - * --------------------------------------------------------------------- - * Server(A) has 16 cores, user configured it to 4 partitions so each - * partition has 4 cores, then actual number of service threads on each - * partition is: - * MDS_NTHRS_BASE(64) + cores(4) * MDS_NTHRS_FACTOR(8) = 96 - * - * Total number of threads for the service is: - * 96 * partitions(4) = 384 - * - * Example 2): - * --------------------------------------------------------------------- - * Server(B) has 32 cores, user configured it to 4 partitions so each - * partition has 8 cores, then actual number of service threads on each - * partition is: - * MDS_NTHRS_BASE(64) + cores(8) * MDS_NTHRS_FACTOR(8) = 128 - * - * Total number of threads for the service is: - * 128 * partitions(4) = 512 - * - * Example 3): - * --------------------------------------------------------------------- - * Server(B) has 96 cores, user configured it to 8 partitions so each - * partition has 12 cores, then actual number of service threads on each - * partition is: - * MDS_NTHRS_BASE(64) + cores(12) * MDS_NTHRS_FACTOR(8) = 160 - * - * Total number of threads for the service is: - * 160 * partitions(8) = 1280 - * - * However, it's above the soft limit MDS_NTHRS_MAX, so we choose this number - * as upper limit of threads number for each partition: - * MDS_NTHRS_MAX(1024) / partitions(8) = 128 - * - * Example 4): - * --------------------------------------------------------------------- - * Server(C) have a thousand of cores and user configured it to 32 partitions - * MDS_NTHRS_BASE(64) * 32 = 2048 - * - * which is already above soft limit MDS_NTHRS_MAX(1024), but we still need - * to guarantee that each partition has at least MDS_NTHRS_BASE(64) threads - * to keep service healthy, so total number of threads will just be 2048. 
- * - * NB: we don't suggest to choose server with that many cores because backend - * filesystem itself, buffer cache, or underlying network stack might - * have some SMP scalability issues at that large scale. - * - * If user already has a fat machine with hundreds or thousands of cores, - * there are two choices for configuration: - * a) create CPU table from subset of all CPUs and run Lustre on - * top of this subset - * b) bind service threads on a few partitions, see modparameters of - * MDS and OSS for details -* - * NB: these calculations (and examples below) are simplified to help - * understanding, the real implementation is a little more complex, - * please see ptlrpc_server_nthreads_check() for details. - * - */ - - /* - * LDLM threads constants: - * - * Given 8 as factor and 24 as base threads number - * - * example 1) - * On 4-core machine we will have 24 + 8 * 4 = 56 threads. - * - * example 2) - * On 8-core machine with 2 partitions we will have 24 + 4 * 8 = 56 - * threads for each partition and total threads number will be 112. - * - * example 3) - * On 64-core machine with 8 partitions we will need LDLM_NTHRS_BASE(24) - * threads for each partition to keep service healthy, so total threads - * number should be 24 * 8 = 192. - * - * So with these constants, threads number will be at the similar level - * of old versions, unless target machine has over a hundred cores - */ -#define LDLM_THR_FACTOR 8 -#define LDLM_NTHRS_INIT PTLRPC_NTHRS_INIT -#define LDLM_NTHRS_BASE 24 -#define LDLM_NTHRS_MAX (num_online_cpus() == 1 ? 64 : 128) - -#define LDLM_BL_THREADS LDLM_NTHRS_AUTO_INIT -#define LDLM_CLIENT_NBUFS 1 -#define LDLM_SERVER_NBUFS 64 -#define LDLM_BUFSIZE (8 * 1024) -#define LDLM_MAXREQSIZE (5 * 1024) -#define LDLM_MAXREPSIZE (1024) - -#define MDS_MAXREQSIZE (5 * 1024) /* >= 4736 */ - -/** - * FIEMAP request can be 4K+ for now - */ -#define OST_MAXREQSIZE (16 * 1024) - -/* Macro to hide a typecast. 
*/ -#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args) - -struct ptlrpc_replay_async_args { - int praa_old_state; - int praa_old_status; -}; - -/** - * Structure to single define portal connection. - */ -struct ptlrpc_connection { - /** linkage for connections hash table */ - struct rhash_head c_hash; - /** Our own lnet nid for this connection */ - lnet_nid_t c_self; - /** Remote side nid for this connection */ - struct lnet_process_id c_peer; - /** UUID of the other side */ - struct obd_uuid c_remote_uuid; - /** reference counter for this connection */ - atomic_t c_refcount; -}; - -/** Client definition for PortalRPC */ -struct ptlrpc_client { - /** What lnet portal does this client send messages to by default */ - __u32 cli_request_portal; - /** What portal do we expect replies on */ - __u32 cli_reply_portal; - /** Name of the client */ - char *cli_name; -}; - -/** state flags of requests */ -/* XXX only ones left are those used by the bulk descs as well! */ -#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */ -#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */ - -#define REQ_MAX_ACK_LOCKS 8 - -union ptlrpc_async_args { - /** - * Scratchpad for passing args to completion interpreter. Users - * cast to the struct of their choosing, and BUILD_BUG_ON oversized - * arguments. For _tons_ of context, kmalloc a struct and store - * a pointer to it here. The pointer_arg ensures this struct is at - * least big enough for that. - */ - void *pointer_arg[11]; - __u64 space[7]; -}; - -struct ptlrpc_request_set; -typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int); -typedef int (*set_producer_func)(struct ptlrpc_request_set *, void *); - -/** - * Definition of request set structure. - * Request set is a list of requests (not necessary to the same target) that - * once populated with RPCs could be sent in parallel. - * There are two kinds of request sets. 
General purpose and with dedicated - * serving thread. Example of the latter is ptlrpcd set. - * For general purpose sets once request set started sending it is impossible - * to add new requests to such set. - * Provides a way to call "completion callbacks" when all requests in the set - * returned. - */ -struct ptlrpc_request_set { - atomic_t set_refcount; - /** number of in queue requests */ - atomic_t set_new_count; - /** number of uncompleted requests */ - atomic_t set_remaining; - /** wait queue to wait on for request events */ - wait_queue_head_t set_waitq; - wait_queue_head_t *set_wakeup_ptr; - /** List of requests in the set */ - struct list_head set_requests; - /** - * List of completion callbacks to be called when the set is completed - * This is only used if \a set_interpret is NULL. - * Links struct ptlrpc_set_cbdata. - */ - struct list_head set_cblist; - /** Completion callback, if only one. */ - set_interpreter_func set_interpret; - /** opaq argument passed to completion \a set_interpret callback. */ - void *set_arg; - /** - * Lock for \a set_new_requests manipulations - * locked so that any old caller can communicate requests to - * the set holder who can then fold them into the lock-free set - */ - spinlock_t set_new_req_lock; - /** List of new yet unsent requests. Only used with ptlrpcd now. 
*/ - struct list_head set_new_requests; - - /** rq_status of requests that have been freed already */ - int set_rc; - /** Additional fields used by the flow control extension */ - /** Maximum number of RPCs in flight */ - int set_max_inflight; - /** Callback function used to generate RPCs */ - set_producer_func set_producer; - /** opaq argument passed to the producer callback */ - void *set_producer_arg; -}; - -/** - * Description of a single ptrlrpc_set callback - */ -struct ptlrpc_set_cbdata { - /** List linkage item */ - struct list_head psc_item; - /** Pointer to interpreting function */ - set_interpreter_func psc_interpret; - /** Opaq argument to pass to the callback */ - void *psc_data; -}; - -struct ptlrpc_bulk_desc; -struct ptlrpc_service_part; -struct ptlrpc_service; - -/** - * ptlrpc callback & work item stuff - */ -struct ptlrpc_cb_id { - void (*cbid_fn)(struct lnet_event *ev); /* specific callback fn */ - void *cbid_arg; /* additional arg */ -}; - -/** Maximum number of locks to fit into reply state */ -#define RS_MAX_LOCKS 8 -#define RS_DEBUG 0 - -/** - * Structure to define reply state on the server - * Reply state holds various reply message information. Also for "difficult" - * replies (rep-ack case) we store the state after sending reply and wait - * for the client to acknowledge the reception. In these cases locks could be - * added to the state for replay/failover consistency guarantees. 
- */ -struct ptlrpc_reply_state { - /** Callback description */ - struct ptlrpc_cb_id rs_cb_id; - /** Linkage for list of all reply states in a system */ - struct list_head rs_list; - /** Linkage for list of all reply states on same export */ - struct list_head rs_exp_list; - /** Linkage for list of all reply states for same obd */ - struct list_head rs_obd_list; -#if RS_DEBUG - struct list_head rs_debug_list; -#endif - /** A spinlock to protect the reply state flags */ - spinlock_t rs_lock; - /** Reply state flags */ - unsigned long rs_difficult:1; /* ACK/commit stuff */ - unsigned long rs_no_ack:1; /* no ACK, even for - * difficult requests - */ - unsigned long rs_scheduled:1; /* being handled? */ - unsigned long rs_scheduled_ever:1;/* any schedule attempts? */ - unsigned long rs_handled:1; /* been handled yet? */ - unsigned long rs_on_net:1; /* reply_out_callback pending? */ - unsigned long rs_prealloc:1; /* rs from prealloc list */ - unsigned long rs_committed:1;/* the transaction was committed - * and the rs was dispatched - */ - atomic_t rs_refcount; /* number of users */ - /** Number of locks awaiting client ACK */ - int rs_nlocks; - - /** Size of the state */ - int rs_size; - /** opcode */ - __u32 rs_opc; - /** Transaction number */ - __u64 rs_transno; - /** xid */ - __u64 rs_xid; - struct obd_export *rs_export; - struct ptlrpc_service_part *rs_svcpt; - /** Lnet metadata handle for the reply */ - struct lnet_handle_md rs_md_h; - - /** Context for the service thread */ - struct ptlrpc_svc_ctx *rs_svc_ctx; - /** Reply buffer (actually sent to the client), encoded if needed */ - struct lustre_msg *rs_repbuf; /* wrapper */ - /** Size of the reply buffer */ - int rs_repbuf_len; /* wrapper buf length */ - /** Size of the reply message */ - int rs_repdata_len; /* wrapper msg length */ - /** - * Actual reply message. Its content is encrypted (if needed) to - * produce reply buffer for actual sending. 
In simple case - * of no network encryption we just set \a rs_repbuf to \a rs_msg - */ - struct lustre_msg *rs_msg; /* reply message */ - - /** Handles of locks awaiting client reply ACK */ - struct lustre_handle rs_locks[RS_MAX_LOCKS]; - /** Lock modes of locks in \a rs_locks */ - enum ldlm_mode rs_modes[RS_MAX_LOCKS]; -}; - -struct ptlrpc_thread; - -/** RPC stages */ -enum rq_phase { - RQ_PHASE_NEW = 0xebc0de00, - RQ_PHASE_RPC = 0xebc0de01, - RQ_PHASE_BULK = 0xebc0de02, - RQ_PHASE_INTERPRET = 0xebc0de03, - RQ_PHASE_COMPLETE = 0xebc0de04, - RQ_PHASE_UNREG_RPC = 0xebc0de05, - RQ_PHASE_UNREG_BULK = 0xebc0de06, - RQ_PHASE_UNDEFINED = 0xebc0de07 -}; - -/** Type of request interpreter call-back */ -typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env, - struct ptlrpc_request *req, - void *arg, int rc); - -/** - * Definition of request pool structure. - * The pool is used to store empty preallocated requests for the case - * when we would actually need to send something without performing - * any allocations (to avoid e.g. OOM). - */ -struct ptlrpc_request_pool { - /** Locks the list */ - spinlock_t prp_lock; - /** list of ptlrpc_request structs */ - struct list_head prp_req_list; - /** Maximum message size that would fit into a request from this pool */ - int prp_rq_size; - /** Function to allocate more requests for this pool */ - int (*prp_populate)(struct ptlrpc_request_pool *, int); -}; - -struct lu_context; -struct lu_env; - -struct ldlm_lock; - -#include - -/** - * Basic request prioritization operations structure. - * The whole idea is centered around locks and RPCs that might affect locks. - * When a lock is contended we try to give priority to RPCs that might lead - * to fastest release of that lock. - * Currently only implemented for OSTs only in a way that makes all - * IO and truncate RPCs that are coming from a locked region where a lock is - * contended a priority over other requests. 
- */ -struct ptlrpc_hpreq_ops { - /** - * Check if the lock handle of the given lock is the same as - * taken from the request. - */ - int (*hpreq_lock_match)(struct ptlrpc_request *, struct ldlm_lock *); - /** - * Check if the request is a high priority one. - */ - int (*hpreq_check)(struct ptlrpc_request *); - /** - * Called after the request has been handled. - */ - void (*hpreq_fini)(struct ptlrpc_request *); -}; - -struct ptlrpc_cli_req { - /** For bulk requests on client only: bulk descriptor */ - struct ptlrpc_bulk_desc *cr_bulk; - /** optional time limit for send attempts */ - long cr_delay_limit; - /** time request was first queued */ - unsigned long cr_queued_time; - /** request sent timeval */ - struct timespec64 cr_sent_tv; - /** time for request really sent out */ - time64_t cr_sent_out; - /** when req reply unlink must finish. */ - time64_t cr_reply_deadline; - /** when req bulk unlink must finish. */ - time64_t cr_bulk_deadline; - /** when req unlink must finish. */ - time64_t cr_req_deadline; - /** Portal to which this request would be sent */ - short cr_req_ptl; - /** Portal where to wait for reply and where reply would be sent */ - short cr_rep_ptl; - /** request resending number */ - unsigned int cr_resend_nr; - /** What was import generation when this request was sent */ - int cr_imp_gen; - enum lustre_imp_state cr_send_state; - /** Per-request waitq introduced by bug 21938 for recovery waiting */ - wait_queue_head_t cr_set_waitq; - /** Link item for request set lists */ - struct list_head cr_set_chain; - /** link to waited ctx */ - struct list_head cr_ctx_chain; - - /** client's half ctx */ - struct ptlrpc_cli_ctx *cr_cli_ctx; - /** Link back to the request set */ - struct ptlrpc_request_set *cr_set; - /** outgoing request MD handle */ - struct lnet_handle_md cr_req_md_h; - /** request-out callback parameter */ - struct ptlrpc_cb_id cr_req_cbid; - /** incoming reply MD handle */ - struct lnet_handle_md cr_reply_md_h; - wait_queue_head_t 
cr_reply_waitq; - /** reply callback parameter */ - struct ptlrpc_cb_id cr_reply_cbid; - /** Async completion handler, called when reply is received */ - ptlrpc_interpterer_t cr_reply_interp; - /** Async completion context */ - union ptlrpc_async_args cr_async_args; - /** Opaq data for replay and commit callbacks. */ - void *cr_cb_data; - /** Link to the imp->imp_unreplied_list */ - struct list_head cr_unreplied_list; - /** - * Commit callback, called when request is committed and about to be - * freed. - */ - void (*cr_commit_cb)(struct ptlrpc_request *); - /** Replay callback, called after request is replayed at recovery */ - void (*cr_replay_cb)(struct ptlrpc_request *); -}; - -/** client request member alias */ -/* NB: these alias should NOT be used by any new code, instead they should - * be removed step by step to avoid potential abuse - */ -#define rq_bulk rq_cli.cr_bulk -#define rq_delay_limit rq_cli.cr_delay_limit -#define rq_queued_time rq_cli.cr_queued_time -#define rq_sent_tv rq_cli.cr_sent_tv -#define rq_real_sent rq_cli.cr_sent_out -#define rq_reply_deadline rq_cli.cr_reply_deadline -#define rq_bulk_deadline rq_cli.cr_bulk_deadline -#define rq_req_deadline rq_cli.cr_req_deadline -#define rq_nr_resend rq_cli.cr_resend_nr -#define rq_request_portal rq_cli.cr_req_ptl -#define rq_reply_portal rq_cli.cr_rep_ptl -#define rq_import_generation rq_cli.cr_imp_gen -#define rq_send_state rq_cli.cr_send_state -#define rq_set_chain rq_cli.cr_set_chain -#define rq_ctx_chain rq_cli.cr_ctx_chain -#define rq_set rq_cli.cr_set -#define rq_set_waitq rq_cli.cr_set_waitq -#define rq_cli_ctx rq_cli.cr_cli_ctx -#define rq_req_md_h rq_cli.cr_req_md_h -#define rq_req_cbid rq_cli.cr_req_cbid -#define rq_reply_md_h rq_cli.cr_reply_md_h -#define rq_reply_waitq rq_cli.cr_reply_waitq -#define rq_reply_cbid rq_cli.cr_reply_cbid -#define rq_interpret_reply rq_cli.cr_reply_interp -#define rq_async_args rq_cli.cr_async_args -#define rq_cb_data rq_cli.cr_cb_data -#define 
rq_unreplied_list rq_cli.cr_unreplied_list -#define rq_commit_cb rq_cli.cr_commit_cb -#define rq_replay_cb rq_cli.cr_replay_cb - -struct ptlrpc_srv_req { - /** initial thread servicing this request */ - struct ptlrpc_thread *sr_svc_thread; - /** - * Server side list of incoming unserved requests sorted by arrival - * time. Traversed from time to time to notice about to expire - * requests and sent back "early replies" to clients to let them - * know server is alive and well, just very busy to service their - * requests in time - */ - struct list_head sr_timed_list; - /** server-side per-export list */ - struct list_head sr_exp_list; - /** server-side history, used for debuging purposes. */ - struct list_head sr_hist_list; - /** history sequence # */ - __u64 sr_hist_seq; - /** the index of service's srv_at_array into which request is linked */ - time64_t sr_at_index; - /** authed uid */ - uid_t sr_auth_uid; - /** authed uid mapped to */ - uid_t sr_auth_mapped_uid; - /** RPC is generated from what part of Lustre */ - enum lustre_sec_part sr_sp_from; - /** request session context */ - struct lu_context sr_ses; - /** \addtogroup nrs - * @{ - */ - /** stub for NRS request */ - struct ptlrpc_nrs_request sr_nrq; - /** @} nrs */ - /** request arrival time */ - struct timespec64 sr_arrival_time; - /** server's half ctx */ - struct ptlrpc_svc_ctx *sr_svc_ctx; - /** (server side), pointed directly into req buffer */ - struct ptlrpc_user_desc *sr_user_desc; - /** separated reply state */ - struct ptlrpc_reply_state *sr_reply_state; - /** server-side hp handlers */ - struct ptlrpc_hpreq_ops *sr_ops; - /** incoming request buffer */ - struct ptlrpc_request_buffer_desc *sr_rqbd; -}; - -/** server request member alias */ -/* NB: these alias should NOT be used by any new code, instead they should - * be removed step by step to avoid potential abuse - */ -#define rq_svc_thread rq_srv.sr_svc_thread -#define rq_timed_list rq_srv.sr_timed_list -#define rq_exp_list rq_srv.sr_exp_list 
-#define rq_history_list rq_srv.sr_hist_list -#define rq_history_seq rq_srv.sr_hist_seq -#define rq_at_index rq_srv.sr_at_index -#define rq_auth_uid rq_srv.sr_auth_uid -#define rq_auth_mapped_uid rq_srv.sr_auth_mapped_uid -#define rq_sp_from rq_srv.sr_sp_from -#define rq_session rq_srv.sr_ses -#define rq_nrq rq_srv.sr_nrq -#define rq_arrival_time rq_srv.sr_arrival_time -#define rq_reply_state rq_srv.sr_reply_state -#define rq_svc_ctx rq_srv.sr_svc_ctx -#define rq_user_desc rq_srv.sr_user_desc -#define rq_ops rq_srv.sr_ops -#define rq_rqbd rq_srv.sr_rqbd - -/** - * Represents remote procedure call. - * - * This is a staple structure used by everybody wanting to send a request - * in Lustre. - */ -struct ptlrpc_request { - /* Request type: one of PTL_RPC_MSG_* */ - int rq_type; - /** Result of request processing */ - int rq_status; - /** - * Linkage item through which this request is included into - * sending/delayed lists on client and into rqbd list on server - */ - struct list_head rq_list; - /** Lock to protect request flags and some other important bits, like - * rq_list - */ - spinlock_t rq_lock; - /** client-side flags are serialized by rq_lock @{ */ - unsigned int rq_intr:1, rq_replied:1, rq_err:1, - rq_timedout:1, rq_resend:1, rq_restart:1, - /** - * when ->rq_replay is set, request is kept by the client even - * after server commits corresponding transaction. This is - * used for operations that require sequence of multiple - * requests to be replayed. The only example currently is file - * open/close. When last request in such a sequence is - * committed, ->rq_replay is cleared on all requests in the - * sequence. 
- */ - rq_replay:1, - rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, - rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1, - rq_early:1, - rq_req_unlinked:1, /* unlinked request buffer from lnet */ - rq_reply_unlinked:1, /* unlinked reply buffer from lnet */ - rq_memalloc:1, /* req originated from "kswapd" */ - rq_committed:1, - rq_reply_truncated:1, - /** whether the "rq_set" is a valid one */ - rq_invalid_rqset:1, - rq_generation_set:1, - /** do not resend request on -EINPROGRESS */ - rq_no_retry_einprogress:1, - /* allow the req to be sent if the import is in recovery - * status - */ - rq_allow_replay:1, - /* bulk request, sent to server, but uncommitted */ - rq_unstable:1; - /** @} */ - - /** server-side flags @{ */ - unsigned int - rq_hp:1, /**< high priority RPC */ - rq_at_linked:1, /**< link into service's srv_at_array */ - rq_packed_final:1; /**< packed final reply */ - /** @} */ - - /** one of RQ_PHASE_* */ - enum rq_phase rq_phase; - /** one of RQ_PHASE_* to be used next */ - enum rq_phase rq_next_phase; - /** - * client-side refcount for SENT race, server-side refcount - * for multiple replies - */ - atomic_t rq_refcount; - /** - * client-side: - * !rq_truncate : # reply bytes actually received, - * rq_truncate : required repbuf_len for resend - */ - int rq_nob_received; - /** Request length */ - int rq_reqlen; - /** Reply length */ - int rq_replen; - /** Pool if request is from preallocated list */ - struct ptlrpc_request_pool *rq_pool; - /** Request message - what client sent */ - struct lustre_msg *rq_reqmsg; - /** Reply message - server response */ - struct lustre_msg *rq_repmsg; - /** Transaction number */ - __u64 rq_transno; - /** xid */ - __u64 rq_xid; - /** bulk match bits */ - u64 rq_mbits; - /** - * List item to for replay list. Not yet committed requests get linked - * there. - * Also see \a rq_replay comment above. 
- * It's also link chain on obd_export::exp_req_replay_queue - */ - struct list_head rq_replay_list; - /** non-shared members for client & server request*/ - union { - struct ptlrpc_cli_req rq_cli; - struct ptlrpc_srv_req rq_srv; - }; - /** - * security and encryption data - * @{ - */ - /** description of flavors for client & server */ - struct sptlrpc_flavor rq_flvr; - - /* client/server security flags */ - unsigned int - rq_ctx_init:1, /* context initiation */ - rq_ctx_fini:1, /* context destroy */ - rq_bulk_read:1, /* request bulk read */ - rq_bulk_write:1, /* request bulk write */ - /* server authentication flags */ - rq_auth_gss:1, /* authenticated by gss */ - rq_auth_usr_root:1, /* authed as root */ - rq_auth_usr_mdt:1, /* authed as mdt */ - rq_auth_usr_ost:1, /* authed as ost */ - /* security tfm flags */ - rq_pack_udesc:1, - rq_pack_bulk:1, - /* doesn't expect reply FIXME */ - rq_no_reply:1, - rq_pill_init:1, /* pill initialized */ - rq_srv_req:1; /* server request */ - - /** various buffer pointers */ - struct lustre_msg *rq_reqbuf; /**< req wrapper */ - char *rq_repbuf; /**< rep buffer */ - struct lustre_msg *rq_repdata; /**< rep wrapper msg */ - /** only in priv mode */ - struct lustre_msg *rq_clrbuf; - int rq_reqbuf_len; /* req wrapper buf len */ - int rq_reqdata_len; /* req wrapper msg len */ - int rq_repbuf_len; /* rep buffer len */ - int rq_repdata_len; /* rep wrapper msg len */ - int rq_clrbuf_len; /* only in priv mode */ - int rq_clrdata_len; /* only in priv mode */ - - /** early replies go to offset 0, regular replies go after that */ - unsigned int rq_reply_off; - - /** @} */ - - /** Fields that help to see if request and reply were swabbed or not */ - __u32 rq_req_swab_mask; - __u32 rq_rep_swab_mask; - - /** how many early replies (for stats) */ - int rq_early_count; - - /** Server-side, export on which request was received */ - struct obd_export *rq_export; - /** import where request is being sent */ - struct obd_import *rq_import; - /** our 
LNet NID */ - lnet_nid_t rq_self; - /** Peer description (the other side) */ - struct lnet_process_id rq_peer; - /** - * service time estimate (secs) - * If the request is not served by this time, it is marked as timed out. - */ - int rq_timeout; - /** - * when request/reply sent (secs), or time when request should be sent - */ - time64_t rq_sent; - /** when request must finish. */ - time64_t rq_deadline; - /** request format description */ - struct req_capsule rq_pill; -}; - -/** - * Call completion handler for rpc if any, return it's status or original - * rc if there was no handler defined for this request. - */ -static inline int ptlrpc_req_interpret(const struct lu_env *env, - struct ptlrpc_request *req, int rc) -{ - if (req->rq_interpret_reply) { - req->rq_status = req->rq_interpret_reply(env, req, - &req->rq_async_args, - rc); - return req->rq_status; - } - return rc; -} - -/* - * Can the request be moved from the regular NRS head to the high-priority NRS - * head (of the same PTLRPC service partition), if any? - * - * For a reliable result, this should be checked under svcpt->scp_req lock. - */ -static inline bool ptlrpc_nrs_req_can_move(struct ptlrpc_request *req) -{ - struct ptlrpc_nrs_request *nrq = &req->rq_nrq; - - /** - * LU-898: Check ptlrpc_nrs_request::nr_enqueued to make sure the - * request has been enqueued first, and ptlrpc_nrs_request::nr_started - * to make sure it has not been scheduled yet (analogous to previous - * (non-NRS) checking of !list_empty(&ptlrpc_request::rq_list). 
- */ - return nrq->nr_enqueued && !nrq->nr_started && !req->rq_hp; -} - -/** @} nrs */ - -/** - * Returns 1 if request buffer at offset \a index was already swabbed - */ -static inline int lustre_req_swabbed(struct ptlrpc_request *req, size_t index) -{ - LASSERT(index < sizeof(req->rq_req_swab_mask) * 8); - return req->rq_req_swab_mask & (1 << index); -} - -/** - * Returns 1 if request reply buffer at offset \a index was already swabbed - */ -static inline int lustre_rep_swabbed(struct ptlrpc_request *req, size_t index) -{ - LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8); - return req->rq_rep_swab_mask & (1 << index); -} - -/** - * Returns 1 if request needs to be swabbed into local cpu byteorder - */ -static inline int ptlrpc_req_need_swab(struct ptlrpc_request *req) -{ - return lustre_req_swabbed(req, MSG_PTLRPC_HEADER_OFF); -} - -/** - * Returns 1 if request reply needs to be swabbed into local cpu byteorder - */ -static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req) -{ - return lustre_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF); -} - -/** - * Mark request buffer at offset \a index that it was already swabbed - */ -static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, - size_t index) -{ - LASSERT(index < sizeof(req->rq_req_swab_mask) * 8); - LASSERT((req->rq_req_swab_mask & (1 << index)) == 0); - req->rq_req_swab_mask |= 1 << index; -} - -/** - * Mark request reply buffer at offset \a index that it was already swabbed - */ -static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, - size_t index) -{ - LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8); - LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0); - req->rq_rep_swab_mask |= 1 << index; -} - -/** - * Convert numerical request phase value \a phase into text string description - */ -static inline const char * -ptlrpc_phase2str(enum rq_phase phase) -{ - switch (phase) { - case RQ_PHASE_NEW: - return "New"; - case RQ_PHASE_RPC: - return "Rpc"; - case RQ_PHASE_BULK: - 
return "Bulk"; - case RQ_PHASE_INTERPRET: - return "Interpret"; - case RQ_PHASE_COMPLETE: - return "Complete"; - case RQ_PHASE_UNREG_RPC: - return "UnregRPC"; - case RQ_PHASE_UNREG_BULK: - return "UnregBULK"; - default: - return "?Phase?"; - } -} - -/** - * Convert numerical request phase of the request \a req into text stringi - * description - */ -static inline const char * -ptlrpc_rqphase2str(struct ptlrpc_request *req) -{ - return ptlrpc_phase2str(req->rq_phase); -} - -/** - * Debugging functions and helpers to print request structure into debug log - * @{ - */ -/* Spare the preprocessor, spoil the bugs. */ -#define FLAG(field, str) (field ? str : "") - -/** Convert bit flags into a string */ -#define DEBUG_REQ_FLAGS(req) \ - ptlrpc_rqphase2str(req), \ - FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ - FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \ - FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ - FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ - FLAG(req->rq_no_resend, "N"), \ - FLAG(req->rq_waiting, "W"), \ - FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \ - FLAG(req->rq_committed, "M") - -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s" - -void _debug_req(struct ptlrpc_request *req, - struct libcfs_debug_msg_data *data, const char *fmt, ...) - __printf(3, 4); - -/** - * Helper that decides if we need to print request according to current debug - * level settings - */ -#define debug_req(msgdata, mask, cdls, req, fmt, a...) \ -do { \ - CFS_CHECK_STACK(msgdata, mask, cdls); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - _debug_req((req), msgdata, fmt, ##a); \ -} while (0) - -/** - * This is the debug print function you need to use to print request structure - * content into lustre debug log. 
- * for most callers (level is a constant) this is resolved at compile time - */ -#define DEBUG_REQ(level, req, fmt, args...) \ -do { \ - if ((level) & (D_ERROR | D_WARNING)) { \ - static struct cfs_debug_limit_state cdls; \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \ - debug_req(&msgdata, level, &cdls, req, "@@@ "fmt" ", ## args);\ - } else { \ - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, NULL); \ - debug_req(&msgdata, level, NULL, req, "@@@ "fmt" ", ## args); \ - } \ -} while (0) -/** @} */ - -/** - * Structure that defines a single page of a bulk transfer - */ -struct ptlrpc_bulk_page { - /** Linkage to list of pages in a bulk */ - struct list_head bp_link; - /** - * Number of bytes in a page to transfer starting from \a bp_pageoffset - */ - int bp_buflen; - /** offset within a page */ - int bp_pageoffset; - /** The page itself */ - struct page *bp_page; -}; - -enum ptlrpc_bulk_op_type { - PTLRPC_BULK_OP_ACTIVE = 0x00000001, - PTLRPC_BULK_OP_PASSIVE = 0x00000002, - PTLRPC_BULK_OP_PUT = 0x00000004, - PTLRPC_BULK_OP_GET = 0x00000008, - PTLRPC_BULK_BUF_KVEC = 0x00000010, - PTLRPC_BULK_BUF_KIOV = 0x00000020, - PTLRPC_BULK_GET_SOURCE = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_GET, - PTLRPC_BULK_PUT_SINK = PTLRPC_BULK_OP_PASSIVE | PTLRPC_BULK_OP_PUT, - PTLRPC_BULK_GET_SINK = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_GET, - PTLRPC_BULK_PUT_SOURCE = PTLRPC_BULK_OP_ACTIVE | PTLRPC_BULK_OP_PUT, -}; - -static inline bool ptlrpc_is_bulk_op_get(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_OP_GET) == PTLRPC_BULK_OP_GET; -} - -static inline bool ptlrpc_is_bulk_get_source(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_GET_SOURCE) == PTLRPC_BULK_GET_SOURCE; -} - -static inline bool ptlrpc_is_bulk_put_sink(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_PUT_SINK) == PTLRPC_BULK_PUT_SINK; -} - -static inline bool ptlrpc_is_bulk_get_sink(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_GET_SINK) == 
PTLRPC_BULK_GET_SINK; -} - -static inline bool ptlrpc_is_bulk_put_source(enum ptlrpc_bulk_op_type type) -{ - return (type & PTLRPC_BULK_PUT_SOURCE) == PTLRPC_BULK_PUT_SOURCE; -} - -static inline bool ptlrpc_is_bulk_desc_kvec(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) - == PTLRPC_BULK_BUF_KVEC; -} - -static inline bool ptlrpc_is_bulk_desc_kiov(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_BUF_KVEC) | (type & PTLRPC_BULK_BUF_KIOV)) - == PTLRPC_BULK_BUF_KIOV; -} - -static inline bool ptlrpc_is_bulk_op_active(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_OP_ACTIVE) | - (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_ACTIVE; -} - -static inline bool ptlrpc_is_bulk_op_passive(enum ptlrpc_bulk_op_type type) -{ - return ((type & PTLRPC_BULK_OP_ACTIVE) | - (type & PTLRPC_BULK_OP_PASSIVE)) == PTLRPC_BULK_OP_PASSIVE; -} - -struct ptlrpc_bulk_frag_ops { - /** - * Add a page \a page to the bulk descriptor \a desc - * Data to transfer in the page starts at offset \a pageoffset and - * amount of data to transfer from the page is \a len - */ - void (*add_kiov_frag)(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len); - - /* - * Add a \a fragment to the bulk descriptor \a desc. - * Data to transfer in the fragment is pointed to by \a frag - * The size of the fragment is \a len - */ - int (*add_iov_frag)(struct ptlrpc_bulk_desc *desc, void *frag, int len); - - /** - * Uninitialize and free bulk descriptor \a desc. - * Works on bulk descriptors both from server and client side. - */ - void (*release_frags)(struct ptlrpc_bulk_desc *desc); -}; - -extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops; -extern const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops; - -/** - * Definition of bulk descriptor. 
- * Bulks are special "Two phase" RPCs where initial request message - * is sent first and it is followed bt a transfer (o receiving) of a large - * amount of data to be settled into pages referenced from the bulk descriptors. - * Bulks transfers (the actual data following the small requests) are done - * on separate LNet portals. - * In lustre we use bulk transfers for READ and WRITE transfers from/to OSTs. - * Another user is readpage for MDT. - */ -struct ptlrpc_bulk_desc { - /** completed with failure */ - unsigned long bd_failure:1; - /** client side */ - unsigned long bd_registered:1; - /** For serialization with callback */ - spinlock_t bd_lock; - /** Import generation when request for this bulk was sent */ - int bd_import_generation; - /** {put,get}{source,sink}{kvec,kiov} */ - enum ptlrpc_bulk_op_type bd_type; - /** LNet portal for this bulk */ - __u32 bd_portal; - /** Server side - export this bulk created for */ - struct obd_export *bd_export; - /** Client side - import this bulk was sent on */ - struct obd_import *bd_import; - /** Back pointer to the request */ - struct ptlrpc_request *bd_req; - struct ptlrpc_bulk_frag_ops *bd_frag_ops; - wait_queue_head_t bd_waitq; /* server side only WQ */ - int bd_iov_count; /* # entries in bd_iov */ - int bd_max_iov; /* allocated size of bd_iov */ - int bd_nob; /* # bytes covered */ - int bd_nob_transferred; /* # bytes GOT/PUT */ - - u64 bd_last_mbits; - - struct ptlrpc_cb_id bd_cbid; /* network callback info */ - lnet_nid_t bd_sender; /* stash event::sender */ - int bd_md_count; /* # valid entries in bd_mds */ - int bd_md_max_brw; /* max entries in bd_mds */ - /** array of associated MDs */ - struct lnet_handle_md bd_mds[PTLRPC_BULK_OPS_COUNT]; - - union { - struct { - /* - * encrypt iov, size is either 0 or bd_iov_count. 
- */ - struct bio_vec *bd_enc_vec; - struct bio_vec *bd_vec; /* Array of bio_vecs */ - } bd_kiov; - - struct { - struct kvec *bd_enc_kvec; - struct kvec *bd_kvec; /* Array of kvecs */ - } bd_kvec; - } bd_u; -}; - -#define GET_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_vec) -#define BD_GET_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_vec[i]) -#define GET_ENC_KIOV(desc) ((desc)->bd_u.bd_kiov.bd_enc_vec) -#define BD_GET_ENC_KIOV(desc, i) ((desc)->bd_u.bd_kiov.bd_enc_vec[i]) -#define GET_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_kvec) -#define BD_GET_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_kvec[i]) -#define GET_ENC_KVEC(desc) ((desc)->bd_u.bd_kvec.bd_enc_kvec) -#define BD_GET_ENC_KVEC(desc, i) ((desc)->bd_u.bd_kvec.bd_enc_kvec[i]) - -enum { - SVC_STOPPED = 1 << 0, - SVC_STOPPING = 1 << 1, - SVC_STARTING = 1 << 2, - SVC_RUNNING = 1 << 3, -}; - -#define PTLRPC_THR_NAME_LEN 32 -/** - * Definition of server service thread structure - */ -struct ptlrpc_thread { - /** - * List of active threads in svc->srv_threads - */ - struct list_head t_link; - /** - * thread-private data (preallocated memory) - */ - void *t_data; - __u32 t_flags; - /** - * service thread index, from ptlrpc_start_threads - */ - unsigned int t_id; - /** - * service thread pid - */ - pid_t t_pid; - /** - * put watchdog in the structure per thread b=14840 - * - * Lustre watchdog is removed for client in the hope - * of a generic watchdog can be merged in kernel. - * When that happens, we should add below back. 
- * - * struct lc_watchdog *t_watchdog; - */ - /** - * the svc this thread belonged to b=18582 - */ - struct ptlrpc_service_part *t_svcpt; - wait_queue_head_t t_ctl_waitq; - struct lu_env *t_env; - char t_name[PTLRPC_THR_NAME_LEN]; -}; - -static inline int thread_is_stopped(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_STOPPED); -} - -static inline int thread_is_stopping(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_STOPPING); -} - -static inline int thread_is_starting(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_STARTING); -} - -static inline int thread_is_running(struct ptlrpc_thread *thread) -{ - return !!(thread->t_flags & SVC_RUNNING); -} - -static inline void thread_clear_flags(struct ptlrpc_thread *thread, __u32 flags) -{ - thread->t_flags &= ~flags; -} - -static inline void thread_set_flags(struct ptlrpc_thread *thread, __u32 flags) -{ - thread->t_flags = flags; -} - -static inline void thread_add_flags(struct ptlrpc_thread *thread, __u32 flags) -{ - thread->t_flags |= flags; -} - -static inline int thread_test_and_clear_flags(struct ptlrpc_thread *thread, - __u32 flags) -{ - if (thread->t_flags & flags) { - thread->t_flags &= ~flags; - return 1; - } - return 0; -} - -/** - * Request buffer descriptor structure. - * This is a structure that contains one posted request buffer for service. - * Once data land into a buffer, event callback creates actual request and - * notifies wakes one of the service threads to process new incoming request. - * More than one request can fit into the buffer. 
- */ -struct ptlrpc_request_buffer_desc { - /** Link item for rqbds on a service */ - struct list_head rqbd_list; - /** History of requests for this buffer */ - struct list_head rqbd_reqs; - /** Back pointer to service for which this buffer is registered */ - struct ptlrpc_service_part *rqbd_svcpt; - /** LNet descriptor */ - struct lnet_handle_md rqbd_md_h; - int rqbd_refcount; - /** The buffer itself */ - char *rqbd_buffer; - struct ptlrpc_cb_id rqbd_cbid; - /** - * This "embedded" request structure is only used for the - * last request to fit into the buffer - */ - struct ptlrpc_request rqbd_req; -}; - -typedef int (*svc_handler_t)(struct ptlrpc_request *req); - -struct ptlrpc_service_ops { - /** - * if non-NULL called during thread creation (ptlrpc_start_thread()) - * to initialize service specific per-thread state. - */ - int (*so_thr_init)(struct ptlrpc_thread *thr); - /** - * if non-NULL called during thread shutdown (ptlrpc_main()) to - * destruct state created by ->srv_init(). - */ - void (*so_thr_done)(struct ptlrpc_thread *thr); - /** - * Handler function for incoming requests for this service - */ - int (*so_req_handler)(struct ptlrpc_request *req); - /** - * function to determine priority of the request, it's called - * on every new request - */ - int (*so_hpreq_handler)(struct ptlrpc_request *); - /** - * service-specific print fn - */ - void (*so_req_printer)(void *, struct ptlrpc_request *); -}; - -#ifndef __cfs_cacheline_aligned -/* NB: put it here for reducing patche dependence */ -# define __cfs_cacheline_aligned -#endif - -/** - * How many high priority requests to serve before serving one normal - * priority request - */ -#define PTLRPC_SVC_HP_RATIO 10 - -/** - * Definition of PortalRPC service. - * The service is listening on a particular portal (like tcp port) - * and perform actions for a specific server like IO service for OST - * or general metadata service for MDS. 
- */ -struct ptlrpc_service { - /** serialize sysfs operations */ - spinlock_t srv_lock; - /** most often accessed fields */ - /** chain thru all services */ - struct list_head srv_list; - /** service operations table */ - struct ptlrpc_service_ops srv_ops; - /** only statically allocated strings here; we don't clean them */ - char *srv_name; - /** only statically allocated strings here; we don't clean them */ - char *srv_thread_name; - /** service thread list */ - struct list_head srv_threads; - /** threads # should be created for each partition on initializing */ - int srv_nthrs_cpt_init; - /** limit of threads number for each partition */ - int srv_nthrs_cpt_limit; - /** Root of debugfs dir tree for this service */ - struct dentry *srv_debugfs_entry; - /** Pointer to statistic data for this service */ - struct lprocfs_stats *srv_stats; - /** # hp per lp reqs to handle */ - int srv_hpreq_ratio; - /** biggest request to receive */ - int srv_max_req_size; - /** biggest reply to send */ - int srv_max_reply_size; - /** size of individual buffers */ - int srv_buf_size; - /** # buffers to allocate in 1 group */ - int srv_nbuf_per_group; - /** Local portal on which to receive requests */ - __u32 srv_req_portal; - /** Portal on the client to send replies to */ - __u32 srv_rep_portal; - /** - * Tags for lu_context associated with this thread, see struct - * lu_context. 
- */ - __u32 srv_ctx_tags; - /** soft watchdog timeout multiplier */ - int srv_watchdog_factor; - /** under unregister_service */ - unsigned srv_is_stopping:1; - - /** max # request buffers in history per partition */ - int srv_hist_nrqbds_cpt_max; - /** number of CPTs this service bound on */ - int srv_ncpts; - /** CPTs array this service bound on */ - __u32 *srv_cpts; - /** 2^srv_cptab_bits >= cfs_cpt_numbert(srv_cptable) */ - int srv_cpt_bits; - /** CPT table this service is running over */ - struct cfs_cpt_table *srv_cptable; - - /* sysfs object */ - struct kobject srv_kobj; - struct completion srv_kobj_unregister; - /** - * partition data for ptlrpc service - */ - struct ptlrpc_service_part *srv_parts[0]; -}; - -/** - * Definition of PortalRPC service partition data. - * Although a service only has one instance of it right now, but we - * will have multiple instances very soon (instance per CPT). - * - * it has four locks: - * \a scp_lock - * serialize operations on rqbd and requests waiting for preprocess - * \a scp_req_lock - * serialize operations active requests sent to this portal - * \a scp_at_lock - * serialize adaptive timeout stuff - * \a scp_rep_lock - * serialize operations on RS list (reply states) - * - * We don't have any use-case to take two or more locks at the same time - * for now, so there is no lock order issue. 
- */ -struct ptlrpc_service_part { - /** back reference to owner */ - struct ptlrpc_service *scp_service __cfs_cacheline_aligned; - /* CPT id, reserved */ - int scp_cpt; - /** always increasing number */ - int scp_thr_nextid; - /** # of starting threads */ - int scp_nthrs_starting; - /** # of stopping threads, reserved for shrinking threads */ - int scp_nthrs_stopping; - /** # running threads */ - int scp_nthrs_running; - /** service threads list */ - struct list_head scp_threads; - - /** - * serialize the following fields, used for protecting - * rqbd list and incoming requests waiting for preprocess, - * threads starting & stopping are also protected by this lock. - */ - spinlock_t scp_lock __cfs_cacheline_aligned; - /** total # req buffer descs allocated */ - int scp_nrqbds_total; - /** # posted request buffers for receiving */ - int scp_nrqbds_posted; - /** in progress of allocating rqbd */ - int scp_rqbd_allocating; - /** # incoming reqs */ - int scp_nreqs_incoming; - /** request buffers to be reposted */ - struct list_head scp_rqbd_idle; - /** req buffers receiving */ - struct list_head scp_rqbd_posted; - /** incoming reqs */ - struct list_head scp_req_incoming; - /** timeout before re-posting reqs, in tick */ - long scp_rqbd_timeout; - /** - * all threads sleep on this. This wait-queue is signalled when new - * incoming request arrives and when difficult reply has to be handled. 
- */ - wait_queue_head_t scp_waitq; - - /** request history */ - struct list_head scp_hist_reqs; - /** request buffer history */ - struct list_head scp_hist_rqbds; - /** # request buffers in history */ - int scp_hist_nrqbds; - /** sequence number for request */ - __u64 scp_hist_seq; - /** highest seq culled from history */ - __u64 scp_hist_seq_culled; - - /** - * serialize the following fields, used for processing requests - * sent to this portal - */ - spinlock_t scp_req_lock __cfs_cacheline_aligned; - /** # reqs in either of the NRS heads below */ - /** # reqs being served */ - int scp_nreqs_active; - /** # HPreqs being served */ - int scp_nhreqs_active; - /** # hp requests handled */ - int scp_hreq_count; - - /** NRS head for regular requests */ - struct ptlrpc_nrs scp_nrs_reg; - /** NRS head for HP requests; this is only valid for services that can - * handle HP requests - */ - struct ptlrpc_nrs *scp_nrs_hp; - - /** AT stuff */ - /** @{ */ - /** - * serialize the following fields, used for changes on - * adaptive timeout - */ - spinlock_t scp_at_lock __cfs_cacheline_aligned; - /** estimated rpc service time */ - struct adaptive_timeout scp_at_estimate; - /** reqs waiting for replies */ - struct ptlrpc_at_array scp_at_array; - /** early reply timer */ - struct timer_list scp_at_timer; - /** debug */ - unsigned long scp_at_checktime; - /** check early replies */ - unsigned scp_at_check; - /** @} */ - - /** - * serialize the following fields, used for processing - * replies for this portal - */ - spinlock_t scp_rep_lock __cfs_cacheline_aligned; - /** all the active replies */ - struct list_head scp_rep_active; - /** List of free reply_states */ - struct list_head scp_rep_idle; - /** waitq to run, when adding stuff to srv_free_rs_list */ - wait_queue_head_t scp_rep_waitq; - /** # 'difficult' replies */ - atomic_t scp_nreps_difficult; -}; - -#define ptlrpc_service_for_each_part(part, i, svc) \ - for (i = 0; \ - i < (svc)->srv_ncpts && \ - (svc)->srv_parts && \ - 
((part) = (svc)->srv_parts[i]); i++) - -/** - * Declaration of ptlrpcd control structure - */ -struct ptlrpcd_ctl { - /** - * Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE) - */ - unsigned long pc_flags; - /** - * Thread lock protecting structure fields. - */ - spinlock_t pc_lock; - /** - * Start completion. - */ - struct completion pc_starting; - /** - * Stop completion. - */ - struct completion pc_finishing; - /** - * Thread requests set. - */ - struct ptlrpc_request_set *pc_set; - /** - * Thread name used in kthread_run() - */ - char pc_name[16]; - /** - * CPT the thread is bound on. - */ - int pc_cpt; - /** - * Index of ptlrpcd thread in the array. - */ - int pc_index; - /** - * Pointer to the array of partners' ptlrpcd_ctl structure. - */ - struct ptlrpcd_ctl **pc_partners; - /** - * Number of the ptlrpcd's partners. - */ - int pc_npartners; - /** - * Record the partner index to be processed next. - */ - int pc_cursor; - /** - * Error code if the thread failed to fully start. - */ - int pc_error; -}; - -/* Bits for pc_flags */ -enum ptlrpcd_ctl_flags { - /** - * Ptlrpc thread start flag. - */ - LIOD_START = 1 << 0, - /** - * Ptlrpc thread stop flag. - */ - LIOD_STOP = 1 << 1, - /** - * Ptlrpc thread force flag (only stop force so far). - * This will cause aborting any inflight rpcs handled - * by thread if LIOD_STOP is specified. - */ - LIOD_FORCE = 1 << 2, - /** - * This is a recovery ptlrpc thread. - */ - LIOD_RECOVERY = 1 << 3, -}; - -/** - * \addtogroup nrs - * @{ - * - * Service compatibility function; the policy is compatible with all services. - * - * \param[in] svc The service the policy is attempting to register with. 
- * \param[in] desc The policy descriptor - * - * \retval true The policy is compatible with the service - * - * \see ptlrpc_nrs_pol_desc::pd_compat() - */ -static inline bool nrs_policy_compat_all(const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc) -{ - return true; -} - -/** - * Service compatibility function; the policy is compatible with only a specific - * service which is identified by its human-readable name at - * ptlrpc_service::srv_name. - * - * \param[in] svc The service the policy is attempting to register with. - * \param[in] desc The policy descriptor - * - * \retval false The policy is not compatible with the service - * \retval true The policy is compatible with the service - * - * \see ptlrpc_nrs_pol_desc::pd_compat() - */ -static inline bool nrs_policy_compat_one(const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc) -{ - return strcmp(svc->srv_name, desc->pd_compat_svc_name) == 0; -} - -/** @} nrs */ - -/* ptlrpc/events.c */ -extern struct lnet_handle_eq ptlrpc_eq_h; -int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, - struct lnet_process_id *peer, lnet_nid_t *self); -/** - * These callbacks are invoked by LNet when something happened to - * underlying buffer - * @{ - */ -void request_out_callback(struct lnet_event *ev); -void reply_in_callback(struct lnet_event *ev); -void client_bulk_callback(struct lnet_event *ev); -void request_in_callback(struct lnet_event *ev); -void reply_out_callback(struct lnet_event *ev); -/** @} */ - -/* ptlrpc/connection.c */ -struct ptlrpc_connection *ptlrpc_connection_get(struct lnet_process_id peer, - lnet_nid_t self, - struct obd_uuid *uuid); -int ptlrpc_connection_put(struct ptlrpc_connection *c); -struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); -int ptlrpc_connection_init(void); -void ptlrpc_connection_fini(void); - -/* ptlrpc/niobuf.c */ -/** - * Actual interfacing with LNet to put/get/register/unregister stuff - * @{ - */ - -int 
ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async); - -static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req) -{ - struct ptlrpc_bulk_desc *desc; - int rc; - - desc = req->rq_bulk; - - if (req->rq_bulk_deadline > ktime_get_real_seconds()) - return 1; - - if (!desc) - return 0; - - spin_lock(&desc->bd_lock); - rc = desc->bd_md_count; - spin_unlock(&desc->bd_lock); - return rc; -} - -#define PTLRPC_REPLY_MAYBE_DIFFICULT 0x01 -#define PTLRPC_REPLY_EARLY 0x02 -int ptlrpc_send_reply(struct ptlrpc_request *req, int flags); -int ptlrpc_reply(struct ptlrpc_request *req); -int ptlrpc_send_error(struct ptlrpc_request *req, int difficult); -int ptlrpc_error(struct ptlrpc_request *req); -int ptlrpc_at_get_net_latency(struct ptlrpc_request *req); -int ptl_send_rpc(struct ptlrpc_request *request, int noreply); -int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd); -/** @} */ - -/* ptlrpc/client.c */ -/** - * Client-side portals API. Everything to send requests, receive replies, - * request queues, request management, etc. 
- * @{ - */ -void ptlrpc_request_committed(struct ptlrpc_request *req, int force); - -int ptlrpc_inc_ref(void); -void ptlrpc_dec_ref(void); - -void ptlrpc_init_client(int req_portal, int rep_portal, char *name, - struct ptlrpc_client *); -struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid); - -int ptlrpc_queue_wait(struct ptlrpc_request *req); -int ptlrpc_replay_req(struct ptlrpc_request *req); -void ptlrpc_abort_inflight(struct obd_import *imp); -void ptlrpc_abort_set(struct ptlrpc_request_set *set); - -struct ptlrpc_request_set *ptlrpc_prep_set(void); -struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func, - void *arg); -int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set); -int ptlrpc_set_wait(struct ptlrpc_request_set *); -void ptlrpc_mark_interrupted(struct ptlrpc_request *req); -void ptlrpc_set_destroy(struct ptlrpc_request_set *); -void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *); - -void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool); -int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq); - -struct ptlrpc_request_pool * -ptlrpc_init_rq_pool(int, int, - int (*populate_pool)(struct ptlrpc_request_pool *, int)); - -void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req); -struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp, - const struct req_format *format); -struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp, - struct ptlrpc_request_pool *, - const struct req_format *); -void ptlrpc_request_free(struct ptlrpc_request *request); -int ptlrpc_request_pack(struct ptlrpc_request *request, - __u32 version, int opcode); -struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *, - const struct req_format *, - __u32, int); -int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, - __u32 version, int opcode, char **bufs, - struct ptlrpc_cli_ctx *ctx); -void ptlrpc_req_finished(struct 
ptlrpc_request *request); -struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req); -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, - unsigned int nfrags, - unsigned int max_brw, - unsigned int type, - unsigned int portal, - const struct ptlrpc_bulk_frag_ops *ops); - -int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc, - void *frag, int len); -void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len, - int pin); -static inline void ptlrpc_prep_bulk_page_pin(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, - int len) -{ - __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1); -} - -static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, - int len) -{ - __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0); -} - -void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk); - -static inline void ptlrpc_release_bulk_page_pin(struct ptlrpc_bulk_desc *desc) -{ - int i; - - for (i = 0; i < desc->bd_iov_count ; i++) - put_page(BD_GET_KIOV(desc, i).bv_page); -} - -void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, - struct obd_import *imp); -__u64 ptlrpc_next_xid(void); -__u64 ptlrpc_sample_next_xid(void); -__u64 ptlrpc_req_xid(struct ptlrpc_request *request); - -/* Set of routines to run a function in ptlrpcd context */ -void *ptlrpcd_alloc_work(struct obd_import *imp, - int (*cb)(const struct lu_env *, void *), void *data); -void ptlrpcd_destroy_work(void *handler); -int ptlrpcd_queue_work(void *handler); - -/** @} */ -struct ptlrpc_service_buf_conf { - /* nbufs is buffers # to allocate when growing the pool */ - unsigned int bc_nbufs; - /* buffer size to post */ - unsigned int bc_buf_size; - /* portal to listed for requests on */ - unsigned int bc_req_portal; - /* portal of where to send replies to */ - unsigned int bc_rep_portal; - /* maximum request size to be accepted for this service 
*/ - unsigned int bc_req_max_size; - /* maximum reply size this service can ever send */ - unsigned int bc_rep_max_size; -}; - -struct ptlrpc_service_thr_conf { - /* threadname should be 8 characters or less - 6 will be added on */ - char *tc_thr_name; - /* threads increasing factor for each CPU */ - unsigned int tc_thr_factor; - /* service threads # to start on each partition while initializing */ - unsigned int tc_nthrs_init; - /* - * low water of threads # upper-limit on each partition while running, - * service availability may be impacted if threads number is lower - * than this value. It can be ZERO if the service doesn't require - * CPU affinity or there is only one partition. - */ - unsigned int tc_nthrs_base; - /* "soft" limit for total threads number */ - unsigned int tc_nthrs_max; - /* user specified threads number, it will be validated due to - * other members of this structure. - */ - unsigned int tc_nthrs_user; - /* set NUMA node affinity for service threads */ - unsigned int tc_cpu_affinity; - /* Tags for lu_context associated with service thread */ - __u32 tc_ctx_tags; -}; - -struct ptlrpc_service_cpt_conf { - struct cfs_cpt_table *cc_cptable; - /* string pattern to describe CPTs for a service */ - char *cc_pattern; -}; - -struct ptlrpc_service_conf { - /* service name */ - char *psc_name; - /* soft watchdog timeout multiplifier to print stuck service traces */ - unsigned int psc_watchdog_factor; - /* buffer information */ - struct ptlrpc_service_buf_conf psc_buf; - /* thread information */ - struct ptlrpc_service_thr_conf psc_thr; - /* CPU partition information */ - struct ptlrpc_service_cpt_conf psc_cpt; - /* function table */ - struct ptlrpc_service_ops psc_ops; -}; - -/* ptlrpc/service.c */ -/** - * Server-side services API. 
Register/unregister service, request state - * management, service thread management - * - * @{ - */ -void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs); -void ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs); -struct ptlrpc_service *ptlrpc_register_service(struct ptlrpc_service_conf *conf, - struct kset *parent, - struct dentry *debugfs_entry); - -int ptlrpc_start_threads(struct ptlrpc_service *svc); -int ptlrpc_unregister_service(struct ptlrpc_service *service); - -int ptlrpc_hr_init(void); -void ptlrpc_hr_fini(void); - -/** @} */ - -/* ptlrpc/import.c */ -/** - * Import API - * @{ - */ -int ptlrpc_connect_import(struct obd_import *imp); -int ptlrpc_init_import(struct obd_import *imp); -int ptlrpc_disconnect_import(struct obd_import *imp, int noclose); -int ptlrpc_import_recovery_state_machine(struct obd_import *imp); - -/* ptlrpc/pack_generic.c */ -int ptlrpc_reconnect_import(struct obd_import *imp); -/** @} */ - -/** - * ptlrpc msg buffer and swab interface - * - * @{ - */ -int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout, - u32 index); -void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout, - u32 index); -int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len); -int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len); - -void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens, - char **bufs); -int lustre_pack_request(struct ptlrpc_request *, __u32 magic, int count, - __u32 *lens, char **bufs); -int lustre_pack_reply(struct ptlrpc_request *, int count, __u32 *lens, - char **bufs); -int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, - __u32 *lens, char **bufs, int flags); -#define LPRFL_EARLY_REPLY 1 -int lustre_pack_reply_flags(struct ptlrpc_request *, int count, __u32 *lens, - char **bufs, int flags); -int lustre_shrink_msg(struct lustre_msg *msg, int segment, - unsigned int newlen, int move_data); -void lustre_free_reply_state(struct 
ptlrpc_reply_state *rs); -int __lustre_unpack_msg(struct lustre_msg *m, int len); -u32 lustre_msg_hdr_size(__u32 magic, u32 count); -u32 lustre_msg_size(__u32 magic, int count, __u32 *lengths); -u32 lustre_msg_size_v2(int count, __u32 *lengths); -u32 lustre_packed_msg_size(struct lustre_msg *msg); -u32 lustre_msg_early_size(void); -void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size); -void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 minlen); -u32 lustre_msg_buflen(struct lustre_msg *m, u32 n); -u32 lustre_msg_bufcount(struct lustre_msg *m); -char *lustre_msg_string(struct lustre_msg *m, u32 n, u32 max_len); -__u32 lustre_msghdr_get_flags(struct lustre_msg *msg); -void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags); -__u32 lustre_msg_get_flags(struct lustre_msg *msg); -void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags); -void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags); -void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags); -__u32 lustre_msg_get_op_flags(struct lustre_msg *msg); -void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags); -struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg); -__u32 lustre_msg_get_type(struct lustre_msg *msg); -void lustre_msg_add_version(struct lustre_msg *msg, u32 version); -__u32 lustre_msg_get_opc(struct lustre_msg *msg); -__u16 lustre_msg_get_tag(struct lustre_msg *msg); -__u64 lustre_msg_get_last_committed(struct lustre_msg *msg); -__u64 *lustre_msg_get_versions(struct lustre_msg *msg); -__u64 lustre_msg_get_transno(struct lustre_msg *msg); -__u64 lustre_msg_get_slv(struct lustre_msg *msg); -__u32 lustre_msg_get_limit(struct lustre_msg *msg); -void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv); -void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit); -int lustre_msg_get_status(struct lustre_msg *msg); -__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg); -__u32 lustre_msg_get_magic(struct lustre_msg *msg); -__u32 
lustre_msg_get_timeout(struct lustre_msg *msg); -__u32 lustre_msg_get_service_time(struct lustre_msg *msg); -__u32 lustre_msg_get_cksum(struct lustre_msg *msg); -__u32 lustre_msg_calc_cksum(struct lustre_msg *msg); -void lustre_msg_set_handle(struct lustre_msg *msg, - struct lustre_handle *handle); -void lustre_msg_set_type(struct lustre_msg *msg, __u32 type); -void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc); -void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid); -void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag); -void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions); -void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno); -void lustre_msg_set_status(struct lustre_msg *msg, __u32 status); -void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt); -void ptlrpc_request_set_replen(struct ptlrpc_request *req); -void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout); -void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time); -void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid); -void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum); -void lustre_msg_set_mbits(struct lustre_msg *msg, u64 mbits); - -static inline void -lustre_shrink_reply(struct ptlrpc_request *req, int segment, - unsigned int newlen, int move_data) -{ - LASSERT(req->rq_reply_state); - LASSERT(req->rq_repmsg); - req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment, - newlen, move_data); -} - -#ifdef CONFIG_LUSTRE_TRANSLATE_ERRNOS - -static inline int ptlrpc_status_hton(int h) -{ - /* - * Positive errnos must be network errnos, such as LUSTRE_EDEADLK, - * ELDLM_LOCK_ABORTED, etc. - */ - if (h < 0) - return -lustre_errno_hton(-h); - else - return h; -} - -static inline int ptlrpc_status_ntoh(int n) -{ - /* - * See the comment in ptlrpc_status_hton(). 
- */ - if (n < 0) - return -lustre_errno_ntoh(-n); - else - return n; -} - -#else - -#define ptlrpc_status_hton(h) (h) -#define ptlrpc_status_ntoh(n) (n) - -#endif -/** @} */ - -/** Change request phase of \a req to \a new_phase */ -static inline void -ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase) -{ - if (req->rq_phase == new_phase) - return; - - if (new_phase == RQ_PHASE_UNREG_RPC || - new_phase == RQ_PHASE_UNREG_BULK) { - /* No embedded unregistering phases */ - if (req->rq_phase == RQ_PHASE_UNREG_RPC || - req->rq_phase == RQ_PHASE_UNREG_BULK) - return; - - req->rq_next_phase = req->rq_phase; - if (req->rq_import) - atomic_inc(&req->rq_import->imp_unregistering); - } - - if (req->rq_phase == RQ_PHASE_UNREG_RPC || - req->rq_phase == RQ_PHASE_UNREG_BULK) { - if (req->rq_import) - atomic_dec(&req->rq_import->imp_unregistering); - } - - DEBUG_REQ(D_INFO, req, "move req \"%s\" -> \"%s\"", - ptlrpc_rqphase2str(req), ptlrpc_phase2str(new_phase)); - - req->rq_phase = new_phase; -} - -/** - * Returns true if request \a req got early reply and hard deadline is not met - */ -static inline int -ptlrpc_client_early(struct ptlrpc_request *req) -{ - return req->rq_early; -} - -/** - * Returns true if we got real reply from server for this request - */ -static inline int -ptlrpc_client_replied(struct ptlrpc_request *req) -{ - if (req->rq_reply_deadline > ktime_get_real_seconds()) - return 0; - return req->rq_replied; -} - -/** Returns true if request \a req is in process of receiving server reply */ -static inline int -ptlrpc_client_recv(struct ptlrpc_request *req) -{ - if (req->rq_reply_deadline > ktime_get_real_seconds()) - return 1; - return req->rq_receiving_reply; -} - -static inline int -ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req) -{ - int rc; - - spin_lock(&req->rq_lock); - if (req->rq_reply_deadline > ktime_get_real_seconds()) { - spin_unlock(&req->rq_lock); - return 1; - } - if (req->rq_req_deadline > ktime_get_real_seconds()) { 
- spin_unlock(&req->rq_lock); - return 1; - } - rc = !req->rq_req_unlinked || !req->rq_reply_unlinked || - req->rq_receiving_reply; - spin_unlock(&req->rq_lock); - return rc; -} - -static inline void -ptlrpc_client_wake_req(struct ptlrpc_request *req) -{ - if (!req->rq_set) - wake_up(&req->rq_reply_waitq); - else - wake_up(&req->rq_set->set_waitq); -} - -static inline void -ptlrpc_rs_addref(struct ptlrpc_reply_state *rs) -{ - LASSERT(atomic_read(&rs->rs_refcount) > 0); - atomic_inc(&rs->rs_refcount); -} - -static inline void -ptlrpc_rs_decref(struct ptlrpc_reply_state *rs) -{ - LASSERT(atomic_read(&rs->rs_refcount) > 0); - if (atomic_dec_and_test(&rs->rs_refcount)) - lustre_free_reply_state(rs); -} - -/* Should only be called once per req */ -static inline void ptlrpc_req_drop_rs(struct ptlrpc_request *req) -{ - if (!req->rq_reply_state) - return; /* shouldn't occur */ - ptlrpc_rs_decref(req->rq_reply_state); - req->rq_reply_state = NULL; - req->rq_repmsg = NULL; -} - -static inline __u32 lustre_request_magic(struct ptlrpc_request *req) -{ - return lustre_msg_get_magic(req->rq_reqmsg); -} - -static inline int ptlrpc_req_get_repsize(struct ptlrpc_request *req) -{ - switch (req->rq_reqmsg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return req->rq_reqmsg->lm_repsize; - default: - LASSERTF(0, "incorrect message magic: %08x\n", - req->rq_reqmsg->lm_magic); - return -EFAULT; - } -} - -static inline int ptlrpc_send_limit_expired(struct ptlrpc_request *req) -{ - if (req->rq_delay_limit != 0 && - time_before(req->rq_queued_time + req->rq_delay_limit * HZ, - jiffies)) { - return 1; - } - return 0; -} - -static inline int ptlrpc_no_resend(struct ptlrpc_request *req) -{ - if (!req->rq_no_resend && ptlrpc_send_limit_expired(req)) { - spin_lock(&req->rq_lock); - req->rq_no_resend = 1; - spin_unlock(&req->rq_lock); - } - return req->rq_no_resend; -} - -static inline int -ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt) -{ - int at = AT_OFF ? 
0 : at_get(&svcpt->scp_at_estimate); - - return svcpt->scp_service->srv_watchdog_factor * - max_t(int, at, obd_timeout); -} - -static inline struct ptlrpc_service * -ptlrpc_req2svc(struct ptlrpc_request *req) -{ - return req->rq_rqbd->rqbd_svcpt->scp_service; -} - -/* ldlm/ldlm_lib.c */ -/** - * Target client logic - * @{ - */ -int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg); -int client_obd_cleanup(struct obd_device *obddev); -int client_connect_import(const struct lu_env *env, - struct obd_export **exp, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *, - void *localdata); -int client_disconnect_export(struct obd_export *exp); -int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority); -int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid); -int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer, - struct obd_uuid *uuid); -int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid); -void client_destroy_import(struct obd_import *imp); -/** @} */ - -/* ptlrpc/pinger.c */ -/** - * Pinger API (client side only) - * @{ - */ -enum timeout_event { - TIMEOUT_GRANT = 1 -}; - -struct timeout_item; -typedef int (*timeout_cb_t)(struct timeout_item *, void *); -int ptlrpc_pinger_add_import(struct obd_import *imp); -int ptlrpc_pinger_del_import(struct obd_import *imp); -int ptlrpc_add_timeout_client(int time, enum timeout_event event, - timeout_cb_t cb, void *data, - struct list_head *obd_list); -int ptlrpc_del_timeout_client(struct list_head *obd_list, - enum timeout_event event); -struct ptlrpc_request *ptlrpc_prep_ping(struct obd_import *imp); -int ptlrpc_obd_ping(struct obd_device *obd); -void ptlrpc_pinger_ir_up(void); -void ptlrpc_pinger_ir_down(void); -/** @} */ -int ptlrpc_pinger_suppress_pings(void); - -/* ptlrpc/ptlrpcd.c */ -void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force); -void ptlrpcd_free(struct ptlrpcd_ctl *pc); -void 
ptlrpcd_wake(struct ptlrpc_request *req); -void ptlrpcd_add_req(struct ptlrpc_request *req); -int ptlrpcd_addref(void); -void ptlrpcd_decref(void); - -/* ptlrpc/lproc_ptlrpc.c */ -/** - * procfs output related functions - * @{ - */ -const char *ll_opcode2str(__u32 opcode); -void ptlrpc_lprocfs_register_obd(struct obd_device *obd); -void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd); -void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes); -/** @} */ - -/* ptlrpc/llog_client.c */ -extern struct llog_operations llog_client_ops; -/** @} net */ - -#endif -/** @} PtlRPC */ diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs.h b/drivers/staging/lustre/lustre/include/lustre_nrs.h deleted file mode 100644 index ffa7317da35b..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_nrs.h +++ /dev/null @@ -1,718 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2014, Intel Corporation. 
- * - * Copyright 2012 Xyratex Technology Limited - */ -/* - * - * Network Request Scheduler (NRS) - * - */ - -#ifndef _LUSTRE_NRS_H -#define _LUSTRE_NRS_H - -/** - * \defgroup nrs Network Request Scheduler - * @{ - */ -struct ptlrpc_nrs_policy; -struct ptlrpc_nrs_resource; -struct ptlrpc_nrs_request; - -/** - * NRS control operations. - * - * These are common for all policies. - */ -enum ptlrpc_nrs_ctl { - /** - * Not a valid opcode. - */ - PTLRPC_NRS_CTL_INVALID, - /** - * Activate the policy. - */ - PTLRPC_NRS_CTL_START, - /** - * Reserved for multiple primary policies, which may be a possibility - * in the future. - */ - PTLRPC_NRS_CTL_STOP, - /** - * Policies can start using opcodes from this value and onwards for - * their own purposes; the assigned value itself is arbitrary. - */ - PTLRPC_NRS_CTL_1ST_POL_SPEC = 0x20, -}; - -/** - * NRS policy operations. - * - * These determine the behaviour of a policy, and are called in response to - * NRS core events. - */ -struct ptlrpc_nrs_pol_ops { - /** - * Called during policy registration; this operation is optional. - * - * \param[in,out] policy The policy being initialized - */ - int (*op_policy_init)(struct ptlrpc_nrs_policy *policy); - /** - * Called during policy unregistration; this operation is optional. - * - * \param[in,out] policy The policy being unregistered/finalized - */ - void (*op_policy_fini)(struct ptlrpc_nrs_policy *policy); - /** - * Called when activating a policy via lprocfs; policies allocate and - * initialize their resources here; this operation is optional. 
- * - * \param[in,out] policy The policy being started - * - * \see nrs_policy_start_locked() - */ - int (*op_policy_start)(struct ptlrpc_nrs_policy *policy); - /** - * Called when deactivating a policy via lprocfs; policies deallocate - * their resources here; this operation is optional - * - * \param[in,out] policy The policy being stopped - * - * \see nrs_policy_stop0() - */ - void (*op_policy_stop)(struct ptlrpc_nrs_policy *policy); - /** - * Used for policy-specific operations; i.e. not generic ones like - * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous - * to an ioctl; this operation is optional. - * - * \param[in,out] policy The policy carrying out operation \a opc - * \param[in] opc The command operation being carried out - * \param[in,out] arg An generic buffer for communication between the - * user and the control operation - * - * \retval -ve error - * \retval 0 success - * - * \see ptlrpc_nrs_policy_control() - */ - int (*op_policy_ctl)(struct ptlrpc_nrs_policy *policy, - enum ptlrpc_nrs_ctl opc, void *arg); - - /** - * Called when obtaining references to the resources of the resource - * hierarchy for a request that has arrived for handling at the PTLRPC - * service. Policies should return -ve for requests they do not wish - * to handle. This operation is mandatory. - * - * \param[in,out] policy The policy we're getting resources for. - * \param[in,out] nrq The request we are getting resources for. - * \param[in] parent The parent resource of the resource being - * requested; set to NULL if none. - * \param[out] resp The resource is to be returned here; the - * fallback policy in an NRS head should - * \e always return a non-NULL pointer value. - * \param[in] moving_req When set, signifies that this is an attempt - * to obtain resources for a request being moved - * to the high-priority NRS head by - * ldlm_lock_reorder_req(). - * This implies two things: - * 1. We are under obd_export::exp_rpc_lock and - * so should not sleep. 
- * 2. We should not perform non-idempotent or can - * skip performing idempotent operations that - * were carried out when resources were first - * taken for the request when it was initialized - * in ptlrpc_nrs_req_initialize(). - * - * \retval 0, +ve The level of the returned resource in the resource - * hierarchy; currently only 0 (for a non-leaf resource) - * and 1 (for a leaf resource) are supported by the - * framework. - * \retval -ve error - * - * \see ptlrpc_nrs_req_initialize() - * \see ptlrpc_nrs_hpreq_add_nolock() - * \see ptlrpc_nrs_req_hp_move() - */ - int (*op_res_get)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq, - const struct ptlrpc_nrs_resource *parent, - struct ptlrpc_nrs_resource **resp, - bool moving_req); - /** - * Called when releasing references taken for resources in the resource - * hierarchy for the request; this operation is optional. - * - * \param[in,out] policy The policy the resource belongs to - * \param[in] res The resource to be freed - * - * \see ptlrpc_nrs_req_finalize() - * \see ptlrpc_nrs_hpreq_add_nolock() - * \see ptlrpc_nrs_req_hp_move() - */ - void (*op_res_put)(struct ptlrpc_nrs_policy *policy, - const struct ptlrpc_nrs_resource *res); - - /** - * Obtains a request for handling from the policy, and optionally - * removes the request from the policy; this operation is mandatory. - * - * \param[in,out] policy The policy to poll - * \param[in] peek When set, signifies that we just want to - * examine the request, and not handle it, so the - * request is not removed from the policy. - * \param[in] force When set, it will force a policy to return a - * request if it has one queued. 
- * - * \retval NULL No request available for handling - * \retval valid-pointer The request polled for handling - * - * \see ptlrpc_nrs_req_get_nolock() - */ - struct ptlrpc_nrs_request * - (*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek, - bool force); - /** - * Called when attempting to add a request to a policy for later - * handling; this operation is mandatory. - * - * \param[in,out] policy The policy on which to enqueue \a nrq - * \param[in,out] nrq The request to enqueue - * - * \retval 0 success - * \retval != 0 error - * - * \see ptlrpc_nrs_req_add_nolock() - */ - int (*op_req_enqueue)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Removes a request from the policy's set of pending requests. Normally - * called after a request has been polled successfully from the policy - * for handling; this operation is mandatory. - * - * \param[in,out] policy The policy the request \a nrq belongs to - * \param[in,out] nrq The request to dequeue - * - * \see ptlrpc_nrs_req_del_nolock() - */ - void (*op_req_dequeue)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Called after the request being carried out. Could be used for - * job/resource control; this operation is optional. - * - * \param[in,out] policy The policy which is stopping to handle request - * \a nrq - * \param[in,out] nrq The request - * - * \pre assert_spin_locked(&svcpt->scp_req_lock) - * - * \see ptlrpc_nrs_req_stop_nolock() - */ - void (*op_req_stop)(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq); - /** - * Registers the policy's lprocfs interface with a PTLRPC service. - * - * \param[in] svc The service - * - * \retval 0 success - * \retval != 0 error - */ - int (*op_lprocfs_init)(struct ptlrpc_service *svc); - /** - * Unegisters the policy's lprocfs interface with a PTLRPC service. 
- * - * In cases of failed policy registration in - * \e ptlrpc_nrs_policy_register(), this function may be called for a - * service which has not registered the policy successfully, so - * implementations of this method should make sure their operations are - * safe in such cases. - * - * \param[in] svc The service - */ - void (*op_lprocfs_fini)(struct ptlrpc_service *svc); -}; - -/** - * Policy flags - */ -enum nrs_policy_flags { - /** - * Fallback policy, use this flag only on a single supported policy per - * service. The flag cannot be used on policies that use - * \e PTLRPC_NRS_FL_REG_EXTERN - */ - PTLRPC_NRS_FL_FALLBACK = BIT(0), - /** - * Start policy immediately after registering. - */ - PTLRPC_NRS_FL_REG_START = BIT(1), - /** - * This is a policy registering from a module different to the one NRS - * core ships in (currently ptlrpc). - */ - PTLRPC_NRS_FL_REG_EXTERN = BIT(2), -}; - -/** - * NRS queue type. - * - * Denotes whether an NRS instance is for handling normal or high-priority - * RPCs, or whether an operation pertains to one or both of the NRS instances - * in a service. - */ -enum ptlrpc_nrs_queue_type { - PTLRPC_NRS_QUEUE_REG = BIT(0), - PTLRPC_NRS_QUEUE_HP = BIT(1), - PTLRPC_NRS_QUEUE_BOTH = (PTLRPC_NRS_QUEUE_REG | PTLRPC_NRS_QUEUE_HP) -}; - -/** - * NRS head - * - * A PTLRPC service has at least one NRS head instance for handling normal - * priority RPCs, and may optionally have a second NRS head instance for - * handling high-priority RPCs. Each NRS head maintains a list of available - * policies, of which one and only one policy is acting as the fallback policy, - * and optionally a different policy may be acting as the primary policy. For - * all RPCs handled by this NRS head instance, NRS core will first attempt to - * enqueue the RPC using the primary policy (if any). 
The fallback policy is - * used in the following cases: - * - when there was no primary policy in the - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state at the time the request - * was initialized. - * - when the primary policy that was at the - * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the - * RPC was initialized, denoted it did not wish, or for some other reason was - * not able to handle the request, by returning a non-valid NRS resource - * reference. - * - when the primary policy that was at the - * ptlrpc_nrs_pol_state::PTLRPC_NRS_POL_STATE_STARTED state at the time the - * RPC was initialized, fails later during the request enqueueing stage. - * - * \see nrs_resource_get_safe() - * \see nrs_request_enqueue() - */ -struct ptlrpc_nrs { - spinlock_t nrs_lock; - /** XXX Possibly replace svcpt->scp_req_lock with another lock here. */ - /** - * List of registered policies - */ - struct list_head nrs_policy_list; - /** - * List of policies with queued requests. Policies that have any - * outstanding requests are queued here, and this list is queried - * in a round-robin manner from NRS core when obtaining a request - * for handling. This ensures that requests from policies that at some - * point transition away from the - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED state are drained. 
- */ - struct list_head nrs_policy_queued; - /** - * Service partition for this NRS head - */ - struct ptlrpc_service_part *nrs_svcpt; - /** - * Primary policy, which is the preferred policy for handling RPCs - */ - struct ptlrpc_nrs_policy *nrs_policy_primary; - /** - * Fallback policy, which is the backup policy for handling RPCs - */ - struct ptlrpc_nrs_policy *nrs_policy_fallback; - /** - * This NRS head handles either HP or regular requests - */ - enum ptlrpc_nrs_queue_type nrs_queue_type; - /** - * # queued requests from all policies in this NRS head - */ - unsigned long nrs_req_queued; - /** - * # scheduled requests from all policies in this NRS head - */ - unsigned long nrs_req_started; - /** - * # policies on this NRS - */ - unsigned int nrs_num_pols; - /** - * This NRS head is in progress of starting a policy - */ - unsigned int nrs_policy_starting:1; - /** - * In progress of shutting down the whole NRS head; used during - * unregistration - */ - unsigned int nrs_stopping:1; - /** - * NRS policy is throttling request - */ - unsigned int nrs_throttling:1; -}; - -#define NRS_POL_NAME_MAX 16 -#define NRS_POL_ARG_MAX 16 - -struct ptlrpc_nrs_pol_desc; - -/** - * Service compatibility predicate; this determines whether a policy is adequate - * for handling RPCs of a particular PTLRPC service. - * - * XXX:This should give the same result during policy registration and - * unregistration, and for all partitions of a service; so the result should not - * depend on temporal service or other properties, that may influence the - * result. 
- */ -typedef bool (*nrs_pol_desc_compat_t)(const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc); - -struct ptlrpc_nrs_pol_conf { - /** - * Human-readable policy name - */ - char nc_name[NRS_POL_NAME_MAX]; - /** - * NRS operations for this policy - */ - const struct ptlrpc_nrs_pol_ops *nc_ops; - /** - * Service compatibility predicate - */ - nrs_pol_desc_compat_t nc_compat; - /** - * Set for policies that support a single ptlrpc service, i.e. ones that - * have \a pd_compat set to nrs_policy_compat_one(). The variable value - * depicts the name of the single service that such policies are - * compatible with. - */ - const char *nc_compat_svc_name; - /** - * Owner module for this policy descriptor; policies registering from a - * different module to the one the NRS framework is held within - * (currently ptlrpc), should set this field to THIS_MODULE. - */ - struct module *nc_owner; - /** - * Policy registration flags; a bitmask of \e nrs_policy_flags - */ - unsigned int nc_flags; -}; - -/** - * NRS policy registering descriptor - * - * Is used to hold a description of a policy that can be passed to NRS core in - * order to register the policy with NRS heads in different PTLRPC services. - */ -struct ptlrpc_nrs_pol_desc { - /** - * Human-readable policy name - */ - char pd_name[NRS_POL_NAME_MAX]; - /** - * Link into nrs_core::nrs_policies - */ - struct list_head pd_list; - /** - * NRS operations for this policy - */ - const struct ptlrpc_nrs_pol_ops *pd_ops; - /** - * Service compatibility predicate - */ - nrs_pol_desc_compat_t pd_compat; - /** - * Set for policies that are compatible with only one PTLRPC service. - * - * \see ptlrpc_nrs_pol_conf::nc_compat_svc_name - */ - const char *pd_compat_svc_name; - /** - * Owner module for this policy descriptor. - * - * We need to hold a reference to the module whenever we might make use - * of any of the module's contents, i.e. 
- * - If one or more instances of the policy are at a state where they - * might be handling a request, i.e. - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING as we will have to - * call into the policy's ptlrpc_nrs_pol_ops() handlers. A reference - * is taken on the module when - * \e ptlrpc_nrs_pol_desc::pd_refs becomes 1, and released when it - * becomes 0, so that we hold only one reference to the module maximum - * at any time. - * - * We do not need to hold a reference to the module, even though we - * might use code and data from the module, in the following cases: - * - During external policy registration, because this should happen in - * the module's init() function, in which case the module is safe from - * removal because a reference is being held on the module by the - * kernel, and iirc kmod (and I guess module-init-tools also) will - * serialize any racing processes properly anyway. - * - During external policy unregistration, because this should happen - * in a module's exit() function, and any attempts to start a policy - * instance would need to take a reference on the module, and this is - * not possible once we have reached the point where the exit() - * handler is called. - * - During service registration and unregistration, as service setup - * and cleanup, and policy registration, unregistration and policy - * instance starting, are serialized by \e nrs_core::nrs_mutex, so - * as long as users adhere to the convention of registering policies - * in init() and unregistering them in module exit() functions, there - * should not be a race between these operations. - * - During any policy-specific lprocfs operations, because a reference - * is held by the kernel on a proc entry that has been entered by a - * syscall, so as long as proc entries are removed during - * unregistration time, then unregistration and lprocfs operations - * will be properly serialized. 
- */ - struct module *pd_owner; - /** - * Bitmask of \e nrs_policy_flags - */ - unsigned int pd_flags; - /** - * # of references on this descriptor - */ - atomic_t pd_refs; -}; - -/** - * NRS policy state - * - * Policies transition from one state to the other during their lifetime - */ -enum ptlrpc_nrs_pol_state { - /** - * Not a valid policy state. - */ - NRS_POL_STATE_INVALID, - /** - * Policies are at this state either at the start of their life, or - * transition here when the user selects a different policy to act - * as the primary one. - */ - NRS_POL_STATE_STOPPED, - /** - * Policy is progress of stopping - */ - NRS_POL_STATE_STOPPING, - /** - * Policy is in progress of starting - */ - NRS_POL_STATE_STARTING, - /** - * A policy is in this state in two cases: - * - it is the fallback policy, which is always in this state. - * - it has been activated by the user; i.e. it is the primary policy, - */ - NRS_POL_STATE_STARTED, -}; - -/** - * NRS policy information - * - * Used for obtaining information for the status of a policy via lprocfs - */ -struct ptlrpc_nrs_pol_info { - /** - * Policy name - */ - char pi_name[NRS_POL_NAME_MAX]; - /** - * Policy argument - */ - char pi_arg[NRS_POL_ARG_MAX]; - /** - * Current policy state - */ - enum ptlrpc_nrs_pol_state pi_state; - /** - * # RPCs enqueued for later dispatching by the policy - */ - long pi_req_queued; - /** - * # RPCs started for dispatch by the policy - */ - long pi_req_started; - /** - * Is this a fallback policy? - */ - unsigned pi_fallback:1; -}; - -/** - * NRS policy - * - * There is one instance of this for each policy in each NRS head of each - * PTLRPC service partition. 
- */ -struct ptlrpc_nrs_policy { - /** - * Linkage into the NRS head's list of policies, - * ptlrpc_nrs:nrs_policy_list - */ - struct list_head pol_list; - /** - * Linkage into the NRS head's list of policies with enqueued - * requests ptlrpc_nrs:nrs_policy_queued - */ - struct list_head pol_list_queued; - /** - * Current state of this policy - */ - enum ptlrpc_nrs_pol_state pol_state; - /** - * Bitmask of nrs_policy_flags - */ - unsigned int pol_flags; - /** - * # RPCs enqueued for later dispatching by the policy - */ - long pol_req_queued; - /** - * # RPCs started for dispatch by the policy - */ - long pol_req_started; - /** - * Usage Reference count taken on the policy instance - */ - long pol_ref; - /** - * Human-readable policy argument - */ - char pol_arg[NRS_POL_ARG_MAX]; - /** - * The NRS head this policy has been created at - */ - struct ptlrpc_nrs *pol_nrs; - /** - * Private policy data; varies by policy type - */ - void *pol_private; - /** - * Policy descriptor for this policy instance. - */ - struct ptlrpc_nrs_pol_desc *pol_desc; -}; - -/** - * NRS resource - * - * Resources are embedded into two types of NRS entities: - * - Inside NRS policies, in the policy's private data in - * ptlrpc_nrs_policy::pol_private - * - In objects that act as prime-level scheduling entities in different NRS - * policies; e.g. on a policy that performs round robin or similar order - * scheduling across client NIDs, there would be one NRS resource per unique - * client NID. On a policy which performs round robin scheduling across - * backend filesystem objects, there would be one resource associated with - * each of the backend filesystem objects partaking in the scheduling - * performed by the policy. - * - * NRS resources share a parent-child relationship, in which resources embedded - * in policy instances are the parent entities, with all scheduling entities - * a policy schedules across being the children, thus forming a simple resource - * hierarchy. 
This hierarchy may be extended with one or more levels in the - * future if the ability to have more than one primary policy is added. - * - * Upon request initialization, references to the then active NRS policies are - * taken and used to later handle the dispatching of the request with one of - * these policies. - * - * \see nrs_resource_get_safe() - * \see ptlrpc_nrs_req_add() - */ -struct ptlrpc_nrs_resource { - /** - * This NRS resource's parent; is NULL for resources embedded in NRS - * policy instances; i.e. those are top-level ones. - */ - struct ptlrpc_nrs_resource *res_parent; - /** - * The policy associated with this resource. - */ - struct ptlrpc_nrs_policy *res_policy; -}; - -enum { - NRS_RES_FALLBACK, - NRS_RES_PRIMARY, - NRS_RES_MAX -}; - -#include - -/** - * NRS request - * - * Instances of this object exist embedded within ptlrpc_request; the main - * purpose of this object is to hold references to the request's resources - * for the lifetime of the request, and to hold properties that policies use - * use for determining the request's scheduling priority. - **/ -struct ptlrpc_nrs_request { - /** - * The request's resource hierarchy. - */ - struct ptlrpc_nrs_resource *nr_res_ptrs[NRS_RES_MAX]; - /** - * Index into ptlrpc_nrs_request::nr_res_ptrs of the resource of the - * policy that was used to enqueue the request. - * - * \see nrs_request_enqueue() - */ - unsigned int nr_res_idx; - unsigned int nr_initialized:1; - unsigned int nr_enqueued:1; - unsigned int nr_started:1; - unsigned int nr_finalized:1; - - /** - * Policy-specific fields, used for determining a request's scheduling - * priority, and other supporting functionality. - */ - union { - /** - * Fields for the FIFO policy - */ - struct nrs_fifo_req fifo; - } nr_u; - /** - * Externally-registering policies may want to use this to allocate - * their own request properties. 
- */ - void *ext; -}; - -/** @} nrs */ -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h b/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h deleted file mode 100644 index b70d97d4acbb..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_nrs_fifo.h +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2014, Intel Corporation. - * - * Copyright 2012 Xyratex Technology Limited - */ -/* - * - * Network Request Scheduler (NRS) First-in First-out (FIFO) policy - * - */ - -#ifndef _LUSTRE_NRS_FIFO_H -#define _LUSTRE_NRS_FIFO_H - -/* \name fifo - * - * FIFO policy - * - * This policy is a logical wrapper around previous, non-NRS functionality. - * It dispatches RPCs in the same order as they arrive from the network. This - * policy is currently used as the fallback policy, and the only enabled policy - * on all NRS heads of all PTLRPC service partitions. - * @{ - */ - -/** - * Private data structure for the FIFO policy - */ -struct nrs_fifo_head { - /** - * Resource object for policy instance. - */ - struct ptlrpc_nrs_resource fh_res; - /** - * List of queued requests. - */ - struct list_head fh_list; - /** - * For debugging purposes. 
- */ - __u64 fh_sequence; -}; - -struct nrs_fifo_req { - struct list_head fr_list; - __u64 fr_sequence; -}; - -/** @} fifo */ -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_obdo.h b/drivers/staging/lustre/lustre/include/lustre_obdo.h deleted file mode 100644 index d67dcbb84f18..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_obdo.h +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - * - * Copyright 2015 Cray Inc, all rights reserved. - * Author: Ben Evans. - * - * Define obdo associated functions - * obdo: OBject Device o... 
- */ - -#ifndef _LUSTRE_OBDO_H_ -#define _LUSTRE_OBDO_H_ - -#include - -/** - * Create an obdo to send over the wire - */ -void lustre_set_wire_obdo(const struct obd_connect_data *ocd, - struct obdo *wobdo, - const struct obdo *lobdo); - -/** - * Create a local obdo from a wire based odbo - */ -void lustre_get_wire_obdo(const struct obd_connect_data *ocd, - struct obdo *lobdo, - const struct obdo *wobdo); - -#endif diff --git a/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h deleted file mode 100644 index 298476ea7557..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef LUSTRE_PATCHLESS_COMPAT_H -#define LUSTRE_PATCHLESS_COMPAT_H - -#include - -#include -#include -#include -#include - -#define ll_delete_from_page_cache(page) delete_from_page_cache(page) - -static inline void -truncate_complete_page(struct address_space *mapping, struct page *page) -{ - if (page->mapping != mapping) - return; - - if (PagePrivate(page)) - page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); - - cancel_dirty_page(page); - ClearPageMappedToDisk(page); - ll_delete_from_page_cache(page); -} - -#ifndef ATTR_CTIME_SET -/* - * set ATTR_CTIME_SET to a high value to avoid any risk of collision with other - * ATTR_* attributes (see bug 13828) - */ -#define ATTR_CTIME_SET (1 << 28) -#endif - -#endif /* LUSTRE_PATCHLESS_COMPAT_H */ diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h deleted file mode 100644 index 213d0a01adcf..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h +++ /dev/null @@ -1,307 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. 
All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/include/lustre_req_layout.h - * - * Lustre Metadata Target (mdt) request handler - * - * Author: Nikita Danilov - */ - -#ifndef _LUSTRE_REQ_LAYOUT_H__ -#define _LUSTRE_REQ_LAYOUT_H__ - -#include - -/** \defgroup req_layout req_layout - * - * @{ - */ - -struct req_msg_field; -struct req_format; -struct req_capsule; - -struct ptlrpc_request; - -enum req_location { - RCL_CLIENT, - RCL_SERVER, - RCL_NR -}; - -/* Maximal number of fields (buffers) in a request message. */ -#define REQ_MAX_FIELD_NR 9 - -struct req_capsule { - struct ptlrpc_request *rc_req; - const struct req_format *rc_fmt; - enum req_location rc_loc; - __u32 rc_area[RCL_NR][REQ_MAX_FIELD_NR]; -}; - -void req_capsule_init(struct req_capsule *pill, struct ptlrpc_request *req, - enum req_location location); -void req_capsule_fini(struct req_capsule *pill); - -void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt); -size_t req_capsule_filled_sizes(struct req_capsule *pill, - enum req_location loc); -int req_capsule_server_pack(struct req_capsule *pill); - -void *req_capsule_client_get(struct req_capsule *pill, - const struct req_msg_field *field); -void *req_capsule_client_swab_get(struct req_capsule *pill, - const struct req_msg_field *field, - void *swabber); -void *req_capsule_client_sized_get(struct req_capsule *pill, - const struct req_msg_field *field, - u32 len); -void *req_capsule_server_get(struct req_capsule *pill, - const struct req_msg_field *field); -void *req_capsule_server_sized_get(struct req_capsule *pill, - const struct req_msg_field *field, - u32 len); -void *req_capsule_server_swab_get(struct req_capsule *pill, - const struct req_msg_field *field, - void *swabber); -void *req_capsule_server_sized_swab_get(struct req_capsule 
*pill, - const struct req_msg_field *field, - u32 len, void *swabber); - -void req_capsule_set_size(struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc, u32 size); -u32 req_capsule_get_size(const struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc); -u32 req_capsule_msg_size(struct req_capsule *pill, enum req_location loc); -u32 req_capsule_fmt_size(__u32 magic, const struct req_format *fmt, - enum req_location loc); -void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt); - -int req_capsule_has_field(const struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc); -void req_capsule_shrink(struct req_capsule *pill, - const struct req_msg_field *field, - u32 newlen, enum req_location loc); -int req_layout_init(void); -void req_layout_fini(void); - -extern struct req_format RQF_OBD_PING; -extern struct req_format RQF_OBD_SET_INFO; -extern struct req_format RQF_SEC_CTX; -/* MGS req_format */ -extern struct req_format RQF_MGS_TARGET_REG; -extern struct req_format RQF_MGS_SET_INFO; -extern struct req_format RQF_MGS_CONFIG_READ; -/* fid/fld req_format */ -extern struct req_format RQF_SEQ_QUERY; -extern struct req_format RQF_FLD_QUERY; -extern struct req_format RQF_FLD_READ; -/* MDS req_format */ -extern struct req_format RQF_MDS_CONNECT; -extern struct req_format RQF_MDS_DISCONNECT; -extern struct req_format RQF_MDS_STATFS; -extern struct req_format RQF_MDS_GETSTATUS; -extern struct req_format RQF_MDS_SYNC; -extern struct req_format RQF_MDS_GETXATTR; -extern struct req_format RQF_MDS_GETATTR; - -/* - * This is format of direct (non-intent) MDS_GETATTR_NAME request. 
- */ -extern struct req_format RQF_MDS_GETATTR_NAME; -extern struct req_format RQF_MDS_CLOSE; -extern struct req_format RQF_MDS_INTENT_CLOSE; -extern struct req_format RQF_MDS_CONNECT; -extern struct req_format RQF_MDS_DISCONNECT; -extern struct req_format RQF_MDS_GET_INFO; -extern struct req_format RQF_MDS_READPAGE; -extern struct req_format RQF_MDS_WRITEPAGE; -extern struct req_format RQF_MDS_REINT; -extern struct req_format RQF_MDS_REINT_CREATE; -extern struct req_format RQF_MDS_REINT_CREATE_ACL; -extern struct req_format RQF_MDS_REINT_CREATE_SLAVE; -extern struct req_format RQF_MDS_REINT_CREATE_SYM; -extern struct req_format RQF_MDS_REINT_OPEN; -extern struct req_format RQF_MDS_REINT_UNLINK; -extern struct req_format RQF_MDS_REINT_LINK; -extern struct req_format RQF_MDS_REINT_RENAME; -extern struct req_format RQF_MDS_REINT_SETATTR; -extern struct req_format RQF_MDS_REINT_SETXATTR; -extern struct req_format RQF_MDS_QUOTACTL; -extern struct req_format RQF_MDS_SWAP_LAYOUTS; -extern struct req_format RQF_MDS_REINT_MIGRATE; -/* MDS hsm formats */ -extern struct req_format RQF_MDS_HSM_STATE_GET; -extern struct req_format RQF_MDS_HSM_STATE_SET; -extern struct req_format RQF_MDS_HSM_ACTION; -extern struct req_format RQF_MDS_HSM_PROGRESS; -extern struct req_format RQF_MDS_HSM_CT_REGISTER; -extern struct req_format RQF_MDS_HSM_CT_UNREGISTER; -extern struct req_format RQF_MDS_HSM_REQUEST; -/* OST req_format */ -extern struct req_format RQF_OST_CONNECT; -extern struct req_format RQF_OST_DISCONNECT; -extern struct req_format RQF_OST_QUOTACTL; -extern struct req_format RQF_OST_GETATTR; -extern struct req_format RQF_OST_SETATTR; -extern struct req_format RQF_OST_CREATE; -extern struct req_format RQF_OST_PUNCH; -extern struct req_format RQF_OST_SYNC; -extern struct req_format RQF_OST_DESTROY; -extern struct req_format RQF_OST_BRW_READ; -extern struct req_format RQF_OST_BRW_WRITE; -extern struct req_format RQF_OST_STATFS; -extern struct req_format RQF_OST_SET_GRANT_INFO; 
-extern struct req_format RQF_OST_GET_INFO; -extern struct req_format RQF_OST_GET_INFO_LAST_ID; -extern struct req_format RQF_OST_GET_INFO_LAST_FID; -extern struct req_format RQF_OST_SET_INFO_LAST_FID; -extern struct req_format RQF_OST_GET_INFO_FIEMAP; - -/* LDLM req_format */ -extern struct req_format RQF_LDLM_ENQUEUE; -extern struct req_format RQF_LDLM_ENQUEUE_LVB; -extern struct req_format RQF_LDLM_CONVERT; -extern struct req_format RQF_LDLM_INTENT; -extern struct req_format RQF_LDLM_INTENT_BASIC; -extern struct req_format RQF_LDLM_INTENT_LAYOUT; -extern struct req_format RQF_LDLM_INTENT_GETATTR; -extern struct req_format RQF_LDLM_INTENT_OPEN; -extern struct req_format RQF_LDLM_INTENT_CREATE; -extern struct req_format RQF_LDLM_INTENT_UNLINK; -extern struct req_format RQF_LDLM_INTENT_GETXATTR; -extern struct req_format RQF_LDLM_CANCEL; -extern struct req_format RQF_LDLM_CALLBACK; -extern struct req_format RQF_LDLM_CP_CALLBACK; -extern struct req_format RQF_LDLM_BL_CALLBACK; -extern struct req_format RQF_LDLM_GL_CALLBACK; -extern struct req_format RQF_LDLM_GL_DESC_CALLBACK; -/* LOG req_format */ -extern struct req_format RQF_LOG_CANCEL; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK; -extern struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER; -extern struct req_format RQF_LLOG_ORIGIN_CONNECT; - -extern struct req_format RQF_CONNECT; - -extern struct req_msg_field RMF_GENERIC_DATA; -extern struct req_msg_field RMF_PTLRPC_BODY; -extern struct req_msg_field RMF_MDT_BODY; -extern struct req_msg_field RMF_MDT_EPOCH; -extern struct req_msg_field RMF_OBD_STATFS; -extern struct req_msg_field RMF_NAME; -extern struct req_msg_field RMF_SYMTGT; -extern struct req_msg_field RMF_TGTUUID; -extern struct req_msg_field RMF_CLUUID; -extern struct req_msg_field RMF_SETINFO_VAL; -extern struct 
req_msg_field RMF_SETINFO_KEY; -extern struct req_msg_field RMF_GETINFO_VAL; -extern struct req_msg_field RMF_GETINFO_VALLEN; -extern struct req_msg_field RMF_GETINFO_KEY; -extern struct req_msg_field RMF_CLOSE_DATA; - -/* - * connection handle received in MDS_CONNECT request. - */ -extern struct req_msg_field RMF_CONN; -extern struct req_msg_field RMF_CONNECT_DATA; -extern struct req_msg_field RMF_DLM_REQ; -extern struct req_msg_field RMF_DLM_REP; -extern struct req_msg_field RMF_DLM_LVB; -extern struct req_msg_field RMF_DLM_GL_DESC; -extern struct req_msg_field RMF_LDLM_INTENT; -extern struct req_msg_field RMF_LAYOUT_INTENT; -extern struct req_msg_field RMF_MDT_MD; -extern struct req_msg_field RMF_REC_REINT; -extern struct req_msg_field RMF_EADATA; -extern struct req_msg_field RMF_EAVALS; -extern struct req_msg_field RMF_EAVALS_LENS; -extern struct req_msg_field RMF_ACL; -extern struct req_msg_field RMF_LOGCOOKIES; -extern struct req_msg_field RMF_CAPA1; -extern struct req_msg_field RMF_CAPA2; -extern struct req_msg_field RMF_OBD_QUOTACHECK; -extern struct req_msg_field RMF_OBD_QUOTACTL; -extern struct req_msg_field RMF_STRING; -extern struct req_msg_field RMF_SWAP_LAYOUTS; -extern struct req_msg_field RMF_MDS_HSM_PROGRESS; -extern struct req_msg_field RMF_MDS_HSM_REQUEST; -extern struct req_msg_field RMF_MDS_HSM_USER_ITEM; -extern struct req_msg_field RMF_MDS_HSM_ARCHIVE; -extern struct req_msg_field RMF_HSM_USER_STATE; -extern struct req_msg_field RMF_HSM_STATE_SET; -extern struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION; -extern struct req_msg_field RMF_MDS_HSM_REQUEST; - -/* seq-mgr fields */ -extern struct req_msg_field RMF_SEQ_OPC; -extern struct req_msg_field RMF_SEQ_RANGE; -extern struct req_msg_field RMF_FID_SPACE; - -/* FLD fields */ -extern struct req_msg_field RMF_FLD_OPC; -extern struct req_msg_field RMF_FLD_MDFLD; - -extern struct req_msg_field RMF_LLOGD_BODY; -extern struct req_msg_field RMF_LLOG_LOG_HDR; -extern struct req_msg_field 
RMF_LLOGD_CONN_BODY; - -extern struct req_msg_field RMF_MGS_TARGET_INFO; -extern struct req_msg_field RMF_MGS_SEND_PARAM; - -extern struct req_msg_field RMF_OST_BODY; -extern struct req_msg_field RMF_OBD_IOOBJ; -extern struct req_msg_field RMF_OBD_ID; -extern struct req_msg_field RMF_FID; -extern struct req_msg_field RMF_NIOBUF_REMOTE; -extern struct req_msg_field RMF_RCS; -extern struct req_msg_field RMF_FIEMAP_KEY; -extern struct req_msg_field RMF_FIEMAP_VAL; -extern struct req_msg_field RMF_OST_ID; - -/* MGS config read message format */ -extern struct req_msg_field RMF_MGS_CONFIG_BODY; -extern struct req_msg_field RMF_MGS_CONFIG_RES; - -/* generic uint32 */ -extern struct req_msg_field RMF_U32; - -/** @} req_layout */ - -#endif /* _LUSTRE_REQ_LAYOUT_H__ */ diff --git a/drivers/staging/lustre/lustre/include/lustre_sec.h b/drivers/staging/lustre/lustre/include/lustre_sec.h deleted file mode 100644 index d35bcbc98831..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_sec.h +++ /dev/null @@ -1,1072 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef _LUSTRE_SEC_H_ -#define _LUSTRE_SEC_H_ - -#include - -/** \defgroup sptlrpc sptlrpc - * - * @{ - */ - -/* - * to avoid include - */ -struct obd_import; -struct obd_export; -struct ptlrpc_request; -struct ptlrpc_reply_state; -struct ptlrpc_bulk_desc; -struct brw_page; -/* Linux specific */ -struct key; -struct seq_file; -struct lustre_cfg; - -/* - * forward declaration - */ -struct ptlrpc_sec_policy; -struct ptlrpc_sec_cops; -struct ptlrpc_sec_sops; -struct ptlrpc_sec; -struct ptlrpc_svc_ctx; -struct ptlrpc_cli_ctx; -struct ptlrpc_ctx_ops; - -/** - * \addtogroup flavor flavor - * - * RPC flavor is represented by a 32 bits integer. Currently the high 12 bits - * are unused, must be set to 0 for future expansion. - *
- * ------------------------------------------------------------------------
- * | 4b (bulk svc) | 4b (bulk type) | 4b (svc) | 4b (mech)  | 4b (policy) |
- * ------------------------------------------------------------------------
- * 
- * - * @{ - */ - -/* - * flavor constants - */ -enum sptlrpc_policy { - SPTLRPC_POLICY_NULL = 0, - SPTLRPC_POLICY_PLAIN = 1, - SPTLRPC_POLICY_GSS = 2, - SPTLRPC_POLICY_MAX, -}; - -enum sptlrpc_mech_null { - SPTLRPC_MECH_NULL = 0, - SPTLRPC_MECH_NULL_MAX, -}; - -enum sptlrpc_mech_plain { - SPTLRPC_MECH_PLAIN = 0, - SPTLRPC_MECH_PLAIN_MAX, -}; - -enum sptlrpc_mech_gss { - SPTLRPC_MECH_GSS_NULL = 0, - SPTLRPC_MECH_GSS_KRB5 = 1, - SPTLRPC_MECH_GSS_MAX, -}; - -enum sptlrpc_service_type { - SPTLRPC_SVC_NULL = 0, /**< no security */ - SPTLRPC_SVC_AUTH = 1, /**< authentication only */ - SPTLRPC_SVC_INTG = 2, /**< integrity */ - SPTLRPC_SVC_PRIV = 3, /**< privacy */ - SPTLRPC_SVC_MAX, -}; - -enum sptlrpc_bulk_type { - SPTLRPC_BULK_DEFAULT = 0, /**< follow rpc flavor */ - SPTLRPC_BULK_HASH = 1, /**< hash integrity */ - SPTLRPC_BULK_MAX, -}; - -enum sptlrpc_bulk_service { - SPTLRPC_BULK_SVC_NULL = 0, /**< no security */ - SPTLRPC_BULK_SVC_AUTH = 1, /**< authentication only */ - SPTLRPC_BULK_SVC_INTG = 2, /**< integrity */ - SPTLRPC_BULK_SVC_PRIV = 3, /**< privacy */ - SPTLRPC_BULK_SVC_MAX, -}; - -/* - * compose/extract macros - */ -#define FLVR_POLICY_OFFSET (0) -#define FLVR_MECH_OFFSET (4) -#define FLVR_SVC_OFFSET (8) -#define FLVR_BULK_TYPE_OFFSET (12) -#define FLVR_BULK_SVC_OFFSET (16) - -#define MAKE_FLVR(policy, mech, svc, btype, bsvc) \ - (((__u32)(policy) << FLVR_POLICY_OFFSET) | \ - ((__u32)(mech) << FLVR_MECH_OFFSET) | \ - ((__u32)(svc) << FLVR_SVC_OFFSET) | \ - ((__u32)(btype) << FLVR_BULK_TYPE_OFFSET) | \ - ((__u32)(bsvc) << FLVR_BULK_SVC_OFFSET)) - -/* - * extraction - */ -#define SPTLRPC_FLVR_POLICY(flavor) \ - ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xF) -#define SPTLRPC_FLVR_MECH(flavor) \ - ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xF) -#define SPTLRPC_FLVR_SVC(flavor) \ - ((((__u32)(flavor)) >> FLVR_SVC_OFFSET) & 0xF) -#define SPTLRPC_FLVR_BULK_TYPE(flavor) \ - ((((__u32)(flavor)) >> FLVR_BULK_TYPE_OFFSET) & 0xF) -#define 
SPTLRPC_FLVR_BULK_SVC(flavor) \ - ((((__u32)(flavor)) >> FLVR_BULK_SVC_OFFSET) & 0xF) - -#define SPTLRPC_FLVR_BASE(flavor) \ - ((((__u32)(flavor)) >> FLVR_POLICY_OFFSET) & 0xFFF) -#define SPTLRPC_FLVR_BASE_SUB(flavor) \ - ((((__u32)(flavor)) >> FLVR_MECH_OFFSET) & 0xFF) - -/* - * gss subflavors - */ -#define MAKE_BASE_SUBFLVR(mech, svc) \ - ((__u32)(mech) | \ - ((__u32)(svc) << (FLVR_SVC_OFFSET - FLVR_MECH_OFFSET))) - -#define SPTLRPC_SUBFLVR_KRB5N \ - MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_NULL) -#define SPTLRPC_SUBFLVR_KRB5A \ - MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_AUTH) -#define SPTLRPC_SUBFLVR_KRB5I \ - MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_INTG) -#define SPTLRPC_SUBFLVR_KRB5P \ - MAKE_BASE_SUBFLVR(SPTLRPC_MECH_GSS_KRB5, SPTLRPC_SVC_PRIV) - -/* - * "end user" flavors - */ -#define SPTLRPC_FLVR_NULL \ - MAKE_FLVR(SPTLRPC_POLICY_NULL, \ - SPTLRPC_MECH_NULL, \ - SPTLRPC_SVC_NULL, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_NULL) -#define SPTLRPC_FLVR_PLAIN \ - MAKE_FLVR(SPTLRPC_POLICY_PLAIN, \ - SPTLRPC_MECH_PLAIN, \ - SPTLRPC_SVC_NULL, \ - SPTLRPC_BULK_HASH, \ - SPTLRPC_BULK_SVC_INTG) -#define SPTLRPC_FLVR_KRB5N \ - MAKE_FLVR(SPTLRPC_POLICY_GSS, \ - SPTLRPC_MECH_GSS_KRB5, \ - SPTLRPC_SVC_NULL, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_NULL) -#define SPTLRPC_FLVR_KRB5A \ - MAKE_FLVR(SPTLRPC_POLICY_GSS, \ - SPTLRPC_MECH_GSS_KRB5, \ - SPTLRPC_SVC_AUTH, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_NULL) -#define SPTLRPC_FLVR_KRB5I \ - MAKE_FLVR(SPTLRPC_POLICY_GSS, \ - SPTLRPC_MECH_GSS_KRB5, \ - SPTLRPC_SVC_INTG, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_INTG) -#define SPTLRPC_FLVR_KRB5P \ - MAKE_FLVR(SPTLRPC_POLICY_GSS, \ - SPTLRPC_MECH_GSS_KRB5, \ - SPTLRPC_SVC_PRIV, \ - SPTLRPC_BULK_DEFAULT, \ - SPTLRPC_BULK_SVC_PRIV) - -#define SPTLRPC_FLVR_DEFAULT SPTLRPC_FLVR_NULL - -#define SPTLRPC_FLVR_INVALID ((__u32)0xFFFFFFFF) -#define SPTLRPC_FLVR_ANY ((__u32)0xFFF00000) - -/** - * extract the useful part from 
wire flavor - */ -#define WIRE_FLVR(wflvr) (((__u32)(wflvr)) & 0x000FFFFF) - -/** @} flavor */ - -static inline void flvr_set_svc(__u32 *flvr, __u32 svc) -{ - LASSERT(svc < SPTLRPC_SVC_MAX); - *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr), - SPTLRPC_FLVR_MECH(*flvr), - svc, - SPTLRPC_FLVR_BULK_TYPE(*flvr), - SPTLRPC_FLVR_BULK_SVC(*flvr)); -} - -static inline void flvr_set_bulk_svc(__u32 *flvr, __u32 svc) -{ - LASSERT(svc < SPTLRPC_BULK_SVC_MAX); - *flvr = MAKE_FLVR(SPTLRPC_FLVR_POLICY(*flvr), - SPTLRPC_FLVR_MECH(*flvr), - SPTLRPC_FLVR_SVC(*flvr), - SPTLRPC_FLVR_BULK_TYPE(*flvr), - svc); -} - -struct bulk_spec_hash { - __u8 hash_alg; -}; - -/** - * Full description of flavors being used on a ptlrpc connection, include - * both regular RPC and bulk transfer parts. - */ -struct sptlrpc_flavor { - /** - * wire flavor, should be renamed to sf_wire. - */ - __u32 sf_rpc; - /** - * general flags of PTLRPC_SEC_FL_* - */ - __u32 sf_flags; - /** - * rpc flavor specification - */ - union { - /* nothing for now */ - } u_rpc; - /** - * bulk flavor specification - */ - union { - struct bulk_spec_hash hash; - } u_bulk; -}; - -/** - * identify the RPC is generated from what part of Lustre. It's encoded into - * RPC requests and to be checked by ptlrpc service. - */ -enum lustre_sec_part { - LUSTRE_SP_CLI = 0, - LUSTRE_SP_MDT, - LUSTRE_SP_OST, - LUSTRE_SP_MGC, - LUSTRE_SP_MGS, - LUSTRE_SP_ANY = 0xFF -}; - -enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd); - -/** - * A rule specifies a flavor to be used by a ptlrpc connection between - * two Lustre parts. - */ -struct sptlrpc_rule { - __u32 sr_netid; /* LNET network ID */ - __u8 sr_from; /* sec_part */ - __u8 sr_to; /* sec_part */ - __u16 sr_padding; - struct sptlrpc_flavor sr_flvr; -}; - -/** - * A set of rules in memory. - * - * Rules are generated and stored on MGS, and propagated to MDT, OST, - * and client when needed. 
- */ -struct sptlrpc_rule_set { - int srs_nslot; - int srs_nrule; - struct sptlrpc_rule *srs_rules; -}; - -int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr); -bool sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr); - -static inline void sptlrpc_rule_set_init(struct sptlrpc_rule_set *set) -{ - memset(set, 0, sizeof(*set)); -} - -int sptlrpc_process_config(struct lustre_cfg *lcfg); -void sptlrpc_conf_log_start(const char *logname); -void sptlrpc_conf_log_stop(const char *logname); -void sptlrpc_conf_log_update_begin(const char *logname); -void sptlrpc_conf_log_update_end(const char *logname); -void sptlrpc_conf_client_adapt(struct obd_device *obd); - -/* The maximum length of security payload. 1024 is enough for Kerberos 5, - * and should be enough for other future mechanisms but not sure. - * Only used by pre-allocated request/reply pool. - */ -#define SPTLRPC_MAX_PAYLOAD (1024) - -struct vfs_cred { - u32 vc_uid; - u32 vc_gid; -}; - -struct ptlrpc_ctx_ops { - /** - * To determine whether it's suitable to use the \a ctx for \a vcred. - */ - int (*match)(struct ptlrpc_cli_ctx *ctx, struct vfs_cred *vcred); - - /** - * To bring the \a ctx uptodate. - */ - int (*refresh)(struct ptlrpc_cli_ctx *ctx); - - /** - * Validate the \a ctx. - */ - int (*validate)(struct ptlrpc_cli_ctx *ctx); - - /** - * Force the \a ctx to die. - */ - void (*force_die)(struct ptlrpc_cli_ctx *ctx, int grace); - int (*display)(struct ptlrpc_cli_ctx *ctx, char *buf, int bufsize); - - /** - * Sign the request message using \a ctx. - * - * \pre req->rq_reqmsg point to request message. - * \pre req->rq_reqlen is the request message length. - * \post req->rq_reqbuf point to request message with signature. - * \post req->rq_reqdata_len is set to the final request message size. - * - * \see null_ctx_sign(), plain_ctx_sign(), gss_cli_ctx_sign(). - */ - int (*sign)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req); - - /** - * Verify the reply message using \a ctx. 
- * - * \pre req->rq_repdata point to reply message with signature. - * \pre req->rq_repdata_len is the total reply message length. - * \post req->rq_repmsg point to reply message without signature. - * \post req->rq_replen is the reply message length. - * - * \see null_ctx_verify(), plain_ctx_verify(), gss_cli_ctx_verify(). - */ - int (*verify)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req); - - /** - * Encrypt the request message using \a ctx. - * - * \pre req->rq_reqmsg point to request message in clear text. - * \pre req->rq_reqlen is the request message length. - * \post req->rq_reqbuf point to request message. - * \post req->rq_reqdata_len is set to the final request message size. - * - * \see gss_cli_ctx_seal(). - */ - int (*seal)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req); - - /** - * Decrypt the reply message using \a ctx. - * - * \pre req->rq_repdata point to encrypted reply message. - * \pre req->rq_repdata_len is the total cipher text length. - * \post req->rq_repmsg point to reply message in clear text. - * \post req->rq_replen is the reply message length in clear text. - * - * \see gss_cli_ctx_unseal(). - */ - int (*unseal)(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req); - - /** - * Wrap bulk request data. This is called before wrapping RPC - * request message. - * - * \pre bulk buffer is descripted by desc->bd_iov and - * desc->bd_iov_count. note for read it's just buffer, no data - * need to be sent; for write it contains data in clear text. - * \post when necessary, ptlrpc_bulk_sec_desc was properly prepared - * (usually inside of RPC request message). - * - encryption: cipher text bulk buffer is descripted by - * desc->bd_enc_iov and desc->bd_iov_count (currently assume iov - * count remains the same). - * - otherwise: bulk buffer is still desc->bd_iov and - * desc->bd_iov_count. - * - * \return 0: success. - * \return -ev: error code. - * - * \see plain_cli_wrap_bulk(), gss_cli_ctx_wrap_bulk(). 
- */ - int (*wrap_bulk)(struct ptlrpc_cli_ctx *ctx, - struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); - - /** - * Unwrap bulk reply data. This is called after wrapping RPC - * reply message. - * - * \pre bulk buffer is descripted by desc->bd_iov/desc->bd_enc_iov and - * desc->bd_iov_count, according to wrap_bulk(). - * \post final bulk data in clear text is placed in buffer described - * by desc->bd_iov and desc->bd_iov_count. - * \return +ve nob of actual bulk data in clear text. - * \return -ve error code. - * - * \see plain_cli_unwrap_bulk(), gss_cli_ctx_unwrap_bulk(). - */ - int (*unwrap_bulk)(struct ptlrpc_cli_ctx *ctx, - struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); -}; - -#define PTLRPC_CTX_NEW_BIT (0) /* newly created */ -#define PTLRPC_CTX_UPTODATE_BIT (1) /* uptodate */ -#define PTLRPC_CTX_DEAD_BIT (2) /* mark expired gracefully */ -#define PTLRPC_CTX_ERROR_BIT (3) /* fatal error (refresh, etc.) */ -#define PTLRPC_CTX_CACHED_BIT (8) /* in ctx cache (hash etc.) 
*/ -#define PTLRPC_CTX_ETERNAL_BIT (9) /* always valid */ - -#define PTLRPC_CTX_NEW (1 << PTLRPC_CTX_NEW_BIT) -#define PTLRPC_CTX_UPTODATE (1 << PTLRPC_CTX_UPTODATE_BIT) -#define PTLRPC_CTX_DEAD (1 << PTLRPC_CTX_DEAD_BIT) -#define PTLRPC_CTX_ERROR (1 << PTLRPC_CTX_ERROR_BIT) -#define PTLRPC_CTX_CACHED (1 << PTLRPC_CTX_CACHED_BIT) -#define PTLRPC_CTX_ETERNAL (1 << PTLRPC_CTX_ETERNAL_BIT) - -#define PTLRPC_CTX_STATUS_MASK (PTLRPC_CTX_NEW_BIT | \ - PTLRPC_CTX_UPTODATE | \ - PTLRPC_CTX_DEAD | \ - PTLRPC_CTX_ERROR) - -struct ptlrpc_cli_ctx { - struct hlist_node cc_cache; /* linked into ctx cache */ - atomic_t cc_refcount; - struct ptlrpc_sec *cc_sec; - struct ptlrpc_ctx_ops *cc_ops; - unsigned long cc_expire; /* in seconds */ - unsigned int cc_early_expire:1; - unsigned long cc_flags; - struct vfs_cred cc_vcred; - spinlock_t cc_lock; - struct list_head cc_req_list; /* waiting reqs linked here */ - struct list_head cc_gc_chain; /* linked to gc chain */ -}; - -/** - * client side policy operation vector. - */ -struct ptlrpc_sec_cops { - /** - * Given an \a imp, create and initialize a ptlrpc_sec structure. - * \param ctx service context: - * - regular import: \a ctx should be NULL; - * - reverse import: \a ctx is obtained from incoming request. - * \param flavor specify what flavor to use. - * - * When necessary, policy module is responsible for taking reference - * on the import. - * - * \see null_create_sec(), plain_create_sec(), gss_sec_create_kr(). - */ - struct ptlrpc_sec *(*create_sec)(struct obd_import *imp, - struct ptlrpc_svc_ctx *ctx, - struct sptlrpc_flavor *flavor); - - /** - * Destructor of ptlrpc_sec. When called, refcount has been dropped - * to 0 and all contexts has been destroyed. - * - * \see null_destroy_sec(), plain_destroy_sec(), gss_sec_destroy_kr(). - */ - void (*destroy_sec)(struct ptlrpc_sec *sec); - - /** - * Notify that this ptlrpc_sec is going to die. 
Optionally, policy - * module is supposed to set sec->ps_dying and whatever necessary - * actions. - * - * \see plain_kill_sec(), gss_sec_kill(). - */ - void (*kill_sec)(struct ptlrpc_sec *sec); - - /** - * Given \a vcred, lookup and/or create its context. The policy module - * is supposed to maintain its own context cache. - * XXX currently \a create and \a remove_dead is always 1, perhaps - * should be removed completely. - * - * \see null_lookup_ctx(), plain_lookup_ctx(), gss_sec_lookup_ctx_kr(). - */ - struct ptlrpc_cli_ctx *(*lookup_ctx)(struct ptlrpc_sec *sec, - struct vfs_cred *vcred, - int create, int remove_dead); - - /** - * Called then the reference of \a ctx dropped to 0. The policy module - * is supposed to destroy this context or whatever else according to - * its cache maintenance mechanism. - * - * \param sync if zero, we shouldn't wait for the context being - * destroyed completely. - * - * \see plain_release_ctx(), gss_sec_release_ctx_kr(). - */ - void (*release_ctx)(struct ptlrpc_sec *sec, struct ptlrpc_cli_ctx *ctx, - int sync); - - /** - * Flush the context cache. - * - * \param uid context of which user, -1 means all contexts. - * \param grace if zero, the PTLRPC_CTX_UPTODATE_BIT of affected - * contexts should be cleared immediately. - * \param force if zero, only idle contexts will be flushed. - * - * \see plain_flush_ctx_cache(), gss_sec_flush_ctx_cache_kr(). - */ - int (*flush_ctx_cache)(struct ptlrpc_sec *sec, uid_t uid, - int grace, int force); - - /** - * Called periodically by garbage collector to remove dead contexts - * from cache. - * - * \see gss_sec_gc_ctx_kr(). - */ - void (*gc_ctx)(struct ptlrpc_sec *sec); - - /** - * Given an context \a ctx, install a corresponding reverse service - * context on client side. - * XXX currently it's only used by GSS module, maybe we should remove - * this from general API. 
- */ - int (*install_rctx)(struct obd_import *imp, struct ptlrpc_sec *sec, - struct ptlrpc_cli_ctx *ctx); - - /** - * To allocate request buffer for \a req. - * - * \pre req->rq_reqmsg == NULL. - * \pre req->rq_reqbuf == NULL, otherwise it must be pre-allocated, - * we are not supposed to free it. - * \post if success, req->rq_reqmsg point to a buffer with size - * at least \a lustre_msg_size. - * - * \see null_alloc_reqbuf(), plain_alloc_reqbuf(), gss_alloc_reqbuf(). - */ - int (*alloc_reqbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req, - int lustre_msg_size); - - /** - * To free request buffer for \a req. - * - * \pre req->rq_reqbuf != NULL. - * - * \see null_free_reqbuf(), plain_free_reqbuf(), gss_free_reqbuf(). - */ - void (*free_reqbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req); - - /** - * To allocate reply buffer for \a req. - * - * \pre req->rq_repbuf == NULL. - * \post if success, req->rq_repbuf point to a buffer with size - * req->rq_repbuf_len, the size should be large enough to receive - * reply which be transformed from \a lustre_msg_size of clear text. - * - * \see null_alloc_repbuf(), plain_alloc_repbuf(), gss_alloc_repbuf(). - */ - int (*alloc_repbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req, - int lustre_msg_size); - - /** - * To free reply buffer for \a req. - * - * \pre req->rq_repbuf != NULL. - * \post req->rq_repbuf == NULL. - * \post req->rq_repbuf_len == 0. - * - * \see null_free_repbuf(), plain_free_repbuf(), gss_free_repbuf(). - */ - void (*free_repbuf)(struct ptlrpc_sec *sec, struct ptlrpc_request *req); - - /** - * To expand the request buffer of \a req, thus the \a segment in - * the request message pointed by req->rq_reqmsg can accommodate - * at least \a newsize of data. - * - * \pre req->rq_reqmsg->lm_buflens[segment] < newsize. - * - * \see null_enlarge_reqbuf(), plain_enlarge_reqbuf(), - * gss_enlarge_reqbuf(). 
- */ - int (*enlarge_reqbuf)(struct ptlrpc_sec *sec, - struct ptlrpc_request *req, - int segment, int newsize); - /* - * misc - */ - int (*display)(struct ptlrpc_sec *sec, struct seq_file *seq); -}; - -/** - * server side policy operation vector. - */ -struct ptlrpc_sec_sops { - /** - * verify an incoming request. - * - * \pre request message is pointed by req->rq_reqbuf, size is - * req->rq_reqdata_len; and the message has been unpacked to - * host byte order. - * - * \retval SECSVC_OK success, req->rq_reqmsg point to request message - * in clear text, size is req->rq_reqlen; req->rq_svc_ctx is set; - * req->rq_sp_from is decoded from request. - * \retval SECSVC_COMPLETE success, the request has been fully - * processed, and reply message has been prepared; req->rq_sp_from is - * decoded from request. - * \retval SECSVC_DROP failed, this request should be dropped. - * - * \see null_accept(), plain_accept(), gss_svc_accept_kr(). - */ - int (*accept)(struct ptlrpc_request *req); - - /** - * Perform security transformation upon reply message. - * - * \pre reply message is pointed by req->rq_reply_state->rs_msg, size - * is req->rq_replen. - * \post req->rs_repdata_len is the final message size. - * \post req->rq_reply_off is set. - * - * \see null_authorize(), plain_authorize(), gss_svc_authorize(). - */ - int (*authorize)(struct ptlrpc_request *req); - - /** - * Invalidate server context \a ctx. - * - * \see gss_svc_invalidate_ctx(). - */ - void (*invalidate_ctx)(struct ptlrpc_svc_ctx *ctx); - - /** - * Allocate a ptlrpc_reply_state. - * - * \param msgsize size of the reply message in clear text. - * \pre if req->rq_reply_state != NULL, then it's pre-allocated, we - * should simply use it; otherwise we'll responsible for allocating - * a new one. - * \post req->rq_reply_state != NULL; - * \post req->rq_reply_state->rs_msg != NULL; - * - * \see null_alloc_rs(), plain_alloc_rs(), gss_svc_alloc_rs(). 
- */ - int (*alloc_rs)(struct ptlrpc_request *req, int msgsize); - - /** - * Free a ptlrpc_reply_state. - */ - void (*free_rs)(struct ptlrpc_reply_state *rs); - - /** - * Release the server context \a ctx. - * - * \see gss_svc_free_ctx(). - */ - void (*free_ctx)(struct ptlrpc_svc_ctx *ctx); - - /** - * Install a reverse context based on the server context \a ctx. - * - * \see gss_svc_install_rctx_kr(). - */ - int (*install_rctx)(struct obd_import *imp, struct ptlrpc_svc_ctx *ctx); - - /** - * Prepare buffer for incoming bulk write. - * - * \pre desc->bd_iov and desc->bd_iov_count describes the buffer - * intended to receive the write. - * - * \see gss_svc_prep_bulk(). - */ - int (*prep_bulk)(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); - - /** - * Unwrap the bulk write data. - * - * \see plain_svc_unwrap_bulk(), gss_svc_unwrap_bulk(). - */ - int (*unwrap_bulk)(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); - - /** - * Wrap the bulk read data. - * - * \see plain_svc_wrap_bulk(), gss_svc_wrap_bulk(). - */ - int (*wrap_bulk)(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); -}; - -struct ptlrpc_sec_policy { - struct module *sp_owner; - char *sp_name; - __u16 sp_policy; /* policy number */ - struct ptlrpc_sec_cops *sp_cops; /* client ops */ - struct ptlrpc_sec_sops *sp_sops; /* server ops */ -}; - -#define PTLRPC_SEC_FL_REVERSE 0x0001 /* reverse sec */ -#define PTLRPC_SEC_FL_ROOTONLY 0x0002 /* treat everyone as root */ -#define PTLRPC_SEC_FL_UDESC 0x0004 /* ship udesc */ -#define PTLRPC_SEC_FL_BULK 0x0008 /* intensive bulk i/o expected */ -#define PTLRPC_SEC_FL_PAG 0x0010 /* PAG mode */ - -/** - * The ptlrpc_sec represents the client side ptlrpc security facilities, - * each obd_import (both regular and reverse import) must associate with - * a ptlrpc_sec. - * - * \see sptlrpc_import_sec_adapt(). 
- */ -struct ptlrpc_sec { - struct ptlrpc_sec_policy *ps_policy; - atomic_t ps_refcount; - /** statistic only */ - atomic_t ps_nctx; - /** unique identifier */ - int ps_id; - struct sptlrpc_flavor ps_flvr; - enum lustre_sec_part ps_part; - /** after set, no more new context will be created */ - unsigned int ps_dying:1; - /** owning import */ - struct obd_import *ps_import; - spinlock_t ps_lock; - - /* - * garbage collection - */ - struct list_head ps_gc_list; - unsigned long ps_gc_interval; /* in seconds */ - time64_t ps_gc_next; /* in seconds */ -}; - -static inline int sec_is_reverse(struct ptlrpc_sec *sec) -{ - return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE); -} - -static inline int sec_is_rootonly(struct ptlrpc_sec *sec) -{ - return (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_ROOTONLY); -} - -struct ptlrpc_svc_ctx { - atomic_t sc_refcount; - struct ptlrpc_sec_policy *sc_policy; -}; - -/* - * user identity descriptor - */ -#define LUSTRE_MAX_GROUPS (128) - -struct ptlrpc_user_desc { - __u32 pud_uid; - __u32 pud_gid; - __u32 pud_fsuid; - __u32 pud_fsgid; - __u32 pud_cap; - __u32 pud_ngroups; - __u32 pud_groups[0]; -}; - -/* - * bulk flavors - */ -enum sptlrpc_bulk_hash_alg { - BULK_HASH_ALG_NULL = 0, - BULK_HASH_ALG_ADLER32, - BULK_HASH_ALG_CRC32, - BULK_HASH_ALG_MD5, - BULK_HASH_ALG_SHA1, - BULK_HASH_ALG_SHA256, - BULK_HASH_ALG_SHA384, - BULK_HASH_ALG_SHA512, - BULK_HASH_ALG_MAX -}; - -const char *sptlrpc_get_hash_name(__u8 hash_alg); -__u8 sptlrpc_get_hash_alg(const char *algname); - -enum { - BSD_FL_ERR = 1, -}; - -struct ptlrpc_bulk_sec_desc { - __u8 bsd_version; /* 0 */ - __u8 bsd_type; /* SPTLRPC_BULK_XXX */ - __u8 bsd_svc; /* SPTLRPC_BULK_SVC_XXXX */ - __u8 bsd_flags; /* flags */ - __u32 bsd_nob; /* nob of bulk data */ - __u8 bsd_data[0]; /* policy-specific token */ -}; - -/* - * round size up to next power of 2, for slab allocation. 
- * @size must be sane (can't overflow after round up) - */ -static inline int size_roundup_power2(int size) -{ - size--; - size |= size >> 1; - size |= size >> 2; - size |= size >> 4; - size |= size >> 8; - size |= size >> 16; - size++; - return size; -} - -/* - * internal support libraries - */ -void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg, - int segment, int newsize); - -/* - * security policies - */ -int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy); -int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy); - -__u32 sptlrpc_name2flavor_base(const char *name); -const char *sptlrpc_flavor2name_base(__u32 flvr); -char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf, - char *buf, int bufsize); -char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize); - -static inline -struct ptlrpc_sec_policy *sptlrpc_policy_get(struct ptlrpc_sec_policy *policy) -{ - __module_get(policy->sp_owner); - return policy; -} - -static inline -void sptlrpc_policy_put(struct ptlrpc_sec_policy *policy) -{ - module_put(policy->sp_owner); -} - -/* - * client credential - */ -static inline -unsigned long cli_ctx_status(struct ptlrpc_cli_ctx *ctx) -{ - return (ctx->cc_flags & PTLRPC_CTX_STATUS_MASK); -} - -static inline -int cli_ctx_is_ready(struct ptlrpc_cli_ctx *ctx) -{ - return (cli_ctx_status(ctx) == PTLRPC_CTX_UPTODATE); -} - -static inline -int cli_ctx_is_refreshed(struct ptlrpc_cli_ctx *ctx) -{ - return (cli_ctx_status(ctx) != 0); -} - -static inline -int cli_ctx_is_uptodate(struct ptlrpc_cli_ctx *ctx) -{ - return ((ctx->cc_flags & PTLRPC_CTX_UPTODATE) != 0); -} - -static inline -int cli_ctx_is_error(struct ptlrpc_cli_ctx *ctx) -{ - return ((ctx->cc_flags & PTLRPC_CTX_ERROR) != 0); -} - -static inline -int cli_ctx_is_dead(struct ptlrpc_cli_ctx *ctx) -{ - return ((ctx->cc_flags & (PTLRPC_CTX_DEAD | PTLRPC_CTX_ERROR)) != 0); -} - -static inline -int cli_ctx_is_eternal(struct ptlrpc_cli_ctx *ctx) -{ - return ((ctx->cc_flags & 
PTLRPC_CTX_ETERNAL) != 0); -} - -/* - * sec get/put - */ -void sptlrpc_sec_put(struct ptlrpc_sec *sec); - -/* - * internal apis which only used by policy implementation - */ -int sptlrpc_get_next_secid(void); - -/* - * exported client context api - */ -struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx); -void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync); - -/* - * exported client context wrap/buffers - */ -int sptlrpc_cli_wrap_request(struct ptlrpc_request *req); -int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req); -int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize); -void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req); -int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize); -void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req); -int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req, - int segment, int newsize); -int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, - struct ptlrpc_request **req_ret); -void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req); - -void sptlrpc_request_out_callback(struct ptlrpc_request *req); - -/* - * exported higher interface of import & request - */ -int sptlrpc_import_sec_adapt(struct obd_import *imp, - struct ptlrpc_svc_ctx *ctx, - struct sptlrpc_flavor *flvr); -struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp); -void sptlrpc_import_sec_put(struct obd_import *imp); - -int sptlrpc_import_check_ctx(struct obd_import *imp); -void sptlrpc_import_flush_root_ctx(struct obd_import *imp); -void sptlrpc_import_flush_my_ctx(struct obd_import *imp); -void sptlrpc_import_flush_all_ctx(struct obd_import *imp); -int sptlrpc_req_get_ctx(struct ptlrpc_request *req); -void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync); -int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout); -void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode); - -/* gc */ -void sptlrpc_gc_add_sec(struct 
ptlrpc_sec *sec); -void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec); - -/* misc */ -const char *sec2target_str(struct ptlrpc_sec *sec); -/* - * lprocfs - */ -int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev); - -/* - * server side - */ -enum secsvc_accept_res { - SECSVC_OK = 0, - SECSVC_COMPLETE, - SECSVC_DROP, -}; - -int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req); -int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen); -int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req); -void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs); -void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req); -void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req); - -int sptlrpc_target_export_check(struct obd_export *exp, - struct ptlrpc_request *req); - -/* bulk security api */ -void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc); -int get_free_pages_in_pool(void); -int pool_is_at_full_capacity(void); - -int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); -int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc, - int nob); -int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc); - -/* bulk helpers (internal use only by policies) */ -int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, - void *buf, int buflen); - -int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed); - -/* user descriptor helpers */ -static inline int sptlrpc_user_desc_size(int ngroups) -{ - return sizeof(struct ptlrpc_user_desc) + ngroups * sizeof(__u32); -} - -int sptlrpc_current_user_desc_size(void); -int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset); -int sptlrpc_unpack_user_desc(struct lustre_msg *req, int offset, int swabbed); - -enum { - LUSTRE_SEC_NONE = 0, - LUSTRE_SEC_REMOTE = 1, - LUSTRE_SEC_SPECIFY = 2, - LUSTRE_SEC_ALL = 3 -}; - -/** @} sptlrpc */ - -#endif /* _LUSTRE_SEC_H_ */ diff --git 
a/drivers/staging/lustre/lustre/include/lustre_swab.h b/drivers/staging/lustre/lustre/include/lustre_swab.h deleted file mode 100644 index 9d786bbe7f3f..000000000000 --- a/drivers/staging/lustre/lustre/include/lustre_swab.h +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - * - * Copyright 2015 Cray Inc, all rights reserved. - * Author: Ben Evans. - * - * We assume all nodes are either little-endian or big-endian, and we - * always send messages in the sender's native format. The receiver - * detects the message format by checking the 'magic' field of the message - * (see lustre_msg_swabbed() below). - * - * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines - * are implemented in ptlrpc/lustre_swab.c. These 'swabbers' convert the - * type from "other" endian, in-place in the message buffer. - * - * A swabber takes a single pointer argument. The caller must already have - * verified that the length of the message buffer >= sizeof (type). 
- * - * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine - * may be defined that swabs just the variable part, after the caller has - * verified that the message buffer is large enough. - */ - -#ifndef _LUSTRE_SWAB_H_ -#define _LUSTRE_SWAB_H_ - -#include - -void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); -void lustre_swab_connect(struct obd_connect_data *ocd); -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_state_set(struct hsm_state_set *hss); -void lustre_swab_obd_statfs(struct obd_statfs *os); -void lustre_swab_obd_ioobj(struct obd_ioobj *ioo); -void lustre_swab_niobuf_remote(struct niobuf_remote *nbr); -void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb); -void lustre_swab_ost_lvb(struct ost_lvb *lvb); -void lustre_swab_obd_quotactl(struct obd_quotactl *q); -void lustre_swab_lquota_lvb(struct lquota_lvb *lvb); -void lustre_swab_generic_32s(__u32 *val); -void lustre_swab_mdt_body(struct mdt_body *b); -void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b); -void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); -void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); -void lustre_swab_lmv_desc(struct lmv_desc *ld); -void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm); -void lustre_swab_lov_desc(struct lov_desc *ld); -void lustre_swab_gl_desc(union ldlm_gl_desc *desc); -void lustre_swab_ldlm_intent(struct ldlm_intent *i); -void lustre_swab_ldlm_request(struct ldlm_request *rq); -void lustre_swab_ldlm_reply(struct ldlm_reply *r); -void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo); -void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo); -void lustre_swab_mgs_config_body(struct mgs_config_body *body); -void lustre_swab_mgs_config_res(struct mgs_config_res *body); -void lustre_swab_ost_body(struct ost_body *b); -void lustre_swab_ost_last_id(__u64 *id); -void lustre_swab_fiemap(struct fiemap *fiemap); -void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum); -void 
lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum); -void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod, - int stripe_count); -void lustre_swab_lov_mds_md(struct lov_mds_md *lmm); -void lustre_swab_lustre_capa(struct lustre_capa *c); -void lustre_swab_lustre_capa_key(struct lustre_capa_key *k); -void lustre_swab_fid2path(struct getinfo_fid2path *gf); -void lustre_swab_layout_intent(struct layout_intent *li); -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_current_action(struct hsm_current_action *action); -void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk); -void lustre_swab_hsm_user_state(struct hsm_user_state *hus); -void lustre_swab_hsm_user_item(struct hsm_user_item *hui); -void lustre_swab_hsm_request(struct hsm_request *hr); -void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl); -void lustre_swab_close_data(struct close_data *data); -void lustre_swab_lmv_user_md(struct lmv_user_md *lum); - -/* Functions for dumping PTLRPC fields */ -void dump_rniobuf(struct niobuf_remote *rnb); -void dump_ioo(struct obd_ioobj *nb); -void dump_ost_body(struct ost_body *ob); -void dump_rcs(__u32 *rc); - -#endif diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h deleted file mode 100644 index b1907bbffb19..000000000000 --- a/drivers/staging/lustre/lustre/include/obd.h +++ /dev/null @@ -1,1114 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __OBD_H -#define __OBD_H - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define MAX_OBD_DEVICES 8192 - -struct osc_async_rc { - int ar_rc; - int ar_force_sync; - __u64 ar_min_xid; -}; - -struct lov_oinfo { /* per-stripe data structure */ - struct ost_id loi_oi; /* object ID/Sequence on the target OST */ - int loi_ost_idx; /* OST stripe index in lov_tgt_desc->tgts */ - int loi_ost_gen; /* generation of this loi_ost_idx */ - - unsigned long loi_kms_valid:1; - __u64 loi_kms; /* known minimum size */ - struct ost_lvb loi_lvb; - struct osc_async_rc loi_ar; -}; - -static inline void loi_kms_set(struct lov_oinfo *oinfo, __u64 kms) -{ - oinfo->loi_kms = kms; - oinfo->loi_kms_valid = 1; -} - -static inline void loi_init(struct lov_oinfo *loi) -{ -} - -struct lov_stripe_md; -struct obd_info; - -int lov_read_and_clear_async_rc(struct cl_object *clob); - -typedef int (*obd_enqueue_update_f)(void *cookie, int rc); - -/* obd info for a particular level (lov, osc). */ -struct obd_info { - /* OBD_STATFS_* flags */ - __u64 oi_flags; - /* lsm data specific for every OSC. */ - struct lov_stripe_md *oi_md; - /* statfs data specific for every OSC, if needed at all. */ - struct obd_statfs *oi_osfs; - /* An update callback which is called to update some data on upper - * level. E.g. 
it is used for update lsm->lsm_oinfo at every received - * request in osc level for enqueue requests. It is also possible to - * update some caller data from LOV layer if needed. - */ - obd_enqueue_update_f oi_cb_up; -}; - -struct obd_type { - struct list_head typ_chain; - struct obd_ops *typ_dt_ops; - struct md_ops *typ_md_ops; - struct dentry *typ_debugfs_entry; - char *typ_name; - int typ_refcnt; - struct lu_device_type *typ_lu; - spinlock_t obd_type_lock; - struct kobject *typ_kobj; -}; - -struct brw_page { - u64 off; - struct page *pg; - unsigned int count; - u32 flag; -}; - -struct timeout_item { - enum timeout_event ti_event; - unsigned long ti_timeout; - timeout_cb_t ti_cb; - void *ti_cb_data; - struct list_head ti_obd_list; - struct list_head ti_chain; -}; - -#define OBD_MAX_RIF_DEFAULT 8 -#define OBD_MAX_RIF_MAX 512 -#define OSC_MAX_RIF_MAX 256 -#define OSC_MAX_DIRTY_DEFAULT (OBD_MAX_RIF_DEFAULT * 4) -#define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */ -#define OSC_DEFAULT_RESENDS 10 - -/* possible values for fo_sync_lock_cancel */ -enum { - NEVER_SYNC_ON_CANCEL = 0, - BLOCKING_SYNC_ON_CANCEL = 1, - ALWAYS_SYNC_ON_CANCEL = 2, - NUM_SYNC_ON_CANCEL_STATES -}; - -enum obd_cl_sem_lock_class { - OBD_CLI_SEM_NORMAL, - OBD_CLI_SEM_MGC, - OBD_CLI_SEM_MDCOSC, -}; - -/* - * Limit reply buffer size for striping data to one x86_64 page. This - * value is chosen to fit the striping data for common use cases while - * staying well below the limit at which the buffer must be backed by - * vmalloc(). Excessive use of vmalloc() may cause spinlock contention - * on the MDS. - */ -#define OBD_MAX_DEFAULT_EA_SIZE 4096 - -struct mdc_rpc_lock; -struct obd_import; -struct client_obd { - struct rw_semaphore cl_sem; - struct obd_uuid cl_target_uuid; - struct obd_import *cl_import; /* ptlrpc connection state */ - size_t cl_conn_count; - /* - * Cache maximum and default values for easize. 
This is - * strictly a performance optimization to minimize calls to - * obd_size_diskmd(). The default values are used to calculate the - * initial size of a request buffer. The ptlrpc layer will resize the - * buffer as needed to accommodate a larger reply from the - * server. The default values should be small enough to avoid wasted - * memory and excessive use of vmalloc(), yet large enough to avoid - * reallocating the buffer in the common use case. - */ - /* - * Default EA size for striping attributes. It is initialized at - * mount-time based on the default stripe width of the filesystem, - * then it tracks the largest observed EA size advertised by - * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE. - */ - u32 cl_default_mds_easize; - /* Maximum possible EA size computed at mount-time based on - * the number of OSTs in the filesystem. May be increased at - * run-time if a larger observed size is advertised by the MDT. - */ - u32 cl_max_mds_easize; - - enum lustre_sec_part cl_sp_me; - enum lustre_sec_part cl_sp_to; - struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ - - /* the grant values are protected by loi_list_lock below */ - unsigned long cl_dirty_pages; /* all _dirty_ in pages */ - unsigned long cl_dirty_max_pages; /* allowed w/o rpc */ - unsigned long cl_dirty_transit; /* dirty synchronous */ - unsigned long cl_avail_grant; /* bytes of credit for ost */ - unsigned long cl_lost_grant; /* lost credits (trunc) */ - - /* since we allocate grant by blocks, we don't know how many grant will - * be used to add a page into cache. As a solution, we reserve maximum - * grant before trying to dirty a page and unreserve the rest. - * See osc_{reserve|unreserve}_grant for details. 
- */ - long cl_reserved_grant; - struct list_head cl_cache_waiters; /* waiting for cache/grant */ - unsigned long cl_next_shrink_grant; /* jiffies */ - struct list_head cl_grant_shrink_list; /* Timeout event list */ - int cl_grant_shrink_interval; /* seconds */ - - /* A chunk is an optimal size used by osc_extent to determine - * the extent size. A chunk is max(PAGE_SIZE, OST block size) - */ - int cl_chunkbits; - unsigned int cl_extent_tax; /* extent overhead, by bytes */ - - /* keep track of objects that have lois that contain pages which - * have been queued for async brw. this lock also protects the - * lists of osc_client_pages that hang off of the loi - */ - /* - * ->cl_loi_list_lock protects consistency of - * ->cl_loi_{ready,read,write}_list. ->ap_make_ready() and - * ->ap_completion() call-backs are executed under this lock. As we - * cannot guarantee that these call-backs never block on all platforms - * (as a matter of fact they do block on Mac OS X), type of - * ->cl_loi_list_lock is platform dependent: it's a spin-lock on Linux - * and blocking mutex on Mac OS X. (Alternative is to make this lock - * blocking everywhere, but we don't want to slow down fast-path of - * our main platform.) - * - * NB by Jinshan: though field names are still _loi_, but actually - * osc_object{}s are in the list. 
- */ - spinlock_t cl_loi_list_lock; - struct list_head cl_loi_ready_list; - struct list_head cl_loi_hp_ready_list; - struct list_head cl_loi_write_list; - struct list_head cl_loi_read_list; - __u32 cl_r_in_flight; - __u32 cl_w_in_flight; - /* just a sum of the loi/lop pending numbers to be exported by sysfs */ - atomic_t cl_pending_w_pages; - atomic_t cl_pending_r_pages; - __u32 cl_max_pages_per_rpc; - __u32 cl_max_rpcs_in_flight; - struct obd_histogram cl_read_rpc_hist; - struct obd_histogram cl_write_rpc_hist; - struct obd_histogram cl_read_page_hist; - struct obd_histogram cl_write_page_hist; - struct obd_histogram cl_read_offset_hist; - struct obd_histogram cl_write_offset_hist; - - /* LRU for osc caching pages */ - struct cl_client_cache *cl_cache; - /** member of cl_cache->ccc_lru */ - struct list_head cl_lru_osc; - /** # of available LRU slots left in the per-OSC cache. - * Available LRU slots are shared by all OSCs of the same file system, - * therefore this is a pointer to cl_client_cache::ccc_lru_left. - */ - atomic_long_t *cl_lru_left; - /** # of busy LRU pages. A page is considered busy if it's in writeback - * queue, or in transfer. Busy pages can't be discarded so they are not - * in LRU cache. - */ - atomic_long_t cl_lru_busy; - /** # of LRU pages in the cache for this client_obd */ - atomic_long_t cl_lru_in_list; - /** # of threads are shrinking LRU cache. To avoid contention, it's not - * allowed to have multiple threads shrinking LRU cache. - */ - atomic_t cl_lru_shrinkers; - /** The time when this LRU cache was last used. */ - time64_t cl_lru_last_used; - /** stats: how many reclaims have happened for this client_obd. - * reclaim and shrink - shrink is async, voluntarily rebalancing; - * reclaim is sync, initiated by IO thread when the LRU slots are - * in shortage. 
- */ - u64 cl_lru_reclaim; - /** List of LRU pages for this client_obd */ - struct list_head cl_lru_list; - /** Lock for LRU page list */ - spinlock_t cl_lru_list_lock; - /** # of unstable pages in this client_obd. - * An unstable page is a page state that WRITE RPC has finished but - * the transaction has NOT yet committed. - */ - atomic_long_t cl_unstable_count; - /** Link to osc_shrinker_list */ - struct list_head cl_shrink_list; - - /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ - atomic_t cl_destroy_in_flight; - wait_queue_head_t cl_destroy_waitq; - - struct mdc_rpc_lock *cl_rpc_lock; - - /* modify rpcs in flight - * currently used for metadata only - */ - spinlock_t cl_mod_rpcs_lock; - u16 cl_max_mod_rpcs_in_flight; - u16 cl_mod_rpcs_in_flight; - u16 cl_close_rpcs_in_flight; - wait_queue_head_t cl_mod_rpcs_waitq; - unsigned long *cl_mod_tag_bitmap; - struct obd_histogram cl_mod_rpcs_hist; - - /* mgc datastruct */ - atomic_t cl_mgc_refcount; - struct obd_export *cl_mgc_mgsexp; - - /* checksumming for data sent over the network */ - unsigned int cl_checksum:1; /* 0 = disabled, 1 = enabled */ - /* supported checksum types that are worked out at connect time */ - __u32 cl_supp_cksum_types; - /* checksum algorithm to be used */ - enum cksum_type cl_cksum_type; - - /* also protected by the poorly named _loi_list_lock lock above */ - struct osc_async_rc cl_ar; - - /* sequence manager */ - struct lu_client_seq *cl_seq; - - atomic_t cl_resends; /* resend count */ - - /* ptlrpc work for writeback in ptlrpcd context */ - void *cl_writeback_work; - void *cl_lru_work; - /* hash tables for osc_quota_info */ - struct rhashtable cl_quota_hash[MAXQUOTAS]; -}; - -#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) - -struct obd_id_info { - __u32 idx; - u64 *data; -}; - -struct echo_client_obd { - struct obd_export *ec_exp; /* the local connection to osc/lov */ - spinlock_t ec_lock; - struct list_head ec_objects; - struct list_head 
ec_locks; - __u64 ec_unique; -}; - -/* Generic subset of OSTs */ -struct ost_pool { - __u32 *op_array; /* array of index of lov_obd->lov_tgts */ - unsigned int op_count; /* number of OSTs in the array */ - unsigned int op_size; /* allocated size of lp_array */ - struct rw_semaphore op_rw_sem; /* to protect ost_pool use */ -}; - -/* allow statfs data caching for 1 second */ -#define OBD_STATFS_CACHE_SECONDS 1 - -struct lov_tgt_desc { - struct list_head ltd_kill; - struct obd_uuid ltd_uuid; - struct obd_device *ltd_obd; - struct obd_export *ltd_exp; - __u32 ltd_gen; - __u32 ltd_index; /* index in lov_obd->tgts */ - unsigned long ltd_active:1,/* is this target up for requests */ - ltd_activate:1,/* should target be activated */ - ltd_reap:1; /* should this target be deleted */ -}; - -struct lov_obd { - struct lov_desc desc; - struct lov_tgt_desc **lov_tgts; /* sparse array */ - struct ost_pool lov_packed; /* all OSTs in a packed array */ - struct mutex lov_lock; - struct obd_connect_data lov_ocd; - atomic_t lov_refcount; - __u32 lov_death_row;/* tgts scheduled to be deleted */ - __u32 lov_tgt_size; /* size of tgts array */ - int lov_connects; - int lov_pool_count; - struct rhashtable lov_pools_hash_body; /* used for key access */ - struct list_head lov_pool_list; /* used for sequential access */ - struct dentry *lov_pool_debugfs_entry; - enum lustre_sec_part lov_sp_me; - - /* Cached LRU and unstable data from upper layer */ - struct cl_client_cache *lov_cache; - - struct rw_semaphore lov_notify_lock; - - struct kobject *lov_tgts_kobj; -}; - -struct lmv_tgt_desc { - struct obd_uuid ltd_uuid; - struct obd_export *ltd_exp; - u32 ltd_idx; - struct mutex ltd_fid_mutex; - unsigned long ltd_active:1; /* target up for requests */ -}; - -struct lmv_obd { - struct lu_client_fld lmv_fld; - spinlock_t lmv_lock; - struct lmv_desc desc; - struct obd_uuid cluuid; - - struct mutex lmv_init_mutex; - int connected; - int max_easize; - int max_def_easize; - - u32 tgts_size; /* size of 
tgts array */ - struct lmv_tgt_desc **tgts; - - struct obd_connect_data conn_data; - struct kobject *lmv_tgts_kobj; -}; - -struct niobuf_local { - __u64 lnb_file_offset; - __u32 lnb_page_offset; - __u32 lnb_len; - __u32 lnb_flags; - int lnb_rc; - struct page *lnb_page; - void *lnb_data; -}; - -#define LUSTRE_FLD_NAME "fld" -#define LUSTRE_SEQ_NAME "seq" - -#define LUSTRE_MDD_NAME "mdd" -#define LUSTRE_OSD_LDISKFS_NAME "osd-ldiskfs" -#define LUSTRE_OSD_ZFS_NAME "osd-zfs" -#define LUSTRE_VVP_NAME "vvp" -#define LUSTRE_LMV_NAME "lmv" -#define LUSTRE_SLP_NAME "slp" -#define LUSTRE_LOD_NAME "lod" -#define LUSTRE_OSP_NAME "osp" -#define LUSTRE_LWP_NAME "lwp" - -/* obd device type names */ - /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */ -#define LUSTRE_MDS_NAME "mds" -#define LUSTRE_MDT_NAME "mdt" -#define LUSTRE_MDC_NAME "mdc" -#define LUSTRE_OSS_NAME "ost" /* FIXME change name to oss */ -#define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost */ -#define LUSTRE_OSC_NAME "osc" -#define LUSTRE_LOV_NAME "lov" -#define LUSTRE_MGS_NAME "mgs" -#define LUSTRE_MGC_NAME "mgc" - -#define LUSTRE_ECHO_NAME "obdecho" -#define LUSTRE_ECHO_CLIENT_NAME "echo_client" -#define LUSTRE_QMT_NAME "qmt" - -/* Constant obd names (post-rename) */ -#define LUSTRE_MDS_OBDNAME "MDS" -#define LUSTRE_OSS_OBDNAME "OSS" -#define LUSTRE_MGS_OBDNAME "MGS" -#define LUSTRE_MGC_OBDNAME "MGC" - -/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ -#define N_LOCAL_TEMP_PAGE 0x10000000 - -/* - * Events signalled through obd_notify() upcall-chain. 
- */ -enum obd_notify_event { - /* Device connect start */ - OBD_NOTIFY_CONNECT, - /* Device activated */ - OBD_NOTIFY_ACTIVE, - /* Device deactivated */ - OBD_NOTIFY_INACTIVE, - /* Connect data for import were changed */ - OBD_NOTIFY_OCD, - /* Sync request */ - OBD_NOTIFY_SYNC_NONBLOCK, - OBD_NOTIFY_SYNC, - /* Configuration event */ - OBD_NOTIFY_CONFIG, - /* Administratively deactivate/activate event */ - OBD_NOTIFY_DEACTIVATE, - OBD_NOTIFY_ACTIVATE -}; - -/* - * Data structure used to pass obd_notify()-event to non-obd listeners (llite - * being main example). - */ -struct obd_notify_upcall { - int (*onu_upcall)(struct obd_device *host, struct obd_device *watched, - enum obd_notify_event ev, void *owner, void *data); - /* Opaque datum supplied by upper layer listener */ - void *onu_owner; -}; - -struct target_recovery_data { - svc_handler_t trd_recovery_handler; - pid_t trd_processing_task; - struct completion trd_starting; - struct completion trd_finishing; -}; - -struct obd_llog_group { - struct llog_ctxt *olg_ctxts[LLOG_MAX_CTXTS]; - wait_queue_head_t olg_waitq; - spinlock_t olg_lock; - struct mutex olg_cat_processing; -}; - -/* corresponds to one of the obd's */ -#define OBD_DEVICE_MAGIC 0XAB5CD6EF - -struct lvfs_run_ctxt { - struct dt_device *dt; -}; - -struct obd_device { - struct obd_type *obd_type; - u32 obd_magic; /* OBD_DEVICE_MAGIC */ - int obd_minor; /* device number: lctl dl */ - struct lu_device *obd_lu_dev; - - /* common and UUID name of this device */ - struct obd_uuid obd_uuid; - char obd_name[MAX_OBD_NAME]; - - /* bitfield modification is protected by obd_dev_lock */ - unsigned long obd_attached:1, /* finished attach */ - obd_set_up:1, /* finished setup */ - obd_version_recov:1, /* obd uses version checking */ - obd_replayable:1,/* recovery is enabled; inform clients */ - obd_no_transno:1, /* no committed-transno notification */ - obd_no_recov:1, /* fail instead of retry messages */ - obd_stopping:1, /* started cleanup */ - obd_starting:1, /* 
started setup */ - obd_force:1, /* cleanup with > 0 obd refcount */ - obd_fail:1, /* cleanup with failover */ - obd_no_conn:1, /* deny new connections */ - obd_inactive:1, /* device active/inactive - * (for sysfs status only!!) - */ - obd_no_ir:1, /* no imperative recovery. */ - obd_process_conf:1; /* device is processing mgs config */ - /* use separate field as it is set in interrupt to don't mess with - * protection of other bits using _bh lock - */ - unsigned long obd_recovery_expired:1; - /* uuid-export hash body */ - struct rhashtable obd_uuid_hash; - wait_queue_head_t obd_refcount_waitq; - struct list_head obd_exports; - struct list_head obd_unlinked_exports; - struct list_head obd_delayed_exports; - atomic_t obd_refcount; - int obd_num_exports; - spinlock_t obd_nid_lock; - struct ldlm_namespace *obd_namespace; - struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ - /* a spinlock is OK for what we do now, may need a semaphore later */ - spinlock_t obd_dev_lock; /* protect OBD bitfield above */ - spinlock_t obd_osfs_lock; - struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ - __u64 obd_osfs_age; - u64 obd_last_committed; - struct mutex obd_dev_mutex; - struct lvfs_run_ctxt obd_lvfs_ctxt; - struct obd_llog_group obd_olg; /* default llog group */ - struct obd_device *obd_observer; - struct rw_semaphore obd_observer_link_sem; - struct obd_notify_upcall obd_upcall; - struct obd_export *obd_self_export; - - union { - struct client_obd cli; - struct echo_client_obd echo_client; - struct lov_obd lov; - struct lmv_obd lmv; - } u; - - /* Fields used by LProcFS */ - struct lprocfs_stats *obd_stats; - unsigned int obd_cntr_base; - - struct lprocfs_stats *md_stats; - unsigned int md_cntr_base; - - struct dentry *obd_debugfs_entry; - struct dentry *obd_svc_debugfs_entry; - struct lprocfs_stats *obd_svc_stats; - atomic_t obd_evict_inprogress; - wait_queue_head_t obd_evict_inprogress_waitq; - struct list_head obd_evict_list; /* protected with pet_lock */ - - 
/** - * Ldlm pool part. Save last calculated SLV and Limit. - */ - rwlock_t obd_pool_lock; - u64 obd_pool_slv; - int obd_pool_limit; - - int obd_conn_inprogress; - - /** - * A list of outstanding class_incref()'s against this obd. For - * debugging. - */ - struct lu_ref obd_reference; - - struct kobject obd_kobj; /* sysfs object */ - struct completion obd_kobj_unregister; -}; - -int obd_uuid_add(struct obd_device *obd, struct obd_export *export); -void obd_uuid_del(struct obd_device *obd, struct obd_export *export); - -/* get/set_info keys */ -#define KEY_ASYNC "async" -#define KEY_CHANGELOG_CLEAR "changelog_clear" -#define KEY_FID2PATH "fid2path" -#define KEY_CHECKSUM "checksum" -#define KEY_CLEAR_FS "clear_fs" -#define KEY_CONN_DATA "conn_data" -#define KEY_EVICT_BY_NID "evict_by_nid" -#define KEY_FIEMAP "fiemap" -#define KEY_FLUSH_CTX "flush_ctx" -#define KEY_GRANT_SHRINK "grant_shrink" -#define KEY_HSM_COPYTOOL_SEND "hsm_send" -#define KEY_INIT_RECOV_BACKUP "init_recov_bk" -#define KEY_INTERMDS "inter_mds" -#define KEY_LAST_ID "last_id" -#define KEY_LAST_FID "last_fid" -#define KEY_MAX_EASIZE "max_easize" -#define KEY_DEFAULT_EASIZE "default_easize" -#define KEY_MGSSEC "mgssec" -#define KEY_READ_ONLY "read-only" -#define KEY_REGISTER_TARGET "register_target" -#define KEY_SET_FS "set_fs" -#define KEY_TGT_COUNT "tgt_count" -/* KEY_SET_INFO in lustre_idl.h */ -#define KEY_SPTLRPC_CONF "sptlrpc_conf" - -#define KEY_CACHE_SET "cache_set" -#define KEY_CACHE_LRU_SHRINK "cache_lru_shrink" - -struct lu_context; - -static inline int it_to_lock_mode(struct lookup_intent *it) -{ - /* CREAT needs to be tested before open (both could be set) */ - if (it->it_op & IT_CREAT) - return LCK_CW; - else if (it->it_op & (IT_GETATTR | IT_OPEN | IT_LOOKUP | - IT_LAYOUT)) - return LCK_CR; - else if (it->it_op & IT_READDIR) - return LCK_PR; - else if (it->it_op & IT_GETXATTR) - return LCK_PR; - else if (it->it_op & IT_SETXATTR) - return LCK_PW; - - LASSERTF(0, "Invalid it_op: %d\n", 
it->it_op); - return -EINVAL; -} - -enum md_op_flags { - MF_MDC_CANCEL_FID1 = BIT(0), - MF_MDC_CANCEL_FID2 = BIT(1), - MF_MDC_CANCEL_FID3 = BIT(2), - MF_MDC_CANCEL_FID4 = BIT(3), - MF_GET_MDT_IDX = BIT(4), -}; - -enum md_cli_flags { - CLI_SET_MEA = BIT(0), - CLI_RM_ENTRY = BIT(1), - CLI_HASH64 = BIT(2), - CLI_API32 = BIT(3), - CLI_MIGRATE = BIT(4), -}; - -/** - * GETXATTR is not included as only a couple of fields in the reply body - * is filled, but not FID which is needed for common intent handling in - * mdc_finish_intent_lock() - */ -static inline bool it_has_reply_body(const struct lookup_intent *it) -{ - return it->it_op & (IT_OPEN | IT_UNLINK | IT_LOOKUP | IT_GETATTR); -} - -struct md_op_data { - struct lu_fid op_fid1; /* operation fid1 (usually parent) */ - struct lu_fid op_fid2; /* operation fid2 (usually child) */ - struct lu_fid op_fid3; /* 2 extra fids to find conflicting */ - struct lu_fid op_fid4; /* to the operation locks. */ - u32 op_mds; /* what mds server open will go to */ - struct lustre_handle op_handle; - s64 op_mod_time; - const char *op_name; - size_t op_namelen; - __u32 op_mode; - struct lmv_stripe_md *op_mea1; - struct lmv_stripe_md *op_mea2; - __u32 op_suppgids[2]; - __u32 op_fsuid; - __u32 op_fsgid; - kernel_cap_t op_cap; - void *op_data; - size_t op_data_size; - - /* iattr fields and blocks. */ - struct iattr op_attr; - unsigned int op_attr_flags; - __u64 op_valid; - loff_t op_attr_blocks; - - __u32 op_flags; - - /* Various operation flags. 
*/ - enum mds_op_bias op_bias; - - /* Used by readdir */ - __u64 op_offset; - - /* Used by readdir */ - __u32 op_max_pages; - - /* used to transfer info between the stacks of MD client - * see enum op_cli_flags - */ - enum md_cli_flags op_cli_flags; - - /* File object data version for HSM release, on client */ - __u64 op_data_version; - struct lustre_handle op_lease_handle; - - /* default stripe offset */ - __u32 op_default_stripe_offset; -}; - -struct md_callback { - int (*md_blocking_ast)(struct ldlm_lock *lock, - struct ldlm_lock_desc *desc, - void *data, int flag); -}; - -struct md_enqueue_info; -/* metadata stat-ahead */ - -struct md_enqueue_info { - struct md_op_data mi_data; - struct lookup_intent mi_it; - struct lustre_handle mi_lockh; - struct inode *mi_dir; - struct ldlm_enqueue_info mi_einfo; - int (*mi_cb)(struct ptlrpc_request *req, - struct md_enqueue_info *minfo, int rc); - void *mi_cbdata; -}; - -struct obd_ops { - struct module *owner; - int (*iocontrol)(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void __user *uarg); - int (*get_info)(const struct lu_env *env, struct obd_export *, - __u32 keylen, void *key, __u32 *vallen, void *val); - int (*set_info_async)(const struct lu_env *, struct obd_export *, - __u32 keylen, void *key, - __u32 vallen, void *val, - struct ptlrpc_request_set *set); - int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg); - int (*precleanup)(struct obd_device *dev); - int (*cleanup)(struct obd_device *dev); - int (*process_config)(struct obd_device *dev, u32 len, void *data); - int (*postrecov)(struct obd_device *dev); - int (*add_conn)(struct obd_import *imp, struct obd_uuid *uuid, - int priority); - int (*del_conn)(struct obd_import *imp, struct obd_uuid *uuid); - /* connect to the target device with given connection - * data. @ocd->ocd_connect_flags is modified to reflect flags actually - * granted by the target, which are guaranteed to be a subset of flags - * asked for. 
If @ocd == NULL, use default parameters. - */ - int (*connect)(const struct lu_env *env, - struct obd_export **exp, struct obd_device *src, - struct obd_uuid *cluuid, struct obd_connect_data *ocd, - void *localdata); - int (*reconnect)(const struct lu_env *env, - struct obd_export *exp, struct obd_device *src, - struct obd_uuid *cluuid, - struct obd_connect_data *ocd, - void *localdata); - int (*disconnect)(struct obd_export *exp); - - /* Initialize/finalize fids infrastructure. */ - int (*fid_init)(struct obd_device *obd, - struct obd_export *exp, enum lu_cli_type type); - int (*fid_fini)(struct obd_device *obd); - - /* Allocate new fid according to passed @hint. */ - int (*fid_alloc)(const struct lu_env *env, struct obd_export *exp, - struct lu_fid *fid, struct md_op_data *op_data); - - /* - * Object with @fid is getting deleted, we may want to do something - * about this. - */ - int (*statfs)(const struct lu_env *, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, __u32 flags); - int (*statfs_async)(struct obd_export *exp, struct obd_info *oinfo, - __u64 max_age, struct ptlrpc_request_set *set); - int (*create)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa); - int (*destroy)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa); - int (*setattr)(const struct lu_env *, struct obd_export *exp, - struct obdo *oa); - int (*getattr)(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa); - int (*preprw)(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, int objcount, - struct obd_ioobj *obj, struct niobuf_remote *remote, - int *nr_pages, struct niobuf_local *local); - int (*commitrw)(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - struct niobuf_remote *remote, int pages, - struct niobuf_local *local, int rc); - int (*init_export)(struct obd_export *exp); - int (*destroy_export)(struct obd_export *exp); - 
- /* metadata-only methods */ - int (*import_event)(struct obd_device *, struct obd_import *, - enum obd_import_event); - - int (*notify)(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data); - - int (*health_check)(const struct lu_env *env, struct obd_device *); - struct obd_uuid *(*get_uuid)(struct obd_export *exp); - - /* quota methods */ - int (*quotactl)(struct obd_device *, struct obd_export *, - struct obd_quotactl *); - - /* pools methods */ - int (*pool_new)(struct obd_device *obd, char *poolname); - int (*pool_del)(struct obd_device *obd, char *poolname); - int (*pool_add)(struct obd_device *obd, char *poolname, - char *ostname); - int (*pool_rem)(struct obd_device *obd, char *poolname, - char *ostname); - void (*getref)(struct obd_device *obd); - void (*putref)(struct obd_device *obd); - /* - * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line - * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. - * Also, add a wrapper function in include/linux/obd_class.h. 
- */ -}; - -/* lmv structures */ -struct lustre_md { - struct mdt_body *body; - struct lu_buf layout; - struct lmv_stripe_md *lmv; -#ifdef CONFIG_FS_POSIX_ACL - struct posix_acl *posix_acl; -#endif - struct mdt_remote_perm *remote_perm; -}; - -struct md_open_data { - struct obd_client_handle *mod_och; - struct ptlrpc_request *mod_open_req; - struct ptlrpc_request *mod_close_req; - atomic_t mod_refcount; - bool mod_is_create; -}; - -struct obd_client_handle { - struct lustre_handle och_fh; - struct lu_fid och_fid; - struct md_open_data *och_mod; - struct lustre_handle och_lease_handle; /* open lock for lease */ - __u32 och_magic; - fmode_t och_flags; -}; - -#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed - -struct lookup_intent; -struct cl_attr; - -struct md_ops { - int (*getstatus)(struct obd_export *, struct lu_fid *); - int (*null_inode)(struct obd_export *, const struct lu_fid *); - int (*close)(struct obd_export *, struct md_op_data *, - struct md_open_data *, struct ptlrpc_request **); - int (*create)(struct obd_export *, struct md_op_data *, - const void *, size_t, umode_t, uid_t, gid_t, - kernel_cap_t, __u64, struct ptlrpc_request **); - int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *, - const union ldlm_policy_data *, struct md_op_data *, - struct lustre_handle *, __u64); - int (*getattr)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - int (*getattr_name)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - int (*intent_lock)(struct obd_export *, struct md_op_data *, - struct lookup_intent *, - struct ptlrpc_request **, - ldlm_blocking_callback, __u64); - int (*link)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - int (*rename)(struct obd_export *, struct md_op_data *, - const char *, size_t, const char *, size_t, - struct ptlrpc_request **); - int (*setattr)(struct obd_export *, struct md_op_data *, void *, - size_t, struct ptlrpc_request **); - int (*sync)(struct obd_export *, 
const struct lu_fid *, - struct ptlrpc_request **); - int (*read_page)(struct obd_export *, struct md_op_data *, - struct md_callback *cb_op, __u64 hash_offset, - struct page **ppage); - int (*unlink)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - - int (*setxattr)(struct obd_export *, const struct lu_fid *, - u64, const char *, const void *, size_t, unsigned int, - u32, struct ptlrpc_request **); - - int (*getxattr)(struct obd_export *, const struct lu_fid *, - u64, const char *, size_t, struct ptlrpc_request **); - - int (*init_ea_size)(struct obd_export *, u32, u32); - - int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *, - struct obd_export *, struct obd_export *, - struct lustre_md *); - - int (*free_lustre_md)(struct obd_export *, struct lustre_md *); - - int (*merge_attr)(struct obd_export *, - const struct lmv_stripe_md *lsm, - struct cl_attr *attr, ldlm_blocking_callback); - - int (*set_open_replay_data)(struct obd_export *, - struct obd_client_handle *, - struct lookup_intent *); - int (*clear_open_replay_data)(struct obd_export *, - struct obd_client_handle *); - int (*set_lock_data)(struct obd_export *, const struct lustre_handle *, - void *, __u64 *); - - enum ldlm_mode (*lock_match)(struct obd_export *, __u64, - const struct lu_fid *, enum ldlm_type, - union ldlm_policy_data *, enum ldlm_mode, - struct lustre_handle *); - - int (*cancel_unused)(struct obd_export *, const struct lu_fid *, - union ldlm_policy_data *, enum ldlm_mode, - enum ldlm_cancel_flags flags, void *opaque); - - int (*get_fid_from_lsm)(struct obd_export *, - const struct lmv_stripe_md *, - const char *name, int namelen, - struct lu_fid *fid); - - int (*intent_getattr_async)(struct obd_export *, - struct md_enqueue_info *); - - int (*revalidate_lock)(struct obd_export *, struct lookup_intent *, - struct lu_fid *, __u64 *bits); - - int (*unpackmd)(struct obd_export *exp, struct lmv_stripe_md **plsm, - const union lmv_mds_md *lmv, size_t 
lmv_size); - /* - * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to - * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a - * wrapper function in include/linux/obd_class.h. - */ -}; - -static inline struct md_open_data *obd_mod_alloc(void) -{ - struct md_open_data *mod; - - mod = kzalloc(sizeof(*mod), GFP_NOFS); - if (!mod) - return NULL; - atomic_set(&mod->mod_refcount, 1); - return mod; -} - -#define obd_mod_get(mod) atomic_inc(&(mod)->mod_refcount) -#define obd_mod_put(mod) \ -({ \ - if (atomic_dec_and_test(&(mod)->mod_refcount)) { \ - if ((mod)->mod_open_req) \ - ptlrpc_req_finished((mod)->mod_open_req); \ - kfree(mod); \ - } \ -}) - -void obdo_from_inode(struct obdo *dst, struct inode *src, u32 valid); -void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent); - -/* return 1 if client should be resend request */ -static inline int client_should_resend(int resend, struct client_obd *cli) -{ - return atomic_read(&cli->cl_resends) ? - atomic_read(&cli->cl_resends) > resend : 1; -} - -/** - * Return device name for this device - * - * XXX: lu_device is declared before obd_device, while a pointer pointing - * back to obd_device in lu_device, so this helper function defines here - * instead of in lu_object.h - */ -static inline const char *lu_dev_name(const struct lu_device *lu_dev) -{ - return lu_dev->ld_obd->obd_name; -} - -static inline bool filename_is_volatile(const char *name, size_t namelen, - int *idx) -{ - const char *start; - char *end; - - if (strncmp(name, LUSTRE_VOLATILE_HDR, LUSTRE_VOLATILE_HDR_LEN) != 0) - return false; - - /* caller does not care of idx */ - if (!idx) - return true; - - /* volatile file, the MDT can be set from name */ - /* name format is LUSTRE_VOLATILE_HDR:[idx]: */ - /* if no MDT is specified, use std way */ - if (namelen < LUSTRE_VOLATILE_HDR_LEN + 2) - goto bad_format; - /* test for no MDT idx case */ - if ((*(name + LUSTRE_VOLATILE_HDR_LEN) == ':') && - (*(name + 
LUSTRE_VOLATILE_HDR_LEN + 1) == ':')) { - *idx = -1; - return true; - } - /* we have an idx, read it */ - start = name + LUSTRE_VOLATILE_HDR_LEN + 1; - *idx = simple_strtoul(start, &end, 0); - /* error cases: - * no digit, no trailing :, negative value - */ - if (((*idx == 0) && (end == start)) || - (*end != ':') || (*idx < 0)) - goto bad_format; - - return true; -bad_format: - /* bad format of mdt idx, we cannot return an error - * to caller so we use hash algo - */ - CERROR("Bad volatile file name format: %s\n", - name + LUSTRE_VOLATILE_HDR_LEN); - return false; -} - -static inline int cli_brw_size(struct obd_device *obd) -{ - return obd->u.cli.cl_max_pages_per_rpc << PAGE_SHIFT; -} - -/* - * when RPC size or the max RPCs in flight is increased, the max dirty pages - * of the client should be increased accordingly to avoid sending fragmented - * RPCs over the network when the client runs out of the maximum dirty space - * when so many RPCs are being generated. - */ -static inline void client_adjust_max_dirty(struct client_obd *cli) -{ - /* initializing */ - if (cli->cl_dirty_max_pages <= 0) - cli->cl_dirty_max_pages = - (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024) >> PAGE_SHIFT; - else { - unsigned long dirty_max = cli->cl_max_rpcs_in_flight * - cli->cl_max_pages_per_rpc; - - if (dirty_max > cli->cl_dirty_max_pages) - cli->cl_dirty_max_pages = dirty_max; - } - - if (cli->cl_dirty_max_pages > totalram_pages / 8) - cli->cl_dirty_max_pages = totalram_pages / 8; -} - -#endif /* __OBD_H */ diff --git a/drivers/staging/lustre/lustre/include/obd_cksum.h b/drivers/staging/lustre/lustre/include/obd_cksum.h deleted file mode 100644 index e5f7bb20415d..000000000000 --- a/drivers/staging/lustre/lustre/include/obd_cksum.h +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef __OBD_CKSUM -#define __OBD_CKSUM -#include -#include -#include - -static inline unsigned char cksum_obd2cfs(enum cksum_type cksum_type) -{ - switch (cksum_type) { - case OBD_CKSUM_CRC32: - return CFS_HASH_ALG_CRC32; - case OBD_CKSUM_ADLER: - return CFS_HASH_ALG_ADLER32; - case OBD_CKSUM_CRC32C: - return CFS_HASH_ALG_CRC32C; - default: - CERROR("Unknown checksum type (%x)!!!\n", cksum_type); - LBUG(); - } - return 0; -} - -/* The OBD_FL_CKSUM_* flags is packed into 5 bits of o_flags, since there can - * only be a single checksum type per RPC. - * - * The OBD_CHECKSUM_* type bits passed in ocd_cksum_types are a 32-bit bitmask - * since they need to represent the full range of checksum algorithms that - * both the client and server can understand. - * - * In case of an unsupported types/flags we fall back to ADLER - * because that is supported by all clients since 1.8 - * - * In case multiple algorithms are supported the best one is used. 
- */ -static inline u32 cksum_type_pack(enum cksum_type cksum_type) -{ - unsigned int performance = 0, tmp; - u32 flag = OBD_FL_CKSUM_ADLER; - - if (cksum_type & OBD_CKSUM_CRC32) { - tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)); - if (tmp > performance) { - performance = tmp; - flag = OBD_FL_CKSUM_CRC32; - } - } - if (cksum_type & OBD_CKSUM_CRC32C) { - tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)); - if (tmp > performance) { - performance = tmp; - flag = OBD_FL_CKSUM_CRC32C; - } - } - if (cksum_type & OBD_CKSUM_ADLER) { - tmp = cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER)); - if (tmp > performance) { - performance = tmp; - flag = OBD_FL_CKSUM_ADLER; - } - } - if (unlikely(cksum_type && !(cksum_type & (OBD_CKSUM_CRC32C | - OBD_CKSUM_CRC32 | - OBD_CKSUM_ADLER)))) - CWARN("unknown cksum type %x\n", cksum_type); - - return flag; -} - -static inline enum cksum_type cksum_type_unpack(u32 o_flags) -{ - switch (o_flags & OBD_FL_CKSUM_ALL) { - case OBD_FL_CKSUM_CRC32C: - return OBD_CKSUM_CRC32C; - case OBD_FL_CKSUM_CRC32: - return OBD_CKSUM_CRC32; - default: - break; - } - - return OBD_CKSUM_ADLER; -} - -/* Return a bitmask of the checksum types supported on this system. - * 1.8 supported ADLER it is base and not depend on hw - * Client uses all available local algos - */ -static inline enum cksum_type cksum_types_supported_client(void) -{ - enum cksum_type ret = OBD_CKSUM_ADLER; - - CDEBUG(D_INFO, "Crypto hash speed: crc %d, crc32c %d, adler %d\n", - cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)), - cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)), - cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_ADLER))); - - if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32C)) > 0) - ret |= OBD_CKSUM_CRC32C; - if (cfs_crypto_hash_speed(cksum_obd2cfs(OBD_CKSUM_CRC32)) > 0) - ret |= OBD_CKSUM_CRC32; - - return ret; -} - -/* Select the best checksum algorithm among those supplied in the cksum_types - * input. 
- * - * Currently, calling cksum_type_pack() with a mask will return the fastest - * checksum type due to its benchmarking at libcfs module load. - * Caution is advised, however, since what is fastest on a single client may - * not be the fastest or most efficient algorithm on the server. - */ -static inline enum cksum_type cksum_type_select(enum cksum_type cksum_types) -{ - return cksum_type_unpack(cksum_type_pack(cksum_types)); -} - -/* Checksum algorithm names. Must be defined in the same order as the - * OBD_CKSUM_* flags. - */ -#define DECLARE_CKSUM_NAME char *cksum_name[] = {"crc32", "adler", "crc32c"} - -#endif /* __OBD_H */ diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h deleted file mode 100644 index fc9c7720fee0..000000000000 --- a/drivers/staging/lustre/lustre/include/obd_class.h +++ /dev/null @@ -1,1603 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#ifndef __CLASS_OBD_H -#define __CLASS_OBD_H - -#include -#include -#include -#include -#include -#include - -/* requests should be send without delay and resends for avoid deadlocks */ -#define OBD_STATFS_NODELAY 0x0001 -/* the statfs callback should not update obd_osfs_age */ -#define OBD_STATFS_FROM_CACHE 0x0002 -/* the statfs is only for retrieving information from MDT0 */ -#define OBD_STATFS_FOR_MDT0 0x0004 - -/* OBD Device Declarations */ -extern struct obd_device *obd_devs[MAX_OBD_DEVICES]; -extern rwlock_t obd_dev_lock; - -/* OBD Operations Declarations */ -struct obd_device *class_exp2obd(struct obd_export *exp); -int class_handle_ioctl(unsigned int cmd, unsigned long arg); -int lustre_get_jobid(char *jobid); - -struct lu_device_type; - -/* genops.c */ -extern struct list_head obd_types; -struct obd_export *class_conn2export(struct lustre_handle *conn); -int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, - const char *name, struct lu_device_type *ldt); -int class_unregister_type(const char *name); - -struct obd_device *class_newdev(const char *type_name, const char *name); -void class_release_dev(struct obd_device *obd); - -int class_name2dev(const char *name); -struct obd_device *class_name2obd(const char *name); -int class_uuid2dev(struct obd_uuid *uuid); -struct obd_device *class_find_client_obd(struct obd_uuid *tgt_uuid, - const char *typ_name, - struct obd_uuid *grp_uuid); -struct obd_device *class_devices_in_group(struct obd_uuid *grp_uuid, - int *next); -struct obd_device *class_num2obd(int num); - -int class_notify_sptlrpc_conf(const char *fsname, int namelen); - -int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep); - -int obd_zombie_impexp_init(void); -void obd_zombie_impexp_stop(void); -void obd_zombie_barrier(void); - -int obd_get_request_slot(struct client_obd *cli); -void 
obd_put_request_slot(struct client_obd *cli); -__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli); -int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max); -int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, u16 max); -int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq); - -u16 obd_get_mod_rpc_slot(struct client_obd *cli, u32 opc, - struct lookup_intent *it); -void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc, - struct lookup_intent *it, u16 tag); - -struct llog_handle; -struct llog_rec_hdr; -typedef int (*llog_cb_t)(const struct lu_env *, struct llog_handle *, - struct llog_rec_hdr *, void *); - -/* obd_config.c */ -char *lustre_cfg_string(struct lustre_cfg *lcfg, u32 index); -int class_process_config(struct lustre_cfg *lcfg); -int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, - struct lustre_cfg *lcfg, void *data); - -/* For interoperability */ -struct cfg_interop_param { - char *old_param; - char *new_param; -}; - -int class_find_param(char *buf, char *key, char **valp); -struct cfg_interop_param *class_find_old_param(const char *param, - struct cfg_interop_param *ptr); -int class_get_next_param(char **params, char *copy); -int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh); -int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh); -int class_parse_net(char *buf, u32 *net, char **endh); -int class_match_nid(char *buf, char *key, lnet_nid_t nid); -int class_match_net(char *buf, char *key, u32 net); - -struct obd_device *class_incref(struct obd_device *obd, - const char *scope, const void *source); -void class_decref(struct obd_device *obd, - const char *scope, const void *source); -int class_config_llog_handler(const struct lu_env *env, - struct llog_handle *handle, - struct llog_rec_hdr *rec, void *data); -int class_add_uuid(const char *uuid, __u64 nid); - -/* obdecho */ -void lprocfs_echo_init_vars(struct lprocfs_static_vars *lvars); - -#define CFG_F_START 0x01 
/* Set when we start updating from a log */ -#define CFG_F_MARKER 0x02 /* We are within a maker */ -#define CFG_F_SKIP 0x04 /* We should ignore this cfg command */ -#define CFG_F_COMPAT146 0x08 /* Allow old-style logs */ -#define CFG_F_EXCLUDE 0x10 /* OST exclusion list */ - -/* Passed as data param to class_config_parse_llog */ -struct config_llog_instance { - char *cfg_obdname; - void *cfg_instance; - struct super_block *cfg_sb; - struct obd_uuid cfg_uuid; - llog_cb_t cfg_callback; - int cfg_last_idx; /* for partial llog processing */ - int cfg_flags; -}; - -int class_config_parse_llog(const struct lu_env *env, struct llog_ctxt *ctxt, - char *name, struct config_llog_instance *cfg); -enum { - CONFIG_T_CONFIG = 0, - CONFIG_T_SPTLRPC = 1, - CONFIG_T_RECOVER = 2, - CONFIG_T_PARAMS = 3, - CONFIG_T_MAX = 4 -}; - -#define PARAMS_FILENAME "params" -#define LCTL_UPCALL "lctl" - -/* list of active configuration logs */ -struct config_llog_data { - struct ldlm_res_id cld_resid; - struct config_llog_instance cld_cfg; - struct list_head cld_list_chain; - atomic_t cld_refcount; - struct config_llog_data *cld_sptlrpc;/* depended sptlrpc log */ - struct config_llog_data *cld_params; /* common parameters log */ - struct config_llog_data *cld_recover;/* imperative recover log */ - struct obd_export *cld_mgcexp; - struct mutex cld_lock; - int cld_type; - unsigned int cld_stopping:1, /* - * we were told to stop - * watching - */ - cld_lostlock:1; /* lock not requeued */ - char cld_logname[0]; -}; - -struct lustre_profile { - struct list_head lp_list; - char *lp_profile; - char *lp_dt; - char *lp_md; - int lp_refs; - bool lp_list_deleted; -}; - -struct lustre_profile *class_get_profile(const char *prof); -void class_del_profile(const char *prof); -void class_put_profile(struct lustre_profile *lprof); -void class_del_profiles(void); - -#if LUSTRE_TRACKS_LOCK_EXP_REFS - -void __class_export_add_lock_ref(struct obd_export *exp, - struct ldlm_lock *lock); -void 
__class_export_del_lock_ref(struct obd_export *exp, - struct ldlm_lock *lock); -extern void (*class_export_dump_hook)(struct obd_export *exp); - -#else - -#define __class_export_add_lock_ref(exp, lock) do {} while (0) -#define __class_export_del_lock_ref(exp, lock) do {} while (0) - -#endif - -/* genops.c */ -struct obd_export *class_export_get(struct obd_export *exp); -void class_export_put(struct obd_export *exp); -struct obd_export *class_new_export(struct obd_device *obddev, - struct obd_uuid *cluuid); -void class_unlink_export(struct obd_export *exp); - -struct obd_import *class_import_get(struct obd_import *imp); -void class_import_put(struct obd_import *imp); -struct obd_import *class_new_import(struct obd_device *obd); -void class_destroy_import(struct obd_import *exp); - -void class_put_type(struct obd_type *type); -int class_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid); -int class_disconnect(struct obd_export *exp); -void class_fail_export(struct obd_export *exp); -int class_manual_cleanup(struct obd_device *obd); - -static inline void class_export_rpc_inc(struct obd_export *exp) -{ - atomic_inc(&(exp)->exp_rpc_count); - CDEBUG(D_INFO, "RPC GETting export %p : new rpc_count %d\n", - (exp), atomic_read(&(exp)->exp_rpc_count)); -} - -static inline void class_export_rpc_dec(struct obd_export *exp) -{ - LASSERT_ATOMIC_POS(&exp->exp_rpc_count); - atomic_dec(&(exp)->exp_rpc_count); - CDEBUG(D_INFO, "RPC PUTting export %p : new rpc_count %d\n", - (exp), atomic_read(&(exp)->exp_rpc_count)); -} - -static inline struct obd_export *class_export_lock_get(struct obd_export *exp, - struct ldlm_lock *lock) -{ - atomic_inc(&(exp)->exp_locks_count); - __class_export_add_lock_ref(exp, lock); - CDEBUG(D_INFO, "lock GETting export %p : new locks_count %d\n", - (exp), atomic_read(&(exp)->exp_locks_count)); - return class_export_get(exp); -} - -static inline void class_export_lock_put(struct obd_export *exp, - struct ldlm_lock *lock) 
-{ - LASSERT_ATOMIC_POS(&exp->exp_locks_count); - atomic_dec(&(exp)->exp_locks_count); - __class_export_del_lock_ref(exp, lock); - CDEBUG(D_INFO, "lock PUTting export %p : new locks_count %d\n", - (exp), atomic_read(&(exp)->exp_locks_count)); - class_export_put(exp); -} - -static inline enum obd_option exp_flags_from_obd(struct obd_device *obd) -{ - return ((obd->obd_fail ? OBD_OPT_FAILOVER : 0) | - (obd->obd_force ? OBD_OPT_FORCE : 0) | - 0); -} - -static inline int lprocfs_climp_check(struct obd_device *obd) -{ - down_read(&(obd)->u.cli.cl_sem); - if (!(obd)->u.cli.cl_import) { - up_read(&(obd)->u.cli.cl_sem); - return -ENODEV; - } - return 0; -} - -struct inode; -struct lu_attr; -struct obdo; - -void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj); - -#define OBT(dev) ((dev)->obd_type) -#define OBP(dev, op) ((dev)->obd_type->typ_dt_ops->op) -#define MDP(dev, op) ((dev)->obd_type->typ_md_ops->op) -#define CTXTP(ctxt, op) ((ctxt)->loc_logops->lop_##op) - -/* - * Ensure obd_setup: used for cleanup which must be called - * while obd is stopping - */ -static inline int obd_check_dev(struct obd_device *obd) -{ - if (!obd) { - CERROR("NULL device\n"); - return -ENODEV; - } - return 0; -} - -/* ensure obd_setup and !obd_stopping */ -static inline int obd_check_dev_active(struct obd_device *obd) -{ - int rc; - - rc = obd_check_dev(obd); - if (rc) - return rc; - if (!obd->obd_set_up || obd->obd_stopping) { - CERROR("Device %d not setup\n", obd->obd_minor); - return -ENODEV; - } - return rc; -} - -#define OBD_COUNTER_OFFSET(op) \ - ((offsetof(struct obd_ops, op) - \ - offsetof(struct obd_ops, iocontrol)) \ - / sizeof(((struct obd_ops *)(0))->iocontrol)) - -#define OBD_COUNTER_INCREMENT(obdx, op) \ -do { \ - if ((obdx)->obd_stats) { \ - unsigned int coffset; \ - coffset = (unsigned int)((obdx)->obd_cntr_base) + \ - OBD_COUNTER_OFFSET(op); \ - LASSERT(coffset < (obdx)->obd_stats->ls_num); \ - lprocfs_counter_incr((obdx)->obd_stats, coffset); \ - } \ -} while 
(0) - -#define EXP_COUNTER_INCREMENT(export, op) \ -do { \ - if ((export)->exp_obd->obd_stats) { \ - unsigned int coffset; \ - coffset = (unsigned int)((export)->exp_obd->obd_cntr_base) + \ - OBD_COUNTER_OFFSET(op); \ - LASSERT(coffset < (export)->exp_obd->obd_stats->ls_num); \ - lprocfs_counter_incr((export)->exp_obd->obd_stats, coffset); \ - } \ -} while (0) - -#define MD_COUNTER_OFFSET(op) \ - ((offsetof(struct md_ops, op) - \ - offsetof(struct md_ops, getstatus)) \ - / sizeof(((struct md_ops *)(0))->getstatus)) - -#define MD_COUNTER_INCREMENT(obdx, op) \ -do { \ - if ((obd)->md_stats) { \ - unsigned int coffset; \ - coffset = (unsigned int)((obdx)->md_cntr_base) + \ - MD_COUNTER_OFFSET(op); \ - LASSERT(coffset < (obdx)->md_stats->ls_num); \ - lprocfs_counter_incr((obdx)->md_stats, coffset); \ - } \ -} while (0) - -#define EXP_MD_COUNTER_INCREMENT(export, op) \ -do { \ - if ((export)->exp_obd->obd_stats) { \ - unsigned int coffset; \ - coffset = (unsigned int)((export)->exp_obd->md_cntr_base) + \ - MD_COUNTER_OFFSET(op); \ - LASSERT(coffset < (export)->exp_obd->md_stats->ls_num); \ - lprocfs_counter_incr((export)->exp_obd->md_stats, coffset); \ - if ((export)->exp_md_stats) \ - lprocfs_counter_incr( \ - (export)->exp_md_stats, coffset); \ - } \ -} while (0) - -#define EXP_CHECK_MD_OP(exp, op) \ -do { \ - if (!(exp)) { \ - CERROR("obd_" #op ": NULL export\n"); \ - return -ENODEV; \ - } \ - if (!(exp)->exp_obd || !OBT((exp)->exp_obd)) { \ - CERROR("obd_" #op ": cleaned up obd\n"); \ - return -EOPNOTSUPP; \ - } \ - if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \ - CERROR("obd_" #op ": dev %s/%d no operation\n", \ - (exp)->exp_obd->obd_name, \ - (exp)->exp_obd->obd_minor); \ - return -EOPNOTSUPP; \ - } \ -} while (0) - -#define OBD_CHECK_DT_OP(obd, op, err) \ -do { \ - if (!OBT(obd) || !OBP((obd), op)) { \ - if (err) \ - CERROR("obd_" #op ": dev %d no operation\n", \ - obd->obd_minor); \ - return err; \ - } \ -} while (0) - -#define EXP_CHECK_DT_OP(exp, 
op) \ -do { \ - if (!(exp)) { \ - CERROR("obd_" #op ": NULL export\n"); \ - return -ENODEV; \ - } \ - if (!(exp)->exp_obd || !OBT((exp)->exp_obd)) { \ - CERROR("obd_" #op ": cleaned up obd\n"); \ - return -EOPNOTSUPP; \ - } \ - if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \ - CERROR("obd_" #op ": dev %d no operation\n", \ - (exp)->exp_obd->obd_minor); \ - return -EOPNOTSUPP; \ - } \ -} while (0) - -#define CTXT_CHECK_OP(ctxt, op, err) \ -do { \ - if (!OBT(ctxt->loc_obd) || !CTXTP((ctxt), op)) { \ - if (err) \ - CERROR("lop_" #op ": dev %d no operation\n", \ - ctxt->loc_obd->obd_minor); \ - return err; \ - } \ -} while (0) - -static inline int class_devno_max(void) -{ - return MAX_OBD_DEVICES; -} - -static inline int obd_get_info(const struct lu_env *env, - struct obd_export *exp, __u32 keylen, - void *key, __u32 *vallen, void *val) -{ - int rc; - - EXP_CHECK_DT_OP(exp, get_info); - EXP_COUNTER_INCREMENT(exp, get_info); - - rc = OBP(exp->exp_obd, get_info)(env, exp, keylen, key, vallen, val); - return rc; -} - -static inline int obd_set_info_async(const struct lu_env *env, - struct obd_export *exp, u32 keylen, - void *key, u32 vallen, void *val, - struct ptlrpc_request_set *set) -{ - int rc; - - EXP_CHECK_DT_OP(exp, set_info_async); - EXP_COUNTER_INCREMENT(exp, set_info_async); - - rc = OBP(exp->exp_obd, set_info_async)(env, exp, keylen, key, vallen, - val, set); - return rc; -} - -/* - * obd-lu integration. - * - * Functionality is being moved into new lu_device-based layering, but some - * pieces of configuration process are still based on obd devices. - * - * Specifically, lu_device_type_operations::ldto_device_alloc() methods fully - * subsume ->o_setup() methods of obd devices they replace. The same for - * lu_device_operations::ldo_process_config() and ->o_process_config(). As a - * result, obd_setup() and obd_process_config() branch and call one XOR - * another. 
- * - * Yet neither lu_device_type_operations::ldto_device_fini() nor - * lu_device_type_operations::ldto_device_free() fully implement the - * functionality of ->o_precleanup() and ->o_cleanup() they override. Hence, - * obd_precleanup() and obd_cleanup() call both lu_device and obd operations. - */ - -static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) -{ - int rc; - struct lu_device_type *ldt; - struct lu_device *d; - - ldt = obd->obd_type->typ_lu; - if (ldt) { - struct lu_context session_ctx; - struct lu_env env; - - lu_context_init(&session_ctx, LCT_SESSION | LCT_SERVER_SESSION); - session_ctx.lc_thread = NULL; - lu_context_enter(&session_ctx); - - rc = lu_env_init(&env, ldt->ldt_ctx_tags); - if (rc == 0) { - env.le_ses = &session_ctx; - d = ldt->ldt_ops->ldto_device_alloc(&env, ldt, cfg); - lu_env_fini(&env); - if (!IS_ERR(d)) { - obd->obd_lu_dev = d; - d->ld_obd = obd; - rc = 0; - } else { - rc = PTR_ERR(d); - } - } - lu_context_exit(&session_ctx); - lu_context_fini(&session_ctx); - - } else { - OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, setup); - rc = OBP(obd, setup)(obd, cfg); - } - return rc; -} - -static inline int obd_precleanup(struct obd_device *obd) -{ - int rc; - struct lu_device_type *ldt; - struct lu_device *d; - - rc = obd_check_dev(obd); - if (rc) - return rc; - ldt = obd->obd_type->typ_lu; - d = obd->obd_lu_dev; - if (ldt && d) { - struct lu_env env; - - rc = lu_env_init(&env, ldt->ldt_ctx_tags); - if (!rc) { - ldt->ldt_ops->ldto_device_fini(&env, d); - lu_env_fini(&env); - } - } - OBD_CHECK_DT_OP(obd, precleanup, 0); - OBD_COUNTER_INCREMENT(obd, precleanup); - - rc = OBP(obd, precleanup)(obd); - return rc; -} - -static inline int obd_cleanup(struct obd_device *obd) -{ - int rc; - struct lu_device_type *ldt; - struct lu_device *d; - - rc = obd_check_dev(obd); - if (rc) - return rc; - - ldt = obd->obd_type->typ_lu; - d = obd->obd_lu_dev; - if (ldt && d) { - struct lu_env env; - - rc = 
lu_env_init(&env, ldt->ldt_ctx_tags); - if (rc == 0) { - ldt->ldt_ops->ldto_device_free(&env, d); - lu_env_fini(&env); - obd->obd_lu_dev = NULL; - } - } - OBD_CHECK_DT_OP(obd, cleanup, 0); - OBD_COUNTER_INCREMENT(obd, cleanup); - - rc = OBP(obd, cleanup)(obd); - return rc; -} - -static inline void obd_cleanup_client_import(struct obd_device *obd) -{ - /* - * If we set up but never connected, the - * client import will not have been cleaned. - */ - down_write(&obd->u.cli.cl_sem); - if (obd->u.cli.cl_import) { - struct obd_import *imp; - - imp = obd->u.cli.cl_import; - CDEBUG(D_CONFIG, "%s: client import never connected\n", - obd->obd_name); - ptlrpc_invalidate_import(imp); - client_destroy_import(imp); - obd->u.cli.cl_import = NULL; - } - up_write(&obd->u.cli.cl_sem); -} - -static inline int -obd_process_config(struct obd_device *obd, int datalen, void *data) -{ - int rc; - struct lu_device_type *ldt; - struct lu_device *d; - - rc = obd_check_dev(obd); - if (rc) - return rc; - - obd->obd_process_conf = 1; - ldt = obd->obd_type->typ_lu; - d = obd->obd_lu_dev; - if (ldt && d) { - struct lu_env env; - - rc = lu_env_init(&env, ldt->ldt_ctx_tags); - if (rc == 0) { - rc = d->ld_ops->ldo_process_config(&env, d, data); - lu_env_fini(&env); - } - } else { - OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP); - rc = OBP(obd, process_config)(obd, datalen, data); - } - OBD_COUNTER_INCREMENT(obd, process_config); - obd->obd_process_conf = 0; - - return rc; -} - -static inline int obd_create(const struct lu_env *env, struct obd_export *exp, - struct obdo *obdo) -{ - int rc; - - EXP_CHECK_DT_OP(exp, create); - EXP_COUNTER_INCREMENT(exp, create); - - rc = OBP(exp->exp_obd, create)(env, exp, obdo); - return rc; -} - -static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp, - struct obdo *obdo) -{ - int rc; - - EXP_CHECK_DT_OP(exp, destroy); - EXP_COUNTER_INCREMENT(exp, destroy); - - rc = OBP(exp->exp_obd, destroy)(env, exp, obdo); - return rc; -} - -static 
inline int obd_getattr(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa) -{ - int rc; - - EXP_CHECK_DT_OP(exp, getattr); - EXP_COUNTER_INCREMENT(exp, getattr); - - rc = OBP(exp->exp_obd, getattr)(env, exp, oa); - return rc; -} - -static inline int obd_setattr(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa) -{ - int rc; - - EXP_CHECK_DT_OP(exp, setattr); - EXP_COUNTER_INCREMENT(exp, setattr); - - rc = OBP(exp->exp_obd, setattr)(env, exp, oa); - return rc; -} - -static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority) -{ - struct obd_device *obd = imp->imp_obd; - int rc; - - rc = obd_check_dev_active(obd); - if (rc) - return rc; - OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, add_conn); - - rc = OBP(obd, add_conn)(imp, uuid, priority); - return rc; -} - -static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid) -{ - struct obd_device *obd = imp->imp_obd; - int rc; - - rc = obd_check_dev_active(obd); - if (rc) - return rc; - OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, del_conn); - - rc = OBP(obd, del_conn)(imp, uuid); - return rc; -} - -static inline struct obd_uuid *obd_get_uuid(struct obd_export *exp) -{ - struct obd_uuid *uuid; - - OBD_CHECK_DT_OP(exp->exp_obd, get_uuid, NULL); - EXP_COUNTER_INCREMENT(exp, get_uuid); - - uuid = OBP(exp->exp_obd, get_uuid)(exp); - return uuid; -} - -/* - * Create a new /a exp on device /a obd for the uuid /a cluuid - * @param exp New export handle - * @param d Connect data, supported flags are set, flags also understood - * by obd are returned. - */ -static inline int obd_connect(const struct lu_env *env, - struct obd_export **exp, struct obd_device *obd, - struct obd_uuid *cluuid, - struct obd_connect_data *data, - void *localdata) -{ - int rc; - __u64 ocf = data ? 
data->ocd_connect_flags : 0; /* - * for post-condition - * check - */ - - rc = obd_check_dev_active(obd); - if (rc) - return rc; - OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, connect); - - rc = OBP(obd, connect)(env, exp, obd, cluuid, data, localdata); - /* check that only subset is granted */ - LASSERT(ergo(data, (data->ocd_connect_flags & ocf) == - data->ocd_connect_flags)); - return rc; -} - -static inline int obd_reconnect(const struct lu_env *env, - struct obd_export *exp, - struct obd_device *obd, - struct obd_uuid *cluuid, - struct obd_connect_data *d, - void *localdata) -{ - int rc; - __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition check */ - - rc = obd_check_dev_active(obd); - if (rc) - return rc; - OBD_CHECK_DT_OP(obd, reconnect, 0); - OBD_COUNTER_INCREMENT(obd, reconnect); - - rc = OBP(obd, reconnect)(env, exp, obd, cluuid, d, localdata); - /* check that only subset is granted */ - LASSERT(ergo(d, (d->ocd_connect_flags & ocf) == d->ocd_connect_flags)); - return rc; -} - -static inline int obd_disconnect(struct obd_export *exp) -{ - int rc; - - EXP_CHECK_DT_OP(exp, disconnect); - EXP_COUNTER_INCREMENT(exp, disconnect); - - rc = OBP(exp->exp_obd, disconnect)(exp); - return rc; -} - -static inline int obd_fid_init(struct obd_device *obd, struct obd_export *exp, - enum lu_cli_type type) -{ - int rc; - - OBD_CHECK_DT_OP(obd, fid_init, 0); - OBD_COUNTER_INCREMENT(obd, fid_init); - - rc = OBP(obd, fid_init)(obd, exp, type); - return rc; -} - -static inline int obd_fid_fini(struct obd_device *obd) -{ - int rc; - - OBD_CHECK_DT_OP(obd, fid_fini, 0); - OBD_COUNTER_INCREMENT(obd, fid_fini); - - rc = OBP(obd, fid_fini)(obd); - return rc; -} - -static inline int obd_fid_alloc(const struct lu_env *env, - struct obd_export *exp, - struct lu_fid *fid, - struct md_op_data *op_data) -{ - int rc; - - EXP_CHECK_DT_OP(exp, fid_alloc); - EXP_COUNTER_INCREMENT(exp, fid_alloc); - - rc = OBP(exp->exp_obd, fid_alloc)(env, exp, fid, 
op_data); - return rc; -} - -static inline int obd_pool_new(struct obd_device *obd, char *poolname) -{ - int rc; - - OBD_CHECK_DT_OP(obd, pool_new, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, pool_new); - - rc = OBP(obd, pool_new)(obd, poolname); - return rc; -} - -static inline int obd_pool_del(struct obd_device *obd, char *poolname) -{ - int rc; - - OBD_CHECK_DT_OP(obd, pool_del, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, pool_del); - - rc = OBP(obd, pool_del)(obd, poolname); - return rc; -} - -static inline int obd_pool_add(struct obd_device *obd, - char *poolname, - char *ostname) -{ - int rc; - - OBD_CHECK_DT_OP(obd, pool_add, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, pool_add); - - rc = OBP(obd, pool_add)(obd, poolname, ostname); - return rc; -} - -static inline int obd_pool_rem(struct obd_device *obd, - char *poolname, - char *ostname) -{ - int rc; - - OBD_CHECK_DT_OP(obd, pool_rem, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, pool_rem); - - rc = OBP(obd, pool_rem)(obd, poolname, ostname); - return rc; -} - -static inline void obd_getref(struct obd_device *obd) -{ - if (OBT(obd) && OBP(obd, getref)) { - OBD_COUNTER_INCREMENT(obd, getref); - OBP(obd, getref)(obd); - } -} - -static inline void obd_putref(struct obd_device *obd) -{ - if (OBT(obd) && OBP(obd, putref)) { - OBD_COUNTER_INCREMENT(obd, putref); - OBP(obd, putref)(obd); - } -} - -static inline int obd_init_export(struct obd_export *exp) -{ - int rc = 0; - - if ((exp)->exp_obd && OBT((exp)->exp_obd) && - OBP((exp)->exp_obd, init_export)) - rc = OBP(exp->exp_obd, init_export)(exp); - return rc; -} - -static inline int obd_destroy_export(struct obd_export *exp) -{ - if ((exp)->exp_obd && OBT((exp)->exp_obd) && - OBP((exp)->exp_obd, destroy_export)) - OBP(exp->exp_obd, destroy_export)(exp); - return 0; -} - -/* - * @max_age is the oldest time in jiffies that we accept using a cached data. - * If the cache is older than @max_age we will get a new value from the - * target. 
Use a value of "jiffies + HZ" to guarantee freshness. - */ -static inline int obd_statfs_async(struct obd_export *exp, - struct obd_info *oinfo, - __u64 max_age, - struct ptlrpc_request_set *rqset) -{ - int rc = 0; - struct obd_device *obd; - - if (!exp || !exp->exp_obd) - return -EINVAL; - - obd = exp->exp_obd; - OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, statfs); - - CDEBUG(D_SUPER, "%s: osfs %p age %llu, max_age %llu\n", - obd->obd_name, &obd->obd_osfs, obd->obd_osfs_age, max_age); - if (time_before64(obd->obd_osfs_age, max_age)) { - rc = OBP(obd, statfs_async)(exp, oinfo, max_age, rqset); - } else { - CDEBUG(D_SUPER, - "%s: use %p cache blocks %llu/%llu objects %llu/%llu\n", - obd->obd_name, &obd->obd_osfs, - obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks, - obd->obd_osfs.os_ffree, obd->obd_osfs.os_files); - spin_lock(&obd->obd_osfs_lock); - memcpy(oinfo->oi_osfs, &obd->obd_osfs, sizeof(*oinfo->oi_osfs)); - spin_unlock(&obd->obd_osfs_lock); - oinfo->oi_flags |= OBD_STATFS_FROM_CACHE; - if (oinfo->oi_cb_up) - oinfo->oi_cb_up(oinfo, 0); - } - return rc; -} - -static inline int obd_statfs_rqset(struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, - __u32 flags) -{ - struct ptlrpc_request_set *set = NULL; - struct obd_info oinfo = { - .oi_osfs = osfs, - .oi_flags = flags, - }; - int rc = 0; - - set = ptlrpc_prep_set(); - if (!set) - return -ENOMEM; - - rc = obd_statfs_async(exp, &oinfo, max_age, set); - if (rc == 0) - rc = ptlrpc_set_wait(set); - ptlrpc_set_destroy(set); - return rc; -} - -/* - * @max_age is the oldest time in jiffies that we accept using a cached data. - * If the cache is older than @max_age we will get a new value from the - * target. Use a value of "jiffies + HZ" to guarantee freshness. 
- */ -static inline int obd_statfs(const struct lu_env *env, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, - __u32 flags) -{ - int rc = 0; - struct obd_device *obd = exp->exp_obd; - - if (!obd) - return -EINVAL; - - OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, statfs); - - CDEBUG(D_SUPER, "osfs %llu, max_age %llu\n", - obd->obd_osfs_age, max_age); - if (time_before64(obd->obd_osfs_age, max_age)) { - rc = OBP(obd, statfs)(env, exp, osfs, max_age, flags); - if (rc == 0) { - spin_lock(&obd->obd_osfs_lock); - memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs)); - obd->obd_osfs_age = get_jiffies_64(); - spin_unlock(&obd->obd_osfs_lock); - } - } else { - CDEBUG(D_SUPER, - "%s: use %p cache blocks %llu/%llu objects %llu/%llu\n", - obd->obd_name, &obd->obd_osfs, - obd->obd_osfs.os_bavail, obd->obd_osfs.os_blocks, - obd->obd_osfs.os_ffree, obd->obd_osfs.os_files); - spin_lock(&obd->obd_osfs_lock); - memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); - spin_unlock(&obd->obd_osfs_lock); - } - return rc; -} - -static inline int obd_preprw(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - struct niobuf_remote *remote, int *pages, - struct niobuf_local *local) -{ - int rc; - - EXP_CHECK_DT_OP(exp, preprw); - EXP_COUNTER_INCREMENT(exp, preprw); - - rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote, - pages, local); - return rc; -} - -static inline int obd_commitrw(const struct lu_env *env, int cmd, - struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - struct niobuf_remote *rnb, int pages, - struct niobuf_local *local, int rc) -{ - EXP_CHECK_DT_OP(exp, commitrw); - EXP_COUNTER_INCREMENT(exp, commitrw); - - rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj, - rnb, pages, local, rc); - return rc; -} - -static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp, - int len, void *karg, void 
__user *uarg) -{ - int rc; - - EXP_CHECK_DT_OP(exp, iocontrol); - EXP_COUNTER_INCREMENT(exp, iocontrol); - - rc = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg); - return rc; -} - -static inline void obd_import_event(struct obd_device *obd, - struct obd_import *imp, - enum obd_import_event event) -{ - if (!obd) { - CERROR("NULL device\n"); - return; - } - if (obd->obd_set_up && OBP(obd, import_event)) { - OBD_COUNTER_INCREMENT(obd, import_event); - OBP(obd, import_event)(obd, imp, event); - } -} - -static inline int obd_notify(struct obd_device *obd, - struct obd_device *watched, - enum obd_notify_event ev, - void *data) -{ - int rc; - - rc = obd_check_dev(obd); - if (rc) - return rc; - - if (!obd->obd_set_up) { - CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name); - return -EINVAL; - } - - if (!OBP(obd, notify)) { - CDEBUG(D_HA, "obd %s has no notify handler\n", obd->obd_name); - return -ENOSYS; - } - - OBD_COUNTER_INCREMENT(obd, notify); - rc = OBP(obd, notify)(obd, watched, ev, data); - return rc; -} - -static inline int obd_notify_observer(struct obd_device *observer, - struct obd_device *observed, - enum obd_notify_event ev, - void *data) -{ - int rc1; - int rc2; - - struct obd_notify_upcall *onu; - - if (observer->obd_observer) - rc1 = obd_notify(observer->obd_observer, observed, ev, data); - else - rc1 = 0; - /* - * Also, call non-obd listener, if any - */ - onu = &observer->obd_upcall; - if (onu->onu_upcall) - rc2 = onu->onu_upcall(observer, observed, ev, - onu->onu_owner, NULL); - else - rc2 = 0; - - return rc1 ? 
rc1 : rc2; -} - -static inline int obd_quotactl(struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - int rc; - - EXP_CHECK_DT_OP(exp, quotactl); - EXP_COUNTER_INCREMENT(exp, quotactl); - - rc = OBP(exp->exp_obd, quotactl)(exp->exp_obd, exp, oqctl); - return rc; -} - -static inline int obd_health_check(const struct lu_env *env, - struct obd_device *obd) -{ - /* - * returns: 0 on healthy - * >0 on unhealthy + reason code/flag - * however the only supported reason == 1 right now - * We'll need to define some better reasons - * or flags in the future. - * <0 on error - */ - int rc; - - /* don't use EXP_CHECK_DT_OP, because NULL method is normal here */ - if (!obd || !OBT(obd)) { - CERROR("cleaned up obd\n"); - return -EOPNOTSUPP; - } - if (!obd->obd_set_up || obd->obd_stopping) - return 0; - if (!OBP(obd, health_check)) - return 0; - - rc = OBP(obd, health_check)(env, obd); - return rc; -} - -static inline int obd_register_observer(struct obd_device *obd, - struct obd_device *observer) -{ - int rc; - - rc = obd_check_dev(obd); - if (rc) - return rc; - down_write(&obd->obd_observer_link_sem); - if (obd->obd_observer && observer) { - up_write(&obd->obd_observer_link_sem); - return -EALREADY; - } - obd->obd_observer = observer; - up_write(&obd->obd_observer_link_sem); - return 0; -} - -/* metadata helpers */ -static inline int md_getstatus(struct obd_export *exp, struct lu_fid *fid) -{ - int rc; - - EXP_CHECK_MD_OP(exp, getstatus); - EXP_MD_COUNTER_INCREMENT(exp, getstatus); - rc = MDP(exp->exp_obd, getstatus)(exp, fid); - return rc; -} - -static inline int md_getattr(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, getattr); - EXP_MD_COUNTER_INCREMENT(exp, getattr); - rc = MDP(exp->exp_obd, getattr)(exp, op_data, request); - return rc; -} - -static inline int md_null_inode(struct obd_export *exp, - const struct lu_fid *fid) -{ - int rc; - - EXP_CHECK_MD_OP(exp, null_inode); - 
EXP_MD_COUNTER_INCREMENT(exp, null_inode); - rc = MDP(exp->exp_obd, null_inode)(exp, fid); - return rc; -} - -static inline int md_close(struct obd_export *exp, struct md_op_data *op_data, - struct md_open_data *mod, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, close); - EXP_MD_COUNTER_INCREMENT(exp, close); - rc = MDP(exp->exp_obd, close)(exp, op_data, mod, request); - return rc; -} - -static inline int md_create(struct obd_export *exp, struct md_op_data *op_data, - const void *data, size_t datalen, umode_t mode, - uid_t uid, gid_t gid, kernel_cap_t cap_effective, - __u64 rdev, struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, create); - EXP_MD_COUNTER_INCREMENT(exp, create); - rc = MDP(exp->exp_obd, create)(exp, op_data, data, datalen, mode, - uid, gid, cap_effective, rdev, request); - return rc; -} - -static inline int md_enqueue(struct obd_export *exp, - struct ldlm_enqueue_info *einfo, - const union ldlm_policy_data *policy, - struct md_op_data *op_data, - struct lustre_handle *lockh, - __u64 extra_lock_flags) -{ - int rc; - - EXP_CHECK_MD_OP(exp, enqueue); - EXP_MD_COUNTER_INCREMENT(exp, enqueue); - rc = MDP(exp->exp_obd, enqueue)(exp, einfo, policy, op_data, lockh, - extra_lock_flags); - return rc; -} - -static inline int md_getattr_name(struct obd_export *exp, - struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, getattr_name); - EXP_MD_COUNTER_INCREMENT(exp, getattr_name); - rc = MDP(exp->exp_obd, getattr_name)(exp, op_data, request); - return rc; -} - -static inline int md_intent_lock(struct obd_export *exp, - struct md_op_data *op_data, - struct lookup_intent *it, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - __u64 extra_lock_flags) -{ - int rc; - - EXP_CHECK_MD_OP(exp, intent_lock); - EXP_MD_COUNTER_INCREMENT(exp, intent_lock); - rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, it, reqp, - cb_blocking, extra_lock_flags); - return 
rc; -} - -static inline int md_link(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, link); - EXP_MD_COUNTER_INCREMENT(exp, link); - rc = MDP(exp->exp_obd, link)(exp, op_data, request); - return rc; -} - -static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data, - const char *old, size_t oldlen, const char *new, - size_t newlen, struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, rename); - EXP_MD_COUNTER_INCREMENT(exp, rename); - rc = MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen, new, - newlen, request); - return rc; -} - -static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, setattr); - EXP_MD_COUNTER_INCREMENT(exp, setattr); - rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, request); - return rc; -} - -static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, sync); - EXP_MD_COUNTER_INCREMENT(exp, sync); - rc = MDP(exp->exp_obd, sync)(exp, fid, request); - return rc; -} - -static inline int md_read_page(struct obd_export *exp, - struct md_op_data *op_data, - struct md_callback *cb_op, - __u64 hash_offset, - struct page **ppage) -{ - int rc; - - EXP_CHECK_MD_OP(exp, read_page); - EXP_MD_COUNTER_INCREMENT(exp, read_page); - rc = MDP(exp->exp_obd, read_page)(exp, op_data, cb_op, hash_offset, - ppage); - return rc; -} - -static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - - EXP_CHECK_MD_OP(exp, unlink); - EXP_MD_COUNTER_INCREMENT(exp, unlink); - rc = MDP(exp->exp_obd, unlink)(exp, op_data, request); - return rc; -} - -static inline int md_get_lustre_md(struct obd_export *exp, - struct ptlrpc_request *req, - struct obd_export *dt_exp, - struct 
obd_export *md_exp, - struct lustre_md *md) -{ - EXP_CHECK_MD_OP(exp, get_lustre_md); - EXP_MD_COUNTER_INCREMENT(exp, get_lustre_md); - return MDP(exp->exp_obd, get_lustre_md)(exp, req, dt_exp, md_exp, md); -} - -static inline int md_free_lustre_md(struct obd_export *exp, - struct lustre_md *md) -{ - EXP_CHECK_MD_OP(exp, free_lustre_md); - EXP_MD_COUNTER_INCREMENT(exp, free_lustre_md); - return MDP(exp->exp_obd, free_lustre_md)(exp, md); -} - -static inline int md_merge_attr(struct obd_export *exp, - const struct lmv_stripe_md *lsm, - struct cl_attr *attr, - ldlm_blocking_callback cb) -{ - EXP_CHECK_MD_OP(exp, merge_attr); - EXP_MD_COUNTER_INCREMENT(exp, merge_attr); - return MDP(exp->exp_obd, merge_attr)(exp, lsm, attr, cb); -} - -static inline int md_setxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 obd_md_valid, const char *name, - const char *value, size_t value_size, - unsigned int xattr_flags, u32 suppgid, - struct ptlrpc_request **request) -{ - EXP_CHECK_MD_OP(exp, setxattr); - EXP_MD_COUNTER_INCREMENT(exp, setxattr); - return MDP(exp->exp_obd, setxattr)(exp, fid, obd_md_valid, name, - value, value_size, xattr_flags, - suppgid, request); -} - -static inline int md_getxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 obd_md_valid, const char *name, - size_t buf_size, struct ptlrpc_request **req) -{ - EXP_CHECK_MD_OP(exp, getxattr); - EXP_MD_COUNTER_INCREMENT(exp, getxattr); - return MDP(exp->exp_obd, getxattr)(exp, fid, obd_md_valid, name, - buf_size, req); -} - -static inline int md_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct lookup_intent *it) -{ - EXP_CHECK_MD_OP(exp, set_open_replay_data); - EXP_MD_COUNTER_INCREMENT(exp, set_open_replay_data); - return MDP(exp->exp_obd, set_open_replay_data)(exp, och, it); -} - -static inline int md_clear_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och) -{ - EXP_CHECK_MD_OP(exp, clear_open_replay_data); - 
EXP_MD_COUNTER_INCREMENT(exp, clear_open_replay_data); - return MDP(exp->exp_obd, clear_open_replay_data)(exp, och); -} - -static inline int md_set_lock_data(struct obd_export *exp, - const struct lustre_handle *lockh, - void *data, __u64 *bits) -{ - EXP_CHECK_MD_OP(exp, set_lock_data); - EXP_MD_COUNTER_INCREMENT(exp, set_lock_data); - return MDP(exp->exp_obd, set_lock_data)(exp, lockh, data, bits); -} - -static inline int md_cancel_unused(struct obd_export *exp, - const struct lu_fid *fid, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - enum ldlm_cancel_flags flags, - void *opaque) -{ - int rc; - - EXP_CHECK_MD_OP(exp, cancel_unused); - EXP_MD_COUNTER_INCREMENT(exp, cancel_unused); - - rc = MDP(exp->exp_obd, cancel_unused)(exp, fid, policy, mode, - flags, opaque); - return rc; -} - -static inline enum ldlm_mode md_lock_match(struct obd_export *exp, __u64 flags, - const struct lu_fid *fid, - enum ldlm_type type, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - struct lustre_handle *lockh) -{ - EXP_CHECK_MD_OP(exp, lock_match); - EXP_MD_COUNTER_INCREMENT(exp, lock_match); - return MDP(exp->exp_obd, lock_match)(exp, flags, fid, type, - policy, mode, lockh); -} - -static inline int md_init_ea_size(struct obd_export *exp, u32 easize, - u32 def_asize) -{ - EXP_CHECK_MD_OP(exp, init_ea_size); - EXP_MD_COUNTER_INCREMENT(exp, init_ea_size); - return MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize); -} - -static inline int md_intent_getattr_async(struct obd_export *exp, - struct md_enqueue_info *minfo) -{ - int rc; - - EXP_CHECK_MD_OP(exp, intent_getattr_async); - EXP_MD_COUNTER_INCREMENT(exp, intent_getattr_async); - rc = MDP(exp->exp_obd, intent_getattr_async)(exp, minfo); - return rc; -} - -static inline int md_revalidate_lock(struct obd_export *exp, - struct lookup_intent *it, - struct lu_fid *fid, __u64 *bits) -{ - int rc; - - EXP_CHECK_MD_OP(exp, revalidate_lock); - EXP_MD_COUNTER_INCREMENT(exp, revalidate_lock); - rc = MDP(exp->exp_obd, 
revalidate_lock)(exp, it, fid, bits); - return rc; -} - -static inline int md_get_fid_from_lsm(struct obd_export *exp, - const struct lmv_stripe_md *lsm, - const char *name, int namelen, - struct lu_fid *fid) -{ - int rc; - - EXP_CHECK_MD_OP(exp, get_fid_from_lsm); - EXP_MD_COUNTER_INCREMENT(exp, get_fid_from_lsm); - rc = MDP(exp->exp_obd, get_fid_from_lsm)(exp, lsm, name, namelen, fid); - return rc; -} - -/* - * Unpack an MD struct from disk to in-memory format. - * Returns +ve size of unpacked MD (0 for free), or -ve error. - * - * If *plsm != NULL and lmm == NULL then *lsm will be freed. - * If *plsm == NULL then it will be allocated. - */ -static inline int md_unpackmd(struct obd_export *exp, - struct lmv_stripe_md **plsm, - const union lmv_mds_md *lmm, size_t lmm_size) -{ - int rc; - - EXP_CHECK_MD_OP(exp, unpackmd); - EXP_MD_COUNTER_INCREMENT(exp, unpackmd); - rc = MDP(exp->exp_obd, unpackmd)(exp, plsm, lmm, lmm_size); - return rc; -} - -/* OBD Metadata Support */ - -int obd_init_caches(void); -void obd_cleanup_caches(void); - -/* support routines */ -extern struct kmem_cache *obdo_cachep; - -typedef int (*register_lwp_cb)(void *data); - -struct lwp_register_item { - struct obd_export **lri_exp; - register_lwp_cb lri_cb_func; - void *lri_cb_data; - struct list_head lri_list; - char lri_name[MTI_NAME_MAXLEN]; -}; - -/* - * I'm as embarrassed about this as you are. 
- * - * // XXX do not look into _superhack with remaining eye - * // XXX if this were any uglier, I'd get my own show on MTV - */ -extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); - -/* obd_mount.c */ -int lustre_unregister_fs(void); -int lustre_register_fs(void); -int lustre_check_exclusion(struct super_block *sb, char *svname); - -/* sysctl.c */ -int obd_sysctl_init(void); - -/* uuid.c */ -typedef __u8 class_uuid_t[16]; -void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out); - -/* lustre_peer.c */ -int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index); -int class_add_uuid(const char *uuid, __u64 nid); -int class_del_uuid(const char *uuid); -int class_check_uuid(struct obd_uuid *uuid, __u64 nid); -void class_init_uuidlist(void); -void class_exit_uuidlist(void); - -/* class_obd.c */ -extern char obd_jobid_node[]; -extern struct miscdevice obd_psdev; -extern spinlock_t obd_types_lock; -int class_procfs_init(void); -int class_procfs_clean(void); - -/* prng.c */ -#define ll_generate_random_uuid(uuid_out) \ - get_random_bytes(uuid_out, sizeof(class_uuid_t)) - -/* statfs_pack.c */ -struct kstatfs; -void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs); -void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs); - -/* root squash info */ -struct rw_semaphore; -struct root_squash_info { - uid_t rsi_uid; - gid_t rsi_gid; - struct list_head rsi_nosquash_nids; - struct rw_semaphore rsi_sem; -}; - -/* linux-module.c */ -int obd_ioctl_getdata(char **buf, int *len, void __user *arg); - -#endif /* __LINUX_OBD_CLASS_H */ diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h deleted file mode 100644 index 9e41633823f7..000000000000 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ /dev/null @@ -1,517 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE 
HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef _OBD_SUPPORT -#define _OBD_SUPPORT - -#include -#include - -#include -#include - -/* global variables */ -extern unsigned int obd_debug_peer_on_timeout; -extern unsigned int obd_dump_on_timeout; -extern unsigned int obd_dump_on_eviction; -/* obd_timeout should only be used for recovery, not for - * networking / disk / timings affected by load (use Adaptive Timeouts) - */ -extern unsigned int obd_timeout; /* seconds */ -extern unsigned int obd_timeout_set; -extern unsigned int at_min; -extern unsigned int at_max; -extern unsigned int at_history; -extern int at_early_margin; -extern int at_extra; -extern unsigned long obd_max_dirty_pages; -extern atomic_long_t obd_dirty_pages; -extern atomic_long_t obd_dirty_transit_pages; -extern char obd_jobid_var[]; - -/* Some hash init argument constants */ -/* Timeout definitions */ -#define OBD_TIMEOUT_DEFAULT 100 -/* Time to wait for all clients to reconnect during recovery (hard limit) */ -#define OBD_RECOVERY_TIME_HARD (obd_timeout * 9) -/* Time to wait for all clients to reconnect during recovery (soft limit) */ -/* Should be very conservative; must catch the first reconnect after reboot */ -#define OBD_RECOVERY_TIME_SOFT (obd_timeout * 3) -/* Change recovery-small 26b time if you change this */ -#define PING_INTERVAL max(obd_timeout / 4, 1U) -/* a bit more than maximal journal commit time in seconds */ -#define PING_INTERVAL_SHORT min(PING_INTERVAL, 7U) -/* Client may skip 1 ping; we must wait at least 2.5. But for multiple - * failover targets the client only pings one server at a time, and pings - * can be lost on a loaded network. Since eviction has serious consequences, - * and there's no urgent need to evict a client just because it's idle, we - * should be very conservative here. 
- */ -#define PING_EVICT_TIMEOUT (PING_INTERVAL * 6) -#define DISK_TIMEOUT 50 /* Beyond this we warn about disk speed */ -#define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */ -/* Max connect interval for nonresponsive servers; ~50s to avoid building up - * connect requests in the LND queues, but within obd_timeout so we don't - * miss the recovery window - */ -#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN, obd_timeout)) -#define CONNECTION_SWITCH_INC 5 /* Connection timeout backoff */ -/* In general this should be low to have quick detection of a system - * running on a backup server. (If it's too low, import_select_connection - * will increase the timeout anyhow.) - */ -#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN, obd_timeout / 20) -/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */ -#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \ - INITIAL_CONNECT_TIMEOUT) -/* The min time a target should wait for clients to reconnect in recovery */ -#define OBD_RECOVERY_TIME_MIN (2 * RECONNECT_DELAY_MAX) -#define OBD_IR_FACTOR_MIN 1 -#define OBD_IR_FACTOR_MAX 10 -#define OBD_IR_FACTOR_DEFAULT (OBD_IR_FACTOR_MAX / 2) -/* default timeout for the MGS to become IR_FULL */ -#define OBD_IR_MGS_TIMEOUT (4 * obd_timeout) -#define LONG_UNLINK 300 /* Unlink should happen before now */ - -/** - * Time interval of shrink, if the client is "idle" more than this interval, - * then the ll_grant thread will return the requested grant space to filter - */ -#define GRANT_SHRINK_INTERVAL 1200/*20 minutes*/ - -#define OBD_FAIL_MDS 0x100 -#define OBD_FAIL_MDS_HANDLE_UNPACK 0x101 -#define OBD_FAIL_MDS_GETATTR_NET 0x102 -#define OBD_FAIL_MDS_GETATTR_PACK 0x103 -#define OBD_FAIL_MDS_READPAGE_NET 0x104 -#define OBD_FAIL_MDS_READPAGE_PACK 0x105 -#define OBD_FAIL_MDS_SENDPAGE 0x106 -#define OBD_FAIL_MDS_REINT_NET 0x107 -#define OBD_FAIL_MDS_REINT_UNPACK 0x108 -#define OBD_FAIL_MDS_REINT_SETATTR 0x109 
-#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a -#define OBD_FAIL_MDS_REINT_CREATE 0x10b -#define OBD_FAIL_MDS_REINT_CREATE_WRITE 0x10c -#define OBD_FAIL_MDS_REINT_UNLINK 0x10d -#define OBD_FAIL_MDS_REINT_UNLINK_WRITE 0x10e -#define OBD_FAIL_MDS_REINT_LINK 0x10f -#define OBD_FAIL_MDS_REINT_LINK_WRITE 0x110 -#define OBD_FAIL_MDS_REINT_RENAME 0x111 -#define OBD_FAIL_MDS_REINT_RENAME_WRITE 0x112 -#define OBD_FAIL_MDS_OPEN_NET 0x113 -#define OBD_FAIL_MDS_OPEN_PACK 0x114 -#define OBD_FAIL_MDS_CLOSE_NET 0x115 -#define OBD_FAIL_MDS_CLOSE_PACK 0x116 -#define OBD_FAIL_MDS_CONNECT_NET 0x117 -#define OBD_FAIL_MDS_CONNECT_PACK 0x118 -#define OBD_FAIL_MDS_REINT_NET_REP 0x119 -#define OBD_FAIL_MDS_DISCONNECT_NET 0x11a -#define OBD_FAIL_MDS_GETSTATUS_NET 0x11b -#define OBD_FAIL_MDS_GETSTATUS_PACK 0x11c -#define OBD_FAIL_MDS_STATFS_PACK 0x11d -#define OBD_FAIL_MDS_STATFS_NET 0x11e -#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f -#define OBD_FAIL_MDS_PIN_NET 0x120 -#define OBD_FAIL_MDS_UNPIN_NET 0x121 -#define OBD_FAIL_MDS_ALL_REPLY_NET 0x122 -#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123 -#define OBD_FAIL_MDS_SYNC_NET 0x124 -#define OBD_FAIL_MDS_SYNC_PACK 0x125 -/* OBD_FAIL_MDS_DONE_WRITING_NET 0x126 obsolete since 2.8.0 */ -/* OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 obsolete since 2.8.0 */ -#define OBD_FAIL_MDS_ALLOC_OBDO 0x128 -#define OBD_FAIL_MDS_PAUSE_OPEN 0x129 -#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a -#define OBD_FAIL_MDS_OPEN_CREATE 0x12b -#define OBD_FAIL_MDS_OST_SETATTR 0x12c -/* OBD_FAIL_MDS_QUOTACHECK_NET 0x12d obsolete since 2.4 */ -#define OBD_FAIL_MDS_QUOTACTL_NET 0x12e -#define OBD_FAIL_MDS_CLIENT_ADD 0x12f -#define OBD_FAIL_MDS_GETXATTR_NET 0x130 -#define OBD_FAIL_MDS_GETXATTR_PACK 0x131 -#define OBD_FAIL_MDS_SETXATTR_NET 0x132 -#define OBD_FAIL_MDS_SETXATTR 0x133 -#define OBD_FAIL_MDS_SETXATTR_WRITE 0x134 -#define OBD_FAIL_MDS_FS_SETUP 0x135 -#define OBD_FAIL_MDS_RESEND 0x136 -#define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x137 -#define OBD_FAIL_MDS_LOV_SYNC_RACE 0x138 
-#define OBD_FAIL_MDS_OSC_PRECREATE 0x139 -#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a -#define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b -#define OBD_FAIL_MDS_BLOCK_QUOTA_REQ 0x13c -#define OBD_FAIL_MDS_DROP_QUOTA_REQ 0x13d -#define OBD_FAIL_MDS_REMOVE_COMMON_EA 0x13e -#define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING 0x13f -#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140 -#define OBD_FAIL_MDS_LOV_PREP_CREATE 0x141 -#define OBD_FAIL_MDS_REINT_DELAY 0x142 -#define OBD_FAIL_MDS_READLINK_EPROTO 0x143 -#define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144 -#define OBD_FAIL_MDS_PDO_LOCK 0x145 -#define OBD_FAIL_MDS_PDO_LOCK2 0x146 -#define OBD_FAIL_MDS_OSC_CREATE_FAIL 0x147 -#define OBD_FAIL_MDS_NEGATIVE_POSITIVE 0x148 -#define OBD_FAIL_MDS_HSM_STATE_GET_NET 0x149 -#define OBD_FAIL_MDS_HSM_STATE_SET_NET 0x14a -#define OBD_FAIL_MDS_HSM_PROGRESS_NET 0x14b -#define OBD_FAIL_MDS_HSM_REQUEST_NET 0x14c -#define OBD_FAIL_MDS_HSM_CT_REGISTER_NET 0x14d -#define OBD_FAIL_MDS_HSM_CT_UNREGISTER_NET 0x14e -#define OBD_FAIL_MDS_SWAP_LAYOUTS_NET 0x14f -#define OBD_FAIL_MDS_HSM_ACTION_NET 0x150 -#define OBD_FAIL_MDS_CHANGELOG_INIT 0x151 - -/* layout lock */ -#define OBD_FAIL_MDS_NO_LL_GETATTR 0x170 -#define OBD_FAIL_MDS_NO_LL_OPEN 0x171 -#define OBD_FAIL_MDS_LL_BLOCK 0x172 - -/* CMD */ -#define OBD_FAIL_MDS_IS_SUBDIR_NET 0x180 -#define OBD_FAIL_MDS_IS_SUBDIR_PACK 0x181 -#define OBD_FAIL_MDS_SET_INFO_NET 0x182 -#define OBD_FAIL_MDS_WRITEPAGE_NET 0x183 -#define OBD_FAIL_MDS_WRITEPAGE_PACK 0x184 -#define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x185 -#define OBD_FAIL_MDS_GET_INFO_NET 0x186 -#define OBD_FAIL_MDS_DQACQ_NET 0x187 - -/* OI scrub */ -#define OBD_FAIL_OSD_SCRUB_DELAY 0x190 -#define OBD_FAIL_OSD_SCRUB_CRASH 0x191 -#define OBD_FAIL_OSD_SCRUB_FATAL 0x192 -#define OBD_FAIL_OSD_FID_MAPPING 0x193 -#define OBD_FAIL_OSD_LMA_INCOMPAT 0x194 -#define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195 - -#define OBD_FAIL_OST 0x200 -#define OBD_FAIL_OST_CONNECT_NET 0x201 -#define OBD_FAIL_OST_DISCONNECT_NET 0x202 -#define 
OBD_FAIL_OST_GET_INFO_NET 0x203 -#define OBD_FAIL_OST_CREATE_NET 0x204 -#define OBD_FAIL_OST_DESTROY_NET 0x205 -#define OBD_FAIL_OST_GETATTR_NET 0x206 -#define OBD_FAIL_OST_SETATTR_NET 0x207 -#define OBD_FAIL_OST_OPEN_NET 0x208 -#define OBD_FAIL_OST_CLOSE_NET 0x209 -#define OBD_FAIL_OST_BRW_NET 0x20a -#define OBD_FAIL_OST_PUNCH_NET 0x20b -#define OBD_FAIL_OST_STATFS_NET 0x20c -#define OBD_FAIL_OST_HANDLE_UNPACK 0x20d -#define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e -#define OBD_FAIL_OST_BRW_READ_BULK 0x20f -#define OBD_FAIL_OST_SYNC_NET 0x210 -#define OBD_FAIL_OST_ALL_REPLY_NET 0x211 -#define OBD_FAIL_OST_ALL_REQUEST_NET 0x212 -#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213 -#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 -#define OBD_FAIL_OST_ENOSPC 0x215 -#define OBD_FAIL_OST_EROFS 0x216 -#define OBD_FAIL_OST_ENOENT 0x217 -/* OBD_FAIL_OST_QUOTACHECK_NET 0x218 obsolete since 2.4 */ -#define OBD_FAIL_OST_QUOTACTL_NET 0x219 -#define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a -#define OBD_FAIL_OST_CHECKSUM_SEND 0x21b -#define OBD_FAIL_OST_BRW_SIZE 0x21c -#define OBD_FAIL_OST_DROP_REQ 0x21d -#define OBD_FAIL_OST_SETATTR_CREDITS 0x21e -#define OBD_FAIL_OST_HOLD_WRITE_RPC 0x21f -#define OBD_FAIL_OST_BRW_WRITE_BULK2 0x220 -#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221 -#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222 -#define OBD_FAIL_OST_PAUSE_CREATE 0x223 -#define OBD_FAIL_OST_BRW_PAUSE_PACK 0x224 -#define OBD_FAIL_OST_CONNECT_NET2 0x225 -#define OBD_FAIL_OST_NOMEM 0x226 -#define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227 -#define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228 -#define OBD_FAIL_OST_ENOINO 0x229 -#define OBD_FAIL_OST_DQACQ_NET 0x230 -#define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231 -#define OBD_FAIL_OST_SET_INFO_NET 0x232 - -#define OBD_FAIL_LDLM 0x300 -#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 -#define OBD_FAIL_LDLM_ENQUEUE_NET 0x302 -#define OBD_FAIL_LDLM_CONVERT_NET 0x303 -#define OBD_FAIL_LDLM_CANCEL_NET 0x304 -#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305 -#define OBD_FAIL_LDLM_CP_CALLBACK_NET 
0x306 -#define OBD_FAIL_LDLM_GL_CALLBACK_NET 0x307 -#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308 -#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309 -#define OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a -#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b -#define OBD_FAIL_LDLM_REPLY 0x30c -#define OBD_FAIL_LDLM_RECOV_CLIENTS 0x30d -#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e -#define OBD_FAIL_LDLM_GLIMPSE 0x30f -#define OBD_FAIL_LDLM_CANCEL_RACE 0x310 -#define OBD_FAIL_LDLM_CANCEL_EVICT_RACE 0x311 -#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312 -#define OBD_FAIL_LDLM_CLOSE_THREAD 0x313 -#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 -#define OBD_FAIL_LDLM_CP_CB_WAIT 0x315 -#define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316 -#define OBD_FAIL_LDLM_INTR_CP_AST 0x317 -#define OBD_FAIL_LDLM_CP_BL_RACE 0x318 -#define OBD_FAIL_LDLM_NEW_LOCK 0x319 -#define OBD_FAIL_LDLM_AGL_DELAY 0x31a -#define OBD_FAIL_LDLM_AGL_NOLOCK 0x31b -#define OBD_FAIL_LDLM_OST_LVB 0x31c -#define OBD_FAIL_LDLM_ENQUEUE_HANG 0x31d -#define OBD_FAIL_LDLM_PAUSE_CANCEL2 0x31f -#define OBD_FAIL_LDLM_CP_CB_WAIT2 0x320 -#define OBD_FAIL_LDLM_CP_CB_WAIT3 0x321 -#define OBD_FAIL_LDLM_CP_CB_WAIT4 0x322 -#define OBD_FAIL_LDLM_CP_CB_WAIT5 0x323 - -#define OBD_FAIL_LDLM_GRANT_CHECK 0x32a - -/* LOCKLESS IO */ -#define OBD_FAIL_LDLM_SET_CONTENTION 0x385 - -#define OBD_FAIL_OSC 0x400 -#define OBD_FAIL_OSC_BRW_READ_BULK 0x401 -#define OBD_FAIL_OSC_BRW_WRITE_BULK 0x402 -#define OBD_FAIL_OSC_LOCK_BL_AST 0x403 -#define OBD_FAIL_OSC_LOCK_CP_AST 0x404 -#define OBD_FAIL_OSC_MATCH 0x405 -#define OBD_FAIL_OSC_BRW_PREP_REQ 0x406 -#define OBD_FAIL_OSC_SHUTDOWN 0x407 -#define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 -#define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 -#define OBD_FAIL_OSC_BRW_PREP_REQ2 0x40a -#define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b -#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c -#define OBD_FAIL_OSC_DIO_PAUSE 0x40d -#define OBD_FAIL_OSC_OBJECT_CONTENTION 0x40e -#define OBD_FAIL_OSC_CP_CANCEL_RACE 0x40f -#define OBD_FAIL_OSC_CP_ENQ_RACE 0x410 -#define 
OBD_FAIL_OSC_NO_GRANT 0x411 -#define OBD_FAIL_OSC_DELAY_SETTIME 0x412 -#define OBD_FAIL_OSC_DELAY_IO 0x414 - -#define OBD_FAIL_PTLRPC 0x500 -#define OBD_FAIL_PTLRPC_ACK 0x501 -#define OBD_FAIL_PTLRPC_RQBD 0x502 -#define OBD_FAIL_PTLRPC_BULK_GET_NET 0x503 -#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 -#define OBD_FAIL_PTLRPC_DROP_RPC 0x505 -#define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 -#define OBD_FAIL_PTLRPC_DELAY_RECOV 0x507 -#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB 0x508 -#define OBD_FAIL_PTLRPC_PAUSE_REQ 0x50a -#define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c -#define OBD_FAIL_PTLRPC_IMP_DEACTIVE 0x50d -#define OBD_FAIL_PTLRPC_DUMP_LOG 0x50e -#define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f -#define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510 -#define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT 0x511 -#define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT 0x512 -#define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513 -#define OBD_FAIL_PTLRPC_FINISH_REPLAY 0x514 -#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515 -#define OBD_FAIL_PTLRPC_DELAY_IMP_FULL 0x516 -#define OBD_FAIL_PTLRPC_CANCEL_RESEND 0x517 -#define OBD_FAIL_PTLRPC_DROP_BULK 0x51a -#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b -#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c - -#define OBD_FAIL_OBD_PING_NET 0x600 -#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 -#define OBD_FAIL_OBD_LOGD_NET 0x602 -/* OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 obsolete since 2.4 */ -#define OBD_FAIL_OBD_DQACQ 0x604 -#define OBD_FAIL_OBD_LLOG_SETUP 0x605 -#define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606 -#define OBD_FAIL_OBD_IDX_READ_NET 0x607 -#define OBD_FAIL_OBD_IDX_READ_BREAK 0x608 -#define OBD_FAIL_OBD_NO_LRU 0x609 - -#define OBD_FAIL_TGT_REPLY_NET 0x700 -#define OBD_FAIL_TGT_CONN_RACE 0x701 -#define OBD_FAIL_TGT_FORCE_RECONNECT 0x702 -#define OBD_FAIL_TGT_DELAY_CONNECT 0x703 -#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 -#define OBD_FAIL_TGT_DELAY_PRECREATE 0x705 -#define OBD_FAIL_TGT_TOOMANY_THREADS 0x706 -#define OBD_FAIL_TGT_REPLAY_DROP 0x707 -#define OBD_FAIL_TGT_FAKE_EXP 0x708 -#define 
OBD_FAIL_TGT_REPLAY_DELAY 0x709 -#define OBD_FAIL_TGT_LAST_REPLAY 0x710 -#define OBD_FAIL_TGT_CLIENT_ADD 0x711 -#define OBD_FAIL_TGT_RCVG_FLAG 0x712 -#define OBD_FAIL_TGT_DELAY_CONDITIONAL 0x713 - -#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 -#define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 -#define OBD_FAIL_MDC_OLD_EXT_FLAGS 0x802 -#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803 -#define OBD_FAIL_MDC_RPCS_SEM 0x804 -#define OBD_FAIL_MDC_LIGHTWEIGHT 0x805 -#define OBD_FAIL_MDC_CLOSE 0x806 - -#define OBD_FAIL_MGS 0x900 -#define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901 -#define OBD_FAIL_MGS_ALL_REPLY_NET 0x902 -#define OBD_FAIL_MGC_PAUSE_PROCESS_LOG 0x903 -#define OBD_FAIL_MGS_PAUSE_REQ 0x904 -#define OBD_FAIL_MGS_PAUSE_TARGET_REG 0x905 -#define OBD_FAIL_MGS_CONNECT_NET 0x906 -#define OBD_FAIL_MGS_DISCONNECT_NET 0x907 -#define OBD_FAIL_MGS_SET_INFO_NET 0x908 -#define OBD_FAIL_MGS_EXCEPTION_NET 0x909 -#define OBD_FAIL_MGS_TARGET_REG_NET 0x90a -#define OBD_FAIL_MGS_TARGET_DEL_NET 0x90b -#define OBD_FAIL_MGS_CONFIG_READ_NET 0x90c - -#define OBD_FAIL_QUOTA_DQACQ_NET 0xA01 -#define OBD_FAIL_QUOTA_EDQUOT 0xA02 -#define OBD_FAIL_QUOTA_DELAY_REINT 0xA03 -#define OBD_FAIL_QUOTA_RECOVERABLE_ERR 0xA04 - -#define OBD_FAIL_LPROC_REMOVE 0xB00 - -#define OBD_FAIL_SEQ 0x1000 -#define OBD_FAIL_SEQ_QUERY_NET 0x1001 -#define OBD_FAIL_SEQ_EXHAUST 0x1002 - -#define OBD_FAIL_FLD 0x1100 -#define OBD_FAIL_FLD_QUERY_NET 0x1101 -#define OBD_FAIL_FLD_READ_NET 0x1102 - -#define OBD_FAIL_SEC_CTX 0x1200 -#define OBD_FAIL_SEC_CTX_INIT_NET 0x1201 -#define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1202 -#define OBD_FAIL_SEC_CTX_FINI_NET 0x1203 -#define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204 - -#define OBD_FAIL_LLOG 0x1300 -#define OBD_FAIL_LLOG_ORIGIN_CONNECT_NET 0x1301 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CREATE_NET 0x1302 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_DESTROY_NET 0x1303 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_READ_HEADER_NET 0x1304 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_NEXT_BLOCK_NET 0x1305 -#define 
OBD_FAIL_LLOG_ORIGIN_HANDLE_PREV_BLOCK_NET 0x1306 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_WRITE_REC_NET 0x1307 -#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CLOSE_NET 0x1308 -#define OBD_FAIL_LLOG_CATINFO_NET 0x1309 -#define OBD_FAIL_MDS_SYNC_CAPA_SL 0x1310 -#define OBD_FAIL_SEQ_ALLOC 0x1311 - -#define OBD_FAIL_LLITE 0x1400 -#define OBD_FAIL_LLITE_FAULT_TRUNC_RACE 0x1401 -#define OBD_FAIL_LOCK_STATE_WAIT_INTR 0x1402 -#define OBD_FAIL_LOV_INIT 0x1403 -#define OBD_FAIL_GLIMPSE_DELAY 0x1404 -#define OBD_FAIL_LLITE_XATTR_ENOMEM 0x1405 -#define OBD_FAIL_MAKE_LOVEA_HOLE 0x1406 -#define OBD_FAIL_LLITE_LOST_LAYOUT 0x1407 -#define OBD_FAIL_GETATTR_DELAY 0x1409 - -#define OBD_FAIL_FID_INDIR 0x1501 -#define OBD_FAIL_FID_INLMA 0x1502 -#define OBD_FAIL_FID_IGIF 0x1504 -#define OBD_FAIL_FID_LOOKUP 0x1505 -#define OBD_FAIL_FID_NOLMA 0x1506 - -/* LFSCK */ -#define OBD_FAIL_LFSCK_DELAY1 0x1600 -#define OBD_FAIL_LFSCK_DELAY2 0x1601 -#define OBD_FAIL_LFSCK_DELAY3 0x1602 -#define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 -#define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 -#define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605 -#define OBD_FAIL_LFSCK_FATAL1 0x1608 -#define OBD_FAIL_LFSCK_FATAL2 0x1609 -#define OBD_FAIL_LFSCK_CRASH 0x160a -#define OBD_FAIL_LFSCK_NO_AUTO 0x160b -#define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c -#define OBD_FAIL_LFSCK_INVALID_PFID 0x1619 -#define OBD_FAIL_LFSCK_BAD_NAME_HASH 0x1628 - -/* UPDATE */ -#define OBD_FAIL_UPDATE_OBJ_NET 0x1700 -#define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701 - -/* LMV */ -#define OBD_FAIL_UNKNOWN_LMV_STRIPE 0x1901 - -/* Assign references to moved code to reduce code changes */ -#define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id) -#define OBD_FAIL_CHECK(id) CFS_FAIL_CHECK(id) -#define OBD_FAIL_CHECK_VALUE(id, value) CFS_FAIL_CHECK_VALUE(id, value) -#define OBD_FAIL_CHECK_ORSET(id, value) CFS_FAIL_CHECK_ORSET(id, value) -#define OBD_FAIL_CHECK_RESET(id, value) CFS_FAIL_CHECK_RESET(id, value) -#define OBD_FAIL_RETURN(id, ret) CFS_FAIL_RETURN(id, ret) -#define OBD_FAIL_TIMEOUT(id, 
secs) CFS_FAIL_TIMEOUT(id, secs) -#define OBD_FAIL_TIMEOUT_MS(id, ms) CFS_FAIL_TIMEOUT_MS(id, ms) -#define OBD_FAIL_TIMEOUT_ORSET(id, value, secs) CFS_FAIL_TIMEOUT_ORSET(id, value, secs) -#define OBD_RACE(id) CFS_RACE(id) -#define OBD_FAIL_ONCE CFS_FAIL_ONCE -#define OBD_FAILED CFS_FAILED - -#ifdef CONFIG_DEBUG_SLAB -#define POISON(ptr, c, s) do {} while (0) -#define POISON_PTR(ptr) ((void)0) -#else -#define POISON(ptr, c, s) memset(ptr, c, s) -#define POISON_PTR(ptr) ((ptr) = (void *)0xdeadbeef) -#endif - -#ifdef POISON_BULK -#define POISON_PAGE(page, val) do { \ - memset(kmap(page), val, PAGE_SIZE); \ - kunmap(page); \ -} while (0) -#else -#define POISON_PAGE(page, val) do { } while (0) -#endif - -#define OBD_FREE_RCU(ptr, size, handle) \ -do { \ - struct portals_handle *__h = (handle); \ - \ - __h->h_cookie = (unsigned long)(ptr); \ - __h->h_size = (size); \ - call_rcu(&__h->h_rcu, class_handle_free_cb); \ - POISON_PTR(ptr); \ -} while (0) - -#define KEY_IS(str) \ - (keylen >= (sizeof(str) - 1) && \ - memcmp(key, str, (sizeof(str) - 1)) == 0) - -#endif diff --git a/drivers/staging/lustre/lustre/include/seq_range.h b/drivers/staging/lustre/lustre/include/seq_range.h deleted file mode 100644 index 9450da728160..000000000000 --- a/drivers/staging/lustre/lustre/include/seq_range.h +++ /dev/null @@ -1,200 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2014, Intel Corporation. - * - * Copyright 2015 Cray Inc, all rights reserved. - * Author: Ben Evans. - * - * Define lu_seq_range associated functions - */ - -#ifndef _SEQ_RANGE_H_ -#define _SEQ_RANGE_H_ - -#include - -/** - * computes the sequence range type \a range - */ - -static inline unsigned int fld_range_type(const struct lu_seq_range *range) -{ - return range->lsr_flags & LU_SEQ_RANGE_MASK; -} - -/** - * Is this sequence range an OST? \a range - */ - -static inline bool fld_range_is_ost(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_OST; -} - -/** - * Is this sequence range an MDT? \a range - */ - -static inline bool fld_range_is_mdt(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_MDT; -} - -/** - * ANY range is only used when the fld client sends a fld query request, - * but it does not know whether the seq is an MDT or OST, so it will send the - * request with ANY type, which means any seq type from the lookup can be - * expected. 
/a range - */ -static inline unsigned int fld_range_is_any(const struct lu_seq_range *range) -{ - return fld_range_type(range) == LU_SEQ_RANGE_ANY; -} - -/** - * Apply flags to range \a range \a flags - */ - -static inline void fld_range_set_type(struct lu_seq_range *range, - unsigned int flags) -{ - range->lsr_flags |= flags; -} - -/** - * Add MDT to range type \a range - */ - -static inline void fld_range_set_mdt(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_MDT); -} - -/** - * Add OST to range type \a range - */ - -static inline void fld_range_set_ost(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_OST); -} - -/** - * Add ANY to range type \a range - */ - -static inline void fld_range_set_any(struct lu_seq_range *range) -{ - fld_range_set_type(range, LU_SEQ_RANGE_ANY); -} - -/** - * computes width of given sequence range \a range - */ - -static inline u64 lu_seq_range_space(const struct lu_seq_range *range) -{ - return range->lsr_end - range->lsr_start; -} - -/** - * initialize range to zero \a range - */ - -static inline void lu_seq_range_init(struct lu_seq_range *range) -{ - memset(range, 0, sizeof(*range)); -} - -/** - * check if given seq id \a s is within given range \a range - */ - -static inline bool lu_seq_range_within(const struct lu_seq_range *range, - u64 seq) -{ - return seq >= range->lsr_start && seq < range->lsr_end; -} - -/** - * Is the range sane? Is the end after the beginning? \a range - */ - -static inline bool lu_seq_range_is_sane(const struct lu_seq_range *range) -{ - return range->lsr_end >= range->lsr_start; -} - -/** - * Is the range 0? \a range - */ - -static inline bool lu_seq_range_is_zero(const struct lu_seq_range *range) -{ - return range->lsr_start == 0 && range->lsr_end == 0; -} - -/** - * Is the range out of space? 
\a range - */ - -static inline bool lu_seq_range_is_exhausted(const struct lu_seq_range *range) -{ - return lu_seq_range_space(range) == 0; -} - -/** - * return 0 if two ranges have the same location, nonzero if they are - * different \a r1 \a r2 - */ - -static inline int lu_seq_range_compare_loc(const struct lu_seq_range *r1, - const struct lu_seq_range *r2) -{ - return r1->lsr_index != r2->lsr_index || - r1->lsr_flags != r2->lsr_flags; -} - -#if !defined(__REQ_LAYOUT_USER__) -/** - * byte swap range structure \a range - */ - -void lustre_swab_lu_seq_range(struct lu_seq_range *range); -#endif -/** - * printf string and argument list for sequence range - */ -#define DRANGE "[%#16.16llx-%#16.16llx]:%x:%s" - -#define PRANGE(range) \ - (range)->lsr_start, \ - (range)->lsr_end, \ - (range)->lsr_index, \ - fld_range_is_mdt(range) ? "mdt" : "ost" - -#endif diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c deleted file mode 100644 index 8df7a4463c21..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/interval_tree.c +++ /dev/null @@ -1,599 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/interval_tree.c - * - * Interval tree library used by ldlm extent lock code - * - * Author: Huang Wei - * Author: Jay Xiong - */ -#include -#include -#include - -enum { - INTERVAL_RED = 0, - INTERVAL_BLACK = 1 -}; - -static inline int node_is_left_child(struct interval_node *node) -{ - return node == node->in_parent->in_left; -} - -static inline int node_is_right_child(struct interval_node *node) -{ - return node == node->in_parent->in_right; -} - -static inline int node_is_red(struct interval_node *node) -{ - return node->in_color == INTERVAL_RED; -} - -static inline int node_is_black(struct interval_node *node) -{ - return node->in_color == INTERVAL_BLACK; -} - -static inline int extent_compare(struct interval_node_extent *e1, - struct interval_node_extent *e2) -{ - int rc; - - if (e1->start == e2->start) { - if (e1->end < e2->end) - rc = -1; - else if (e1->end > e2->end) - rc = 1; - else - rc = 0; - } else { - if (e1->start < e2->start) - rc = -1; - else - rc = 1; - } - return rc; -} - -static inline int extent_equal(struct interval_node_extent *e1, - struct interval_node_extent *e2) -{ - return (e1->start == e2->start) && (e1->end == e2->end); -} - -static inline int extent_overlapped(struct interval_node_extent *e1, - struct interval_node_extent *e2) -{ - return (e1->start <= e2->end) && (e2->start <= e1->end); -} - -static inline int node_equal(struct interval_node *n1, struct interval_node *n2) -{ - return extent_equal(&n1->in_extent, &n2->in_extent); -} - -static struct interval_node *interval_first(struct 
interval_node *node) -{ - if (!node) - return NULL; - while (node->in_left) - node = node->in_left; - return node; -} - -static struct interval_node *interval_last(struct interval_node *node) -{ - if (!node) - return NULL; - while (node->in_right) - node = node->in_right; - return node; -} - -static struct interval_node *interval_next(struct interval_node *node) -{ - if (!node) - return NULL; - if (node->in_right) - return interval_first(node->in_right); - while (node->in_parent && node_is_right_child(node)) - node = node->in_parent; - return node->in_parent; -} - -static struct interval_node *interval_prev(struct interval_node *node) -{ - if (!node) - return NULL; - - if (node->in_left) - return interval_last(node->in_left); - - while (node->in_parent && node_is_left_child(node)) - node = node->in_parent; - - return node->in_parent; -} - -enum interval_iter interval_iterate_reverse(struct interval_node *root, - interval_callback_t func, - void *data) -{ - enum interval_iter rc = INTERVAL_ITER_CONT; - struct interval_node *node; - - for (node = interval_last(root); node; node = interval_prev(node)) { - rc = func(node, data); - if (rc == INTERVAL_ITER_STOP) - break; - } - - return rc; -} -EXPORT_SYMBOL(interval_iterate_reverse); - -static void __rotate_change_maxhigh(struct interval_node *node, - struct interval_node *rotate) -{ - __u64 left_max, right_max; - - rotate->in_max_high = node->in_max_high; - left_max = node->in_left ? node->in_left->in_max_high : 0; - right_max = node->in_right ? node->in_right->in_max_high : 0; - node->in_max_high = max(interval_high(node), - max(left_max, right_max)); -} - -/* The left rotation "pivots" around the link from node to node->right, and - * - node will be linked to node->right's left child, and - * - node->right's left child will be linked to node's right child. 
- */ -static void __rotate_left(struct interval_node *node, - struct interval_node **root) -{ - struct interval_node *right = node->in_right; - struct interval_node *parent = node->in_parent; - - node->in_right = right->in_left; - if (node->in_right) - right->in_left->in_parent = node; - - right->in_left = node; - right->in_parent = parent; - if (parent) { - if (node_is_left_child(node)) - parent->in_left = right; - else - parent->in_right = right; - } else { - *root = right; - } - node->in_parent = right; - - /* update max_high for node and right */ - __rotate_change_maxhigh(node, right); -} - -/* The right rotation "pivots" around the link from node to node->left, and - * - node will be linked to node->left's right child, and - * - node->left's right child will be linked to node's left child. - */ -static void __rotate_right(struct interval_node *node, - struct interval_node **root) -{ - struct interval_node *left = node->in_left; - struct interval_node *parent = node->in_parent; - - node->in_left = left->in_right; - if (node->in_left) - left->in_right->in_parent = node; - left->in_right = node; - - left->in_parent = parent; - if (parent) { - if (node_is_right_child(node)) - parent->in_right = left; - else - parent->in_left = left; - } else { - *root = left; - } - node->in_parent = left; - - /* update max_high for node and left */ - __rotate_change_maxhigh(node, left); -} - -#define interval_swap(a, b) do { \ - struct interval_node *c = a; a = b; b = c; \ -} while (0) - -/* - * Operations INSERT and DELETE, when run on a tree with n keys, - * take O(logN) time.Because they modify the tree, the result - * may violate the red-black properties.To restore these properties, - * we must change the colors of some of the nodes in the tree - * and also change the pointer structure. 
- */ -static void interval_insert_color(struct interval_node *node, - struct interval_node **root) -{ - struct interval_node *parent, *gparent; - - while ((parent = node->in_parent) && node_is_red(parent)) { - gparent = parent->in_parent; - /* Parent is RED, so gparent must not be NULL */ - if (node_is_left_child(parent)) { - struct interval_node *uncle; - - uncle = gparent->in_right; - if (uncle && node_is_red(uncle)) { - uncle->in_color = INTERVAL_BLACK; - parent->in_color = INTERVAL_BLACK; - gparent->in_color = INTERVAL_RED; - node = gparent; - continue; - } - - if (parent->in_right == node) { - __rotate_left(parent, root); - interval_swap(node, parent); - } - - parent->in_color = INTERVAL_BLACK; - gparent->in_color = INTERVAL_RED; - __rotate_right(gparent, root); - } else { - struct interval_node *uncle; - - uncle = gparent->in_left; - if (uncle && node_is_red(uncle)) { - uncle->in_color = INTERVAL_BLACK; - parent->in_color = INTERVAL_BLACK; - gparent->in_color = INTERVAL_RED; - node = gparent; - continue; - } - - if (node_is_left_child(node)) { - __rotate_right(parent, root); - interval_swap(node, parent); - } - - parent->in_color = INTERVAL_BLACK; - gparent->in_color = INTERVAL_RED; - __rotate_left(gparent, root); - } - } - - (*root)->in_color = INTERVAL_BLACK; -} - -struct interval_node *interval_insert(struct interval_node *node, - struct interval_node **root) - -{ - struct interval_node **p, *parent = NULL; - - LASSERT(!interval_is_intree(node)); - p = root; - while (*p) { - parent = *p; - if (node_equal(parent, node)) - return parent; - - /* max_high field must be updated after each iteration */ - if (parent->in_max_high < interval_high(node)) - parent->in_max_high = interval_high(node); - - if (extent_compare(&node->in_extent, &parent->in_extent) < 0) - p = &parent->in_left; - else - p = &parent->in_right; - } - - /* link node into the tree */ - node->in_parent = parent; - node->in_color = INTERVAL_RED; - node->in_left = NULL; - node->in_right = NULL; - 
*p = node; - - interval_insert_color(node, root); - node->in_intree = 1; - - return NULL; -} -EXPORT_SYMBOL(interval_insert); - -static inline int node_is_black_or_0(struct interval_node *node) -{ - return !node || node_is_black(node); -} - -static void interval_erase_color(struct interval_node *node, - struct interval_node *parent, - struct interval_node **root) -{ - struct interval_node *tmp; - - while (node_is_black_or_0(node) && node != *root) { - if (parent->in_left == node) { - tmp = parent->in_right; - if (node_is_red(tmp)) { - tmp->in_color = INTERVAL_BLACK; - parent->in_color = INTERVAL_RED; - __rotate_left(parent, root); - tmp = parent->in_right; - } - if (node_is_black_or_0(tmp->in_left) && - node_is_black_or_0(tmp->in_right)) { - tmp->in_color = INTERVAL_RED; - node = parent; - parent = node->in_parent; - } else { - if (node_is_black_or_0(tmp->in_right)) { - struct interval_node *o_left; - - o_left = tmp->in_left; - if (o_left) - o_left->in_color = INTERVAL_BLACK; - tmp->in_color = INTERVAL_RED; - __rotate_right(tmp, root); - tmp = parent->in_right; - } - tmp->in_color = parent->in_color; - parent->in_color = INTERVAL_BLACK; - if (tmp->in_right) - tmp->in_right->in_color = INTERVAL_BLACK; - __rotate_left(parent, root); - node = *root; - break; - } - } else { - tmp = parent->in_left; - if (node_is_red(tmp)) { - tmp->in_color = INTERVAL_BLACK; - parent->in_color = INTERVAL_RED; - __rotate_right(parent, root); - tmp = parent->in_left; - } - if (node_is_black_or_0(tmp->in_left) && - node_is_black_or_0(tmp->in_right)) { - tmp->in_color = INTERVAL_RED; - node = parent; - parent = node->in_parent; - } else { - if (node_is_black_or_0(tmp->in_left)) { - struct interval_node *o_right; - - o_right = tmp->in_right; - if (o_right) - o_right->in_color = INTERVAL_BLACK; - tmp->in_color = INTERVAL_RED; - __rotate_left(tmp, root); - tmp = parent->in_left; - } - tmp->in_color = parent->in_color; - parent->in_color = INTERVAL_BLACK; - if (tmp->in_left) - 
tmp->in_left->in_color = INTERVAL_BLACK; - __rotate_right(parent, root); - node = *root; - break; - } - } - } - if (node) - node->in_color = INTERVAL_BLACK; -} - -/* - * if the @max_high value of @node is changed, this function traverse a path - * from node up to the root to update max_high for the whole tree. - */ -static void update_maxhigh(struct interval_node *node, - __u64 old_maxhigh) -{ - __u64 left_max, right_max; - - while (node) { - left_max = node->in_left ? node->in_left->in_max_high : 0; - right_max = node->in_right ? node->in_right->in_max_high : 0; - node->in_max_high = max(interval_high(node), - max(left_max, right_max)); - - if (node->in_max_high >= old_maxhigh) - break; - node = node->in_parent; - } -} - -void interval_erase(struct interval_node *node, - struct interval_node **root) -{ - struct interval_node *child, *parent; - int color; - - LASSERT(interval_is_intree(node)); - node->in_intree = 0; - if (!node->in_left) { - child = node->in_right; - } else if (!node->in_right) { - child = node->in_left; - } else { /* Both left and right child are not NULL */ - struct interval_node *old = node; - - node = interval_next(node); - child = node->in_right; - parent = node->in_parent; - color = node->in_color; - - if (child) - child->in_parent = parent; - if (parent == old) - parent->in_right = child; - else - parent->in_left = child; - - node->in_color = old->in_color; - node->in_right = old->in_right; - node->in_left = old->in_left; - node->in_parent = old->in_parent; - - if (old->in_parent) { - if (node_is_left_child(old)) - old->in_parent->in_left = node; - else - old->in_parent->in_right = node; - } else { - *root = node; - } - - old->in_left->in_parent = node; - if (old->in_right) - old->in_right->in_parent = node; - update_maxhigh(child ? 
: parent, node->in_max_high); - update_maxhigh(node, old->in_max_high); - if (parent == old) - parent = node; - goto color; - } - parent = node->in_parent; - color = node->in_color; - - if (child) - child->in_parent = parent; - if (parent) { - if (node_is_left_child(node)) - parent->in_left = child; - else - parent->in_right = child; - } else { - *root = child; - } - - update_maxhigh(child ? : parent, node->in_max_high); - -color: - if (color == INTERVAL_BLACK) - interval_erase_color(child, parent, root); -} -EXPORT_SYMBOL(interval_erase); - -static inline int interval_may_overlap(struct interval_node *node, - struct interval_node_extent *ext) -{ - return (ext->start <= node->in_max_high && - ext->end >= interval_low(node)); -} - -/* - * This function finds all intervals that overlap interval ext, - * and calls func to handle resulted intervals one by one. - * in lustre, this function will find all conflicting locks in - * the granted queue and add these locks to the ast work list. - * - * { - * if (!node) - * return 0; - * if (ext->end < interval_low(node)) { - * interval_search(node->in_left, ext, func, data); - * } else if (interval_may_overlap(node, ext)) { - * if (extent_overlapped(ext, &node->in_extent)) - * func(node, data); - * interval_search(node->in_left, ext, func, data); - * interval_search(node->in_right, ext, func, data); - * } - * return 0; - * } - * - */ -enum interval_iter interval_search(struct interval_node *node, - struct interval_node_extent *ext, - interval_callback_t func, - void *data) -{ - enum interval_iter rc = INTERVAL_ITER_CONT; - struct interval_node *parent; - - LASSERT(ext); - LASSERT(func); - - while (node) { - if (ext->end < interval_low(node)) { - if (node->in_left) { - node = node->in_left; - continue; - } - } else if (interval_may_overlap(node, ext)) { - if (extent_overlapped(ext, &node->in_extent)) { - rc = func(node, data); - if (rc == INTERVAL_ITER_STOP) - break; - } - - if (node->in_left) { - node = node->in_left; - 
continue; - } - if (node->in_right) { - node = node->in_right; - continue; - } - } - - parent = node->in_parent; - while (parent) { - if (node_is_left_child(node) && - parent->in_right) { - /* - * If we ever got the left, it means that the - * parent met ext->end<interval_low(parent), or - * may_overlap(parent). If the former is true, - * we needn't go back. So stop early and check - * may_overlap(parent) after this loop. - */ - node = parent->in_right; - break; - } - node = parent; - parent = parent->in_parent; - } - if (!parent || !interval_may_overlap(parent, ext)) - break; - } - - return rc; -} -EXPORT_SYMBOL(interval_search); diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c deleted file mode 100644 index 296259aa51e6..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/l_lock.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include -#include - -/** - * Lock a lock and its resource.
- * - * LDLM locking uses resource to serialize access to locks - * but there is a case when we change resource of lock upon - * enqueue reply. We rely on lock->l_resource = new_res - * being an atomic operation. - */ -struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock) - __acquires(&lock->l_lock) - __acquires(&lock->l_resource->lr_lock) -{ - spin_lock(&lock->l_lock); - - lock_res(lock->l_resource); - - ldlm_set_res_locked(lock); - return lock->l_resource; -} -EXPORT_SYMBOL(lock_res_and_lock); - -/** - * Unlock a lock and its resource previously locked with lock_res_and_lock - */ -void unlock_res_and_lock(struct ldlm_lock *lock) - __releases(&lock->l_resource->lr_lock) - __releases(&lock->l_lock) -{ - /* on server-side resource of lock doesn't change */ - ldlm_clear_res_locked(lock); - - unlock_res(lock->l_resource); - spin_unlock(&lock->l_lock); -} -EXPORT_SYMBOL(unlock_res_and_lock); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c deleted file mode 100644 index 4da23ade2bb3..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c +++ /dev/null @@ -1,258 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_extent.c - * - * Author: Peter Braam - * Author: Phil Schwan - */ - -/** - * This file contains implementation of EXTENT lock type - * - * EXTENT lock type is for locking a contiguous range of values, represented - * by 64-bit starting and ending offsets (inclusive). There are several extent - * lock modes, some of which may be mutually incompatible. Extent locks are - * considered incompatible if their modes are incompatible and their extents - * intersect. See the lock mode compatibility matrix in lustre_dlm.h. - */ - -#define DEBUG_SUBSYSTEM S_LDLM -#include -#include -#include -#include -#include -#include "ldlm_internal.h" - -/* When a lock is cancelled by a client, the KMS may undergo change if this - * is the "highest lock". This function returns the new KMS value. - * Caller must hold lr_lock already. - * - * NB: A lock on [x,y] protects a KMS of up to y + 1 bytes! 
- */ -__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms) -{ - struct ldlm_resource *res = lock->l_resource; - struct ldlm_lock *lck; - __u64 kms = 0; - - /* don't let another thread in ldlm_extent_shift_kms race in - * just after we finish and take our lock into account in its - * calculation of the kms - */ - ldlm_set_kms_ignore(lock); - - list_for_each_entry(lck, &res->lr_granted, l_res_link) { - - if (ldlm_is_kms_ignore(lck)) - continue; - - if (lck->l_policy_data.l_extent.end >= old_kms) - return old_kms; - - /* This extent _has_ to be smaller than old_kms (checked above) - * so kms can only ever be smaller or the same as old_kms. - */ - if (lck->l_policy_data.l_extent.end + 1 > kms) - kms = lck->l_policy_data.l_extent.end + 1; - } - LASSERTF(kms <= old_kms, "kms %llu old_kms %llu\n", kms, old_kms); - - return kms; -} -EXPORT_SYMBOL(ldlm_extent_shift_kms); - -struct kmem_cache *ldlm_interval_slab; - -/* interval tree, for LDLM_EXTENT. */ -static void ldlm_interval_attach(struct ldlm_interval *n, struct ldlm_lock *l) -{ - LASSERT(!l->l_tree_node); - LASSERT(l->l_resource->lr_type == LDLM_EXTENT); - - list_add_tail(&l->l_sl_policy, &n->li_group); - l->l_tree_node = n; -} - -struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock) -{ - struct ldlm_interval *node; - - LASSERT(lock->l_resource->lr_type == LDLM_EXTENT); - node = kmem_cache_zalloc(ldlm_interval_slab, GFP_NOFS); - if (!node) - return NULL; - - INIT_LIST_HEAD(&node->li_group); - ldlm_interval_attach(node, lock); - return node; -} - -void ldlm_interval_free(struct ldlm_interval *node) -{ - if (node) { - LASSERT(list_empty(&node->li_group)); - LASSERT(!interval_is_intree(&node->li_node)); - kmem_cache_free(ldlm_interval_slab, node); - } -} - -struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l) -{ - struct ldlm_interval *n = l->l_tree_node; - - if (!n) - return NULL; - - LASSERT(!list_empty(&n->li_group)); - l->l_tree_node = NULL; - list_del_init(&l->l_sl_policy); - - 
return list_empty(&n->li_group) ? n : NULL; -} - -static inline int lock_mode_to_index(enum ldlm_mode mode) -{ - int index; - - LASSERT(mode != 0); - LASSERT(is_power_of_2(mode)); - for (index = -1; mode; index++) - mode >>= 1; - LASSERT(index < LCK_MODE_NUM); - return index; -} - -/** Add newly granted lock into interval tree for the resource. */ -void ldlm_extent_add_lock(struct ldlm_resource *res, - struct ldlm_lock *lock) -{ - struct interval_node *found, **root; - struct ldlm_interval *node; - struct ldlm_extent *extent; - int idx, rc; - - LASSERT(lock->l_granted_mode == lock->l_req_mode); - - node = lock->l_tree_node; - LASSERT(node); - LASSERT(!interval_is_intree(&node->li_node)); - - idx = lock_mode_to_index(lock->l_granted_mode); - LASSERT(lock->l_granted_mode == 1 << idx); - LASSERT(lock->l_granted_mode == res->lr_itree[idx].lit_mode); - - /* node extent initialize */ - extent = &lock->l_policy_data.l_extent; - rc = interval_set(&node->li_node, extent->start, extent->end); - LASSERT(!rc); - - root = &res->lr_itree[idx].lit_root; - found = interval_insert(&node->li_node, root); - if (found) { /* The policy group found. */ - struct ldlm_interval *tmp; - - tmp = ldlm_interval_detach(lock); - ldlm_interval_free(tmp); - ldlm_interval_attach(to_ldlm_interval(found), lock); - } - res->lr_itree[idx].lit_size++; - - /* even though we use interval tree to manage the extent lock, we also - * add the locks into grant list, for debug purpose, .. 
- */ - ldlm_resource_add_lock(res, &res->lr_granted, lock); - - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GRANT_CHECK)) { - struct ldlm_lock *lck; - - list_for_each_entry_reverse(lck, &res->lr_granted, - l_res_link) { - if (lck == lock) - continue; - if (lockmode_compat(lck->l_granted_mode, - lock->l_granted_mode)) - continue; - if (ldlm_extent_overlap(&lck->l_req_extent, - &lock->l_req_extent)) { - CDEBUG(D_ERROR, - "granting conflicting lock %p %p\n", - lck, lock); - ldlm_resource_dump(D_ERROR, res); - LBUG(); - } - } - } -} - -/** Remove cancelled lock from resource interval tree. */ -void ldlm_extent_unlink_lock(struct ldlm_lock *lock) -{ - struct ldlm_resource *res = lock->l_resource; - struct ldlm_interval *node = lock->l_tree_node; - struct ldlm_interval_tree *tree; - int idx; - - if (!node || !interval_is_intree(&node->li_node)) /* duplicate unlink */ - return; - - idx = lock_mode_to_index(lock->l_granted_mode); - LASSERT(lock->l_granted_mode == 1 << idx); - tree = &res->lr_itree[idx]; - - LASSERT(tree->lit_root); /* assure the tree is not null */ - - tree->lit_size--; - node = ldlm_interval_detach(lock); - if (node) { - interval_erase(&node->li_node, &tree->lit_root); - ldlm_interval_free(node); - } -} - -void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - lpolicy->l_extent.start = wpolicy->l_extent.start; - lpolicy->l_extent.end = wpolicy->l_extent.end; - lpolicy->l_extent.gid = wpolicy->l_extent.gid; -} - -void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - memset(wpolicy, 0, sizeof(*wpolicy)); - wpolicy->l_extent.start = lpolicy->l_extent.start; - wpolicy->l_extent.end = lpolicy->l_extent.end; - wpolicy->l_extent.gid = lpolicy->l_extent.gid; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c deleted file mode 100644 index 94f3b1e49896..000000000000 --- 
a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c +++ /dev/null @@ -1,486 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003 Hewlett-Packard Development Company LP. - * Developed under the sponsorship of the US Government under - * Subcontract No. B514193 - * - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -/** - * This file implements POSIX lock type for Lustre. - * Its policy properties are start and end of extent and PID. - * - * These locks are only done through MDS due to POSIX semantics requiring - * e.g. that locks could be only partially released and as such split into - * two parts, and also that two adjacent locks from the same process may be - * merged into a single wider lock. - * - * Lock modes are mapped like this: - * PR and PW for READ and WRITE locks - * NL to request a releasing of a portion of the lock - * - * These flock locks never timeout. 
- */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include -#include -#include -#include -#include -#include "ldlm_internal.h" - -static inline int -ldlm_same_flock_owner(struct ldlm_lock *lock, struct ldlm_lock *new) -{ - return((new->l_policy_data.l_flock.owner == - lock->l_policy_data.l_flock.owner) && - (new->l_export == lock->l_export)); -} - -static inline int -ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new) -{ - return((new->l_policy_data.l_flock.start <= - lock->l_policy_data.l_flock.end) && - (new->l_policy_data.l_flock.end >= - lock->l_policy_data.l_flock.start)); -} - -static inline void -ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode) -{ - LDLM_DEBUG(lock, "%s(mode: %d)", - __func__, mode); - - list_del_init(&lock->l_res_link); - - /* client side - set a flag to prevent sending a CANCEL */ - lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING; - - /* when reaching here, it is under lock_res_and_lock(). Thus, - * need call the nolock version of ldlm_lock_decref_internal - */ - ldlm_lock_decref_internal_nolock(lock, mode); - - ldlm_lock_destroy_nolock(lock); -} - -/** - * Process a granting attempt for flock lock. - * Must be called under ns lock held. - * - * This function looks for any conflicts for \a lock in the granted or - * waiting queues. The lock is granted if no conflicts are found in - * either queue. - * - * It is also responsible for splitting a lock if a portion of the lock - * is released. 
- * - */ -static int ldlm_process_flock_lock(struct ldlm_lock *req) -{ - struct ldlm_resource *res = req->l_resource; - struct ldlm_namespace *ns = ldlm_res_to_ns(res); - struct ldlm_lock *tmp; - struct ldlm_lock *lock; - struct ldlm_lock *new = req; - struct ldlm_lock *new2 = NULL; - enum ldlm_mode mode = req->l_req_mode; - int added = (mode == LCK_NL); - int splitted = 0; - const struct ldlm_callback_suite null_cbs = { }; - - CDEBUG(D_DLMTRACE, - "owner %llu pid %u mode %u start %llu end %llu\n", - new->l_policy_data.l_flock.owner, - new->l_policy_data.l_flock.pid, mode, - req->l_policy_data.l_flock.start, - req->l_policy_data.l_flock.end); - - /* No blocking ASTs are sent to the clients for - * Posix file & record locks - */ - req->l_blocking_ast = NULL; - -reprocess: - /* This loop determines where this processes locks start - * in the resource lr_granted list. - */ - list_for_each_entry(lock, &res->lr_granted, l_res_link) - if (ldlm_same_flock_owner(lock, req)) - break; - - /* Scan the locks owned by this process to find the insertion point - * (as locks are ordered), and to handle overlaps. - * We may have to merge or split existing locks. - */ - list_for_each_entry_safe_from(lock, tmp, &res->lr_granted, l_res_link) { - - if (!ldlm_same_flock_owner(lock, new)) - break; - - if (lock->l_granted_mode == mode) { - /* If the modes are the same then we need to process - * locks that overlap OR adjoin the new lock. The extra - * logic condition is necessary to deal with arithmetic - * overflow and underflow. 
- */ - if ((new->l_policy_data.l_flock.start > - (lock->l_policy_data.l_flock.end + 1)) && - (lock->l_policy_data.l_flock.end != OBD_OBJECT_EOF)) - continue; - - if ((new->l_policy_data.l_flock.end < - (lock->l_policy_data.l_flock.start - 1)) && - (lock->l_policy_data.l_flock.start != 0)) - break; - - if (new->l_policy_data.l_flock.start < - lock->l_policy_data.l_flock.start) { - lock->l_policy_data.l_flock.start = - new->l_policy_data.l_flock.start; - } else { - new->l_policy_data.l_flock.start = - lock->l_policy_data.l_flock.start; - } - - if (new->l_policy_data.l_flock.end > - lock->l_policy_data.l_flock.end) { - lock->l_policy_data.l_flock.end = - new->l_policy_data.l_flock.end; - } else { - new->l_policy_data.l_flock.end = - lock->l_policy_data.l_flock.end; - } - - if (added) { - ldlm_flock_destroy(lock, mode); - } else { - new = lock; - added = 1; - } - continue; - } - - if (new->l_policy_data.l_flock.start > - lock->l_policy_data.l_flock.end) - continue; - - if (new->l_policy_data.l_flock.end < - lock->l_policy_data.l_flock.start) - break; - - if (new->l_policy_data.l_flock.start <= - lock->l_policy_data.l_flock.start) { - if (new->l_policy_data.l_flock.end < - lock->l_policy_data.l_flock.end) { - lock->l_policy_data.l_flock.start = - new->l_policy_data.l_flock.end + 1; - break; - } - ldlm_flock_destroy(lock, lock->l_req_mode); - continue; - } - if (new->l_policy_data.l_flock.end >= - lock->l_policy_data.l_flock.end) { - lock->l_policy_data.l_flock.end = - new->l_policy_data.l_flock.start - 1; - continue; - } - - /* split the existing lock into two locks */ - - /* if this is an F_UNLCK operation then we could avoid - * allocating a new lock and use the req lock passed in - * with the request but this would complicate the reply - * processing since updates to req get reflected in the - * reply. The client side replays the lock request so - * it must see the original lock data in the reply. 
- */ - - /* XXX - if ldlm_lock_new() can sleep we should - * release the lr_lock, allocate the new lock, - * and restart processing this lock. - */ - if (!new2) { - unlock_res_and_lock(req); - new2 = ldlm_lock_create(ns, &res->lr_name, LDLM_FLOCK, - lock->l_granted_mode, &null_cbs, - NULL, 0, LVB_T_NONE); - lock_res_and_lock(req); - if (IS_ERR(new2)) { - ldlm_flock_destroy(req, lock->l_granted_mode); - return LDLM_ITER_STOP; - } - goto reprocess; - } - - splitted = 1; - - new2->l_granted_mode = lock->l_granted_mode; - new2->l_policy_data.l_flock.pid = - new->l_policy_data.l_flock.pid; - new2->l_policy_data.l_flock.owner = - new->l_policy_data.l_flock.owner; - new2->l_policy_data.l_flock.start = - lock->l_policy_data.l_flock.start; - new2->l_policy_data.l_flock.end = - new->l_policy_data.l_flock.start - 1; - lock->l_policy_data.l_flock.start = - new->l_policy_data.l_flock.end + 1; - new2->l_conn_export = lock->l_conn_export; - if (lock->l_export) - new2->l_export = class_export_lock_get(lock->l_export, - new2); - ldlm_lock_addref_internal_nolock(new2, - lock->l_granted_mode); - - /* insert new2 at lock */ - ldlm_resource_add_lock(res, &lock->l_res_link, new2); - LDLM_LOCK_RELEASE(new2); - break; - } - - /* if new2 is created but never used, destroy it*/ - if (splitted == 0 && new2) - ldlm_lock_destroy_nolock(new2); - - /* At this point we're granting the lock request. */ - req->l_granted_mode = req->l_req_mode; - - if (!added) { - list_del_init(&req->l_res_link); - /* insert new lock before "lock", which might be the - * next lock for this owner, or might be the first - * lock for the next owner, or might not be a lock at - * all, but instead points at the head of the list - */ - ldlm_resource_add_lock(res, &lock->l_res_link, req); - } - - /* In case we're reprocessing the requested lock we can't destroy - * it until after calling ldlm_add_ast_work_item() above so that laawi() - * can bump the reference count on \a req. 
Otherwise \a req - * could be freed before the completion AST can be sent. - */ - if (added) - ldlm_flock_destroy(req, mode); - - ldlm_resource_dump(D_INFO, res); - return LDLM_ITER_CONTINUE; -} - -/** - * Flock completion callback function. - * - * \param lock [in,out]: A lock to be handled - * \param flags [in]: flags - * \param *data [in]: ldlm_work_cp_ast_lock() will use ldlm_cb_set_arg - * - * \retval 0 : success - * \retval <0 : failure - */ -int -ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) -{ - struct file_lock *getlk = lock->l_ast_data; - int rc = 0; - - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT2, 4); - if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT3)) { - lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_FAIL_LOC; - unlock_res_and_lock(lock); - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT3, 4); - } - CDEBUG(D_DLMTRACE, "flags: 0x%llx data: %p getlk: %p\n", - flags, data, getlk); - - LASSERT(flags != LDLM_FL_WAIT_NOREPROC); - - if (flags & LDLM_FL_FAILED) - goto granted; - - if (!(flags & LDLM_FL_BLOCKED_MASK)) { - if (!data) - /* mds granted the lock in the reply */ - goto granted; - /* CP AST RPC: lock get granted, wake it up */ - wake_up(&lock->l_waitq); - return 0; - } - - LDLM_DEBUG(lock, - "client-side enqueue returned a blocked lock, sleeping"); - - /* Go to sleep until the lock is granted. 
*/ - rc = l_wait_event_abortable(lock->l_waitq, is_granted_or_cancelled(lock)); - - if (rc) { - lock_res_and_lock(lock); - - /* client side - set flag to prevent lock from being put on LRU list */ - ldlm_set_cbpending(lock); - unlock_res_and_lock(lock); - - LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", - rc); - return rc; - } - -granted: - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10); - - if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT4)) { - lock_res_and_lock(lock); - /* DEADLOCK is always set with CBPENDING */ - lock->l_flags |= LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING; - unlock_res_and_lock(lock); - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT4, 4); - } - if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT5)) { - lock_res_and_lock(lock); - /* DEADLOCK is always set with CBPENDING */ - lock->l_flags |= LDLM_FL_FAIL_LOC | - LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING; - unlock_res_and_lock(lock); - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT5, 4); - } - - lock_res_and_lock(lock); - - /* - * Protect against race where lock could have been just destroyed - * due to overlap in ldlm_process_flock_lock(). - */ - if (ldlm_is_destroyed(lock)) { - unlock_res_and_lock(lock); - LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); - /* - * An error is still to be returned, to propagate it up to - * ldlm_cli_enqueue_fini() caller. - */ - return -EIO; - } - - /* ldlm_lock_enqueue() has already placed lock on the granted list. */ - ldlm_resource_unlink_lock(lock); - - /* - * Import invalidation. We need to actually release the lock - * references being held, so that it can go away. No point in - * holding the lock even if app still believes it has it, since - * server already dropped it anyway. Only for granted locks too. - */ - /* Do the same for DEADLOCK'ed locks. 
*/ - if (ldlm_is_failed(lock) || ldlm_is_flock_deadlock(lock)) { - int mode; - - if (flags & LDLM_FL_TEST_LOCK) - LASSERT(ldlm_is_test_lock(lock)); - - if (ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock)) - mode = getlk->fl_type; - else - mode = lock->l_granted_mode; - - if (ldlm_is_flock_deadlock(lock)) { - LDLM_DEBUG(lock, - "client-side enqueue deadlock received"); - rc = -EDEADLK; - } - ldlm_flock_destroy(lock, mode); - unlock_res_and_lock(lock); - - /* Need to wake up the waiter if we were evicted */ - wake_up(&lock->l_waitq); - - /* - * An error is still to be returned, to propagate it up to - * ldlm_cli_enqueue_fini() caller. - */ - return rc ? : -EIO; - } - - LDLM_DEBUG(lock, "client-side enqueue granted"); - - if (flags & LDLM_FL_TEST_LOCK) { - /* fcntl(F_GETLK) request */ - /* The old mode was saved in getlk->fl_type so that if the mode - * in the lock changes we can decref the appropriate refcount. - */ - LASSERT(ldlm_is_test_lock(lock)); - ldlm_flock_destroy(lock, getlk->fl_type); - switch (lock->l_granted_mode) { - case LCK_PR: - getlk->fl_type = F_RDLCK; - break; - case LCK_PW: - getlk->fl_type = F_WRLCK; - break; - default: - getlk->fl_type = F_UNLCK; - } - getlk->fl_pid = -(pid_t)lock->l_policy_data.l_flock.pid; - getlk->fl_start = (loff_t)lock->l_policy_data.l_flock.start; - getlk->fl_end = (loff_t)lock->l_policy_data.l_flock.end; - } else { - /* We need to reprocess the lock to do merges or splits - * with existing locks owned by this process. 
- */ - ldlm_process_flock_lock(lock); - } - unlock_res_and_lock(lock); - return rc; -} -EXPORT_SYMBOL(ldlm_flock_completion_ast); - -void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - lpolicy->l_flock.start = wpolicy->l_flock.lfw_start; - lpolicy->l_flock.end = wpolicy->l_flock.lfw_end; - lpolicy->l_flock.pid = wpolicy->l_flock.lfw_pid; - lpolicy->l_flock.owner = wpolicy->l_flock.lfw_owner; -} - -void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - memset(wpolicy, 0, sizeof(*wpolicy)); - wpolicy->l_flock.lfw_start = lpolicy->l_flock.start; - wpolicy->l_flock.lfw_end = lpolicy->l_flock.end; - wpolicy->l_flock.lfw_pid = lpolicy->l_flock.pid; - wpolicy->l_flock.lfw_owner = lpolicy->l_flock.owner; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c b/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c deleted file mode 100644 index 2926208cdfa1..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_inodebits.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_inodebits.c - * - * Author: Peter Braam - * Author: Phil Schwan - */ - -/** - * This file contains implementation of IBITS lock type - * - * IBITS lock type contains a bit mask determining various properties of an - * object. The meanings of specific bits are specific to the caller and are - * opaque to LDLM code. - * - * Locks with intersecting bitmasks and conflicting lock modes (e.g. LCK_PW) - * are considered conflicting. See the lock mode compatibility matrix - * in lustre_dlm.h. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include -#include -#include -#include "ldlm_internal.h" - -void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - lpolicy->l_inodebits.bits = wpolicy->l_inodebits.bits; -} - -void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - memset(wpolicy, 0, sizeof(*wpolicy)); - wpolicy->l_inodebits.bits = lpolicy->l_inodebits.bits; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h deleted file mode 100644 index bc33ca100620..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h +++ /dev/null @@ -1,342 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define MAX_STRING_SIZE 128 - -extern int ldlm_srv_namespace_nr; -extern int ldlm_cli_namespace_nr; -extern struct mutex ldlm_srv_namespace_lock; -extern struct list_head ldlm_srv_namespace_list; -extern struct mutex ldlm_cli_namespace_lock; -extern struct list_head ldlm_cli_active_namespace_list; - -static inline int ldlm_namespace_nr_read(enum ldlm_side client) -{ - return client == LDLM_NAMESPACE_SERVER ? - ldlm_srv_namespace_nr : ldlm_cli_namespace_nr; -} - -static inline void ldlm_namespace_nr_inc(enum ldlm_side client) -{ - if (client == LDLM_NAMESPACE_SERVER) - ldlm_srv_namespace_nr++; - else - ldlm_cli_namespace_nr++; -} - -static inline void ldlm_namespace_nr_dec(enum ldlm_side client) -{ - if (client == LDLM_NAMESPACE_SERVER) - ldlm_srv_namespace_nr--; - else - ldlm_cli_namespace_nr--; -} - -static inline struct list_head *ldlm_namespace_list(enum ldlm_side client) -{ - return client == LDLM_NAMESPACE_SERVER ? 
- &ldlm_srv_namespace_list : &ldlm_cli_active_namespace_list; -} - -static inline struct mutex *ldlm_namespace_lock(enum ldlm_side client) -{ - return client == LDLM_NAMESPACE_SERVER ? - &ldlm_srv_namespace_lock : &ldlm_cli_namespace_lock; -} - -/* ns_bref is the number of resources in this namespace */ -static inline int ldlm_ns_empty(struct ldlm_namespace *ns) -{ - return atomic_read(&ns->ns_bref) == 0; -} - -void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns, - enum ldlm_side client); -void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns, - enum ldlm_side client); -struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side client); - -/* ldlm_request.c */ -/* Cancel lru flag, it indicates we cancel aged locks. */ -enum { - LDLM_LRU_FLAG_AGED = BIT(0), /* Cancel old non-LRU resize locks */ - LDLM_LRU_FLAG_PASSED = BIT(1), /* Cancel passed number of locks. */ - LDLM_LRU_FLAG_SHRINK = BIT(2), /* Cancel locks from shrinker. */ - LDLM_LRU_FLAG_LRUR = BIT(3), /* Cancel locks from lru resize. 
*/ - LDLM_LRU_FLAG_NO_WAIT = BIT(4), /* Cancel locks w/o blocking (neither - * sending nor waiting for any rpcs) - */ - LDLM_LRU_FLAG_LRUR_NO_WAIT = BIT(5), /* LRUR + NO_WAIT */ -}; - -int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, - enum ldlm_cancel_flags sync, int flags); -int ldlm_cancel_lru_local(struct ldlm_namespace *ns, - struct list_head *cancels, int count, int max, - enum ldlm_cancel_flags cancel_flags, int flags); -extern unsigned int ldlm_enqueue_min; -extern unsigned int ldlm_cancel_unused_locks_before_replay; - -/* ldlm_lock.c */ - -struct ldlm_cb_set_arg { - struct ptlrpc_request_set *set; - int type; /* LDLM_{CP,BL,GL}_CALLBACK */ - atomic_t restart; - struct list_head *list; - union ldlm_gl_desc *gl_desc; /* glimpse AST descriptor */ -}; - -enum ldlm_desc_ast_t { - LDLM_WORK_BL_AST, - LDLM_WORK_CP_AST, - LDLM_WORK_REVOKE_AST, - LDLM_WORK_GL_AST -}; - -void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list); -int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, - enum req_location loc, void *data, int size); -struct ldlm_lock * -ldlm_lock_create(struct ldlm_namespace *ns, const struct ldlm_res_id *id, - enum ldlm_type type, enum ldlm_mode mode, - const struct ldlm_callback_suite *cbs, - void *data, __u32 lvb_len, enum lvb_type lvb_type); -enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns, - struct ldlm_lock **lock, void *cookie, - __u64 *flags); -void ldlm_lock_addref_internal(struct ldlm_lock *lock, enum ldlm_mode mode); -void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, - enum ldlm_mode mode); -void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode); -void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, - enum ldlm_mode mode); -int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list, - enum ldlm_desc_ast_t ast_type); -int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use); -#define ldlm_lock_remove_from_lru(lock) 
ldlm_lock_remove_from_lru_check(lock, 0) -int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock); -void ldlm_lock_destroy_nolock(struct ldlm_lock *lock); - -/* ldlm_lockd.c */ -int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, - struct ldlm_lock *lock); -int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, - struct list_head *cancels, int count, - enum ldlm_cancel_flags cancel_flags); -int ldlm_bl_thread_wakeup(void); - -void ldlm_handle_bl_callback(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, struct ldlm_lock *lock); - -extern struct kmem_cache *ldlm_resource_slab; -extern struct kset *ldlm_ns_kset; - -/* ldlm_lockd.c & ldlm_lock.c */ -extern struct kmem_cache *ldlm_lock_slab; - -/* ldlm_extent.c */ -void ldlm_extent_add_lock(struct ldlm_resource *res, struct ldlm_lock *lock); -void ldlm_extent_unlink_lock(struct ldlm_lock *lock); - -/* l_lock.c */ -void l_check_ns_lock(struct ldlm_namespace *ns); -void l_check_no_ns_lock(struct ldlm_namespace *ns); - -extern struct dentry *ldlm_svc_debugfs_dir; - -struct ldlm_state { - struct ptlrpc_service *ldlm_cb_service; - struct ptlrpc_service *ldlm_cancel_service; - struct ptlrpc_client *ldlm_client; - struct ptlrpc_connection *ldlm_server_conn; - struct ldlm_bl_pool *ldlm_bl_pool; -}; - -/* ldlm_pool.c */ -__u64 ldlm_pool_get_slv(struct ldlm_pool *pl); -void ldlm_pool_set_clv(struct ldlm_pool *pl, __u64 clv); -__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl); - -/* interval tree, for LDLM_EXTENT. 
*/ -extern struct kmem_cache *ldlm_interval_slab; /* slab cache for ldlm_interval */ -struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l); -struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock); -void ldlm_interval_free(struct ldlm_interval *node); -/* this function must be called with res lock held */ -static inline struct ldlm_extent * -ldlm_interval_extent(struct ldlm_interval *node) -{ - struct ldlm_lock *lock; - - LASSERT(!list_empty(&node->li_group)); - - lock = list_entry(node->li_group.next, struct ldlm_lock, l_sl_policy); - return &lock->l_policy_data.l_extent; -} - -int ldlm_init(void); -void ldlm_exit(void); - -enum ldlm_policy_res { - LDLM_POLICY_CANCEL_LOCK, - LDLM_POLICY_KEEP_LOCK, - LDLM_POLICY_SKIP_LOCK -}; - -#define LDLM_POOL_SYSFS_PRINT_int(v) sprintf(buf, "%d\n", v) -#define LDLM_POOL_SYSFS_SET_int(a, b) { a = b; } -#define LDLM_POOL_SYSFS_PRINT_u64(v) sprintf(buf, "%lld\n", v) -#define LDLM_POOL_SYSFS_SET_u64(a, b) { a = b; } -#define LDLM_POOL_SYSFS_PRINT_atomic(v) sprintf(buf, "%d\n", atomic_read(&v)) -#define LDLM_POOL_SYSFS_SET_atomic(a, b) atomic_set(&a, b) - -#define LDLM_POOL_SYSFS_READER_SHOW(var, type) \ - static ssize_t var##_show(struct kobject *kobj, \ - struct attribute *attr, \ - char *buf) \ - { \ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \ - pl_kobj); \ - type tmp; \ - \ - spin_lock(&pl->pl_lock); \ - tmp = pl->pl_##var; \ - spin_unlock(&pl->pl_lock); \ - \ - return LDLM_POOL_SYSFS_PRINT_##type(tmp); \ - } \ - struct __##var##__dummy_read {; } /* semicolon catcher */ - -#define LDLM_POOL_SYSFS_WRITER_STORE(var, type) \ - static ssize_t var##_store(struct kobject *kobj, \ - struct attribute *attr, \ - const char *buffer, \ - size_t count) \ - { \ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \ - pl_kobj); \ - unsigned long tmp; \ - int rc; \ - \ - rc = kstrtoul(buffer, 10, &tmp); \ - if (rc < 0) { \ - return rc; \ - } \ - \ - spin_lock(&pl->pl_lock); \ - 
LDLM_POOL_SYSFS_SET_##type(pl->pl_##var, tmp); \ - spin_unlock(&pl->pl_lock); \ - \ - return count; \ - } \ - struct __##var##__dummy_write {; } /* semicolon catcher */ - -#define LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(var, type) \ - static ssize_t var##_show(struct kobject *kobj, \ - struct attribute *attr, \ - char *buf) \ - { \ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \ - pl_kobj); \ - \ - return LDLM_POOL_SYSFS_PRINT_##type(pl->pl_##var); \ - } \ - struct __##var##__dummy_read {; } /* semicolon catcher */ - -#define LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(var, type) \ - static ssize_t var##_store(struct kobject *kobj, \ - struct attribute *attr, \ - const char *buffer, \ - size_t count) \ - { \ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, \ - pl_kobj); \ - unsigned long tmp; \ - int rc; \ - \ - rc = kstrtoul(buffer, 10, &tmp); \ - if (rc < 0) { \ - return rc; \ - } \ - \ - LDLM_POOL_SYSFS_SET_##type(pl->pl_##var, tmp); \ - \ - return count; \ - } \ - struct __##var##__dummy_write {; } /* semicolon catcher */ - -static inline int is_granted_or_cancelled(struct ldlm_lock *lock) -{ - int ret = 0; - - lock_res_and_lock(lock); - if ((lock->l_req_mode == lock->l_granted_mode) && - !ldlm_is_cp_reqd(lock)) - ret = 1; - else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock)) - ret = 1; - unlock_res_and_lock(lock); - - return ret; -} - -typedef void (*ldlm_policy_wire_to_local_t)(const union ldlm_wire_policy_data *, - union ldlm_policy_data *); - -typedef void (*ldlm_policy_local_to_wire_t)(const union ldlm_policy_data *, - union ldlm_wire_policy_data *); - -void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); -void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy); -void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); -void 
ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy); -void ldlm_extent_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); -void ldlm_extent_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy); -void ldlm_flock_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy); -void ldlm_flock_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy); - -static inline bool ldlm_res_eq(const struct ldlm_res_id *res0, - const struct ldlm_res_id *res1) -{ - return memcmp(res0, res1, sizeof(*res0)) == 0; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c deleted file mode 100644 index 0aa4f234a4f4..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c +++ /dev/null @@ -1,842 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -/** - * This file deals with various client/target related logic including recovery. - * - * TODO: This code more logically belongs in the ptlrpc module than in ldlm and - * should be moved. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include -#include -#include -#include -#include -#include "ldlm_internal.h" - -/* @priority: If non-zero, move the selected connection to the list head. - * @create: If zero, only search in existing connections. - */ -static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority, int create) -{ - struct ptlrpc_connection *ptlrpc_conn; - struct obd_import_conn *imp_conn = NULL, *item; - int rc = 0; - - if (!create && !priority) { - CDEBUG(D_HA, "Nothing to do\n"); - return -EINVAL; - } - - ptlrpc_conn = ptlrpc_uuid_to_connection(uuid); - if (!ptlrpc_conn) { - CDEBUG(D_HA, "can't find connection %s\n", uuid->uuid); - return -ENOENT; - } - - if (create) { - imp_conn = kzalloc(sizeof(*imp_conn), GFP_NOFS); - if (!imp_conn) { - rc = -ENOMEM; - goto out_put; - } - } - - spin_lock(&imp->imp_lock); - list_for_each_entry(item, &imp->imp_conn_list, oic_item) { - if (obd_uuid_equals(uuid, &item->oic_uuid)) { - if (priority) { - list_del(&item->oic_item); - list_add(&item->oic_item, - &imp->imp_conn_list); - item->oic_last_attempt = 0; - } - CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n", - imp, imp->imp_obd->obd_name, uuid->uuid, - (priority ? ", moved to head" : "")); - spin_unlock(&imp->imp_lock); - rc = 0; - goto out_free; - } - } - /* No existing import connection found for \a uuid. 
*/ - if (create) { - imp_conn->oic_conn = ptlrpc_conn; - imp_conn->oic_uuid = *uuid; - imp_conn->oic_last_attempt = 0; - if (priority) - list_add(&imp_conn->oic_item, &imp->imp_conn_list); - else - list_add_tail(&imp_conn->oic_item, - &imp->imp_conn_list); - CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n", - imp, imp->imp_obd->obd_name, uuid->uuid, - (priority ? "head" : "tail")); - } else { - spin_unlock(&imp->imp_lock); - rc = -ENOENT; - goto out_free; - } - - spin_unlock(&imp->imp_lock); - return 0; -out_free: - kfree(imp_conn); -out_put: - ptlrpc_connection_put(ptlrpc_conn); - return rc; -} - -int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid) -{ - return import_set_conn(imp, uuid, 1, 0); -} - -int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority) -{ - return import_set_conn(imp, uuid, priority, 1); -} -EXPORT_SYMBOL(client_import_add_conn); - -int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid) -{ - struct obd_import_conn *imp_conn; - struct obd_export *dlmexp; - int rc = -ENOENT; - - spin_lock(&imp->imp_lock); - if (list_empty(&imp->imp_conn_list)) { - LASSERT(!imp->imp_connection); - goto out; - } - - list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) { - if (!obd_uuid_equals(uuid, &imp_conn->oic_uuid)) - continue; - LASSERT(imp_conn->oic_conn); - - if (imp_conn == imp->imp_conn_current) { - LASSERT(imp_conn->oic_conn == imp->imp_connection); - - if (imp->imp_state != LUSTRE_IMP_CLOSED && - imp->imp_state != LUSTRE_IMP_DISCON) { - CERROR("can't remove current connection\n"); - rc = -EBUSY; - goto out; - } - - ptlrpc_connection_put(imp->imp_connection); - imp->imp_connection = NULL; - - dlmexp = class_conn2export(&imp->imp_dlm_handle); - if (dlmexp && dlmexp->exp_connection) { - LASSERT(dlmexp->exp_connection == - imp_conn->oic_conn); - ptlrpc_connection_put(dlmexp->exp_connection); - dlmexp->exp_connection = NULL; - } - - if (dlmexp) - class_export_put(dlmexp); - 
} - - list_del(&imp_conn->oic_item); - ptlrpc_connection_put(imp_conn->oic_conn); - kfree(imp_conn); - CDEBUG(D_HA, "imp %p@%s: remove connection %s\n", - imp, imp->imp_obd->obd_name, uuid->uuid); - rc = 0; - break; - } -out: - spin_unlock(&imp->imp_lock); - if (rc == -ENOENT) - CERROR("connection %s not found\n", uuid->uuid); - return rc; -} -EXPORT_SYMBOL(client_import_del_conn); - -/** - * Find conn UUID by peer NID. \a peer is a server NID. This function is used - * to find a conn uuid of \a imp which can reach \a peer. - */ -int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer, - struct obd_uuid *uuid) -{ - struct obd_import_conn *conn; - int rc = -ENOENT; - - spin_lock(&imp->imp_lock); - list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { - /* Check if conn UUID does have this peer NID. */ - if (class_check_uuid(&conn->oic_uuid, peer)) { - *uuid = conn->oic_uuid; - rc = 0; - break; - } - } - spin_unlock(&imp->imp_lock); - return rc; -} -EXPORT_SYMBOL(client_import_find_conn); - -void client_destroy_import(struct obd_import *imp) -{ - /* Drop security policy instance after all RPCs have finished/aborted - * to let all busy contexts be released. - */ - class_import_get(imp); - class_destroy_import(imp); - sptlrpc_import_sec_put(imp); - class_import_put(imp); -} -EXPORT_SYMBOL(client_destroy_import); - -/* Configure an RPC client OBD device. - * - * lcfg parameters: - * 1 - client UUID - * 2 - server UUID - * 3 - inactive-on-startup - */ -int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) -{ - struct client_obd *cli = &obddev->u.cli; - struct obd_import *imp; - struct obd_uuid server_uuid; - int rq_portal, rp_portal, connect_op; - char *name = obddev->obd_type->typ_name; - enum ldlm_ns_type ns_type = LDLM_NS_TYPE_UNKNOWN; - int rc; - - /* In a more perfect world, we would hang a ptlrpc_client off of - * obd_type and just use the values from there. 
- */ - if (!strcmp(name, LUSTRE_OSC_NAME)) { - rq_portal = OST_REQUEST_PORTAL; - rp_portal = OSC_REPLY_PORTAL; - connect_op = OST_CONNECT; - cli->cl_sp_me = LUSTRE_SP_CLI; - cli->cl_sp_to = LUSTRE_SP_OST; - ns_type = LDLM_NS_TYPE_OSC; - } else if (!strcmp(name, LUSTRE_MDC_NAME) || - !strcmp(name, LUSTRE_LWP_NAME)) { - rq_portal = MDS_REQUEST_PORTAL; - rp_portal = MDC_REPLY_PORTAL; - connect_op = MDS_CONNECT; - cli->cl_sp_me = LUSTRE_SP_CLI; - cli->cl_sp_to = LUSTRE_SP_MDT; - ns_type = LDLM_NS_TYPE_MDC; - } else if (!strcmp(name, LUSTRE_MGC_NAME)) { - rq_portal = MGS_REQUEST_PORTAL; - rp_portal = MGC_REPLY_PORTAL; - connect_op = MGS_CONNECT; - cli->cl_sp_me = LUSTRE_SP_MGC; - cli->cl_sp_to = LUSTRE_SP_MGS; - cli->cl_flvr_mgc.sf_rpc = SPTLRPC_FLVR_INVALID; - ns_type = LDLM_NS_TYPE_MGC; - } else { - CERROR("unknown client OBD type \"%s\", can't setup\n", - name); - return -EINVAL; - } - - if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { - CERROR("requires a TARGET UUID\n"); - return -EINVAL; - } - - if (LUSTRE_CFG_BUFLEN(lcfg, 1) > 37) { - CERROR("client UUID must be less than 38 characters\n"); - return -EINVAL; - } - - if (LUSTRE_CFG_BUFLEN(lcfg, 2) < 1) { - CERROR("setup requires a SERVER UUID\n"); - return -EINVAL; - } - - if (LUSTRE_CFG_BUFLEN(lcfg, 2) > 37) { - CERROR("target UUID must be less than 38 characters\n"); - return -EINVAL; - } - - init_rwsem(&cli->cl_sem); - cli->cl_conn_count = 0; - memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2), - min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2), - sizeof(server_uuid))); - - cli->cl_dirty_pages = 0; - cli->cl_avail_grant = 0; - /* FIXME: Should limit this for the sum of all cl_dirty_max_pages. */ - /* - * cl_dirty_max_pages may be changed at connect time in - * ptlrpc_connect_interpret(). 
- */ - client_adjust_max_dirty(cli); - INIT_LIST_HEAD(&cli->cl_cache_waiters); - INIT_LIST_HEAD(&cli->cl_loi_ready_list); - INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list); - INIT_LIST_HEAD(&cli->cl_loi_write_list); - INIT_LIST_HEAD(&cli->cl_loi_read_list); - spin_lock_init(&cli->cl_loi_list_lock); - atomic_set(&cli->cl_pending_w_pages, 0); - atomic_set(&cli->cl_pending_r_pages, 0); - cli->cl_r_in_flight = 0; - cli->cl_w_in_flight = 0; - - spin_lock_init(&cli->cl_read_rpc_hist.oh_lock); - spin_lock_init(&cli->cl_write_rpc_hist.oh_lock); - spin_lock_init(&cli->cl_read_page_hist.oh_lock); - spin_lock_init(&cli->cl_write_page_hist.oh_lock); - spin_lock_init(&cli->cl_read_offset_hist.oh_lock); - spin_lock_init(&cli->cl_write_offset_hist.oh_lock); - - /* lru for osc. */ - INIT_LIST_HEAD(&cli->cl_lru_osc); - atomic_set(&cli->cl_lru_shrinkers, 0); - atomic_long_set(&cli->cl_lru_busy, 0); - atomic_long_set(&cli->cl_lru_in_list, 0); - INIT_LIST_HEAD(&cli->cl_lru_list); - spin_lock_init(&cli->cl_lru_list_lock); - atomic_long_set(&cli->cl_unstable_count, 0); - INIT_LIST_HEAD(&cli->cl_shrink_list); - - init_waitqueue_head(&cli->cl_destroy_waitq); - atomic_set(&cli->cl_destroy_in_flight, 0); - /* Turn on checksumming by default. */ - cli->cl_checksum = 1; - /* - * The supported checksum types will be worked out at connect time - * Set cl_chksum* to CRC32 for now to avoid returning screwed info - * through procfs. - */ - cli->cl_cksum_type = OBD_CKSUM_CRC32; - cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; - atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); - - /* - * Set it to possible maximum size. It may be reduced by ocd_brw_size - * from OFD after connecting. - */ - cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES; - - /* - * set cl_chunkbits default value to PAGE_CACHE_SHIFT, - * it will be updated at OSC connection time. 
- */ - cli->cl_chunkbits = PAGE_SHIFT; - - if (!strcmp(name, LUSTRE_MDC_NAME)) - cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT; - else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) - cli->cl_max_rpcs_in_flight = 2; - else if (totalram_pages >> (20 - PAGE_SHIFT) <= 256 /* MB */) - cli->cl_max_rpcs_in_flight = 3; - else if (totalram_pages >> (20 - PAGE_SHIFT) <= 512 /* MB */) - cli->cl_max_rpcs_in_flight = 4; - else - cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT; - - spin_lock_init(&cli->cl_mod_rpcs_lock); - spin_lock_init(&cli->cl_mod_rpcs_hist.oh_lock); - cli->cl_max_mod_rpcs_in_flight = 0; - cli->cl_mod_rpcs_in_flight = 0; - cli->cl_close_rpcs_in_flight = 0; - init_waitqueue_head(&cli->cl_mod_rpcs_waitq); - cli->cl_mod_tag_bitmap = NULL; - - if (connect_op == MDS_CONNECT) { - cli->cl_max_mod_rpcs_in_flight = cli->cl_max_rpcs_in_flight - 1; - cli->cl_mod_tag_bitmap = kcalloc(BITS_TO_LONGS(OBD_MAX_RIF_MAX), - sizeof(long), GFP_NOFS); - if (!cli->cl_mod_tag_bitmap) { - rc = -ENOMEM; - goto err; - } - } - - rc = ldlm_get_ref(); - if (rc) { - CERROR("ldlm_get_ref failed: %d\n", rc); - goto err; - } - - ptlrpc_init_client(rq_portal, rp_portal, name, - &obddev->obd_ldlm_client); - - imp = class_new_import(obddev); - if (!imp) { - rc = -ENOENT; - goto err_ldlm; - } - imp->imp_client = &obddev->obd_ldlm_client; - imp->imp_connect_op = connect_op; - memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1), - LUSTRE_CFG_BUFLEN(lcfg, 1)); - class_import_put(imp); - - rc = client_import_add_conn(imp, &server_uuid, 1); - if (rc) { - CERROR("can't add initial connection\n"); - goto err_import; - } - - cli->cl_import = imp; - /* cli->cl_max_mds_easize updated by mdc_init_ea_size() */ - cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3); - - if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) { - if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) { - CDEBUG(D_HA, "marking %s %s->%s as inactive\n", - name, obddev->obd_name, - cli->cl_target_uuid.uuid); - 
spin_lock(&imp->imp_lock); - imp->imp_deactive = 1; - spin_unlock(&imp->imp_lock); - } - } - - obddev->obd_namespace = ldlm_namespace_new(obddev, obddev->obd_name, - LDLM_NAMESPACE_CLIENT, - LDLM_NAMESPACE_GREEDY, - ns_type); - if (!obddev->obd_namespace) { - CERROR("Unable to create client namespace - %s\n", - obddev->obd_name); - rc = -ENOMEM; - goto err_import; - } - - return rc; - -err_import: - class_destroy_import(imp); -err_ldlm: - ldlm_put_ref(); -err: - kfree(cli->cl_mod_tag_bitmap); - cli->cl_mod_tag_bitmap = NULL; - return rc; -} -EXPORT_SYMBOL(client_obd_setup); - -int client_obd_cleanup(struct obd_device *obddev) -{ - struct client_obd *cli = &obddev->u.cli; - - ldlm_namespace_free_post(obddev->obd_namespace); - obddev->obd_namespace = NULL; - - obd_cleanup_client_import(obddev); - LASSERT(!obddev->u.cli.cl_import); - - ldlm_put_ref(); - - kfree(cli->cl_mod_tag_bitmap); - cli->cl_mod_tag_bitmap = NULL; - - return 0; -} -EXPORT_SYMBOL(client_obd_cleanup); - -/* ->o_connect() method for client side (OSC and MDC and MGC) */ -int client_connect_import(const struct lu_env *env, - struct obd_export **exp, - struct obd_device *obd, struct obd_uuid *cluuid, - struct obd_connect_data *data, void *localdata) -{ - struct client_obd *cli = &obd->u.cli; - struct obd_import *imp = cli->cl_import; - struct obd_connect_data *ocd; - struct lustre_handle conn = { 0 }; - bool is_mdc = false; - int rc; - - *exp = NULL; - down_write(&cli->cl_sem); - if (cli->cl_conn_count > 0) { - rc = -EALREADY; - goto out_sem; - } - - rc = class_connect(&conn, obd, cluuid); - if (rc) - goto out_sem; - - cli->cl_conn_count++; - *exp = class_conn2export(&conn); - - LASSERT(obd->obd_namespace); - - imp->imp_dlm_handle = conn; - rc = ptlrpc_init_import(imp); - if (rc != 0) - goto out_ldlm; - - ocd = &imp->imp_connect_data; - if (data) { - *ocd = *data; - is_mdc = !strncmp(imp->imp_obd->obd_type->typ_name, - LUSTRE_MDC_NAME, 3); - if (is_mdc) - data->ocd_connect_flags |= 
OBD_CONNECT_MULTIMODRPCS; - imp->imp_connect_flags_orig = data->ocd_connect_flags; - } - - rc = ptlrpc_connect_import(imp); - if (rc != 0) { - if (data && is_mdc) - data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS; - LASSERT(imp->imp_state == LUSTRE_IMP_DISCON); - goto out_ldlm; - } - LASSERT(*exp && (*exp)->exp_connection); - - if (data) { - LASSERTF((ocd->ocd_connect_flags & data->ocd_connect_flags) == - ocd->ocd_connect_flags, "old %#llx, new %#llx\n", - data->ocd_connect_flags, ocd->ocd_connect_flags); - data->ocd_connect_flags = ocd->ocd_connect_flags; - /* clear the flag as it was not set and is not known - * by upper layers - */ - if (is_mdc) - data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS; - } - - ptlrpc_pinger_add_import(imp); - - if (rc) { -out_ldlm: - cli->cl_conn_count--; - class_disconnect(*exp); - *exp = NULL; - } -out_sem: - up_write(&cli->cl_sem); - - return rc; -} -EXPORT_SYMBOL(client_connect_import); - -int client_disconnect_export(struct obd_export *exp) -{ - struct obd_device *obd = class_exp2obd(exp); - struct client_obd *cli; - struct obd_import *imp; - int rc = 0, err; - - if (!obd) { - CERROR("invalid export for disconnect: exp %p cookie %#llx\n", - exp, exp ? exp->exp_handle.h_cookie : -1); - return -EINVAL; - } - - cli = &obd->u.cli; - imp = cli->cl_import; - - down_write(&cli->cl_sem); - CDEBUG(D_INFO, "disconnect %s - %zu\n", obd->obd_name, - cli->cl_conn_count); - - if (!cli->cl_conn_count) { - CERROR("disconnecting disconnected device (%s)\n", - obd->obd_name); - rc = -EINVAL; - goto out_disconnect; - } - - cli->cl_conn_count--; - if (cli->cl_conn_count) { - rc = 0; - goto out_disconnect; - } - - /* Mark import deactivated now, so we don't try to reconnect if any - * of the cleanup RPCs fails (e.g. LDLM cancel, etc). We don't - * fully deactivate the import, or that would drop all requests. 
- */ - spin_lock(&imp->imp_lock); - imp->imp_deactive = 1; - spin_unlock(&imp->imp_lock); - - /* Some non-replayable imports (MDS's OSCs) are pinged, so just - * delete it regardless. (It's safe to delete an import that was - * never added.) - */ - (void)ptlrpc_pinger_del_import(imp); - - if (obd->obd_namespace) { - /* obd_force == local only */ - ldlm_cli_cancel_unused(obd->obd_namespace, NULL, - obd->obd_force ? LCF_LOCAL : 0, NULL); - ldlm_namespace_free_prior(obd->obd_namespace, imp, - obd->obd_force); - } - - /* There's no need to hold sem while disconnecting an import, - * and it may actually cause deadlock in GSS. - */ - up_write(&cli->cl_sem); - rc = ptlrpc_disconnect_import(imp, 0); - down_write(&cli->cl_sem); - - ptlrpc_invalidate_import(imp); - -out_disconnect: - /* Use server style - class_disconnect should be always called for - * o_disconnect. - */ - err = class_disconnect(exp); - if (!rc && err) - rc = err; - - up_write(&cli->cl_sem); - - return rc; -} -EXPORT_SYMBOL(client_disconnect_export); - -/** - * Packs current SLV and Limit into \a req. - */ -int target_pack_pool_reply(struct ptlrpc_request *req) -{ - struct obd_device *obd; - - /* Check that we still have all structures alive as this may - * be some late RPC at shutdown time. - */ - if (unlikely(!req->rq_export || !req->rq_export->exp_obd || - !exp_connect_lru_resize(req->rq_export))) { - lustre_msg_set_slv(req->rq_repmsg, 0); - lustre_msg_set_limit(req->rq_repmsg, 0); - return 0; - } - - /* OBD is alive here as export is alive, which we checked above. 
*/ - obd = req->rq_export->exp_obd; - - read_lock(&obd->obd_pool_lock); - lustre_msg_set_slv(req->rq_repmsg, obd->obd_pool_slv); - lustre_msg_set_limit(req->rq_repmsg, obd->obd_pool_limit); - read_unlock(&obd->obd_pool_lock); - - return 0; -} -EXPORT_SYMBOL(target_pack_pool_reply); - -static int -target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id) -{ - if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) { - DEBUG_REQ(D_ERROR, req, "dropping reply"); - return -ECOMM; - } - - if (unlikely(rc)) { - DEBUG_REQ(D_NET, req, "processing error (%d)", rc); - req->rq_status = rc; - return ptlrpc_send_error(req, 1); - } - - DEBUG_REQ(D_NET, req, "sending reply"); - return ptlrpc_send_reply(req, PTLRPC_REPLY_MAYBE_DIFFICULT); -} - -void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) -{ - struct ptlrpc_service_part *svcpt; - int netrc; - struct ptlrpc_reply_state *rs; - struct obd_export *exp; - - if (req->rq_no_reply) - return; - - svcpt = req->rq_rqbd->rqbd_svcpt; - rs = req->rq_reply_state; - if (!rs || !rs->rs_difficult) { - /* no notifiers */ - target_send_reply_msg(req, rc, fail_id); - return; - } - - /* must be an export if locks saved */ - LASSERT(req->rq_export); - /* req/reply consistent */ - LASSERT(rs->rs_svcpt == svcpt); - - /* "fresh" reply */ - LASSERT(!rs->rs_scheduled); - LASSERT(!rs->rs_scheduled_ever); - LASSERT(!rs->rs_handled); - LASSERT(!rs->rs_on_net); - LASSERT(!rs->rs_export); - LASSERT(list_empty(&rs->rs_obd_list)); - LASSERT(list_empty(&rs->rs_exp_list)); - - exp = class_export_get(req->rq_export); - - /* disable reply scheduling while I'm setting up */ - rs->rs_scheduled = 1; - rs->rs_on_net = 1; - rs->rs_xid = req->rq_xid; - rs->rs_transno = req->rq_transno; - rs->rs_export = exp; - rs->rs_opc = lustre_msg_get_opc(req->rq_reqmsg); - - spin_lock(&exp->exp_uncommitted_replies_lock); - CDEBUG(D_NET, "rs transno = %llu, last committed = %llu\n", - rs->rs_transno, exp->exp_last_committed); - if 
(rs->rs_transno > exp->exp_last_committed) { - /* not committed already */ - list_add_tail(&rs->rs_obd_list, - &exp->exp_uncommitted_replies); - } - spin_unlock(&exp->exp_uncommitted_replies_lock); - - spin_lock(&exp->exp_lock); - list_add_tail(&rs->rs_exp_list, &exp->exp_outstanding_replies); - spin_unlock(&exp->exp_lock); - - netrc = target_send_reply_msg(req, rc, fail_id); - - spin_lock(&svcpt->scp_rep_lock); - - atomic_inc(&svcpt->scp_nreps_difficult); - - if (netrc != 0) { - /* error sending: reply is off the net. Also we need +1 - * reply ref until ptlrpc_handle_rs() is done - * with the reply state (if the send was successful, there - * would have been +1 ref for the net, which - * reply_out_callback leaves alone) - */ - rs->rs_on_net = 0; - ptlrpc_rs_addref(rs); - } - - spin_lock(&rs->rs_lock); - if (rs->rs_transno <= exp->exp_last_committed || - (!rs->rs_on_net && !rs->rs_no_ack) || - list_empty(&rs->rs_exp_list) || /* completed already */ - list_empty(&rs->rs_obd_list)) { - CDEBUG(D_HA, "Schedule reply immediately\n"); - ptlrpc_dispatch_difficult_reply(rs); - } else { - list_add(&rs->rs_list, &svcpt->scp_rep_active); - rs->rs_scheduled = 0; /* allow notifier to schedule */ - } - spin_unlock(&rs->rs_lock); - spin_unlock(&svcpt->scp_rep_lock); -} -EXPORT_SYMBOL(target_send_reply); - -enum ldlm_mode lck_compat_array[] = { - [LCK_EX] = LCK_COMPAT_EX, - [LCK_PW] = LCK_COMPAT_PW, - [LCK_PR] = LCK_COMPAT_PR, - [LCK_CW] = LCK_COMPAT_CW, - [LCK_CR] = LCK_COMPAT_CR, - [LCK_NL] = LCK_COMPAT_NL, - [LCK_GROUP] = LCK_COMPAT_GROUP, - [LCK_COS] = LCK_COMPAT_COS, -}; - -/** - * Rather arbitrary mapping from LDLM error codes to errno values. This should - * not escape to the user level. 
- */ -int ldlm_error2errno(enum ldlm_error error) -{ - int result; - - switch (error) { - case ELDLM_OK: - case ELDLM_LOCK_MATCHED: - result = 0; - break; - case ELDLM_LOCK_CHANGED: - result = -ESTALE; - break; - case ELDLM_LOCK_ABORTED: - result = -ENAVAIL; - break; - case ELDLM_LOCK_REPLACED: - result = -ESRCH; - break; - case ELDLM_NO_LOCK_DATA: - result = -ENOENT; - break; - case ELDLM_NAMESPACE_EXISTS: - result = -EEXIST; - break; - case ELDLM_BAD_NAMESPACE: - result = -EBADF; - break; - default: - if (((int)error) < 0) /* cast to signed type */ - result = error; /* as enum ldlm_error can be unsigned */ - else { - CERROR("Invalid DLM result code: %d\n", error); - result = -EPROTO; - } - } - return result; -} -EXPORT_SYMBOL(ldlm_error2errno); - -#if LUSTRE_TRACKS_LOCK_EXP_REFS -void ldlm_dump_export_locks(struct obd_export *exp) -{ - spin_lock(&exp->exp_locks_list_guard); - if (!list_empty(&exp->exp_locks_list)) { - struct ldlm_lock *lock; - - CERROR("dumping locks for export %p,ignore if the unmount doesn't hang\n", - exp); - list_for_each_entry(lock, &exp->exp_locks_list, - l_exp_refs_link) - LDLM_ERROR(lock, "lock:"); - } - spin_unlock(&exp->exp_locks_list_guard); -} -#endif diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c deleted file mode 100644 index a644d133063b..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c +++ /dev/null @@ -1,2135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_lock.c - * - * Author: Peter Braam - * Author: Phil Schwan - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include -#include -#include -#include "ldlm_internal.h" - -/* lock types */ -char *ldlm_lockname[] = { - [0] = "--", - [LCK_EX] = "EX", - [LCK_PW] = "PW", - [LCK_PR] = "PR", - [LCK_CW] = "CW", - [LCK_CR] = "CR", - [LCK_NL] = "NL", - [LCK_GROUP] = "GROUP", - [LCK_COS] = "COS", -}; -EXPORT_SYMBOL(ldlm_lockname); - -static char *ldlm_typename[] = { - [LDLM_PLAIN] = "PLN", - [LDLM_EXTENT] = "EXT", - [LDLM_FLOCK] = "FLK", - [LDLM_IBITS] = "IBT", -}; - -static ldlm_policy_wire_to_local_t ldlm_policy_wire_to_local[] = { - [LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_wire_to_local, - [LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_wire_to_local, - [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_wire_to_local, - [LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_wire_to_local, -}; - -static ldlm_policy_local_to_wire_t ldlm_policy_local_to_wire[] = { - [LDLM_PLAIN - LDLM_MIN_TYPE] = ldlm_plain_policy_local_to_wire, - [LDLM_EXTENT - LDLM_MIN_TYPE] = ldlm_extent_policy_local_to_wire, - [LDLM_FLOCK - LDLM_MIN_TYPE] = ldlm_flock_policy_local_to_wire, - [LDLM_IBITS - LDLM_MIN_TYPE] = ldlm_ibits_policy_local_to_wire, -}; - -/** - * Converts lock policy from local format to on the wire lock_desc format - */ -static void 
ldlm_convert_policy_to_wire(enum ldlm_type type, - const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - ldlm_policy_local_to_wire_t convert; - - convert = ldlm_policy_local_to_wire[type - LDLM_MIN_TYPE]; - - convert(lpolicy, wpolicy); -} - -/** - * Converts lock policy from on the wire lock_desc format to local format - */ -void ldlm_convert_policy_to_local(struct obd_export *exp, enum ldlm_type type, - const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - ldlm_policy_wire_to_local_t convert; - - convert = ldlm_policy_wire_to_local[type - LDLM_MIN_TYPE]; - - convert(wpolicy, lpolicy); -} - -const char *ldlm_it2str(enum ldlm_intent_flags it) -{ - switch (it) { - case IT_OPEN: - return "open"; - case IT_CREAT: - return "creat"; - case (IT_OPEN | IT_CREAT): - return "open|creat"; - case IT_READDIR: - return "readdir"; - case IT_GETATTR: - return "getattr"; - case IT_LOOKUP: - return "lookup"; - case IT_UNLINK: - return "unlink"; - case IT_GETXATTR: - return "getxattr"; - case IT_LAYOUT: - return "layout"; - default: - CERROR("Unknown intent 0x%08x\n", it); - return "UNKNOWN"; - } -} -EXPORT_SYMBOL(ldlm_it2str); - -/* - * REFCOUNTED LOCK OBJECTS - */ - -/** - * Get a reference on a lock. - * - * Lock refcounts, during creation: - * - one special one for allocation, dec'd only once in destroy - * - one for being a lock that's in-use - * - one for the addref associated with a new lock - */ -struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock) -{ - atomic_inc(&lock->l_refc); - return lock; -} -EXPORT_SYMBOL(ldlm_lock_get); - -/** - * Release lock reference. - * - * Also frees the lock if it was last reference. 
- */ -void ldlm_lock_put(struct ldlm_lock *lock) -{ - LASSERT(lock->l_resource != LP_POISON); - LASSERT(atomic_read(&lock->l_refc) > 0); - if (atomic_dec_and_test(&lock->l_refc)) { - struct ldlm_resource *res; - - LDLM_DEBUG(lock, - "final lock_put on destroyed lock, freeing it."); - - res = lock->l_resource; - LASSERT(ldlm_is_destroyed(lock)); - LASSERT(list_empty(&lock->l_res_link)); - LASSERT(list_empty(&lock->l_pending_chain)); - - lprocfs_counter_decr(ldlm_res_to_ns(res)->ns_stats, - LDLM_NSS_LOCKS); - lu_ref_del(&res->lr_reference, "lock", lock); - ldlm_resource_putref(res); - lock->l_resource = NULL; - if (lock->l_export) { - class_export_lock_put(lock->l_export, lock); - lock->l_export = NULL; - } - - kfree(lock->l_lvb_data); - - ldlm_interval_free(ldlm_interval_detach(lock)); - lu_ref_fini(&lock->l_reference); - OBD_FREE_RCU(lock, sizeof(*lock), &lock->l_handle); - } -} -EXPORT_SYMBOL(ldlm_lock_put); - -/** - * Removes LDLM lock \a lock from LRU. Assumes LRU is already locked. - */ -int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock) -{ - int rc = 0; - - if (!list_empty(&lock->l_lru)) { - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - LASSERT(lock->l_resource->lr_type != LDLM_FLOCK); - list_del_init(&lock->l_lru); - LASSERT(ns->ns_nr_unused > 0); - ns->ns_nr_unused--; - rc = 1; - } - return rc; -} - -/** - * Removes LDLM lock \a lock from LRU. Obtains the LRU lock first. - * - * If \a last_use is non-zero, it will remove the lock from LRU only if - * it matches lock's l_last_used. - * - * \retval 0 if \a last_use is set, the lock is not in LRU list or \a last_use - * doesn't match lock's l_last_used; - * otherwise, the lock hasn't been in the LRU list. - * \retval 1 the lock was in LRU list and removed. 
- */ -int ldlm_lock_remove_from_lru_check(struct ldlm_lock *lock, time_t last_use) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - int rc = 0; - - spin_lock(&ns->ns_lock); - if (last_use == 0 || last_use == lock->l_last_used) - rc = ldlm_lock_remove_from_lru_nolock(lock); - spin_unlock(&ns->ns_lock); - - return rc; -} - -/** - * Adds LDLM lock \a lock to namespace LRU. Assumes LRU is already locked. - */ -static void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - lock->l_last_used = jiffies; - LASSERT(list_empty(&lock->l_lru)); - LASSERT(lock->l_resource->lr_type != LDLM_FLOCK); - list_add_tail(&lock->l_lru, &ns->ns_unused_list); - ldlm_clear_skipped(lock); - LASSERT(ns->ns_nr_unused >= 0); - ns->ns_nr_unused++; -} - -/** - * Adds LDLM lock \a lock to namespace LRU. Obtains necessary LRU locks - * first. - */ -static void ldlm_lock_add_to_lru(struct ldlm_lock *lock) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - spin_lock(&ns->ns_lock); - ldlm_lock_add_to_lru_nolock(lock); - spin_unlock(&ns->ns_lock); -} - -/** - * Moves LDLM lock \a lock that is already in namespace LRU to the tail of - * the LRU. Performs necessary LRU locking - */ -static void ldlm_lock_touch_in_lru(struct ldlm_lock *lock) -{ - struct ldlm_namespace *ns = ldlm_lock_to_ns(lock); - - spin_lock(&ns->ns_lock); - if (!list_empty(&lock->l_lru)) { - ldlm_lock_remove_from_lru_nolock(lock); - ldlm_lock_add_to_lru_nolock(lock); - } - spin_unlock(&ns->ns_lock); -} - -/** - * Helper to destroy a locked lock. - * - * Used by ldlm_lock_destroy and ldlm_lock_destroy_nolock - * Must be called with l_lock and lr_lock held. - * - * Does not actually free the lock data, but rather marks the lock as - * destroyed by setting l_destroyed field in the lock to 1. Destroys a - * handle->lock association too, so that the lock can no longer be found - * and removes the lock from LRU list. 
Actual lock freeing occurs when - * last lock reference goes away. - * - * Original comment (of some historical value): - * This used to have a 'strict' flag, which recovery would use to mark an - * in-use lock as needing-to-die. Lest I am ever tempted to put it back, I - * shall explain why it's gone: with the new hash table scheme, once you call - * ldlm_lock_destroy, you can never drop your final references on this lock. - * Because it's not in the hash table anymore. -phil - */ -static int ldlm_lock_destroy_internal(struct ldlm_lock *lock) -{ - if (lock->l_readers || lock->l_writers) { - LDLM_ERROR(lock, "lock still has references"); - LBUG(); - } - - if (!list_empty(&lock->l_res_link)) { - LDLM_ERROR(lock, "lock still on resource"); - LBUG(); - } - - if (ldlm_is_destroyed(lock)) { - LASSERT(list_empty(&lock->l_lru)); - return 0; - } - ldlm_set_destroyed(lock); - - ldlm_lock_remove_from_lru(lock); - class_handle_unhash(&lock->l_handle); - - return 1; -} - -/** - * Destroys a LDLM lock \a lock. Performs necessary locking first. - */ -static void ldlm_lock_destroy(struct ldlm_lock *lock) -{ - int first; - - lock_res_and_lock(lock); - first = ldlm_lock_destroy_internal(lock); - unlock_res_and_lock(lock); - - /* drop reference from hashtable only for first destroy */ - if (first) { - lu_ref_del(&lock->l_reference, "hash", lock); - LDLM_LOCK_RELEASE(lock); - } -} - -/** - * Destroys a LDLM lock \a lock that is already locked. 
- */ -void ldlm_lock_destroy_nolock(struct ldlm_lock *lock) -{ - int first; - - first = ldlm_lock_destroy_internal(lock); - /* drop reference from hashtable only for first destroy */ - if (first) { - lu_ref_del(&lock->l_reference, "hash", lock); - LDLM_LOCK_RELEASE(lock); - } -} - -/* this is called by portals_handle2object with the handle lock taken */ -static void lock_handle_addref(void *lock) -{ - LDLM_LOCK_GET((struct ldlm_lock *)lock); -} - -static void lock_handle_free(void *lock, int size) -{ - LASSERT(size == sizeof(struct ldlm_lock)); - kmem_cache_free(ldlm_lock_slab, lock); -} - -static struct portals_handle_ops lock_handle_ops = { - .hop_addref = lock_handle_addref, - .hop_free = lock_handle_free, -}; - -/** - * - * Allocate and initialize new lock structure. - * - * usage: pass in a resource on which you have done ldlm_resource_get - * new lock will take over the refcount. - * returns: lock with refcount 2 - one for current caller and one for remote - */ -static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource) -{ - struct ldlm_lock *lock; - - LASSERT(resource); - - lock = kmem_cache_zalloc(ldlm_lock_slab, GFP_NOFS); - if (!lock) - return NULL; - - spin_lock_init(&lock->l_lock); - lock->l_resource = resource; - lu_ref_add(&resource->lr_reference, "lock", lock); - - atomic_set(&lock->l_refc, 2); - INIT_LIST_HEAD(&lock->l_res_link); - INIT_LIST_HEAD(&lock->l_lru); - INIT_LIST_HEAD(&lock->l_pending_chain); - INIT_LIST_HEAD(&lock->l_bl_ast); - INIT_LIST_HEAD(&lock->l_cp_ast); - INIT_LIST_HEAD(&lock->l_rk_ast); - init_waitqueue_head(&lock->l_waitq); - lock->l_blocking_lock = NULL; - INIT_LIST_HEAD(&lock->l_sl_mode); - INIT_LIST_HEAD(&lock->l_sl_policy); - - lprocfs_counter_incr(ldlm_res_to_ns(resource)->ns_stats, - LDLM_NSS_LOCKS); - INIT_LIST_HEAD(&lock->l_handle.h_link); - class_handle_hash(&lock->l_handle, &lock_handle_ops); - - lu_ref_init(&lock->l_reference); - lu_ref_add(&lock->l_reference, "hash", lock); - lock->l_callback_timeout = 0; 
- -#if LUSTRE_TRACKS_LOCK_EXP_REFS - INIT_LIST_HEAD(&lock->l_exp_refs_link); - lock->l_exp_refs_nr = 0; - lock->l_exp_refs_target = NULL; -#endif - - return lock; -} - -/** - * Moves LDLM lock \a lock to another resource. - * This is used on client when server returns some other lock than requested - * (typically as a result of intent operation) - */ -int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock, - const struct ldlm_res_id *new_resid) -{ - struct ldlm_resource *oldres = lock->l_resource; - struct ldlm_resource *newres; - int type; - - lock_res_and_lock(lock); - if (memcmp(new_resid, &lock->l_resource->lr_name, - sizeof(lock->l_resource->lr_name)) == 0) { - /* Nothing to do */ - unlock_res_and_lock(lock); - return 0; - } - - LASSERT(new_resid->name[0] != 0); - - /* This function assumes that the lock isn't on any lists */ - LASSERT(list_empty(&lock->l_res_link)); - - type = oldres->lr_type; - unlock_res_and_lock(lock); - - newres = ldlm_resource_get(ns, NULL, new_resid, type, 1); - if (IS_ERR(newres)) - return PTR_ERR(newres); - - lu_ref_add(&newres->lr_reference, "lock", lock); - /* - * To flip the lock from the old to the new resource, lock, oldres and - * newres have to be locked. Resource spin-locks are nested within - * lock->l_lock, and are taken in the memory address order to avoid - * dead-locks. - */ - spin_lock(&lock->l_lock); - oldres = lock->l_resource; - if (oldres < newres) { - lock_res(oldres); - lock_res_nested(newres, LRT_NEW); - } else { - lock_res(newres); - lock_res_nested(oldres, LRT_NEW); - } - LASSERT(memcmp(new_resid, &oldres->lr_name, - sizeof(oldres->lr_name)) != 0); - lock->l_resource = newres; - unlock_res(oldres); - unlock_res_and_lock(lock); - - /* ...and the flowers are still standing! */ - lu_ref_del(&oldres->lr_reference, "lock", lock); - ldlm_resource_putref(oldres); - - return 0; -} - -/** \defgroup ldlm_handles LDLM HANDLES - * Ways to get hold of locks without any addresses. 
- * @{ - */ - -/** - * Fills in handle for LDLM lock \a lock into supplied \a lockh - * Does not take any references. - */ -void ldlm_lock2handle(const struct ldlm_lock *lock, struct lustre_handle *lockh) -{ - lockh->cookie = lock->l_handle.h_cookie; -} -EXPORT_SYMBOL(ldlm_lock2handle); - -/** - * Obtain a lock reference by handle. - * - * if \a flags: atomically get the lock and set the flags. - * Return NULL if flag already set - */ -struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle, - __u64 flags) -{ - struct ldlm_lock *lock; - - LASSERT(handle); - - lock = class_handle2object(handle->cookie, NULL); - if (!lock) - return NULL; - - if (lock->l_export && lock->l_export->exp_failed) { - CDEBUG(D_INFO, "lock export failed: lock %p, exp %p\n", - lock, lock->l_export); - LDLM_LOCK_PUT(lock); - return NULL; - } - - /* It's unlikely but possible that someone marked the lock as - * destroyed after we did handle2object on it - */ - if (flags == 0 && !ldlm_is_destroyed(lock)) { - lu_ref_add(&lock->l_reference, "handle", current); - return lock; - } - - lock_res_and_lock(lock); - - LASSERT(lock->l_resource); - - lu_ref_add_atomic(&lock->l_reference, "handle", current); - if (unlikely(ldlm_is_destroyed(lock))) { - unlock_res_and_lock(lock); - CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock); - LDLM_LOCK_PUT(lock); - return NULL; - } - - if (flags) { - if (lock->l_flags & flags) { - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - return NULL; - } - - lock->l_flags |= flags; - } - - unlock_res_and_lock(lock); - return lock; -} -EXPORT_SYMBOL(__ldlm_handle2lock); -/** @} ldlm_handles */ - -/** - * Fill in "on the wire" representation for given LDLM lock into supplied - * lock descriptor \a desc structure. 
- */ -void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc) -{ - ldlm_res2desc(lock->l_resource, &desc->l_resource); - desc->l_req_mode = lock->l_req_mode; - desc->l_granted_mode = lock->l_granted_mode; - ldlm_convert_policy_to_wire(lock->l_resource->lr_type, - &lock->l_policy_data, - &desc->l_policy_data); -} - -/** - * Add a lock to list of conflicting locks to send AST to. - * - * Only add if we have not sent a blocking AST to the lock yet. - */ -static void ldlm_add_bl_work_item(struct ldlm_lock *lock, struct ldlm_lock *new, - struct list_head *work_list) -{ - if (!ldlm_is_ast_sent(lock)) { - LDLM_DEBUG(lock, "lock incompatible; sending blocking AST."); - ldlm_set_ast_sent(lock); - /* If the enqueuing client said so, tell the AST recipient to - * discard dirty data, rather than writing back. - */ - if (ldlm_is_ast_discard_data(new)) - ldlm_set_discard_data(lock); - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, work_list); - LDLM_LOCK_GET(lock); - LASSERT(!lock->l_blocking_lock); - lock->l_blocking_lock = LDLM_LOCK_GET(new); - } -} - -/** - * Add a lock to list of just granted locks to send completion AST to. - */ -static void ldlm_add_cp_work_item(struct ldlm_lock *lock, - struct list_head *work_list) -{ - if (!ldlm_is_cp_reqd(lock)) { - ldlm_set_cp_reqd(lock); - LDLM_DEBUG(lock, "lock granted; sending completion AST."); - LASSERT(list_empty(&lock->l_cp_ast)); - list_add(&lock->l_cp_ast, work_list); - LDLM_LOCK_GET(lock); - } -} - -/** - * Aggregator function to add AST work items into a list. Determines - * what sort of an AST work needs to be done and calls the proper - * adding function. - * Must be called with lr_lock held. 
- */ -static void ldlm_add_ast_work_item(struct ldlm_lock *lock, - struct ldlm_lock *new, - struct list_head *work_list) -{ - check_res_locked(lock->l_resource); - if (new) - ldlm_add_bl_work_item(lock, new, work_list); - else - ldlm_add_cp_work_item(lock, work_list); -} - -/** - * Add specified reader/writer reference to LDLM lock with handle \a lockh. - * r/w reference type is determined by \a mode - * Calls ldlm_lock_addref_internal. - */ -void ldlm_lock_addref(const struct lustre_handle *lockh, enum ldlm_mode mode) -{ - struct ldlm_lock *lock; - - lock = ldlm_handle2lock(lockh); - LASSERTF(lock, "Non-existing lock: %llx\n", lockh->cookie); - ldlm_lock_addref_internal(lock, mode); - LDLM_LOCK_PUT(lock); -} -EXPORT_SYMBOL(ldlm_lock_addref); - -/** - * Helper function. - * Add specified reader/writer reference to LDLM lock \a lock. - * r/w reference type is determined by \a mode - * Removes lock from LRU if it is there. - * Assumes the LDLM lock is already locked. - */ -void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock, - enum ldlm_mode mode) -{ - ldlm_lock_remove_from_lru(lock); - if (mode & (LCK_NL | LCK_CR | LCK_PR)) { - lock->l_readers++; - lu_ref_add_atomic(&lock->l_reference, "reader", lock); - } - if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) { - lock->l_writers++; - lu_ref_add_atomic(&lock->l_reference, "writer", lock); - } - LDLM_LOCK_GET(lock); - lu_ref_add_atomic(&lock->l_reference, "user", lock); - LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]); -} - -/** - * Attempts to add reader/writer reference to a lock with handle \a lockh, and - * fails if lock is already LDLM_FL_CBPENDING or destroyed. - * - * \retval 0 success, lock was addref-ed - * - * \retval -EAGAIN lock is being canceled. 
- */ -int ldlm_lock_addref_try(const struct lustre_handle *lockh, enum ldlm_mode mode) -{ - struct ldlm_lock *lock; - int result; - - result = -EAGAIN; - lock = ldlm_handle2lock(lockh); - if (lock) { - lock_res_and_lock(lock); - if (lock->l_readers != 0 || lock->l_writers != 0 || - !ldlm_is_cbpending(lock)) { - ldlm_lock_addref_internal_nolock(lock, mode); - result = 0; - } - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - } - return result; -} -EXPORT_SYMBOL(ldlm_lock_addref_try); - -/** - * Add specified reader/writer reference to LDLM lock \a lock. - * Locks LDLM lock and calls ldlm_lock_addref_internal_nolock to do the work. - * Only called for local locks. - */ -void ldlm_lock_addref_internal(struct ldlm_lock *lock, enum ldlm_mode mode) -{ - lock_res_and_lock(lock); - ldlm_lock_addref_internal_nolock(lock, mode); - unlock_res_and_lock(lock); -} - -/** - * Removes reader/writer reference for LDLM lock \a lock. - * Assumes LDLM lock is already locked. - * only called in ldlm_flock_destroy and for local locks. - * Does NOT add lock to LRU if no r/w references left to accommodate flock locks - * that cannot be placed in LRU. - */ -void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock, - enum ldlm_mode mode) -{ - LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); - if (mode & (LCK_NL | LCK_CR | LCK_PR)) { - LASSERT(lock->l_readers > 0); - lu_ref_del(&lock->l_reference, "reader", lock); - lock->l_readers--; - } - if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) { - LASSERT(lock->l_writers > 0); - lu_ref_del(&lock->l_reference, "writer", lock); - lock->l_writers--; - } - - lu_ref_del(&lock->l_reference, "user", lock); - LDLM_LOCK_RELEASE(lock); /* matches the LDLM_LOCK_GET() in addref */ -} - -/** - * Removes reader/writer reference for LDLM lock \a lock. - * Locks LDLM lock first. 
- * If the lock is determined to be client lock on a client and r/w refcount - * drops to zero and the lock is not blocked, the lock is added to LRU lock - * on the namespace. - * For blocked LDLM locks if r/w count drops to zero, blocking_ast is called. - */ -void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode) -{ - struct ldlm_namespace *ns; - - lock_res_and_lock(lock); - - ns = ldlm_lock_to_ns(lock); - - ldlm_lock_decref_internal_nolock(lock, mode); - - if ((ldlm_is_local(lock) || lock->l_req_mode == LCK_GROUP) && - !lock->l_readers && !lock->l_writers) { - /* If this is a local lock on a server namespace and this was - * the last reference, cancel the lock. - * - * Group locks are special: - * They must not go in LRU, but they are not called back - * like non-group locks, instead they are manually released. - * They have an l_writers reference which they keep until - * they are manually released, so we remove them when they have - * no more reader or writer references. - LU-6368 - */ - ldlm_set_cbpending(lock); - } - - if (!lock->l_readers && !lock->l_writers && ldlm_is_cbpending(lock)) { - /* If we received a blocked AST and this was the last reference, - * run the callback. - */ - LDLM_DEBUG(lock, "final decref done on cbpending lock"); - - LDLM_LOCK_GET(lock); /* dropped by bl thread */ - ldlm_lock_remove_from_lru(lock); - unlock_res_and_lock(lock); - - if (ldlm_is_fail_loc(lock)) - OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - - if (ldlm_is_atomic_cb(lock) || - ldlm_bl_to_thread_lock(ns, NULL, lock) != 0) - ldlm_handle_bl_callback(ns, NULL, lock); - } else if (!lock->l_readers && !lock->l_writers && - !ldlm_is_no_lru(lock) && !ldlm_is_bl_ast(lock)) { - LDLM_DEBUG(lock, "add lock into lru list"); - - /* If this is a client-side namespace and this was the last - * reference, put it on the LRU. 
- */ - ldlm_lock_add_to_lru(lock); - unlock_res_and_lock(lock); - - if (ldlm_is_fail_loc(lock)) - OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - - /* Call ldlm_cancel_lru() only if EARLY_CANCEL and LRU RESIZE - * are not supported by the server, otherwise, it is done on - * enqueue. - */ - if (!exp_connect_cancelset(lock->l_conn_export) && - !ns_connect_lru_resize(ns)) - ldlm_cancel_lru(ns, 0, LCF_ASYNC, 0); - } else { - LDLM_DEBUG(lock, "do not add lock into lru list"); - unlock_res_and_lock(lock); - } -} - -/** - * Decrease reader/writer refcount for LDLM lock with handle \a lockh - */ -void ldlm_lock_decref(const struct lustre_handle *lockh, enum ldlm_mode mode) -{ - struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); - - LASSERTF(lock, "Non-existing lock: %#llx\n", lockh->cookie); - ldlm_lock_decref_internal(lock, mode); - LDLM_LOCK_PUT(lock); -} -EXPORT_SYMBOL(ldlm_lock_decref); - -/** - * Decrease reader/writer refcount for LDLM lock with handle - * \a lockh and mark it for subsequent cancellation once r/w refcount - * drops to zero instead of putting into LRU. - */ -void ldlm_lock_decref_and_cancel(const struct lustre_handle *lockh, - enum ldlm_mode mode) -{ - struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); - - LASSERT(lock); - - LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); - lock_res_and_lock(lock); - ldlm_set_cbpending(lock); - unlock_res_and_lock(lock); - ldlm_lock_decref_internal(lock, mode); - LDLM_LOCK_PUT(lock); -} -EXPORT_SYMBOL(ldlm_lock_decref_and_cancel); - -struct sl_insert_point { - struct list_head *res_link; - struct list_head *mode_link; - struct list_head *policy_link; -}; - -/** - * Finds a position to insert the new lock into granted lock list. - * - * Used for locks eligible for skiplist optimization. 
- * - * Parameters: - * queue [input]: the granted list where search acts on; - * req [input]: the lock whose position to be located; - * prev [output]: positions within 3 lists to insert @req to - * Return Value: - * filled @prev - * NOTE: called by - * - ldlm_grant_lock_with_skiplist - */ -static void search_granted_lock(struct list_head *queue, - struct ldlm_lock *req, - struct sl_insert_point *prev) -{ - struct ldlm_lock *lock, *mode_end, *policy_end; - - list_for_each_entry(lock, queue, l_res_link) { - - mode_end = list_prev_entry(lock, l_sl_mode); - - if (lock->l_req_mode != req->l_req_mode) { - /* jump to last lock of mode group */ - lock = mode_end; - continue; - } - - /* suitable mode group is found */ - if (lock->l_resource->lr_type == LDLM_PLAIN) { - /* insert point is last lock of the mode group */ - prev->res_link = &mode_end->l_res_link; - prev->mode_link = &mode_end->l_sl_mode; - prev->policy_link = &req->l_sl_policy; - return; - } - - if (lock->l_resource->lr_type == LDLM_IBITS) { - for (;;) { - policy_end = - list_prev_entry(lock, l_sl_policy); - - if (lock->l_policy_data.l_inodebits.bits == - req->l_policy_data.l_inodebits.bits) { - /* insert point is last lock of - * the policy group - */ - prev->res_link = - &policy_end->l_res_link; - prev->mode_link = - &policy_end->l_sl_mode; - prev->policy_link = - &policy_end->l_sl_policy; - return; - } - - if (policy_end == mode_end) - /* done with mode group */ - break; - - /* go to next policy group within mode group */ - lock = list_next_entry(policy_end, l_res_link); - } /* loop over policy groups within the mode group */ - - /* insert point is last lock of the mode group, - * new policy group is started - */ - prev->res_link = &mode_end->l_res_link; - prev->mode_link = &mode_end->l_sl_mode; - prev->policy_link = &req->l_sl_policy; - return; - } - - LDLM_ERROR(lock, "is not LDLM_PLAIN or LDLM_IBITS lock"); - LBUG(); - } - - /* insert point is last lock on the queue, - * new mode group and new policy 
group are started - */ - prev->res_link = queue->prev; - prev->mode_link = &req->l_sl_mode; - prev->policy_link = &req->l_sl_policy; -} - -/** - * Add a lock into resource granted list after a position described by - * \a prev. - */ -static void ldlm_granted_list_add_lock(struct ldlm_lock *lock, - struct sl_insert_point *prev) -{ - struct ldlm_resource *res = lock->l_resource; - - check_res_locked(res); - - ldlm_resource_dump(D_INFO, res); - LDLM_DEBUG(lock, "About to add lock:"); - - if (ldlm_is_destroyed(lock)) { - CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); - return; - } - - LASSERT(list_empty(&lock->l_res_link)); - LASSERT(list_empty(&lock->l_sl_mode)); - LASSERT(list_empty(&lock->l_sl_policy)); - - /* - * lock->link == prev->link means lock is first starting the group. - * Don't re-add to itself to suppress kernel warnings. - */ - if (&lock->l_res_link != prev->res_link) - list_add(&lock->l_res_link, prev->res_link); - if (&lock->l_sl_mode != prev->mode_link) - list_add(&lock->l_sl_mode, prev->mode_link); - if (&lock->l_sl_policy != prev->policy_link) - list_add(&lock->l_sl_policy, prev->policy_link); -} - -/** - * Add a lock to granted list on a resource maintaining skiplist - * correctness. - */ -static void ldlm_grant_lock_with_skiplist(struct ldlm_lock *lock) -{ - struct sl_insert_point prev; - - LASSERT(lock->l_req_mode == lock->l_granted_mode); - - search_granted_lock(&lock->l_resource->lr_granted, lock, &prev); - ldlm_granted_list_add_lock(lock, &prev); -} - -/** - * Perform lock granting bookkeeping. - * - * Includes putting the lock into granted list and updating lock mode. 
- * NOTE: called by - * - ldlm_lock_enqueue - * - ldlm_reprocess_queue - * - ldlm_lock_convert - * - * must be called with lr_lock held - */ -void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list) -{ - struct ldlm_resource *res = lock->l_resource; - - check_res_locked(res); - - lock->l_granted_mode = lock->l_req_mode; - - if (work_list && lock->l_completion_ast) - ldlm_add_ast_work_item(lock, NULL, work_list); - - if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS) { - ldlm_grant_lock_with_skiplist(lock); - } else if (res->lr_type == LDLM_EXTENT) { - ldlm_extent_add_lock(res, lock); - } else if (res->lr_type == LDLM_FLOCK) { - /* - * We should not add locks to granted list in - * the following cases: - * - this is an UNLOCK but not a real lock; - * - this is a TEST lock; - * - this is a F_CANCELLK lock (async flock has req_mode == 0) - * - this is a deadlock (flock cannot be granted) - */ - if (!lock->l_req_mode || lock->l_req_mode == LCK_NL || - ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock)) - return; - ldlm_resource_add_lock(res, &res->lr_granted, lock); - } else { - LBUG(); - } - - ldlm_pool_add(&ldlm_res_to_ns(res)->ns_pool, lock); -} - -/** - * Describe the overlap between two locks. itree_overlap_cb data. - */ -struct lock_match_data { - struct ldlm_lock *lmd_old; - struct ldlm_lock *lmd_lock; - enum ldlm_mode *lmd_mode; - union ldlm_policy_data *lmd_policy; - __u64 lmd_flags; - int lmd_unref; -}; - -/** - * Check if the given @lock meets the criteria for a match. - * A reference on the lock is taken if matched. - * - * \param lock test-against this lock - * \param data parameters - */ -static int lock_matches(struct ldlm_lock *lock, struct lock_match_data *data) -{ - union ldlm_policy_data *lpol = &lock->l_policy_data; - enum ldlm_mode match; - - if (lock == data->lmd_old) - return INTERVAL_ITER_STOP; - - /* - * Check if this lock can be matched. 
- * Used by LU-2919(exclusive open) for open lease lock - */ - if (ldlm_is_excl(lock)) - return INTERVAL_ITER_CONT; - - /* - * llite sometimes wants to match locks that will be - * canceled when their users drop, but we allow it to match - * if it passes in CBPENDING and the lock still has users. - * this is generally only going to be used by children - * whose parents already hold a lock so forward progress - * can still happen. - */ - if (ldlm_is_cbpending(lock) && - !(data->lmd_flags & LDLM_FL_CBPENDING)) - return INTERVAL_ITER_CONT; - - if (!data->lmd_unref && ldlm_is_cbpending(lock) && - !lock->l_readers && !lock->l_writers) - return INTERVAL_ITER_CONT; - - if (!(lock->l_req_mode & *data->lmd_mode)) - return INTERVAL_ITER_CONT; - match = lock->l_req_mode; - - switch (lock->l_resource->lr_type) { - case LDLM_EXTENT: - if (lpol->l_extent.start > data->lmd_policy->l_extent.start || - lpol->l_extent.end < data->lmd_policy->l_extent.end) - return INTERVAL_ITER_CONT; - - if (unlikely(match == LCK_GROUP) && - data->lmd_policy->l_extent.gid != LDLM_GID_ANY && - lpol->l_extent.gid != data->lmd_policy->l_extent.gid) - return INTERVAL_ITER_CONT; - break; - case LDLM_IBITS: - /* - * We match if we have existing lock with same or wider set - * of bits. - */ - if ((lpol->l_inodebits.bits & - data->lmd_policy->l_inodebits.bits) != - data->lmd_policy->l_inodebits.bits) - return INTERVAL_ITER_CONT; - break; - default: - break; - } - /* - * We match if we have existing lock with same or wider set - * of bits. 
- */ - if (!data->lmd_unref && LDLM_HAVE_MASK(lock, GONE)) - return INTERVAL_ITER_CONT; - - if (!equi(data->lmd_flags & LDLM_FL_LOCAL_ONLY, ldlm_is_local(lock))) - return INTERVAL_ITER_CONT; - - if (data->lmd_flags & LDLM_FL_TEST_LOCK) { - LDLM_LOCK_GET(lock); - ldlm_lock_touch_in_lru(lock); - } else { - ldlm_lock_addref_internal_nolock(lock, match); - } - - *data->lmd_mode = match; - data->lmd_lock = lock; - - return INTERVAL_ITER_STOP; -} - -static enum interval_iter itree_overlap_cb(struct interval_node *in, void *args) -{ - struct ldlm_interval *node = to_ldlm_interval(in); - struct lock_match_data *data = args; - struct ldlm_lock *lock; - int rc; - - list_for_each_entry(lock, &node->li_group, l_sl_policy) { - rc = lock_matches(lock, data); - if (rc == INTERVAL_ITER_STOP) - return INTERVAL_ITER_STOP; - } - return INTERVAL_ITER_CONT; -} - -/** - * Search for a lock with given parameters in interval trees. - * - * \param res search for a lock in this resource - * \param data parameters - * - * \retval a referenced lock or NULL. - */ -static struct ldlm_lock *search_itree(struct ldlm_resource *res, - struct lock_match_data *data) -{ - struct interval_node_extent ext = { - .start = data->lmd_policy->l_extent.start, - .end = data->lmd_policy->l_extent.end - }; - int idx; - - for (idx = 0; idx < LCK_MODE_NUM; idx++) { - struct ldlm_interval_tree *tree = &res->lr_itree[idx]; - - if (!tree->lit_root) - continue; - - if (!(tree->lit_mode & *data->lmd_mode)) - continue; - - interval_search(tree->lit_root, &ext, - itree_overlap_cb, data); - } - return data->lmd_lock; -} - -/** - * Search for a lock with given properties in a queue. - * - * \param queue search for a lock in this queue - * \param data parameters - * - * \retval a referenced lock or NULL. 
- */ -static struct ldlm_lock *search_queue(struct list_head *queue, - struct lock_match_data *data) -{ - struct ldlm_lock *lock; - int rc; - - list_for_each_entry(lock, queue, l_res_link) { - rc = lock_matches(lock, data); - if (rc == INTERVAL_ITER_STOP) - return data->lmd_lock; - } - return NULL; -} - -void ldlm_lock_fail_match_locked(struct ldlm_lock *lock) -{ - if ((lock->l_flags & LDLM_FL_FAIL_NOTIFIED) == 0) { - lock->l_flags |= LDLM_FL_FAIL_NOTIFIED; - wake_up_all(&lock->l_waitq); - } -} - -/** - * Mark lock as "matchable" by OST. - * - * Used to prevent certain races in LOV/OSC where the lock is granted, but LVB - * is not yet valid. - * Assumes LDLM lock is already locked. - */ -void ldlm_lock_allow_match_locked(struct ldlm_lock *lock) -{ - ldlm_set_lvb_ready(lock); - wake_up_all(&lock->l_waitq); -} -EXPORT_SYMBOL(ldlm_lock_allow_match_locked); - -/** - * Mark lock as "matchable" by OST. - * Locks the lock and then \see ldlm_lock_allow_match_locked - */ -void ldlm_lock_allow_match(struct ldlm_lock *lock) -{ - lock_res_and_lock(lock); - ldlm_lock_allow_match_locked(lock); - unlock_res_and_lock(lock); -} -EXPORT_SYMBOL(ldlm_lock_allow_match); - -/** - * Attempt to find a lock with specified properties. - * - * Typically returns a reference to matched lock unless LDLM_FL_TEST_LOCK is - * set in \a flags - * - * Can be called in two ways: - * - * If 'ns' is NULL, then lockh describes an existing lock that we want to look - * for a duplicate of. - * - * Otherwise, all of the fields must be filled in, to match against. 
- * - * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the - * server (ie, connh is NULL) - * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted - * list will be considered - * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked - * to be canceled can still be matched as long as they still have reader - * or writer referneces - * If 'flags' contains LDLM_FL_TEST_LOCK, then don't actually reference a lock, - * just tell us if we would have matched. - * - * \retval 1 if it finds an already-existing lock that is compatible; in this - * case, lockh is filled in with a addref()ed lock - * - * We also check security context, and if that fails we simply return 0 (to - * keep caller code unchanged), the context failure will be discovered by - * caller sometime later. - */ -enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags, - const struct ldlm_res_id *res_id, - enum ldlm_type type, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - struct lustre_handle *lockh, int unref) -{ - struct lock_match_data data = { - .lmd_old = NULL, - .lmd_lock = NULL, - .lmd_mode = &mode, - .lmd_policy = policy, - .lmd_flags = flags, - .lmd_unref = unref, - }; - struct ldlm_resource *res; - struct ldlm_lock *lock; - int rc = 0; - - if (!ns) { - data.lmd_old = ldlm_handle2lock(lockh); - LASSERT(data.lmd_old); - - ns = ldlm_lock_to_ns(data.lmd_old); - res_id = &data.lmd_old->l_resource->lr_name; - type = data.lmd_old->l_resource->lr_type; - *data.lmd_mode = data.lmd_old->l_req_mode; - } - - res = ldlm_resource_get(ns, NULL, res_id, type, 0); - if (IS_ERR(res)) { - LASSERT(!data.lmd_old); - return 0; - } - - LDLM_RESOURCE_ADDREF(res); - lock_res(res); - - if (res->lr_type == LDLM_EXTENT) - lock = search_itree(res, &data); - else - lock = search_queue(&res->lr_granted, &data); - if (lock) { - rc = 1; - goto out; - } - if (flags & LDLM_FL_BLOCK_GRANTED) { - rc = 0; - goto out; - } - lock = 
search_queue(&res->lr_waiting, &data); - if (lock) { - rc = 1; - goto out; - } -out: - unlock_res(res); - LDLM_RESOURCE_DELREF(res); - ldlm_resource_putref(res); - - if (lock) { - ldlm_lock2handle(lock, lockh); - if ((flags & LDLM_FL_LVB_READY) && !ldlm_is_lvb_ready(lock)) { - __u64 wait_flags = LDLM_FL_LVB_READY | - LDLM_FL_DESTROYED | LDLM_FL_FAIL_NOTIFIED; - - if (lock->l_completion_ast) { - int err = lock->l_completion_ast(lock, - LDLM_FL_WAIT_NOREPROC, - NULL); - if (err) { - if (flags & LDLM_FL_TEST_LOCK) - LDLM_LOCK_RELEASE(lock); - else - ldlm_lock_decref_internal(lock, - mode); - rc = 0; - goto out2; - } - } - - /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */ - wait_event_idle_timeout(lock->l_waitq, - lock->l_flags & wait_flags, - obd_timeout * HZ); - if (!ldlm_is_lvb_ready(lock)) { - if (flags & LDLM_FL_TEST_LOCK) - LDLM_LOCK_RELEASE(lock); - else - ldlm_lock_decref_internal(lock, mode); - rc = 0; - } - } - } - out2: - if (rc) { - LDLM_DEBUG(lock, "matched (%llu %llu)", - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[2] : policy->l_extent.start, - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[3] : policy->l_extent.end); - - /* check user's security context */ - if (lock->l_conn_export && - sptlrpc_import_check_ctx( - class_exp2cliimp(lock->l_conn_export))) { - if (!(flags & LDLM_FL_TEST_LOCK)) - ldlm_lock_decref_internal(lock, mode); - rc = 0; - } - - if (flags & LDLM_FL_TEST_LOCK) - LDLM_LOCK_RELEASE(lock); - - } else if (!(flags & LDLM_FL_TEST_LOCK)) {/*less verbose for test-only*/ - LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res %llu/%llu (%llu %llu)", - ns, type, mode, res_id->name[0], - res_id->name[1], - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[2] : policy->l_extent.start, - (type == LDLM_PLAIN || type == LDLM_IBITS) ? - res_id->name[3] : policy->l_extent.end); - } - if (data.lmd_old) - LDLM_LOCK_PUT(data.lmd_old); - - return rc ? 
mode : 0; -} -EXPORT_SYMBOL(ldlm_lock_match); - -enum ldlm_mode ldlm_revalidate_lock_handle(const struct lustre_handle *lockh, - __u64 *bits) -{ - struct ldlm_lock *lock; - enum ldlm_mode mode = 0; - - lock = ldlm_handle2lock(lockh); - if (lock) { - lock_res_and_lock(lock); - if (LDLM_HAVE_MASK(lock, GONE)) - goto out; - - if (ldlm_is_cbpending(lock) && - lock->l_readers == 0 && lock->l_writers == 0) - goto out; - - if (bits) - *bits = lock->l_policy_data.l_inodebits.bits; - mode = lock->l_granted_mode; - ldlm_lock_addref_internal_nolock(lock, mode); - } - -out: - if (lock) { - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - } - return mode; -} -EXPORT_SYMBOL(ldlm_revalidate_lock_handle); - -/** The caller must guarantee that the buffer is large enough. */ -int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill, - enum req_location loc, void *data, int size) -{ - void *lvb; - - LASSERT(data); - LASSERT(size >= 0); - - switch (lock->l_lvb_type) { - case LVB_T_OST: - if (size == sizeof(struct ost_lvb)) { - if (loc == RCL_CLIENT) - lvb = req_capsule_client_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_ost_lvb); - else - lvb = req_capsule_server_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_ost_lvb); - if (unlikely(!lvb)) { - LDLM_ERROR(lock, "no LVB"); - return -EPROTO; - } - - memcpy(data, lvb, size); - } else if (size == sizeof(struct ost_lvb_v1)) { - struct ost_lvb *olvb = data; - - if (loc == RCL_CLIENT) - lvb = req_capsule_client_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_ost_lvb_v1); - else - lvb = req_capsule_server_sized_swab_get(pill, - &RMF_DLM_LVB, size, - lustre_swab_ost_lvb_v1); - if (unlikely(!lvb)) { - LDLM_ERROR(lock, "no LVB"); - return -EPROTO; - } - - memcpy(data, lvb, size); - olvb->lvb_mtime_ns = 0; - olvb->lvb_atime_ns = 0; - olvb->lvb_ctime_ns = 0; - } else { - LDLM_ERROR(lock, "Replied unexpected ost LVB size %d", - size); - return -EINVAL; - } - break; - case LVB_T_LQUOTA: - if (size == sizeof(struct lquota_lvb)) { - if (loc == 
RCL_CLIENT) - lvb = req_capsule_client_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_lquota_lvb); - else - lvb = req_capsule_server_swab_get(pill, - &RMF_DLM_LVB, - lustre_swab_lquota_lvb); - if (unlikely(!lvb)) { - LDLM_ERROR(lock, "no LVB"); - return -EPROTO; - } - - memcpy(data, lvb, size); - } else { - LDLM_ERROR(lock, - "Replied unexpected lquota LVB size %d", - size); - return -EINVAL; - } - break; - case LVB_T_LAYOUT: - if (size == 0) - break; - - if (loc == RCL_CLIENT) - lvb = req_capsule_client_get(pill, &RMF_DLM_LVB); - else - lvb = req_capsule_server_get(pill, &RMF_DLM_LVB); - if (unlikely(!lvb)) { - LDLM_ERROR(lock, "no LVB"); - return -EPROTO; - } - - memcpy(data, lvb, size); - break; - default: - LDLM_ERROR(lock, "Unknown LVB type: %d", lock->l_lvb_type); - dump_stack(); - return -EINVAL; - } - - return 0; -} - -/** - * Create and fill in new LDLM lock with specified properties. - * Returns a referenced lock - */ -struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - enum ldlm_type type, - enum ldlm_mode mode, - const struct ldlm_callback_suite *cbs, - void *data, __u32 lvb_len, - enum lvb_type lvb_type) -{ - struct ldlm_lock *lock; - struct ldlm_resource *res; - int rc; - - res = ldlm_resource_get(ns, NULL, res_id, type, 1); - if (IS_ERR(res)) - return ERR_CAST(res); - - lock = ldlm_lock_new(res); - if (!lock) { - ldlm_resource_putref(res); - return ERR_PTR(-ENOMEM); - } - - lock->l_req_mode = mode; - lock->l_ast_data = data; - lock->l_pid = current->pid; - if (cbs) { - lock->l_blocking_ast = cbs->lcs_blocking; - lock->l_completion_ast = cbs->lcs_completion; - lock->l_glimpse_ast = cbs->lcs_glimpse; - } - - lock->l_tree_node = NULL; - /* if this is the extent lock, allocate the interval tree node */ - if (type == LDLM_EXTENT) { - if (!ldlm_interval_alloc(lock)) { - rc = -ENOMEM; - goto out; - } - } - - if (lvb_len) { - lock->l_lvb_len = lvb_len; - lock->l_lvb_data = kzalloc(lvb_len, GFP_NOFS); - if 
(!lock->l_lvb_data) { - rc = -ENOMEM; - goto out; - } - } - - lock->l_lvb_type = lvb_type; - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_NEW_LOCK)) { - rc = -ENOENT; - goto out; - } - - return lock; - -out: - ldlm_lock_destroy(lock); - LDLM_LOCK_RELEASE(lock); - return ERR_PTR(rc); -} - - - -/** - * Enqueue (request) a lock. - * On the client this is called from ldlm_cli_enqueue_fini - * after we already got an initial reply from the server with some status. - * - * Does not block. As a result of enqueue the lock would be put - * into granted or waiting list. - */ -enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns, - struct ldlm_lock **lockp, - void *cookie, __u64 *flags) -{ - struct ldlm_lock *lock = *lockp; - struct ldlm_resource *res = lock->l_resource; - - lock_res_and_lock(lock); - if (lock->l_req_mode == lock->l_granted_mode) { - /* The server returned a blocked lock, but it was granted - * before we got a chance to actually enqueue it. We don't - * need to do anything else. - */ - *flags &= ~LDLM_FL_BLOCKED_MASK; - goto out; - } - - ldlm_resource_unlink_lock(lock); - - /* Cannot happen unless on the server */ - if (res->lr_type == LDLM_EXTENT && !lock->l_tree_node) - LBUG(); - - /* Some flags from the enqueue want to make it into the AST, via the - * lock's l_flags. - */ - if (*flags & LDLM_FL_AST_DISCARD_DATA) - ldlm_set_ast_discard_data(lock); - if (*flags & LDLM_FL_TEST_LOCK) - ldlm_set_test_lock(lock); - - /* - * This distinction between local lock trees is very important; a client - * namespace only has information about locks taken by that client, and - * thus doesn't have enough information to decide for itself if it can - * be granted (below). In this case, we do exactly what the server - * tells us to do, as dictated by the 'flags'. 
- */ - if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED)) - ldlm_resource_add_lock(res, &res->lr_waiting, lock); - else - ldlm_grant_lock(lock, NULL); - -out: - unlock_res_and_lock(lock); - return ELDLM_OK; -} - -/** - * Process a call to blocking AST callback for a lock in ast_work list - */ -static int -ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) -{ - struct ldlm_cb_set_arg *arg = opaq; - struct ldlm_lock_desc d; - int rc; - struct ldlm_lock *lock; - - if (list_empty(arg->list)) - return -ENOENT; - - lock = list_first_entry(arg->list, struct ldlm_lock, l_bl_ast); - - /* nobody should touch l_bl_ast */ - lock_res_and_lock(lock); - list_del_init(&lock->l_bl_ast); - - LASSERT(ldlm_is_ast_sent(lock)); - LASSERT(lock->l_bl_ast_run == 0); - LASSERT(lock->l_blocking_lock); - lock->l_bl_ast_run++; - unlock_res_and_lock(lock); - - ldlm_lock2desc(lock->l_blocking_lock, &d); - - rc = lock->l_blocking_ast(lock, &d, (void *)arg, LDLM_CB_BLOCKING); - LDLM_LOCK_RELEASE(lock->l_blocking_lock); - lock->l_blocking_lock = NULL; - LDLM_LOCK_RELEASE(lock); - - return rc; -} - -/** - * Process a call to completion AST callback for a lock in ast_work list - */ -static int -ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) -{ - struct ldlm_cb_set_arg *arg = opaq; - int rc = 0; - struct ldlm_lock *lock; - ldlm_completion_callback completion_callback; - - if (list_empty(arg->list)) - return -ENOENT; - - lock = list_first_entry(arg->list, struct ldlm_lock, l_cp_ast); - - /* It's possible to receive a completion AST before we've set - * the l_completion_ast pointer: either because the AST arrived - * before the reply, or simply because there's a small race - * window between receiving the reply and finishing the local - * enqueue. 
(bug 842) - * - * This can't happen with the blocking_ast, however, because we - * will never call the local blocking_ast until we drop our - * reader/writer reference, which we won't do until we get the - * reply and finish enqueueing. - */ - - /* nobody should touch l_cp_ast */ - lock_res_and_lock(lock); - list_del_init(&lock->l_cp_ast); - LASSERT(ldlm_is_cp_reqd(lock)); - /* save l_completion_ast since it can be changed by - * mds_intent_policy(), see bug 14225 - */ - completion_callback = lock->l_completion_ast; - ldlm_clear_cp_reqd(lock); - unlock_res_and_lock(lock); - - if (completion_callback) - rc = completion_callback(lock, 0, (void *)arg); - LDLM_LOCK_RELEASE(lock); - - return rc; -} - -/** - * Process a call to revocation AST callback for a lock in ast_work list - */ -static int -ldlm_work_revoke_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) -{ - struct ldlm_cb_set_arg *arg = opaq; - struct ldlm_lock_desc desc; - int rc; - struct ldlm_lock *lock; - - if (list_empty(arg->list)) - return -ENOENT; - - lock = list_first_entry(arg->list, struct ldlm_lock, l_rk_ast); - list_del_init(&lock->l_rk_ast); - - /* the desc just pretend to exclusive */ - ldlm_lock2desc(lock, &desc); - desc.l_req_mode = LCK_EX; - desc.l_granted_mode = 0; - - rc = lock->l_blocking_ast(lock, &desc, (void *)arg, LDLM_CB_BLOCKING); - LDLM_LOCK_RELEASE(lock); - - return rc; -} - -/** - * Process a call to glimpse AST callback for a lock in ast_work list - */ -static int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq) -{ - struct ldlm_cb_set_arg *arg = opaq; - struct ldlm_glimpse_work *gl_work; - struct ldlm_lock *lock; - int rc = 0; - - if (list_empty(arg->list)) - return -ENOENT; - - gl_work = list_first_entry(arg->list, struct ldlm_glimpse_work, - gl_list); - list_del_init(&gl_work->gl_list); - - lock = gl_work->gl_lock; - - /* transfer the glimpse descriptor to ldlm_cb_set_arg */ - arg->gl_desc = gl_work->gl_desc; - - /* invoke the actual glimpse callback */ 
- if (lock->l_glimpse_ast(lock, (void *)arg) == 0) - rc = 1; - - LDLM_LOCK_RELEASE(lock); - - if ((gl_work->gl_flags & LDLM_GL_WORK_NOFREE) == 0) - kfree(gl_work); - - return rc; -} - -/** - * Process list of locks in need of ASTs being sent. - * - * Used on server to send multiple ASTs together instead of sending one by - * one. - */ -int ldlm_run_ast_work(struct ldlm_namespace *ns, struct list_head *rpc_list, - enum ldlm_desc_ast_t ast_type) -{ - struct ldlm_cb_set_arg *arg; - set_producer_func work_ast_lock; - int rc; - - if (list_empty(rpc_list)) - return 0; - - arg = kzalloc(sizeof(*arg), GFP_NOFS); - if (!arg) - return -ENOMEM; - - atomic_set(&arg->restart, 0); - arg->list = rpc_list; - - switch (ast_type) { - case LDLM_WORK_BL_AST: - arg->type = LDLM_BL_CALLBACK; - work_ast_lock = ldlm_work_bl_ast_lock; - break; - case LDLM_WORK_CP_AST: - arg->type = LDLM_CP_CALLBACK; - work_ast_lock = ldlm_work_cp_ast_lock; - break; - case LDLM_WORK_REVOKE_AST: - arg->type = LDLM_BL_CALLBACK; - work_ast_lock = ldlm_work_revoke_ast_lock; - break; - case LDLM_WORK_GL_AST: - arg->type = LDLM_GL_CALLBACK; - work_ast_lock = ldlm_work_gl_ast_lock; - break; - default: - LBUG(); - } - - /* We create a ptlrpc request set with flow control extension. - * This request set will use the work_ast_lock function to produce new - * requests and will send a new request each time one completes in order - * to keep the number of requests in flight to ns_max_parallel_ast - */ - arg->set = ptlrpc_prep_fcset(ns->ns_max_parallel_ast ? : UINT_MAX, - work_ast_lock, arg); - if (!arg->set) { - rc = -ENOMEM; - goto out; - } - - ptlrpc_set_wait(arg->set); - ptlrpc_set_destroy(arg->set); - - rc = atomic_read(&arg->restart) ? 
-ERESTART : 0; - goto out; -out: - kfree(arg); - return rc; -} - -static bool is_bl_done(struct ldlm_lock *lock) -{ - bool bl_done = true; - - if (!ldlm_is_bl_done(lock)) { - lock_res_and_lock(lock); - bl_done = ldlm_is_bl_done(lock); - unlock_res_and_lock(lock); - } - - return bl_done; -} - -/** - * Helper function to call blocking AST for LDLM lock \a lock in a - * "cancelling" mode. - */ -void ldlm_cancel_callback(struct ldlm_lock *lock) -{ - check_res_locked(lock->l_resource); - if (!ldlm_is_cancel(lock)) { - ldlm_set_cancel(lock); - if (lock->l_blocking_ast) { - unlock_res_and_lock(lock); - lock->l_blocking_ast(lock, NULL, lock->l_ast_data, - LDLM_CB_CANCELING); - lock_res_and_lock(lock); - } else { - LDLM_DEBUG(lock, "no blocking ast"); - } - /* only canceller can set bl_done bit */ - ldlm_set_bl_done(lock); - wake_up_all(&lock->l_waitq); - } else if (!ldlm_is_bl_done(lock)) { - /* - * The lock is guaranteed to have been canceled once - * returning from this function. - */ - unlock_res_and_lock(lock); - wait_event_idle(lock->l_waitq, is_bl_done(lock)); - lock_res_and_lock(lock); - } -} - -/** - * Remove skiplist-enabled LDLM lock \a req from granted list - */ -void ldlm_unlink_lock_skiplist(struct ldlm_lock *req) -{ - if (req->l_resource->lr_type != LDLM_PLAIN && - req->l_resource->lr_type != LDLM_IBITS) - return; - - list_del_init(&req->l_sl_policy); - list_del_init(&req->l_sl_mode); -} - -/** - * Attempts to cancel LDLM lock \a lock that has no reader/writer references. - */ -void ldlm_lock_cancel(struct ldlm_lock *lock) -{ - struct ldlm_resource *res; - struct ldlm_namespace *ns; - - lock_res_and_lock(lock); - - res = lock->l_resource; - ns = ldlm_res_to_ns(res); - - /* Please do not, no matter how tempting, remove this LBUG without - * talking to me first. -phik - */ - if (lock->l_readers || lock->l_writers) { - LDLM_ERROR(lock, "lock still has references"); - LBUG(); - } - - /* Releases cancel callback. 
*/ - ldlm_cancel_callback(lock); - - ldlm_resource_unlink_lock(lock); - ldlm_lock_destroy_nolock(lock); - - if (lock->l_granted_mode == lock->l_req_mode) - ldlm_pool_del(&ns->ns_pool, lock); - - /* Make sure we will not be called again for same lock what is possible - * if not to zero out lock->l_granted_mode - */ - lock->l_granted_mode = LCK_MINMODE; - unlock_res_and_lock(lock); -} -EXPORT_SYMBOL(ldlm_lock_cancel); - -/** - * Set opaque data into the lock that only makes sense to upper layer. - */ -int ldlm_lock_set_data(const struct lustre_handle *lockh, void *data) -{ - struct ldlm_lock *lock = ldlm_handle2lock(lockh); - int rc = -EINVAL; - - if (lock) { - if (!lock->l_ast_data) - lock->l_ast_data = data; - if (lock->l_ast_data == data) - rc = 0; - LDLM_LOCK_PUT(lock); - } - return rc; -} -EXPORT_SYMBOL(ldlm_lock_set_data); - -struct export_cl_data { - struct obd_export *ecl_exp; - int ecl_loop; -}; - -/** - * Print lock with lock handle \a lockh description into debug log. - * - * Used when printing all locks on a resource for debug purposes. - */ -void ldlm_lock_dump_handle(int level, const struct lustre_handle *lockh) -{ - struct ldlm_lock *lock; - - if (!((libcfs_debug | D_ERROR) & level)) - return; - - lock = ldlm_handle2lock(lockh); - if (!lock) - return; - - LDLM_DEBUG_LIMIT(level, lock, "###"); - - LDLM_LOCK_PUT(lock); -} -EXPORT_SYMBOL(ldlm_lock_dump_handle); - -/** - * Print lock information with custom message into debug log. - * Helper function. - */ -void _ldlm_lock_debug(struct ldlm_lock *lock, - struct libcfs_debug_msg_data *msgdata, - const char *fmt, ...) 
-{ - va_list args; - struct obd_export *exp = lock->l_export; - struct ldlm_resource *resource = lock->l_resource; - char *nid = "local"; - - va_start(args, fmt); - - if (exp && exp->exp_connection) { - nid = libcfs_nid2str(exp->exp_connection->c_peer.nid); - } else if (exp && exp->exp_obd) { - struct obd_import *imp = exp->exp_obd->u.cli.cl_import; - - nid = libcfs_nid2str(imp->imp_connection->c_peer.nid); - } - - if (!resource) { - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: \?\? lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: \?\? rrc=\?\? type: \?\?\? flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n", - lock, - lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, - lock->l_lvb_type); - va_end(args); - return; - } - - switch (resource->lr_type) { - case LDLM_EXTENT: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s [%llu->%llu] (req %llu->%llu) flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n", - ldlm_lock_to_ns_name(lock), lock, - lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - PLDLMRES(resource), - atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_policy_data.l_extent.start, - lock->l_policy_data.l_extent.end, - lock->l_req_extent.start, - lock->l_req_extent.end, - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? 
atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, - lock->l_lvb_type); - break; - - case LDLM_FLOCK: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s pid: %d [%llu->%llu] flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu\n", - ldlm_lock_to_ns_name(lock), lock, - lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - PLDLMRES(resource), - atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_policy_data.l_flock.pid, - lock->l_policy_data.l_flock.start, - lock->l_policy_data.l_flock.end, - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout); - break; - - case LDLM_IBITS: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx rrc: %d type: %s flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n", - ldlm_lock_to_ns_name(lock), - lock, lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - PLDLMRES(resource), - lock->l_policy_data.l_inodebits.bits, - atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? 
atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, - lock->l_lvb_type); - break; - - default: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " rrc: %d type: %s flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lu lvb_type: %d\n", - ldlm_lock_to_ns_name(lock), - lock, lock->l_handle.h_cookie, - atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - PLDLMRES(resource), - atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_flags, nid, - lock->l_remote_handle.cookie, - exp ? atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, - lock->l_lvb_type); - break; - } - va_end(args); -} -EXPORT_SYMBOL(_ldlm_lock_debug); diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c deleted file mode 100644 index 5963e90d0938..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c +++ /dev/null @@ -1,1163 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_lockd.c - * - * Author: Peter Braam - * Author: Phil Schwan - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include -#include -#include -#include -#include -#include "ldlm_internal.h" - -static int ldlm_num_threads; -module_param(ldlm_num_threads, int, 0444); -MODULE_PARM_DESC(ldlm_num_threads, "number of DLM service threads to start"); - -static char *ldlm_cpts; -module_param(ldlm_cpts, charp, 0444); -MODULE_PARM_DESC(ldlm_cpts, "CPU partitions ldlm threads should run on"); - -static struct mutex ldlm_ref_mutex; -static int ldlm_refcount; - -static struct kobject *ldlm_kobj; -struct kset *ldlm_ns_kset; -static struct kset *ldlm_svc_kset; - -struct ldlm_cb_async_args { - struct ldlm_cb_set_arg *ca_set_arg; - struct ldlm_lock *ca_lock; -}; - -/* LDLM state */ - -static struct ldlm_state *ldlm_state; - -#define ELT_STOPPED 0 -#define ELT_READY 1 -#define ELT_TERMINATE 2 - -struct ldlm_bl_pool { - spinlock_t blp_lock; - - /* - * blp_prio_list is used for callbacks that should be handled - * as a priority. It is used for LDLM_FL_DISCARD_DATA requests. - * see bug 13843 - */ - struct list_head blp_prio_list; - - /* - * blp_list is used for all other callbacks which are likely - * to take longer to process. 
- */ - struct list_head blp_list; - - wait_queue_head_t blp_waitq; - struct completion blp_comp; - atomic_t blp_num_threads; - atomic_t blp_busy_threads; - int blp_min_threads; - int blp_max_threads; -}; - -struct ldlm_bl_work_item { - struct list_head blwi_entry; - struct ldlm_namespace *blwi_ns; - struct ldlm_lock_desc blwi_ld; - struct ldlm_lock *blwi_lock; - struct list_head blwi_head; - int blwi_count; - struct completion blwi_comp; - enum ldlm_cancel_flags blwi_flags; - int blwi_mem_pressure; -}; - -/** - * Callback handler for receiving incoming blocking ASTs. - * - * This can only happen on client side. - */ -void ldlm_handle_bl_callback(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, struct ldlm_lock *lock) -{ - int do_ast; - - LDLM_DEBUG(lock, "client blocking AST callback handler"); - - lock_res_and_lock(lock); - ldlm_set_cbpending(lock); - - if (ldlm_is_cancel_on_block(lock)) - ldlm_set_cancel(lock); - - do_ast = !lock->l_readers && !lock->l_writers; - unlock_res_and_lock(lock); - - if (do_ast) { - CDEBUG(D_DLMTRACE, - "Lock %p already unused, calling callback (%p)\n", lock, - lock->l_blocking_ast); - if (lock->l_blocking_ast) - lock->l_blocking_ast(lock, ld, lock->l_ast_data, - LDLM_CB_BLOCKING); - } else { - CDEBUG(D_DLMTRACE, - "Lock %p is referenced, will be cancelled later\n", - lock); - } - - LDLM_DEBUG(lock, "client blocking callback handler END"); - LDLM_LOCK_RELEASE(lock); -} - -/** - * Callback handler for receiving incoming completion ASTs. - * - * This only can happen on client side. 
- */ -static void ldlm_handle_cp_callback(struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct ldlm_request *dlm_req, - struct ldlm_lock *lock) -{ - int lvb_len; - LIST_HEAD(ast_list); - int rc = 0; - - LDLM_DEBUG(lock, "client completion callback handler START"); - - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) { - int to = HZ; - - while (to > 0) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(to); - if (lock->l_granted_mode == lock->l_req_mode || - ldlm_is_destroyed(lock)) - break; - } - } - - lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT); - if (lvb_len < 0) { - LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", lvb_len); - rc = lvb_len; - goto out; - } else if (lvb_len > 0) { - if (lock->l_lvb_len > 0) { - /* for extent lock, lvb contains ost_lvb{}. */ - LASSERT(lock->l_lvb_data); - - if (unlikely(lock->l_lvb_len < lvb_len)) { - LDLM_ERROR(lock, - "Replied LVB is larger than expectation, expected = %d, replied = %d", - lock->l_lvb_len, lvb_len); - rc = -EINVAL; - goto out; - } - } else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has - * variable length - */ - void *lvb_data; - - lvb_data = kzalloc(lvb_len, GFP_NOFS); - if (!lvb_data) { - LDLM_ERROR(lock, "No memory: %d.\n", lvb_len); - rc = -ENOMEM; - goto out; - } - - lock_res_and_lock(lock); - LASSERT(!lock->l_lvb_data); - lock->l_lvb_type = LVB_T_LAYOUT; - lock->l_lvb_data = lvb_data; - lock->l_lvb_len = lvb_len; - unlock_res_and_lock(lock); - } - } - - lock_res_and_lock(lock); - if (ldlm_is_destroyed(lock) || - lock->l_granted_mode == lock->l_req_mode) { - /* bug 11300: the lock has already been granted */ - unlock_res_and_lock(lock); - LDLM_DEBUG(lock, "Double grant race happened"); - rc = 0; - goto out; - } - - /* If we receive the completion AST before the actual enqueue returned, - * then we might need to switch lock modes, resources, or extents. 
- */ - if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) { - lock->l_req_mode = dlm_req->lock_desc.l_granted_mode; - LDLM_DEBUG(lock, "completion AST, new lock mode"); - } - - if (lock->l_resource->lr_type != LDLM_PLAIN) { - ldlm_convert_policy_to_local(req->rq_export, - dlm_req->lock_desc.l_resource.lr_type, - &dlm_req->lock_desc.l_policy_data, - &lock->l_policy_data); - LDLM_DEBUG(lock, "completion AST, new policy data"); - } - - ldlm_resource_unlink_lock(lock); - if (memcmp(&dlm_req->lock_desc.l_resource.lr_name, - &lock->l_resource->lr_name, - sizeof(lock->l_resource->lr_name)) != 0) { - unlock_res_and_lock(lock); - rc = ldlm_lock_change_resource(ns, lock, - &dlm_req->lock_desc.l_resource.lr_name); - if (rc < 0) { - LDLM_ERROR(lock, "Failed to allocate resource"); - goto out; - } - LDLM_DEBUG(lock, "completion AST, new resource"); - CERROR("change resource!\n"); - lock_res_and_lock(lock); - } - - if (dlm_req->lock_flags & LDLM_FL_AST_SENT) { - /* BL_AST locks are not needed in LRU. - * Let ldlm_cancel_lru() be fast. 
- */ - ldlm_lock_remove_from_lru(lock); - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST; - LDLM_DEBUG(lock, "completion AST includes blocking AST"); - } - - if (lock->l_lvb_len > 0) { - rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT, - lock->l_lvb_data, lvb_len); - if (rc < 0) { - unlock_res_and_lock(lock); - goto out; - } - } - - ldlm_grant_lock(lock, &ast_list); - unlock_res_and_lock(lock); - - LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work"); - - /* Let Enqueue to call osc_lock_upcall() and initialize l_ast_data */ - OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 2); - - ldlm_run_ast_work(ns, &ast_list, LDLM_WORK_CP_AST); - - LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)", - lock); - goto out; - -out: - if (rc < 0) { - lock_res_and_lock(lock); - ldlm_set_failed(lock); - unlock_res_and_lock(lock); - wake_up(&lock->l_waitq); - } - LDLM_LOCK_RELEASE(lock); -} - -/** - * Callback handler for receiving incoming glimpse ASTs. - * - * This only can happen on client side. After handling the glimpse AST - * we also consider dropping the lock here if it is unused locally for a - * long time. 
- */ -static void ldlm_handle_gl_callback(struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct ldlm_request *dlm_req, - struct ldlm_lock *lock) -{ - int rc = -ENOSYS; - - LDLM_DEBUG(lock, "client glimpse AST callback handler"); - - if (lock->l_glimpse_ast) - rc = lock->l_glimpse_ast(lock, req); - - if (req->rq_repmsg) { - ptlrpc_reply(req); - } else { - req->rq_status = rc; - ptlrpc_error(req); - } - - lock_res_and_lock(lock); - if (lock->l_granted_mode == LCK_PW && - !lock->l_readers && !lock->l_writers && - time_after(jiffies, - lock->l_last_used + 10 * HZ)) { - unlock_res_and_lock(lock); - if (ldlm_bl_to_thread_lock(ns, NULL, lock)) - ldlm_handle_bl_callback(ns, NULL, lock); - - return; - } - unlock_res_and_lock(lock); - LDLM_LOCK_RELEASE(lock); -} - -static int ldlm_callback_reply(struct ptlrpc_request *req, int rc) -{ - if (req->rq_no_reply) - return 0; - - req->rq_status = rc; - if (!req->rq_packed_final) { - rc = lustre_pack_reply(req, 1, NULL, NULL); - if (rc) - return rc; - } - return ptlrpc_reply(req); -} - -static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi, - enum ldlm_cancel_flags cancel_flags) -{ - struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool; - - spin_lock(&blp->blp_lock); - if (blwi->blwi_lock && ldlm_is_discard_data(blwi->blwi_lock)) { - /* add LDLM_FL_DISCARD_DATA requests to the priority list */ - list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list); - } else { - /* other blocking callbacks are added to the regular list */ - list_add_tail(&blwi->blwi_entry, &blp->blp_list); - } - spin_unlock(&blp->blp_lock); - - wake_up(&blp->blp_waitq); - - /* can not check blwi->blwi_flags as blwi could be already freed in - * LCF_ASYNC mode - */ - if (!(cancel_flags & LCF_ASYNC)) - wait_for_completion(&blwi->blwi_comp); - - return 0; -} - -static inline void init_blwi(struct ldlm_bl_work_item *blwi, - struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, - struct list_head *cancels, int count, - struct ldlm_lock *lock, - enum 
ldlm_cancel_flags cancel_flags) -{ - init_completion(&blwi->blwi_comp); - INIT_LIST_HEAD(&blwi->blwi_head); - - if (current->flags & PF_MEMALLOC) - blwi->blwi_mem_pressure = 1; - - blwi->blwi_ns = ns; - blwi->blwi_flags = cancel_flags; - if (ld) - blwi->blwi_ld = *ld; - if (count) { - list_add(&blwi->blwi_head, cancels); - list_del_init(cancels); - blwi->blwi_count = count; - } else { - blwi->blwi_lock = lock; - } -} - -/** - * Queues a list of locks \a cancels containing \a count locks - * for later processing by a blocking thread. If \a count is zero, - * then the lock referenced as \a lock is queued instead. - * - * The blocking thread would then call ->l_blocking_ast callback in the lock. - * If list addition fails an error is returned and caller is supposed to - * call ->l_blocking_ast itself. - */ -static int ldlm_bl_to_thread(struct ldlm_namespace *ns, - struct ldlm_lock_desc *ld, - struct ldlm_lock *lock, - struct list_head *cancels, int count, - enum ldlm_cancel_flags cancel_flags) -{ - if (cancels && count == 0) - return 0; - - if (cancel_flags & LCF_ASYNC) { - struct ldlm_bl_work_item *blwi; - - blwi = kzalloc(sizeof(*blwi), GFP_NOFS); - if (!blwi) - return -ENOMEM; - init_blwi(blwi, ns, ld, cancels, count, lock, cancel_flags); - - return __ldlm_bl_to_thread(blwi, cancel_flags); - } else { - /* if it is synchronous call do minimum mem alloc, as it could - * be triggered from kernel shrinker - */ - struct ldlm_bl_work_item blwi; - - memset(&blwi, 0, sizeof(blwi)); - init_blwi(&blwi, ns, ld, cancels, count, lock, cancel_flags); - return __ldlm_bl_to_thread(&blwi, cancel_flags); - } -} - -int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, - struct ldlm_lock *lock) -{ - return ldlm_bl_to_thread(ns, ld, lock, NULL, 0, LCF_ASYNC); -} - -int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, - struct list_head *cancels, int count, - enum ldlm_cancel_flags cancel_flags) -{ - return ldlm_bl_to_thread(ns, ld, 
NULL, cancels, count, cancel_flags); -} - -int ldlm_bl_thread_wakeup(void) -{ - wake_up(&ldlm_state->ldlm_bl_pool->blp_waitq); - return 0; -} - -/* Setinfo coming from Server (eg MDT) to Client (eg MDC)! */ -static int ldlm_handle_setinfo(struct ptlrpc_request *req) -{ - struct obd_device *obd = req->rq_export->exp_obd; - char *key; - void *val; - int keylen, vallen; - int rc = -ENOSYS; - - DEBUG_REQ(D_HSM, req, "%s: handle setinfo\n", obd->obd_name); - - req_capsule_set(&req->rq_pill, &RQF_OBD_SET_INFO); - - key = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY); - if (!key) { - DEBUG_REQ(D_IOCTL, req, "no set_info key"); - return -EFAULT; - } - keylen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_KEY, - RCL_CLIENT); - val = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL); - if (!val) { - DEBUG_REQ(D_IOCTL, req, "no set_info val"); - return -EFAULT; - } - vallen = req_capsule_get_size(&req->rq_pill, &RMF_SETINFO_VAL, - RCL_CLIENT); - - /* We are responsible for swabbing contents of val */ - - if (KEY_IS(KEY_HSM_COPYTOOL_SEND)) - /* Pass it on to mdc (the "export" in this case) */ - rc = obd_set_info_async(req->rq_svc_thread->t_env, - req->rq_export, - sizeof(KEY_HSM_COPYTOOL_SEND), - KEY_HSM_COPYTOOL_SEND, - vallen, val, NULL); - else - DEBUG_REQ(D_WARNING, req, "ignoring unknown key %s", key); - - return rc; -} - -static inline void ldlm_callback_errmsg(struct ptlrpc_request *req, - const char *msg, int rc, - const struct lustre_handle *handle) -{ - DEBUG_REQ((req->rq_no_reply || rc) ? D_WARNING : D_DLMTRACE, req, - "%s: [nid %s] [rc %d] [lock %#llx]", - msg, libcfs_id2str(req->rq_peer), rc, - handle ? 
handle->cookie : 0); - if (req->rq_no_reply) - CWARN("No reply was sent, maybe cause bug 21636.\n"); - else if (rc) - CWARN("Send reply failed, maybe cause bug 21636.\n"); -} - -/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */ -static int ldlm_callback_handler(struct ptlrpc_request *req) -{ - struct ldlm_namespace *ns; - struct ldlm_request *dlm_req; - struct ldlm_lock *lock; - int rc; - - /* Requests arrive in sender's byte order. The ptlrpc service - * handler has already checked and, if necessary, byte-swapped the - * incoming request message body, but I am responsible for the - * message buffers. - */ - - /* do nothing for sec context finalize */ - if (lustre_msg_get_opc(req->rq_reqmsg) == SEC_CTX_FINI) - return 0; - - req_capsule_init(&req->rq_pill, req, RCL_SERVER); - - if (!req->rq_export) { - rc = ldlm_callback_reply(req, -ENOTCONN); - ldlm_callback_errmsg(req, "Operate on unconnected server", - rc, NULL); - return 0; - } - - LASSERT(req->rq_export->exp_obd); - - switch (lustre_msg_get_opc(req->rq_reqmsg)) { - case LDLM_BL_CALLBACK: - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) { - if (cfs_fail_err) - ldlm_callback_reply(req, -(int)cfs_fail_err); - return 0; - } - break; - case LDLM_CP_CALLBACK: - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CP_CALLBACK_NET)) - return 0; - break; - case LDLM_GL_CALLBACK: - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_GL_CALLBACK_NET)) - return 0; - break; - case LDLM_SET_INFO: - rc = ldlm_handle_setinfo(req); - ldlm_callback_reply(req, rc); - return 0; - default: - CERROR("unknown opcode %u\n", - lustre_msg_get_opc(req->rq_reqmsg)); - ldlm_callback_reply(req, -EPROTO); - return 0; - } - - ns = req->rq_export->exp_obd->obd_namespace; - LASSERT(ns); - - req_capsule_set(&req->rq_pill, &RQF_LDLM_CALLBACK); - - dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); - if (!dlm_req) { - rc = ldlm_callback_reply(req, -EPROTO); - ldlm_callback_errmsg(req, "Operate without parameter", rc, - NULL); - return 0; - } - 
- /* Force a known safe race, send a cancel to the server for a lock - * which the server has already started a blocking callback on. - */ - if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) && - lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { - rc = ldlm_cli_cancel(&dlm_req->lock_handle[0], 0); - if (rc < 0) - CERROR("ldlm_cli_cancel: %d\n", rc); - } - - lock = ldlm_handle2lock_long(&dlm_req->lock_handle[0], 0); - if (!lock) { - CDEBUG(D_DLMTRACE, - "callback on lock %#llx - lock disappeared\n", - dlm_req->lock_handle[0].cookie); - rc = ldlm_callback_reply(req, -EINVAL); - ldlm_callback_errmsg(req, "Operate with invalid parameter", rc, - &dlm_req->lock_handle[0]); - return 0; - } - - if (ldlm_is_fail_loc(lock) && - lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) - OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - - /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */ - lock_res_and_lock(lock); - lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags & - LDLM_FL_AST_MASK); - if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) { - /* If somebody cancels lock and cache is already dropped, - * or lock is failed before cp_ast received on client, - * we can tell the server we have no lock. Otherwise, we - * should send cancel after dropping the cache. - */ - if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) || - ldlm_is_failed(lock)) { - LDLM_DEBUG(lock, - "callback on lock %#llx - lock disappeared", - dlm_req->lock_handle[0].cookie); - unlock_res_and_lock(lock); - LDLM_LOCK_RELEASE(lock); - rc = ldlm_callback_reply(req, -EINVAL); - ldlm_callback_errmsg(req, "Operate on stale lock", rc, - &dlm_req->lock_handle[0]); - return 0; - } - /* BL_AST locks are not needed in LRU. - * Let ldlm_cancel_lru() be fast. 
- */ - ldlm_lock_remove_from_lru(lock); - ldlm_set_bl_ast(lock); - } - unlock_res_and_lock(lock); - - /* We want the ost thread to get this reply so that it can respond - * to ost requests (write cache writeback) that might be triggered - * in the callback. - * - * But we'd also like to be able to indicate in the reply that we're - * cancelling right now, because it's unused, or have an intent result - * in the reply, so we might have to push the responsibility for sending - * the reply down into the AST handlers, alas. - */ - - switch (lustre_msg_get_opc(req->rq_reqmsg)) { - case LDLM_BL_CALLBACK: - CDEBUG(D_INODE, "blocking ast\n"); - req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK); - if (!ldlm_is_cancel_on_block(lock)) { - rc = ldlm_callback_reply(req, 0); - if (req->rq_no_reply || rc) - ldlm_callback_errmsg(req, "Normal process", rc, - &dlm_req->lock_handle[0]); - } - if (ldlm_bl_to_thread_lock(ns, &dlm_req->lock_desc, lock)) - ldlm_handle_bl_callback(ns, &dlm_req->lock_desc, lock); - break; - case LDLM_CP_CALLBACK: - CDEBUG(D_INODE, "completion ast\n"); - req_capsule_extend(&req->rq_pill, &RQF_LDLM_CP_CALLBACK); - ldlm_callback_reply(req, 0); - ldlm_handle_cp_callback(req, ns, dlm_req, lock); - break; - case LDLM_GL_CALLBACK: - CDEBUG(D_INODE, "glimpse ast\n"); - req_capsule_extend(&req->rq_pill, &RQF_LDLM_GL_CALLBACK); - ldlm_handle_gl_callback(req, ns, dlm_req, lock); - break; - default: - LBUG(); /* checked above */ - } - - return 0; -} - -static int ldlm_bl_get_work(struct ldlm_bl_pool *blp, - struct ldlm_bl_work_item **p_blwi, - struct obd_export **p_exp) -{ - int num_th = atomic_read(&blp->blp_num_threads); - struct ldlm_bl_work_item *blwi = NULL; - static unsigned int num_bl; - - spin_lock(&blp->blp_lock); - /* process a request from the blp_list at least every blp_num_threads */ - if (!list_empty(&blp->blp_list) && - (list_empty(&blp->blp_prio_list) || num_bl == 0)) - blwi = list_first_entry(&blp->blp_list, - struct ldlm_bl_work_item, 
blwi_entry); - else - if (!list_empty(&blp->blp_prio_list)) - blwi = list_first_entry(&blp->blp_prio_list, - struct ldlm_bl_work_item, - blwi_entry); - - if (blwi) { - if (++num_bl >= num_th) - num_bl = 0; - list_del(&blwi->blwi_entry); - } - spin_unlock(&blp->blp_lock); - *p_blwi = blwi; - - return (*p_blwi || *p_exp) ? 1 : 0; -} - -/* This only contains temporary data until the thread starts */ -struct ldlm_bl_thread_data { - struct ldlm_bl_pool *bltd_blp; - struct completion bltd_comp; - int bltd_num; -}; - -static int ldlm_bl_thread_main(void *arg); - -static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp, bool check_busy) -{ - struct ldlm_bl_thread_data bltd = { .bltd_blp = blp }; - struct task_struct *task; - - init_completion(&bltd.bltd_comp); - - bltd.bltd_num = atomic_inc_return(&blp->blp_num_threads); - if (bltd.bltd_num >= blp->blp_max_threads) { - atomic_dec(&blp->blp_num_threads); - return 0; - } - - LASSERTF(bltd.bltd_num > 0, "thread num:%d\n", bltd.bltd_num); - if (check_busy && - atomic_read(&blp->blp_busy_threads) < (bltd.bltd_num - 1)) { - atomic_dec(&blp->blp_num_threads); - return 0; - } - - task = kthread_run(ldlm_bl_thread_main, &bltd, "ldlm_bl_%02d", - bltd.bltd_num); - if (IS_ERR(task)) { - CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n", - bltd.bltd_num, PTR_ERR(task)); - atomic_dec(&blp->blp_num_threads); - return PTR_ERR(task); - } - wait_for_completion(&bltd.bltd_comp); - - return 0; -} - -/* Not fatal if racy and have a few too many threads */ -static int ldlm_bl_thread_need_create(struct ldlm_bl_pool *blp, - struct ldlm_bl_work_item *blwi) -{ - if (atomic_read(&blp->blp_num_threads) >= blp->blp_max_threads) - return 0; - - if (atomic_read(&blp->blp_busy_threads) < - atomic_read(&blp->blp_num_threads)) - return 0; - - if (blwi && (!blwi->blwi_ns || blwi->blwi_mem_pressure)) - return 0; - - return 1; -} - -static int ldlm_bl_thread_blwi(struct ldlm_bl_pool *blp, - struct ldlm_bl_work_item *blwi) -{ - unsigned int flags = 0; 
- - if (!blwi->blwi_ns) - /* added by ldlm_cleanup() */ - return LDLM_ITER_STOP; - - if (blwi->blwi_mem_pressure) - flags = memalloc_noreclaim_save(); - - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL2, 4); - - if (blwi->blwi_count) { - int count; - - /* - * The special case when we cancel locks in lru - * asynchronously, we pass the list of locks here. - * Thus locks are marked LDLM_FL_CANCELING, but NOT - * canceled locally yet. - */ - count = ldlm_cli_cancel_list_local(&blwi->blwi_head, - blwi->blwi_count, - LCF_BL_AST); - ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL, - blwi->blwi_flags); - } else { - ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld, - blwi->blwi_lock); - } - if (blwi->blwi_mem_pressure) - memalloc_noreclaim_restore(flags); - - if (blwi->blwi_flags & LCF_ASYNC) - kfree(blwi); - else - complete(&blwi->blwi_comp); - - return 0; -} - -/** - * Main blocking requests processing thread. - * - * Callers put locks into its queue by calling ldlm_bl_to_thread. - * This thread in the end ends up doing actual call to ->l_blocking_ast - * for queued locks. 
- */ -static int ldlm_bl_thread_main(void *arg) -{ - struct ldlm_bl_pool *blp; - struct ldlm_bl_thread_data *bltd = arg; - - blp = bltd->bltd_blp; - - complete(&bltd->bltd_comp); - /* cannot use bltd after this, it is only on caller's stack */ - - while (1) { - struct ldlm_bl_work_item *blwi = NULL; - struct obd_export *exp = NULL; - int rc; - - rc = ldlm_bl_get_work(blp, &blwi, &exp); - if (!rc) - wait_event_idle_exclusive(blp->blp_waitq, - ldlm_bl_get_work(blp, &blwi, - &exp)); - atomic_inc(&blp->blp_busy_threads); - - if (ldlm_bl_thread_need_create(blp, blwi)) - /* discard the return value, we tried */ - ldlm_bl_thread_start(blp, true); - - if (blwi) - rc = ldlm_bl_thread_blwi(blp, blwi); - - atomic_dec(&blp->blp_busy_threads); - - if (rc == LDLM_ITER_STOP) - break; - } - - atomic_dec(&blp->blp_num_threads); - complete(&blp->blp_comp); - return 0; -} - -static int ldlm_setup(void); -static int ldlm_cleanup(void); - -int ldlm_get_ref(void) -{ - int rc = 0; - - rc = ptlrpc_inc_ref(); - if (rc) - return rc; - - mutex_lock(&ldlm_ref_mutex); - if (++ldlm_refcount == 1) { - rc = ldlm_setup(); - if (rc) - ldlm_refcount--; - } - mutex_unlock(&ldlm_ref_mutex); - - if (rc) - ptlrpc_dec_ref(); - - return rc; -} - -void ldlm_put_ref(void) -{ - int rc = 0; - mutex_lock(&ldlm_ref_mutex); - if (ldlm_refcount == 1) { - rc = ldlm_cleanup(); - - if (rc) - CERROR("ldlm_cleanup failed: %d\n", rc); - else - ldlm_refcount--; - } else { - ldlm_refcount--; - } - mutex_unlock(&ldlm_ref_mutex); - if (!rc) - ptlrpc_dec_ref(); -} - -static ssize_t cancel_unused_locks_before_replay_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", ldlm_cancel_unused_locks_before_replay); -} - -static ssize_t cancel_unused_locks_before_replay_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - 
ldlm_cancel_unused_locks_before_replay = val; - - return count; -} -LUSTRE_RW_ATTR(cancel_unused_locks_before_replay); - -/* These are for root of /sys/fs/lustre/ldlm */ -static struct attribute *ldlm_attrs[] = { - &lustre_attr_cancel_unused_locks_before_replay.attr, - NULL, -}; - -static const struct attribute_group ldlm_attr_group = { - .attrs = ldlm_attrs, -}; - -static int ldlm_setup(void) -{ - static struct ptlrpc_service_conf conf; - struct ldlm_bl_pool *blp = NULL; - int rc = 0; - int i; - - if (ldlm_state) - return -EALREADY; - - ldlm_state = kzalloc(sizeof(*ldlm_state), GFP_NOFS); - if (!ldlm_state) - return -ENOMEM; - - ldlm_kobj = kobject_create_and_add("ldlm", lustre_kobj); - if (!ldlm_kobj) { - rc = -ENOMEM; - goto out; - } - - rc = sysfs_create_group(ldlm_kobj, &ldlm_attr_group); - if (rc) - goto out; - - ldlm_ns_kset = kset_create_and_add("namespaces", NULL, ldlm_kobj); - if (!ldlm_ns_kset) { - rc = -ENOMEM; - goto out; - } - - ldlm_svc_kset = kset_create_and_add("services", NULL, ldlm_kobj); - if (!ldlm_svc_kset) { - rc = -ENOMEM; - goto out; - } - - ldlm_debugfs_setup(); - - memset(&conf, 0, sizeof(conf)); - conf = (typeof(conf)) { - .psc_name = "ldlm_cbd", - .psc_watchdog_factor = 2, - .psc_buf = { - .bc_nbufs = LDLM_CLIENT_NBUFS, - .bc_buf_size = LDLM_BUFSIZE, - .bc_req_max_size = LDLM_MAXREQSIZE, - .bc_rep_max_size = LDLM_MAXREPSIZE, - .bc_req_portal = LDLM_CB_REQUEST_PORTAL, - .bc_rep_portal = LDLM_CB_REPLY_PORTAL, - }, - .psc_thr = { - .tc_thr_name = "ldlm_cb", - .tc_thr_factor = LDLM_THR_FACTOR, - .tc_nthrs_init = LDLM_NTHRS_INIT, - .tc_nthrs_base = LDLM_NTHRS_BASE, - .tc_nthrs_max = LDLM_NTHRS_MAX, - .tc_nthrs_user = ldlm_num_threads, - .tc_cpu_affinity = 1, - .tc_ctx_tags = LCT_MD_THREAD | LCT_DT_THREAD, - }, - .psc_cpt = { - .cc_pattern = ldlm_cpts, - }, - .psc_ops = { - .so_req_handler = ldlm_callback_handler, - }, - }; - ldlm_state->ldlm_cb_service = - ptlrpc_register_service(&conf, ldlm_svc_kset, - ldlm_svc_debugfs_dir); - if 
(IS_ERR(ldlm_state->ldlm_cb_service)) { - CERROR("failed to start service\n"); - rc = PTR_ERR(ldlm_state->ldlm_cb_service); - ldlm_state->ldlm_cb_service = NULL; - goto out; - } - - blp = kzalloc(sizeof(*blp), GFP_NOFS); - if (!blp) { - rc = -ENOMEM; - goto out; - } - ldlm_state->ldlm_bl_pool = blp; - - spin_lock_init(&blp->blp_lock); - INIT_LIST_HEAD(&blp->blp_list); - INIT_LIST_HEAD(&blp->blp_prio_list); - init_waitqueue_head(&blp->blp_waitq); - atomic_set(&blp->blp_num_threads, 0); - atomic_set(&blp->blp_busy_threads, 0); - - if (ldlm_num_threads == 0) { - blp->blp_min_threads = LDLM_NTHRS_INIT; - blp->blp_max_threads = LDLM_NTHRS_MAX; - } else { - blp->blp_min_threads = min_t(int, LDLM_NTHRS_MAX, - max_t(int, LDLM_NTHRS_INIT, - ldlm_num_threads)); - - blp->blp_max_threads = blp->blp_min_threads; - } - - for (i = 0; i < blp->blp_min_threads; i++) { - rc = ldlm_bl_thread_start(blp, false); - if (rc < 0) - goto out; - } - - rc = ldlm_pools_init(); - if (rc) { - CERROR("Failed to initialize LDLM pools: %d\n", rc); - goto out; - } - return 0; - - out: - ldlm_cleanup(); - return rc; -} - -static int ldlm_cleanup(void) -{ - if (!list_empty(ldlm_namespace_list(LDLM_NAMESPACE_SERVER)) || - !list_empty(ldlm_namespace_list(LDLM_NAMESPACE_CLIENT))) { - CERROR("ldlm still has namespaces; clean these up first.\n"); - ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE); - ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE); - return -EBUSY; - } - - ldlm_pools_fini(); - - if (ldlm_state->ldlm_bl_pool) { - struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool; - - while (atomic_read(&blp->blp_num_threads) > 0) { - struct ldlm_bl_work_item blwi = { .blwi_ns = NULL }; - - init_completion(&blp->blp_comp); - - spin_lock(&blp->blp_lock); - list_add_tail(&blwi.blwi_entry, &blp->blp_list); - wake_up(&blp->blp_waitq); - spin_unlock(&blp->blp_lock); - - wait_for_completion(&blp->blp_comp); - } - - kfree(blp); - } - - if (ldlm_state->ldlm_cb_service) - 
ptlrpc_unregister_service(ldlm_state->ldlm_cb_service); - - if (ldlm_ns_kset) - kset_unregister(ldlm_ns_kset); - if (ldlm_svc_kset) - kset_unregister(ldlm_svc_kset); - if (ldlm_kobj) { - sysfs_remove_group(ldlm_kobj, &ldlm_attr_group); - kobject_put(ldlm_kobj); - } - - ldlm_debugfs_cleanup(); - - kfree(ldlm_state); - ldlm_state = NULL; - - return 0; -} - -int ldlm_init(void) -{ - mutex_init(&ldlm_ref_mutex); - mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER)); - mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT)); - ldlm_resource_slab = kmem_cache_create("ldlm_resources", - sizeof(struct ldlm_resource), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!ldlm_resource_slab) - return -ENOMEM; - - ldlm_lock_slab = kmem_cache_create("ldlm_locks", - sizeof(struct ldlm_lock), 0, - SLAB_HWCACHE_ALIGN | - SLAB_TYPESAFE_BY_RCU, NULL); - if (!ldlm_lock_slab) { - kmem_cache_destroy(ldlm_resource_slab); - return -ENOMEM; - } - - ldlm_interval_slab = kmem_cache_create("interval_node", - sizeof(struct ldlm_interval), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ldlm_interval_slab) { - kmem_cache_destroy(ldlm_resource_slab); - kmem_cache_destroy(ldlm_lock_slab); - return -ENOMEM; - } -#if LUSTRE_TRACKS_LOCK_EXP_REFS - class_export_dump_hook = ldlm_dump_export_locks; -#endif - return 0; -} - -void ldlm_exit(void) -{ - if (ldlm_refcount) - CERROR("ldlm_refcount is %d in %s!\n", ldlm_refcount, __func__); - kmem_cache_destroy(ldlm_resource_slab); - /* ldlm_lock_put() use RCU to call ldlm_lock_free, so need call - * synchronize_rcu() to wait a grace period elapsed, so that - * ldlm_lock_free() get a chance to be called. 
- */ - synchronize_rcu(); - kmem_cache_destroy(ldlm_lock_slab); - kmem_cache_destroy(ldlm_interval_slab); -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c b/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c deleted file mode 100644 index 33b5a3f96fcb..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_plain.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ldlm/ldlm_plain.c - * - * Author: Peter Braam - * Author: Phil Schwan - */ - -/** - * This file contains implementation of PLAIN lock type. - * - * PLAIN locks are the simplest form of LDLM locking, and are used when - * there only needs to be a single lock on a resource. This avoids some - * of the complexity of EXTENT and IBITS lock types, but doesn't allow - * different "parts" of a resource to be locked concurrently. 
Example - * use cases for PLAIN locks include locking of MGS configuration logs - * and (as of Lustre 2.4) quota records. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include -#include -#include - -#include "ldlm_internal.h" - -void ldlm_plain_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy, - union ldlm_policy_data *lpolicy) -{ - /* No policy for plain locks */ -} - -void ldlm_plain_policy_local_to_wire(const union ldlm_policy_data *lpolicy, - union ldlm_wire_policy_data *wpolicy) -{ - /* No policy for plain locks */ -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c deleted file mode 100644 index 36d14ee4e5b1..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c +++ /dev/null @@ -1,1013 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/ldlm/ldlm_pool.c - * - * Author: Yury Umanets - */ - -/* - * Idea of this code is rather simple. Each second, for each server namespace - * we have SLV - server lock volume which is calculated on current number of - * granted locks, grant speed for past period, etc - that is, locking load. - * This SLV number may be thought as a flow definition for simplicity. It is - * sent to clients with each occasion to let them know what is current load - * situation on the server. By default, at the beginning, SLV on server is - * set max value which is calculated as the following: allow to one client - * have all locks of limit ->pl_limit for 10h. - * - * Next, on clients, number of cached locks is not limited artificially in any - * way as it was before. Instead, client calculates CLV, that is, client lock - * volume for each lock and compares it with last SLV from the server. CLV is - * calculated as the number of locks in LRU * lock live time in seconds. If - * CLV > SLV - lock is canceled. - * - * Client has LVF, that is, lock volume factor which regulates how much - * sensitive client should be about last SLV from server. The higher LVF is the - * more locks will be canceled on client. Default value for it is 1. Setting LVF - * to 2 means that client will cancel locks 2 times faster. - * - * Locks on a client will be canceled more intensively in these cases: - * (1) if SLV is smaller, that is, load is higher on the server; - * (2) client has a lot of locks (the more locks are held by client, the bigger - * chances that some of them should be canceled); - * (3) client has old locks (taken some time ago); - * - * Thus, according to flow paradigm that we use for better understanding SLV, - * CLV is the volume of particle in flow described by SLV. According to this, - * if flow is getting thinner, more and more particles become outside of it and - * as particles are locks, they should be canceled. 
- * - * General idea of this belongs to Vitaly Fertman (vitaly@clusterfs.com). - * Andreas Dilger (adilger@clusterfs.com) proposed few nice ideas like using - * LVF and many cleanups. Flow definition to allow more easy understanding of - * the logic belongs to Nikita Danilov (nikita@clusterfs.com) as well as many - * cleanups and fixes. And design and implementation are done by Yury Umanets - * (umka@clusterfs.com). - * - * Glossary for terms used: - * - * pl_limit - Number of allowed locks in pool. Applies to server and client - * side (tunable); - * - * pl_granted - Number of granted locks (calculated); - * pl_grant_rate - Number of granted locks for last T (calculated); - * pl_cancel_rate - Number of canceled locks for last T (calculated); - * pl_grant_speed - Grant speed (GR - CR) for last T (calculated); - * pl_grant_plan - Planned number of granted locks for next T (calculated); - * pl_server_lock_volume - Current server lock volume (calculated); - * - * As it may be seen from list above, we have few possible tunables which may - * affect behavior much. They all may be modified via sysfs. However, they also - * give a possibility for constructing few pre-defined behavior policies. If - * none of predefines is suitable for a working pattern being used, new one may - * be "constructed" via sysfs tunables. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include -#include -#include -#include -#include "ldlm_internal.h" - -/* - * 50 ldlm locks for 1MB of RAM. - */ -#define LDLM_POOL_HOST_L ((NUM_CACHEPAGES >> (20 - PAGE_SHIFT)) * 50) - -/* - * Maximal possible grant step plan in %. - */ -#define LDLM_POOL_MAX_GSP (30) - -/* - * Minimal possible grant step plan in %. - */ -#define LDLM_POOL_MIN_GSP (1) - -/* - * This controls the speed of reaching LDLM_POOL_MAX_GSP - * with increasing thread period. - */ -#define LDLM_POOL_GSP_STEP_SHIFT (2) - -/* - * LDLM_POOL_GSP% of all locks is default GP. 
- */ -#define LDLM_POOL_GP(L) (((L) * LDLM_POOL_MAX_GSP) / 100) - -/* - * Max age for locks on clients. - */ -#define LDLM_POOL_MAX_AGE (36000) - -/* - * The granularity of SLV calculation. - */ -#define LDLM_POOL_SLV_SHIFT (10) - -static inline __u64 dru(__u64 val, __u32 shift, int round_up) -{ - return (val + (round_up ? (1 << shift) - 1 : 0)) >> shift; -} - -static inline __u64 ldlm_pool_slv_max(__u32 L) -{ - /* - * Allow to have all locks for 1 client for 10 hrs. - * Formula is the following: limit * 10h / 1 client. - */ - __u64 lim = (__u64)L * LDLM_POOL_MAX_AGE / 1; - return lim; -} - -static inline __u64 ldlm_pool_slv_min(__u32 L) -{ - return 1; -} - -enum { - LDLM_POOL_FIRST_STAT = 0, - LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT, - LDLM_POOL_GRANT_STAT, - LDLM_POOL_CANCEL_STAT, - LDLM_POOL_GRANT_RATE_STAT, - LDLM_POOL_CANCEL_RATE_STAT, - LDLM_POOL_GRANT_PLAN_STAT, - LDLM_POOL_SLV_STAT, - LDLM_POOL_SHRINK_REQTD_STAT, - LDLM_POOL_SHRINK_FREED_STAT, - LDLM_POOL_RECALC_STAT, - LDLM_POOL_TIMING_STAT, - LDLM_POOL_LAST_STAT -}; - -/** - * Calculates suggested grant_step in % of available locks for passed - * \a period. This is later used in grant_plan calculations. - */ -static inline int ldlm_pool_t2gsp(unsigned int t) -{ - /* - * This yields 1% grant step for anything below LDLM_POOL_GSP_STEP - * and up to 30% for anything higher than LDLM_POOL_GSP_STEP. - * - * How this will affect execution is the following: - * - * - for thread period 1s we will have grant_step 1% which good from - * pov of taking some load off from server and push it out to clients. - * This is like that because 1% for grant_step means that server will - * not allow clients to get lots of locks in short period of time and - * keep all old locks in their caches. 
Clients will always have to - * get some locks back if they want to take some new; - * - * - for thread period 10s (which is default) we will have 23% which - * means that clients will have enough of room to take some new locks - * without getting some back. All locks from this 23% which were not - * taken by clients in current period will contribute in SLV growing. - * SLV growing means more locks cached on clients until limit or grant - * plan is reached. - */ - return LDLM_POOL_MAX_GSP - - ((LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) >> - (t >> LDLM_POOL_GSP_STEP_SHIFT)); -} - -/** - * Recalculates next stats on passed \a pl. - * - * \pre ->pl_lock is locked. - */ -static void ldlm_pool_recalc_stats(struct ldlm_pool *pl) -{ - int grant_plan = pl->pl_grant_plan; - __u64 slv = pl->pl_server_lock_volume; - int granted = atomic_read(&pl->pl_granted); - int grant_rate = atomic_read(&pl->pl_grant_rate); - int cancel_rate = atomic_read(&pl->pl_cancel_rate); - - lprocfs_counter_add(pl->pl_stats, LDLM_POOL_SLV_STAT, - slv); - lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANTED_STAT, - granted); - lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT, - grant_rate); - lprocfs_counter_add(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT, - grant_plan); - lprocfs_counter_add(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT, - cancel_rate); -} - -/** - * Sets SLV and Limit from container_of(pl, struct ldlm_namespace, - * ns_pool)->ns_obd tp passed \a pl. - */ -static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl) -{ - struct obd_device *obd; - - /* - * Get new SLV and Limit from obd which is updated with coming - * RPCs. - */ - obd = container_of(pl, struct ldlm_namespace, - ns_pool)->ns_obd; - read_lock(&obd->obd_pool_lock); - pl->pl_server_lock_volume = obd->obd_pool_slv; - atomic_set(&pl->pl_limit, obd->obd_pool_limit); - read_unlock(&obd->obd_pool_lock); -} - -/** - * Recalculates client size pool \a pl according to current SLV and Limit. 
- */ -static int ldlm_cli_pool_recalc(struct ldlm_pool *pl) -{ - time64_t recalc_interval_sec; - int ret; - - recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; - if (recalc_interval_sec < pl->pl_recalc_period) - return 0; - - spin_lock(&pl->pl_lock); - /* - * Check if we need to recalc lists now. - */ - recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; - if (recalc_interval_sec < pl->pl_recalc_period) { - spin_unlock(&pl->pl_lock); - return 0; - } - - /* - * Make sure that pool knows last SLV and Limit from obd. - */ - ldlm_cli_pool_pop_slv(pl); - - spin_unlock(&pl->pl_lock); - - /* - * Do not cancel locks in case lru resize is disabled for this ns. - */ - if (!ns_connect_lru_resize(container_of(pl, struct ldlm_namespace, - ns_pool))) { - ret = 0; - goto out; - } - - /* - * In the time of canceling locks on client we do not need to maintain - * sharp timing, we only want to cancel locks asap according to new SLV. - * It may be called when SLV has changed much, this is why we do not - * take into account pl->pl_recalc_time here. - */ - ret = ldlm_cancel_lru(container_of(pl, struct ldlm_namespace, ns_pool), - 0, LCF_ASYNC, LDLM_LRU_FLAG_LRUR); - -out: - spin_lock(&pl->pl_lock); - /* - * Time of LRU resizing might be longer than period, - * so update after LRU resizing rather than before it. - */ - pl->pl_recalc_time = ktime_get_real_seconds(); - lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT, - recalc_interval_sec); - spin_unlock(&pl->pl_lock); - return ret; -} - -/** - * This function is main entry point for memory pressure handling on client - * side. Main goal of this function is to cancel some number of locks on - * passed \a pl according to \a nr and \a gfp_mask. 
- */ -static int ldlm_cli_pool_shrink(struct ldlm_pool *pl, - int nr, gfp_t gfp_mask) -{ - struct ldlm_namespace *ns; - int unused; - - ns = container_of(pl, struct ldlm_namespace, ns_pool); - - /* - * Do not cancel locks in case lru resize is disabled for this ns. - */ - if (!ns_connect_lru_resize(ns)) - return 0; - - /* - * Make sure that pool knows last SLV and Limit from obd. - */ - ldlm_cli_pool_pop_slv(pl); - - spin_lock(&ns->ns_lock); - unused = ns->ns_nr_unused; - spin_unlock(&ns->ns_lock); - - if (nr == 0) - return (unused / 100) * sysctl_vfs_cache_pressure; - else - return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_LRU_FLAG_SHRINK); -} - -static const struct ldlm_pool_ops ldlm_cli_pool_ops = { - .po_recalc = ldlm_cli_pool_recalc, - .po_shrink = ldlm_cli_pool_shrink -}; - -/** - * Pool recalc wrapper. Will call either client or server pool recalc callback - * depending what pool \a pl is used. - */ -static int ldlm_pool_recalc(struct ldlm_pool *pl) -{ - u32 recalc_interval_sec; - int count; - - recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; - if (recalc_interval_sec > 0) { - spin_lock(&pl->pl_lock); - recalc_interval_sec = ktime_get_real_seconds() - pl->pl_recalc_time; - - if (recalc_interval_sec > 0) { - /* - * Update pool statistics every 1s. - */ - ldlm_pool_recalc_stats(pl); - - /* - * Zero out all rates and speed for the last period. 
- */ - atomic_set(&pl->pl_grant_rate, 0); - atomic_set(&pl->pl_cancel_rate, 0); - } - spin_unlock(&pl->pl_lock); - } - - if (pl->pl_ops->po_recalc) { - count = pl->pl_ops->po_recalc(pl); - lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT, - count); - } - - recalc_interval_sec = pl->pl_recalc_time - ktime_get_real_seconds() + - pl->pl_recalc_period; - if (recalc_interval_sec <= 0) { - /* DEBUG: should be re-removed after LU-4536 is fixed */ - CDEBUG(D_DLMTRACE, - "%s: Negative interval(%ld), too short period(%ld)\n", - pl->pl_name, (long)recalc_interval_sec, - (long)pl->pl_recalc_period); - - /* Prevent too frequent recalculation. */ - recalc_interval_sec = 1; - } - - return recalc_interval_sec; -} - -/* - * Pool shrink wrapper. Will call either client or server pool recalc callback - * depending what pool pl is used. When nr == 0, just return the number of - * freeable locks. Otherwise, return the number of canceled locks. - */ -static int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask) -{ - int cancel = 0; - - if (pl->pl_ops->po_shrink) { - cancel = pl->pl_ops->po_shrink(pl, nr, gfp_mask); - if (nr > 0) { - lprocfs_counter_add(pl->pl_stats, - LDLM_POOL_SHRINK_REQTD_STAT, - nr); - lprocfs_counter_add(pl->pl_stats, - LDLM_POOL_SHRINK_FREED_STAT, - cancel); - CDEBUG(D_DLMTRACE, - "%s: request to shrink %d locks, shrunk %d\n", - pl->pl_name, nr, cancel); - } - } - return cancel; -} - -static int lprocfs_pool_state_seq_show(struct seq_file *m, void *unused) -{ - int granted, grant_rate, cancel_rate; - int grant_speed, lvf; - struct ldlm_pool *pl = m->private; - __u64 slv, clv; - __u32 limit; - - spin_lock(&pl->pl_lock); - slv = pl->pl_server_lock_volume; - clv = pl->pl_client_lock_volume; - limit = atomic_read(&pl->pl_limit); - granted = atomic_read(&pl->pl_granted); - grant_rate = atomic_read(&pl->pl_grant_rate); - cancel_rate = atomic_read(&pl->pl_cancel_rate); - grant_speed = grant_rate - cancel_rate; - lvf = 
atomic_read(&pl->pl_lock_volume_factor); - spin_unlock(&pl->pl_lock); - - seq_printf(m, "LDLM pool state (%s):\n" - " SLV: %llu\n" - " CLV: %llu\n" - " LVF: %d\n", - pl->pl_name, slv, clv, lvf); - - seq_printf(m, " GR: %d\n CR: %d\n GS: %d\n" - " G: %d\n L: %d\n", - grant_rate, cancel_rate, grant_speed, - granted, limit); - - return 0; -} - -LPROC_SEQ_FOPS_RO(lprocfs_pool_state); - -static ssize_t grant_speed_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, - pl_kobj); - - int grant_speed; - - spin_lock(&pl->pl_lock); - /* serialize with ldlm_pool_recalc */ - grant_speed = atomic_read(&pl->pl_grant_rate) - - atomic_read(&pl->pl_cancel_rate); - spin_unlock(&pl->pl_lock); - return sprintf(buf, "%d\n", grant_speed); -} -LUSTRE_RO_ATTR(grant_speed); - -LDLM_POOL_SYSFS_READER_SHOW(grant_plan, int); -LUSTRE_RO_ATTR(grant_plan); - -LDLM_POOL_SYSFS_READER_SHOW(recalc_period, int); -LDLM_POOL_SYSFS_WRITER_STORE(recalc_period, int); -LUSTRE_RW_ATTR(recalc_period); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(server_lock_volume, u64); -LUSTRE_RO_ATTR(server_lock_volume); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(limit, atomic); -LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(limit, atomic); -LUSTRE_RW_ATTR(limit); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(granted, atomic); -LUSTRE_RO_ATTR(granted); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(cancel_rate, atomic); -LUSTRE_RO_ATTR(cancel_rate); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(grant_rate, atomic); -LUSTRE_RO_ATTR(grant_rate); - -LDLM_POOL_SYSFS_READER_NOLOCK_SHOW(lock_volume_factor, atomic); -LDLM_POOL_SYSFS_WRITER_NOLOCK_STORE(lock_volume_factor, atomic); -LUSTRE_RW_ATTR(lock_volume_factor); - -#define LDLM_POOL_ADD_VAR(name, var, ops) \ - do { \ - snprintf(var_name, MAX_STRING_SIZE, #name); \ - pool_vars[0].data = var; \ - pool_vars[0].fops = ops; \ - ldebugfs_add_vars(pl->pl_debugfs_entry, pool_vars, NULL);\ - } while (0) - -/* These are for pools in 
/sys/fs/lustre/ldlm/namespaces/.../pool */ -static struct attribute *ldlm_pl_attrs[] = { - &lustre_attr_grant_speed.attr, - &lustre_attr_grant_plan.attr, - &lustre_attr_recalc_period.attr, - &lustre_attr_server_lock_volume.attr, - &lustre_attr_limit.attr, - &lustre_attr_granted.attr, - &lustre_attr_cancel_rate.attr, - &lustre_attr_grant_rate.attr, - &lustre_attr_lock_volume_factor.attr, - NULL, -}; - -static void ldlm_pl_release(struct kobject *kobj) -{ - struct ldlm_pool *pl = container_of(kobj, struct ldlm_pool, - pl_kobj); - complete(&pl->pl_kobj_unregister); -} - -static struct kobj_type ldlm_pl_ktype = { - .default_attrs = ldlm_pl_attrs, - .sysfs_ops = &lustre_sysfs_ops, - .release = ldlm_pl_release, -}; - -static int ldlm_pool_sysfs_init(struct ldlm_pool *pl) -{ - struct ldlm_namespace *ns = container_of(pl, struct ldlm_namespace, - ns_pool); - int err; - - init_completion(&pl->pl_kobj_unregister); - err = kobject_init_and_add(&pl->pl_kobj, &ldlm_pl_ktype, &ns->ns_kobj, - "pool"); - - return err; -} - -static int ldlm_pool_debugfs_init(struct ldlm_pool *pl) -{ - struct ldlm_namespace *ns = container_of(pl, struct ldlm_namespace, - ns_pool); - struct dentry *debugfs_ns_parent; - struct lprocfs_vars pool_vars[2]; - char *var_name = NULL; - int rc = 0; - - var_name = kzalloc(MAX_STRING_SIZE + 1, GFP_NOFS); - if (!var_name) - return -ENOMEM; - - debugfs_ns_parent = ns->ns_debugfs_entry; - if (IS_ERR_OR_NULL(debugfs_ns_parent)) { - CERROR("%s: debugfs entry is not initialized\n", - ldlm_ns_name(ns)); - rc = -EINVAL; - goto out_free_name; - } - pl->pl_debugfs_entry = debugfs_create_dir("pool", debugfs_ns_parent); - - var_name[MAX_STRING_SIZE] = '\0'; - memset(pool_vars, 0, sizeof(pool_vars)); - pool_vars[0].name = var_name; - - LDLM_POOL_ADD_VAR(state, pl, &lprocfs_pool_state_fops); - - pl->pl_stats = lprocfs_alloc_stats(LDLM_POOL_LAST_STAT - - LDLM_POOL_FIRST_STAT, 0); - if (!pl->pl_stats) { - rc = -ENOMEM; - goto out_free_name; - } - - 
lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANTED_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "granted", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "grant", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "cancel", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "grant_rate", "locks/s"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_RATE_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "cancel_rate", "locks/s"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_PLAN_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "grant_plan", "locks/s"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SLV_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "slv", "slv"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_REQTD_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "shrink_request", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_FREED_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "shrink_freed", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_RECALC_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "recalc_freed", "locks"); - lprocfs_counter_init(pl->pl_stats, LDLM_POOL_TIMING_STAT, - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV, - "recalc_timing", "sec"); - debugfs_create_file("stats", 0644, pl->pl_debugfs_entry, pl->pl_stats, - &lprocfs_stats_seq_fops); - -out_free_name: - kfree(var_name); - return rc; -} - -static void ldlm_pool_sysfs_fini(struct ldlm_pool *pl) -{ - kobject_put(&pl->pl_kobj); - wait_for_completion(&pl->pl_kobj_unregister); -} - -static void ldlm_pool_debugfs_fini(struct ldlm_pool *pl) -{ - if (pl->pl_stats) { - lprocfs_free_stats(&pl->pl_stats); - pl->pl_stats = NULL; - } - debugfs_remove_recursive(pl->pl_debugfs_entry); -} - -int 
ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns, - int idx, enum ldlm_side client) -{ - int rc; - - spin_lock_init(&pl->pl_lock); - atomic_set(&pl->pl_granted, 0); - pl->pl_recalc_time = ktime_get_real_seconds(); - atomic_set(&pl->pl_lock_volume_factor, 1); - - atomic_set(&pl->pl_grant_rate, 0); - atomic_set(&pl->pl_cancel_rate, 0); - pl->pl_grant_plan = LDLM_POOL_GP(LDLM_POOL_HOST_L); - - snprintf(pl->pl_name, sizeof(pl->pl_name), "ldlm-pool-%s-%d", - ldlm_ns_name(ns), idx); - - atomic_set(&pl->pl_limit, 1); - pl->pl_server_lock_volume = 0; - pl->pl_ops = &ldlm_cli_pool_ops; - pl->pl_recalc_period = LDLM_POOL_CLI_DEF_RECALC_PERIOD; - pl->pl_client_lock_volume = 0; - rc = ldlm_pool_debugfs_init(pl); - if (rc) - return rc; - - rc = ldlm_pool_sysfs_init(pl); - if (rc) - return rc; - - CDEBUG(D_DLMTRACE, "Lock pool %s is initialized\n", pl->pl_name); - - return rc; -} - -void ldlm_pool_fini(struct ldlm_pool *pl) -{ - ldlm_pool_sysfs_fini(pl); - ldlm_pool_debugfs_fini(pl); - - /* - * Pool should not be used after this point. We can't free it here as - * it lives in struct ldlm_namespace, but still interested in catching - * any abnormal using cases. - */ - POISON(pl, 0x5a, sizeof(*pl)); -} - -/** - * Add new taken ldlm lock \a lock into pool \a pl accounting. - */ -void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock) -{ - /* - * FLOCK locks are special in a sense that they are almost never - * cancelled, instead special kind of lock is used to drop them. - * also there is no LRU for flock locks, so no point in tracking - * them anyway. - */ - if (lock->l_resource->lr_type == LDLM_FLOCK) - return; - - atomic_inc(&pl->pl_granted); - atomic_inc(&pl->pl_grant_rate); - lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT); - /* - * Do not do pool recalc for client side as all locks which - * potentially may be canceled has already been packed into - * enqueue/cancel rpc. Also we do not want to run out of stack - * with too long call paths. 
- */ -} - -/** - * Remove ldlm lock \a lock from pool \a pl accounting. - */ -void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock) -{ - /* - * Filter out FLOCK locks. Read above comment in ldlm_pool_add(). - */ - if (lock->l_resource->lr_type == LDLM_FLOCK) - return; - - LASSERT(atomic_read(&pl->pl_granted) > 0); - atomic_dec(&pl->pl_granted); - atomic_inc(&pl->pl_cancel_rate); - - lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT); -} - -/** - * Returns current \a pl SLV. - * - * \pre ->pl_lock is not locked. - */ -__u64 ldlm_pool_get_slv(struct ldlm_pool *pl) -{ - __u64 slv; - - spin_lock(&pl->pl_lock); - slv = pl->pl_server_lock_volume; - spin_unlock(&pl->pl_lock); - return slv; -} - -/** - * Sets passed \a clv to \a pl. - * - * \pre ->pl_lock is not locked. - */ -void ldlm_pool_set_clv(struct ldlm_pool *pl, __u64 clv) -{ - spin_lock(&pl->pl_lock); - pl->pl_client_lock_volume = clv; - spin_unlock(&pl->pl_lock); -} - -/** - * Returns current LVF from \a pl. - */ -__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl) -{ - return atomic_read(&pl->pl_lock_volume_factor); -} - -static int ldlm_pool_granted(struct ldlm_pool *pl) -{ - return atomic_read(&pl->pl_granted); -} - -/* - * count locks from all namespaces (if possible). Returns number of - * cached locks. - */ -static unsigned long ldlm_pools_count(enum ldlm_side client, gfp_t gfp_mask) -{ - unsigned long total = 0; - int nr_ns; - struct ldlm_namespace *ns; - struct ldlm_namespace *ns_old = NULL; /* loop detection */ - - if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) - return 0; - - CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n", - client == LDLM_NAMESPACE_CLIENT ? "client" : "server"); - - /* - * Find out how many resources we may release. 
- */ - for (nr_ns = ldlm_namespace_nr_read(client); - nr_ns > 0; nr_ns--) { - mutex_lock(ldlm_namespace_lock(client)); - if (list_empty(ldlm_namespace_list(client))) { - mutex_unlock(ldlm_namespace_lock(client)); - return 0; - } - ns = ldlm_namespace_first_locked(client); - - if (ns == ns_old) { - mutex_unlock(ldlm_namespace_lock(client)); - break; - } - - if (ldlm_ns_empty(ns)) { - ldlm_namespace_move_to_inactive_locked(ns, client); - mutex_unlock(ldlm_namespace_lock(client)); - continue; - } - - if (!ns_old) - ns_old = ns; - - ldlm_namespace_get(ns); - ldlm_namespace_move_to_active_locked(ns, client); - mutex_unlock(ldlm_namespace_lock(client)); - total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask); - ldlm_namespace_put(ns); - } - - return total; -} - -static unsigned long ldlm_pools_scan(enum ldlm_side client, int nr, - gfp_t gfp_mask) -{ - unsigned long freed = 0; - int tmp, nr_ns; - struct ldlm_namespace *ns; - - if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS)) - return -1; - - /* - * Shrink at least ldlm_namespace_nr_read(client) namespaces. - */ - for (tmp = nr_ns = ldlm_namespace_nr_read(client); - tmp > 0; tmp--) { - int cancel, nr_locks; - - /* - * Do not call shrink under ldlm_namespace_lock(client) - */ - mutex_lock(ldlm_namespace_lock(client)); - if (list_empty(ldlm_namespace_list(client))) { - mutex_unlock(ldlm_namespace_lock(client)); - break; - } - ns = ldlm_namespace_first_locked(client); - ldlm_namespace_get(ns); - ldlm_namespace_move_to_active_locked(ns, client); - mutex_unlock(ldlm_namespace_lock(client)); - - nr_locks = ldlm_pool_granted(&ns->ns_pool); - /* - * We use to shrink propotionally but with new shrinker API, - * we lost the total number of freeable locks. - */ - cancel = 1 + min_t(int, nr_locks, nr / nr_ns); - freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask); - ldlm_namespace_put(ns); - } - /* - * we only decrease the SLV in server pools shrinker, return - * SHRINK_STOP to kernel to avoid needless loop. 
LU-1128 - */ - return freed; -} - -static unsigned long ldlm_pools_cli_count(struct shrinker *s, - struct shrink_control *sc) -{ - return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask); -} - -static unsigned long ldlm_pools_cli_scan(struct shrinker *s, - struct shrink_control *sc) -{ - return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan, - sc->gfp_mask); -} - -static void ldlm_pools_recalc(struct work_struct *ws); -static DECLARE_DELAYED_WORK(ldlm_recalc_pools, ldlm_pools_recalc); - -static void ldlm_pools_recalc(struct work_struct *ws) -{ - enum ldlm_side client = LDLM_NAMESPACE_CLIENT; - struct ldlm_namespace *ns; - struct ldlm_namespace *ns_old = NULL; - /* seconds of sleep if no active namespaces */ - int time = LDLM_POOL_CLI_DEF_RECALC_PERIOD; - int nr; - - /* - * Recalc at least ldlm_namespace_nr_read(client) namespaces. - */ - for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { - int skip; - /* - * Lock the list, get first @ns in the list, getref, move it - * to the tail, unlock and call pool recalc. This way we avoid - * calling recalc under @ns lock what is really good as we get - * rid of potential deadlock on client nodes when canceling - * locks synchronously. - */ - mutex_lock(ldlm_namespace_lock(client)); - if (list_empty(ldlm_namespace_list(client))) { - mutex_unlock(ldlm_namespace_lock(client)); - break; - } - ns = ldlm_namespace_first_locked(client); - - if (ns_old == ns) { /* Full pass complete */ - mutex_unlock(ldlm_namespace_lock(client)); - break; - } - - /* We got an empty namespace, need to move it back to inactive - * list. - * The race with parallel resource creation is fine: - * - If they do namespace_get before our check, we fail the - * check and they move this item to the end of the list anyway - * - If we do the check and then they do namespace_get, then - * we move the namespace to inactive and they will move - * it back to active (synchronised by the lock, so no clash - * there). 
- */ - if (ldlm_ns_empty(ns)) { - ldlm_namespace_move_to_inactive_locked(ns, client); - mutex_unlock(ldlm_namespace_lock(client)); - continue; - } - - if (!ns_old) - ns_old = ns; - - spin_lock(&ns->ns_lock); - /* - * skip ns which is being freed, and we don't want to increase - * its refcount again, not even temporarily. bz21519 & LU-499. - */ - if (ns->ns_stopping) { - skip = 1; - } else { - skip = 0; - ldlm_namespace_get(ns); - } - spin_unlock(&ns->ns_lock); - - ldlm_namespace_move_to_active_locked(ns, client); - mutex_unlock(ldlm_namespace_lock(client)); - - /* - * After setup is done - recalc the pool. - */ - if (!skip) { - int ttime = ldlm_pool_recalc(&ns->ns_pool); - - if (ttime < time) - time = ttime; - - ldlm_namespace_put(ns); - } - } - - /* Wake up the blocking threads from time to time. */ - ldlm_bl_thread_wakeup(); - - schedule_delayed_work(&ldlm_recalc_pools, time * HZ); -} - -static int ldlm_pools_thread_start(void) -{ - schedule_delayed_work(&ldlm_recalc_pools, 0); - - return 0; -} - -static void ldlm_pools_thread_stop(void) -{ - cancel_delayed_work_sync(&ldlm_recalc_pools); -} - -static struct shrinker ldlm_pools_cli_shrinker = { - .count_objects = ldlm_pools_cli_count, - .scan_objects = ldlm_pools_cli_scan, - .seeks = DEFAULT_SEEKS, -}; - -int ldlm_pools_init(void) -{ - int rc; - - rc = ldlm_pools_thread_start(); - if (!rc) - rc = register_shrinker(&ldlm_pools_cli_shrinker); - - return rc; -} - -void ldlm_pools_fini(void) -{ - unregister_shrinker(&ldlm_pools_cli_shrinker); - - ldlm_pools_thread_stop(); -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c deleted file mode 100644 index cdc52eed6d85..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c +++ /dev/null @@ -1,2033 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -/** - * This file contains Asynchronous System Trap (AST) handlers and related - * LDLM request-processing routines. - * - * An AST is a callback issued on a lock when its state is changed. There are - * several different types of ASTs (callbacks) registered for each lock: - * - * - completion AST: when a lock is enqueued by some process, but cannot be - * granted immediately due to other conflicting locks on the same resource, - * the completion AST is sent to notify the caller when the lock is - * eventually granted - * - * - blocking AST: when a lock is granted to some process, if another process - * enqueues a conflicting (blocking) lock on a resource, a blocking AST is - * sent to notify the holder(s) of the lock(s) of the conflicting lock - * request. The lock holder(s) must release their lock(s) on that resource in - * a timely manner or be evicted by the server. 
- * - * - glimpse AST: this is used when a process wants information about a lock - * (i.e. the lock value block (LVB)) but does not necessarily require holding - * the lock. If the resource is locked, the lock holder(s) are sent glimpse - * ASTs and the LVB is returned to the caller, and lock holder(s) may CANCEL - * their lock(s) if they are idle. If the resource is not locked, the server - * may grant the lock. - */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include -#include -#include -#include -#include - -#include "ldlm_internal.h" - -unsigned int ldlm_enqueue_min = OBD_TIMEOUT_DEFAULT; -module_param(ldlm_enqueue_min, uint, 0644); -MODULE_PARM_DESC(ldlm_enqueue_min, "lock enqueue timeout minimum"); - -/* in client side, whether the cached locks will be canceled before replay */ -unsigned int ldlm_cancel_unused_locks_before_replay = 1; - -struct ldlm_async_args { - struct lustre_handle lock_handle; -}; - -/** - * ldlm_request_bufsize - * - * @count: number of ldlm handles - * @type: ldlm opcode - * - * If opcode=LDLM_ENQUEUE, 1 slot is already occupied, - * LDLM_LOCKREQ_HANDLE -1 slots are available. - * Otherwise, LDLM_LOCKREQ_HANDLE slots are available. 
- * - * Return: size of the request buffer - */ -static int ldlm_request_bufsize(int count, int type) -{ - int avail = LDLM_LOCKREQ_HANDLES; - - if (type == LDLM_ENQUEUE) - avail -= LDLM_ENQUEUE_CANCEL_OFF; - - if (count > avail) - avail = (count - avail) * sizeof(struct lustre_handle); - else - avail = 0; - - return sizeof(struct ldlm_request) + avail; -} - -static void ldlm_expired_completion_wait(struct ldlm_lock *lock, __u32 conn_cnt) -{ - struct obd_import *imp; - struct obd_device *obd; - - if (!lock->l_conn_export) { - static unsigned long next_dump, last_dump; - - LDLM_ERROR(lock, - "lock timed out (enqueued at %lld, %llds ago); not entering recovery in server code, just going back to sleep", - (s64)lock->l_last_activity, - (s64)(ktime_get_real_seconds() - - lock->l_last_activity)); - if (time_after(jiffies, next_dump)) { - last_dump = next_dump; - next_dump = jiffies + 300 * HZ; - ldlm_namespace_dump(D_DLMTRACE, - ldlm_lock_to_ns(lock)); - if (last_dump == 0) - libcfs_debug_dumplog(); - } - return; - } - - obd = lock->l_conn_export->exp_obd; - imp = obd->u.cli.cl_import; - ptlrpc_fail_import(imp, conn_cnt); - LDLM_ERROR(lock, - "lock timed out (enqueued at %lld, %llds ago), entering recovery for %s@%s", - (s64)lock->l_last_activity, - (s64)(ktime_get_real_seconds() - lock->l_last_activity), - obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid); -} - -/** - * Calculate the Completion timeout (covering enqueue, BL AST, data flush, - * lock cancel, and their replies). Used for lock completion timeout on the - * client side. - * - * \param[in] lock lock which is waiting the completion callback - * - * \retval timeout in seconds to wait for the server reply - */ -/* We use the same basis for both server side and client side functions - * from a single node. 
- */ -static unsigned int ldlm_cp_timeout(struct ldlm_lock *lock) -{ - unsigned int timeout; - - if (AT_OFF) - return obd_timeout; - - /* - * Wait a long time for enqueue - server may have to callback a - * lock from another client. Server will evict the other client if it - * doesn't respond reasonably, and then give us the lock. - */ - timeout = at_get(ldlm_lock_to_ns_at(lock)); - return max(3 * timeout, ldlm_enqueue_min); -} - -/** - * Helper function for ldlm_completion_ast(), updating timings when lock is - * actually granted. - */ -static int ldlm_completion_tail(struct ldlm_lock *lock, void *data) -{ - long delay; - int result = 0; - - if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) { - LDLM_DEBUG(lock, "client-side enqueue: destroyed"); - result = -EIO; - } else if (!data) { - LDLM_DEBUG(lock, "client-side enqueue: granted"); - } else { - /* Take into AT only CP RPC, not immediately granted locks */ - delay = ktime_get_real_seconds() - lock->l_last_activity; - LDLM_DEBUG(lock, "client-side enqueue: granted after %lds", - delay); - - /* Update our time estimate */ - at_measured(ldlm_lock_to_ns_at(lock), delay); - } - return result; -} - -/** - * Generic LDLM "completion" AST. This is called in several cases: - * - * - when a reply to an ENQUEUE RPC is received from the server - * (ldlm_cli_enqueue_fini()). Lock might be granted or not granted at - * this point (determined by flags); - * - * - when LDLM_CP_CALLBACK RPC comes to client to notify it that lock has - * been granted; - * - * - when ldlm_lock_match(LDLM_FL_LVB_READY) is about to wait until lock - * gets correct lvb; - * - * - to force all locks when resource is destroyed (cleanup_resource()); - * - * - during lock conversion (not used currently). - * - * If lock is not granted in the first case, this function waits until second - * or penultimate cases happen in some other thread. 
- * - */ -int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) -{ - /* XXX ALLOCATE - 160 bytes */ - struct obd_device *obd; - struct obd_import *imp = NULL; - __u32 timeout; - __u32 conn_cnt = 0; - int rc = 0; - - if (flags == LDLM_FL_WAIT_NOREPROC) { - LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock"); - goto noreproc; - } - - if (!(flags & LDLM_FL_BLOCKED_MASK)) { - wake_up(&lock->l_waitq); - return 0; - } - - LDLM_DEBUG(lock, - "client-side enqueue returned a blocked lock, sleeping"); - -noreproc: - - obd = class_exp2obd(lock->l_conn_export); - - /* if this is a local lock, then there is no import */ - if (obd) - imp = obd->u.cli.cl_import; - - timeout = ldlm_cp_timeout(lock); - - lock->l_last_activity = ktime_get_real_seconds(); - - if (imp) { - spin_lock(&imp->imp_lock); - conn_cnt = imp->imp_conn_cnt; - spin_unlock(&imp->imp_lock); - } - if (OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST, - OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) { - ldlm_set_fail_loc(lock); - rc = -EINTR; - } else { - /* Go to sleep until the lock is granted or canceled. */ - if (!ldlm_is_no_timeout(lock)) { - /* Wait uninterruptible for a while first */ - rc = wait_event_idle_timeout(lock->l_waitq, - is_granted_or_cancelled(lock), - timeout * HZ); - if (rc == 0) - ldlm_expired_completion_wait(lock, conn_cnt); - } - /* Now wait abortable */ - if (rc == 0) - rc = l_wait_event_abortable(lock->l_waitq, - is_granted_or_cancelled(lock)); - else - rc = 0; - } - - if (rc) { - LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", - rc); - return rc; - } - - return ldlm_completion_tail(lock, data); -} -EXPORT_SYMBOL(ldlm_completion_ast); - -static void failed_lock_cleanup(struct ldlm_namespace *ns, - struct ldlm_lock *lock, int mode) -{ - int need_cancel = 0; - - /* Set a flag to prevent us from sending a CANCEL (bug 407) */ - lock_res_and_lock(lock); - /* Check that lock is not granted or failed, we might race. 
*/ - if ((lock->l_req_mode != lock->l_granted_mode) && - !ldlm_is_failed(lock)) { - /* Make sure that this lock will not be found by raced - * bl_ast and -EINVAL reply is sent to server anyways. - * bug 17645 - */ - lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED | - LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING; - need_cancel = 1; - } - unlock_res_and_lock(lock); - - if (need_cancel) - LDLM_DEBUG(lock, - "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING"); - else - LDLM_DEBUG(lock, "lock was granted or failed in race"); - - /* XXX - HACK because we shouldn't call ldlm_lock_destroy() - * from llite/file.c/ll_file_flock(). - */ - /* This code makes for the fact that we do not have blocking handler on - * a client for flock locks. As such this is the place where we must - * completely kill failed locks. (interrupted and those that - * were waiting to be granted when server evicted us. - */ - if (lock->l_resource->lr_type == LDLM_FLOCK) { - lock_res_and_lock(lock); - if (!ldlm_is_destroyed(lock)) { - ldlm_resource_unlink_lock(lock); - ldlm_lock_decref_internal_nolock(lock, mode); - ldlm_lock_destroy_nolock(lock); - } - unlock_res_and_lock(lock); - } else { - ldlm_lock_decref_internal(lock, mode); - } -} - -/** - * Finishing portion of client lock enqueue code. - * - * Called after receiving reply from server. - */ -int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req, - enum ldlm_type type, __u8 with_policy, - enum ldlm_mode mode, - __u64 *flags, void *lvb, __u32 lvb_len, - const struct lustre_handle *lockh, int rc) -{ - struct ldlm_namespace *ns = exp->exp_obd->obd_namespace; - int is_replay = *flags & LDLM_FL_REPLAY; - struct ldlm_lock *lock; - struct ldlm_reply *reply; - int cleanup_phase = 1; - - lock = ldlm_handle2lock(lockh); - /* ldlm_cli_enqueue is holding a reference on this lock. 
*/ - if (!lock) { - LASSERT(type == LDLM_FLOCK); - return -ENOLCK; - } - - LASSERTF(ergo(lvb_len != 0, lvb_len == lock->l_lvb_len), - "lvb_len = %d, l_lvb_len = %d\n", lvb_len, lock->l_lvb_len); - - if (rc != ELDLM_OK) { - LASSERT(!is_replay); - LDLM_DEBUG(lock, "client-side enqueue END (%s)", - rc == ELDLM_LOCK_ABORTED ? "ABORTED" : "FAILED"); - - if (rc != ELDLM_LOCK_ABORTED) - goto cleanup; - } - - /* Before we return, swab the reply */ - reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); - if (!reply) { - rc = -EPROTO; - goto cleanup; - } - - if (lvb_len > 0) { - int size = 0; - - size = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, - RCL_SERVER); - if (size < 0) { - LDLM_ERROR(lock, "Fail to get lvb_len, rc = %d", size); - rc = size; - goto cleanup; - } else if (unlikely(size > lvb_len)) { - LDLM_ERROR(lock, - "Replied LVB is larger than expectation, expected = %d, replied = %d", - lvb_len, size); - rc = -EINVAL; - goto cleanup; - } - lvb_len = size; - } - - if (rc == ELDLM_LOCK_ABORTED) { - if (lvb_len > 0 && lvb) - rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER, - lvb, lvb_len); - if (rc == 0) - rc = ELDLM_LOCK_ABORTED; - goto cleanup; - } - - /* lock enqueued on the server */ - cleanup_phase = 0; - - lock_res_and_lock(lock); - lock->l_remote_handle = reply->lock_handle; - - *flags = ldlm_flags_from_wire(reply->lock_flags); - lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags & - LDLM_FL_INHERIT_MASK); - unlock_res_and_lock(lock); - - CDEBUG(D_INFO, "local: %p, remote cookie: %#llx, flags: 0x%llx\n", - lock, reply->lock_handle.cookie, *flags); - - /* If enqueue returned a blocked lock but the completion handler has - * already run, then it fixed up the resource and we don't need to do it - * again. 
- */ - if ((*flags) & LDLM_FL_LOCK_CHANGED) { - int newmode = reply->lock_desc.l_req_mode; - - LASSERT(!is_replay); - if (newmode && newmode != lock->l_req_mode) { - LDLM_DEBUG(lock, "server returned different mode %s", - ldlm_lockname[newmode]); - lock->l_req_mode = newmode; - } - - if (!ldlm_res_eq(&reply->lock_desc.l_resource.lr_name, - &lock->l_resource->lr_name)) { - CDEBUG(D_INFO, - "remote intent success, locking " DLDLMRES " instead of " DLDLMRES "\n", - PLDLMRES(&reply->lock_desc.l_resource), - PLDLMRES(lock->l_resource)); - - rc = ldlm_lock_change_resource(ns, lock, - &reply->lock_desc.l_resource.lr_name); - if (rc || !lock->l_resource) { - rc = -ENOMEM; - goto cleanup; - } - LDLM_DEBUG(lock, "client-side enqueue, new resource"); - } - if (with_policy) - if (!(type == LDLM_IBITS && - !(exp_connect_flags(exp) & OBD_CONNECT_IBITS))) - /* We assume lock type cannot change on server*/ - ldlm_convert_policy_to_local(exp, - lock->l_resource->lr_type, - &reply->lock_desc.l_policy_data, - &lock->l_policy_data); - if (type != LDLM_PLAIN) - LDLM_DEBUG(lock, - "client-side enqueue, new policy data"); - } - - if ((*flags) & LDLM_FL_AST_SENT) { - lock_res_and_lock(lock); - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST; - unlock_res_and_lock(lock); - LDLM_DEBUG(lock, "enqueue reply includes blocking AST"); - } - - /* If the lock has already been granted by a completion AST, don't - * clobber the LVB with an older one. - */ - if (lvb_len > 0) { - /* We must lock or a racing completion might update lvb without - * letting us know and we'll clobber the correct value. 
- * Cannot unlock after the check either, as that still leaves - * a tiny window for completion to get in - */ - lock_res_and_lock(lock); - if (lock->l_req_mode != lock->l_granted_mode) - rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_SERVER, - lock->l_lvb_data, lvb_len); - unlock_res_and_lock(lock); - if (rc < 0) { - cleanup_phase = 1; - goto cleanup; - } - } - - if (!is_replay) { - rc = ldlm_lock_enqueue(ns, &lock, NULL, flags); - if (lock->l_completion_ast) { - int err = lock->l_completion_ast(lock, *flags, NULL); - - if (!rc) - rc = err; - if (rc) - cleanup_phase = 1; - } - } - - if (lvb_len > 0 && lvb) { - /* Copy the LVB here, and not earlier, because the completion - * AST (if any) can override what we got in the reply - */ - memcpy(lvb, lock->l_lvb_data, lvb_len); - } - - LDLM_DEBUG(lock, "client-side enqueue END"); -cleanup: - if (cleanup_phase == 1 && rc) - failed_lock_cleanup(ns, lock, mode); - /* Put lock 2 times, the second reference is held by ldlm_cli_enqueue */ - LDLM_LOCK_PUT(lock); - LDLM_LOCK_RELEASE(lock); - return rc; -} -EXPORT_SYMBOL(ldlm_cli_enqueue_fini); - -/** - * Estimate number of lock handles that would fit into request of given - * size. PAGE_SIZE-512 is to allow TCP/IP and LNET headers to fit into - * a single page on the send/receive side. XXX: 512 should be changed to - * more adequate value. 
- */ -static inline int ldlm_req_handles_avail(int req_size, int off) -{ - int avail; - - avail = min_t(int, LDLM_MAXREQSIZE, PAGE_SIZE - 512) - req_size; - if (likely(avail >= 0)) - avail /= (int)sizeof(struct lustre_handle); - else - avail = 0; - avail += LDLM_LOCKREQ_HANDLES - off; - - return avail; -} - -static inline int ldlm_capsule_handles_avail(struct req_capsule *pill, - enum req_location loc, - int off) -{ - u32 size = req_capsule_msg_size(pill, loc); - - return ldlm_req_handles_avail(size, off); -} - -static inline int ldlm_format_handles_avail(struct obd_import *imp, - const struct req_format *fmt, - enum req_location loc, int off) -{ - u32 size = req_capsule_fmt_size(imp->imp_msg_magic, fmt, loc); - - return ldlm_req_handles_avail(size, off); -} - -/** - * Cancel LRU locks and pack them into the enqueue request. Pack there the given - * \a count locks in \a cancels. - * - * This is to be called by functions preparing their own requests that - * might contain lists of locks to cancel in addition to actual operation - * that needs to be performed. - */ -int ldlm_prep_elc_req(struct obd_export *exp, struct ptlrpc_request *req, - int version, int opc, int canceloff, - struct list_head *cancels, int count) -{ - struct ldlm_namespace *ns = exp->exp_obd->obd_namespace; - struct req_capsule *pill = &req->rq_pill; - struct ldlm_request *dlm = NULL; - int flags, avail, to_free, pack = 0; - LIST_HEAD(head); - int rc; - - if (!cancels) - cancels = &head; - if (ns_connect_cancelset(ns)) { - /* Estimate the amount of available space in the request. */ - req_capsule_filled_sizes(pill, RCL_CLIENT); - avail = ldlm_capsule_handles_avail(pill, RCL_CLIENT, canceloff); - - flags = ns_connect_lru_resize(ns) ? - LDLM_LRU_FLAG_LRUR_NO_WAIT : LDLM_LRU_FLAG_AGED; - to_free = !ns_connect_lru_resize(ns) && - opc == LDLM_ENQUEUE ? 1 : 0; - - /* Cancel LRU locks here _only_ if the server supports - * EARLY_CANCEL. 
Otherwise we have to send extra CANCEL - * RPC, which will make us slower. - */ - if (avail > count) - count += ldlm_cancel_lru_local(ns, cancels, to_free, - avail - count, 0, flags); - if (avail > count) - pack = count; - else - pack = avail; - req_capsule_set_size(pill, &RMF_DLM_REQ, RCL_CLIENT, - ldlm_request_bufsize(pack, opc)); - } - - rc = ptlrpc_request_pack(req, version, opc); - if (rc) { - ldlm_lock_list_put(cancels, l_bl_ast, count); - return rc; - } - - if (ns_connect_cancelset(ns)) { - if (canceloff) { - dlm = req_capsule_client_get(pill, &RMF_DLM_REQ); - LASSERT(dlm); - /* Skip first lock handler in ldlm_request_pack(), - * this method will increment @lock_count according - * to the lock handle amount actually written to - * the buffer. - */ - dlm->lock_count = canceloff; - } - /* Pack into the request @pack lock handles. */ - ldlm_cli_cancel_list(cancels, pack, req, 0); - /* Prepare and send separate cancel RPC for others. */ - ldlm_cli_cancel_list(cancels, count - pack, NULL, 0); - } else { - ldlm_lock_list_put(cancels, l_bl_ast, count); - } - return 0; -} -EXPORT_SYMBOL(ldlm_prep_elc_req); - -int ldlm_prep_enqueue_req(struct obd_export *exp, struct ptlrpc_request *req, - struct list_head *cancels, int count) -{ - return ldlm_prep_elc_req(exp, req, LUSTRE_DLM_VERSION, LDLM_ENQUEUE, - LDLM_ENQUEUE_CANCEL_OFF, cancels, count); -} -EXPORT_SYMBOL(ldlm_prep_enqueue_req); - -static struct ptlrpc_request *ldlm_enqueue_pack(struct obd_export *exp, - int lvb_len) -{ - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE); - if (!req) - return ERR_PTR(-ENOMEM); - - rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); - if (rc) { - ptlrpc_request_free(req); - return ERR_PTR(rc); - } - - req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len); - ptlrpc_request_set_replen(req); - return req; -} - -/** - * Client-side lock enqueue. 
- * - * If a request has some specific initialisation it is passed in \a reqp, - * otherwise it is created in ldlm_cli_enqueue. - * - * Supports sync and async requests, pass \a async flag accordingly. If a - * request was created in ldlm_cli_enqueue and it is the async request, - * pass it to the caller in \a reqp. - */ -int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, - struct ldlm_enqueue_info *einfo, - const struct ldlm_res_id *res_id, - union ldlm_policy_data const *policy, __u64 *flags, - void *lvb, __u32 lvb_len, enum lvb_type lvb_type, - struct lustre_handle *lockh, int async) -{ - struct ldlm_namespace *ns; - struct ldlm_lock *lock; - struct ldlm_request *body; - int is_replay = *flags & LDLM_FL_REPLAY; - int req_passed_in = 1; - int rc, err; - struct ptlrpc_request *req; - - ns = exp->exp_obd->obd_namespace; - - /* If we're replaying this lock, just check some invariants. - * If we're creating a new lock, get everything all setup nicely. - */ - if (is_replay) { - lock = ldlm_handle2lock_long(lockh, 0); - LASSERT(lock); - LDLM_DEBUG(lock, "client-side enqueue START"); - LASSERT(exp == lock->l_conn_export); - } else { - const struct ldlm_callback_suite cbs = { - .lcs_completion = einfo->ei_cb_cp, - .lcs_blocking = einfo->ei_cb_bl, - .lcs_glimpse = einfo->ei_cb_gl - }; - lock = ldlm_lock_create(ns, res_id, einfo->ei_type, - einfo->ei_mode, &cbs, einfo->ei_cbdata, - lvb_len, lvb_type); - if (IS_ERR(lock)) - return PTR_ERR(lock); - /* for the local lock, add the reference */ - ldlm_lock_addref_internal(lock, einfo->ei_mode); - ldlm_lock2handle(lock, lockh); - if (policy) - lock->l_policy_data = *policy; - - if (einfo->ei_type == LDLM_EXTENT) { - /* extent lock without policy is a bug */ - if (!policy) - LBUG(); - - lock->l_req_extent = policy->l_extent; - } - LDLM_DEBUG(lock, "client-side enqueue START, flags %llx", - *flags); - } - - lock->l_conn_export = exp; - lock->l_export = NULL; - lock->l_blocking_ast = einfo->ei_cb_bl; - 
lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL)); - lock->l_last_activity = ktime_get_real_seconds(); - - /* lock not sent to server yet */ - if (!reqp || !*reqp) { - req = ldlm_enqueue_pack(exp, lvb_len); - if (IS_ERR(req)) { - failed_lock_cleanup(ns, lock, einfo->ei_mode); - LDLM_LOCK_RELEASE(lock); - return PTR_ERR(req); - } - - req_passed_in = 0; - if (reqp) - *reqp = req; - } else { - int len; - - req = *reqp; - len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_REQ, - RCL_CLIENT); - LASSERTF(len >= sizeof(*body), "buflen[%d] = %d, not %d\n", - DLM_LOCKREQ_OFF, len, (int)sizeof(*body)); - } - - /* Dump lock data into the request buffer */ - body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); - ldlm_lock2desc(lock, &body->lock_desc); - body->lock_flags = ldlm_flags_to_wire(*flags); - body->lock_handle[0] = *lockh; - - if (async) { - LASSERT(reqp); - return 0; - } - - LDLM_DEBUG(lock, "sending request"); - - rc = ptlrpc_queue_wait(req); - - err = ldlm_cli_enqueue_fini(exp, req, einfo->ei_type, policy ? 1 : 0, - einfo->ei_mode, flags, lvb, lvb_len, - lockh, rc); - - /* If ldlm_cli_enqueue_fini did not find the lock, we need to free - * one reference that we took - */ - if (err == -ENOLCK) - LDLM_LOCK_RELEASE(lock); - else - rc = err; - - if (!req_passed_in && req) { - ptlrpc_req_finished(req); - if (reqp) - *reqp = NULL; - } - - return rc; -} -EXPORT_SYMBOL(ldlm_cli_enqueue); - -/** - * Cancel locks locally. - * Returns: - * \retval LDLM_FL_LOCAL_ONLY if there is no need for a CANCEL RPC to the server - * \retval LDLM_FL_CANCELING otherwise; - * \retval LDLM_FL_BL_AST if there is a need for a separate CANCEL RPC. 
- */ -static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock) -{ - __u64 rc = LDLM_FL_LOCAL_ONLY; - - if (lock->l_conn_export) { - bool local_only; - - LDLM_DEBUG(lock, "client-side cancel"); - /* Set this flag to prevent others from getting new references*/ - lock_res_and_lock(lock); - ldlm_set_cbpending(lock); - local_only = !!(lock->l_flags & - (LDLM_FL_LOCAL_ONLY | LDLM_FL_CANCEL_ON_BLOCK)); - ldlm_cancel_callback(lock); - rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING; - unlock_res_and_lock(lock); - - if (local_only) { - CDEBUG(D_DLMTRACE, - "not sending request (at caller's instruction)\n"); - rc = LDLM_FL_LOCAL_ONLY; - } - ldlm_lock_cancel(lock); - } else { - LDLM_ERROR(lock, "Trying to cancel local lock"); - LBUG(); - } - - return rc; -} - -/** - * Pack \a count locks in \a head into ldlm_request buffer of request \a req. - */ -static void ldlm_cancel_pack(struct ptlrpc_request *req, - struct list_head *head, int count) -{ - struct ldlm_request *dlm; - struct ldlm_lock *lock; - int max, packed = 0; - - dlm = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); - LASSERT(dlm); - - /* Check the room in the request buffer. */ - max = req_capsule_get_size(&req->rq_pill, &RMF_DLM_REQ, RCL_CLIENT) - - sizeof(struct ldlm_request); - max /= sizeof(struct lustre_handle); - max += LDLM_LOCKREQ_HANDLES; - LASSERT(max >= dlm->lock_count + count); - - /* XXX: it would be better to pack lock handles grouped by resource. - * so that the server cancel would call filter_lvbo_update() less - * frequently. - */ - list_for_each_entry(lock, head, l_bl_ast) { - if (!count--) - break; - LASSERT(lock->l_conn_export); - /* Pack the lock handle to the given request buffer. */ - LDLM_DEBUG(lock, "packing"); - dlm->lock_handle[dlm->lock_count++] = lock->l_remote_handle; - packed++; - } - CDEBUG(D_DLMTRACE, "%d locks packed\n", packed); -} - -/** - * Prepare and send a batched cancel RPC. It will include \a count lock - * handles of locks given in \a cancels list. 
- */ -static int ldlm_cli_cancel_req(struct obd_export *exp, - struct list_head *cancels, - int count, enum ldlm_cancel_flags flags) -{ - struct ptlrpc_request *req = NULL; - struct obd_import *imp; - int free, sent = 0; - int rc = 0; - - LASSERT(exp); - LASSERT(count > 0); - - CFS_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL, cfs_fail_val); - - if (CFS_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_RACE)) - return count; - - free = ldlm_format_handles_avail(class_exp2cliimp(exp), - &RQF_LDLM_CANCEL, RCL_CLIENT, 0); - if (count > free) - count = free; - - while (1) { - imp = class_exp2cliimp(exp); - if (!imp || imp->imp_invalid) { - CDEBUG(D_DLMTRACE, - "skipping cancel on invalid import %p\n", imp); - return count; - } - - req = ptlrpc_request_alloc(imp, &RQF_LDLM_CANCEL); - if (!req) { - rc = -ENOMEM; - goto out; - } - - req_capsule_filled_sizes(&req->rq_pill, RCL_CLIENT); - req_capsule_set_size(&req->rq_pill, &RMF_DLM_REQ, RCL_CLIENT, - ldlm_request_bufsize(count, LDLM_CANCEL)); - - rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_CANCEL); - if (rc) { - ptlrpc_request_free(req); - goto out; - } - - req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL; - req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL; - ptlrpc_at_set_req_timeout(req); - - ldlm_cancel_pack(req, cancels, count); - - ptlrpc_request_set_replen(req); - if (flags & LCF_ASYNC) { - ptlrpcd_add_req(req); - sent = count; - goto out; - } - - rc = ptlrpc_queue_wait(req); - if (rc == LUSTRE_ESTALE) { - CDEBUG(D_DLMTRACE, - "client/server (nid %s) out of sync -- not fatal\n", - libcfs_nid2str(req->rq_import-> - imp_connection->c_peer.nid)); - rc = 0; - } else if (rc == -ETIMEDOUT && /* check there was no reconnect*/ - req->rq_import_generation == imp->imp_generation) { - ptlrpc_req_finished(req); - continue; - } else if (rc != ELDLM_OK) { - /* -ESHUTDOWN is common on umount */ - CDEBUG_LIMIT(rc == -ESHUTDOWN ? 
D_DLMTRACE : D_ERROR, - "Got rc %d from cancel RPC: canceling anyway\n", - rc); - break; - } - sent = count; - break; - } - - ptlrpc_req_finished(req); -out: - return sent ? sent : rc; -} - -static inline struct ldlm_pool *ldlm_imp2pl(struct obd_import *imp) -{ - return &imp->imp_obd->obd_namespace->ns_pool; -} - -/** - * Update client's OBD pool related fields with new SLV and Limit from \a req. - */ -int ldlm_cli_update_pool(struct ptlrpc_request *req) -{ - struct obd_device *obd; - __u64 new_slv; - __u32 new_limit; - - if (unlikely(!req->rq_import || !req->rq_import->imp_obd || - !imp_connect_lru_resize(req->rq_import))) { - /* - * Do nothing for corner cases. - */ - return 0; - } - - /* In some cases RPC may contain SLV and limit zeroed out. This - * is the case when server does not support LRU resize feature. - * This is also possible in some recovery cases when server-side - * reqs have no reference to the OBD export and thus access to - * server-side namespace is not possible. - */ - if (lustre_msg_get_slv(req->rq_repmsg) == 0 || - lustre_msg_get_limit(req->rq_repmsg) == 0) { - DEBUG_REQ(D_HA, req, - "Zero SLV or Limit found (SLV: %llu, Limit: %u)", - lustre_msg_get_slv(req->rq_repmsg), - lustre_msg_get_limit(req->rq_repmsg)); - return 0; - } - - new_limit = lustre_msg_get_limit(req->rq_repmsg); - new_slv = lustre_msg_get_slv(req->rq_repmsg); - obd = req->rq_import->imp_obd; - - /* Set new SLV and limit in OBD fields to make them accessible - * to the pool thread. We do not access obd_namespace and pool - * directly here as there is no reliable way to make sure that - * they are still alive at cleanup time. Evil races are possible - * which may cause Oops at that time. - */ - write_lock(&obd->obd_pool_lock); - obd->obd_pool_slv = new_slv; - obd->obd_pool_limit = new_limit; - write_unlock(&obd->obd_pool_lock); - - return 0; -} - -/** - * Client side lock cancel. - * - * Lock must not have any readers or writers by this time. 
- */ -int ldlm_cli_cancel(const struct lustre_handle *lockh, - enum ldlm_cancel_flags cancel_flags) -{ - struct obd_export *exp; - int avail, flags, count = 1; - __u64 rc = 0; - struct ldlm_namespace *ns; - struct ldlm_lock *lock; - LIST_HEAD(cancels); - - lock = ldlm_handle2lock_long(lockh, 0); - if (!lock) { - LDLM_DEBUG_NOLOCK("lock is already being destroyed"); - return 0; - } - - lock_res_and_lock(lock); - /* Lock is being canceled and the caller doesn't want to wait */ - if (ldlm_is_canceling(lock) && (cancel_flags & LCF_ASYNC)) { - unlock_res_and_lock(lock); - LDLM_LOCK_RELEASE(lock); - return 0; - } - - ldlm_set_canceling(lock); - unlock_res_and_lock(lock); - - rc = ldlm_cli_cancel_local(lock); - if (rc == LDLM_FL_LOCAL_ONLY || cancel_flags & LCF_LOCAL) { - LDLM_LOCK_RELEASE(lock); - return 0; - } - /* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL - * RPC which goes to canceld portal, so we can cancel other LRU locks - * here and send them all as one LDLM_CANCEL RPC. - */ - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, &cancels); - - exp = lock->l_conn_export; - if (exp_connect_cancelset(exp)) { - avail = ldlm_format_handles_avail(class_exp2cliimp(exp), - &RQF_LDLM_CANCEL, - RCL_CLIENT, 0); - LASSERT(avail > 0); - - ns = ldlm_lock_to_ns(lock); - flags = ns_connect_lru_resize(ns) ? - LDLM_LRU_FLAG_LRUR : LDLM_LRU_FLAG_AGED; - count += ldlm_cancel_lru_local(ns, &cancels, 0, avail - 1, - LCF_BL_AST, flags); - } - ldlm_cli_cancel_list(&cancels, count, NULL, cancel_flags); - return 0; -} -EXPORT_SYMBOL(ldlm_cli_cancel); - -/** - * Locally cancel up to \a count locks in list \a cancels. - * Return the number of cancelled locks. 
- */ -int ldlm_cli_cancel_list_local(struct list_head *cancels, int count, - enum ldlm_cancel_flags flags) -{ - LIST_HEAD(head); - struct ldlm_lock *lock, *next; - int left = 0, bl_ast = 0; - __u64 rc; - - left = count; - list_for_each_entry_safe(lock, next, cancels, l_bl_ast) { - if (left-- == 0) - break; - - if (flags & LCF_LOCAL) { - rc = LDLM_FL_LOCAL_ONLY; - ldlm_lock_cancel(lock); - } else { - rc = ldlm_cli_cancel_local(lock); - } - /* Until we have compound requests and can send LDLM_CANCEL - * requests batched with generic RPCs, we need to send cancels - * with the LDLM_FL_BL_AST flag in a separate RPC from - * the one being generated now. - */ - if (!(flags & LCF_BL_AST) && (rc == LDLM_FL_BL_AST)) { - LDLM_DEBUG(lock, "Cancel lock separately"); - list_del_init(&lock->l_bl_ast); - list_add(&lock->l_bl_ast, &head); - bl_ast++; - continue; - } - if (rc == LDLM_FL_LOCAL_ONLY) { - /* CANCEL RPC should not be sent to server. */ - list_del_init(&lock->l_bl_ast); - LDLM_LOCK_RELEASE(lock); - count--; - } - } - if (bl_ast > 0) { - count -= bl_ast; - ldlm_cli_cancel_list(&head, bl_ast, NULL, 0); - } - - return count; -} - -/** - * Cancel as many locks as possible w/o sending any RPCs (e.g. to write back - * dirty data, to close a file, ...) or waiting for any RPCs in-flight (e.g. - * readahead requests, ...) - */ -static enum ldlm_policy_res -ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock, - int unused, int added, int count) -{ - enum ldlm_policy_res result = LDLM_POLICY_CANCEL_LOCK; - - /* don't check added & count since we want to process all locks - * from unused list. - * It's fine to not take lock to access lock->l_resource since - * the lock has already been granted so it won't change. 
- */ - switch (lock->l_resource->lr_type) { - case LDLM_EXTENT: - case LDLM_IBITS: - if (ns->ns_cancel && ns->ns_cancel(lock) != 0) - break; - /* fall through */ - default: - result = LDLM_POLICY_SKIP_LOCK; - lock_res_and_lock(lock); - ldlm_set_skipped(lock); - unlock_res_and_lock(lock); - break; - } - - return result; -} - -/** - * Callback function for LRU-resize policy. Decides whether to keep - * \a lock in LRU for current \a LRU size \a unused, added in current - * scan \a added and number of locks to be preferably canceled \a count. - * - * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning - * - * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU - */ -static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) -{ - unsigned long cur = jiffies; - struct ldlm_pool *pl = &ns->ns_pool; - __u64 slv, lvf, lv; - unsigned long la; - - /* Stop LRU processing when we reach past @count or have checked all - * locks in LRU. - */ - if (count && added >= count) - return LDLM_POLICY_KEEP_LOCK; - - /* - * Despite of the LV, It doesn't make sense to keep the lock which - * is unused for ns_max_age time. - */ - if (time_after(jiffies, lock->l_last_used + ns->ns_max_age)) - return LDLM_POLICY_CANCEL_LOCK; - - slv = ldlm_pool_get_slv(pl); - lvf = ldlm_pool_get_lvf(pl); - la = (cur - lock->l_last_used) / HZ; - lv = lvf * la * unused; - - /* Inform pool about current CLV to see it via debugfs. */ - ldlm_pool_set_clv(pl, lv); - - /* Stop when SLV is not yet come from server or lv is smaller than - * it is. - */ - if (slv == 0 || lv < slv) - return LDLM_POLICY_KEEP_LOCK; - - return LDLM_POLICY_CANCEL_LOCK; -} - -/** - * Callback function for debugfs used policy. Makes decision whether to keep - * \a lock in LRU for current \a LRU size \a unused, added in current scan \a - * added and number of locks to be preferably canceled \a count. 
- * - * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning - * - * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU - */ -static enum ldlm_policy_res ldlm_cancel_passed_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) -{ - /* Stop LRU processing when we reach past @count or have checked all - * locks in LRU. - */ - return (added >= count) ? - LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; -} - -/** - * Callback function for aged policy. Makes decision whether to keep \a lock in - * LRU for current LRU size \a unused, added in current scan \a added and - * number of locks to be preferably canceled \a count. - * - * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning - * - * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU - */ -static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) -{ - if ((added >= count) && - time_before(jiffies, lock->l_last_used + ns->ns_max_age)) - return LDLM_POLICY_KEEP_LOCK; - - return LDLM_POLICY_CANCEL_LOCK; -} - -static enum ldlm_policy_res -ldlm_cancel_lrur_no_wait_policy(struct ldlm_namespace *ns, - struct ldlm_lock *lock, - int unused, int added, - int count) -{ - enum ldlm_policy_res result; - - result = ldlm_cancel_lrur_policy(ns, lock, unused, added, count); - if (result == LDLM_POLICY_KEEP_LOCK) - return result; - - return ldlm_cancel_no_wait_policy(ns, lock, unused, added, count); -} - -/** - * Callback function for default policy. Makes decision whether to keep \a lock - * in LRU for current LRU size \a unused, added in current scan \a added and - * number of locks to be preferably canceled \a count. 
- * - * \retval LDLM_POLICY_KEEP_LOCK keep lock in LRU in stop scanning - * - * \retval LDLM_POLICY_CANCEL_LOCK cancel lock from LRU - */ -static enum ldlm_policy_res -ldlm_cancel_default_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock, - int unused, int added, int count) -{ - /* Stop LRU processing when we reach past count or have checked all - * locks in LRU. - */ - return (added >= count) ? - LDLM_POLICY_KEEP_LOCK : LDLM_POLICY_CANCEL_LOCK; -} - -typedef enum ldlm_policy_res (*ldlm_cancel_lru_policy_t)( - struct ldlm_namespace *, - struct ldlm_lock *, int, - int, int); - -static ldlm_cancel_lru_policy_t -ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags) -{ - if (flags & LDLM_LRU_FLAG_NO_WAIT) - return ldlm_cancel_no_wait_policy; - - if (ns_connect_lru_resize(ns)) { - if (flags & LDLM_LRU_FLAG_SHRINK) - /* We kill passed number of old locks. */ - return ldlm_cancel_passed_policy; - else if (flags & LDLM_LRU_FLAG_LRUR) - return ldlm_cancel_lrur_policy; - else if (flags & LDLM_LRU_FLAG_PASSED) - return ldlm_cancel_passed_policy; - else if (flags & LDLM_LRU_FLAG_LRUR_NO_WAIT) - return ldlm_cancel_lrur_no_wait_policy; - } else { - if (flags & LDLM_LRU_FLAG_AGED) - return ldlm_cancel_aged_policy; - } - - return ldlm_cancel_default_policy; -} - -/** - * - Free space in LRU for \a count new locks, - * redundant unused locks are canceled locally; - * - also cancel locally unused aged locks; - * - do not cancel more than \a max locks; - * - GET the found locks and add them into the \a cancels list. - * - * A client lock can be added to the l_bl_ast list only when it is - * marked LDLM_FL_CANCELING. Otherwise, somebody is already doing - * CANCEL. There are the following use cases: - * ldlm_cancel_resource_local(), ldlm_cancel_lru_local() and - * ldlm_cli_cancel(), which check and set this flag properly. As any - * attempt to cancel a lock rely on this flag, l_bl_ast list is accessed - * later without any special locking. 
- * - * Calling policies for enabled LRU resize: - * ---------------------------------------- - * flags & LDLM_LRU_FLAG_LRUR - use LRU resize policy (SLV from server) to - * cancel not more than \a count locks; - * - * flags & LDLM_LRU_FLAG_PASSED - cancel \a count number of old locks (located - * at the beginning of LRU list); - * - * flags & LDLM_LRU_FLAG_SHRINK - cancel not more than \a count locks according - * to memory pressure policy function; - * - * flags & LDLM_LRU_FLAG_AGED - cancel \a count locks according to - * "aged policy". - * - * flags & LDLM_LRU_FLAG_NO_WAIT - cancel as many unused locks as possible - * (typically before replaying locks) w/o - * sending any RPCs or waiting for any - * outstanding RPC to complete. - */ -static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, - struct list_head *cancels, int count, int max, - int flags) -{ - ldlm_cancel_lru_policy_t pf; - struct ldlm_lock *lock, *next; - int added = 0, unused, remained; - int no_wait = flags & - (LDLM_LRU_FLAG_NO_WAIT | LDLM_LRU_FLAG_LRUR_NO_WAIT); - - spin_lock(&ns->ns_lock); - unused = ns->ns_nr_unused; - remained = unused; - - if (!ns_connect_lru_resize(ns)) - count += unused - ns->ns_max_unused; - - pf = ldlm_cancel_lru_policy(ns, flags); - LASSERT(pf); - - while (!list_empty(&ns->ns_unused_list)) { - enum ldlm_policy_res result; - time_t last_use = 0; - - /* all unused locks */ - if (remained-- <= 0) - break; - - /* For any flags, stop scanning if @max is reached. */ - if (max && added >= max) - break; - - list_for_each_entry_safe(lock, next, &ns->ns_unused_list, - l_lru) { - /* No locks which got blocking requests. */ - LASSERT(!ldlm_is_bl_ast(lock)); - - if (no_wait && ldlm_is_skipped(lock)) - /* already processed */ - continue; - - last_use = lock->l_last_used; - if (last_use == jiffies) - continue; - - /* Somebody is already doing CANCEL. No need for this - * lock in LRU, do not traverse it again. 
- */ - if (!ldlm_is_canceling(lock)) - break; - - ldlm_lock_remove_from_lru_nolock(lock); - } - if (&lock->l_lru == &ns->ns_unused_list) - break; - - LDLM_LOCK_GET(lock); - spin_unlock(&ns->ns_lock); - lu_ref_add(&lock->l_reference, __func__, current); - - /* Pass the lock through the policy filter and see if it - * should stay in LRU. - * - * Even for shrinker policy we stop scanning if - * we find a lock that should stay in the cache. - * We should take into account lock age anyway - * as a new lock is a valuable resource even if - * it has a low weight. - * - * That is, for shrinker policy we drop only - * old locks, but additionally choose them by - * their weight. Big extent locks will stay in - * the cache. - */ - result = pf(ns, lock, unused, added, count); - if (result == LDLM_POLICY_KEEP_LOCK) { - lu_ref_del(&lock->l_reference, - __func__, current); - LDLM_LOCK_RELEASE(lock); - spin_lock(&ns->ns_lock); - break; - } - if (result == LDLM_POLICY_SKIP_LOCK) { - lu_ref_del(&lock->l_reference, - __func__, current); - LDLM_LOCK_RELEASE(lock); - spin_lock(&ns->ns_lock); - continue; - } - - lock_res_and_lock(lock); - /* Check flags again under the lock. */ - if (ldlm_is_canceling(lock) || - (ldlm_lock_remove_from_lru_check(lock, last_use) == 0)) { - /* Another thread is removing lock from LRU, or - * somebody is already doing CANCEL, or there - * is a blocking request which will send cancel - * by itself, or the lock is no longer unused or - * the lock has been used since the pf() call and - * pages could be put under it. - */ - unlock_res_and_lock(lock); - lu_ref_del(&lock->l_reference, - __func__, current); - LDLM_LOCK_RELEASE(lock); - spin_lock(&ns->ns_lock); - continue; - } - LASSERT(!lock->l_readers && !lock->l_writers); - - /* If we have chosen to cancel this lock voluntarily, we - * better send cancel notification to server, so that it - * frees appropriate state. 
This might lead to a race - * where while we are doing cancel here, server is also - * silently cancelling this lock. - */ - ldlm_clear_cancel_on_block(lock); - - /* Setting the CBPENDING flag is a little misleading, - * but prevents an important race; namely, once - * CBPENDING is set, the lock can accumulate no more - * readers/writers. Since readers and writers are - * already zero here, ldlm_lock_decref() won't see - * this flag and call l_blocking_ast - */ - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING; - - /* We can't re-add to l_lru as it confuses the - * refcounting in ldlm_lock_remove_from_lru() if an AST - * arrives after we drop lr_lock below. We use l_bl_ast - * and can't use l_pending_chain as it is used both on - * server and client nevertheless bug 5666 says it is - * used only on server - */ - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, cancels); - unlock_res_and_lock(lock); - lu_ref_del(&lock->l_reference, __func__, current); - spin_lock(&ns->ns_lock); - added++; - unused--; - } - spin_unlock(&ns->ns_lock); - return added; -} - -int ldlm_cancel_lru_local(struct ldlm_namespace *ns, - struct list_head *cancels, int count, int max, - enum ldlm_cancel_flags cancel_flags, int flags) -{ - int added; - - added = ldlm_prepare_lru_list(ns, cancels, count, max, flags); - if (added <= 0) - return added; - return ldlm_cli_cancel_list_local(cancels, added, cancel_flags); -} - -/** - * Cancel at least \a nr locks from given namespace LRU. - * - * When called with LCF_ASYNC the blocking callback will be handled - * in a thread and this function will return after the thread has been - * asked to call the callback. When called with LCF_ASYNC the blocking - * callback will be performed in this function. - */ -int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, - enum ldlm_cancel_flags cancel_flags, - int flags) -{ - LIST_HEAD(cancels); - int count, rc; - - /* Just prepare the list of locks, do not actually cancel them yet. 
- * Locks are cancelled later in a separate thread. - */ - count = ldlm_prepare_lru_list(ns, &cancels, nr, 0, flags); - rc = ldlm_bl_to_thread_list(ns, NULL, &cancels, count, cancel_flags); - if (rc == 0) - return count; - - return 0; -} - -/** - * Find and cancel locally unused locks found on resource, matched to the - * given policy, mode. GET the found locks and add them into the \a cancels - * list. - */ -int ldlm_cancel_resource_local(struct ldlm_resource *res, - struct list_head *cancels, - union ldlm_policy_data *policy, - enum ldlm_mode mode, __u64 lock_flags, - enum ldlm_cancel_flags cancel_flags, - void *opaque) -{ - struct ldlm_lock *lock; - int count = 0; - - lock_res(res); - list_for_each_entry(lock, &res->lr_granted, l_res_link) { - if (opaque && lock->l_ast_data != opaque) { - LDLM_ERROR(lock, "data %p doesn't match opaque %p", - lock->l_ast_data, opaque); - continue; - } - - if (lock->l_readers || lock->l_writers) - continue; - - /* If somebody is already doing CANCEL, or blocking AST came, - * skip this lock. - */ - if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock)) - continue; - - if (lockmode_compat(lock->l_granted_mode, mode)) - continue; - - /* If policy is given and this is IBITS lock, add to list only - * those locks that match by policy. - */ - if (policy && (lock->l_resource->lr_type == LDLM_IBITS) && - !(lock->l_policy_data.l_inodebits.bits & - policy->l_inodebits.bits)) - continue; - - /* See CBPENDING comment in ldlm_cancel_lru */ - lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING | - lock_flags; - - LASSERT(list_empty(&lock->l_bl_ast)); - list_add(&lock->l_bl_ast, cancels); - LDLM_LOCK_GET(lock); - count++; - } - unlock_res(res); - - return ldlm_cli_cancel_list_local(cancels, count, cancel_flags); -} -EXPORT_SYMBOL(ldlm_cancel_resource_local); - -/** - * Cancel client-side locks from a list and send/prepare cancel RPCs to the - * server. 
- * If \a req is NULL, send CANCEL request to server with handles of locks - * in the \a cancels. If EARLY_CANCEL is not supported, send CANCEL requests - * separately per lock. - * If \a req is not NULL, put handles of locks in \a cancels into the request - * buffer at the offset \a off. - * Destroy \a cancels at the end. - */ -int ldlm_cli_cancel_list(struct list_head *cancels, int count, - struct ptlrpc_request *req, - enum ldlm_cancel_flags flags) -{ - struct ldlm_lock *lock; - int res = 0; - - if (list_empty(cancels) || count == 0) - return 0; - - /* XXX: requests (both batched and not) could be sent in parallel. - * Usually it is enough to have just 1 RPC, but it is possible that - * there are too many locks to be cancelled in LRU or on a resource. - * It would also speed up the case when the server does not support - * the feature. - */ - while (count > 0) { - LASSERT(!list_empty(cancels)); - lock = list_first_entry(cancels, struct ldlm_lock, l_bl_ast); - LASSERT(lock->l_conn_export); - - if (exp_connect_cancelset(lock->l_conn_export)) { - res = count; - if (req) - ldlm_cancel_pack(req, cancels, count); - else - res = ldlm_cli_cancel_req(lock->l_conn_export, - cancels, count, - flags); - } else { - res = ldlm_cli_cancel_req(lock->l_conn_export, - cancels, 1, flags); - } - - if (res < 0) { - CDEBUG_LIMIT(res == -ESHUTDOWN ? D_DLMTRACE : D_ERROR, - "%s: %d\n", __func__, res); - res = count; - } - - count -= res; - ldlm_lock_list_put(cancels, l_bl_ast, res); - } - LASSERT(count == 0); - return 0; -} -EXPORT_SYMBOL(ldlm_cli_cancel_list); - -/** - * Cancel all locks on a resource that have 0 readers/writers. - * - * If flags & LDLM_FL_LOCAL_ONLY, throw the locks away without trying - * to notify the server. 
- */ -int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - enum ldlm_cancel_flags flags, - void *opaque) -{ - struct ldlm_resource *res; - LIST_HEAD(cancels); - int count; - int rc; - - res = ldlm_resource_get(ns, NULL, res_id, 0, 0); - if (IS_ERR(res)) { - /* This is not a problem. */ - CDEBUG(D_INFO, "No resource %llu\n", res_id->name[0]); - return 0; - } - - LDLM_RESOURCE_ADDREF(res); - count = ldlm_cancel_resource_local(res, &cancels, policy, mode, - 0, flags | LCF_BL_AST, opaque); - rc = ldlm_cli_cancel_list(&cancels, count, NULL, flags); - if (rc != ELDLM_OK) - CERROR("canceling unused lock " DLDLMRES ": rc = %d\n", - PLDLMRES(res), rc); - - LDLM_RESOURCE_DELREF(res); - ldlm_resource_putref(res); - return 0; -} -EXPORT_SYMBOL(ldlm_cli_cancel_unused_resource); - -struct ldlm_cli_cancel_arg { - int lc_flags; - void *lc_opaque; -}; - -static int ldlm_cli_hash_cancel_unused(struct cfs_hash *hs, - struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - struct ldlm_cli_cancel_arg *lc = arg; - - ldlm_cli_cancel_unused_resource(ldlm_res_to_ns(res), &res->lr_name, - NULL, LCK_MINMODE, - lc->lc_flags, lc->lc_opaque); - /* must return 0 for hash iteration */ - return 0; -} - -/** - * Cancel all locks on a namespace (or a specific resource, if given) - * that have 0 readers/writers. - * - * If flags & LCF_LOCAL, throw the locks away without trying - * to notify the server. 
- */ -int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - enum ldlm_cancel_flags flags, void *opaque) -{ - struct ldlm_cli_cancel_arg arg = { - .lc_flags = flags, - .lc_opaque = opaque, - }; - - if (!ns) - return ELDLM_OK; - - if (res_id) { - return ldlm_cli_cancel_unused_resource(ns, res_id, NULL, - LCK_MINMODE, flags, - opaque); - } else { - cfs_hash_for_each_nolock(ns->ns_rs_hash, - ldlm_cli_hash_cancel_unused, &arg, 0); - return ELDLM_OK; - } -} -EXPORT_SYMBOL(ldlm_cli_cancel_unused); - -/* Lock iterators. */ - -static int ldlm_resource_foreach(struct ldlm_resource *res, - ldlm_iterator_t iter, void *closure) -{ - struct ldlm_lock *tmp; - struct ldlm_lock *lock; - int rc = LDLM_ITER_CONTINUE; - - if (!res) - return LDLM_ITER_CONTINUE; - - lock_res(res); - list_for_each_entry_safe(lock, tmp, &res->lr_granted, l_res_link) { - if (iter(lock, closure) == LDLM_ITER_STOP) { - rc = LDLM_ITER_STOP; - goto out; - } - } - - list_for_each_entry_safe(lock, tmp, &res->lr_waiting, l_res_link) { - if (iter(lock, closure) == LDLM_ITER_STOP) { - rc = LDLM_ITER_STOP; - goto out; - } - } - out: - unlock_res(res); - return rc; -} - -struct iter_helper_data { - ldlm_iterator_t iter; - void *closure; -}; - -static int ldlm_iter_helper(struct ldlm_lock *lock, void *closure) -{ - struct iter_helper_data *helper = closure; - - return helper->iter(lock, helper->closure); -} - -static int ldlm_res_iter_helper(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) - -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - - return ldlm_resource_foreach(res, ldlm_iter_helper, arg) == - LDLM_ITER_STOP; -} - -static void ldlm_namespace_foreach(struct ldlm_namespace *ns, - ldlm_iterator_t iter, void *closure) - -{ - struct iter_helper_data helper = { - .iter = iter, - .closure = closure, - }; - - cfs_hash_for_each_nolock(ns->ns_rs_hash, - ldlm_res_iter_helper, &helper, 0); -} - -/* non-blocking function to manipulate a 
lock whose cb_data is being put away. - * return 0: find no resource - * > 0: must be LDLM_ITER_STOP/LDLM_ITER_CONTINUE. - * < 0: errors - */ -int ldlm_resource_iterate(struct ldlm_namespace *ns, - const struct ldlm_res_id *res_id, - ldlm_iterator_t iter, void *data) -{ - struct ldlm_resource *res; - int rc; - - LASSERTF(ns, "must pass in namespace\n"); - - res = ldlm_resource_get(ns, NULL, res_id, 0, 0); - if (IS_ERR(res)) - return 0; - - LDLM_RESOURCE_ADDREF(res); - rc = ldlm_resource_foreach(res, iter, data); - LDLM_RESOURCE_DELREF(res); - ldlm_resource_putref(res); - return rc; -} -EXPORT_SYMBOL(ldlm_resource_iterate); - -/* Lock replay */ - -static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure) -{ - struct list_head *list = closure; - - /* we use l_pending_chain here, because it's unused on clients. */ - LASSERTF(list_empty(&lock->l_pending_chain), - "lock %p next %p prev %p\n", - lock, &lock->l_pending_chain.next, - &lock->l_pending_chain.prev); - /* bug 9573: don't replay locks left after eviction, or - * bug 17614: locks being actively cancelled. Get a reference - * on a lock so that it does not disappear under us (e.g. 
due to cancel) - */ - if (!(lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_BL_DONE))) { - list_add(&lock->l_pending_chain, list); - LDLM_LOCK_GET(lock); - } - - return LDLM_ITER_CONTINUE; -} - -static int replay_lock_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - struct ldlm_async_args *aa, int rc) -{ - struct ldlm_lock *lock; - struct ldlm_reply *reply; - struct obd_export *exp; - - atomic_dec(&req->rq_import->imp_replay_inflight); - if (rc != ELDLM_OK) - goto out; - - reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); - if (!reply) { - rc = -EPROTO; - goto out; - } - - lock = ldlm_handle2lock(&aa->lock_handle); - if (!lock) { - CERROR("received replay ack for unknown local cookie %#llx remote cookie %#llx from server %s id %s\n", - aa->lock_handle.cookie, reply->lock_handle.cookie, - req->rq_export->exp_client_uuid.uuid, - libcfs_id2str(req->rq_peer)); - rc = -ESTALE; - goto out; - } - - /* Key change rehash lock in per-export hash with new key */ - exp = req->rq_export; - lock->l_remote_handle = reply->lock_handle; - - LDLM_DEBUG(lock, "replayed lock:"); - ptlrpc_import_recovery_state_machine(req->rq_import); - LDLM_LOCK_PUT(lock); -out: - if (rc != ELDLM_OK) - ptlrpc_connect_import(req->rq_import); - - return rc; -} - -static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) -{ - struct ptlrpc_request *req; - struct ldlm_async_args *aa; - struct ldlm_request *body; - int flags; - - /* Bug 11974: Do not replay a lock which is actively being canceled */ - if (ldlm_is_bl_done(lock)) { - LDLM_DEBUG(lock, "Not replaying canceled lock:"); - return 0; - } - - /* If this is reply-less callback lock, we cannot replay it, since - * server might have long dropped it, but notification of that event was - * lost by network. 
(and server granted conflicting lock already) - */ - if (ldlm_is_cancel_on_block(lock)) { - LDLM_DEBUG(lock, "Not replaying reply-less lock:"); - ldlm_lock_cancel(lock); - return 0; - } - - /* - * If granted mode matches the requested mode, this lock is granted. - * - * If they differ, but we have a granted mode, then we were granted - * one mode and now want another: ergo, converting. - * - * If we haven't been granted anything and are on a resource list, - * then we're blocked/waiting. - * - * If we haven't been granted anything and we're NOT on a resource list, - * then we haven't got a reply yet and don't have a known disposition. - * This happens whenever a lock enqueue is the request that triggers - * recovery. - */ - if (lock->l_granted_mode == lock->l_req_mode) - flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_GRANTED; - else if (lock->l_granted_mode) - flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_CONV; - else if (!list_empty(&lock->l_res_link)) - flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT; - else - flags = LDLM_FL_REPLAY; - - req = ptlrpc_request_alloc_pack(imp, &RQF_LDLM_ENQUEUE, - LUSTRE_DLM_VERSION, LDLM_ENQUEUE); - if (!req) - return -ENOMEM; - - /* We're part of recovery, so don't wait for it. */ - req->rq_send_state = LUSTRE_IMP_REPLAY_LOCKS; - - body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); - ldlm_lock2desc(lock, &body->lock_desc); - body->lock_flags = ldlm_flags_to_wire(flags); - - ldlm_lock2handle(lock, &body->lock_handle[0]); - if (lock->l_lvb_len > 0) - req_capsule_extend(&req->rq_pill, &RQF_LDLM_ENQUEUE_LVB); - req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, - lock->l_lvb_len); - ptlrpc_request_set_replen(req); - /* notify the server we've replayed all requests. - * also, we mark the request to be put on a dedicated - * queue to be processed after all request replayes. 
- * bug 6063 - */ - lustre_msg_set_flags(req->rq_reqmsg, MSG_REQ_REPLAY_DONE); - - LDLM_DEBUG(lock, "replaying lock:"); - - atomic_inc(&req->rq_import->imp_replay_inflight); - BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); - aa = ptlrpc_req_async_args(req); - aa->lock_handle = body->lock_handle[0]; - req->rq_interpret_reply = (ptlrpc_interpterer_t)replay_lock_interpret; - ptlrpcd_add_req(req); - - return 0; -} - -/** - * Cancel as many unused locks as possible before replay. since we are - * in recovery, we can't wait for any outstanding RPCs to send any RPC - * to the server. - * - * Called only in recovery before replaying locks. there is no need to - * replay locks that are unused. since the clients may hold thousands of - * cached unused locks, dropping the unused locks can greatly reduce the - * load on the servers at recovery time. - */ -static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns) -{ - int canceled; - LIST_HEAD(cancels); - - CDEBUG(D_DLMTRACE, - "Dropping as many unused locks as possible before replay for namespace %s (%d)\n", - ldlm_ns_name(ns), ns->ns_nr_unused); - - /* We don't need to care whether or not LRU resize is enabled - * because the LDLM_LRU_FLAG_NO_WAIT policy doesn't use the - * count parameter - */ - canceled = ldlm_cancel_lru_local(ns, &cancels, ns->ns_nr_unused, 0, - LCF_LOCAL, LDLM_LRU_FLAG_NO_WAIT); - - CDEBUG(D_DLMTRACE, "Canceled %d unused locks from namespace %s\n", - canceled, ldlm_ns_name(ns)); -} - -int ldlm_replay_locks(struct obd_import *imp) -{ - struct ldlm_namespace *ns = imp->imp_obd->obd_namespace; - LIST_HEAD(list); - struct ldlm_lock *lock, *next; - int rc = 0; - - LASSERT(atomic_read(&imp->imp_replay_inflight) == 0); - - /* don't replay locks if import failed recovery */ - if (imp->imp_vbr_failed) - return 0; - - /* ensure this doesn't fall to 0 before all have been queued */ - atomic_inc(&imp->imp_replay_inflight); - - if (ldlm_cancel_unused_locks_before_replay) - 
ldlm_cancel_unused_locks_for_replay(ns); - - ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list); - - list_for_each_entry_safe(lock, next, &list, l_pending_chain) { - list_del_init(&lock->l_pending_chain); - if (rc) { - LDLM_LOCK_RELEASE(lock); - continue; /* or try to do the rest? */ - } - rc = replay_one_lock(imp, lock); - LDLM_LOCK_RELEASE(lock); - } - - atomic_dec(&imp->imp_replay_inflight); - - return rc; -} diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c deleted file mode 100644 index c93b019b8e37..000000000000 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c +++ /dev/null @@ -1,1318 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/ldlm/ldlm_resource.c - * - * Author: Phil Schwan - * Author: Peter Braam - */ - -#define DEBUG_SUBSYSTEM S_LDLM -#include -#include -#include -#include "ldlm_internal.h" -#include - -struct kmem_cache *ldlm_resource_slab, *ldlm_lock_slab; - -int ldlm_srv_namespace_nr; -int ldlm_cli_namespace_nr; - -struct mutex ldlm_srv_namespace_lock; -LIST_HEAD(ldlm_srv_namespace_list); - -struct mutex ldlm_cli_namespace_lock; -/* Client Namespaces that have active resources in them. - * Once all resources go away, ldlm_poold moves such namespaces to the - * inactive list - */ -LIST_HEAD(ldlm_cli_active_namespace_list); -/* Client namespaces that don't have any locks in them */ -static LIST_HEAD(ldlm_cli_inactive_namespace_list); - -static struct dentry *ldlm_debugfs_dir; -static struct dentry *ldlm_ns_debugfs_dir; -struct dentry *ldlm_svc_debugfs_dir; - -/* during debug dump certain amount of granted locks for one resource to avoid - * DDOS. - */ -static unsigned int ldlm_dump_granted_max = 256; - -static ssize_t -lprocfs_wr_dump_ns(struct file *file, const char __user *buffer, - size_t count, loff_t *off) -{ - ldlm_dump_all_namespaces(LDLM_NAMESPACE_SERVER, D_DLMTRACE); - ldlm_dump_all_namespaces(LDLM_NAMESPACE_CLIENT, D_DLMTRACE); - return count; -} - -LPROC_SEQ_FOPS_WR_ONLY(ldlm, dump_ns); - -static int ldlm_rw_uint_seq_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%u\n", *(unsigned int *)m->private); - return 0; -} - -static ssize_t -ldlm_rw_uint_seq_write(struct file *file, const char __user *buffer, - size_t count, loff_t *off) -{ - struct seq_file *seq = file->private_data; - - if (count == 0) - return 0; - return kstrtouint_from_user(buffer, count, 0, - (unsigned int *)seq->private); -} - -LPROC_SEQ_FOPS(ldlm_rw_uint); - -static struct lprocfs_vars ldlm_debugfs_list[] = { - { "dump_namespaces", &ldlm_dump_ns_fops, NULL, 0222 }, - { "dump_granted_max", &ldlm_rw_uint_fops, &ldlm_dump_granted_max }, - { NULL } -}; - -void ldlm_debugfs_setup(void) -{ - 
ldlm_debugfs_dir = debugfs_create_dir(OBD_LDLM_DEVICENAME, - debugfs_lustre_root); - - ldlm_ns_debugfs_dir = debugfs_create_dir("namespaces", - ldlm_debugfs_dir); - - ldlm_svc_debugfs_dir = debugfs_create_dir("services", ldlm_debugfs_dir); - - ldebugfs_add_vars(ldlm_debugfs_dir, ldlm_debugfs_list, NULL); -} - -void ldlm_debugfs_cleanup(void) -{ - debugfs_remove_recursive(ldlm_svc_debugfs_dir); - debugfs_remove_recursive(ldlm_ns_debugfs_dir); - debugfs_remove_recursive(ldlm_debugfs_dir); -} - -static ssize_t resource_count_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - __u64 res = 0; - struct cfs_hash_bd bd; - int i; - - /* result is not strictly consistent */ - cfs_hash_for_each_bucket(ns->ns_rs_hash, &bd, i) - res += cfs_hash_bd_count_get(&bd); - return sprintf(buf, "%lld\n", res); -} -LUSTRE_RO_ATTR(resource_count); - -static ssize_t lock_count_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - __u64 locks; - - locks = lprocfs_stats_collector(ns->ns_stats, LDLM_NSS_LOCKS, - LPROCFS_FIELDS_FLAGS_SUM); - return sprintf(buf, "%lld\n", locks); -} -LUSTRE_RO_ATTR(lock_count); - -static ssize_t lock_unused_count_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - - return sprintf(buf, "%d\n", ns->ns_nr_unused); -} -LUSTRE_RO_ATTR(lock_unused_count); - -static ssize_t lru_size_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - __u32 *nr = &ns->ns_max_unused; - - if (ns_connect_lru_resize(ns)) - nr = &ns->ns_nr_unused; - return sprintf(buf, "%u\n", *nr); -} - -static ssize_t lru_size_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t 
count) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - unsigned long tmp; - int lru_resize; - int err; - - if (strncmp(buffer, "clear", 5) == 0) { - CDEBUG(D_DLMTRACE, - "dropping all unused locks from namespace %s\n", - ldlm_ns_name(ns)); - if (ns_connect_lru_resize(ns)) { - int canceled, unused = ns->ns_nr_unused; - - /* Try to cancel all @ns_nr_unused locks. */ - canceled = ldlm_cancel_lru(ns, unused, 0, - LDLM_LRU_FLAG_PASSED); - if (canceled < unused) { - CDEBUG(D_DLMTRACE, - "not all requested locks are canceled, requested: %d, canceled: %d\n", - unused, - canceled); - return -EINVAL; - } - } else { - tmp = ns->ns_max_unused; - ns->ns_max_unused = 0; - ldlm_cancel_lru(ns, 0, 0, LDLM_LRU_FLAG_PASSED); - ns->ns_max_unused = tmp; - } - return count; - } - - err = kstrtoul(buffer, 10, &tmp); - if (err != 0) { - CERROR("lru_size: invalid value written\n"); - return -EINVAL; - } - lru_resize = (tmp == 0); - - if (ns_connect_lru_resize(ns)) { - if (!lru_resize) - ns->ns_max_unused = (unsigned int)tmp; - - if (tmp > ns->ns_nr_unused) - tmp = ns->ns_nr_unused; - tmp = ns->ns_nr_unused - tmp; - - CDEBUG(D_DLMTRACE, - "changing namespace %s unused locks from %u to %u\n", - ldlm_ns_name(ns), ns->ns_nr_unused, - (unsigned int)tmp); - ldlm_cancel_lru(ns, tmp, LCF_ASYNC, LDLM_LRU_FLAG_PASSED); - - if (!lru_resize) { - CDEBUG(D_DLMTRACE, - "disable lru_resize for namespace %s\n", - ldlm_ns_name(ns)); - ns->ns_connect_flags &= ~OBD_CONNECT_LRU_RESIZE; - } - } else { - CDEBUG(D_DLMTRACE, - "changing namespace %s max_unused from %u to %u\n", - ldlm_ns_name(ns), ns->ns_max_unused, - (unsigned int)tmp); - ns->ns_max_unused = (unsigned int)tmp; - ldlm_cancel_lru(ns, 0, LCF_ASYNC, LDLM_LRU_FLAG_PASSED); - - /* Make sure that LRU resize was originally supported before - * turning it on here. 
- */ - if (lru_resize && - (ns->ns_orig_connect_flags & OBD_CONNECT_LRU_RESIZE)) { - CDEBUG(D_DLMTRACE, - "enable lru_resize for namespace %s\n", - ldlm_ns_name(ns)); - ns->ns_connect_flags |= OBD_CONNECT_LRU_RESIZE; - } - } - - return count; -} -LUSTRE_RW_ATTR(lru_size); - -static ssize_t lru_max_age_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - - return sprintf(buf, "%u\n", ns->ns_max_age); -} - -static ssize_t lru_max_age_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - unsigned long tmp; - int err; - - err = kstrtoul(buffer, 10, &tmp); - if (err != 0) - return -EINVAL; - - ns->ns_max_age = tmp; - - return count; -} -LUSTRE_RW_ATTR(lru_max_age); - -static ssize_t early_lock_cancel_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - - return sprintf(buf, "%d\n", ns_connect_cancelset(ns)); -} - -static ssize_t early_lock_cancel_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - unsigned long supp = -1; - int rc; - - rc = kstrtoul(buffer, 10, &supp); - if (rc < 0) - return rc; - - if (supp == 0) - ns->ns_connect_flags &= ~OBD_CONNECT_CANCELSET; - else if (ns->ns_orig_connect_flags & OBD_CONNECT_CANCELSET) - ns->ns_connect_flags |= OBD_CONNECT_CANCELSET; - return count; -} -LUSTRE_RW_ATTR(early_lock_cancel); - -/* These are for namespaces in /sys/fs/lustre/ldlm/namespaces/ */ -static struct attribute *ldlm_ns_attrs[] = { - &lustre_attr_resource_count.attr, - &lustre_attr_lock_count.attr, - &lustre_attr_lock_unused_count.attr, - &lustre_attr_lru_size.attr, - &lustre_attr_lru_max_age.attr, - 
&lustre_attr_early_lock_cancel.attr, - NULL, -}; - -static void ldlm_ns_release(struct kobject *kobj) -{ - struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace, - ns_kobj); - complete(&ns->ns_kobj_unregister); -} - -static struct kobj_type ldlm_ns_ktype = { - .default_attrs = ldlm_ns_attrs, - .sysfs_ops = &lustre_sysfs_ops, - .release = ldlm_ns_release, -}; - -static void ldlm_namespace_debugfs_unregister(struct ldlm_namespace *ns) -{ - debugfs_remove_recursive(ns->ns_debugfs_entry); - - if (ns->ns_stats) - lprocfs_free_stats(&ns->ns_stats); -} - -static void ldlm_namespace_sysfs_unregister(struct ldlm_namespace *ns) -{ - kobject_put(&ns->ns_kobj); - wait_for_completion(&ns->ns_kobj_unregister); -} - -static int ldlm_namespace_sysfs_register(struct ldlm_namespace *ns) -{ - int err; - - ns->ns_kobj.kset = ldlm_ns_kset; - init_completion(&ns->ns_kobj_unregister); - err = kobject_init_and_add(&ns->ns_kobj, &ldlm_ns_ktype, NULL, - "%s", ldlm_ns_name(ns)); - - ns->ns_stats = lprocfs_alloc_stats(LDLM_NSS_LAST, 0); - if (!ns->ns_stats) { - kobject_put(&ns->ns_kobj); - return -ENOMEM; - } - - lprocfs_counter_init(ns->ns_stats, LDLM_NSS_LOCKS, - LPROCFS_CNTR_AVGMINMAX, "locks", "locks"); - - return err; -} - -static int ldlm_namespace_debugfs_register(struct ldlm_namespace *ns) -{ - struct dentry *ns_entry; - - if (!IS_ERR_OR_NULL(ns->ns_debugfs_entry)) { - ns_entry = ns->ns_debugfs_entry; - } else { - ns_entry = debugfs_create_dir(ldlm_ns_name(ns), - ldlm_ns_debugfs_dir); - if (!ns_entry) - return -ENOMEM; - ns->ns_debugfs_entry = ns_entry; - } - - return 0; -} - -#undef MAX_STRING_SIZE - -static struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res) -{ - LASSERT(res); - LASSERT(res != LP_POISON); - atomic_inc(&res->lr_refcount); - CDEBUG(D_INFO, "getref res: %p count: %d\n", res, - atomic_read(&res->lr_refcount)); - return res; -} - -static unsigned int ldlm_res_hop_hash(struct cfs_hash *hs, - const void *key, unsigned int mask) -{ - const 
struct ldlm_res_id *id = key; - unsigned int val = 0; - unsigned int i; - - for (i = 0; i < RES_NAME_SIZE; i++) - val += id->name[i]; - return val & mask; -} - -static unsigned int ldlm_res_hop_fid_hash(struct cfs_hash *hs, - const void *key, unsigned int mask) -{ - const struct ldlm_res_id *id = key; - struct lu_fid fid; - __u32 hash; - __u32 val; - - fid.f_seq = id->name[LUSTRE_RES_ID_SEQ_OFF]; - fid.f_oid = (__u32)id->name[LUSTRE_RES_ID_VER_OID_OFF]; - fid.f_ver = (__u32)(id->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32); - - hash = fid_flatten32(&fid); - hash += (hash >> 4) + (hash << 12); /* mixing oid and seq */ - if (id->name[LUSTRE_RES_ID_HSH_OFF] != 0) { - val = id->name[LUSTRE_RES_ID_HSH_OFF]; - hash += (val >> 5) + (val << 11); - } else { - val = fid_oid(&fid); - } - hash = hash_long(hash, hs->hs_bkt_bits); - /* give me another random factor */ - hash -= hash_long((unsigned long)hs, val % 11 + 3); - - hash <<= hs->hs_cur_bits - hs->hs_bkt_bits; - hash |= ldlm_res_hop_hash(hs, key, CFS_HASH_NBKT(hs) - 1); - - return hash & mask; -} - -static void *ldlm_res_hop_key(struct hlist_node *hnode) -{ - struct ldlm_resource *res; - - res = hlist_entry(hnode, struct ldlm_resource, lr_hash); - return &res->lr_name; -} - -static int ldlm_res_hop_keycmp(const void *key, struct hlist_node *hnode) -{ - struct ldlm_resource *res; - - res = hlist_entry(hnode, struct ldlm_resource, lr_hash); - return ldlm_res_eq((const struct ldlm_res_id *)key, - (const struct ldlm_res_id *)&res->lr_name); -} - -static void *ldlm_res_hop_object(struct hlist_node *hnode) -{ - return hlist_entry(hnode, struct ldlm_resource, lr_hash); -} - -static void ldlm_res_hop_get_locked(struct cfs_hash *hs, - struct hlist_node *hnode) -{ - struct ldlm_resource *res; - - res = hlist_entry(hnode, struct ldlm_resource, lr_hash); - ldlm_resource_getref(res); -} - -static void ldlm_res_hop_put(struct cfs_hash *hs, struct hlist_node *hnode) -{ - struct ldlm_resource *res; - - res = hlist_entry(hnode, struct 
ldlm_resource, lr_hash); - ldlm_resource_putref(res); -} - -static struct cfs_hash_ops ldlm_ns_hash_ops = { - .hs_hash = ldlm_res_hop_hash, - .hs_key = ldlm_res_hop_key, - .hs_keycmp = ldlm_res_hop_keycmp, - .hs_keycpy = NULL, - .hs_object = ldlm_res_hop_object, - .hs_get = ldlm_res_hop_get_locked, - .hs_put = ldlm_res_hop_put -}; - -static struct cfs_hash_ops ldlm_ns_fid_hash_ops = { - .hs_hash = ldlm_res_hop_fid_hash, - .hs_key = ldlm_res_hop_key, - .hs_keycmp = ldlm_res_hop_keycmp, - .hs_keycpy = NULL, - .hs_object = ldlm_res_hop_object, - .hs_get = ldlm_res_hop_get_locked, - .hs_put = ldlm_res_hop_put -}; - -struct ldlm_ns_hash_def { - enum ldlm_ns_type nsd_type; - /** hash bucket bits */ - unsigned int nsd_bkt_bits; - /** hash bits */ - unsigned int nsd_all_bits; - /** hash operations */ - struct cfs_hash_ops *nsd_hops; -}; - -static struct ldlm_ns_hash_def ldlm_ns_hash_defs[] = { - { - .nsd_type = LDLM_NS_TYPE_MDC, - .nsd_bkt_bits = 11, - .nsd_all_bits = 16, - .nsd_hops = &ldlm_ns_fid_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_MDT, - .nsd_bkt_bits = 14, - .nsd_all_bits = 21, - .nsd_hops = &ldlm_ns_fid_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_OSC, - .nsd_bkt_bits = 8, - .nsd_all_bits = 12, - .nsd_hops = &ldlm_ns_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_OST, - .nsd_bkt_bits = 11, - .nsd_all_bits = 17, - .nsd_hops = &ldlm_ns_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_MGC, - .nsd_bkt_bits = 4, - .nsd_all_bits = 4, - .nsd_hops = &ldlm_ns_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_MGT, - .nsd_bkt_bits = 4, - .nsd_all_bits = 4, - .nsd_hops = &ldlm_ns_hash_ops, - }, - { - .nsd_type = LDLM_NS_TYPE_UNKNOWN, - }, -}; - -/** Register \a ns in the list of namespaces */ -static void ldlm_namespace_register(struct ldlm_namespace *ns, - enum ldlm_side client) -{ - mutex_lock(ldlm_namespace_lock(client)); - LASSERT(list_empty(&ns->ns_list_chain)); - list_add(&ns->ns_list_chain, &ldlm_cli_inactive_namespace_list); - ldlm_namespace_nr_inc(client); - 
mutex_unlock(ldlm_namespace_lock(client)); -} - -/** - * Create and initialize new empty namespace. - */ -struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name, - enum ldlm_side client, - enum ldlm_appetite apt, - enum ldlm_ns_type ns_type) -{ - struct ldlm_namespace *ns = NULL; - struct ldlm_ns_bucket *nsb; - struct ldlm_ns_hash_def *nsd; - struct cfs_hash_bd bd; - int idx; - int rc; - - LASSERT(obd); - - rc = ldlm_get_ref(); - if (rc) { - CERROR("ldlm_get_ref failed: %d\n", rc); - return NULL; - } - - for (idx = 0;; idx++) { - nsd = &ldlm_ns_hash_defs[idx]; - if (nsd->nsd_type == LDLM_NS_TYPE_UNKNOWN) { - CERROR("Unknown type %d for ns %s\n", ns_type, name); - goto out_ref; - } - - if (nsd->nsd_type == ns_type) - break; - } - - ns = kzalloc(sizeof(*ns), GFP_NOFS); - if (!ns) - goto out_ref; - - ns->ns_rs_hash = cfs_hash_create(name, - nsd->nsd_all_bits, nsd->nsd_all_bits, - nsd->nsd_bkt_bits, sizeof(*nsb), - CFS_HASH_MIN_THETA, - CFS_HASH_MAX_THETA, - nsd->nsd_hops, - CFS_HASH_DEPTH | - CFS_HASH_BIGNAME | - CFS_HASH_SPIN_BKTLOCK | - CFS_HASH_NO_ITEMREF); - if (!ns->ns_rs_hash) - goto out_ns; - - cfs_hash_for_each_bucket(ns->ns_rs_hash, &bd, idx) { - nsb = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd); - at_init(&nsb->nsb_at_estimate, ldlm_enqueue_min, 0); - nsb->nsb_namespace = ns; - } - - ns->ns_obd = obd; - ns->ns_appetite = apt; - ns->ns_client = client; - ns->ns_name = kstrdup(name, GFP_KERNEL); - if (!ns->ns_name) - goto out_hash; - - INIT_LIST_HEAD(&ns->ns_list_chain); - INIT_LIST_HEAD(&ns->ns_unused_list); - spin_lock_init(&ns->ns_lock); - atomic_set(&ns->ns_bref, 0); - init_waitqueue_head(&ns->ns_waitq); - - ns->ns_max_parallel_ast = LDLM_DEFAULT_PARALLEL_AST_LIMIT; - ns->ns_nr_unused = 0; - ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE; - ns->ns_max_age = LDLM_DEFAULT_MAX_ALIVE; - ns->ns_orig_connect_flags = 0; - ns->ns_connect_flags = 0; - ns->ns_stopping = 0; - - rc = ldlm_namespace_sysfs_register(ns); - if (rc != 0) { - CERROR("Can't 
initialize ns sysfs, rc %d\n", rc); - goto out_hash; - } - - rc = ldlm_namespace_debugfs_register(ns); - if (rc != 0) { - CERROR("Can't initialize ns proc, rc %d\n", rc); - goto out_sysfs; - } - - idx = ldlm_namespace_nr_read(client); - rc = ldlm_pool_init(&ns->ns_pool, ns, idx, client); - if (rc) { - CERROR("Can't initialize lock pool, rc %d\n", rc); - goto out_proc; - } - - ldlm_namespace_register(ns, client); - return ns; -out_proc: - ldlm_namespace_debugfs_unregister(ns); -out_sysfs: - ldlm_namespace_sysfs_unregister(ns); - ldlm_namespace_cleanup(ns, 0); -out_hash: - kfree(ns->ns_name); - cfs_hash_putref(ns->ns_rs_hash); -out_ns: - kfree(ns); -out_ref: - ldlm_put_ref(); - return NULL; -} -EXPORT_SYMBOL(ldlm_namespace_new); - -extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); - -/** - * Cancel and destroy all locks on a resource. - * - * If flags contains FL_LOCAL_ONLY, don't try to tell the server, just - * clean up. This is currently only used for recovery, and we make - * certain assumptions as a result--notably, that we shouldn't cancel - * locks with refs. - */ -static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, - __u64 flags) -{ - int rc = 0; - bool local_only = !!(flags & LDLM_FL_LOCAL_ONLY); - - do { - struct ldlm_lock *lock = NULL, *tmp; - struct lustre_handle lockh; - - /* First, we look for non-cleaned-yet lock - * all cleaned locks are marked by CLEANED flag. - */ - lock_res(res); - list_for_each_entry(tmp, q, l_res_link) { - if (ldlm_is_cleaned(tmp)) - continue; - - lock = tmp; - LDLM_LOCK_GET(lock); - ldlm_set_cleaned(lock); - break; - } - - if (!lock) { - unlock_res(res); - break; - } - - /* Set CBPENDING so nothing in the cancellation path - * can match this lock. - */ - ldlm_set_cbpending(lock); - ldlm_set_failed(lock); - lock->l_flags |= flags; - - /* ... without sending a CANCEL message for local_only. 
*/ - if (local_only) - ldlm_set_local_only(lock); - - if (local_only && (lock->l_readers || lock->l_writers)) { - /* This is a little bit gross, but much better than the - * alternative: pretend that we got a blocking AST from - * the server, so that when the lock is decref'd, it - * will go away ... - */ - unlock_res(res); - LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY"); - if (lock->l_flags & LDLM_FL_FAIL_LOC) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(4 * HZ); - set_current_state(TASK_RUNNING); - } - if (lock->l_completion_ast) - lock->l_completion_ast(lock, LDLM_FL_FAILED, - NULL); - LDLM_LOCK_RELEASE(lock); - continue; - } - - unlock_res(res); - ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh, LCF_LOCAL); - if (rc) - CERROR("ldlm_cli_cancel: %d\n", rc); - LDLM_LOCK_RELEASE(lock); - } while (1); -} - -static int ldlm_resource_clean(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - __u64 flags = *(__u64 *)arg; - - cleanup_resource(res, &res->lr_granted, flags); - cleanup_resource(res, &res->lr_waiting, flags); - - return 0; -} - -static int ldlm_resource_complain(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - - lock_res(res); - CERROR("%s: namespace resource " DLDLMRES - " (%p) refcount nonzero (%d) after lock cleanup; forcing cleanup.\n", - ldlm_ns_name(ldlm_res_to_ns(res)), PLDLMRES(res), res, - atomic_read(&res->lr_refcount) - 1); - - ldlm_resource_dump(D_ERROR, res); - unlock_res(res); - return 0; -} - -/** - * Cancel and destroy all locks in the namespace. - * - * Typically used during evictions when server notified client that it was - * evicted and all of its state needs to be destroyed. - * Also used during shutdown. 
- */ -int ldlm_namespace_cleanup(struct ldlm_namespace *ns, __u64 flags) -{ - if (!ns) { - CDEBUG(D_INFO, "NULL ns, skipping cleanup\n"); - return ELDLM_OK; - } - - cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_clean, - &flags, 0); - cfs_hash_for_each_nolock(ns->ns_rs_hash, ldlm_resource_complain, - NULL, 0); - return ELDLM_OK; -} -EXPORT_SYMBOL(ldlm_namespace_cleanup); - -/** - * Attempts to free namespace. - * - * Only used when namespace goes away, like during an unmount. - */ -static int __ldlm_namespace_free(struct ldlm_namespace *ns, int force) -{ - /* At shutdown time, don't call the cancellation callback */ - ldlm_namespace_cleanup(ns, force ? LDLM_FL_LOCAL_ONLY : 0); - - if (atomic_read(&ns->ns_bref) > 0) { - int rc; - - CDEBUG(D_DLMTRACE, - "dlm namespace %s free waiting on refcount %d\n", - ldlm_ns_name(ns), atomic_read(&ns->ns_bref)); -force_wait: - if (force) - rc = wait_event_idle_timeout(ns->ns_waitq, - atomic_read(&ns->ns_bref) == 0, - obd_timeout * HZ / 4) ? 0 : -ETIMEDOUT; - else - rc = l_wait_event_abortable(ns->ns_waitq, - atomic_read(&ns->ns_bref) == 0); - - /* Forced cleanups should be able to reclaim all references, - * so it's safe to wait forever... we can't leak locks... - */ - if (force && rc == -ETIMEDOUT) { - LCONSOLE_ERROR("Forced cleanup waiting for %s namespace with %d resources in use, (rc=%d)\n", - ldlm_ns_name(ns), - atomic_read(&ns->ns_bref), rc); - goto force_wait; - } - - if (atomic_read(&ns->ns_bref)) { - LCONSOLE_ERROR("Cleanup waiting for %s namespace with %d resources in use, (rc=%d)\n", - ldlm_ns_name(ns), - atomic_read(&ns->ns_bref), rc); - return ELDLM_NAMESPACE_EXISTS; - } - CDEBUG(D_DLMTRACE, "dlm namespace %s free done waiting\n", - ldlm_ns_name(ns)); - } - - return ELDLM_OK; -} - -/** - * Performs various cleanups for passed \a ns to make it drop refc and be - * ready for freeing. Waits for refc == 0. 
- * - * The following is done: - * (0) Unregister \a ns from its list to make inaccessible for potential - * users like pools thread and others; - * (1) Clear all locks in \a ns. - */ -void ldlm_namespace_free_prior(struct ldlm_namespace *ns, - struct obd_import *imp, - int force) -{ - int rc; - - if (!ns) - return; - - spin_lock(&ns->ns_lock); - ns->ns_stopping = 1; - spin_unlock(&ns->ns_lock); - - /* - * Can fail with -EINTR when force == 0 in which case try harder. - */ - rc = __ldlm_namespace_free(ns, force); - if (rc != ELDLM_OK) { - if (imp) { - ptlrpc_disconnect_import(imp, 0); - ptlrpc_invalidate_import(imp); - } - - /* - * With all requests dropped and the import inactive - * we are guaranteed all reference will be dropped. - */ - rc = __ldlm_namespace_free(ns, 1); - LASSERT(rc == 0); - } -} - -/** Unregister \a ns from the list of namespaces. */ -static void ldlm_namespace_unregister(struct ldlm_namespace *ns, - enum ldlm_side client) -{ - mutex_lock(ldlm_namespace_lock(client)); - LASSERT(!list_empty(&ns->ns_list_chain)); - /* Some asserts and possibly other parts of the code are still - * using list_empty(&ns->ns_list_chain). This is why it is - * important to use list_del_init() here. - */ - list_del_init(&ns->ns_list_chain); - ldlm_namespace_nr_dec(client); - mutex_unlock(ldlm_namespace_lock(client)); -} - -/** - * Performs freeing memory structures related to \a ns. This is only done - * when ldlm_namespce_free_prior() successfully removed all resources - * referencing \a ns and its refc == 0. - */ -void ldlm_namespace_free_post(struct ldlm_namespace *ns) -{ - if (!ns) - return; - - /* Make sure that nobody can find this ns in its list. */ - ldlm_namespace_unregister(ns, ns->ns_client); - /* Fini pool _before_ parent proc dir is removed. This is important as - * ldlm_pool_fini() removes own proc dir which is child to @dir. - * Removing it after @dir may cause oops. 
- */ - ldlm_pool_fini(&ns->ns_pool); - - ldlm_namespace_debugfs_unregister(ns); - ldlm_namespace_sysfs_unregister(ns); - cfs_hash_putref(ns->ns_rs_hash); - kfree(ns->ns_name); - /* Namespace \a ns should be not on list at this time, otherwise - * this will cause issues related to using freed \a ns in poold - * thread. - */ - LASSERT(list_empty(&ns->ns_list_chain)); - kfree(ns); - ldlm_put_ref(); -} - -void ldlm_namespace_get(struct ldlm_namespace *ns) -{ - atomic_inc(&ns->ns_bref); -} - -/* This is only for callers that care about refcount */ -static int ldlm_namespace_get_return(struct ldlm_namespace *ns) -{ - return atomic_inc_return(&ns->ns_bref); -} - -void ldlm_namespace_put(struct ldlm_namespace *ns) -{ - if (atomic_dec_and_lock(&ns->ns_bref, &ns->ns_lock)) { - wake_up(&ns->ns_waitq); - spin_unlock(&ns->ns_lock); - } -} - -/** Should be called with ldlm_namespace_lock(client) taken. */ -void ldlm_namespace_move_to_active_locked(struct ldlm_namespace *ns, - enum ldlm_side client) -{ - LASSERT(!list_empty(&ns->ns_list_chain)); - LASSERT(mutex_is_locked(ldlm_namespace_lock(client))); - list_move_tail(&ns->ns_list_chain, ldlm_namespace_list(client)); -} - -/** Should be called with ldlm_namespace_lock(client) taken. */ -void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *ns, - enum ldlm_side client) -{ - LASSERT(!list_empty(&ns->ns_list_chain)); - LASSERT(mutex_is_locked(ldlm_namespace_lock(client))); - list_move_tail(&ns->ns_list_chain, &ldlm_cli_inactive_namespace_list); -} - -/** Should be called with ldlm_namespace_lock(client) taken. */ -struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side client) -{ - LASSERT(mutex_is_locked(ldlm_namespace_lock(client))); - LASSERT(!list_empty(ldlm_namespace_list(client))); - return container_of(ldlm_namespace_list(client)->next, - struct ldlm_namespace, ns_list_chain); -} - -/** Create and initialize new resource. 
*/ -static struct ldlm_resource *ldlm_resource_new(void) -{ - struct ldlm_resource *res; - int idx; - - res = kmem_cache_zalloc(ldlm_resource_slab, GFP_NOFS); - if (!res) - return NULL; - - INIT_LIST_HEAD(&res->lr_granted); - INIT_LIST_HEAD(&res->lr_waiting); - - /* Initialize interval trees for each lock mode. */ - for (idx = 0; idx < LCK_MODE_NUM; idx++) { - res->lr_itree[idx].lit_size = 0; - res->lr_itree[idx].lit_mode = 1 << idx; - res->lr_itree[idx].lit_root = NULL; - } - - atomic_set(&res->lr_refcount, 1); - spin_lock_init(&res->lr_lock); - lu_ref_init(&res->lr_reference); - - /* The creator of the resource must unlock the mutex after LVB - * initialization. - */ - mutex_init(&res->lr_lvb_mutex); - mutex_lock(&res->lr_lvb_mutex); - - return res; -} - -/** - * Return a reference to resource with given name, creating it if necessary. - * Args: namespace with ns_lock unlocked - * Locks: takes and releases NS hash-lock and res->lr_lock - * Returns: referenced, unlocked ldlm_resource or NULL - */ -struct ldlm_resource * -ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, - const struct ldlm_res_id *name, enum ldlm_type type, - int create) -{ - struct hlist_node *hnode; - struct ldlm_resource *res = NULL; - struct cfs_hash_bd bd; - __u64 version; - int ns_refcount = 0; - int rc; - - LASSERT(!parent); - LASSERT(ns->ns_rs_hash); - LASSERT(name->name[0] != 0); - - cfs_hash_bd_get_and_lock(ns->ns_rs_hash, (void *)name, &bd, 0); - hnode = cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name); - if (hnode) { - cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0); - goto lvbo_init; - } - - version = cfs_hash_bd_version_get(&bd); - cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0); - - if (create == 0) - return ERR_PTR(-ENOENT); - - LASSERTF(type >= LDLM_MIN_TYPE && type < LDLM_MAX_TYPE, - "type: %d\n", type); - res = ldlm_resource_new(); - if (!res) - return ERR_PTR(-ENOMEM); - - res->lr_ns_bucket = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd); - res->lr_name 
= *name; - res->lr_type = type; - - cfs_hash_bd_lock(ns->ns_rs_hash, &bd, 1); - hnode = (version == cfs_hash_bd_version_get(&bd)) ? NULL : - cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name); - - if (hnode) { - /* Someone won the race and already added the resource. */ - cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1); - /* Clean lu_ref for failed resource. */ - lu_ref_fini(&res->lr_reference); - /* We have taken lr_lvb_mutex. Drop it. */ - mutex_unlock(&res->lr_lvb_mutex); - kmem_cache_free(ldlm_resource_slab, res); -lvbo_init: - res = hlist_entry(hnode, struct ldlm_resource, lr_hash); - /* Synchronize with regard to resource creation. */ - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) { - mutex_lock(&res->lr_lvb_mutex); - mutex_unlock(&res->lr_lvb_mutex); - } - - if (unlikely(res->lr_lvb_len < 0)) { - rc = res->lr_lvb_len; - ldlm_resource_putref(res); - res = ERR_PTR(rc); - } - return res; - } - /* We won! Let's add the resource. */ - cfs_hash_bd_add_locked(ns->ns_rs_hash, &bd, &res->lr_hash); - if (cfs_hash_bd_count_get(&bd) == 1) - ns_refcount = ldlm_namespace_get_return(ns); - - cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1); - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) { - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2); - rc = ns->ns_lvbo->lvbo_init(res); - if (rc < 0) { - CERROR("%s: lvbo_init failed for resource %#llx:%#llx: rc = %d\n", - ns->ns_obd->obd_name, name->name[0], - name->name[1], rc); - res->lr_lvb_len = rc; - mutex_unlock(&res->lr_lvb_mutex); - ldlm_resource_putref(res); - return ERR_PTR(rc); - } - } - - /* We create resource with locked lr_lvb_mutex. */ - mutex_unlock(&res->lr_lvb_mutex); - - /* Let's see if we happened to be the very first resource in this - * namespace. If so, and this is a client namespace, we need to move - * the namespace into the active namespaces list to be patrolled by - * the ldlm_poold. 
- */ - if (ns_refcount == 1) { - mutex_lock(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT)); - ldlm_namespace_move_to_active_locked(ns, LDLM_NAMESPACE_CLIENT); - mutex_unlock(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT)); - } - - return res; -} -EXPORT_SYMBOL(ldlm_resource_get); - -static void __ldlm_resource_putref_final(struct cfs_hash_bd *bd, - struct ldlm_resource *res) -{ - struct ldlm_ns_bucket *nsb = res->lr_ns_bucket; - struct ldlm_namespace *ns = nsb->nsb_namespace; - - if (!list_empty(&res->lr_granted)) { - ldlm_resource_dump(D_ERROR, res); - LBUG(); - } - - if (!list_empty(&res->lr_waiting)) { - ldlm_resource_dump(D_ERROR, res); - LBUG(); - } - - cfs_hash_bd_del_locked(ns->ns_rs_hash, - bd, &res->lr_hash); - lu_ref_fini(&res->lr_reference); - cfs_hash_bd_unlock(ns->ns_rs_hash, bd, 1); - if (ns->ns_lvbo && ns->ns_lvbo->lvbo_free) - ns->ns_lvbo->lvbo_free(res); - if (cfs_hash_bd_count_get(bd) == 0) - ldlm_namespace_put(ns); - kmem_cache_free(ldlm_resource_slab, res); -} - -void ldlm_resource_putref(struct ldlm_resource *res) -{ - struct ldlm_namespace *ns = ldlm_res_to_ns(res); - struct cfs_hash_bd bd; - - LASSERT_ATOMIC_GT_LT(&res->lr_refcount, 0, LI_POISON); - CDEBUG(D_INFO, "putref res: %p count: %d\n", - res, atomic_read(&res->lr_refcount) - 1); - - cfs_hash_bd_get(ns->ns_rs_hash, &res->lr_name, &bd); - if (cfs_hash_bd_dec_and_lock(ns->ns_rs_hash, &bd, &res->lr_refcount)) - __ldlm_resource_putref_final(&bd, res); -} -EXPORT_SYMBOL(ldlm_resource_putref); - -/** - * Add a lock into a given resource into specified lock list. 
- */ -void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, - struct ldlm_lock *lock) -{ - check_res_locked(res); - - LDLM_DEBUG(lock, "About to add this lock:"); - - if (ldlm_is_destroyed(lock)) { - CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); - return; - } - - LASSERT(list_empty(&lock->l_res_link)); - - list_add_tail(&lock->l_res_link, head); -} - -void ldlm_resource_unlink_lock(struct ldlm_lock *lock) -{ - int type = lock->l_resource->lr_type; - - check_res_locked(lock->l_resource); - if (type == LDLM_IBITS || type == LDLM_PLAIN) - ldlm_unlink_lock_skiplist(lock); - else if (type == LDLM_EXTENT) - ldlm_extent_unlink_lock(lock); - list_del_init(&lock->l_res_link); -} -EXPORT_SYMBOL(ldlm_resource_unlink_lock); - -void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc) -{ - desc->lr_type = res->lr_type; - desc->lr_name = res->lr_name; -} - -/** - * Print information about all locks in all namespaces on this node to debug - * log. - */ -void ldlm_dump_all_namespaces(enum ldlm_side client, int level) -{ - struct ldlm_namespace *ns; - - if (!((libcfs_debug | D_ERROR) & level)) - return; - - mutex_lock(ldlm_namespace_lock(client)); - - list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) - ldlm_namespace_dump(level, ns); - - mutex_unlock(ldlm_namespace_lock(client)); -} - -static int ldlm_res_hash_dump(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - int level = (int)(unsigned long)arg; - - lock_res(res); - ldlm_resource_dump(level, res); - unlock_res(res); - - return 0; -} - -/** - * Print information about all locks in this namespace on this node to debug - * log. 
- */ -void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) -{ - if (!((libcfs_debug | D_ERROR) & level)) - return; - - CDEBUG(level, "--- Namespace: %s (rc: %d, side: client)\n", - ldlm_ns_name(ns), atomic_read(&ns->ns_bref)); - - if (time_before(jiffies, ns->ns_next_dump)) - return; - - cfs_hash_for_each_nolock(ns->ns_rs_hash, - ldlm_res_hash_dump, - (void *)(unsigned long)level, 0); - spin_lock(&ns->ns_lock); - ns->ns_next_dump = jiffies + 10 * HZ; - spin_unlock(&ns->ns_lock); -} - -/** - * Print information about all locks in this resource to debug log. - */ -void ldlm_resource_dump(int level, struct ldlm_resource *res) -{ - struct ldlm_lock *lock; - unsigned int granted = 0; - - BUILD_BUG_ON(RES_NAME_SIZE != 4); - - if (!((libcfs_debug | D_ERROR) & level)) - return; - - CDEBUG(level, "--- Resource: " DLDLMRES " (%p) refcount = %d\n", - PLDLMRES(res), res, atomic_read(&res->lr_refcount)); - - if (!list_empty(&res->lr_granted)) { - CDEBUG(level, "Granted locks (in reverse order):\n"); - list_for_each_entry_reverse(lock, &res->lr_granted, - l_res_link) { - LDLM_DEBUG_LIMIT(level, lock, "###"); - if (!(level & D_CANTMASK) && - ++granted > ldlm_dump_granted_max) { - CDEBUG(level, - "only dump %d granted locks to avoid DDOS.\n", - granted); - break; - } - } - } - if (!list_empty(&res->lr_waiting)) { - CDEBUG(level, "Waiting locks:\n"); - list_for_each_entry(lock, &res->lr_waiting, l_res_link) - LDLM_DEBUG_LIMIT(level, lock, "###"); - } -} -EXPORT_SYMBOL(ldlm_resource_dump); diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile deleted file mode 100644 index 5200924182ae..000000000000 --- a/drivers/staging/lustre/lustre/llite/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LUSTRE_FS) += lustre.o -lustre-y := dcache.o dir.o file.o 
llite_lib.o llite_nfs.o \ - rw.o rw26.o namei.o symlink.o llite_mmap.o range_lock.o \ - xattr.o xattr_cache.o xattr_security.o \ - super25.o statahead.o glimpse.o lcommon_cl.o lcommon_misc.o \ - vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o \ - lproc_llite.o - -lustre-$(CONFIG_FS_POSIX_ACL) += acl.o diff --git a/drivers/staging/lustre/lustre/llite/acl.c b/drivers/staging/lustre/lustre/llite/acl.c deleted file mode 100644 index 2ee9ff931236..000000000000 --- a/drivers/staging/lustre/lustre/llite/acl.c +++ /dev/null @@ -1,108 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/llite/acl.c - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include "llite_internal.h" - -struct posix_acl *ll_get_acl(struct inode *inode, int type) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct posix_acl *acl = NULL; - - spin_lock(&lli->lli_lock); - /* VFS' acl_permission_check->check_acl will release the refcount */ - acl = posix_acl_dup(lli->lli_posix_acl); - spin_unlock(&lli->lli_lock); - - return acl; -} - -int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *req = NULL; - const char *name = NULL; - size_t value_size = 0; - char *value = NULL; - int rc = 0; - - switch (type) { - case ACL_TYPE_ACCESS: - name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) - rc = posix_acl_update_mode(inode, &inode->i_mode, &acl); - break; - - case ACL_TYPE_DEFAULT: - name = XATTR_NAME_POSIX_ACL_DEFAULT; - if (!S_ISDIR(inode->i_mode)) - rc = acl ? -EACCES : 0; - break; - - default: - rc = -EINVAL; - break; - } - if (rc) - return rc; - - if (acl) { - value_size = posix_acl_xattr_size(acl->a_count); - value = kmalloc(value_size, GFP_NOFS); - if (!value) { - rc = -ENOMEM; - goto out; - } - - rc = posix_acl_to_xattr(&init_user_ns, acl, value, value_size); - if (rc < 0) - goto out_value; - } - - rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), - value ? OBD_MD_FLXATTR : OBD_MD_FLXATTRRM, - name, value, value_size, 0, 0, &req); - - ptlrpc_req_finished(req); -out_value: - kfree(value); -out: - if (rc) - forget_cached_acl(inode, type); - else - set_cached_acl(inode, type, acl); - return rc; -} diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c deleted file mode 100644 index 11b82c639bfe..000000000000 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include - -#include "llite_internal.h" - -static void free_dentry_data(struct rcu_head *head) -{ - struct ll_dentry_data *lld; - - lld = container_of(head, struct ll_dentry_data, lld_rcu_head); - kfree(lld); -} - -/* should NOT be called with the dcache lock, see fs/dcache.c */ -static void ll_release(struct dentry *de) -{ - struct ll_dentry_data *lld; - - LASSERT(de); - lld = ll_d2d(de); - if (lld->lld_it) { - ll_intent_release(lld->lld_it); - kfree(lld->lld_it); - } - - de->d_fsdata = NULL; - call_rcu(&lld->lld_rcu_head, free_dentry_data); -} - -/* Compare if two dentries are the same. Don't match if the existing dentry - * is marked invalid. Returns 1 if different, 0 if the same. - * - * This avoids a race where ll_lookup_it() instantiates a dentry, but we get - * an AST before calling d_revalidate_it(). 
The dentry still exists (marked - * INVALID) so d_lookup() matches it, but we have no lock on it (so - * lock_match() fails) and we spin around real_lookup(). - * - * This race doesn't apply to lookups in d_alloc_parallel(), and for - * those we want to ensure that only one dentry with a given name is - * in ll_lookup_nd() at a time. So allow invalid dentries to match - * while d_in_lookup(). We will be called again when the lookup - * completes, and can give a different answer then. - */ -static int ll_dcompare(const struct dentry *dentry, - unsigned int len, const char *str, - const struct qstr *name) -{ - if (len != name->len) - return 1; - - if (memcmp(str, name->name, len)) - return 1; - - CDEBUG(D_DENTRY, "found name %.*s(%p) flags %#x refc %d\n", - name->len, name->name, dentry, dentry->d_flags, - d_count(dentry)); - - /* mountpoint is always valid */ - if (d_mountpoint(dentry)) - return 0; - - /* ensure exclusion against parallel lookup of the same name */ - if (d_in_lookup((struct dentry *)dentry)) - return 0; - - if (d_lustre_invalid(dentry)) - return 1; - - return 0; -} - -/** - * Called when last reference to a dentry is dropped and dcache wants to know - * whether or not it should cache it: - * - return 1 to delete the dentry immediately - * - return 0 to cache the dentry - * Should NOT be called with the dcache lock, see fs/dcache.c - */ -static int ll_ddelete(const struct dentry *de) -{ - LASSERT(de); - - CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n", - d_lustre_invalid(de) ? "deleting" : "keeping", - de, de, de->d_parent, d_inode(de), - d_unhashed(de) ? "" : "hashed,", - list_empty(&de->d_subdirs) ? "" : "subdirs"); - - /* kernel >= 2.6.38 last refcount is decreased after this function. 
*/ - LASSERT(d_count(de) == 1); - - if (d_lustre_invalid(de)) - return 1; - return 0; -} - -static int ll_d_init(struct dentry *de) -{ - struct ll_dentry_data *lld = kzalloc(sizeof(*lld), GFP_KERNEL); - - if (unlikely(!lld)) - return -ENOMEM; - lld->lld_invalid = 1; - de->d_fsdata = lld; - return 0; -} - -void ll_intent_drop_lock(struct lookup_intent *it) -{ - if (it->it_op && it->it_lock_mode) { - struct lustre_handle handle; - - handle.cookie = it->it_lock_handle; - - CDEBUG(D_DLMTRACE, - "releasing lock with cookie %#llx from it %p\n", - handle.cookie, it); - ldlm_lock_decref(&handle, it->it_lock_mode); - - /* bug 494: intent_release may be called multiple times, from - * this thread and we don't want to double-decref this lock - */ - it->it_lock_mode = 0; - if (it->it_remote_lock_mode != 0) { - handle.cookie = it->it_remote_lock_handle; - - CDEBUG(D_DLMTRACE, - "releasing remote lock with cookie%#llx from it %p\n", - handle.cookie, it); - ldlm_lock_decref(&handle, - it->it_remote_lock_mode); - it->it_remote_lock_mode = 0; - } - } -} - -void ll_intent_release(struct lookup_intent *it) -{ - CDEBUG(D_INFO, "intent %p released\n", it); - ll_intent_drop_lock(it); - /* We are still holding extra reference on a request, need to free it */ - if (it_disposition(it, DISP_ENQ_OPEN_REF)) - ptlrpc_req_finished(it->it_request); /* ll_file_open */ - - if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */ - ptlrpc_req_finished(it->it_request); - - it->it_disposition = 0; - it->it_request = NULL; -} - -void ll_invalidate_aliases(struct inode *inode) -{ - struct dentry *dentry; - - CDEBUG(D_INODE, "marking dentries for ino " DFID "(%p) invalid\n", - PFID(ll_inode2fid(inode)), inode); - - spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { - CDEBUG(D_DENTRY, - "dentry in drop %pd (%p) parent %p inode %p flags %d\n", - dentry, dentry, dentry->d_parent, - d_inode(dentry), dentry->d_flags); - - d_lustre_invalidate(dentry, 0); - } - 
spin_unlock(&inode->i_lock); -} - -int ll_revalidate_it_finish(struct ptlrpc_request *request, - struct lookup_intent *it, - struct inode *inode) -{ - int rc = 0; - - if (!request) - return 0; - - if (it_disposition(it, DISP_LOOKUP_NEG)) - return -ENOENT; - - rc = ll_prep_inode(&inode, request, NULL, it); - - return rc; -} - -void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode) -{ - if (it->it_lock_mode && inode) { - struct ll_sb_info *sbi = ll_i2sbi(inode); - - CDEBUG(D_DLMTRACE, "setting l_data to inode " DFID "(%p)\n", - PFID(ll_inode2fid(inode)), inode); - ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); - } - - /* drop lookup or getattr locks immediately */ - if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) { - /* on 2.6 there are situation when several lookups and - * revalidations may be requested during single operation. - * therefore, we don't release intent here -bzzz - */ - ll_intent_drop_lock(it); - } -} - -static int ll_revalidate_dentry(struct dentry *dentry, - unsigned int lookup_flags) -{ - struct inode *dir = d_inode(dentry->d_parent); - - /* If this is intermediate component path lookup and we were able to get - * to this dentry, then its lock has not been revoked and the - * path component is valid. - */ - if (lookup_flags & LOOKUP_PARENT) - return 1; - - /* Symlink - always valid as long as the dentry was found */ - if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) - return 1; - - /* - * VFS warns us that this is the second go around and previous - * operation failed (most likely open|creat), so this time - * we better talk to the server via the lookup path by name, - * not by fid. - */ - if (lookup_flags & LOOKUP_REVAL) - return 0; - - if (!dentry_may_statahead(dir, dentry)) - return 1; - - if (lookup_flags & LOOKUP_RCU) - return -ECHILD; - - ll_statahead(dir, &dentry, !d_inode(dentry)); - return 1; -} - -/* - * Always trust cached dentries. Update statahead window if necessary. 
- */ -static int ll_revalidate_nd(struct dentry *dentry, unsigned int flags) -{ - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, flags=%u\n", - dentry, flags); - - return ll_revalidate_dentry(dentry, flags); -} - -const struct dentry_operations ll_d_ops = { - .d_init = ll_d_init, - .d_revalidate = ll_revalidate_nd, - .d_release = ll_release, - .d_delete = ll_ddelete, - .d_compare = ll_dcompare, -}; diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c deleted file mode 100644 index 688dddf3ca47..000000000000 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ /dev/null @@ -1,1708 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/llite/dir.c - * - * Directory code for lustre client. 
- */ - -#include -#include -#include -#include -#include /* for wait_on_buffer */ -#include -#include - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "llite_internal.h" - -/* - * (new) readdir implementation overview. - * - * Original lustre readdir implementation cached exact copy of raw directory - * pages on the client. These pages were indexed in client page cache by - * logical offset in the directory file. This design, while very simple and - * intuitive had some inherent problems: - * - * . it implies that byte offset to the directory entry serves as a - * telldir(3)/seekdir(3) cookie, but that offset is not stable: in - * ext3/htree directory entries may move due to splits, and more - * importantly, - * - * . it is incompatible with the design of split directories for cmd3, - * that assumes that names are distributed across nodes based on their - * hash, and so readdir should be done in hash order. - * - * New readdir implementation does readdir in hash order, and uses hash of a - * file name as a telldir/seekdir cookie. This led to number of complications: - * - * . hash is not unique, so it cannot be used to index cached directory - * pages on the client (note, that it requires a whole pageful of hash - * collided entries to cause two pages to have identical hashes); - * - * . hash is not unique, so it cannot, strictly speaking, be used as an - * entry cookie. ext3/htree has the same problem and lustre implementation - * mimics their solution: seekdir(hash) positions directory at the first - * entry with the given hash. - * - * Client side. - * - * 0. caching - * - * Client caches directory pages using hash of the first entry as an index. As - * noted above hash is not unique, so this solution doesn't work as is: - * special processing is needed for "page hash chains" (i.e., sequences of - * pages filled with entries all having the same hash value). 
- * - * First, such chains have to be detected. To this end, server returns to the - * client the hash of the first entry on the page next to one returned. When - * client detects that this hash is the same as hash of the first entry on the - * returned page, page hash collision has to be handled. Pages in the - * hash chain, except first one, are termed "overflow pages". - * - * Solution to index uniqueness problem is to not cache overflow - * pages. Instead, when page hash collision is detected, all overflow pages - * from emerging chain are immediately requested from the server and placed in - * a special data structure (struct ll_dir_chain). This data structure is used - * by ll_readdir() to process entries from overflow pages. When readdir - * invocation finishes, overflow pages are discarded. If page hash collision - * chain weren't completely processed, next call to readdir will again detect - * page hash collision, again read overflow pages in, process next portion of - * entries and again discard the pages. This is not as wasteful as it looks, - * because, given reasonable hash, page hash collisions are extremely rare. - * - * 1. directory positioning - * - * When seekdir(hash) is called, original - * - * - * - * - * - * - * - * - * Server. - * - * identification of and access to overflow pages - * - * page format - * - * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains - * a header lu_dirpage which describes the start/end hash, and whether this - * page is empty (contains no dir entry) or hash collide with next page. - * After client receives reply, several pages will be integrated into dir page - * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the lu_dirpage - * for this integrated page will be adjusted. See lmv_adjust_dirpages(). 
- * - */ -struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data, - __u64 offset) -{ - struct md_callback cb_op; - struct page *page; - int rc; - - cb_op.md_blocking_ast = ll_md_blocking_ast; - rc = md_read_page(ll_i2mdexp(dir), op_data, &cb_op, offset, &page); - if (rc) - return ERR_PTR(rc); - - return page; -} - -void ll_release_page(struct inode *inode, struct page *page, bool remove) -{ - kunmap(page); - - /* - * Always remove the page for striped dir, because the page is - * built from temporarily in LMV layer - */ - if (inode && S_ISDIR(inode->i_mode) && - ll_i2info(inode)->lli_lsm_md) { - __free_page(page); - return; - } - - if (remove) { - lock_page(page); - if (likely(page->mapping)) - truncate_complete_page(page->mapping, page); - unlock_page(page); - } - put_page(page); -} - -/** - * return IF_* type for given lu_dirent entry. - * IF_* flag shld be converted to particular OS file type in - * platform llite module. - */ -static __u16 ll_dirent_type_get(struct lu_dirent *ent) -{ - __u16 type = 0; - struct luda_type *lt; - int len = 0; - - if (le32_to_cpu(ent->lde_attrs) & LUDA_TYPE) { - const unsigned int align = sizeof(struct luda_type) - 1; - - len = le16_to_cpu(ent->lde_namelen); - len = (len + align) & ~align; - lt = (void *)ent->lde_name + len; - type = IFTODT(le16_to_cpu(lt->lt_type)); - } - return type; -} - -int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, - struct dir_context *ctx) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - __u64 pos = *ppos; - int is_api32 = ll_need_32bit_api(sbi); - int is_hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; - struct page *page; - bool done = false; - int rc = 0; - - page = ll_get_dir_page(inode, op_data, pos); - - while (rc == 0 && !done) { - struct lu_dirpage *dp; - struct lu_dirent *ent; - __u64 hash; - __u64 next; - - if (IS_ERR(page)) { - rc = PTR_ERR(page); - break; - } - - hash = MDS_DIR_END_OFF; - dp = page_address(page); - for (ent = lu_dirent_start(dp); ent 
&& !done; - ent = lu_dirent_next(ent)) { - __u16 type; - int namelen; - struct lu_fid fid; - __u64 lhash; - __u64 ino; - - hash = le64_to_cpu(ent->lde_hash); - if (hash < pos) - /* - * Skip until we find target hash - * value. - */ - continue; - - namelen = le16_to_cpu(ent->lde_namelen); - if (namelen == 0) - /* - * Skip dummy record. - */ - continue; - - if (is_api32 && is_hash64) - lhash = hash >> 32; - else - lhash = hash; - fid_le_to_cpu(&fid, &ent->lde_fid); - ino = cl_fid_build_ino(&fid, is_api32); - type = ll_dirent_type_get(ent); - ctx->pos = lhash; - /* For 'll_nfs_get_name_filldir()', it will try - * to access the 'ent' through its 'lde_name', - * so the parameter 'name' for 'ctx->actor()' - * must be part of the 'ent'. - */ - done = !dir_emit(ctx, ent->lde_name, - namelen, ino, type); - } - - if (done) { - pos = hash; - ll_release_page(inode, page, false); - break; - } - - next = le64_to_cpu(dp->ldp_hash_end); - pos = next; - if (pos == MDS_DIR_END_OFF) { - /* - * End of directory reached. - */ - done = 1; - ll_release_page(inode, page, false); - } else { - /* - * Normal case: continue to the next - * page. - */ - ll_release_page(inode, page, - le32_to_cpu(dp->ldp_flags) & - LDF_COLLIDE); - next = pos; - page = ll_get_dir_page(inode, op_data, pos); - } - } - - ctx->pos = pos; - return rc; -} - -static int ll_readdir(struct file *filp, struct dir_context *ctx) -{ - struct inode *inode = file_inode(filp); - struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp); - struct ll_sb_info *sbi = ll_i2sbi(inode); - __u64 pos = lfd ? lfd->lfd_pos : 0; - int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH; - int api32 = ll_need_32bit_api(sbi); - struct md_op_data *op_data; - int rc; - - CDEBUG(D_VFSTRACE, - "VFS Op:inode=" DFID "(%p) pos/size %lu/%llu 32bit_api %d\n", - PFID(ll_inode2fid(inode)), inode, (unsigned long)pos, - i_size_read(inode), api32); - - if (pos == MDS_DIR_END_OFF) { - /* - * end-of-file. 
- */ - rc = 0; - goto out; - } - - op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0, - LUSTRE_OPC_ANY, inode); - if (IS_ERR(op_data)) { - rc = PTR_ERR(op_data); - goto out; - } - - if (unlikely(op_data->op_mea1)) { - /* - * This is only needed for striped dir to fill .., - * see lmv_read_page - */ - if (file_dentry(filp)->d_parent && - file_dentry(filp)->d_parent->d_inode) { - __u64 ibits = MDS_INODELOCK_UPDATE; - struct inode *parent; - - parent = file_dentry(filp)->d_parent->d_inode; - if (ll_have_md_lock(parent, &ibits, LCK_MINMODE)) - op_data->op_fid3 = *ll_inode2fid(parent); - } - - /* - * If it can not find in cache, do lookup .. on the master - * object - */ - if (fid_is_zero(&op_data->op_fid3)) { - rc = ll_dir_get_parent_fid(inode, &op_data->op_fid3); - if (rc) { - ll_finish_md_op_data(op_data); - return rc; - } - } - } - op_data->op_max_pages = sbi->ll_md_brw_pages; - ctx->pos = pos; - rc = ll_dir_read(inode, &pos, op_data, ctx); - pos = ctx->pos; - if (lfd) - lfd->lfd_pos = pos; - - if (pos == MDS_DIR_END_OFF) { - if (api32) - pos = LL_DIR_END_OFF_32BIT; - else - pos = LL_DIR_END_OFF; - } else { - if (api32 && hash64) - pos >>= 32; - } - ctx->pos = pos; - ll_finish_md_op_data(op_data); -out: - if (!rc) - ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1); - - return rc; -} - -static int ll_send_mgc_param(struct obd_export *mgc, char *string) -{ - struct mgs_send_param *msp; - int rc = 0; - - msp = kzalloc(sizeof(*msp), GFP_NOFS); - if (!msp) - return -ENOMEM; - - strlcpy(msp->mgs_param, string, sizeof(msp->mgs_param)); - rc = obd_set_info_async(NULL, mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO, - sizeof(struct mgs_send_param), msp, NULL); - if (rc) - CERROR("Failed to set parameter: %d\n", rc); - kfree(msp); - - return rc; -} - -/** - * Create striped directory with specified stripe(@lump) - * - * param[in] parent the parent of the directory. - * param[in] lump the specified stripes. - * param[in] dirname the name of the directory. 
- * param[in] mode the specified mode of the directory. - * - * retval =0 if striped directory is being created successfully. - * <0 if the creation is failed. - */ -static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump, - const char *dirname, umode_t mode) -{ - struct ptlrpc_request *request = NULL; - struct md_op_data *op_data; - struct ll_sb_info *sbi = ll_i2sbi(parent); - struct inode *inode = NULL; - struct dentry dentry; - int err; - - if (unlikely(lump->lum_magic != LMV_USER_MAGIC)) - return -EINVAL; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p) name %s stripe_offset %d, stripe_count: %u\n", - PFID(ll_inode2fid(parent)), parent, dirname, - (int)lump->lum_stripe_offset, lump->lum_stripe_count); - - if (lump->lum_stripe_count > 1 && - !(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_DIR_STRIPE)) - return -EINVAL; - - if (lump->lum_magic != cpu_to_le32(LMV_USER_MAGIC)) - lustre_swab_lmv_user_md(lump); - - if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent))) - mode &= ~current_umask(); - mode = (mode & (0777 | S_ISVTX)) | S_IFDIR; - op_data = ll_prep_md_op_data(NULL, parent, NULL, dirname, - strlen(dirname), mode, LUSTRE_OPC_MKDIR, - lump); - if (IS_ERR(op_data)) { - err = PTR_ERR(op_data); - goto err_exit; - } - - op_data->op_cli_flags |= CLI_SET_MEA; - err = md_create(sbi->ll_md_exp, op_data, lump, sizeof(*lump), mode, - from_kuid(&init_user_ns, current_fsuid()), - from_kgid(&init_user_ns, current_fsgid()), - current_cap(), 0, &request); - ll_finish_md_op_data(op_data); - - err = ll_prep_inode(&inode, request, parent->i_sb, NULL); - if (err) - goto err_exit; - - memset(&dentry, 0, sizeof(dentry)); - dentry.d_inode = inode; - - err = ll_init_security(&dentry, inode, parent); - iput(inode); - -err_exit: - ptlrpc_req_finished(request); - return err; -} - -int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, - int set_default) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct md_op_data *op_data; - 
struct ptlrpc_request *req = NULL; - int rc = 0; - struct lustre_sb_info *lsi = s2lsi(inode->i_sb); - struct obd_device *mgc = lsi->lsi_mgc; - int lum_size; - - if (lump) { - /* - * This is coming from userspace, so should be in - * local endian. But the MDS would like it in little - * endian, so we swab it before we send it. - */ - switch (lump->lmm_magic) { - case LOV_USER_MAGIC_V1: { - if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1)) - lustre_swab_lov_user_md_v1(lump); - lum_size = sizeof(struct lov_user_md_v1); - break; - } - case LOV_USER_MAGIC_V3: { - if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3)) - lustre_swab_lov_user_md_v3( - (struct lov_user_md_v3 *)lump); - lum_size = sizeof(struct lov_user_md_v3); - break; - } - case LMV_USER_MAGIC: { - if (lump->lmm_magic != cpu_to_le32(LMV_USER_MAGIC)) - lustre_swab_lmv_user_md( - (struct lmv_user_md *)lump); - lum_size = sizeof(struct lmv_user_md); - break; - } - default: { - CDEBUG(D_IOCTL, - "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n", - lump->lmm_magic, LOV_USER_MAGIC_V1, - LOV_USER_MAGIC_V3); - return -EINVAL; - } - } - } else { - lum_size = sizeof(struct lov_user_md_v1); - } - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - /* swabbing is done in lov_setstripe() on server side */ - rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size, &req); - ll_finish_md_op_data(op_data); - ptlrpc_req_finished(req); - if (rc) - return rc; - -#if OBD_OCD_VERSION(2, 13, 53, 0) > LUSTRE_VERSION_CODE - /* - * 2.9 server has stored filesystem default stripe in ROOT xattr, - * and it's stored into system config for backward compatibility. 
- * - * In the following we use the fact that LOV_USER_MAGIC_V1 and - * LOV_USER_MAGIC_V3 have the same initial fields so we do not - * need to make the distinction between the 2 versions - */ - if (set_default && mgc->u.cli.cl_mgc_mgsexp) { - char *param = NULL; - char *buf; - - param = kzalloc(MGS_PARAM_MAXLEN, GFP_NOFS); - if (!param) - return -ENOMEM; - - buf = param; - /* Get fsname and assume devname to be -MDT0000. */ - ll_get_fsname(inode->i_sb, buf, MTI_NAME_MAXLEN); - strcat(buf, "-MDT0000.lov"); - buf += strlen(buf); - - /* Set root stripesize */ - sprintf(buf, ".stripesize=%u", - lump ? le32_to_cpu(lump->lmm_stripe_size) : 0); - rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param); - if (rc) - goto end; - - /* Set root stripecount */ - sprintf(buf, ".stripecount=%hd", - lump ? le16_to_cpu(lump->lmm_stripe_count) : 0); - rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param); - if (rc) - goto end; - - /* Set root stripeoffset */ - sprintf(buf, ".stripeoffset=%hd", - lump ? le16_to_cpu(lump->lmm_stripe_offset) : - (typeof(lump->lmm_stripe_offset))(-1)); - rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param); - -end: - kfree(param); - } -#endif - return rc; -} - -/** - * This function will be used to get default LOV/LMV/Default LMV - * @valid will be used to indicate which stripe it will retrieve - * OBD_MD_MEA LMV stripe EA - * OBD_MD_DEFAULT_MEA Default LMV stripe EA - * otherwise Default LOV EA. 
- * Each time, it can only retrieve 1 stripe EA - **/ -int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size, - struct ptlrpc_request **request, u64 valid) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct mdt_body *body; - struct lov_mds_md *lmm = NULL; - struct ptlrpc_request *req = NULL; - int rc, lmmsize; - struct md_op_data *op_data; - - rc = ll_get_max_mdsize(sbi, &lmmsize); - if (rc) - return rc; - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, - 0, lmmsize, LUSTRE_OPC_ANY, - NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA; - rc = md_getattr(sbi->ll_md_exp, op_data, &req); - ll_finish_md_op_data(op_data); - if (rc < 0) { - CDEBUG(D_INFO, "md_getattr failed on inode " DFID ": rc %d\n", - PFID(ll_inode2fid(inode)), rc); - goto out; - } - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - - lmmsize = body->mbo_eadatasize; - - if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) || - lmmsize == 0) { - rc = -ENODATA; - goto out; - } - - lmm = req_capsule_server_sized_get(&req->rq_pill, - &RMF_MDT_MD, lmmsize); - LASSERT(lmm); - - /* - * This is coming from the MDS, so is probably in - * little endian. We convert it to host endian before - * passing it to userspace. 
- */ - /* We don't swab objects for directories */ - switch (le32_to_cpu(lmm->lmm_magic)) { - case LOV_MAGIC_V1: - if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) - lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm); - break; - case LOV_MAGIC_V3: - if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) - lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm); - break; - case LMV_MAGIC_V1: - if (cpu_to_le32(LMV_MAGIC) != LMV_MAGIC) - lustre_swab_lmv_mds_md((union lmv_mds_md *)lmm); - break; - case LMV_USER_MAGIC: - if (cpu_to_le32(LMV_USER_MAGIC) != LMV_USER_MAGIC) - lustre_swab_lmv_user_md((struct lmv_user_md *)lmm); - break; - default: - CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic); - rc = -EPROTO; - } -out: - *plmm = lmm; - *plmm_size = lmmsize; - *request = req; - return rc; -} - -int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid) -{ - struct md_op_data *op_data; - int mdt_index, rc; - - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - if (!op_data) - return -ENOMEM; - - op_data->op_flags |= MF_GET_MDT_IDX; - op_data->op_fid1 = *fid; - rc = md_getattr(sbi->ll_md_exp, op_data, NULL); - mdt_index = op_data->op_mds; - kvfree(op_data); - if (rc < 0) - return rc; - - return mdt_index; -} - -/* - * Get MDT index for the inode. - */ -int ll_get_mdt_idx(struct inode *inode) -{ - return ll_get_mdt_idx_by_fid(ll_i2sbi(inode), ll_inode2fid(inode)); -} - -/** - * Generic handler to do any pre-copy work. - * - * It sends a first hsm_progress (with extent length == 0) to coordinator as a - * first information for it that real work has started. - * - * Moreover, for a ARCHIVE request, it will sample the file data version and - * store it in \a copy. - * - * \return 0 on success. - */ -static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct hsm_progress_kernel hpk; - int rc2, rc = 0; - - /* Forge a hsm_progress based on data from copy. 
*/ - hpk.hpk_fid = copy->hc_hai.hai_fid; - hpk.hpk_cookie = copy->hc_hai.hai_cookie; - hpk.hpk_extent.offset = copy->hc_hai.hai_extent.offset; - hpk.hpk_extent.length = 0; - hpk.hpk_flags = 0; - hpk.hpk_errval = 0; - hpk.hpk_data_version = 0; - - /* For archive request, we need to read the current file version. */ - if (copy->hc_hai.hai_action == HSMA_ARCHIVE) { - struct inode *inode; - __u64 data_version = 0; - - /* Get inode for this fid */ - inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid); - if (IS_ERR(inode)) { - hpk.hpk_flags |= HP_FLAG_RETRY; - /* hpk_errval is >= 0 */ - hpk.hpk_errval = -PTR_ERR(inode); - rc = PTR_ERR(inode); - goto progress; - } - - /* Read current file data version */ - rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); - iput(inode); - if (rc != 0) { - CDEBUG(D_HSM, - "Could not read file data version of " DFID " (rc = %d). Archive request (%#llx) could not be done.\n", - PFID(&copy->hc_hai.hai_fid), rc, - copy->hc_hai.hai_cookie); - hpk.hpk_flags |= HP_FLAG_RETRY; - /* hpk_errval must be >= 0 */ - hpk.hpk_errval = -rc; - goto progress; - } - - /* Store in the hsm_copy for later copytool use. - * Always modified even if no lsm. - */ - copy->hc_data_version = data_version; - } - -progress: - /* On error, the request should be considered as completed */ - if (hpk.hpk_errval > 0) - hpk.hpk_flags |= HP_FLAG_COMPLETED; - rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), - &hpk, NULL); - - return rc ? rc : rc2; -} - -/** - * Generic handler to do any post-copy work. - * - * It will send the last hsm_progress update to coordinator to inform it - * that copy is finished and whether it was successful or not. - * - * Moreover, - * - for ARCHIVE request, it will sample the file data version and compare it - * with the version saved in ll_ioc_copy_start(). If they do not match, copy - * will be considered as failed. 
- * - for RESTORE request, it will sample the file data version and send it to - * coordinator which is useful if the file was imported as 'released'. - * - * \return 0 on success. - */ -static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct hsm_progress_kernel hpk; - int rc2, rc = 0; - - /* If you modify the logic here, also check llapi_hsm_copy_end(). */ - /* Take care: copy->hc_hai.hai_action, len, gid and data are not - * initialized if copy_end was called with copy == NULL. - */ - - /* Forge a hsm_progress based on data from copy. */ - hpk.hpk_fid = copy->hc_hai.hai_fid; - hpk.hpk_cookie = copy->hc_hai.hai_cookie; - hpk.hpk_extent = copy->hc_hai.hai_extent; - hpk.hpk_flags = copy->hc_flags | HP_FLAG_COMPLETED; - hpk.hpk_errval = copy->hc_errval; - hpk.hpk_data_version = 0; - - /* For archive request, we need to check the file data was not changed. - * - * For restore request, we need to send the file data version, this is - * useful when the file was created using hsm_import. - */ - if (((copy->hc_hai.hai_action == HSMA_ARCHIVE) || - (copy->hc_hai.hai_action == HSMA_RESTORE)) && - (copy->hc_errval == 0)) { - struct inode *inode; - __u64 data_version = 0; - - /* Get lsm for this fid */ - inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid); - if (IS_ERR(inode)) { - hpk.hpk_flags |= HP_FLAG_RETRY; - /* hpk_errval must be >= 0 */ - hpk.hpk_errval = -PTR_ERR(inode); - rc = PTR_ERR(inode); - goto progress; - } - - rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); - iput(inode); - if (rc) { - CDEBUG(D_HSM, - "Could not read file data version. Request could not be confirmed.\n"); - if (hpk.hpk_errval == 0) - hpk.hpk_errval = -rc; - goto progress; - } - - /* Store in the hsm_copy for later copytool use. - * Always modified even if no lsm. - */ - hpk.hpk_data_version = data_version; - - /* File could have been stripped during archiving, so we need - * to check anyway. 
- */ - if ((copy->hc_hai.hai_action == HSMA_ARCHIVE) && - (copy->hc_data_version != data_version)) { - CDEBUG(D_HSM, "File data version mismatched. File content was changed during archiving. " DFID ", start:%#llx current:%#llx\n", - PFID(&copy->hc_hai.hai_fid), - copy->hc_data_version, data_version); - /* File was changed, send error to cdt. Do not ask for - * retry because if a file is modified frequently, - * the cdt will loop on retried archive requests. - * The policy engine will ask for a new archive later - * when the file will not be modified for some tunable - * time - */ - hpk.hpk_flags &= ~HP_FLAG_RETRY; - rc = -EBUSY; - /* hpk_errval must be >= 0 */ - hpk.hpk_errval = -rc; - } - } - -progress: - rc2 = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk), - &hpk, NULL); - - return rc ? rc : rc2; -} - -static int copy_and_ioctl(int cmd, struct obd_export *exp, - const void __user *data, size_t size) -{ - void *copy; - int rc; - - copy = memdup_user(data, size); - if (IS_ERR(copy)) - return PTR_ERR(copy); - - rc = obd_iocontrol(cmd, exp, size, copy, NULL); - kfree(copy); - - return rc; -} - -static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl) -{ - int cmd = qctl->qc_cmd; - int type = qctl->qc_type; - int id = qctl->qc_id; - int valid = qctl->qc_valid; - int rc = 0; - - switch (cmd) { - case Q_SETQUOTA: - case Q_SETINFO: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; - case Q_GETQUOTA: - if (((type == USRQUOTA && - !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) || - (type == GRPQUOTA && - !in_egroup_p(make_kgid(&init_user_ns, id)))) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - break; - case Q_GETINFO: - break; - default: - CERROR("unsupported quotactl op: %#x\n", cmd); - return -ENOTTY; - } - - if (valid != QC_GENERAL) { - if (cmd == Q_GETINFO) - qctl->qc_cmd = Q_GETOINFO; - else if (cmd == Q_GETQUOTA) - qctl->qc_cmd = Q_GETOQUOTA; - else - return -EINVAL; - - switch (valid) { - case QC_MDTIDX: - rc = 
obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp, - sizeof(*qctl), qctl, NULL); - break; - case QC_OSTIDX: - rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_dt_exp, - sizeof(*qctl), qctl, NULL); - break; - case QC_UUID: - rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp, - sizeof(*qctl), qctl, NULL); - if (rc == -EAGAIN) - rc = obd_iocontrol(OBD_IOC_QUOTACTL, - sbi->ll_dt_exp, - sizeof(*qctl), qctl, NULL); - break; - default: - rc = -EINVAL; - break; - } - - if (rc) - return rc; - - qctl->qc_cmd = cmd; - } else { - struct obd_quotactl *oqctl; - - oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS); - if (!oqctl) - return -ENOMEM; - - QCTL_COPY(oqctl, qctl); - rc = obd_quotactl(sbi->ll_md_exp, oqctl); - if (rc) { - kfree(oqctl); - return rc; - } - /* If QIF_SPACE is not set, client should collect the - * space usage from OSSs by itself - */ - if (cmd == Q_GETQUOTA && - !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) && - !oqctl->qc_dqblk.dqb_curspace) { - struct obd_quotactl *oqctl_tmp; - - oqctl_tmp = kzalloc(sizeof(*oqctl_tmp), GFP_NOFS); - if (!oqctl_tmp) { - rc = -ENOMEM; - goto out; - } - - oqctl_tmp->qc_cmd = Q_GETOQUOTA; - oqctl_tmp->qc_id = oqctl->qc_id; - oqctl_tmp->qc_type = oqctl->qc_type; - - /* collect space usage from OSTs */ - oqctl_tmp->qc_dqblk.dqb_curspace = 0; - rc = obd_quotactl(sbi->ll_dt_exp, oqctl_tmp); - if (!rc || rc == -EREMOTEIO) { - oqctl->qc_dqblk.dqb_curspace = - oqctl_tmp->qc_dqblk.dqb_curspace; - oqctl->qc_dqblk.dqb_valid |= QIF_SPACE; - } - - /* collect space & inode usage from MDTs */ - oqctl_tmp->qc_dqblk.dqb_curspace = 0; - oqctl_tmp->qc_dqblk.dqb_curinodes = 0; - rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp); - if (!rc || rc == -EREMOTEIO) { - oqctl->qc_dqblk.dqb_curspace += - oqctl_tmp->qc_dqblk.dqb_curspace; - oqctl->qc_dqblk.dqb_curinodes = - oqctl_tmp->qc_dqblk.dqb_curinodes; - oqctl->qc_dqblk.dqb_valid |= QIF_INODES; - } else { - oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE; - } - - kfree(oqctl_tmp); - } -out: - QCTL_COPY(qctl, oqctl); - 
kfree(oqctl); - } - - return rc; -} - -/* This function tries to get a single name component, - * to send to the server. No actual path traversal involved, - * so we limit to NAME_MAX - */ -static char *ll_getname(const char __user *filename) -{ - int ret = 0, len; - char *tmp; - - tmp = kzalloc(NAME_MAX + 1, GFP_KERNEL); - if (!tmp) - return ERR_PTR(-ENOMEM); - - len = strncpy_from_user(tmp, filename, NAME_MAX + 1); - if (len < 0) - ret = len; - else if (len == 0) - ret = -ENOENT; - else if (len > NAME_MAX && tmp[NAME_MAX] != 0) - ret = -ENAMETOOLONG; - - if (ret) { - kfree(tmp); - tmp = ERR_PTR(ret); - } - return tmp; -} - -#define ll_putname(filename) kfree(filename) - -static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file_inode(file); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct obd_ioctl_data *data; - int rc = 0; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), cmd=%#x\n", - PFID(ll_inode2fid(inode)), inode, cmd); - - /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ - if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ - return -ENOTTY; - - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); - switch (cmd) { - case FSFILT_IOC_GETFLAGS: - case FSFILT_IOC_SETFLAGS: - return ll_iocontrol(inode, file, cmd, arg); - case FSFILT_IOC_GETVERSION_OLD: - case FSFILT_IOC_GETVERSION: - return put_user(inode->i_generation, (int __user *)arg); - /* We need to special case any other ioctls we want to handle, - * to send them to the MDS/OST as appropriate and to properly - * network encode the arg field. 
- case FSFILT_IOC_SETVERSION_OLD: - case FSFILT_IOC_SETVERSION: - */ - case LL_IOC_GET_MDTIDX: { - int mdtidx; - - mdtidx = ll_get_mdt_idx(inode); - if (mdtidx < 0) - return mdtidx; - - if (put_user((int)mdtidx, (int __user *)arg)) - return -EFAULT; - - return 0; - } - case IOC_MDC_LOOKUP: { - int namelen, len = 0; - char *buf = NULL; - char *filename; - - rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg); - if (rc) - return rc; - data = (void *)buf; - - filename = data->ioc_inlbuf1; - namelen = strlen(filename); - - if (namelen < 1) { - CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n"); - rc = -EINVAL; - goto out_free; - } - - rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL); - if (rc < 0) { - CERROR("%s: lookup %.*s failed: rc = %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), namelen, - filename, rc); - goto out_free; - } -out_free: - kvfree(buf); - return rc; - } - case LL_IOC_LMV_SETSTRIPE: { - struct lmv_user_md *lum; - char *buf = NULL; - char *filename; - int namelen = 0; - int lumlen = 0; - umode_t mode; - int len; - int rc; - - rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg); - if (rc) - return rc; - - data = (void *)buf; - if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 || - data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0) { - rc = -EINVAL; - goto lmv_out_free; - } - - filename = data->ioc_inlbuf1; - namelen = data->ioc_inllen1; - - if (namelen < 1) { - CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n"); - rc = -EINVAL; - goto lmv_out_free; - } - lum = (struct lmv_user_md *)data->ioc_inlbuf2; - lumlen = data->ioc_inllen2; - - if (lum->lum_magic != LMV_USER_MAGIC || - lumlen != sizeof(*lum)) { - CERROR("%s: wrong lum magic %x or size %d: rc = %d\n", - filename, lum->lum_magic, lumlen, -EFAULT); - rc = -EINVAL; - goto lmv_out_free; - } - -#if OBD_OCD_VERSION(2, 9, 50, 0) > LUSTRE_VERSION_CODE - mode = data->ioc_type != 0 ? 
data->ioc_type : 0777; -#else - mode = data->ioc_type; -#endif - rc = ll_dir_setdirstripe(inode, lum, filename, mode); -lmv_out_free: - kvfree(buf); - return rc; - } - case LL_IOC_LMV_SET_DEFAULT_STRIPE: { - struct lmv_user_md __user *ulump; - struct lmv_user_md lum; - int rc; - - ulump = (struct lmv_user_md __user *)arg; - if (copy_from_user(&lum, ulump, sizeof(lum))) - return -EFAULT; - - if (lum.lum_magic != LMV_USER_MAGIC) - return -EINVAL; - - rc = ll_dir_setstripe(inode, (struct lov_user_md *)&lum, 0); - - return rc; - } - case LL_IOC_LOV_SETSTRIPE: { - struct lov_user_md_v3 lumv3; - struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3; - struct lov_user_md_v1 __user *lumv1p = (void __user *)arg; - struct lov_user_md_v3 __user *lumv3p = (void __user *)arg; - - int set_default = 0; - - LASSERT(sizeof(lumv3) == sizeof(*lumv3p)); - LASSERT(sizeof(lumv3.lmm_objects[0]) == - sizeof(lumv3p->lmm_objects[0])); - /* first try with v1 which is smaller than v3 */ - if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1))) - return -EFAULT; - - if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) { - if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3))) - return -EFAULT; - if (lumv3.lmm_magic != LOV_USER_MAGIC_V3) - return -EINVAL; - } - - if (is_root_inode(inode)) - set_default = 1; - - /* in v1 and v3 cases lumv1 points to data */ - rc = ll_dir_setstripe(inode, lumv1, set_default); - - return rc; - } - case LL_IOC_LMV_GETSTRIPE: { - struct lmv_user_md __user *ulmv; - struct lmv_user_md lum; - struct ptlrpc_request *request = NULL; - struct lmv_user_md *tmp = NULL; - union lmv_mds_md *lmm = NULL; - u64 valid = 0; - int max_stripe_count; - int stripe_count; - int mdt_index; - int lum_size; - int lmmsize; - int rc; - int i; - - ulmv = (struct lmv_user_md __user *)arg; - if (copy_from_user(&lum, ulmv, sizeof(*ulmv))) - return -EFAULT; - - max_stripe_count = lum.lum_stripe_count; - /* - * lum_magic will indicate which stripe the ioctl will like - * to get, LMV_MAGIC_V1 is for normal 
LMV stripe, LMV_USER_MAGIC - * is for default LMV stripe - */ - if (lum.lum_magic == LMV_MAGIC_V1) - valid |= OBD_MD_MEA; - else if (lum.lum_magic == LMV_USER_MAGIC) - valid |= OBD_MD_DEFAULT_MEA; - else - return -EINVAL; - - rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, &request, - valid); - if (rc) - goto finish_req; - - /* Get default LMV EA */ - if (lum.lum_magic == LMV_USER_MAGIC) { - if (lmmsize > sizeof(*ulmv)) { - rc = -EINVAL; - goto finish_req; - } - - if (copy_to_user(ulmv, lmm, lmmsize)) - rc = -EFAULT; - - goto finish_req; - } - - stripe_count = lmv_mds_md_stripe_count_get(lmm); - if (max_stripe_count < stripe_count) { - lum.lum_stripe_count = stripe_count; - if (copy_to_user(ulmv, &lum, sizeof(lum))) { - rc = -EFAULT; - goto finish_req; - } - rc = -E2BIG; - goto finish_req; - } - - lum_size = lmv_user_md_size(stripe_count, LMV_MAGIC_V1); - tmp = kzalloc(lum_size, GFP_NOFS); - if (!tmp) { - rc = -ENOMEM; - goto finish_req; - } - - mdt_index = ll_get_mdt_idx(inode); - if (mdt_index < 0) { - rc = -ENOMEM; - goto out_tmp; - } - tmp->lum_magic = LMV_MAGIC_V1; - tmp->lum_stripe_count = 0; - tmp->lum_stripe_offset = mdt_index; - for (i = 0; i < stripe_count; i++) { - struct lu_fid fid; - - fid_le_to_cpu(&fid, &lmm->lmv_md_v1.lmv_stripe_fids[i]); - mdt_index = ll_get_mdt_idx_by_fid(sbi, &fid); - if (mdt_index < 0) { - rc = mdt_index; - goto out_tmp; - } - tmp->lum_objects[i].lum_mds = mdt_index; - tmp->lum_objects[i].lum_fid = fid; - tmp->lum_stripe_count++; - } - - if (copy_to_user(ulmv, tmp, lum_size)) { - rc = -EFAULT; - goto out_tmp; - } -out_tmp: - kfree(tmp); -finish_req: - ptlrpc_req_finished(request); - return rc; - } - - case LL_IOC_LOV_SWAP_LAYOUTS: - return -EPERM; - case IOC_OBD_STATFS: - return ll_obd_statfs(inode, (void __user *)arg); - case LL_IOC_LOV_GETSTRIPE: - case LL_IOC_MDC_GETINFO: - case IOC_MDC_GETFILEINFO: - case IOC_MDC_GETFILESTRIPE: { - struct ptlrpc_request *request = NULL; - struct lov_user_md __user *lump; - struct 
lov_mds_md *lmm = NULL; - struct mdt_body *body; - char *filename = NULL; - int lmmsize; - - if (cmd == IOC_MDC_GETFILEINFO || - cmd == IOC_MDC_GETFILESTRIPE) { - filename = ll_getname((const char __user *)arg); - if (IS_ERR(filename)) - return PTR_ERR(filename); - - rc = ll_lov_getstripe_ea_info(inode, filename, &lmm, - &lmmsize, &request); - } else { - rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, - &request, 0); - } - - if (request) { - body = req_capsule_server_get(&request->rq_pill, - &RMF_MDT_BODY); - LASSERT(body); - } else { - goto out_req; - } - - if (rc < 0) { - if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO || - cmd == LL_IOC_MDC_GETINFO)) { - rc = 0; - goto skip_lmm; - } - - goto out_req; - } - - if (cmd == IOC_MDC_GETFILESTRIPE || - cmd == LL_IOC_LOV_GETSTRIPE) { - lump = (struct lov_user_md __user *)arg; - } else { - struct lov_user_mds_data __user *lmdp; - - lmdp = (struct lov_user_mds_data __user *)arg; - lump = &lmdp->lmd_lmm; - } - if (copy_to_user(lump, lmm, lmmsize)) { - if (copy_to_user(lump, lmm, sizeof(*lump))) { - rc = -EFAULT; - goto out_req; - } - rc = -EOVERFLOW; - } -skip_lmm: - if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) { - struct lov_user_mds_data __user *lmdp; - lstat_t st = { 0 }; - - st.st_dev = inode->i_sb->s_dev; - st.st_mode = body->mbo_mode; - st.st_nlink = body->mbo_nlink; - st.st_uid = body->mbo_uid; - st.st_gid = body->mbo_gid; - st.st_rdev = body->mbo_rdev; - st.st_size = body->mbo_size; - st.st_blksize = PAGE_SIZE; - st.st_blocks = body->mbo_blocks; - st.st_atime = body->mbo_atime; - st.st_mtime = body->mbo_mtime; - st.st_ctime = body->mbo_ctime; - st.st_ino = cl_fid_build_ino(&body->mbo_fid1, - sbi->ll_flags & - LL_SBI_32BIT_API); - - lmdp = (struct lov_user_mds_data __user *)arg; - if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) { - rc = -EFAULT; - goto out_req; - } - } - -out_req: - ptlrpc_req_finished(request); - if (filename) - ll_putname(filename); - return rc; - } - case 
OBD_IOC_QUOTACTL: { - struct if_quotactl *qctl; - - qctl = kzalloc(sizeof(*qctl), GFP_NOFS); - if (!qctl) - return -ENOMEM; - - if (copy_from_user(qctl, (void __user *)arg, sizeof(*qctl))) { - rc = -EFAULT; - goto out_quotactl; - } - - rc = quotactl_ioctl(sbi, qctl); - - if (rc == 0 && copy_to_user((void __user *)arg, qctl, - sizeof(*qctl))) - rc = -EFAULT; - -out_quotactl: - kfree(qctl); - return rc; - } - case OBD_IOC_GETDTNAME: - case OBD_IOC_GETMDNAME: - return ll_get_obd_name(inode, cmd, arg); - case LL_IOC_FLUSHCTX: - return ll_flush_ctx(inode); - case LL_IOC_GETOBDCOUNT: { - int count, vallen; - struct obd_export *exp; - - if (copy_from_user(&count, (int __user *)arg, sizeof(int))) - return -EFAULT; - - /* get ost count when count is zero, get mdt count otherwise */ - exp = count ? sbi->ll_md_exp : sbi->ll_dt_exp; - vallen = sizeof(count); - rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT), - KEY_TGT_COUNT, &vallen, &count); - if (rc) { - CERROR("get target count failed: %d\n", rc); - return rc; - } - - if (copy_to_user((int __user *)arg, &count, sizeof(int))) - return -EFAULT; - - return 0; - } - case LL_IOC_PATH2FID: - if (copy_to_user((void __user *)arg, ll_inode2fid(inode), - sizeof(struct lu_fid))) - return -EFAULT; - return 0; - case LL_IOC_GET_CONNECT_FLAGS: { - return obd_iocontrol(cmd, sbi->ll_md_exp, 0, NULL, - (void __user *)arg); - } - case OBD_IOC_CHANGELOG_SEND: - case OBD_IOC_CHANGELOG_CLEAR: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg, - sizeof(struct ioc_changelog)); - return rc; - case OBD_IOC_FID2PATH: - return ll_fid2path(inode, (void __user *)arg); - case LL_IOC_GETPARENT: - return ll_getparent(file, (void __user *)arg); - case LL_IOC_FID2MDTIDX: { - struct obd_export *exp = ll_i2mdexp(inode); - struct lu_fid fid; - __u32 index; - - if (copy_from_user(&fid, (const struct lu_fid __user *)arg, - sizeof(fid))) - return -EFAULT; - - /* Call mdc_iocontrol */ - rc = 
obd_iocontrol(LL_IOC_FID2MDTIDX, exp, sizeof(fid), &fid, - &index); - if (rc) - return rc; - - return index; - } - case LL_IOC_HSM_REQUEST: { - struct hsm_user_request *hur; - ssize_t totalsize; - - hur = memdup_user((void __user *)arg, sizeof(*hur)); - if (IS_ERR(hur)) - return PTR_ERR(hur); - - /* Compute the whole struct size */ - totalsize = hur_len(hur); - kfree(hur); - if (totalsize < 0) - return -E2BIG; - - /* Final size will be more than double totalsize */ - if (totalsize >= MDS_MAXREQSIZE / 3) - return -E2BIG; - - hur = kzalloc(totalsize, GFP_NOFS); - if (!hur) - return -ENOMEM; - - /* Copy the whole struct */ - if (copy_from_user(hur, (void __user *)arg, totalsize)) { - kvfree(hur); - return -EFAULT; - } - - if (hur->hur_request.hr_action == HUA_RELEASE) { - const struct lu_fid *fid; - struct inode *f; - int i; - - for (i = 0; i < hur->hur_request.hr_itemcount; i++) { - fid = &hur->hur_user_item[i].hui_fid; - f = search_inode_for_lustre(inode->i_sb, fid); - if (IS_ERR(f)) { - rc = PTR_ERR(f); - break; - } - - rc = ll_hsm_release(f); - iput(f); - if (rc != 0) - break; - } - } else { - rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize, - hur, NULL); - } - - kvfree(hur); - - return rc; - } - case LL_IOC_HSM_PROGRESS: { - struct hsm_progress_kernel hpk; - struct hsm_progress hp; - - if (copy_from_user(&hp, (void __user *)arg, sizeof(hp))) - return -EFAULT; - - hpk.hpk_fid = hp.hp_fid; - hpk.hpk_cookie = hp.hp_cookie; - hpk.hpk_extent = hp.hp_extent; - hpk.hpk_flags = hp.hp_flags; - hpk.hpk_errval = hp.hp_errval; - hpk.hpk_data_version = 0; - - /* File may not exist in Lustre; all progress - * reported to Lustre root - */ - rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(hpk), &hpk, - NULL); - return rc; - } - case LL_IOC_HSM_CT_START: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg, - sizeof(struct lustre_kernelcomm)); - return rc; - - case LL_IOC_HSM_COPY_START: { - struct hsm_copy *copy; - 
int rc; - - copy = memdup_user((char __user *)arg, sizeof(*copy)); - if (IS_ERR(copy)) - return PTR_ERR(copy); - - rc = ll_ioc_copy_start(inode->i_sb, copy); - if (copy_to_user((char __user *)arg, copy, sizeof(*copy))) - rc = -EFAULT; - - kfree(copy); - return rc; - } - case LL_IOC_HSM_COPY_END: { - struct hsm_copy *copy; - int rc; - - copy = memdup_user((char __user *)arg, sizeof(*copy)); - if (IS_ERR(copy)) - return PTR_ERR(copy); - - rc = ll_ioc_copy_end(inode->i_sb, copy); - if (copy_to_user((char __user *)arg, copy, sizeof(*copy))) - rc = -EFAULT; - - kfree(copy); - return rc; - } - case LL_IOC_MIGRATE: { - char *buf = NULL; - const char *filename; - int namelen = 0; - int len; - int rc; - int mdtidx; - - rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg); - if (rc < 0) - return rc; - - data = (struct obd_ioctl_data *)buf; - if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 || - !data->ioc_inllen1 || !data->ioc_inllen2) { - rc = -EINVAL; - goto migrate_free; - } - - filename = data->ioc_inlbuf1; - namelen = data->ioc_inllen1; - if (namelen < 1 || namelen != strlen(filename) + 1) { - rc = -EINVAL; - goto migrate_free; - } - - if (data->ioc_inllen2 != sizeof(mdtidx)) { - rc = -EINVAL; - goto migrate_free; - } - mdtidx = *(int *)data->ioc_inlbuf2; - - rc = ll_migrate(inode, file, mdtidx, filename, namelen - 1); -migrate_free: - kvfree(buf); - - return rc; - } - - default: - return obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL, - (void __user *)arg); - } -} - -static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) -{ - struct inode *inode = file->f_mapping->host; - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ll_sb_info *sbi = ll_i2sbi(inode); - int api32 = ll_need_32bit_api(sbi); - loff_t ret = -EINVAL; - - switch (origin) { - case SEEK_SET: - break; - case SEEK_CUR: - offset += file->f_pos; - break; - case SEEK_END: - if (offset > 0) - goto out; - if (api32) - offset += LL_DIR_END_OFF_32BIT; - else - offset += LL_DIR_END_OFF; - break; - 
default: - goto out; - } - - if (offset >= 0 && - ((api32 && offset <= LL_DIR_END_OFF_32BIT) || - (!api32 && offset <= LL_DIR_END_OFF))) { - if (offset != file->f_pos) { - if ((api32 && offset == LL_DIR_END_OFF_32BIT) || - (!api32 && offset == LL_DIR_END_OFF)) - fd->lfd_pos = MDS_DIR_END_OFF; - else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH) - fd->lfd_pos = offset << 32; - else - fd->lfd_pos = offset; - file->f_pos = offset; - } - ret = offset; - } - goto out; - -out: - return ret; -} - -static int ll_dir_open(struct inode *inode, struct file *file) -{ - return ll_file_open(inode, file); -} - -static int ll_dir_release(struct inode *inode, struct file *file) -{ - return ll_file_release(inode, file); -} - -const struct file_operations ll_dir_operations = { - .llseek = ll_dir_seek, - .open = ll_dir_open, - .release = ll_dir_release, - .read = generic_read_dir, - .iterate_shared = ll_readdir, - .unlocked_ioctl = ll_dir_ioctl, - .fsync = ll_fsync, -}; diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c deleted file mode 100644 index 02295931883b..000000000000 --- a/drivers/staging/lustre/lustre/llite/file.c +++ /dev/null @@ -1,3580 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/llite/file.c - * - * Author: Peter Braam - * Author: Phil Schwan - * Author: Andreas Dilger - */ - -#define DEBUG_SUBSYSTEM S_LLITE -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "llite_internal.h" - -static int -ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg); - -static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, - bool *lease_broken); - -static enum llioc_iter -ll_iocontrol_call(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg, int *rcp); - -static struct ll_file_data *ll_file_data_get(void) -{ - struct ll_file_data *fd; - - fd = kmem_cache_zalloc(ll_file_data_slab, GFP_NOFS); - if (!fd) - return NULL; - fd->fd_write_failed = false; - return fd; -} - -static void ll_file_data_put(struct ll_file_data *fd) -{ - if (fd) - kmem_cache_free(ll_file_data_slab, fd); -} - -/** - * Packs all the attributes into @op_data for the CLOSE rpc. 
- */ -static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, - struct obd_client_handle *och) -{ - struct ll_inode_info *lli = ll_i2info(inode); - - ll_prep_md_op_data(op_data, inode, NULL, NULL, - 0, 0, LUSTRE_OPC_ANY, NULL); - - op_data->op_attr.ia_mode = inode->i_mode; - op_data->op_attr.ia_atime = inode->i_atime; - op_data->op_attr.ia_mtime = inode->i_mtime; - op_data->op_attr.ia_ctime = inode->i_ctime; - op_data->op_attr.ia_size = i_size_read(inode); - op_data->op_attr.ia_valid |= ATTR_MODE | ATTR_ATIME | ATTR_ATIME_SET | - ATTR_MTIME | ATTR_MTIME_SET | - ATTR_CTIME | ATTR_CTIME_SET; - op_data->op_attr_blocks = inode->i_blocks; - op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags); - op_data->op_handle = och->och_fh; - - /* - * For HSM: if inode data has been modified, pack it so that - * MDT can set data dirty flag in the archive. - */ - if (och->och_flags & FMODE_WRITE && - test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) - op_data->op_bias |= MDS_DATA_MODIFIED; -} - -/** - * Perform a close, possibly with a bias. - * The meaning of "data" depends on the value of "bias". - * - * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version. - * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to - * swap layouts with. 
- */ -static int ll_close_inode_openhandle(struct inode *inode, - struct obd_client_handle *och, - enum mds_op_bias bias, - void *data) -{ - const struct ll_inode_info *lli = ll_i2info(inode); - struct obd_export *md_exp = ll_i2mdexp(inode); - struct md_op_data *op_data; - struct ptlrpc_request *req = NULL; - int rc; - - if (!class_exp2obd(md_exp)) { - CERROR("%s: invalid MDC connection handle closing " DFID "\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(&lli->lli_fid)); - rc = 0; - goto out; - } - - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - /* - * We leak openhandle and request here on error, but not much to be - * done in OOM case since app won't retry close on error either. - */ - if (!op_data) { - rc = -ENOMEM; - goto out; - } - - ll_prepare_close(inode, op_data, och); - switch (bias) { - case MDS_CLOSE_LAYOUT_SWAP: - LASSERT(data); - op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP; - op_data->op_data_version = 0; - op_data->op_lease_handle = och->och_lease_handle; - op_data->op_fid2 = *ll_inode2fid(data); - break; - - case MDS_HSM_RELEASE: - LASSERT(data); - op_data->op_bias |= MDS_HSM_RELEASE; - op_data->op_data_version = *(__u64 *)data; - op_data->op_lease_handle = och->och_lease_handle; - op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; - break; - - default: - LASSERT(!data); - break; - } - - rc = md_close(md_exp, op_data, och->och_mod, &req); - if (rc && rc != -EINTR) { - CERROR("%s: inode " DFID " mdc close failed: rc = %d\n", - md_exp->exp_obd->obd_name, PFID(&lli->lli_fid), rc); - } - - if (op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP) && - !rc) { - struct mdt_body *body; - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED)) - rc = -EBUSY; - } - - ll_finish_md_op_data(op_data); - -out: - md_clear_open_replay_data(md_exp, och); - och->och_fh.cookie = DEAD_HANDLE_MAGIC; - kfree(och); - - ptlrpc_req_finished(req); - return rc; -} - -int ll_md_real_close(struct inode 
*inode, fmode_t fmode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct obd_client_handle **och_p; - struct obd_client_handle *och; - __u64 *och_usecount; - int rc = 0; - - if (fmode & FMODE_WRITE) { - och_p = &lli->lli_mds_write_och; - och_usecount = &lli->lli_open_fd_write_count; - } else if (fmode & FMODE_EXEC) { - och_p = &lli->lli_mds_exec_och; - och_usecount = &lli->lli_open_fd_exec_count; - } else { - LASSERT(fmode & FMODE_READ); - och_p = &lli->lli_mds_read_och; - och_usecount = &lli->lli_open_fd_read_count; - } - - mutex_lock(&lli->lli_och_mutex); - if (*och_usecount > 0) { - /* There are still users of this handle, so skip - * freeing it. - */ - mutex_unlock(&lli->lli_och_mutex); - return 0; - } - - och = *och_p; - *och_p = NULL; - mutex_unlock(&lli->lli_och_mutex); - - if (och) { - /* There might be a race and this handle may already - * be closed. - */ - rc = ll_close_inode_openhandle(inode, och, 0, NULL); - } - - return rc; -} - -static int ll_md_close(struct inode *inode, struct file *file) -{ - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ll_inode_info *lli = ll_i2info(inode); - int lockmode; - __u64 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; - struct lustre_handle lockh; - union ldlm_policy_data policy = { - .l_inodebits = { MDS_INODELOCK_OPEN } - }; - int rc = 0; - - /* clear group lock, if present */ - if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) - ll_put_grouplock(inode, file, fd->fd_grouplock.lg_gid); - - if (fd->fd_lease_och) { - bool lease_broken; - - /* Usually the lease is not released when the - * application crashed, we need to release here. - */ - rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken); - CDEBUG(rc ? 
D_ERROR : D_INODE, - "Clean up lease " DFID " %d/%d\n", - PFID(&lli->lli_fid), rc, lease_broken); - - fd->fd_lease_och = NULL; - } - - if (fd->fd_och) { - rc = ll_close_inode_openhandle(inode, fd->fd_och, 0, NULL); - fd->fd_och = NULL; - goto out; - } - - /* Let's see if we have good enough OPEN lock on the file and if - * we can skip talking to MDS - */ - - mutex_lock(&lli->lli_och_mutex); - if (fd->fd_omode & FMODE_WRITE) { - lockmode = LCK_CW; - LASSERT(lli->lli_open_fd_write_count); - lli->lli_open_fd_write_count--; - } else if (fd->fd_omode & FMODE_EXEC) { - lockmode = LCK_PR; - LASSERT(lli->lli_open_fd_exec_count); - lli->lli_open_fd_exec_count--; - } else { - lockmode = LCK_CR; - LASSERT(lli->lli_open_fd_read_count); - lli->lli_open_fd_read_count--; - } - mutex_unlock(&lli->lli_och_mutex); - - if (!md_lock_match(ll_i2mdexp(inode), flags, ll_inode2fid(inode), - LDLM_IBITS, &policy, lockmode, &lockh)) - rc = ll_md_real_close(inode, fd->fd_omode); - -out: - LUSTRE_FPRIVATE(file) = NULL; - ll_file_data_put(fd); - - return rc; -} - -/* While this returns an error code, fput() the caller does not, so we need - * to make every effort to clean up all of our state here. Also, applications - * rarely check close errors and even if an error is returned they will not - * re-try the close call. - */ -int ll_file_release(struct inode *inode, struct file *file) -{ - struct ll_file_data *fd; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - int rc; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n", - PFID(ll_inode2fid(inode)), inode); - - if (!is_root_inode(inode)) - ll_stats_ops_tally(sbi, LPROC_LL_RELEASE, 1); - fd = LUSTRE_FPRIVATE(file); - LASSERT(fd); - - /* The last ref on @file, maybe not be the owner pid of statahead, - * because parent and child process can share the same file handle. 
- */ - if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd) - ll_deauthorize_statahead(inode, fd); - - if (is_root_inode(inode)) { - LUSTRE_FPRIVATE(file) = NULL; - ll_file_data_put(fd); - return 0; - } - - if (!S_ISDIR(inode->i_mode)) { - if (lli->lli_clob) - lov_read_and_clear_async_rc(lli->lli_clob); - lli->lli_async_rc = 0; - } - - rc = ll_md_close(inode, file); - - if (CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, cfs_fail_val)) - libcfs_debug_dumplog(); - - return rc; -} - -static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize, - struct lookup_intent *itp) -{ - struct inode *inode = d_inode(de); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct dentry *parent = de->d_parent; - const char *name = NULL; - struct md_op_data *op_data; - struct ptlrpc_request *req = NULL; - int len = 0, rc; - - LASSERT(parent); - LASSERT(itp->it_flags & MDS_OPEN_BY_FID); - - /* - * if server supports open-by-fid, or file name is invalid, don't pack - * name in open request - */ - if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) && - lu_name_is_valid_2(de->d_name.name, de->d_name.len)) { - name = de->d_name.name; - len = de->d_name.len; - } - - op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len, - O_RDWR, LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - op_data->op_data = lmm; - op_data->op_data_size = lmmsize; - - rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req, - &ll_md_blocking_ast, 0); - ll_finish_md_op_data(op_data); - if (rc == -ESTALE) { - /* reason for keep own exit path - don`t flood log - * with messages with -ESTALE errors. - */ - if (!it_disposition(itp, DISP_OPEN_OPEN) || - it_open_error(DISP_OPEN_OPEN, itp)) - goto out; - ll_release_openhandle(inode, itp); - goto out; - } - - if (it_disposition(itp, DISP_LOOKUP_NEG)) { - rc = -ENOENT; - goto out; - } - - if (rc != 0 || it_open_error(DISP_OPEN_OPEN, itp)) { - rc = rc ? 
rc : it_open_error(DISP_OPEN_OPEN, itp); - CDEBUG(D_VFSTRACE, "lock enqueue: err: %d\n", rc); - goto out; - } - - rc = ll_prep_inode(&inode, req, NULL, itp); - if (!rc && itp->it_lock_mode) - ll_set_lock_data(sbi->ll_md_exp, inode, itp, NULL); - -out: - ptlrpc_req_finished(req); - ll_intent_drop_lock(itp); - - /* - * We did open by fid, but by the time we got to the server, - * the object disappeared. If this is a create, we cannot really - * tell the userspace that the file it was trying to create - * does not exist. Instead let's return -ESTALE, and the VFS will - * retry the create with LOOKUP_REVAL that we are going to catch - * in ll_revalidate_dentry() and use lookup then. - */ - if (rc == -ENOENT && itp->it_op & IT_CREAT) - rc = -ESTALE; - - return rc; -} - -static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, - struct obd_client_handle *och) -{ - struct mdt_body *body; - - body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY); - och->och_fh = body->mbo_handle; - och->och_fid = body->mbo_fid1; - och->och_lease_handle.cookie = it->it_lock_handle; - och->och_magic = OBD_CLIENT_HANDLE_MAGIC; - och->och_flags = it->it_flags; - - return md_set_open_replay_data(md_exp, och, it); -} - -static int ll_local_open(struct file *file, struct lookup_intent *it, - struct ll_file_data *fd, struct obd_client_handle *och) -{ - struct inode *inode = file_inode(file); - - LASSERT(!LUSTRE_FPRIVATE(file)); - - LASSERT(fd); - - if (och) { - int rc; - - rc = ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); - if (rc != 0) - return rc; - } - - LUSTRE_FPRIVATE(file) = fd; - ll_readahead_init(inode, &fd->fd_ras); - fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); - - /* ll_cl_context initialize */ - rwlock_init(&fd->fd_lock); - INIT_LIST_HEAD(&fd->fd_lccs); - - return 0; -} - -/* Open a file, and (for the very first open) create objects on the OSTs at - * this time. 
If opened with O_LOV_DELAY_CREATE, then we don't do the object - * creation or open until ll_lov_setstripe() ioctl is called. - * - * If we already have the stripe MD locally then we don't request it in - * md_open(), by passing a lmm_size = 0. - * - * It is up to the application to ensure no other processes open this file - * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be - * used. We might be able to avoid races of that sort by getting lli_open_sem - * before returning in the O_LOV_DELAY_CREATE case and dropping it here - * or in ll_file_release(), but I'm not sure that is desirable/necessary. - */ -int ll_file_open(struct inode *inode, struct file *file) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct lookup_intent *it, oit = { .it_op = IT_OPEN, - .it_flags = file->f_flags }; - struct obd_client_handle **och_p = NULL; - __u64 *och_usecount = NULL; - struct ll_file_data *fd; - int rc = 0; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), flags %o\n", - PFID(ll_inode2fid(inode)), inode, file->f_flags); - - it = file->private_data; /* XXX: compat macro */ - file->private_data = NULL; /* prevent ll_local_open assertion */ - - fd = ll_file_data_get(); - if (!fd) { - rc = -ENOMEM; - goto out_openerr; - } - - fd->fd_file = file; - if (S_ISDIR(inode->i_mode)) - ll_authorize_statahead(inode, fd); - - if (is_root_inode(inode)) { - LUSTRE_FPRIVATE(file) = fd; - return 0; - } - - if (!it || !it->it_disposition) { - /* Convert f_flags into access mode. We cannot use file->f_mode, - * because everything but O_ACCMODE mask was stripped from - * there - */ - if ((oit.it_flags + 1) & O_ACCMODE) - oit.it_flags++; - if (file->f_flags & O_TRUNC) - oit.it_flags |= FMODE_WRITE; - - /* kernel only call f_op->open in dentry_open. filp_open calls - * dentry_open after call to open_namei that checks permissions. - * Only nfsd_open call dentry_open directly without checking - * permissions and because of that this code below is safe. 
- */ - if (oit.it_flags & (FMODE_WRITE | FMODE_READ)) - oit.it_flags |= MDS_OPEN_OWNEROVERRIDE; - - /* We do not want O_EXCL here, presumably we opened the file - * already? XXX - NFS implications? - */ - oit.it_flags &= ~O_EXCL; - - /* bug20584, if "it_flags" contains O_CREAT, the file will be - * created if necessary, then "IT_CREAT" should be set to keep - * consistent with it - */ - if (oit.it_flags & O_CREAT) - oit.it_op |= IT_CREAT; - - it = &oit; - } - -restart: - /* Let's see if we have file open on MDS already. */ - if (it->it_flags & FMODE_WRITE) { - och_p = &lli->lli_mds_write_och; - och_usecount = &lli->lli_open_fd_write_count; - } else if (it->it_flags & FMODE_EXEC) { - och_p = &lli->lli_mds_exec_och; - och_usecount = &lli->lli_open_fd_exec_count; - } else { - och_p = &lli->lli_mds_read_och; - och_usecount = &lli->lli_open_fd_read_count; - } - - mutex_lock(&lli->lli_och_mutex); - if (*och_p) { /* Open handle is present */ - if (it_disposition(it, DISP_OPEN_OPEN)) { - /* Well, there's extra open request that we do not need, - * let's close it somehow. This will decref request. - */ - rc = it_open_error(DISP_OPEN_OPEN, it); - if (rc) { - mutex_unlock(&lli->lli_och_mutex); - goto out_openerr; - } - - ll_release_openhandle(inode, it); - } - (*och_usecount)++; - - rc = ll_local_open(file, it, fd, NULL); - if (rc) { - (*och_usecount)--; - mutex_unlock(&lli->lli_och_mutex); - goto out_openerr; - } - } else { - LASSERT(*och_usecount == 0); - if (!it->it_disposition) { - /* We cannot just request lock handle now, new ELC code - * means that one of other OPEN locks for this file - * could be cancelled, and since blocking ast handler - * would attempt to grab och_mutex as well, that would - * result in a deadlock - */ - mutex_unlock(&lli->lli_och_mutex); - /* - * Normally called under two situations: - * 1. NFS export. - * 2. revalidate with IT_OPEN (revalidate doesn't - * execute this intent any more). - * - * Always fetch MDS_OPEN_LOCK if this is not setstripe. 
- * - * Always specify MDS_OPEN_BY_FID because we don't want - * to get file with different fid. - */ - it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID; - rc = ll_intent_file_open(file->f_path.dentry, - NULL, 0, it); - if (rc) - goto out_openerr; - - goto restart; - } - *och_p = kzalloc(sizeof(struct obd_client_handle), GFP_NOFS); - if (!*och_p) { - rc = -ENOMEM; - goto out_och_free; - } - - (*och_usecount)++; - - /* md_intent_lock() didn't get a request ref if there was an - * open error, so don't do cleanup on the request here - * (bug 3430) - */ - /* XXX (green): Should not we bail out on any error here, not - * just open error? - */ - rc = it_open_error(DISP_OPEN_OPEN, it); - if (rc) - goto out_och_free; - - LASSERTF(it_disposition(it, DISP_ENQ_OPEN_REF), - "inode %p: disposition %x, status %d\n", inode, - it_disposition(it, ~0), it->it_status); - - rc = ll_local_open(file, it, fd, *och_p); - if (rc) - goto out_och_free; - } - mutex_unlock(&lli->lli_och_mutex); - fd = NULL; - - /* Must do this outside lli_och_mutex lock to prevent deadlock where - * different kind of OPEN lock for this same inode gets cancelled - * by ldlm_cancel_lru - */ - if (!S_ISREG(inode->i_mode)) - goto out_och_free; - - cl_lov_delay_create_clear(&file->f_flags); - goto out_och_free; - -out_och_free: - if (rc) { - if (och_p && *och_p) { - kfree(*och_p); - *och_p = NULL; - (*och_usecount)--; - } - mutex_unlock(&lli->lli_och_mutex); - -out_openerr: - if (lli->lli_opendir_key == fd) - ll_deauthorize_statahead(inode, fd); - if (fd) - ll_file_data_put(fd); - } else { - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1); - } - - if (it && it_disposition(it, DISP_ENQ_OPEN_REF)) { - ptlrpc_req_finished(it->it_request); - it_clear_disposition(it, DISP_ENQ_OPEN_REF); - } - - return rc; -} - -static int ll_md_blocking_lease_ast(struct ldlm_lock *lock, - struct ldlm_lock_desc *desc, - void *data, int flag) -{ - int rc; - struct lustre_handle lockh; - - switch (flag) { - case LDLM_CB_BLOCKING: - 
ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh, LCF_ASYNC); - if (rc < 0) { - CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); - return rc; - } - break; - case LDLM_CB_CANCELING: - /* do nothing */ - break; - } - return 0; -} - -/** - * Acquire a lease and open the file. - */ -static struct obd_client_handle * -ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode, - __u64 open_flags) -{ - struct lookup_intent it = { .it_op = IT_OPEN }; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct md_op_data *op_data; - struct ptlrpc_request *req = NULL; - struct lustre_handle old_handle = { 0 }; - struct obd_client_handle *och = NULL; - int rc; - int rc2; - - if (fmode != FMODE_WRITE && fmode != FMODE_READ) - return ERR_PTR(-EINVAL); - - if (file) { - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct obd_client_handle **och_p; - __u64 *och_usecount; - - if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC)) - return ERR_PTR(-EPERM); - - /* Get the openhandle of the file */ - rc = -EBUSY; - mutex_lock(&lli->lli_och_mutex); - if (fd->fd_lease_och) { - mutex_unlock(&lli->lli_och_mutex); - return ERR_PTR(rc); - } - - if (!fd->fd_och) { - if (file->f_mode & FMODE_WRITE) { - LASSERT(lli->lli_mds_write_och); - och_p = &lli->lli_mds_write_och; - och_usecount = &lli->lli_open_fd_write_count; - } else { - LASSERT(lli->lli_mds_read_och); - och_p = &lli->lli_mds_read_och; - och_usecount = &lli->lli_open_fd_read_count; - } - if (*och_usecount == 1) { - fd->fd_och = *och_p; - *och_p = NULL; - *och_usecount = 0; - rc = 0; - } - } - mutex_unlock(&lli->lli_och_mutex); - if (rc < 0) /* more than 1 opener */ - return ERR_PTR(rc); - - LASSERT(fd->fd_och); - old_handle = fd->fd_och->och_fh; - } - - och = kzalloc(sizeof(*och), GFP_NOFS); - if (!och) - return ERR_PTR(-ENOMEM); - - op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) { - rc = 
PTR_ERR(op_data); - goto out; - } - - /* To tell the MDT this openhandle is from the same owner */ - op_data->op_handle = old_handle; - - it.it_flags = fmode | open_flags; - it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE; - rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req, - &ll_md_blocking_lease_ast, - /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise - * it can be cancelled which may mislead applications that the lease is - * broken; - * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal - * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast - * doesn't deal with openhandle, so normal openhandle will be leaked. - */ - LDLM_FL_NO_LRU | LDLM_FL_EXCL); - ll_finish_md_op_data(op_data); - ptlrpc_req_finished(req); - if (rc < 0) - goto out_release_it; - - if (it_disposition(&it, DISP_LOOKUP_NEG)) { - rc = -ENOENT; - goto out_release_it; - } - - rc = it_open_error(DISP_OPEN_OPEN, &it); - if (rc) - goto out_release_it; - - LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF)); - ll_och_fill(sbi->ll_md_exp, &it, och); - - if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? 
*/ { - rc = -EOPNOTSUPP; - goto out_close; - } - - /* already get lease, handle lease lock */ - ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL); - if (it.it_lock_mode == 0 || - it.it_lock_bits != MDS_INODELOCK_OPEN) { - /* open lock must return for lease */ - CERROR(DFID "lease granted but no open lock, %d/%llu.\n", - PFID(ll_inode2fid(inode)), it.it_lock_mode, - it.it_lock_bits); - rc = -EPROTO; - goto out_close; - } - - ll_intent_release(&it); - return och; - -out_close: - /* Cancel open lock */ - if (it.it_lock_mode != 0) { - ldlm_lock_decref_and_cancel(&och->och_lease_handle, - it.it_lock_mode); - it.it_lock_mode = 0; - och->och_lease_handle.cookie = 0ULL; - } - rc2 = ll_close_inode_openhandle(inode, och, 0, NULL); - if (rc2 < 0) - CERROR("%s: error closing file " DFID ": %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(&ll_i2info(inode)->lli_fid), rc2); - och = NULL; /* och has been freed in ll_close_inode_openhandle() */ -out_release_it: - ll_intent_release(&it); -out: - kfree(och); - return ERR_PTR(rc); -} - -/** - * Check whether a layout swap can be done between two inodes. 
- * - * \param[in] inode1 First inode to check - * \param[in] inode2 Second inode to check - * - * \retval 0 on success, layout swap can be performed between both inodes - * \retval negative error code if requirements are not met - */ -static int ll_check_swap_layouts_validity(struct inode *inode1, - struct inode *inode2) -{ - if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode)) - return -EINVAL; - - if (inode_permission(inode1, MAY_WRITE) || - inode_permission(inode2, MAY_WRITE)) - return -EPERM; - - if (inode1->i_sb != inode2->i_sb) - return -EXDEV; - - return 0; -} - -static int ll_swap_layouts_close(struct obd_client_handle *och, - struct inode *inode, struct inode *inode2) -{ - const struct lu_fid *fid1 = ll_inode2fid(inode); - const struct lu_fid *fid2; - int rc; - - CDEBUG(D_INODE, "%s: biased close of file " DFID "\n", - ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1)); - - rc = ll_check_swap_layouts_validity(inode, inode2); - if (rc < 0) - goto out_free_och; - - /* We now know that inode2 is a lustre inode */ - fid2 = ll_inode2fid(inode2); - - rc = lu_fid_cmp(fid1, fid2); - if (!rc) { - rc = -EINVAL; - goto out_free_och; - } - - /* - * Close the file and swap layouts between inode & inode2. - * NB: lease lock handle is released in mdc_close_layout_swap_pack() - * because we still need it to pack l_remote_handle to MDT. - */ - rc = ll_close_inode_openhandle(inode, och, MDS_CLOSE_LAYOUT_SWAP, - inode2); - - och = NULL; /* freed in ll_close_inode_openhandle() */ - -out_free_och: - kfree(och); - return rc; -} - -/** - * Release lease and close the file. - * It will check if the lease has ever broken. 
- */ -static int ll_lease_close(struct obd_client_handle *och, struct inode *inode, - bool *lease_broken) -{ - struct ldlm_lock *lock; - bool cancelled = true; - - lock = ldlm_handle2lock(&och->och_lease_handle); - if (lock) { - lock_res_and_lock(lock); - cancelled = ldlm_is_cancel(lock); - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - } - - CDEBUG(D_INODE, "lease for " DFID " broken? %d\n", - PFID(&ll_i2info(inode)->lli_fid), cancelled); - - if (!cancelled) - ldlm_cli_cancel(&och->och_lease_handle, 0); - if (lease_broken) - *lease_broken = cancelled; - - return ll_close_inode_openhandle(inode, och, 0, NULL); -} - -int ll_merge_attr(const struct lu_env *env, struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct cl_object *obj = lli->lli_clob; - struct cl_attr *attr = vvp_env_thread_attr(env); - s64 atime; - s64 mtime; - s64 ctime; - int rc = 0; - - ll_inode_size_lock(inode); - - /* merge timestamps the most recently obtained from mds with - * timestamps obtained from osts - */ - LTIME_S(inode->i_atime) = lli->lli_atime; - LTIME_S(inode->i_mtime) = lli->lli_mtime; - LTIME_S(inode->i_ctime) = lli->lli_ctime; - - mtime = LTIME_S(inode->i_mtime); - atime = LTIME_S(inode->i_atime); - ctime = LTIME_S(inode->i_ctime); - - cl_object_attr_lock(obj); - rc = cl_object_attr_get(env, obj, attr); - cl_object_attr_unlock(obj); - - if (rc != 0) - goto out_size_unlock; - - if (atime < attr->cat_atime) - atime = attr->cat_atime; - - if (ctime < attr->cat_ctime) - ctime = attr->cat_ctime; - - if (mtime < attr->cat_mtime) - mtime = attr->cat_mtime; - - CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", - PFID(&lli->lli_fid), attr->cat_size); - - i_size_write(inode, attr->cat_size); - - inode->i_blocks = attr->cat_blocks; - - LTIME_S(inode->i_mtime) = mtime; - LTIME_S(inode->i_atime) = atime; - LTIME_S(inode->i_ctime) = ctime; - -out_size_unlock: - ll_inode_size_unlock(inode); - - return rc; -} - -static bool file_is_noatime(const struct file *file) -{ 
- const struct vfsmount *mnt = file->f_path.mnt; - const struct inode *inode = file_inode(file); - - /* Adapted from file_accessed() and touch_atime().*/ - if (file->f_flags & O_NOATIME) - return true; - - if (inode->i_flags & S_NOATIME) - return true; - - if (IS_NOATIME(inode)) - return true; - - if (mnt->mnt_flags & (MNT_NOATIME | MNT_READONLY)) - return true; - - if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) - return true; - - if ((inode->i_sb->s_flags & SB_NODIRATIME) && S_ISDIR(inode->i_mode)) - return true; - - return false; -} - -static void ll_io_init(struct cl_io *io, const struct file *file, int write) -{ - struct inode *inode = file_inode(file); - - io->u.ci_rw.crw_nonblock = file->f_flags & O_NONBLOCK; - if (write) { - io->u.ci_wr.wr_append = !!(file->f_flags & O_APPEND); - io->u.ci_wr.wr_sync = file->f_flags & O_SYNC || - file->f_flags & O_DIRECT || - IS_SYNC(inode); - } - io->ci_obj = ll_i2info(inode)->lli_clob; - io->ci_lockreq = CILR_MAYBE; - if (ll_file_nolock(file)) { - io->ci_lockreq = CILR_NEVER; - io->ci_no_srvlock = 1; - } else if (file->f_flags & O_APPEND) { - io->ci_lockreq = CILR_MANDATORY; - } - - io->ci_noatime = file_is_noatime(file); -} - -static ssize_t -ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args, - struct file *file, enum cl_io_type iot, - loff_t *ppos, size_t count) -{ - struct ll_inode_info *lli = ll_i2info(file_inode(file)); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct vvp_io *vio = vvp_env_io(env); - struct range_lock range; - struct cl_io *io; - ssize_t result = 0; - int rc = 0; - - CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n", - file, iot, *ppos, count); - -restart: - io = vvp_env_thread_io(env); - ll_io_init(io, file, iot == CIT_WRITE); - - if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) { - struct vvp_io *vio = vvp_env_io(env); - bool range_locked = false; - - if (file->f_flags & O_APPEND) - range_lock_init(&range, 0, LUSTRE_EOF); - else - 
range_lock_init(&range, *ppos, *ppos + count - 1); - - vio->vui_fd = LUSTRE_FPRIVATE(file); - vio->vui_iter = args->u.normal.via_iter; - vio->vui_iocb = args->u.normal.via_iocb; - /* - * Direct IO reads must also take range lock, - * or multiple reads will try to work on the same pages - * See LU-6227 for details. - */ - if (((iot == CIT_WRITE) || - (iot == CIT_READ && (file->f_flags & O_DIRECT))) && - !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { - CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n", - range.rl_node.in_extent.start, - range.rl_node.in_extent.end); - rc = range_lock(&lli->lli_write_tree, &range); - if (rc < 0) - goto out; - - range_locked = true; - } - ll_cl_add(file, env, io); - rc = cl_io_loop(env, io); - ll_cl_remove(file, env); - if (range_locked) { - CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n", - range.rl_node.in_extent.start, - range.rl_node.in_extent.end); - range_unlock(&lli->lli_write_tree, &range); - } - } else { - /* cl_io_rw_init() handled IO */ - rc = io->ci_result; - } - - if (io->ci_nob > 0) { - result = io->ci_nob; - count -= io->ci_nob; - *ppos = io->u.ci_wr.wr.crw_pos; - - /* prepare IO restart */ - if (count > 0) - args->u.normal.via_iter = vio->vui_iter; - } -out: - cl_io_fini(env, io); - - if ((!rc || rc == -ENODATA) && count > 0 && io->ci_need_restart) { - CDEBUG(D_VFSTRACE, - "%s: restart %s from %lld, count:%zu, result: %zd\n", - file_dentry(file)->d_name.name, - iot == CIT_READ ? 
"read" : "write", - *ppos, count, result); - goto restart; - } - - if (iot == CIT_READ) { - if (result >= 0) - ll_stats_ops_tally(ll_i2sbi(file_inode(file)), - LPROC_LL_READ_BYTES, result); - } else if (iot == CIT_WRITE) { - if (result >= 0) { - ll_stats_ops_tally(ll_i2sbi(file_inode(file)), - LPROC_LL_WRITE_BYTES, result); - fd->fd_write_failed = false; - } else if (!result && !rc) { - rc = io->ci_result; - if (rc < 0) - fd->fd_write_failed = true; - else - fd->fd_write_failed = false; - } else if (rc != -ERESTARTSYS) { - fd->fd_write_failed = true; - } - } - CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result); - - return result > 0 ? result : rc; -} - -static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to) -{ - struct lu_env *env; - struct vvp_io_args *args; - ssize_t result; - u16 refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - args = ll_env_args(env); - args->u.normal.via_iter = to; - args->u.normal.via_iocb = iocb; - - result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ, - &iocb->ki_pos, iov_iter_count(to)); - cl_env_put(env, &refcheck); - return result; -} - -/* - * Write to a file (through the page cache). 
- */ -static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from) -{ - struct lu_env *env; - struct vvp_io_args *args; - ssize_t result; - u16 refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - args = ll_env_args(env); - args->u.normal.via_iter = from; - args->u.normal.via_iocb = iocb; - - result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE, - &iocb->ki_pos, iov_iter_count(from)); - cl_env_put(env, &refcheck); - return result; -} - -int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, - __u64 flags, struct lov_user_md *lum, - int lum_size) -{ - struct lookup_intent oit = { - .it_op = IT_OPEN, - .it_flags = flags | MDS_OPEN_BY_FID, - }; - int rc = 0; - - ll_inode_size_lock(inode); - rc = ll_intent_file_open(dentry, lum, lum_size, &oit); - if (rc < 0) - goto out_unlock; - - ll_release_openhandle(inode, &oit); - -out_unlock: - ll_inode_size_unlock(inode); - ll_intent_release(&oit); - return rc; -} - -int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, - struct lov_mds_md **lmmp, int *lmm_size, - struct ptlrpc_request **request) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct mdt_body *body; - struct lov_mds_md *lmm = NULL; - struct ptlrpc_request *req = NULL; - struct md_op_data *op_data; - int rc, lmmsize; - - rc = ll_get_default_mdsize(sbi, &lmmsize); - if (rc) - return rc; - - op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, - strlen(filename), lmmsize, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA; - rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); - ll_finish_md_op_data(op_data); - if (rc < 0) { - CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n", - filename, rc); - goto out; - } - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - - lmmsize = body->mbo_eadatasize; - - if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) || - 
lmmsize == 0) { - rc = -ENODATA; - goto out; - } - - lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, lmmsize); - - if ((lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1)) && - (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3))) { - rc = -EPROTO; - goto out; - } - - /* - * This is coming from the MDS, so is probably in - * little endian. We convert it to host endian before - * passing it to userspace. - */ - if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC) { - int stripe_count; - - stripe_count = le16_to_cpu(lmm->lmm_stripe_count); - if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED) - stripe_count = 0; - - /* if function called for directory - we should - * avoid swab not existent lsm objects - */ - if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) { - lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm); - if (S_ISREG(body->mbo_mode)) - lustre_swab_lov_user_md_objects( - ((struct lov_user_md_v1 *)lmm)->lmm_objects, - stripe_count); - } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) { - lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm); - if (S_ISREG(body->mbo_mode)) - lustre_swab_lov_user_md_objects( - ((struct lov_user_md_v3 *)lmm)->lmm_objects, - stripe_count); - } - } - -out: - *lmmp = lmm; - *lmm_size = lmmsize; - *request = req; - return rc; -} - -static int ll_lov_setea(struct inode *inode, struct file *file, - unsigned long arg) -{ - __u64 flags = MDS_OPEN_HAS_OBJS | FMODE_WRITE; - struct lov_user_md *lump; - int lum_size = sizeof(struct lov_user_md) + - sizeof(struct lov_user_ost_data); - int rc; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - lump = kzalloc(lum_size, GFP_NOFS); - if (!lump) - return -ENOMEM; - - if (copy_from_user(lump, (struct lov_user_md __user *)arg, lum_size)) { - kvfree(lump); - return -EFAULT; - } - - rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lump, - lum_size); - cl_lov_delay_create_clear(&file->f_flags); - - kvfree(lump); - return rc; -} - -static int ll_file_getstripe(struct inode 
*inode, - struct lov_user_md __user *lum) -{ - struct lu_env *env; - u16 refcheck; - int rc; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum); - cl_env_put(env, &refcheck); - return rc; -} - -static int ll_lov_setstripe(struct inode *inode, struct file *file, - unsigned long arg) -{ - struct lov_user_md __user *lum = (struct lov_user_md __user *)arg; - struct lov_user_md *klum; - int lum_size, rc; - __u64 flags = FMODE_WRITE; - - rc = ll_copy_user_md(lum, &klum); - if (rc < 0) - return rc; - - lum_size = rc; - rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, klum, - lum_size); - cl_lov_delay_create_clear(&file->f_flags); - if (rc == 0) { - __u32 gen; - - put_user(0, &lum->lmm_stripe_count); - - ll_layout_refresh(inode, &gen); - rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg); - } - - kfree(klum); - return rc; -} - -static int -ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ll_grouplock grouplock; - int rc; - - if (arg == 0) { - CWARN("group id for group lock must not be 0\n"); - return -EINVAL; - } - - if (ll_file_nolock(file)) - return -EOPNOTSUPP; - - spin_lock(&lli->lli_lock); - if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { - CWARN("group lock already existed with gid %lu\n", - fd->fd_grouplock.lg_gid); - spin_unlock(&lli->lli_lock); - return -EINVAL; - } - LASSERT(!fd->fd_grouplock.lg_lock); - spin_unlock(&lli->lli_lock); - - rc = cl_get_grouplock(ll_i2info(inode)->lli_clob, - arg, (file->f_flags & O_NONBLOCK), &grouplock); - if (rc) - return rc; - - spin_lock(&lli->lli_lock); - if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { - spin_unlock(&lli->lli_lock); - CERROR("another thread just won the race\n"); - cl_put_grouplock(&grouplock); - return -EINVAL; - } - - fd->fd_flags |= LL_FILE_GROUP_LOCKED; - 
fd->fd_grouplock = grouplock; - spin_unlock(&lli->lli_lock); - - CDEBUG(D_INFO, "group lock %lu obtained\n", arg); - return 0; -} - -static int ll_put_grouplock(struct inode *inode, struct file *file, - unsigned long arg) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ll_grouplock grouplock; - - spin_lock(&lli->lli_lock); - if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) { - spin_unlock(&lli->lli_lock); - CWARN("no group lock held\n"); - return -EINVAL; - } - LASSERT(fd->fd_grouplock.lg_lock); - - if (fd->fd_grouplock.lg_gid != arg) { - CWARN("group lock %lu doesn't match current id %lu\n", - arg, fd->fd_grouplock.lg_gid); - spin_unlock(&lli->lli_lock); - return -EINVAL; - } - - grouplock = fd->fd_grouplock; - memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock)); - fd->fd_flags &= ~LL_FILE_GROUP_LOCKED; - spin_unlock(&lli->lli_lock); - - cl_put_grouplock(&grouplock); - CDEBUG(D_INFO, "group lock %lu released\n", arg); - return 0; -} - -/** - * Close inode open handle - * - * \param inode [in] inode in question - * \param it [in,out] intent which contains open info and result - * - * \retval 0 success - * \retval <0 failure - */ -int ll_release_openhandle(struct inode *inode, struct lookup_intent *it) -{ - struct obd_client_handle *och; - int rc; - - LASSERT(inode); - - /* Root ? Do nothing. */ - if (is_root_inode(inode)) - return 0; - - /* No open handle to close? 
Move away */ - if (!it_disposition(it, DISP_OPEN_OPEN)) - return 0; - - LASSERT(it_open_error(DISP_OPEN_OPEN, it) == 0); - - och = kzalloc(sizeof(*och), GFP_NOFS); - if (!och) { - rc = -ENOMEM; - goto out; - } - - ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och); - - rc = ll_close_inode_openhandle(inode, och, 0, NULL); -out: - /* this one is in place of ll_file_open */ - if (it_disposition(it, DISP_ENQ_OPEN_REF)) { - ptlrpc_req_finished(it->it_request); - it_clear_disposition(it, DISP_ENQ_OPEN_REF); - } - return rc; -} - -/** - * Get size for inode for which FIEMAP mapping is requested. - * Make the FIEMAP get_info call and returns the result. - * - * \param fiemap kernel buffer to hold extens - * \param num_bytes kernel buffer size - */ -static int ll_do_fiemap(struct inode *inode, struct fiemap *fiemap, - size_t num_bytes) -{ - struct ll_fiemap_info_key fmkey = { .lfik_name = KEY_FIEMAP, }; - struct lu_env *env; - u16 refcheck; - int rc = 0; - - /* Checks for fiemap flags */ - if (fiemap->fm_flags & ~LUSTRE_FIEMAP_FLAGS_COMPAT) { - fiemap->fm_flags &= ~LUSTRE_FIEMAP_FLAGS_COMPAT; - return -EBADR; - } - - /* Check for FIEMAP_FLAG_SYNC */ - if (fiemap->fm_flags & FIEMAP_FLAG_SYNC) { - rc = filemap_fdatawrite(inode->i_mapping); - if (rc) - return rc; - } - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - if (i_size_read(inode) == 0) { - rc = ll_glimpse_size(inode); - if (rc) - goto out; - } - - fmkey.lfik_oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; - obdo_from_inode(&fmkey.lfik_oa, inode, OBD_MD_FLSIZE); - obdo_set_parent_fid(&fmkey.lfik_oa, &ll_i2info(inode)->lli_fid); - - /* If filesize is 0, then there would be no objects for mapping */ - if (fmkey.lfik_oa.o_size == 0) { - fiemap->fm_mapped_extents = 0; - rc = 0; - goto out; - } - - memcpy(&fmkey.lfik_fiemap, fiemap, sizeof(*fiemap)); - - rc = cl_object_fiemap(env, ll_i2info(inode)->lli_clob, - &fmkey, fiemap, &num_bytes); -out: - cl_env_put(env, &refcheck); - return rc; -} - -int 
ll_fid2path(struct inode *inode, void __user *arg) -{ - struct obd_export *exp = ll_i2mdexp(inode); - const struct getinfo_fid2path __user *gfin = arg; - struct getinfo_fid2path *gfout; - u32 pathlen; - size_t outsize; - int rc; - - if (!capable(CAP_DAC_READ_SEARCH) && - !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) - return -EPERM; - - /* Only need to get the buflen */ - if (get_user(pathlen, &gfin->gf_pathlen)) - return -EFAULT; - - if (pathlen > PATH_MAX) - return -EINVAL; - - outsize = sizeof(*gfout) + pathlen; - - gfout = kzalloc(outsize, GFP_NOFS); - if (!gfout) - return -ENOMEM; - - if (copy_from_user(gfout, arg, sizeof(*gfout))) { - rc = -EFAULT; - goto gf_free; - } - - /* Call mdc_iocontrol */ - rc = obd_iocontrol(OBD_IOC_FID2PATH, exp, outsize, gfout, NULL); - if (rc != 0) - goto gf_free; - - if (copy_to_user(arg, gfout, outsize)) - rc = -EFAULT; - -gf_free: - kfree(gfout); - return rc; -} - -/* - * Read the data_version for inode. - * - * This value is computed using stripe object version on OST. - * Version is computed using server side locking. - * - * @param flags if do sync on the OST side; - * 0: no sync - * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs - * LL_DV_WR_FLUSH: drop all caching pages, LCK_PW on OSTs - */ -int ll_data_version(struct inode *inode, __u64 *data_version, int flags) -{ - struct cl_object *obj = ll_i2info(inode)->lli_clob; - struct lu_env *env; - struct cl_io *io; - u16 refcheck; - int result; - - /* If no file object initialized, we consider its version is 0. 
*/ - if (!obj) { - *data_version = 0; - return 0; - } - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - io = vvp_env_thread_io(env); - io->ci_obj = obj; - io->u.ci_data_version.dv_data_version = 0; - io->u.ci_data_version.dv_flags = flags; - -restart: - if (!cl_io_init(env, io, CIT_DATA_VERSION, io->ci_obj)) - result = cl_io_loop(env, io); - else - result = io->ci_result; - - *data_version = io->u.ci_data_version.dv_data_version; - - cl_io_fini(env, io); - - if (unlikely(io->ci_need_restart)) - goto restart; - - cl_env_put(env, &refcheck); - - return result; -} - -/* - * Trigger a HSM release request for the provided inode. - */ -int ll_hsm_release(struct inode *inode) -{ - struct lu_env *env; - struct obd_client_handle *och = NULL; - __u64 data_version = 0; - int rc; - u16 refcheck; - - CDEBUG(D_INODE, "%s: Releasing file " DFID ".\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(&ll_i2info(inode)->lli_fid)); - - och = ll_lease_open(inode, NULL, FMODE_WRITE, MDS_OPEN_RELEASE); - if (IS_ERR(och)) { - rc = PTR_ERR(och); - goto out; - } - - /* Grab latest data_version and [am]time values */ - rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH); - if (rc != 0) - goto out; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) { - rc = PTR_ERR(env); - goto out; - } - - ll_merge_attr(env, inode); - cl_env_put(env, &refcheck); - - /* Release the file. - * NB: lease lock handle is released in mdc_hsm_release_pack() because - * we still need it to pack l_remote_handle to MDT. 
- */ - rc = ll_close_inode_openhandle(inode, och, MDS_HSM_RELEASE, - &data_version); - och = NULL; - -out: - if (och && !IS_ERR(och)) /* close the file */ - ll_lease_close(och, inode, NULL); - - return rc; -} - -struct ll_swap_stack { - u64 dv1; - u64 dv2; - struct inode *inode1; - struct inode *inode2; - bool check_dv1; - bool check_dv2; -}; - -static int ll_swap_layouts(struct file *file1, struct file *file2, - struct lustre_swap_layouts *lsl) -{ - struct mdc_swap_layouts msl; - struct md_op_data *op_data; - __u32 gid; - __u64 dv; - struct ll_swap_stack *llss = NULL; - int rc; - - llss = kzalloc(sizeof(*llss), GFP_NOFS); - if (!llss) - return -ENOMEM; - - llss->inode1 = file_inode(file1); - llss->inode2 = file_inode(file2); - - rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2); - if (rc < 0) - goto free; - - /* we use 2 bool because it is easier to swap than 2 bits */ - if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1) - llss->check_dv1 = true; - - if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV2) - llss->check_dv2 = true; - - /* we cannot use lsl->sl_dvX directly because we may swap them */ - llss->dv1 = lsl->sl_dv1; - llss->dv2 = lsl->sl_dv2; - - rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2)); - if (!rc) /* same file, done! 
*/ - goto free; - - if (rc < 0) { /* sequentialize it */ - swap(llss->inode1, llss->inode2); - swap(file1, file2); - swap(llss->dv1, llss->dv2); - swap(llss->check_dv1, llss->check_dv2); - } - - gid = lsl->sl_gid; - if (gid != 0) { /* application asks to flush dirty cache */ - rc = ll_get_grouplock(llss->inode1, file1, gid); - if (rc < 0) - goto free; - - rc = ll_get_grouplock(llss->inode2, file2, gid); - if (rc < 0) { - ll_put_grouplock(llss->inode1, file1, gid); - goto free; - } - } - - /* ultimate check, before swapping the layouts we check if - * dataversion has changed (if requested) - */ - if (llss->check_dv1) { - rc = ll_data_version(llss->inode1, &dv, 0); - if (rc) - goto putgl; - if (dv != llss->dv1) { - rc = -EAGAIN; - goto putgl; - } - } - - if (llss->check_dv2) { - rc = ll_data_version(llss->inode2, &dv, 0); - if (rc) - goto putgl; - if (dv != llss->dv2) { - rc = -EAGAIN; - goto putgl; - } - } - - /* struct md_op_data is used to send the swap args to the mdt - * only flags is missing, so we use struct mdc_swap_layouts - * through the md_op_data->op_data - */ - /* flags from user space have to be converted before they are send to - * server, no flag is sent today, they are only used on the client - */ - msl.msl_flags = 0; - rc = -ENOMEM; - op_data = ll_prep_md_op_data(NULL, llss->inode1, llss->inode2, NULL, 0, - 0, LUSTRE_OPC_ANY, &msl); - if (IS_ERR(op_data)) { - rc = PTR_ERR(op_data); - goto free; - } - - rc = obd_iocontrol(LL_IOC_LOV_SWAP_LAYOUTS, ll_i2mdexp(llss->inode1), - sizeof(*op_data), op_data, NULL); - ll_finish_md_op_data(op_data); - -putgl: - if (gid != 0) { - ll_put_grouplock(llss->inode2, file2, gid); - ll_put_grouplock(llss->inode1, file1, gid); - } - -free: - kfree(llss); - - return rc; -} - -int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss) -{ - struct md_op_data *op_data; - int rc; - - /* Detect out-of range masks */ - if ((hss->hss_setmask | hss->hss_clearmask) & ~HSM_FLAGS_MASK) - return -EINVAL; - - /* Non-root 
users are forbidden to set or clear flags which are - * NOT defined in HSM_USER_MASK. - */ - if (((hss->hss_setmask | hss->hss_clearmask) & ~HSM_USER_MASK) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - - /* Detect out-of range archive id */ - if ((hss->hss_valid & HSS_ARCHIVE_ID) && - (hss->hss_archive_id > LL_HSM_MAX_ARCHIVE)) - return -EINVAL; - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, hss); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - rc = obd_iocontrol(LL_IOC_HSM_STATE_SET, ll_i2mdexp(inode), - sizeof(*op_data), op_data, NULL); - - ll_finish_md_op_data(op_data); - - return rc; -} - -static int ll_hsm_import(struct inode *inode, struct file *file, - struct hsm_user_import *hui) -{ - struct hsm_state_set *hss = NULL; - struct iattr *attr = NULL; - int rc; - - if (!S_ISREG(inode->i_mode)) - return -EINVAL; - - /* set HSM flags */ - hss = kzalloc(sizeof(*hss), GFP_NOFS); - if (!hss) - return -ENOMEM; - - hss->hss_valid = HSS_SETMASK | HSS_ARCHIVE_ID; - hss->hss_archive_id = hui->hui_archive_id; - hss->hss_setmask = HS_ARCHIVED | HS_EXISTS | HS_RELEASED; - rc = ll_hsm_state_set(inode, hss); - if (rc != 0) - goto free_hss; - - attr = kzalloc(sizeof(*attr), GFP_NOFS); - if (!attr) { - rc = -ENOMEM; - goto free_hss; - } - - attr->ia_mode = hui->hui_mode & 0777; - attr->ia_mode |= S_IFREG; - attr->ia_uid = make_kuid(&init_user_ns, hui->hui_uid); - attr->ia_gid = make_kgid(&init_user_ns, hui->hui_gid); - attr->ia_size = hui->hui_size; - attr->ia_mtime.tv_sec = hui->hui_mtime; - attr->ia_mtime.tv_nsec = hui->hui_mtime_ns; - attr->ia_atime.tv_sec = hui->hui_atime; - attr->ia_atime.tv_nsec = hui->hui_atime_ns; - - attr->ia_valid = ATTR_SIZE | ATTR_MODE | ATTR_FORCE | - ATTR_UID | ATTR_GID | - ATTR_MTIME | ATTR_MTIME_SET | - ATTR_ATIME | ATTR_ATIME_SET; - - inode_lock(inode); - - rc = ll_setattr_raw(file->f_path.dentry, attr, true); - if (rc == -ENODATA) - rc = 0; - - inode_unlock(inode); - - kfree(attr); -free_hss: - 
kfree(hss); - return rc; -} - -static inline long ll_lease_type_from_fmode(fmode_t fmode) -{ - return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) | - ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0); -} - -static long -ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file_inode(file); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - int flags, rc; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p),cmd=%x\n", - PFID(ll_inode2fid(inode)), inode, cmd); - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1); - - /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */ - if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */ - return -ENOTTY; - - switch (cmd) { - case LL_IOC_GETFLAGS: - /* Get the current value of the file flags */ - return put_user(fd->fd_flags, (int __user *)arg); - case LL_IOC_SETFLAGS: - case LL_IOC_CLRFLAGS: - /* Set or clear specific file flags */ - /* XXX This probably needs checks to ensure the flags are - * not abused, and to handle any flag side effects. 
- */ - if (get_user(flags, (int __user *)arg)) - return -EFAULT; - - if (cmd == LL_IOC_SETFLAGS) { - if ((flags & LL_FILE_IGNORE_LOCK) && - !(file->f_flags & O_DIRECT)) { - CERROR("%s: unable to disable locking on non-O_DIRECT file\n", - current->comm); - return -EINVAL; - } - - fd->fd_flags |= flags; - } else { - fd->fd_flags &= ~flags; - } - return 0; - case LL_IOC_LOV_SETSTRIPE: - return ll_lov_setstripe(inode, file, arg); - case LL_IOC_LOV_SETEA: - return ll_lov_setea(inode, file, arg); - case LL_IOC_LOV_SWAP_LAYOUTS: { - struct file *file2; - struct lustre_swap_layouts lsl; - - if (copy_from_user(&lsl, (char __user *)arg, - sizeof(struct lustre_swap_layouts))) - return -EFAULT; - - if ((file->f_flags & O_ACCMODE) == O_RDONLY) - return -EPERM; - - file2 = fget(lsl.sl_fd); - if (!file2) - return -EBADF; - - /* O_WRONLY or O_RDWR */ - if ((file2->f_flags & O_ACCMODE) == O_RDONLY) { - rc = -EPERM; - goto out; - } - - if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) { - struct obd_client_handle *och = NULL; - struct ll_inode_info *lli; - struct inode *inode2; - - if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE) { - rc = -EINVAL; - goto out; - } - - lli = ll_i2info(inode); - mutex_lock(&lli->lli_och_mutex); - if (fd->fd_lease_och) { - och = fd->fd_lease_och; - fd->fd_lease_och = NULL; - } - mutex_unlock(&lli->lli_och_mutex); - if (!och) { - rc = -ENOLCK; - goto out; - } - inode2 = file_inode(file2); - rc = ll_swap_layouts_close(och, inode, inode2); - } else { - rc = ll_swap_layouts(file, file2, &lsl); - } -out: - fput(file2); - return rc; - } - case LL_IOC_LOV_GETSTRIPE: - return ll_file_getstripe(inode, - (struct lov_user_md __user *)arg); - case FSFILT_IOC_GETFLAGS: - case FSFILT_IOC_SETFLAGS: - return ll_iocontrol(inode, file, cmd, arg); - case FSFILT_IOC_GETVERSION_OLD: - case FSFILT_IOC_GETVERSION: - return put_user(inode->i_generation, (int __user *)arg); - case LL_IOC_GROUP_LOCK: - return ll_get_grouplock(inode, file, arg); - case LL_IOC_GROUP_UNLOCK: - return 
ll_put_grouplock(inode, file, arg); - case IOC_OBD_STATFS: - return ll_obd_statfs(inode, (void __user *)arg); - - /* We need to special case any other ioctls we want to handle, - * to send them to the MDS/OST as appropriate and to properly - * network encode the arg field. - case FSFILT_IOC_SETVERSION_OLD: - case FSFILT_IOC_SETVERSION: - */ - case LL_IOC_FLUSHCTX: - return ll_flush_ctx(inode); - case LL_IOC_PATH2FID: { - if (copy_to_user((void __user *)arg, ll_inode2fid(inode), - sizeof(struct lu_fid))) - return -EFAULT; - - return 0; - } - case LL_IOC_GETPARENT: - return ll_getparent(file, (struct getparent __user *)arg); - case OBD_IOC_FID2PATH: - return ll_fid2path(inode, (void __user *)arg); - case LL_IOC_DATA_VERSION: { - struct ioc_data_version idv; - int rc; - - if (copy_from_user(&idv, (char __user *)arg, sizeof(idv))) - return -EFAULT; - - idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH; - rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags); - if (rc == 0 && copy_to_user((char __user *)arg, &idv, - sizeof(idv))) - return -EFAULT; - - return rc; - } - - case LL_IOC_GET_MDTIDX: { - int mdtidx; - - mdtidx = ll_get_mdt_idx(inode); - if (mdtidx < 0) - return mdtidx; - - if (put_user(mdtidx, (int __user *)arg)) - return -EFAULT; - - return 0; - } - case OBD_IOC_GETDTNAME: - case OBD_IOC_GETMDNAME: - return ll_get_obd_name(inode, cmd, arg); - case LL_IOC_HSM_STATE_GET: { - struct md_op_data *op_data; - struct hsm_user_state *hus; - int rc; - - hus = kzalloc(sizeof(*hus), GFP_NOFS); - if (!hus) - return -ENOMEM; - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, hus); - if (IS_ERR(op_data)) { - kfree(hus); - return PTR_ERR(op_data); - } - - rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), - op_data, NULL); - - if (copy_to_user((void __user *)arg, hus, sizeof(*hus))) - rc = -EFAULT; - - ll_finish_md_op_data(op_data); - kfree(hus); - return rc; - } - case LL_IOC_HSM_STATE_SET: { - struct hsm_state_set *hss; - int 
rc; - - hss = memdup_user((char __user *)arg, sizeof(*hss)); - if (IS_ERR(hss)) - return PTR_ERR(hss); - - rc = ll_hsm_state_set(inode, hss); - - kfree(hss); - return rc; - } - case LL_IOC_HSM_ACTION: { - struct md_op_data *op_data; - struct hsm_current_action *hca; - int rc; - - hca = kzalloc(sizeof(*hca), GFP_NOFS); - if (!hca) - return -ENOMEM; - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, hca); - if (IS_ERR(op_data)) { - kfree(hca); - return PTR_ERR(op_data); - } - - rc = obd_iocontrol(cmd, ll_i2mdexp(inode), sizeof(*op_data), - op_data, NULL); - - if (copy_to_user((char __user *)arg, hca, sizeof(*hca))) - rc = -EFAULT; - - ll_finish_md_op_data(op_data); - kfree(hca); - return rc; - } - case LL_IOC_SET_LEASE: { - struct ll_inode_info *lli = ll_i2info(inode); - struct obd_client_handle *och = NULL; - bool lease_broken; - fmode_t fmode; - - switch (arg) { - case LL_LEASE_WRLCK: - if (!(file->f_mode & FMODE_WRITE)) - return -EPERM; - fmode = FMODE_WRITE; - break; - case LL_LEASE_RDLCK: - if (!(file->f_mode & FMODE_READ)) - return -EPERM; - fmode = FMODE_READ; - break; - case LL_LEASE_UNLCK: - mutex_lock(&lli->lli_och_mutex); - if (fd->fd_lease_och) { - och = fd->fd_lease_och; - fd->fd_lease_och = NULL; - } - mutex_unlock(&lli->lli_och_mutex); - - if (!och) - return -ENOLCK; - - fmode = och->och_flags; - rc = ll_lease_close(och, inode, &lease_broken); - if (rc < 0) - return rc; - - if (lease_broken) - fmode = 0; - - return ll_lease_type_from_fmode(fmode); - default: - return -EINVAL; - } - - CDEBUG(D_INODE, "Set lease with mode %u\n", fmode); - - /* apply for lease */ - och = ll_lease_open(inode, file, fmode, 0); - if (IS_ERR(och)) - return PTR_ERR(och); - - rc = 0; - mutex_lock(&lli->lli_och_mutex); - if (!fd->fd_lease_och) { - fd->fd_lease_och = och; - och = NULL; - } - mutex_unlock(&lli->lli_och_mutex); - if (och) { - /* impossible now that only excl is supported for now */ - ll_lease_close(och, inode, &lease_broken); - rc = 
-EBUSY; - } - return rc; - } - case LL_IOC_GET_LEASE: { - struct ll_inode_info *lli = ll_i2info(inode); - struct ldlm_lock *lock = NULL; - fmode_t fmode = 0; - - mutex_lock(&lli->lli_och_mutex); - if (fd->fd_lease_och) { - struct obd_client_handle *och = fd->fd_lease_och; - - lock = ldlm_handle2lock(&och->och_lease_handle); - if (lock) { - lock_res_and_lock(lock); - if (!ldlm_is_cancel(lock)) - fmode = och->och_flags; - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - } - } - mutex_unlock(&lli->lli_och_mutex); - return ll_lease_type_from_fmode(fmode); - } - case LL_IOC_HSM_IMPORT: { - struct hsm_user_import *hui; - - hui = memdup_user((void __user *)arg, sizeof(*hui)); - if (IS_ERR(hui)) - return PTR_ERR(hui); - - rc = ll_hsm_import(inode, file, hui); - - kfree(hui); - return rc; - } - default: { - int err; - - if (ll_iocontrol_call(inode, file, cmd, arg, &err) == - LLIOC_STOP) - return err; - - return obd_iocontrol(cmd, ll_i2dtexp(inode), 0, NULL, - (void __user *)arg); - } - } -} - -static loff_t ll_file_seek(struct file *file, loff_t offset, int origin) -{ - struct inode *inode = file_inode(file); - loff_t retval, eof = 0; - - retval = offset + ((origin == SEEK_END) ? i_size_read(inode) : - (origin == SEEK_CUR) ? 
file->f_pos : 0); - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), to=%llu=%#llx(%d)\n", - PFID(ll_inode2fid(inode)), inode, retval, retval, origin); - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LLSEEK, 1); - - if (origin == SEEK_END || origin == SEEK_HOLE || origin == SEEK_DATA) { - retval = ll_glimpse_size(inode); - if (retval != 0) - return retval; - eof = i_size_read(inode); - } - - return generic_file_llseek_size(file, offset, origin, - ll_file_maxbytes(inode), eof); -} - -static int ll_flush(struct file *file, fl_owner_t id) -{ - struct inode *inode = file_inode(file); - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - int rc, err; - - LASSERT(!S_ISDIR(inode->i_mode)); - - /* catch async errors that were recorded back when async writeback - * failed for pages in this mapping. - */ - rc = lli->lli_async_rc; - lli->lli_async_rc = 0; - if (lli->lli_clob) { - err = lov_read_and_clear_async_rc(lli->lli_clob); - if (!rc) - rc = err; - } - - /* The application has been told about write failure already. - * Do not report failure again. - */ - if (fd->fd_write_failed) - return 0; - return rc ? -EIO : 0; -} - -/** - * Called to make sure a portion of file has been written out. - * if @mode is not CL_FSYNC_LOCAL, it will send OST_SYNC RPCs to OST. - * - * Return how many pages have been written. 
- */ -int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, - enum cl_fsync_mode mode, int ignore_layout) -{ - struct lu_env *env; - struct cl_io *io; - struct cl_fsync_io *fio; - int result; - u16 refcheck; - - if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL && - mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL) - return -EINVAL; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - io = vvp_env_thread_io(env); - io->ci_obj = ll_i2info(inode)->lli_clob; - io->ci_ignore_layout = ignore_layout; - - /* initialize parameters for sync */ - fio = &io->u.ci_fsync; - fio->fi_start = start; - fio->fi_end = end; - fio->fi_fid = ll_inode2fid(inode); - fio->fi_mode = mode; - fio->fi_nr_written = 0; - - if (cl_io_init(env, io, CIT_FSYNC, io->ci_obj) == 0) - result = cl_io_loop(env, io); - else - result = io->ci_result; - if (result == 0) - result = fio->fi_nr_written; - cl_io_fini(env, io); - cl_env_put(env, &refcheck); - - return result; -} - -int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync) -{ - struct inode *inode = file_inode(file); - struct ll_inode_info *lli = ll_i2info(inode); - struct ptlrpc_request *req; - int rc, err; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n", - PFID(ll_inode2fid(inode)), inode); - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1); - - rc = file_write_and_wait_range(file, start, end); - inode_lock(inode); - - /* catch async errors that were recorded back when async writeback - * failed for pages in this mapping. 
- */ - if (!S_ISDIR(inode->i_mode)) { - err = lli->lli_async_rc; - lli->lli_async_rc = 0; - if (rc == 0) - rc = err; - if (lli->lli_clob) { - err = lov_read_and_clear_async_rc(lli->lli_clob); - if (rc == 0) - rc = err; - } - } - - err = md_sync(ll_i2sbi(inode)->ll_md_exp, ll_inode2fid(inode), &req); - if (!rc) - rc = err; - if (!err) - ptlrpc_req_finished(req); - - if (S_ISREG(inode->i_mode)) { - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - - err = cl_sync_file_range(inode, start, end, CL_FSYNC_ALL, 0); - if (rc == 0 && err < 0) - rc = err; - if (rc < 0) - fd->fd_write_failed = true; - else - fd->fd_write_failed = false; - } - - inode_unlock(inode); - return rc; -} - -static int -ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) -{ - struct inode *inode = file_inode(file); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ldlm_enqueue_info einfo = { - .ei_type = LDLM_FLOCK, - .ei_cb_cp = ldlm_flock_completion_ast, - .ei_cbdata = file_lock, - }; - struct md_op_data *op_data; - struct lustre_handle lockh = {0}; - union ldlm_policy_data flock = { { 0 } }; - int fl_type = file_lock->fl_type; - __u64 flags = 0; - int rc; - int rc2 = 0; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID " file_lock=%p\n", - PFID(ll_inode2fid(inode)), file_lock); - - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FLOCK, 1); - - if (file_lock->fl_flags & FL_FLOCK) - LASSERT((cmd == F_SETLKW) || (cmd == F_SETLK)); - else if (!(file_lock->fl_flags & FL_POSIX)) - return -EINVAL; - - flock.l_flock.owner = (unsigned long)file_lock->fl_owner; - flock.l_flock.pid = file_lock->fl_pid; - flock.l_flock.start = file_lock->fl_start; - flock.l_flock.end = file_lock->fl_end; - - /* Somewhat ugly workaround for svc lockd. - * lockd installs custom fl_lmops->lm_compare_owner that checks - * for the fl_owner to be the same (which it always is on local node - * I guess between lockd processes) and then compares pid. 
- * As such we assign pid to the owner field to make it all work, - * conflict with normal locks is unlikely since pid space and - * pointer space for current->files are not intersecting - */ - if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner) - flock.l_flock.owner = (unsigned long)file_lock->fl_pid; - - switch (fl_type) { - case F_RDLCK: - einfo.ei_mode = LCK_PR; - break; - case F_UNLCK: - /* An unlock request may or may not have any relation to - * existing locks so we may not be able to pass a lock handle - * via a normal ldlm_lock_cancel() request. The request may even - * unlock a byte range in the middle of an existing lock. In - * order to process an unlock request we need all of the same - * information that is given with a normal read or write record - * lock request. To avoid creating another ldlm unlock (cancel) - * message we'll treat a LCK_NL flock request as an unlock. - */ - einfo.ei_mode = LCK_NL; - break; - case F_WRLCK: - einfo.ei_mode = LCK_PW; - break; - default: - CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type); - return -ENOTSUPP; - } - - switch (cmd) { - case F_SETLKW: -#ifdef F_SETLKW64 - case F_SETLKW64: -#endif - flags = 0; - break; - case F_SETLK: -#ifdef F_SETLK64 - case F_SETLK64: -#endif - flags = LDLM_FL_BLOCK_NOWAIT; - break; - case F_GETLK: -#ifdef F_GETLK64 - case F_GETLK64: -#endif - flags = LDLM_FL_TEST_LOCK; - break; - default: - CERROR("unknown fcntl lock command: %d\n", cmd); - return -EINVAL; - } - - /* - * Save the old mode so that if the mode in the lock changes we - * can decrement the appropriate reader or writer refcount. 
- */ - file_lock->fl_type = einfo.ei_mode; - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - CDEBUG(D_DLMTRACE, "inode=" DFID ", pid=%u, flags=%#llx, mode=%u, start=%llu, end=%llu\n", - PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags, - einfo.ei_mode, flock.l_flock.start, flock.l_flock.end); - - rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, op_data, &lockh, - flags); - - /* Restore the file lock type if not TEST lock. */ - if (!(flags & LDLM_FL_TEST_LOCK)) - file_lock->fl_type = fl_type; - - if ((rc == 0 || file_lock->fl_type == F_UNLCK) && - !(flags & LDLM_FL_TEST_LOCK)) - rc2 = locks_lock_file_wait(file, file_lock); - - if (rc2 && file_lock->fl_type != F_UNLCK) { - einfo.ei_mode = LCK_NL; - md_enqueue(sbi->ll_md_exp, &einfo, &flock, op_data, - &lockh, flags); - rc = rc2; - } - - ll_finish_md_op_data(op_data); - - return rc; -} - -int ll_get_fid_by_name(struct inode *parent, const char *name, - int namelen, struct lu_fid *fid, - struct inode **inode) -{ - struct md_op_data *op_data = NULL; - struct ptlrpc_request *req; - struct mdt_body *body; - int rc; - - op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE; - rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req); - ll_finish_md_op_data(op_data); - if (rc < 0) - return rc; - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (!body) { - rc = -EFAULT; - goto out_req; - } - if (fid) - *fid = body->mbo_fid1; - - if (inode) - rc = ll_prep_inode(inode, req, parent->i_sb, NULL); -out_req: - ptlrpc_req_finished(req); - return rc; -} - -int ll_migrate(struct inode *parent, struct file *file, int mdtidx, - const char *name, int namelen) -{ - struct ptlrpc_request *request = NULL; - struct obd_client_handle *och = NULL; - struct inode *child_inode = NULL; - 
struct dentry *dchild = NULL; - struct md_op_data *op_data; - struct mdt_body *body; - u64 data_version = 0; - struct qstr qstr; - int rc; - - CDEBUG(D_VFSTRACE, "migrate %s under " DFID " to MDT%d\n", - name, PFID(ll_inode2fid(parent)), mdtidx); - - op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, - 0, LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - /* Get child FID first */ - qstr.hash = full_name_hash(parent, name, namelen); - qstr.name = name; - qstr.len = namelen; - dchild = d_lookup(file_dentry(file), &qstr); - if (dchild) { - op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); - if (dchild->d_inode) - child_inode = igrab(dchild->d_inode); - dput(dchild); - } - - if (!child_inode) { - rc = ll_get_fid_by_name(parent, name, namelen, - &op_data->op_fid3, &child_inode); - if (rc) - goto out_free; - } - - if (!child_inode) { - rc = -EINVAL; - goto out_free; - } - - inode_lock(child_inode); - op_data->op_fid3 = *ll_inode2fid(child_inode); - if (!fid_is_sane(&op_data->op_fid3)) { - CERROR("%s: migrate %s, but fid " DFID " is insane\n", - ll_get_fsname(parent->i_sb, NULL, 0), name, - PFID(&op_data->op_fid3)); - rc = -EINVAL; - goto out_unlock; - } - - rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3); - if (rc < 0) - goto out_unlock; - - if (rc == mdtidx) { - CDEBUG(D_INFO, "%s: " DFID " is already on MDT%d.\n", name, - PFID(&op_data->op_fid3), mdtidx); - rc = 0; - goto out_unlock; - } -again: - if (S_ISREG(child_inode->i_mode)) { - och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0); - if (IS_ERR(och)) { - rc = PTR_ERR(och); - och = NULL; - goto out_unlock; - } - - rc = ll_data_version(child_inode, &data_version, - LL_DV_WR_FLUSH); - if (rc) - goto out_close; - - op_data->op_handle = och->och_fh; - op_data->op_data = och->och_mod; - op_data->op_data_version = data_version; - op_data->op_lease_handle = och->och_lease_handle; - op_data->op_bias |= MDS_RENAME_MIGRATE; - } - - op_data->op_mds = mdtidx; - 
op_data->op_cli_flags = CLI_MIGRATE; - rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name, - namelen, name, namelen, &request); - if (!rc) { - LASSERT(request); - ll_update_times(request, parent); - - body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); - LASSERT(body); - - /* - * If the server does release layout lock, then we cleanup - * the client och here, otherwise release it in out_close: - */ - if (och && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) { - obd_mod_put(och->och_mod); - md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, - och); - och->och_fh.cookie = DEAD_HANDLE_MAGIC; - kfree(och); - och = NULL; - } - } - - if (request) { - ptlrpc_req_finished(request); - request = NULL; - } - - /* Try again if the file layout has changed. */ - if (rc == -EAGAIN && S_ISREG(child_inode->i_mode)) - goto again; - -out_close: - if (och) /* close the file */ - ll_lease_close(och, child_inode, NULL); - if (!rc) - clear_nlink(child_inode); -out_unlock: - inode_unlock(child_inode); - iput(child_inode); -out_free: - ll_finish_md_op_data(op_data); - return rc; -} - -static int -ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock) -{ - return -ENOSYS; -} - -/** - * test if some locks matching bits and l_req_mode are acquired - * - bits can be in different locks - * - if found clear the common lock bits in *bits - * - the bits not found, are kept in *bits - * \param inode [IN] - * \param bits [IN] searched lock bits [IN] - * \param l_req_mode [IN] searched lock mode - * \retval boolean, true iff all bits are found - */ -int ll_have_md_lock(struct inode *inode, __u64 *bits, - enum ldlm_mode l_req_mode) -{ - struct lustre_handle lockh; - union ldlm_policy_data policy; - enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ? 
- (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode; - struct lu_fid *fid; - __u64 flags; - int i; - - if (!inode) - return 0; - - fid = &ll_i2info(inode)->lli_fid; - CDEBUG(D_INFO, "trying to match res " DFID " mode %s\n", PFID(fid), - ldlm_lockname[mode]); - - flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK; - for (i = 0; i <= MDS_INODELOCK_MAXSHIFT && *bits != 0; i++) { - policy.l_inodebits.bits = *bits & (1 << i); - if (policy.l_inodebits.bits == 0) - continue; - - if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, - &policy, mode, &lockh)) { - struct ldlm_lock *lock; - - lock = ldlm_handle2lock(&lockh); - if (lock) { - *bits &= - ~(lock->l_policy_data.l_inodebits.bits); - LDLM_LOCK_PUT(lock); - } else { - *bits &= ~policy.l_inodebits.bits; - } - } - } - return *bits == 0; -} - -enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits, - struct lustre_handle *lockh, __u64 flags, - enum ldlm_mode mode) -{ - union ldlm_policy_data policy = { .l_inodebits = { bits } }; - struct lu_fid *fid; - - fid = &ll_i2info(inode)->lli_fid; - CDEBUG(D_INFO, "trying to match res " DFID "\n", PFID(fid)); - - return md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED, - fid, LDLM_IBITS, &policy, mode, lockh); -} - -static int ll_inode_revalidate_fini(struct inode *inode, int rc) -{ - /* Already unlinked. Just update nlink and return success */ - if (rc == -ENOENT) { - clear_nlink(inode); - /* If it is striped directory, and there is bad stripe - * Let's revalidate the dentry again, instead of returning - * error - */ - if (S_ISDIR(inode->i_mode) && ll_i2info(inode)->lli_lsm_md) - return 0; - - /* This path cannot be hit for regular files unless in - * case of obscure races, so no need to validate size. - */ - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - return 0; - } else if (rc != 0) { - CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? 
D_INFO : D_ERROR, - "%s: revalidate FID " DFID " error: rc = %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), rc); - } - - return rc; -} - -static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) -{ - struct inode *inode = d_inode(dentry); - struct ptlrpc_request *req = NULL; - struct obd_export *exp; - int rc = 0; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p),name=%pd\n", - PFID(ll_inode2fid(inode)), inode, dentry); - - exp = ll_i2mdexp(inode); - - /* XXX: Enable OBD_CONNECT_ATTRFID to reduce unnecessary getattr RPC. - * But under CMD case, it caused some lock issues, should be fixed - * with new CMD ibits lock. See bug 12718 - */ - if (exp_connect_flags(exp) & OBD_CONNECT_ATTRFID) { - struct lookup_intent oit = { .it_op = IT_GETATTR }; - struct md_op_data *op_data; - - if (ibits == MDS_INODELOCK_LOOKUP) - oit.it_op = IT_LOOKUP; - - /* Call getattr by fid, so do not provide name at all. */ - op_data = ll_prep_md_op_data(NULL, inode, - inode, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - rc = md_intent_lock(exp, op_data, &oit, &req, - &ll_md_blocking_ast, 0); - ll_finish_md_op_data(op_data); - if (rc < 0) { - rc = ll_inode_revalidate_fini(inode, rc); - goto out; - } - - rc = ll_revalidate_it_finish(req, &oit, inode); - if (rc != 0) { - ll_intent_release(&oit); - goto out; - } - - /* Unlinked? Unhash dentry, so it is not picked up later by - * do_lookup() -> ll_revalidate_it(). We cannot use d_drop - * here to preserve get_cwd functionality on 2.6. 
- * Bug 10503 - */ - if (!d_inode(dentry)->i_nlink) { - spin_lock(&inode->i_lock); - d_lustre_invalidate(dentry, 0); - spin_unlock(&inode->i_lock); - } - - ll_lookup_finish_locks(&oit, inode); - } else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) { - struct ll_sb_info *sbi = ll_i2sbi(d_inode(dentry)); - u64 valid = OBD_MD_FLGETATTR; - struct md_op_data *op_data; - int ealen = 0; - - if (S_ISREG(inode->i_mode)) { - rc = ll_get_default_mdsize(sbi, &ealen); - if (rc) - return rc; - valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE; - } - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, - 0, ealen, LUSTRE_OPC_ANY, - NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - op_data->op_valid = valid; - rc = md_getattr(sbi->ll_md_exp, op_data, &req); - ll_finish_md_op_data(op_data); - if (rc) - return ll_inode_revalidate_fini(inode, rc); - - rc = ll_prep_inode(&inode, req, NULL, NULL); - } -out: - ptlrpc_req_finished(req); - return rc; -} - -static int ll_merge_md_attr(struct inode *inode) -{ - struct cl_attr attr = { 0 }; - int rc; - - LASSERT(ll_i2info(inode)->lli_lsm_md); - rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md, - &attr, ll_md_blocking_ast); - if (rc) - return rc; - - set_nlink(inode, attr.cat_nlink); - inode->i_blocks = attr.cat_blocks; - i_size_write(inode, attr.cat_size); - - ll_i2info(inode)->lli_atime = attr.cat_atime; - ll_i2info(inode)->lli_mtime = attr.cat_mtime; - ll_i2info(inode)->lli_ctime = attr.cat_ctime; - - return 0; -} - -static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) -{ - struct inode *inode = d_inode(dentry); - int rc; - - rc = __ll_inode_revalidate(dentry, ibits); - if (rc != 0) - return rc; - - /* if object isn't regular file, don't validate size */ - if (!S_ISREG(inode->i_mode)) { - if (S_ISDIR(inode->i_mode) && - ll_i2info(inode)->lli_lsm_md) { - rc = ll_merge_md_attr(inode); - if (rc) - return rc; - } - - LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime; - 
LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime; - LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime; - } else { - struct ll_inode_info *lli = ll_i2info(inode); - - /* In case of restore, the MDT has the right size and has - * already send it back without granting the layout lock, - * inode is up-to-date so glimpse is useless. - * Also to glimpse we need the layout, in case of a running - * restore the MDT holds the layout lock so the glimpse will - * block up to the end of restore (getattr will block) - */ - if (!test_bit(LLIF_FILE_RESTORING, &lli->lli_flags)) - rc = ll_glimpse_size(inode); - } - return rc; -} - -int ll_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) -{ - struct inode *inode = d_inode(path->dentry); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - int res; - - res = ll_inode_revalidate(path->dentry, - MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP); - ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1); - - if (res) - return res; - - OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30); - - stat->dev = inode->i_sb->s_dev; - if (ll_need_32bit_api(sbi)) - stat->ino = cl_fid_build_ino(&lli->lli_fid, 1); - else - stat->ino = inode->i_ino; - stat->mode = inode->i_mode; - stat->uid = inode->i_uid; - stat->gid = inode->i_gid; - stat->rdev = inode->i_rdev; - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; - stat->blksize = 1 << inode->i_blkbits; - - stat->nlink = inode->i_nlink; - stat->size = i_size_read(inode); - stat->blocks = inode->i_blocks; - - return 0; -} - -static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len) -{ - int rc; - size_t num_bytes; - struct fiemap *fiemap; - unsigned int extent_count = fieinfo->fi_extents_max; - - num_bytes = sizeof(*fiemap) + (extent_count * - sizeof(struct fiemap_extent)); - fiemap = kvzalloc(num_bytes, GFP_KERNEL); - if (!fiemap) - return 
-ENOMEM; - - fiemap->fm_flags = fieinfo->fi_flags; - fiemap->fm_extent_count = fieinfo->fi_extents_max; - fiemap->fm_start = start; - fiemap->fm_length = len; - - if (extent_count > 0 && - copy_from_user(&fiemap->fm_extents[0], fieinfo->fi_extents_start, - sizeof(struct fiemap_extent))) { - rc = -EFAULT; - goto out; - } - - rc = ll_do_fiemap(inode, fiemap, num_bytes); - - fieinfo->fi_flags = fiemap->fm_flags; - fieinfo->fi_extents_mapped = fiemap->fm_mapped_extents; - if (extent_count > 0 && - copy_to_user(fieinfo->fi_extents_start, &fiemap->fm_extents[0], - fiemap->fm_mapped_extents * - sizeof(struct fiemap_extent))) { - rc = -EFAULT; - goto out; - } -out: - kvfree(fiemap); - return rc; -} - -int ll_inode_permission(struct inode *inode, int mask) -{ - struct ll_sb_info *sbi; - struct root_squash_info *squash; - const struct cred *old_cred = NULL; - struct cred *cred = NULL; - bool squash_id = false; - int rc = 0; - - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - - /* as root inode are NOT getting validated in lookup operation, - * need to do it before permission check. 
- */ - - if (is_root_inode(inode)) { - rc = __ll_inode_revalidate(inode->i_sb->s_root, - MDS_INODELOCK_LOOKUP); - if (rc) - return rc; - } - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), inode mode %x mask %o\n", - PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask); - - /* squash fsuid/fsgid if needed */ - sbi = ll_i2sbi(inode); - squash = &sbi->ll_squash; - if (unlikely(squash->rsi_uid && - uid_eq(current_fsuid(), GLOBAL_ROOT_UID) && - !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) { - squash_id = true; - } - - if (squash_id) { - CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n", - __kuid_val(current_fsuid()), __kgid_val(current_fsgid()), - squash->rsi_uid, squash->rsi_gid); - - /* - * update current process's credentials - * and FS capability - */ - cred = prepare_creds(); - if (!cred) - return -ENOMEM; - - cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid); - cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid); - cred->cap_effective = cap_drop_nfsd_set(cred->cap_effective); - cred->cap_effective = cap_drop_fs_set(cred->cap_effective); - - old_cred = override_creds(cred); - } - - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1); - rc = generic_permission(inode, mask); - - /* restore current process's credentials and FS capability */ - if (squash_id) { - revert_creds(old_cred); - put_cred(cred); - } - - return rc; -} - -/* -o localflock - only provides locally consistent flock locks */ -const struct file_operations ll_file_operations = { - .read_iter = ll_file_read_iter, - .write_iter = ll_file_write_iter, - .unlocked_ioctl = ll_file_ioctl, - .open = ll_file_open, - .release = ll_file_release, - .mmap = ll_file_mmap, - .llseek = ll_file_seek, - .splice_read = generic_file_splice_read, - .fsync = ll_fsync, - .flush = ll_flush -}; - -const struct file_operations ll_file_operations_flock = { - .read_iter = ll_file_read_iter, - .write_iter = ll_file_write_iter, - .unlocked_ioctl = ll_file_ioctl, - .open = ll_file_open, - .release = 
ll_file_release, - .mmap = ll_file_mmap, - .llseek = ll_file_seek, - .splice_read = generic_file_splice_read, - .fsync = ll_fsync, - .flush = ll_flush, - .flock = ll_file_flock, - .lock = ll_file_flock -}; - -/* These are for -o noflock - to return ENOSYS on flock calls */ -const struct file_operations ll_file_operations_noflock = { - .read_iter = ll_file_read_iter, - .write_iter = ll_file_write_iter, - .unlocked_ioctl = ll_file_ioctl, - .open = ll_file_open, - .release = ll_file_release, - .mmap = ll_file_mmap, - .llseek = ll_file_seek, - .splice_read = generic_file_splice_read, - .fsync = ll_fsync, - .flush = ll_flush, - .flock = ll_file_noflock, - .lock = ll_file_noflock -}; - -const struct inode_operations ll_file_inode_operations = { - .setattr = ll_setattr, - .getattr = ll_getattr, - .permission = ll_inode_permission, - .listxattr = ll_listxattr, - .fiemap = ll_fiemap, - .get_acl = ll_get_acl, -}; - -/* dynamic ioctl number support routines */ -static struct llioc_ctl_data { - struct rw_semaphore ioc_sem; - struct list_head ioc_head; -} llioc = { - __RWSEM_INITIALIZER(llioc.ioc_sem), - LIST_HEAD_INIT(llioc.ioc_head) -}; - -struct llioc_data { - struct list_head iocd_list; - unsigned int iocd_size; - llioc_callback_t iocd_cb; - unsigned int iocd_count; - unsigned int iocd_cmd[0]; -}; - -void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd) -{ - unsigned int size; - struct llioc_data *in_data = NULL; - - if (!cb || !cmd || count > LLIOC_MAX_CMD || count < 0) - return NULL; - - size = sizeof(*in_data) + count * sizeof(unsigned int); - in_data = kzalloc(size, GFP_NOFS); - if (!in_data) - return NULL; - - in_data->iocd_size = size; - in_data->iocd_cb = cb; - in_data->iocd_count = count; - memcpy(in_data->iocd_cmd, cmd, sizeof(unsigned int) * count); - - down_write(&llioc.ioc_sem); - list_add_tail(&in_data->iocd_list, &llioc.ioc_head); - up_write(&llioc.ioc_sem); - - return in_data; -} -EXPORT_SYMBOL(ll_iocontrol_register); - -void 
ll_iocontrol_unregister(void *magic) -{ - struct llioc_data *tmp; - - if (!magic) - return; - - down_write(&llioc.ioc_sem); - list_for_each_entry(tmp, &llioc.ioc_head, iocd_list) { - if (tmp == magic) { - list_del(&tmp->iocd_list); - up_write(&llioc.ioc_sem); - - kfree(tmp); - return; - } - } - up_write(&llioc.ioc_sem); - - CWARN("didn't find iocontrol register block with magic: %p\n", magic); -} -EXPORT_SYMBOL(ll_iocontrol_unregister); - -static enum llioc_iter -ll_iocontrol_call(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg, int *rcp) -{ - enum llioc_iter ret = LLIOC_CONT; - struct llioc_data *data; - int rc = -EINVAL, i; - - down_read(&llioc.ioc_sem); - list_for_each_entry(data, &llioc.ioc_head, iocd_list) { - for (i = 0; i < data->iocd_count; i++) { - if (cmd != data->iocd_cmd[i]) - continue; - - ret = data->iocd_cb(inode, file, cmd, arg, data, &rc); - break; - } - - if (ret == LLIOC_STOP) - break; - } - up_read(&llioc.ioc_sem); - - if (rcp) - *rcp = rc; - return ret; -} - -int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct cl_object *obj = lli->lli_clob; - struct lu_env *env; - int rc; - u16 refcheck; - - if (!obj) - return 0; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - rc = cl_conf_set(env, obj, conf); - if (rc < 0) - goto out; - - if (conf->coc_opc == OBJECT_CONF_SET) { - struct ldlm_lock *lock = conf->coc_lock; - struct cl_layout cl = { - .cl_layout_gen = 0, - }; - - LASSERT(lock); - LASSERT(ldlm_has_layout(lock)); - - /* it can only be allowed to match after layout is - * applied to inode otherwise false layout would be - * seen. Applying layout should happen before dropping - * the intent lock. 
- */ - ldlm_lock_allow_match(lock); - - rc = cl_object_layout_get(env, obj, &cl); - if (rc < 0) - goto out; - - CDEBUG(D_VFSTRACE, DFID ": layout version change: %u -> %u\n", - PFID(&lli->lli_fid), ll_layout_version_get(lli), - cl.cl_layout_gen); - ll_layout_version_set(lli, cl.cl_layout_gen); - } -out: - cl_env_put(env, &refcheck); - return rc; -} - -/* Fetch layout from MDT with getxattr request, if it's not ready yet */ -static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) - -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *req; - struct mdt_body *body; - void *lvbdata; - void *lmm; - int lmmsize; - int rc; - - CDEBUG(D_INODE, DFID " LVB_READY=%d l_lvb_data=%p l_lvb_len=%d\n", - PFID(ll_inode2fid(inode)), ldlm_is_lvb_ready(lock), - lock->l_lvb_data, lock->l_lvb_len); - - if (lock->l_lvb_data && ldlm_is_lvb_ready(lock)) - return 0; - - /* if layout lock was granted right away, the layout is returned - * within DLM_LVB of dlm reply; otherwise if the lock was ever - * blocked and then granted via completion ast, we have to fetch - * layout here. 
Please note that we can't use the LVB buffer in - * completion AST because it doesn't have a large enough buffer - */ - rc = ll_get_default_mdsize(sbi, &lmmsize); - if (rc == 0) - rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), - OBD_MD_FLXATTR, XATTR_NAME_LOV, lmmsize, &req); - if (rc < 0) - return rc; - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (!body) { - rc = -EPROTO; - goto out; - } - - lmmsize = body->mbo_eadatasize; - if (lmmsize == 0) /* empty layout */ { - rc = 0; - goto out; - } - - lmm = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, lmmsize); - if (!lmm) { - rc = -EFAULT; - goto out; - } - - lvbdata = kvzalloc(lmmsize, GFP_NOFS); - if (!lvbdata) { - rc = -ENOMEM; - goto out; - } - - memcpy(lvbdata, lmm, lmmsize); - lock_res_and_lock(lock); - if (lock->l_lvb_data) - kvfree(lock->l_lvb_data); - - lock->l_lvb_data = lvbdata; - lock->l_lvb_len = lmmsize; - unlock_res_and_lock(lock); - -out: - ptlrpc_req_finished(req); - return rc; -} - -/** - * Apply the layout to the inode. Layout lock is held and will be released - * in this function. - */ -static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, - struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ldlm_lock *lock; - struct cl_object_conf conf; - int rc = 0; - bool lvb_ready; - bool wait_layout = false; - - LASSERT(lustre_handle_is_used(lockh)); - - lock = ldlm_handle2lock(lockh); - LASSERT(lock); - LASSERT(ldlm_has_layout(lock)); - - LDLM_DEBUG(lock, "File " DFID "(%p) being reconfigured", - PFID(&lli->lli_fid), inode); - - /* in case this is a caching lock and reinstate with new inode */ - md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL); - - lock_res_and_lock(lock); - lvb_ready = ldlm_is_lvb_ready(lock); - unlock_res_and_lock(lock); - /* checking lvb_ready is racy but this is okay. 
The worst case is - * that multi processes may configure the file on the same time. - */ - if (lvb_ready) { - rc = 0; - goto out; - } - - rc = ll_layout_fetch(inode, lock); - if (rc < 0) - goto out; - - /* for layout lock, lmm is returned in lock's lvb. - * lvb_data is immutable if the lock is held so it's safe to access it - * without res lock. - * - * set layout to file. Unlikely this will fail as old layout was - * surely eliminated - */ - memset(&conf, 0, sizeof(conf)); - conf.coc_opc = OBJECT_CONF_SET; - conf.coc_inode = inode; - conf.coc_lock = lock; - conf.u.coc_layout.lb_buf = lock->l_lvb_data; - conf.u.coc_layout.lb_len = lock->l_lvb_len; - rc = ll_layout_conf(inode, &conf); - - /* refresh layout failed, need to wait */ - wait_layout = rc == -EBUSY; - -out: - LDLM_LOCK_PUT(lock); - ldlm_lock_decref(lockh, mode); - - /* wait for IO to complete if it's still being used. */ - if (wait_layout) { - CDEBUG(D_INODE, "%s: " DFID "(%p) wait for layout reconf\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(&lli->lli_fid), inode); - - memset(&conf, 0, sizeof(conf)); - conf.coc_opc = OBJECT_CONF_WAIT; - conf.coc_inode = inode; - rc = ll_layout_conf(inode, &conf); - if (rc == 0) - rc = -EAGAIN; - - CDEBUG(D_INODE, - "%s: file=" DFID " waiting layout return: %d.\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(&lli->lli_fid), rc); - } - return rc; -} - -static int ll_layout_refresh_locked(struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct md_op_data *op_data; - struct lookup_intent it; - struct lustre_handle lockh; - enum ldlm_mode mode; - struct ptlrpc_request *req; - int rc; - -again: - /* mostly layout lock is caching on the local side, so try to match - * it before grabbing layout lock mutex. 
- */ - mode = ll_take_md_lock(inode, MDS_INODELOCK_LAYOUT, &lockh, 0, - LCK_CR | LCK_CW | LCK_PR | LCK_PW); - if (mode != 0) { /* hit cached lock */ - rc = ll_layout_lock_set(&lockh, mode, inode); - if (rc == -EAGAIN) - goto again; - return rc; - } - - op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, - 0, 0, LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - /* have to enqueue one */ - memset(&it, 0, sizeof(it)); - it.it_op = IT_LAYOUT; - - LDLM_DEBUG_NOLOCK("%s: requeue layout lock for file " DFID "(%p)", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(&lli->lli_fid), inode); - - rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req, - &ll_md_blocking_ast, 0); - ptlrpc_req_finished(it.it_request); - it.it_request = NULL; - - ll_finish_md_op_data(op_data); - - mode = it.it_lock_mode; - it.it_lock_mode = 0; - ll_intent_drop_lock(&it); - - if (rc == 0) { - /* set lock data in case this is a new lock */ - ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL); - lockh.cookie = it.it_lock_handle; - rc = ll_layout_lock_set(&lockh, mode, inode); - if (rc == -EAGAIN) - goto again; - } - - return rc; -} - -/** - * This function checks if there exists a LAYOUT lock on the client side, - * or enqueues it if it doesn't have one in cache. - * - * This function will not hold layout lock so it may be revoked any time after - * this function returns. Any operations depend on layout should be redone - * in that case. - * - * This function should be called before lov_io_init() to get an uptodate - * layout version, the caller should save the version number and after IO - * is finished, this function should be called again to verify that layout - * is not changed during IO time. 
- */ -int ll_layout_refresh(struct inode *inode, __u32 *gen) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc; - - *gen = ll_layout_version_get(lli); - if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK) || *gen != CL_LAYOUT_GEN_NONE) - return 0; - - /* sanity checks */ - LASSERT(fid_is_sane(ll_inode2fid(inode))); - LASSERT(S_ISREG(inode->i_mode)); - - /* take layout lock mutex to enqueue layout lock exclusively. */ - mutex_lock(&lli->lli_layout_mutex); - - rc = ll_layout_refresh_locked(inode); - if (rc < 0) - goto out; - - *gen = ll_layout_version_get(lli); -out: - mutex_unlock(&lli->lli_layout_mutex); - - return rc; -} - -/** - * This function send a restore request to the MDT - */ -int ll_layout_restore(struct inode *inode, loff_t offset, __u64 length) -{ - struct hsm_user_request *hur; - int len, rc; - - len = sizeof(struct hsm_user_request) + - sizeof(struct hsm_user_item); - hur = kzalloc(len, GFP_NOFS); - if (!hur) - return -ENOMEM; - - hur->hur_request.hr_action = HUA_RESTORE; - hur->hur_request.hr_archive_id = 0; - hur->hur_request.hr_flags = 0; - memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid, - sizeof(hur->hur_user_item[0].hui_fid)); - hur->hur_user_item[0].hui_extent.offset = offset; - hur->hur_user_item[0].hui_extent.length = length; - hur->hur_request.hr_itemcount = 1; - rc = obd_iocontrol(LL_IOC_HSM_REQUEST, ll_i2sbi(inode)->ll_md_exp, - len, hur, NULL); - kfree(hur); - return rc; -} diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c deleted file mode 100644 index ce0d51767da3..000000000000 --- a/drivers/staging/lustre/lustre/llite/glimpse.c +++ /dev/null @@ -1,205 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * glimpse code shared between vvp and liblustre (and other Lustre clients in - * the future). - * - * Author: Nikita Danilov - * Author: Oleg Drokin - */ - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include "llite_internal.h" - -static const struct cl_lock_descr whole_file = { - .cld_start = 0, - .cld_end = CL_PAGE_EOF, - .cld_mode = CLM_READ -}; - -/* - * Check whether file has possible unwriten pages. - * - * \retval 1 file is mmap-ed or has dirty pages - * 0 otherwise - */ -blkcnt_t dirty_cnt(struct inode *inode) -{ - blkcnt_t cnt = 0; - struct vvp_object *vob = cl_inode2vvp(inode); - void *results[1]; - - if (inode->i_mapping) - cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->i_pages, - results, 0, 1, - PAGECACHE_TAG_DIRTY); - if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0) - cnt = 1; - - return (cnt > 0) ? 
1 : 0; -} - -int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, - struct inode *inode, struct cl_object *clob, int agl) -{ - const struct lu_fid *fid = lu_object_fid(&clob->co_lu); - struct cl_lock *lock = vvp_env_lock(env); - struct cl_lock_descr *descr = &lock->cll_descr; - int result = 0; - - CDEBUG(D_DLMTRACE, "Glimpsing inode " DFID "\n", PFID(fid)); - - /* NOTE: this looks like DLM lock request, but it may - * not be one. Due to CEF_ASYNC flag (translated - * to LDLM_FL_HAS_INTENT by osc), this is - * glimpse request, that won't revoke any - * conflicting DLM locks held. Instead, - * ll_glimpse_callback() will be called on each - * client holding a DLM lock against this file, - * and resulting size will be returned for each - * stripe. DLM lock on [0, EOF] is acquired only - * if there were no conflicting locks. If there - * were conflicting locks, enqueuing or waiting - * fails with -ENAVAIL, but valid inode - * attributes are returned anyway. - */ - *descr = whole_file; - descr->cld_obj = clob; - descr->cld_mode = CLM_READ; - descr->cld_enq_flags = CEF_ASYNC | CEF_MUST; - if (agl) - descr->cld_enq_flags |= CEF_AGL; - /* - * CEF_ASYNC is used because glimpse sub-locks cannot - * deadlock (because they never conflict with other - * locks) and, hence, can be enqueued out-of-order. - * - * CEF_MUST protects glimpse lock from conversion into - * a lockless mode. - */ - result = cl_lock_request(env, io, lock); - if (result < 0) - return result; - - if (!agl) { - ll_merge_attr(env, inode); - if (i_size_read(inode) > 0 && !inode->i_blocks) { - /* - * LU-417: Add dirty pages block count - * lest i_blocks reports 0, some "cp" or - * "tar" may think it's a completely - * sparse file and skip it. 
- */ - inode->i_blocks = dirty_cnt(inode); - } - } - - cl_lock_release(env, lock); - - return result; -} - -static int cl_io_get(struct inode *inode, struct lu_env **envout, - struct cl_io **ioout, u16 *refcheck) -{ - struct lu_env *env; - struct cl_io *io; - struct ll_inode_info *lli = ll_i2info(inode); - struct cl_object *clob = lli->lli_clob; - int result; - - if (S_ISREG(inode->i_mode)) { - env = cl_env_get(refcheck); - if (!IS_ERR(env)) { - io = vvp_env_thread_io(env); - io->ci_obj = clob; - *envout = env; - *ioout = io; - result = 1; - } else { - result = PTR_ERR(env); - } - } else { - result = 0; - } - return result; -} - -int cl_glimpse_size0(struct inode *inode, int agl) -{ - /* - * We don't need ast_flags argument to cl_glimpse_size(), because - * osc_lock_enqueue() takes care of the possible deadlock that said - * argument was introduced to avoid. - */ - /* - * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to - * cl_glimpse_size(), which doesn't make sense: glimpse locks are not - * blocking anyway. - */ - struct lu_env *env = NULL; - struct cl_io *io = NULL; - int result; - u16 refcheck; - - result = cl_io_get(inode, &env, &io, &refcheck); - if (result > 0) { -again: - io->ci_verify_layout = 1; - result = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (result > 0) - /* - * nothing to do for this io. This currently happens - * when stripe sub-object's are not yet created. 
- */ - result = io->ci_result; - else if (result == 0) - result = cl_glimpse_lock(env, io, inode, io->ci_obj, - agl); - - OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2); - cl_io_fini(env, io); - if (unlikely(io->ci_need_restart)) - goto again; - cl_env_put(env, &refcheck); - } - return result; -} diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c deleted file mode 100644 index d7ea39ce0cb2..000000000000 --- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c +++ /dev/null @@ -1,292 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * cl code shared between vvp and liblustre (and other Lustre clients in the - * future). 
- * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "llite_internal.h" - -/* - * ccc_ prefix stands for "Common Client Code". - */ - -/***************************************************************************** - * - * Vvp device and device type functions. - * - */ - -/** - * An `emergency' environment used by cl_inode_fini() when cl_env_get() - * fails. Access to this environment is serialized by cl_inode_fini_guard - * mutex. - */ -struct lu_env *cl_inode_fini_env; -u16 cl_inode_fini_refcheck; - -/** - * A mutex serializing calls to slp_inode_fini() under extreme memory - * pressure, when environments cannot be allocated. - */ -static DEFINE_MUTEX(cl_inode_fini_guard); - -int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr, - unsigned int attr_flags) -{ - struct lu_env *env; - struct cl_io *io; - int result; - u16 refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - io = vvp_env_thread_io(env); - io->ci_obj = obj; - io->ci_verify_layout = 1; - - io->u.ci_setattr.sa_attr.lvb_atime = LTIME_S(attr->ia_atime); - io->u.ci_setattr.sa_attr.lvb_mtime = LTIME_S(attr->ia_mtime); - io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime); - io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size; - io->u.ci_setattr.sa_attr_flags = attr_flags; - io->u.ci_setattr.sa_valid = attr->ia_valid; - io->u.ci_setattr.sa_parent_fid = lu_object_fid(&obj->co_lu); - -again: - if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) { - struct vvp_io *vio = vvp_env_io(env); - - if (attr->ia_valid & ATTR_FILE) - /* populate the file descriptor for ftruncate to honor - * group lock - see LU-787 - */ - vio->vui_fd = LUSTRE_FPRIVATE(attr->ia_file); - - result = cl_io_loop(env, io); - } else { - result = io->ci_result; - } - cl_io_fini(env, io); - if (unlikely(io->ci_need_restart)) - 
goto again; - - cl_env_put(env, &refcheck); - return result; -} - -/** - * Initialize or update CLIO structures for regular files when new - * meta-data arrives from the server. - * - * \param inode regular file inode - * \param md new file metadata from MDS - * - allocates cl_object if necessary, - * - updated layout, if object was already here. - */ -int cl_file_inode_init(struct inode *inode, struct lustre_md *md) -{ - struct lu_env *env; - struct ll_inode_info *lli; - struct cl_object *clob; - struct lu_site *site; - struct lu_fid *fid; - struct cl_object_conf conf = { - .coc_inode = inode, - .u = { - .coc_layout = md->layout, - } - }; - int result = 0; - u16 refcheck; - - LASSERT(md->body->mbo_valid & OBD_MD_FLID); - LASSERT(S_ISREG(inode->i_mode)); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - site = ll_i2sbi(inode)->ll_site; - lli = ll_i2info(inode); - fid = &lli->lli_fid; - LASSERT(fid_is_sane(fid)); - - if (!lli->lli_clob) { - /* clob is slave of inode, empty lli_clob means for new inode, - * there is no clob in cache with the given fid, so it is - * unnecessary to perform lookup-alloc-lookup-insert, just - * alloc and insert directly. - */ - LASSERT(inode->i_state & I_NEW); - conf.coc_lu.loc_flags = LOC_F_NEW; - clob = cl_object_find(env, lu2cl_dev(site->ls_top_dev), - fid, &conf); - if (!IS_ERR(clob)) { - /* - * No locking is necessary, as new inode is - * locked by I_NEW bit. - */ - lli->lli_clob = clob; - lu_object_ref_add(&clob->co_lu, "inode", inode); - } else { - result = PTR_ERR(clob); - } - } else { - result = cl_conf_set(env, lli->lli_clob, &conf); - } - - cl_env_put(env, &refcheck); - - if (result != 0) - CERROR("Failure to initialize cl object " DFID ": %d\n", - PFID(fid), result); - return result; -} - -/** - * Wait for others drop their references of the object at first, then we drop - * the last one, which will lead to the object be destroyed immediately. 
- * Must be called after cl_object_kill() against this object. - * - * The reason we want to do this is: destroying top object will wait for sub - * objects being destroyed first, so we can't let bottom layer (e.g. from ASTs) - * to initiate top object destroying which may deadlock. See bz22520. - */ -static void cl_object_put_last(struct lu_env *env, struct cl_object *obj) -{ - struct lu_object_header *header = obj->co_lu.lo_header; - wait_queue_entry_t waiter; - - if (unlikely(atomic_read(&header->loh_ref) != 1)) { - struct lu_site *site = obj->co_lu.lo_dev->ld_site; - wait_queue_head_t *wq; - - wq = lu_site_wq_from_fid(site, &header->loh_fid); - - init_waitqueue_entry(&waiter, current); - add_wait_queue(wq, &waiter); - - while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&header->loh_ref) == 1) - break; - schedule(); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(wq, &waiter); - } - - cl_object_put(env, obj); -} - -void cl_inode_fini(struct inode *inode) -{ - struct lu_env *env; - struct ll_inode_info *lli = ll_i2info(inode); - struct cl_object *clob = lli->lli_clob; - u16 refcheck; - int emergency; - - if (clob) { - env = cl_env_get(&refcheck); - emergency = IS_ERR(env); - if (emergency) { - mutex_lock(&cl_inode_fini_guard); - LASSERT(cl_inode_fini_env); - env = cl_inode_fini_env; - } - /* - * cl_object cache is a slave to inode cache (which, in turn - * is a slave to dentry cache), don't keep cl_object in memory - * when its master is evicted. 
- */ - cl_object_kill(env, clob); - lu_object_ref_del(&clob->co_lu, "inode", inode); - cl_object_put_last(env, clob); - lli->lli_clob = NULL; - if (emergency) - mutex_unlock(&cl_inode_fini_guard); - else - cl_env_put(env, &refcheck); - } -} - -/** - * build inode number from passed @fid - */ -__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32) -{ - if (BITS_PER_LONG == 32 || api32) - return fid_flatten32(fid); - else - return fid_flatten(fid); -} - -/** - * build inode generation from passed @fid. If our FID overflows the 32-bit - * inode number then return a non-zero generation to distinguish them. - */ -__u32 cl_fid_build_gen(const struct lu_fid *fid) -{ - __u32 gen; - - if (fid_is_igif(fid)) { - gen = lu_igif_gen(fid); - return gen; - } - - gen = fid_flatten(fid) >> 32; - return gen; -} diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c deleted file mode 100644 index a246b955306e..000000000000 --- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c +++ /dev/null @@ -1,186 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * cl code shared between vvp and liblustre (and other Lustre clients in the - * future). - * - */ -#define DEBUG_SUBSYSTEM S_LLITE -#include -#include -#include -#include - -#include "llite_internal.h" - -/* Initialize the default and maximum LOV EA and cookie sizes. This allows - * us to make MDS RPCs with large enough reply buffers to hold the - * maximum-sized (= maximum striped) EA and cookie without having to - * calculate this (via a call into the LOV + OSCs) each time we make an RPC. - */ -int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) -{ - u32 val_size, max_easize, def_easize; - int rc; - - val_size = sizeof(max_easize); - rc = obd_get_info(NULL, dt_exp, sizeof(KEY_MAX_EASIZE), KEY_MAX_EASIZE, - &val_size, &max_easize); - if (rc) - return rc; - - val_size = sizeof(def_easize); - rc = obd_get_info(NULL, dt_exp, sizeof(KEY_DEFAULT_EASIZE), - KEY_DEFAULT_EASIZE, &val_size, &def_easize); - if (rc) - return rc; - - /* - * default cookiesize is 0 because from 2.4 server doesn't send - * llog cookies to client. - */ - CDEBUG(D_HA, "updating def/max_easize: %d/%d\n", - def_easize, max_easize); - - rc = md_init_ea_size(md_exp, max_easize, def_easize); - return rc; -} - -/** - * This function is used as an upcall-callback hooked by liblustre and llite - * clients into obd_notify() listeners chain to handle notifications about - * change of import connect_flags. See llu_fsswop_mount() and - * lustre_common_fill_super(). 
- */ -int cl_ocd_update(struct obd_device *host, - struct obd_device *watched, - enum obd_notify_event ev, void *owner, void *data) -{ - struct lustre_client_ocd *lco; - struct client_obd *cli; - __u64 flags; - int result; - - if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) && - watched->obd_set_up && !watched->obd_stopping) { - cli = &watched->u.cli; - lco = owner; - flags = cli->cl_import->imp_connect_data.ocd_connect_flags; - CDEBUG(D_SUPER, "Changing connect_flags: %#llx -> %#llx\n", - lco->lco_flags, flags); - mutex_lock(&lco->lco_lock); - lco->lco_flags &= flags; - /* for each osc event update ea size */ - if (lco->lco_dt_exp) - cl_init_ea_size(lco->lco_md_exp, lco->lco_dt_exp); - - mutex_unlock(&lco->lco_lock); - result = 0; - } else { - CERROR("unexpected notification from %s %s (setup:%d,stopping:%d)!\n", - watched->obd_type->typ_name, - watched->obd_name, watched->obd_set_up, - watched->obd_stopping); - result = -EINVAL; - } - return result; -} - -#define GROUPLOCK_SCOPE "grouplock" - -int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, - struct ll_grouplock *cg) -{ - struct lu_env *env; - struct cl_io *io; - struct cl_lock *lock; - struct cl_lock_descr *descr; - __u32 enqflags; - u16 refcheck; - int rc; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - io = vvp_env_thread_io(env); - io->ci_obj = obj; - - rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (rc != 0) { - cl_io_fini(env, io); - cl_env_put(env, &refcheck); - /* Does not make sense to take GL for released layout */ - if (rc > 0) - rc = -ENOTSUPP; - return rc; - } - - lock = vvp_env_lock(env); - descr = &lock->cll_descr; - descr->cld_obj = obj; - descr->cld_start = 0; - descr->cld_end = CL_PAGE_EOF; - descr->cld_gid = gid; - descr->cld_mode = CLM_GROUP; - - enqflags = CEF_MUST | (nonblock ? 
CEF_NONBLOCK : 0); - descr->cld_enq_flags = enqflags; - - rc = cl_lock_request(env, io, lock); - if (rc < 0) { - cl_io_fini(env, io); - cl_env_put(env, &refcheck); - return rc; - } - - cg->lg_env = env; - cg->lg_io = io; - cg->lg_lock = lock; - cg->lg_gid = gid; - - return 0; -} - -void cl_put_grouplock(struct ll_grouplock *cg) -{ - struct lu_env *env = cg->lg_env; - struct cl_io *io = cg->lg_io; - struct cl_lock *lock = cg->lg_lock; - - LASSERT(cg->lg_env); - LASSERT(cg->lg_gid); - - cl_lock_release(env, lock); - cl_io_fini(env, io); - cl_env_put(env, NULL); -} diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h deleted file mode 100644 index c08a6e14b6d7..000000000000 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ /dev/null @@ -1,1344 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef LLITE_INTERNAL_H -#define LLITE_INTERNAL_H -#include -#include -#include /* for s2sbi */ -#include - -/* for struct cl_lock_descr and struct cl_io */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "vvp_internal.h" -#include "range_lock.h" - -#ifndef FMODE_EXEC -#define FMODE_EXEC 0 -#endif - -#ifndef VM_FAULT_RETRY -#define VM_FAULT_RETRY 0 -#endif - -/** Only used on client-side for indicating the tail of dir hash/offset. */ -#define LL_DIR_END_OFF 0x7fffffffffffffffULL -#define LL_DIR_END_OFF_32BIT 0x7fffffffUL - -/* 4UL * 1024 * 1024 */ -#define LL_MAX_BLKSIZE_BITS 22 - -#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0") -#define LUSTRE_FPRIVATE(file) ((file)->private_data) - -struct ll_dentry_data { - struct lookup_intent *lld_it; - unsigned int lld_sa_generation; - unsigned int lld_invalid:1; - unsigned int lld_nfs_dentry:1; - struct rcu_head lld_rcu_head; -}; - -#define ll_d2d(de) ((struct ll_dentry_data *)((de)->d_fsdata)) - -#define LLI_INODE_MAGIC 0x111d0de5 -#define LLI_INODE_DEAD 0xdeadd00d - -struct ll_getname_data { - struct dir_context ctx; - char *lgd_name; /* points to buffer with NAME_MAX+1 size */ - struct lu_fid lgd_fid; /* target fid we are looking for */ - int lgd_found; /* inode matched? */ -}; - -struct ll_grouplock { - struct lu_env *lg_env; - struct cl_io *lg_io; - struct cl_lock *lg_lock; - unsigned long lg_gid; -}; - -enum ll_file_flags { - /* File data is modified. */ - LLIF_DATA_MODIFIED = 0, - /* File is being restored */ - LLIF_FILE_RESTORING = 1, - /* Xattr cache is attached to the file */ - LLIF_XATTR_CACHE = 2, -}; - -struct ll_inode_info { - __u32 lli_inode_magic; - - spinlock_t lli_lock; - unsigned long lli_flags; - struct posix_acl *lli_posix_acl; - - /* identifying fields for both metadata and data stacks. 
*/ - struct lu_fid lli_fid; - /* master inode fid for stripe directory */ - struct lu_fid lli_pfid; - - /* We need all three because every inode may be opened in different - * modes - */ - struct obd_client_handle *lli_mds_read_och; - struct obd_client_handle *lli_mds_write_och; - struct obd_client_handle *lli_mds_exec_och; - __u64 lli_open_fd_read_count; - __u64 lli_open_fd_write_count; - __u64 lli_open_fd_exec_count; - /* Protects access to och pointers and their usage counters */ - struct mutex lli_och_mutex; - - struct inode lli_vfs_inode; - - /* the most recent timestamps obtained from mds */ - s64 lli_atime; - s64 lli_mtime; - s64 lli_ctime; - spinlock_t lli_agl_lock; - - /* Try to make the d::member and f::member are aligned. Before using - * these members, make clear whether it is directory or not. - */ - union { - /* for directory */ - struct { - /* serialize normal readdir and statahead-readdir. */ - struct mutex lli_readdir_mutex; - - /* metadata statahead */ - /* since parent-child threads can share the same @file - * struct, "opendir_key" is the token when dir close for - * case of parent exit before child -- it is me should - * cleanup the dir readahead. - */ - void *lli_opendir_key; - struct ll_statahead_info *lli_sai; - /* protect statahead stuff. */ - spinlock_t lli_sa_lock; - /* "opendir_pid" is the token when lookup/revalidate - * -- I am the owner of dir statahead. - */ - pid_t lli_opendir_pid; - /* stat will try to access statahead entries or start - * statahead if this flag is set, and this flag will be - * set upon dir open, and cleared when dir is closed, - * statahead hit ratio is too low, or start statahead - * thread failed. - */ - unsigned int lli_sa_enabled:1; - /* generation for statahead */ - unsigned int lli_sa_generation; - /* directory stripe information */ - struct lmv_stripe_md *lli_lsm_md; - /* default directory stripe offset. 
This is extracted - * from the "dmv" xattr in order to decide which MDT to - * create a subdirectory on. The MDS itself fetches - * "dmv" and gets the rest of the default layout itself - * (count, hash, etc). - */ - __u32 lli_def_stripe_offset; - }; - - /* for non-directory */ - struct { - struct mutex lli_size_mutex; - char *lli_symlink_name; - /* - * struct rw_semaphore { - * signed long count; // align d.d_def_acl - * spinlock_t wait_lock; // align d.d_sa_lock - * struct list_head wait_list; - * } - */ - struct rw_semaphore lli_trunc_sem; - struct range_lock_tree lli_write_tree; - - struct rw_semaphore lli_glimpse_sem; - unsigned long lli_glimpse_time; - struct list_head lli_agl_list; - __u64 lli_agl_index; - - /* for writepage() only to communicate to fsync */ - int lli_async_rc; - - /* - * whenever a process try to read/write the file, the - * jobid of the process will be saved here, and it'll - * be packed into the write PRC when flush later. - * - * so the read/write statistics for jobid will not be - * accurate if the file is shared by different jobs. - */ - char lli_jobid[LUSTRE_JOBID_SIZE]; - }; - }; - - /* XXX: For following frequent used members, although they maybe special - * used for non-directory object, it is some time-wasting to check - * whether the object is directory or not before using them. On the - * other hand, currently, sizeof(f) > sizeof(d), it cannot reduce - * the "ll_inode_info" size even if moving those members into u.f. - * So keep them out side. - * - * In the future, if more members are added only for directory, - * some of the following members can be moved into u.f. - */ - struct cl_object *lli_clob; - - /* mutex to request for layout lock exclusively. 
*/ - struct mutex lli_layout_mutex; - /* Layout version, protected by lli_layout_lock */ - __u32 lli_layout_gen; - spinlock_t lli_layout_lock; - - struct rw_semaphore lli_xattrs_list_rwsem; - struct mutex lli_xattrs_enq_lock; - struct list_head lli_xattrs;/* ll_xattr_entry->xe_list */ -}; - -static inline __u32 ll_layout_version_get(struct ll_inode_info *lli) -{ - __u32 gen; - - spin_lock(&lli->lli_layout_lock); - gen = lli->lli_layout_gen; - spin_unlock(&lli->lli_layout_lock); - - return gen; -} - -static inline void ll_layout_version_set(struct ll_inode_info *lli, __u32 gen) -{ - spin_lock(&lli->lli_layout_lock); - lli->lli_layout_gen = gen; - spin_unlock(&lli->lli_layout_lock); -} - -int ll_xattr_cache_destroy(struct inode *inode); - -int ll_xattr_cache_get(struct inode *inode, const char *name, - char *buffer, size_t size, __u64 valid); - -int ll_init_security(struct dentry *dentry, struct inode *inode, - struct inode *dir); - -/* - * Locking to guarantee consistency of non-atomic updates to long long i_size, - * consistency between file size and KMS. - * - * Implemented by ->lli_size_mutex and ->lsm_lock, nested in that order. - */ - -void ll_inode_size_lock(struct inode *inode); -void ll_inode_size_unlock(struct inode *inode); - -/* FIXME: replace the name of this with LL_I to conform to kernel stuff */ -/* static inline struct ll_inode_info *LL_I(struct inode *inode) */ -static inline struct ll_inode_info *ll_i2info(struct inode *inode) -{ - return container_of(inode, struct ll_inode_info, lli_vfs_inode); -} - -/* default to about 64M of readahead on a given system. 
*/ -#define SBI_DEFAULT_READAHEAD_MAX (64UL << (20 - PAGE_SHIFT)) - -/* default to read-ahead full files smaller than 2MB on the second read */ -#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT)) - -enum ra_stat { - RA_STAT_HIT = 0, - RA_STAT_MISS, - RA_STAT_DISTANT_READPAGE, - RA_STAT_MISS_IN_WINDOW, - RA_STAT_FAILED_GRAB_PAGE, - RA_STAT_FAILED_MATCH, - RA_STAT_DISCARDED, - RA_STAT_ZERO_LEN, - RA_STAT_ZERO_WINDOW, - RA_STAT_EOF, - RA_STAT_MAX_IN_FLIGHT, - RA_STAT_WRONG_GRAB_PAGE, - RA_STAT_FAILED_REACH_END, - _NR_RA_STAT, -}; - -struct ll_ra_info { - atomic_t ra_cur_pages; - unsigned long ra_max_pages; - unsigned long ra_max_pages_per_file; - unsigned long ra_max_read_ahead_whole_pages; -}; - -/* ra_io_arg will be filled in the beginning of ll_readahead with - * ras_lock, then the following ll_read_ahead_pages will read RA - * pages according to this arg, all the items in this structure are - * counted by page index. - */ -struct ra_io_arg { - unsigned long ria_start; /* start offset of read-ahead*/ - unsigned long ria_end; /* end offset of read-ahead*/ - unsigned long ria_reserved; /* reserved pages for read-ahead */ - unsigned long ria_end_min; /* minimum end to cover current read */ - bool ria_eof; /* reach end of file */ - /* If stride read pattern is detected, ria_stoff means where - * stride read is started. Note: for normal read-ahead, the - * value here is meaningless, and also it will not be accessed - */ - pgoff_t ria_stoff; - /* ria_length and ria_pages are the length and pages length in the - * stride I/O mode. 
And they will also be used to check whether - * it is stride I/O read-ahead in the read-ahead pages - */ - unsigned long ria_length; - unsigned long ria_pages; -}; - -/* LL_HIST_MAX=32 causes an overflow */ -#define LL_HIST_MAX 28 -#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */ -#define LL_PROCESS_HIST_MAX 10 -struct per_process_info { - pid_t pid; - struct obd_histogram pp_r_hist; - struct obd_histogram pp_w_hist; -}; - -/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */ -struct ll_rw_extents_info { - struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1]; -}; - -#define LL_OFFSET_HIST_MAX 100 -struct ll_rw_process_info { - pid_t rw_pid; - int rw_op; - loff_t rw_range_start; - loff_t rw_range_end; - loff_t rw_last_file_pos; - loff_t rw_offset; - size_t rw_smallest_extent; - size_t rw_largest_extent; - struct ll_file_data *rw_last_file; -}; - -enum stats_track_type { - STATS_TRACK_ALL = 0, /* track all processes */ - STATS_TRACK_PID, /* track process with this pid */ - STATS_TRACK_PPID, /* track processes with this ppid */ - STATS_TRACK_GID, /* track processes with this gid */ - STATS_TRACK_LAST, -}; - -/* flags for sbi->ll_flags */ -#define LL_SBI_NOLCK 0x01 /* DLM locking disabled (directio-only) */ -#define LL_SBI_CHECKSUM 0x02 /* checksum each page as it's written */ -#define LL_SBI_FLOCK 0x04 -#define LL_SBI_USER_XATTR 0x08 /* support user xattr */ -#define LL_SBI_ACL 0x10 /* support ACL */ -/* LL_SBI_RMT_CLIENT 0x40 remote client */ -#define LL_SBI_MDS_CAPA 0x80 /* support mds capa, obsolete */ -#define LL_SBI_OSS_CAPA 0x100 /* support oss capa, obsolete */ -#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */ -#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */ -#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */ -/* LL_SBI_SOM_PREVIEW 0x1000 SOM preview mount option, obsolete */ -#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. 
*/ -#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */ -#define LL_SBI_AGL_ENABLED 0x8000 /* enable agl */ -#define LL_SBI_VERBOSE 0x10000 /* verbose mount/umount */ -#define LL_SBI_LAYOUT_LOCK 0x20000 /* layout lock support */ -#define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */ -#define LL_SBI_XATTR_CACHE 0x80000 /* support for xattr cache */ -#define LL_SBI_NOROOTSQUASH 0x100000 /* do not apply root squash */ -#define LL_SBI_ALWAYS_PING 0x200000 /* always ping even if server - * suppress_pings - */ - -#define LL_SBI_FLAGS { \ - "nolck", \ - "checksum", \ - "flock", \ - "user_xattr", \ - "acl", \ - "???", \ - "???", \ - "mds_capa", \ - "oss_capa", \ - "flock", \ - "lru_resize", \ - "lazy_statfs", \ - "som", \ - "32bit_api", \ - "64bit_hash", \ - "agl", \ - "verbose", \ - "layout", \ - "user_fid2path",\ - "xattr_cache", \ - "norootsquash", \ - "always_ping", \ -} - -/* - * This is embedded into llite super-blocks to keep track of connect - * flags (capabilities) supported by all imports given mount is - * connected to. - */ -struct lustre_client_ocd { - /* - * This is conjunction of connect_flags across all imports - * (LOVs) this mount is connected to. This field is updated by - * cl_ocd_update() under ->lco_lock. - */ - __u64 lco_flags; - struct mutex lco_lock; - struct obd_export *lco_md_exp; - struct obd_export *lco_dt_exp; -}; - -struct ll_sb_info { - /* this protects pglist and ra_info. 
It isn't safe to - * grab from interrupt contexts - */ - spinlock_t ll_lock; - spinlock_t ll_pp_extent_lock; /* pp_extent entry*/ - spinlock_t ll_process_lock; /* ll_rw_process_info */ - struct obd_uuid ll_sb_uuid; - struct obd_export *ll_md_exp; - struct obd_export *ll_dt_exp; - struct dentry *ll_debugfs_entry; - struct lu_fid ll_root_fid; /* root object fid */ - - int ll_flags; - unsigned int ll_umounting:1, - ll_xattr_cache_enabled:1, - ll_client_common_fill_super_succeeded:1; - - struct lustre_client_ocd ll_lco; - - struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ - - /* - * Used to track "unstable" pages on a client, and maintain a - * LRU list of clean pages. An "unstable" page is defined as - * any page which is sent to a server as part of a bulk request, - * but is uncommitted to stable storage. - */ - struct cl_client_cache *ll_cache; - - struct lprocfs_stats *ll_ra_stats; - - struct ll_ra_info ll_ra_info; - unsigned int ll_namelen; - const struct file_operations *ll_fop; - - unsigned int ll_md_brw_pages; /* readdir pages per RPC */ - - struct lu_site *ll_site; - struct cl_device *ll_cl; - /* Statistics */ - struct ll_rw_extents_info ll_rw_extents_info; - int ll_extent_process_count; - struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX]; - unsigned int ll_offset_process_count; - struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX]; - unsigned int ll_rw_offset_entry_count; - int ll_stats_track_id; - enum stats_track_type ll_stats_track_type; - int ll_rw_stats_on; - - /* metadata stat-ahead */ - unsigned int ll_sa_max; /* max statahead RPCs */ - atomic_t ll_sa_total; /* statahead thread started - * count - */ - atomic_t ll_sa_wrong; /* statahead thread stopped for - * low hit ratio - */ - atomic_t ll_sa_running; /* running statahead thread - * count - */ - atomic_t ll_agl_total; /* AGL thread started count */ - - dev_t ll_sdev_orig; /* save s_dev before assign for - * clustered nfs - */ - /* root squash */ - struct 
root_squash_info ll_squash; - struct path ll_mnt; - - __kernel_fsid_t ll_fsid; - struct kobject ll_kobj; /* sysfs object */ - struct super_block *ll_sb; /* struct super_block (for sysfs code)*/ - struct completion ll_kobj_unregister; -}; - -/* - * per file-descriptor read-ahead data. - */ -struct ll_readahead_state { - spinlock_t ras_lock; - /* - * index of the last page that read(2) needed and that wasn't in the - * cache. Used by ras_update() to detect seeks. - * - * XXX nikita: if access seeks into cached region, Lustre doesn't see - * this. - */ - unsigned long ras_last_readpage; - /* - * number of pages read after last read-ahead window reset. As window - * is reset on each seek, this is effectively a number of consecutive - * accesses. Maybe ->ras_accessed_in_window is better name. - * - * XXX nikita: window is also reset (by ras_update()) when Lustre - * believes that memory pressure evicts read-ahead pages. In that - * case, it probably doesn't make sense to expand window to - * PTLRPC_MAX_BRW_PAGES on the third access. - */ - unsigned long ras_consecutive_pages; - /* - * number of read requests after the last read-ahead window reset - * As window is reset on each seek, this is effectively the number - * on consecutive read request and is used to trigger read-ahead. - */ - unsigned long ras_consecutive_requests; - /* - * Parameters of current read-ahead window. Handled by - * ras_update(). On the initial access to the file or after a seek, - * window is reset to 0. After 3 consecutive accesses, window is - * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by - * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages. - */ - unsigned long ras_window_start, ras_window_len; - /* - * Optimal RPC size. It decides how many pages will be sent - * for each read-ahead. - */ - unsigned long ras_rpc_size; - /* - * Where next read-ahead should start at. This lies within read-ahead - * window. 
Read-ahead window is read in pieces rather than at once - * because: 1. lustre limits total number of pages under read-ahead by - * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages - * not covered by DLM lock. - */ - unsigned long ras_next_readahead; - /* - * Total number of ll_file_read requests issued, reads originating - * due to mmap are not counted in this total. This value is used to - * trigger full file read-ahead after multiple reads to a small file. - */ - unsigned long ras_requests; - /* - * Page index with respect to the current request, these value - * will not be accurate when dealing with reads issued via mmap. - */ - unsigned long ras_request_index; - /* - * The following 3 items are used for detecting the stride I/O - * mode. - * In stride I/O mode, - * ...............|-----data-----|****gap*****|--------|******|.... - * offset |-stride_pages-|-stride_gap-| - * ras_stride_offset = offset; - * ras_stride_length = stride_pages + stride_gap; - * ras_stride_pages = stride_pages; - * Note: all these three items are counted by pages. - */ - unsigned long ras_stride_length; - unsigned long ras_stride_pages; - pgoff_t ras_stride_offset; - /* - * number of consecutive stride request count, and it is similar as - * ras_consecutive_requests, but used for stride I/O mode. - * Note: only more than 2 consecutive stride request are detected, - * stride read-ahead will be enable - */ - unsigned long ras_consecutive_stride_requests; -}; - -extern struct kmem_cache *ll_file_data_slab; -struct lustre_handle; -struct ll_file_data { - struct ll_readahead_state fd_ras; - struct ll_grouplock fd_grouplock; - __u64 lfd_pos; - __u32 fd_flags; - fmode_t fd_omode; - /* openhandle if lease exists for this file. - * Borrow lli->lli_och_mutex to protect assignment - */ - struct obd_client_handle *fd_lease_och; - struct obd_client_handle *fd_och; - struct file *fd_file; - /* Indicate whether need to report failure when close. 
- * true: failure is known, not report again. - * false: unknown failure, should report. - */ - bool fd_write_failed; - rwlock_t fd_lock; /* protect lcc list */ - struct list_head fd_lccs; /* list of ll_cl_context */ -}; - -extern struct dentry *llite_root; -extern struct kset *llite_kset; - -static inline struct inode *ll_info2i(struct ll_inode_info *lli) -{ - return &lli->lli_vfs_inode; -} - -__u32 ll_i2suppgid(struct inode *i); -void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2); - -static inline int ll_need_32bit_api(struct ll_sb_info *sbi) -{ -#if BITS_PER_LONG == 32 - return 1; -#elif defined(CONFIG_COMPAT) - return unlikely(in_compat_syscall() || - (sbi->ll_flags & LL_SBI_32BIT_API)); -#else - return unlikely(sbi->ll_flags & LL_SBI_32BIT_API); -#endif -} - -void ll_ras_enter(struct file *f); - -/* llite/lcommon_misc.c */ -int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp); -int cl_ocd_update(struct obd_device *host, - struct obd_device *watched, - enum obd_notify_event ev, void *owner, void *data); -int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock, - struct ll_grouplock *cg); -void cl_put_grouplock(struct ll_grouplock *cg); - -/* llite/lproc_llite.c */ -int ldebugfs_register_mountpoint(struct dentry *parent, - struct super_block *sb, char *osc, char *mdc); -void ldebugfs_unregister_mountpoint(struct ll_sb_info *sbi); -void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count); -void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars); -void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, - struct ll_file_data *file, loff_t pos, - size_t count, int rw); - -enum { - LPROC_LL_DIRTY_HITS, - LPROC_LL_DIRTY_MISSES, - LPROC_LL_READ_BYTES, - LPROC_LL_WRITE_BYTES, - LPROC_LL_BRW_READ, - LPROC_LL_BRW_WRITE, - LPROC_LL_IOCTL, - LPROC_LL_OPEN, - LPROC_LL_RELEASE, - LPROC_LL_MAP, - LPROC_LL_LLSEEK, - LPROC_LL_FSYNC, - LPROC_LL_READDIR, - LPROC_LL_SETATTR, - LPROC_LL_TRUNC, - 
LPROC_LL_FLOCK, - LPROC_LL_GETATTR, - LPROC_LL_CREATE, - LPROC_LL_LINK, - LPROC_LL_UNLINK, - LPROC_LL_SYMLINK, - LPROC_LL_MKDIR, - LPROC_LL_RMDIR, - LPROC_LL_MKNOD, - LPROC_LL_RENAME, - LPROC_LL_STAFS, - LPROC_LL_ALLOC_INODE, - LPROC_LL_SETXATTR, - LPROC_LL_GETXATTR, - LPROC_LL_GETXATTR_HITS, - LPROC_LL_LISTXATTR, - LPROC_LL_REMOVEXATTR, - LPROC_LL_INODE_PERM, - LPROC_LL_FILE_OPCODES -}; - -/* llite/dir.c */ -extern const struct file_operations ll_dir_operations; -extern const struct inode_operations ll_dir_inode_operations; -int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data, - struct dir_context *ctx); -int ll_get_mdt_idx(struct inode *inode); -int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid); -struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data, - __u64 offset); -void ll_release_page(struct inode *inode, struct page *page, bool remove); - -/* llite/namei.c */ -extern const struct inode_operations ll_special_inode_operations; - -struct inode *ll_iget(struct super_block *sb, ino_t hash, - struct lustre_md *lic); -int ll_test_inode_by_fid(struct inode *inode, void *opaque); -int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag); -struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de); -void ll_update_times(struct ptlrpc_request *request, struct inode *inode); - -/* llite/rw.c */ -int ll_writepage(struct page *page, struct writeback_control *wbc); -int ll_writepages(struct address_space *mapping, struct writeback_control *wbc); -int ll_readpage(struct file *file, struct page *page); -void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras); -int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io); -struct ll_cl_context *ll_cl_find(struct file *file); -void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io); -void ll_cl_remove(struct file *file, const struct lu_env *env); - 
-extern const struct address_space_operations ll_aops; - -/* llite/file.c */ -extern const struct file_operations ll_file_operations; -extern const struct file_operations ll_file_operations_flock; -extern const struct file_operations ll_file_operations_noflock; -extern const struct inode_operations ll_file_inode_operations; -int ll_have_md_lock(struct inode *inode, __u64 *bits, - enum ldlm_mode l_req_mode); -enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits, - struct lustre_handle *lockh, __u64 flags, - enum ldlm_mode mode); -int ll_file_open(struct inode *inode, struct file *file); -int ll_file_release(struct inode *inode, struct file *file); -int ll_release_openhandle(struct inode *inode, struct lookup_intent *it); -int ll_md_real_close(struct inode *inode, fmode_t fmode); -int ll_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags); -#ifdef CONFIG_FS_POSIX_ACL -struct posix_acl *ll_get_acl(struct inode *inode, int type); -int ll_set_acl(struct inode *inode, struct posix_acl *acl, int type); -#else -#define ll_get_acl NULL -#define ll_set_acl NULL -#endif /* CONFIG_FS_POSIX_ACL */ - -int ll_migrate(struct inode *parent, struct file *file, int mdtidx, - const char *name, int namelen); -int ll_get_fid_by_name(struct inode *parent, const char *name, - int namelen, struct lu_fid *fid, struct inode **inode); -int ll_inode_permission(struct inode *inode, int mask); - -int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, - __u64 flags, struct lov_user_md *lum, - int lum_size); -int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, - struct lov_mds_md **lmm, int *lmm_size, - struct ptlrpc_request **request); -int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, - int set_default); -int ll_dir_getstripe(struct inode *inode, void **lmmp, int *lmm_size, - struct ptlrpc_request **request, u64 valid); -int ll_fsync(struct file *file, loff_t start, loff_t end, int data); 
-int ll_merge_attr(const struct lu_env *env, struct inode *inode); -int ll_fid2path(struct inode *inode, void __user *arg); -int ll_data_version(struct inode *inode, __u64 *data_version, int flags); -int ll_hsm_release(struct inode *inode); -int ll_hsm_state_set(struct inode *inode, struct hsm_state_set *hss); - -/* llite/dcache.c */ - -extern const struct dentry_operations ll_d_ops; -void ll_intent_drop_lock(struct lookup_intent *it); -void ll_intent_release(struct lookup_intent *it); -void ll_invalidate_aliases(struct inode *inode); -void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode); -int ll_revalidate_it_finish(struct ptlrpc_request *request, - struct lookup_intent *it, struct inode *inode); - -/* llite/llite_lib.c */ -extern struct super_operations lustre_super_operations; - -void ll_lli_init(struct ll_inode_info *lli); -int ll_fill_super(struct super_block *sb); -void ll_put_super(struct super_block *sb); -void ll_kill_super(struct super_block *sb); -struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock); -void ll_dir_clear_lsm_md(struct inode *inode); -void ll_clear_inode(struct inode *inode); -int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import); -int ll_setattr(struct dentry *de, struct iattr *attr); -int ll_statfs(struct dentry *de, struct kstatfs *sfs); -int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, - __u64 max_age, __u32 flags); -int ll_update_inode(struct inode *inode, struct lustre_md *md); -int ll_read_inode2(struct inode *inode, void *opaque); -void ll_delete_inode(struct inode *inode); -int ll_iocontrol(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg); -int ll_flush_ctx(struct inode *inode); -void ll_umount_begin(struct super_block *sb); -int ll_remount_fs(struct super_block *sb, int *flags, char *data); -int ll_show_options(struct seq_file *seq, struct dentry *dentry); -void ll_dirty_page_discard_warn(struct page *page, int ioret); 
-int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, - struct super_block *sb, struct lookup_intent *it); -int ll_obd_statfs(struct inode *inode, void __user *arg); -int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); -int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize); -int ll_set_default_mdsize(struct ll_sb_info *sbi, int default_mdsize); -int ll_process_config(struct lustre_cfg *lcfg); - -enum { - LUSTRE_OPC_MKDIR = 0, - LUSTRE_OPC_SYMLINK = 1, - LUSTRE_OPC_MKNOD = 2, - LUSTRE_OPC_CREATE = 3, - LUSTRE_OPC_ANY = 5, -}; - -struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, - struct inode *i1, struct inode *i2, - const char *name, size_t namelen, - u32 mode, __u32 opc, void *data); -void ll_finish_md_op_data(struct md_op_data *op_data); -int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg); -char *ll_get_fsname(struct super_block *sb, char *buf, int buflen); -void ll_compute_rootsquash_state(struct ll_sb_info *sbi); -void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req); -ssize_t ll_copy_user_md(const struct lov_user_md __user *md, - struct lov_user_md **kbuf); - -/* Compute expected user md size when passing in a md from user space */ -static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum) -{ - switch (lum->lmm_magic) { - case LOV_USER_MAGIC_V1: - return sizeof(struct lov_user_md_v1); - case LOV_USER_MAGIC_V3: - return sizeof(struct lov_user_md_v3); - case LOV_USER_MAGIC_SPECIFIC: - if (lum->lmm_stripe_count > LOV_MAX_STRIPE_COUNT) - return -EINVAL; - - return lov_user_md_size(lum->lmm_stripe_count, - LOV_USER_MAGIC_SPECIFIC); - } - return -EINVAL; -} - -/* llite/llite_nfs.c */ -extern const struct export_operations lustre_export_operations; -__u32 get_uuid2int(const char *name, int len); -void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid); -struct inode *search_inode_for_lustre(struct super_block *sb, - const struct 
lu_fid *fid); -int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid); - -/* llite/symlink.c */ -extern const struct inode_operations ll_fast_symlink_inode_operations; - -/** - * IO arguments for various VFS I/O interfaces. - */ -struct vvp_io_args { - /** normal/splice */ - union { - struct { - struct kiocb *via_iocb; - struct iov_iter *via_iter; - } normal; - } u; -}; - -struct ll_cl_context { - struct list_head lcc_list; - void *lcc_cookie; - const struct lu_env *lcc_env; - struct cl_io *lcc_io; - struct cl_page *lcc_page; -}; - -struct ll_thread_info { - struct vvp_io_args lti_args; - struct ra_io_arg lti_ria; - struct ll_cl_context lti_io_ctx; -}; - -extern struct lu_context_key ll_thread_key; -static inline struct ll_thread_info *ll_env_info(const struct lu_env *env) -{ - struct ll_thread_info *lti; - - lti = lu_context_key_get(&env->le_ctx, &ll_thread_key); - LASSERT(lti); - return lti; -} - -static inline struct vvp_io_args *ll_env_args(const struct lu_env *env) -{ - return &ll_env_info(env)->lti_args; -} - -/* llite/llite_mmap.c */ - -int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last); -int ll_file_mmap(struct file *file, struct vm_area_struct *vma); -void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma, - unsigned long addr, size_t count); -struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr, - size_t count); - -static inline void ll_invalidate_page(struct page *vmpage) -{ - struct address_space *mapping = vmpage->mapping; - loff_t offset = vmpage->index << PAGE_SHIFT; - - LASSERT(PageLocked(vmpage)); - if (!mapping) - return; - - /* - * truncate_complete_page() calls - * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete(). 
- */ - ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE); - truncate_complete_page(mapping, vmpage); -} - -#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi) - -/* don't need an addref as the sb_info should be holding one */ -static inline struct obd_export *ll_s2dtexp(struct super_block *sb) -{ - return ll_s2sbi(sb)->ll_dt_exp; -} - -/* don't need an addref as the sb_info should be holding one */ -static inline struct obd_export *ll_s2mdexp(struct super_block *sb) -{ - return ll_s2sbi(sb)->ll_md_exp; -} - -static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi) -{ - struct obd_device *obd = sbi->ll_md_exp->exp_obd; - - if (!obd) - LBUG(); - return &obd->u.cli; -} - -/* FIXME: replace the name of this with LL_SB to conform to kernel stuff */ -static inline struct ll_sb_info *ll_i2sbi(struct inode *inode) -{ - return ll_s2sbi(inode->i_sb); -} - -static inline struct obd_export *ll_i2dtexp(struct inode *inode) -{ - return ll_s2dtexp(inode->i_sb); -} - -static inline struct obd_export *ll_i2mdexp(struct inode *inode) -{ - return ll_s2mdexp(inode->i_sb); -} - -static inline struct lu_fid *ll_inode2fid(struct inode *inode) -{ - struct lu_fid *fid; - - LASSERT(inode); - fid = &ll_i2info(inode)->lli_fid; - - return fid; -} - -static inline loff_t ll_file_maxbytes(struct inode *inode) -{ - struct cl_object *obj = ll_i2info(inode)->lli_clob; - - if (!obj) - return MAX_LFS_FILESIZE; - - return min_t(loff_t, cl_object_maxbytes(obj), MAX_LFS_FILESIZE); -} - -/* llite/xattr.c */ -extern const struct xattr_handler *ll_xattr_handlers[]; - -#define XATTR_USER_T 1 -#define XATTR_TRUSTED_T 2 -#define XATTR_SECURITY_T 3 -#define XATTR_ACL_ACCESS_T 4 -#define XATTR_ACL_DEFAULT_T 5 -#define XATTR_LUSTRE_T 6 -#define XATTR_OTHER_T 7 - -ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size); -int ll_xattr_list(struct inode *inode, const char *name, int type, - void *buffer, size_t size, __u64 valid); -const struct xattr_handler *get_xattr_type(const char *name); - 
-/** - * Common IO arguments for various VFS I/O interfaces. - */ -int cl_sb_init(struct super_block *sb); -int cl_sb_fini(struct super_block *sb); - -enum ras_update_flags { - LL_RAS_HIT = 0x1, - LL_RAS_MMAP = 0x2 -}; -void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len); -void ll_ra_stats_inc(struct inode *inode, enum ra_stat which); - -/* statahead.c */ -#define LL_SA_RPC_MIN 2 -#define LL_SA_RPC_DEF 32 -#define LL_SA_RPC_MAX 8192 - -#define LL_SA_CACHE_BIT 5 -#define LL_SA_CACHE_SIZE (1 << LL_SA_CACHE_BIT) -#define LL_SA_CACHE_MASK (LL_SA_CACHE_SIZE - 1) - -/* per inode struct, for dir only */ -struct ll_statahead_info { - struct dentry *sai_dentry; - atomic_t sai_refcount; /* when access this struct, hold - * refcount - */ - unsigned int sai_max; /* max ahead of lookup */ - __u64 sai_sent; /* stat requests sent count */ - __u64 sai_replied; /* stat requests which received - * reply - */ - __u64 sai_index; /* index of statahead entry */ - __u64 sai_index_wait; /* index of entry which is the - * caller is waiting for - */ - __u64 sai_hit; /* hit count */ - __u64 sai_miss; /* miss count: - * for "ls -al" case, it includes - * hidden dentry miss; - * for "ls -l" case, it does not - * include hidden dentry miss. - * "sai_miss_hidden" is used for - * the later case. 
- */ - unsigned int sai_consecutive_miss; /* consecutive miss */ - unsigned int sai_miss_hidden;/* "ls -al", but first dentry - * is not a hidden one - */ - unsigned int sai_skip_hidden;/* skipped hidden dentry count */ - unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for - * hidden entries - */ - sai_agl_valid:1,/* AGL is valid for the dir */ - sai_in_readpage:1;/* statahead in readdir() */ - wait_queue_head_t sai_waitq; /* stat-ahead wait queue */ - struct task_struct *sai_task; /* stat-ahead thread */ - struct task_struct *sai_agl_task; /* AGL thread */ - struct list_head sai_interim_entries; /* entries which got async - * stat reply, but not - * instantiated - */ - struct list_head sai_entries; /* completed entries */ - struct list_head sai_agls; /* AGLs to be sent */ - struct list_head sai_cache[LL_SA_CACHE_SIZE]; - spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE]; - atomic_t sai_cache_count; /* entry count in cache */ -}; - -int ll_statahead(struct inode *dir, struct dentry **dentry, bool unplug); -void ll_authorize_statahead(struct inode *dir, void *key); -void ll_deauthorize_statahead(struct inode *dir, void *key); - -blkcnt_t dirty_cnt(struct inode *inode); - -int cl_glimpse_size0(struct inode *inode, int agl); -int cl_glimpse_lock(const struct lu_env *env, struct cl_io *io, - struct inode *inode, struct cl_object *clob, int agl); - -static inline int cl_glimpse_size(struct inode *inode) -{ - return cl_glimpse_size0(inode, 0); -} - -static inline int cl_agl(struct inode *inode) -{ - return cl_glimpse_size0(inode, 1); -} - -static inline int ll_glimpse_size(struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - int rc; - - down_read(&lli->lli_glimpse_sem); - rc = cl_glimpse_size(inode); - lli->lli_glimpse_time = jiffies; - up_read(&lli->lli_glimpse_sem); - return rc; -} - -/* - * dentry may statahead when statahead is enabled and current process has opened - * parent directory, and this dentry hasn't accessed statahead cache before - */ 
-static inline bool -dentry_may_statahead(struct inode *dir, struct dentry *dentry) -{ - struct ll_inode_info *lli; - struct ll_dentry_data *ldd; - - if (ll_i2sbi(dir)->ll_sa_max == 0) - return false; - - lli = ll_i2info(dir); - - /* - * statahead is not allowed for this dir, there may be three causes: - * 1. dir is not opened. - * 2. statahead hit ratio is too low. - * 3. previous stat started statahead thread failed. - */ - if (!lli->lli_sa_enabled) - return false; - - /* not the same process, don't statahead */ - if (lli->lli_opendir_pid != current->pid) - return false; - - /* - * When stating a dentry, kernel may trigger 'revalidate' or 'lookup' - * multiple times, eg. for 'getattr', 'getxattr' and etc. - * For patchless client, lookup intent is not accurate, which may - * misguide statahead. For example: - * The 'revalidate' call for 'getattr' and 'getxattr' of a dentry will - * have the same intent -- IT_GETATTR, while one dentry should access - * statahead cache once, otherwise statahead windows is messed up. - * The solution is as following: - * Assign 'lld_sa_generation' with 'lli_sa_generation' when a dentry - * IT_GETATTR for the first time, and subsequent IT_GETATTR will - * bypass interacting with statahead cache by checking - * 'lld_sa_generation == lli->lli_sa_generation'. - */ - ldd = ll_d2d(dentry); - if (ldd->lld_sa_generation == lli->lli_sa_generation) - return false; - - return true; -} - -/* llite ioctl register support routine */ -enum llioc_iter { - LLIOC_CONT = 0, - LLIOC_STOP -}; - -#define LLIOC_MAX_CMD 256 - -/* - * Rules to write a callback function: - * - * Parameters: - * @magic: Dynamic ioctl call routine will feed this value with the pointer - * returned to ll_iocontrol_register. Callback functions should use this - * data to check the potential collasion of ioctl cmd. If collasion is - * found, callback function should return LLIOC_CONT. - * @rcp: The result of ioctl command. 
- * - * Return values: - * If @magic matches the pointer returned by ll_iocontrol_data, the - * callback should return LLIOC_STOP; return LLIOC_STOP otherwise. - */ -typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode, - struct file *file, unsigned int cmd, unsigned long arg, - void *magic, int *rcp); - -/* export functions */ -/* Register ioctl block dynamatically for a regular file. - * - * @cmd: the array of ioctl command set - * @count: number of commands in the @cmd - * @cb: callback function, it will be called if an ioctl command is found to - * belong to the command list @cmd. - * - * Return value: - * A magic pointer will be returned if success; - * otherwise, NULL will be returned. - */ -void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd); -void ll_iocontrol_unregister(void *magic); - -int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end, - enum cl_fsync_mode mode, int ignore_layout); - -/** direct write pages */ -struct ll_dio_pages { - /** page array to be written. we don't support - * partial pages except the last one. - */ - struct page **ldp_pages; - /* offset of each page */ - loff_t *ldp_offsets; - /** if ldp_offsets is NULL, it means a sequential - * pages to be written, then this is the file offset - * of the first page. - */ - loff_t ldp_start_offset; - /** how many bytes are to be written. */ - size_t ldp_size; - /** # of pages in the array. 
*/ - int ldp_nr; -}; - -ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, - int rw, struct inode *inode, - struct ll_dio_pages *pv); - -static inline int ll_file_nolock(const struct file *file) -{ - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct inode *inode = file_inode(file); - - return ((fd->fd_flags & LL_FILE_IGNORE_LOCK) || - (ll_i2sbi(inode)->ll_flags & LL_SBI_NOLCK)); -} - -static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode, - struct lookup_intent *it, __u64 *bits) -{ - if (!it->it_lock_set) { - struct lustre_handle handle; - - /* If this inode is a remote object, it will get two - * separate locks in different namespaces, Master MDT, - * where the name entry is, will grant LOOKUP lock, - * remote MDT, where the object is, will grant - * UPDATE|PERM lock. The inode will be attached to both - * LOOKUP and PERM locks, so revoking either locks will - * case the dcache being cleared - */ - if (it->it_remote_lock_mode) { - handle.cookie = it->it_remote_lock_handle; - CDEBUG(D_DLMTRACE, "setting l_data to inode " DFID "%p for remote lock %#llx\n", - PFID(ll_inode2fid(inode)), inode, - handle.cookie); - md_set_lock_data(exp, &handle, inode, NULL); - } - - handle.cookie = it->it_lock_handle; - - CDEBUG(D_DLMTRACE, - "setting l_data to inode " DFID "%p for lock %#llx\n", - PFID(ll_inode2fid(inode)), inode, handle.cookie); - - md_set_lock_data(exp, &handle, inode, &it->it_lock_bits); - it->it_lock_set = 1; - } - - if (bits) - *bits = it->it_lock_bits; -} - -static inline int d_lustre_invalid(const struct dentry *dentry) -{ - return ll_d2d(dentry)->lld_invalid; -} - -/* - * Mark dentry INVALID, if dentry refcount is zero (this is normally case for - * ll_md_blocking_ast), unhash this dentry, and let dcache to reclaim it later; - * else dput() of the last refcount will unhash this dentry and kill it. 
- */ -static inline void d_lustre_invalidate(struct dentry *dentry, int nested) -{ - CDEBUG(D_DENTRY, - "invalidate dentry %pd (%p) parent %p inode %p refc %d\n", - dentry, dentry, - dentry->d_parent, d_inode(dentry), d_count(dentry)); - - spin_lock_nested(&dentry->d_lock, - nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL); - ll_d2d(dentry)->lld_invalid = 1; - if (d_count(dentry) == 0) - __d_drop(dentry); - spin_unlock(&dentry->d_lock); -} - -static inline void d_lustre_revalidate(struct dentry *dentry) -{ - spin_lock(&dentry->d_lock); - LASSERT(ll_d2d(dentry)); - ll_d2d(dentry)->lld_invalid = 0; - spin_unlock(&dentry->d_lock); -} - -int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf); -int ll_layout_refresh(struct inode *inode, __u32 *gen); -int ll_layout_restore(struct inode *inode, loff_t start, __u64 length); - -int ll_xattr_init(void); -void ll_xattr_fini(void); - -int ll_page_sync_io(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, enum cl_req_type crt); - -int ll_getparent(struct file *file, struct getparent __user *arg); - -/* lcommon_cl.c */ -int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr, - unsigned int attr_flags); - -extern struct lu_env *cl_inode_fini_env; -extern u16 cl_inode_fini_refcheck; - -int cl_file_inode_init(struct inode *inode, struct lustre_md *md); -void cl_inode_fini(struct inode *inode); - -__u64 cl_fid_build_ino(const struct lu_fid *fid, int api32); -__u32 cl_fid_build_gen(const struct lu_fid *fid); - -#endif /* LLITE_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c deleted file mode 100644 index 36066c839160..000000000000 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ /dev/null @@ -1,2668 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/llite/llite_lib.c - * - * Lustre Light Super operations - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "llite_internal.h" - -struct kmem_cache *ll_file_data_slab; -struct dentry *llite_root; -struct kset *llite_kset; - -#ifndef log2 -#define log2(n) ffz(~(n)) -#endif - -static struct ll_sb_info *ll_init_sbi(struct super_block *sb) -{ - struct ll_sb_info *sbi = NULL; - unsigned long pages; - unsigned long lru_page_max; - struct sysinfo si; - class_uuid_t uuid; - int i; - - sbi = kzalloc(sizeof(*sbi), GFP_NOFS); - if (!sbi) - return NULL; - - spin_lock_init(&sbi->ll_lock); - mutex_init(&sbi->ll_lco.lco_lock); - spin_lock_init(&sbi->ll_pp_extent_lock); - spin_lock_init(&sbi->ll_process_lock); - sbi->ll_rw_stats_on = 0; - - si_meminfo(&si); - pages = si.totalram - si.totalhigh; - lru_page_max = pages / 2; - - sbi->ll_cache = cl_cache_init(lru_page_max); - if (!sbi->ll_cache) { - kfree(sbi); - return NULL; - } - - sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32, - SBI_DEFAULT_READAHEAD_MAX); - sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file; - sbi->ll_ra_info.ra_max_read_ahead_whole_pages = - SBI_DEFAULT_READAHEAD_WHOLE_MAX; - - ll_generate_random_uuid(uuid); - class_uuid_unparse(uuid, &sbi->ll_sb_uuid); - CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid); - - sbi->ll_flags |= LL_SBI_VERBOSE; - sbi->ll_flags |= LL_SBI_CHECKSUM; - - sbi->ll_flags |= LL_SBI_LRU_RESIZE; - sbi->ll_flags |= LL_SBI_LAZYSTATFS; - - for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { - spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i]. - pp_r_hist.oh_lock); - spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i]. 
- pp_w_hist.oh_lock); - } - - /* metadata statahead is enabled by default */ - sbi->ll_sa_max = LL_SA_RPC_DEF; - atomic_set(&sbi->ll_sa_total, 0); - atomic_set(&sbi->ll_sa_wrong, 0); - atomic_set(&sbi->ll_sa_running, 0); - atomic_set(&sbi->ll_agl_total, 0); - sbi->ll_flags |= LL_SBI_AGL_ENABLED; - - /* root squash */ - sbi->ll_squash.rsi_uid = 0; - sbi->ll_squash.rsi_gid = 0; - INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids); - init_rwsem(&sbi->ll_squash.rsi_sem); - - sbi->ll_sb = sb; - - return sbi; -} - -static void ll_free_sbi(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - - if (sbi->ll_cache) { - if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids)) - cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids); - cl_cache_decref(sbi->ll_cache); - sbi->ll_cache = NULL; - } - - kfree(sbi); -} - -static int client_common_fill_super(struct super_block *sb, char *md, char *dt) -{ - struct inode *root = NULL; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; - struct obd_statfs *osfs = NULL; - struct ptlrpc_request *request = NULL; - struct obd_connect_data *data = NULL; - struct obd_uuid *uuid; - struct md_op_data *op_data; - struct lustre_md lmd; - u64 valid; - int size, err, checksum; - - obd = class_name2obd(md); - if (!obd) { - CERROR("MD %s: not setup or attached\n", md); - return -EINVAL; - } - - data = kzalloc(sizeof(*data), GFP_NOFS); - if (!data) - return -ENOMEM; - - osfs = kzalloc(sizeof(*osfs), GFP_NOFS); - if (!osfs) { - kfree(data); - return -ENOMEM; - } - - /* indicate the features supported by this client */ - data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH | - OBD_CONNECT_ATTRFID | - OBD_CONNECT_VERSION | OBD_CONNECT_BRW_SIZE | - OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | - OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 | - OBD_CONNECT_VBR | OBD_CONNECT_FULL20 | - OBD_CONNECT_64BITHASH | - OBD_CONNECT_EINPROGRESS | - OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_LAYOUTLOCK | - OBD_CONNECT_PINGLESS 
| - OBD_CONNECT_MAX_EASIZE | - OBD_CONNECT_FLOCK_DEAD | - OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK | - OBD_CONNECT_OPEN_BY_FID | - OBD_CONNECT_DIR_STRIPE | - OBD_CONNECT_BULK_MBITS; - - if (sbi->ll_flags & LL_SBI_LRU_RESIZE) - data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; -#ifdef CONFIG_FS_POSIX_ACL - data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_UMASK | - OBD_CONNECT_LARGE_ACL; -#endif - - if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT)) - /* flag mdc connection as lightweight, only used for test - * purpose, use with care - */ - data->ocd_connect_flags |= OBD_CONNECT_LIGHTWEIGHT; - - data->ocd_ibits_known = MDS_INODELOCK_FULL; - data->ocd_version = LUSTRE_VERSION_CODE; - - if (sb_rdonly(sb)) - data->ocd_connect_flags |= OBD_CONNECT_RDONLY; - if (sbi->ll_flags & LL_SBI_USER_XATTR) - data->ocd_connect_flags |= OBD_CONNECT_XATTR; - - if (sbi->ll_flags & LL_SBI_FLOCK) - sbi->ll_fop = &ll_file_operations_flock; - else if (sbi->ll_flags & LL_SBI_LOCALFLOCK) - sbi->ll_fop = &ll_file_operations; - else - sbi->ll_fop = &ll_file_operations_noflock; - - /* always ping even if server suppress_pings */ - if (sbi->ll_flags & LL_SBI_ALWAYS_PING) - data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; - - data->ocd_brw_size = MD_MAX_BRW_SIZE; - - err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, - data, NULL); - if (err == -EBUSY) { - LCONSOLE_ERROR_MSG(0x14f, - "An MDT (md %s) is performing recovery, of which this client is not a part. 
Please wait for recovery to complete, abort, or time out.\n", - md); - goto out; - } - - if (err) { - CERROR("cannot connect to %s: rc = %d\n", md, err); - goto out; - } - - sbi->ll_md_exp->exp_connect_data = *data; - - err = obd_fid_init(sbi->ll_md_exp->exp_obd, sbi->ll_md_exp, - LUSTRE_SEQ_METADATA); - if (err) { - CERROR("%s: Can't init metadata layer FID infrastructure, rc = %d\n", - sbi->ll_md_exp->exp_obd->obd_name, err); - goto out_md; - } - - /* For mount, we only need fs info from MDT0, and also in DNE, it - * can make sure the client can be mounted as long as MDT0 is - * available - */ - err = obd_statfs(NULL, sbi->ll_md_exp, osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_FOR_MDT0); - if (err) - goto out_md_fid; - - /* This needs to be after statfs to ensure connect has finished. - * Note that "data" does NOT contain the valid connect reply. - * If connecting to a 1.8 server there will be no LMV device, so - * we can access the MDC export directly and exp_connect_flags will - * be non-zero, but if accessing an upgraded 2.1 server it will - * have the correct flags filled in. - * XXX: fill in the LMV exp_connect_flags from MDC(s). - */ - valid = exp_connect_flags(sbi->ll_md_exp) & CLIENT_CONNECT_MDT_REQD; - if (exp_connect_flags(sbi->ll_md_exp) != 0 && - valid != CLIENT_CONNECT_MDT_REQD) { - char *buf; - - buf = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!buf) { - err = -ENOMEM; - goto out_md_fid; - } - obd_connect_flags2str(buf, PAGE_SIZE, - valid ^ CLIENT_CONNECT_MDT_REQD, ","); - LCONSOLE_ERROR_MSG(0x170, - "Server %s does not support feature(s) needed for correct operation of this client (%s). 
Please upgrade server or downgrade client.\n", - sbi->ll_md_exp->exp_obd->obd_name, buf); - kfree(buf); - err = -EPROTO; - goto out_md_fid; - } - - size = sizeof(*data); - err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA), - KEY_CONN_DATA, &size, data); - if (err) { - CERROR("%s: Get connect data failed: rc = %d\n", - sbi->ll_md_exp->exp_obd->obd_name, err); - goto out_md_fid; - } - - LASSERT(osfs->os_bsize); - sb->s_blocksize = osfs->os_bsize; - sb->s_blocksize_bits = log2(osfs->os_bsize); - sb->s_magic = LL_SUPER_MAGIC; - sb->s_maxbytes = MAX_LFS_FILESIZE; - sbi->ll_namelen = osfs->os_namelen; - sbi->ll_mnt.mnt = current->fs->root.mnt; - - if ((sbi->ll_flags & LL_SBI_USER_XATTR) && - !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) { - LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n"); - sbi->ll_flags &= ~LL_SBI_USER_XATTR; - } - - if (data->ocd_connect_flags & OBD_CONNECT_ACL) { - sb->s_flags |= SB_POSIXACL; - sbi->ll_flags |= LL_SBI_ACL; - } else { - LCONSOLE_INFO("client wants to enable acl, but mdt not!\n"); - sb->s_flags &= ~SB_POSIXACL; - sbi->ll_flags &= ~LL_SBI_ACL; - } - - if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH) - sbi->ll_flags |= LL_SBI_64BIT_HASH; - - if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) - sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_SHIFT; - else - sbi->ll_md_brw_pages = 1; - - if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) - sbi->ll_flags |= LL_SBI_LAYOUT_LOCK; - - if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) { - if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) { - LCONSOLE_INFO( - "%s: disabling xattr cache due to unknown maximum xattr size.\n", - dt); - } else { - sbi->ll_flags |= LL_SBI_XATTR_CACHE; - sbi->ll_xattr_cache_enabled = 1; - } - } - - obd = class_name2obd(dt); - if (!obd) { - CERROR("DT %s: not setup or attached\n", dt); - err = -ENODEV; - goto out_md_fid; - } - - data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | - 
OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE | - OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | - OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK| - OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA | - OBD_CONNECT_VBR | OBD_CONNECT_FULL20 | - OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | - OBD_CONNECT_EINPROGRESS | - OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_LAYOUTLOCK | - OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK | - OBD_CONNECT_BULK_MBITS; - - if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) { - /* OBD_CONNECT_CKSUM should always be set, even if checksums are - * disabled by default, because it can still be enabled on the - * fly via /sys. As a consequence, we still need to come to an - * agreement on the supported algorithms at connect time - */ - data->ocd_connect_flags |= OBD_CONNECT_CKSUM; - - if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY)) - data->ocd_cksum_types = OBD_CKSUM_ADLER; - else - data->ocd_cksum_types = cksum_types_supported_client(); - } - - data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; - - /* always ping even if server suppress_pings */ - if (sbi->ll_flags & LL_SBI_ALWAYS_PING) - data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS; - - CDEBUG(D_RPCTRACE, - "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d\n", - data->ocd_connect_flags, - data->ocd_version, data->ocd_grant); - - obd->obd_upcall.onu_owner = &sbi->ll_lco; - obd->obd_upcall.onu_upcall = cl_ocd_update; - - data->ocd_brw_size = DT_MAX_BRW_SIZE; - - err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, data, - NULL); - if (err == -EBUSY) { - LCONSOLE_ERROR_MSG(0x150, - "An OST (dt %s) is performing recovery, of which this client is not a part. 
Please wait for recovery to complete, abort, or time out.\n", - dt); - goto out_md_fid; - } else if (err) { - CERROR("%s: Cannot connect to %s: rc = %d\n", - sbi->ll_dt_exp->exp_obd->obd_name, dt, err); - goto out_md_fid; - } - - sbi->ll_dt_exp->exp_connect_data = *data; - - err = obd_fid_init(sbi->ll_dt_exp->exp_obd, sbi->ll_dt_exp, - LUSTRE_SEQ_METADATA); - if (err) { - CERROR("%s: Can't init data layer FID infrastructure, rc = %d\n", - sbi->ll_dt_exp->exp_obd->obd_name, err); - goto out_dt; - } - - mutex_lock(&sbi->ll_lco.lco_lock); - sbi->ll_lco.lco_flags = data->ocd_connect_flags; - sbi->ll_lco.lco_md_exp = sbi->ll_md_exp; - sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp; - mutex_unlock(&sbi->ll_lco.lco_lock); - - fid_zero(&sbi->ll_root_fid); - err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid); - if (err) { - CERROR("cannot mds_connect: rc = %d\n", err); - goto out_lock_cn_cb; - } - if (!fid_is_sane(&sbi->ll_root_fid)) { - CERROR("%s: Invalid root fid " DFID " during mount\n", - sbi->ll_md_exp->exp_obd->obd_name, - PFID(&sbi->ll_root_fid)); - err = -EINVAL; - goto out_lock_cn_cb; - } - CDEBUG(D_SUPER, "rootfid " DFID "\n", PFID(&sbi->ll_root_fid)); - - sb->s_op = &lustre_super_operations; - sb->s_xattr = ll_xattr_handlers; -#if THREAD_SIZE >= 8192 /*b=17630*/ - sb->s_export_op = &lustre_export_operations; -#endif - - /* make root inode - * XXX: move this to after cbd setup? 
- */ - valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE; - if (sbi->ll_flags & LL_SBI_ACL) - valid |= OBD_MD_FLACL; - - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - if (!op_data) { - err = -ENOMEM; - goto out_lock_cn_cb; - } - - op_data->op_fid1 = sbi->ll_root_fid; - op_data->op_mode = 0; - op_data->op_valid = valid; - - err = md_getattr(sbi->ll_md_exp, op_data, &request); - kfree(op_data); - if (err) { - CERROR("%s: md_getattr failed for root: rc = %d\n", - sbi->ll_md_exp->exp_obd->obd_name, err); - goto out_lock_cn_cb; - } - - err = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp, - sbi->ll_md_exp, &lmd); - if (err) { - CERROR("failed to understand root inode md: rc = %d\n", err); - ptlrpc_req_finished(request); - goto out_lock_cn_cb; - } - - LASSERT(fid_is_sane(&sbi->ll_root_fid)); - root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid, - sbi->ll_flags & LL_SBI_32BIT_API), - &lmd); - md_free_lustre_md(sbi->ll_md_exp, &lmd); - ptlrpc_req_finished(request); - - if (IS_ERR(root)) { -#ifdef CONFIG_FS_POSIX_ACL - if (lmd.posix_acl) { - posix_acl_release(lmd.posix_acl); - lmd.posix_acl = NULL; - } -#endif - err = -EBADF; - CERROR("lustre_lite: bad iget4 for root\n"); - goto out_root; - } - - checksum = sbi->ll_flags & LL_SBI_CHECKSUM; - err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM), - KEY_CHECKSUM, sizeof(checksum), &checksum, - NULL); - if (err) { - CERROR("%s: Set checksum failed: rc = %d\n", - sbi->ll_dt_exp->exp_obd->obd_name, err); - goto out_root; - } - cl_sb_init(sb); - - err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET), - KEY_CACHE_SET, sizeof(*sbi->ll_cache), - sbi->ll_cache, NULL); - if (err) { - CERROR("%s: Set cache_set failed: rc = %d\n", - sbi->ll_dt_exp->exp_obd->obd_name, err); - goto out_root; - } - - sb->s_root = d_make_root(root); - if (!sb->s_root) { - CERROR("%s: can't make root dentry\n", - ll_get_fsname(sb, NULL, 0)); - err = -ENOMEM; - goto out_lock_cn_cb; - } - - 
sbi->ll_sdev_orig = sb->s_dev; - - /* We set sb->s_dev equal on all lustre clients in order to support - * NFS export clustering. NFSD requires that the FSID be the same - * on all clients. - */ - /* s_dev is also used in lt_compare() to compare two fs, but that is - * only a node-local comparison. - */ - uuid = obd_get_uuid(sbi->ll_md_exp); - if (uuid) { - sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid)); - get_uuid2fsid(uuid->uuid, strlen(uuid->uuid), &sbi->ll_fsid); - } - - kfree(data); - kfree(osfs); - - if (llite_root) { - err = ldebugfs_register_mountpoint(llite_root, sb, dt, md); - if (err < 0) { - CERROR("%s: could not register mount in debugfs: " - "rc = %d\n", ll_get_fsname(sb, NULL, 0), err); - err = 0; - } - } - - return err; -out_root: - iput(root); -out_lock_cn_cb: - obd_fid_fini(sbi->ll_dt_exp->exp_obd); -out_dt: - obd_disconnect(sbi->ll_dt_exp); - sbi->ll_dt_exp = NULL; -out_md_fid: - obd_fid_fini(sbi->ll_md_exp->exp_obd); -out_md: - obd_disconnect(sbi->ll_md_exp); - sbi->ll_md_exp = NULL; -out: - kfree(data); - kfree(osfs); - return err; -} - -int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize) -{ - int size, rc; - - size = sizeof(*lmmsize); - rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE), - KEY_MAX_EASIZE, &size, lmmsize); - if (rc) { - CERROR("%s: cannot get max LOV EA size: rc = %d\n", - sbi->ll_dt_exp->exp_obd->obd_name, rc); - return rc; - } - - size = sizeof(int); - rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE), - KEY_MAX_EASIZE, &size, lmmsize); - if (rc) - CERROR("Get max mdsize error rc %d\n", rc); - - return rc; -} - -/** - * Get the value of the default_easize parameter. 
- * - * \see client_obd::cl_default_mds_easize - * - * \param[in] sbi superblock info for this filesystem - * \param[out] lmmsize pointer to storage location for value - * - * \retval 0 on success - * \retval negative negated errno on failure - */ -int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize) -{ - int size, rc; - - size = sizeof(int); - rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE), - KEY_DEFAULT_EASIZE, &size, lmmsize); - if (rc) - CERROR("Get default mdsize error rc %d\n", rc); - - return rc; -} - -/** - * Set the default_easize parameter to the given value. - * - * \see client_obd::cl_default_mds_easize - * - * \param[in] sbi superblock info for this filesystem - * \param[in] lmmsize the size to set - * - * \retval 0 on success - * \retval negative negated errno on failure - */ -int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize) -{ - if (lmmsize < sizeof(struct lov_mds_md) || - lmmsize > OBD_MAX_DEFAULT_EA_SIZE) - return -EINVAL; - - return obd_set_info_async(NULL, sbi->ll_md_exp, - sizeof(KEY_DEFAULT_EASIZE), - KEY_DEFAULT_EASIZE, - sizeof(int), &lmmsize, NULL); -} - -static void client_common_put_super(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - - cl_sb_fini(sb); - - obd_fid_fini(sbi->ll_dt_exp->exp_obd); - obd_disconnect(sbi->ll_dt_exp); - sbi->ll_dt_exp = NULL; - - ldebugfs_unregister_mountpoint(sbi); - - obd_fid_fini(sbi->ll_md_exp->exp_obd); - obd_disconnect(sbi->ll_md_exp); - sbi->ll_md_exp = NULL; -} - -void ll_kill_super(struct super_block *sb) -{ - struct ll_sb_info *sbi; - - /* not init sb ?*/ - if (!(sb->s_flags & SB_ACTIVE)) - return; - - sbi = ll_s2sbi(sb); - /* we need to restore s_dev from changed for clustered NFS before - * put_super because new kernels have cached s_dev and change sb->s_dev - * in put_super not affected real removing devices - */ - if (sbi) { - sb->s_dev = sbi->ll_sdev_orig; - sbi->ll_umounting = 1; - - /* wait running statahead threads to quit */ - 
while (atomic_read(&sbi->ll_sa_running) > 0) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC >> 3)); - } - } -} - -static inline int ll_set_opt(const char *opt, char *data, int fl) -{ - if (strncmp(opt, data, strlen(opt)) != 0) - return 0; - else - return fl; -} - -/* non-client-specific mount options are parsed in lmd_parse */ -static int ll_options(char *options, int *flags) -{ - int tmp; - char *s1 = options, *s2; - - if (!options) - return 0; - - CDEBUG(D_CONFIG, "Parsing opts %s\n", options); - - while (*s1) { - CDEBUG(D_SUPER, "next opt=%s\n", s1); - tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK); - if (tmp) { - *flags |= tmp; - goto next; - } - tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK); - if (tmp) { - *flags |= tmp; - goto next; - } - tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK); - if (tmp) { - *flags |= tmp; - goto next; - } - tmp = ll_set_opt("noflock", s1, - LL_SBI_FLOCK | LL_SBI_LOCALFLOCK); - if (tmp) { - *flags &= ~tmp; - goto next; - } - tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR); - if (tmp) { - *flags |= tmp; - goto next; - } - tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR); - if (tmp) { - *flags &= ~tmp; - goto next; - } - tmp = ll_set_opt("context", s1, 1); - if (tmp) - goto next; - tmp = ll_set_opt("fscontext", s1, 1); - if (tmp) - goto next; - tmp = ll_set_opt("defcontext", s1, 1); - if (tmp) - goto next; - tmp = ll_set_opt("rootcontext", s1, 1); - if (tmp) - goto next; - tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH); - if (tmp) { - *flags |= tmp; - goto next; - } - tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH); - if (tmp) { - *flags &= ~tmp; - goto next; - } - - tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM); - if (tmp) { - *flags |= tmp; - goto next; - } - tmp = ll_set_opt("nochecksum", s1, LL_SBI_CHECKSUM); - if (tmp) { - *flags &= ~tmp; - goto next; - } - tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE); - if (tmp) { - *flags |= 
tmp; - goto next; - } - tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE); - if (tmp) { - *flags &= ~tmp; - goto next; - } - tmp = ll_set_opt("lazystatfs", s1, LL_SBI_LAZYSTATFS); - if (tmp) { - *flags |= tmp; - goto next; - } - tmp = ll_set_opt("nolazystatfs", s1, LL_SBI_LAZYSTATFS); - if (tmp) { - *flags &= ~tmp; - goto next; - } - tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API); - if (tmp) { - *flags |= tmp; - goto next; - } - tmp = ll_set_opt("verbose", s1, LL_SBI_VERBOSE); - if (tmp) { - *flags |= tmp; - goto next; - } - tmp = ll_set_opt("noverbose", s1, LL_SBI_VERBOSE); - if (tmp) { - *flags &= ~tmp; - goto next; - } - tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING); - if (tmp) { - *flags |= tmp; - goto next; - } - LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n", - s1); - return -EINVAL; - -next: - /* Find next opt */ - s2 = strchr(s1, ','); - if (!s2) - break; - s1 = s2 + 1; - } - return 0; -} - -void ll_lli_init(struct ll_inode_info *lli) -{ - lli->lli_inode_magic = LLI_INODE_MAGIC; - lli->lli_flags = 0; - spin_lock_init(&lli->lli_lock); - lli->lli_posix_acl = NULL; - /* Do not set lli_fid, it has been initialized already. 
*/ - fid_zero(&lli->lli_pfid); - lli->lli_mds_read_och = NULL; - lli->lli_mds_write_och = NULL; - lli->lli_mds_exec_och = NULL; - lli->lli_open_fd_read_count = 0; - lli->lli_open_fd_write_count = 0; - lli->lli_open_fd_exec_count = 0; - mutex_init(&lli->lli_och_mutex); - spin_lock_init(&lli->lli_agl_lock); - spin_lock_init(&lli->lli_layout_lock); - ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE); - lli->lli_clob = NULL; - - init_rwsem(&lli->lli_xattrs_list_rwsem); - mutex_init(&lli->lli_xattrs_enq_lock); - - LASSERT(lli->lli_vfs_inode.i_mode != 0); - if (S_ISDIR(lli->lli_vfs_inode.i_mode)) { - mutex_init(&lli->lli_readdir_mutex); - lli->lli_opendir_key = NULL; - lli->lli_sai = NULL; - spin_lock_init(&lli->lli_sa_lock); - lli->lli_opendir_pid = 0; - lli->lli_sa_enabled = 0; - lli->lli_def_stripe_offset = -1; - } else { - mutex_init(&lli->lli_size_mutex); - lli->lli_symlink_name = NULL; - init_rwsem(&lli->lli_trunc_sem); - range_lock_tree_init(&lli->lli_write_tree); - init_rwsem(&lli->lli_glimpse_sem); - lli->lli_glimpse_time = 0; - INIT_LIST_HEAD(&lli->lli_agl_list); - lli->lli_agl_index = 0; - lli->lli_async_rc = 0; - } - mutex_init(&lli->lli_layout_mutex); -} - -int ll_fill_super(struct super_block *sb) -{ - struct lustre_profile *lprof = NULL; - struct lustre_sb_info *lsi = s2lsi(sb); - struct ll_sb_info *sbi; - char *dt = NULL, *md = NULL; - char *profilenm = get_profile_name(sb); - struct config_llog_instance *cfg; - int err; - static atomic_t ll_bdi_num = ATOMIC_INIT(0); - - CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); - - err = ptlrpc_inc_ref(); - if (err) - return err; - - cfg = kzalloc(sizeof(*cfg), GFP_NOFS); - if (!cfg) { - err = -ENOMEM; - goto out_put; - } - - try_module_get(THIS_MODULE); - - /* client additional sb info */ - sbi = ll_init_sbi(sb); - lsi->lsi_llsbi = sbi; - if (!sbi) { - module_put(THIS_MODULE); - kfree(cfg); - err = -ENOMEM; - goto out_put; - } - - err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags); - if (err) - goto out_free; - - 
err = super_setup_bdi_name(sb, "lustre-%d", - atomic_inc_return(&ll_bdi_num)); - if (err) - goto out_free; - - /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */ - sb->s_d_op = &ll_d_ops; - - /* Generate a string unique to this super, in case some joker tries - * to mount the same fs at two mount points. - * Use the address of the super itself. - */ - cfg->cfg_instance = sb; - cfg->cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid; - cfg->cfg_callback = class_config_llog_handler; - /* set up client obds */ - err = lustre_process_log(sb, profilenm, cfg); - if (err < 0) - goto out_free; - - /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */ - lprof = class_get_profile(profilenm); - if (!lprof) { - LCONSOLE_ERROR_MSG(0x156, - "The client profile '%s' could not be read from the MGS. Does that filesystem exist?\n", - profilenm); - err = -EINVAL; - goto out_free; - } - CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm, - lprof->lp_md, lprof->lp_dt); - - dt = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_dt, cfg->cfg_instance); - if (!dt) { - err = -ENOMEM; - goto out_free; - } - - md = kasprintf(GFP_NOFS, "%s-%p", lprof->lp_md, cfg->cfg_instance); - if (!md) { - err = -ENOMEM; - goto out_free; - } - - /* connections, registrations, sb setup */ - err = client_common_fill_super(sb, md, dt); - if (!err) - sbi->ll_client_common_fill_super_succeeded = 1; - -out_free: - kfree(md); - kfree(dt); - if (lprof) - class_put_profile(lprof); - if (err) - ll_put_super(sb); - else if (sbi->ll_flags & LL_SBI_VERBOSE) - LCONSOLE_WARN("Mounted %s\n", profilenm); - - kfree(cfg); -out_put: - if (err) - ptlrpc_dec_ref(); - return err; -} /* ll_fill_super */ - -void ll_put_super(struct super_block *sb) -{ - struct config_llog_instance cfg, params_cfg; - struct obd_device *obd; - struct lustre_sb_info *lsi = s2lsi(sb); - struct ll_sb_info *sbi = ll_s2sbi(sb); - char *profilenm = get_profile_name(sb); - int next, force = 1, rc = 0; - long ccc_count; - - 
CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm); - - cfg.cfg_instance = sb; - lustre_end_log(sb, profilenm, &cfg); - - params_cfg.cfg_instance = sb; - lustre_end_log(sb, PARAMS_FILENAME, ¶ms_cfg); - - if (sbi->ll_md_exp) { - obd = class_exp2obd(sbi->ll_md_exp); - if (obd) - force = obd->obd_force; - } - - /* Wait for unstable pages to be committed to stable storage */ - if (!force) - rc = l_wait_event_abortable(sbi->ll_cache->ccc_unstable_waitq, - !atomic_long_read(&sbi->ll_cache->ccc_unstable_nr)); - - ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr); - if (!force && rc != -ERESTARTSYS) - LASSERTF(!ccc_count, "count: %li\n", ccc_count); - - /* We need to set force before the lov_disconnect in - * lustre_common_put_super, since l_d cleans up osc's as well. - */ - if (force) { - next = 0; - while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, - &next)) != NULL) { - obd->obd_force = force; - } - } - - if (sbi->ll_client_common_fill_super_succeeded) { - /* Only if client_common_fill_super succeeded */ - client_common_put_super(sb); - } - - next = 0; - while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next))) - class_manual_cleanup(obd); - - if (sbi->ll_flags & LL_SBI_VERBOSE) - LCONSOLE_WARN("Unmounted %s\n", profilenm ? 
profilenm : ""); - - if (profilenm) - class_del_profile(profilenm); - - ll_free_sbi(sb); - lsi->lsi_llsbi = NULL; - - lustre_common_put_super(sb); - - cl_env_cache_purge(~0); - - module_put(THIS_MODULE); - - ptlrpc_dec_ref(); -} /* client_put_super */ - -struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock) -{ - struct inode *inode = NULL; - - /* NOTE: we depend on atomic igrab() -bzzz */ - lock_res_and_lock(lock); - if (lock->l_resource->lr_lvb_inode) { - struct ll_inode_info *lli; - - lli = ll_i2info(lock->l_resource->lr_lvb_inode); - if (lli->lli_inode_magic == LLI_INODE_MAGIC) { - inode = igrab(lock->l_resource->lr_lvb_inode); - } else { - inode = lock->l_resource->lr_lvb_inode; - LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ? D_INFO : - D_WARNING, lock, - "lr_lvb_inode %p is bogus: magic %08x", - lock->l_resource->lr_lvb_inode, - lli->lli_inode_magic); - inode = NULL; - } - } - unlock_res_and_lock(lock); - return inode; -} - -void ll_dir_clear_lsm_md(struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - - LASSERT(S_ISDIR(inode->i_mode)); - - if (lli->lli_lsm_md) { - lmv_free_memmd(lli->lli_lsm_md); - lli->lli_lsm_md = NULL; - } -} - -static struct inode *ll_iget_anon_dir(struct super_block *sb, - const struct lu_fid *fid, - struct lustre_md *md) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct mdt_body *body = md->body; - struct inode *inode; - ino_t ino; - - ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API); - inode = iget_locked(sb, ino); - if (!inode) { - CERROR("%s: failed get simple inode " DFID ": rc = -ENOENT\n", - ll_get_fsname(sb, NULL, 0), PFID(fid)); - return ERR_PTR(-ENOENT); - } - - if (inode->i_state & I_NEW) { - struct ll_inode_info *lli = ll_i2info(inode); - struct lmv_stripe_md *lsm = md->lmv; - - inode->i_mode = (inode->i_mode & ~S_IFMT) | - (body->mbo_mode & S_IFMT); - LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode " DFID "\n", - PFID(fid)); - - LTIME_S(inode->i_mtime) = 0; - 
LTIME_S(inode->i_atime) = 0; - LTIME_S(inode->i_ctime) = 0; - inode->i_rdev = 0; - - inode->i_op = &ll_dir_inode_operations; - inode->i_fop = &ll_dir_operations; - lli->lli_fid = *fid; - ll_lli_init(lli); - - LASSERT(lsm); - /* master object FID */ - lli->lli_pfid = body->mbo_fid1; - CDEBUG(D_INODE, "lli %p slave " DFID " master " DFID "\n", - lli, PFID(fid), PFID(&lli->lli_pfid)); - unlock_new_inode(inode); - } - - return inode; -} - -static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md) -{ - struct lmv_stripe_md *lsm = md->lmv; - struct lu_fid *fid; - int i; - - LASSERT(lsm); - /* - * XXX sigh, this lsm_root initialization should be in - * LMV layer, but it needs ll_iget right now, so we - * put this here right now. - */ - for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - fid = &lsm->lsm_md_oinfo[i].lmo_fid; - LASSERT(!lsm->lsm_md_oinfo[i].lmo_root); - /* Unfortunately ll_iget will call ll_update_inode, - * where the initialization of slave inode is slightly - * different, so it reset lsm_md to NULL to avoid - * initializing lsm for slave inode. 
- */ - /* For migrating inode, master stripe and master object will - * be same, so we only need assign this inode - */ - if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && !i) - lsm->lsm_md_oinfo[i].lmo_root = inode; - else - lsm->lsm_md_oinfo[i].lmo_root = - ll_iget_anon_dir(inode->i_sb, fid, md); - if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) { - int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root); - - lsm->lsm_md_oinfo[i].lmo_root = NULL; - return rc; - } - } - - return 0; -} - -static inline int lli_lsm_md_eq(const struct lmv_stripe_md *lsm_md1, - const struct lmv_stripe_md *lsm_md2) -{ - return lsm_md1->lsm_md_magic == lsm_md2->lsm_md_magic && - lsm_md1->lsm_md_stripe_count == lsm_md2->lsm_md_stripe_count && - lsm_md1->lsm_md_master_mdt_index == - lsm_md2->lsm_md_master_mdt_index && - lsm_md1->lsm_md_hash_type == lsm_md2->lsm_md_hash_type && - lsm_md1->lsm_md_layout_version == - lsm_md2->lsm_md_layout_version && - !strcmp(lsm_md1->lsm_md_pool_name, - lsm_md2->lsm_md_pool_name); -} - -static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct lmv_stripe_md *lsm = md->lmv; - int rc; - - LASSERT(S_ISDIR(inode->i_mode)); - CDEBUG(D_INODE, "update lsm %p of " DFID "\n", lli->lli_lsm_md, - PFID(ll_inode2fid(inode))); - - /* no striped information from request. 
*/ - if (!lsm) { - if (!lli->lli_lsm_md) { - return 0; - } else if (lli->lli_lsm_md->lsm_md_hash_type & - LMV_HASH_FLAG_MIGRATION) { - /* - * migration is done, the temporay MIGRATE layout has - * been removed - */ - CDEBUG(D_INODE, DFID " finish migration.\n", - PFID(ll_inode2fid(inode))); - lmv_free_memmd(lli->lli_lsm_md); - lli->lli_lsm_md = NULL; - return 0; - } - /* - * The lustre_md from req does not include stripeEA, - * see ll_md_setattr - */ - return 0; - } - - /* set the directory layout */ - if (!lli->lli_lsm_md) { - struct cl_attr *attr; - - rc = ll_init_lsm_md(inode, md); - if (rc) - return rc; - - /* - * set lsm_md to NULL, so the following free lustre_md - * will not free this lsm - */ - md->lmv = NULL; - lli->lli_lsm_md = lsm; - - attr = kzalloc(sizeof(*attr), GFP_NOFS); - if (!attr) - return -ENOMEM; - - /* validate the lsm */ - rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr, - ll_md_blocking_ast); - if (rc) { - kfree(attr); - return rc; - } - - if (md->body->mbo_valid & OBD_MD_FLNLINK) - md->body->mbo_nlink = attr->cat_nlink; - if (md->body->mbo_valid & OBD_MD_FLSIZE) - md->body->mbo_size = attr->cat_size; - if (md->body->mbo_valid & OBD_MD_FLATIME) - md->body->mbo_atime = attr->cat_atime; - if (md->body->mbo_valid & OBD_MD_FLCTIME) - md->body->mbo_ctime = attr->cat_ctime; - if (md->body->mbo_valid & OBD_MD_FLMTIME) - md->body->mbo_mtime = attr->cat_mtime; - - kfree(attr); - - CDEBUG(D_INODE, "Set lsm %p magic %x to " DFID "\n", lsm, - lsm->lsm_md_magic, PFID(ll_inode2fid(inode))); - return 0; - } - - /* Compare the old and new stripe information */ - if (!lsm_md_eq(lli->lli_lsm_md, lsm)) { - struct lmv_stripe_md *old_lsm = lli->lli_lsm_md; - int idx; - - CERROR("%s: inode " DFID "(%p)'s lmv layout mismatch (%p)/(%p) magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n", - ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), - inode, lsm, old_lsm, - lsm->lsm_md_magic, old_lsm->lsm_md_magic, 
- lsm->lsm_md_stripe_count, - old_lsm->lsm_md_stripe_count, - lsm->lsm_md_master_mdt_index, - old_lsm->lsm_md_master_mdt_index, - lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type, - lsm->lsm_md_layout_version, - old_lsm->lsm_md_layout_version, - lsm->lsm_md_pool_name, - old_lsm->lsm_md_pool_name); - - for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) { - CERROR("%s: sub FIDs in old lsm idx %d, old: " DFID "\n", - ll_get_fsname(inode->i_sb, NULL, 0), idx, - PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid)); - } - - for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) { - CERROR("%s: sub FIDs in new lsm idx %d, new: " DFID "\n", - ll_get_fsname(inode->i_sb, NULL, 0), idx, - PFID(&lsm->lsm_md_oinfo[idx].lmo_fid)); - } - - return -EIO; - } - - return 0; -} - -void ll_clear_inode(struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n", - PFID(ll_inode2fid(inode)), inode); - - if (S_ISDIR(inode->i_mode)) { - /* these should have been cleared in ll_file_release */ - LASSERT(!lli->lli_opendir_key); - LASSERT(!lli->lli_sai); - LASSERT(lli->lli_opendir_pid == 0); - } - - md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode)); - - LASSERT(!lli->lli_open_fd_write_count); - LASSERT(!lli->lli_open_fd_read_count); - LASSERT(!lli->lli_open_fd_exec_count); - - if (lli->lli_mds_write_och) - ll_md_real_close(inode, FMODE_WRITE); - if (lli->lli_mds_exec_och) - ll_md_real_close(inode, FMODE_EXEC); - if (lli->lli_mds_read_och) - ll_md_real_close(inode, FMODE_READ); - - if (S_ISLNK(inode->i_mode)) { - kfree(lli->lli_symlink_name); - lli->lli_symlink_name = NULL; - } - - ll_xattr_cache_destroy(inode); - -#ifdef CONFIG_FS_POSIX_ACL - forget_all_cached_acls(inode); - if (lli->lli_posix_acl) { - posix_acl_release(lli->lli_posix_acl); - lli->lli_posix_acl = NULL; - } -#endif - lli->lli_inode_magic = LLI_INODE_DEAD; - - if (S_ISDIR(inode->i_mode)) - ll_dir_clear_lsm_md(inode); - if 
(S_ISREG(inode->i_mode) && !is_bad_inode(inode)) - LASSERT(list_empty(&lli->lli_agl_list)); - - /* - * XXX This has to be done before lsm is freed below, because - * cl_object still uses inode lsm. - */ - cl_inode_fini(inode); -} - -#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) - -static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data) -{ - struct lustre_md md; - struct inode *inode = d_inode(dentry); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *request = NULL; - int rc, ia_valid; - - op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request); - if (rc) { - ptlrpc_req_finished(request); - if (rc == -ENOENT) { - clear_nlink(inode); - /* Unlinked special device node? Or just a race? - * Pretend we did everything. - */ - if (!S_ISREG(inode->i_mode) && - !S_ISDIR(inode->i_mode)) { - ia_valid = op_data->op_attr.ia_valid; - op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS; - rc = simple_setattr(dentry, &op_data->op_attr); - op_data->op_attr.ia_valid = ia_valid; - } - } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) { - CERROR("md_setattr fails: rc = %d\n", rc); - } - return rc; - } - - rc = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp, - sbi->ll_md_exp, &md); - if (rc) { - ptlrpc_req_finished(request); - return rc; - } - - ia_valid = op_data->op_attr.ia_valid; - /* inode size will be in cl_setattr_ost, can't do it now since dirty - * cache is not cleared yet. 
- */ - op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE); - if (S_ISREG(inode->i_mode)) - inode_lock(inode); - rc = simple_setattr(dentry, &op_data->op_attr); - if (S_ISREG(inode->i_mode)) - inode_unlock(inode); - op_data->op_attr.ia_valid = ia_valid; - - rc = ll_update_inode(inode, &md); - ptlrpc_req_finished(request); - - return rc; -} - -/* If this inode has objects allocated to it (lsm != NULL), then the OST - * object(s) determine the file size and mtime. Otherwise, the MDS will - * keep these values until such a time that objects are allocated for it. - * We do the MDS operations first, as it is checking permissions for us. - * We don't to the MDS RPC if there is nothing that we want to store there, - * otherwise there is no harm in updating mtime/atime on the MDS if we are - * going to do an RPC anyways. - * - * If we are doing a truncate, we will send the mtime and ctime updates - * to the OST with the punch RPC, otherwise we do an explicit setattr RPC. - * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE - * at the same time. - * - * In case of HSMimport, we only set attr on MDS. - */ -int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import) -{ - struct inode *inode = d_inode(dentry); - struct ll_inode_info *lli = ll_i2info(inode); - struct md_op_data *op_data = NULL; - int rc = 0; - - CDEBUG(D_VFSTRACE, "%s: setattr inode " DFID "(%p) from %llu to %llu, valid %x, hsm_import %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), inode, - i_size_read(inode), attr->ia_size, attr->ia_valid, hsm_import); - - if (attr->ia_valid & ATTR_SIZE) { - /* Check new size against VFS/VM file size limit and rlimit */ - rc = inode_newsize_ok(inode, attr->ia_size); - if (rc) - return rc; - - /* The maximum Lustre file size is variable, based on the - * OST maximum object size and number of stripes. This - * needs another check in addition to the VFS check above. 
- */ - if (attr->ia_size > ll_file_maxbytes(inode)) { - CDEBUG(D_INODE, "file " DFID " too large %llu > %llu\n", - PFID(&lli->lli_fid), attr->ia_size, - ll_file_maxbytes(inode)); - return -EFBIG; - } - - attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; - } - - /* POSIX: check before ATTR_*TIME_SET set (from setattr_prepare) */ - if (attr->ia_valid & TIMES_SET_FLAGS) { - if ((!uid_eq(current_fsuid(), inode->i_uid)) && - !capable(CAP_FOWNER)) - return -EPERM; - } - - /* We mark all of the fields "set" so MDS/OST does not re-set them */ - if (attr->ia_valid & ATTR_CTIME) { - attr->ia_ctime = current_time(inode); - attr->ia_valid |= ATTR_CTIME_SET; - } - if (!(attr->ia_valid & ATTR_ATIME_SET) && - (attr->ia_valid & ATTR_ATIME)) { - attr->ia_atime = current_time(inode); - attr->ia_valid |= ATTR_ATIME_SET; - } - if (!(attr->ia_valid & ATTR_MTIME_SET) && - (attr->ia_valid & ATTR_MTIME)) { - attr->ia_mtime = current_time(inode); - attr->ia_valid |= ATTR_MTIME_SET; - } - - if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME)) - CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %llu\n", - LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), - (s64)ktime_get_real_seconds()); - - if (S_ISREG(inode->i_mode)) - inode_unlock(inode); - - /* - * We always do an MDS RPC, even if we're only changing the size; - * only the MDS knows whether truncate() should fail with -ETXTBUSY - */ - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - if (!op_data) { - rc = -ENOMEM; - goto out; - } - - if (!hsm_import && attr->ia_valid & ATTR_SIZE) { - /* - * If we are changing file size, file content is - * modified, flag it. 
- */ - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - op_data->op_bias |= MDS_DATA_MODIFIED; - clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags); - } - - op_data->op_attr = *attr; - - rc = ll_md_setattr(dentry, op_data); - if (rc) - goto out; - - if (!S_ISREG(inode->i_mode) || hsm_import) { - rc = 0; - goto out; - } - - if (attr->ia_valid & (ATTR_SIZE | - ATTR_ATIME | ATTR_ATIME_SET | - ATTR_MTIME | ATTR_MTIME_SET)) { - /* For truncate and utimes sending attributes to OSTs, setting - * mtime/atime to the past will be performed under PW [0:EOF] - * extent lock (new_size:EOF for truncate). It may seem - * excessive to send mtime/atime updates to OSTs when not - * setting times to past, but it is necessary due to possible - * time de-synchronization between MDT inode and OST objects - */ - rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, attr, 0); - } - - /* - * If the file was restored, it needs to set dirty flag. - * - * We've already sent MDS_DATA_MODIFIED flag in - * ll_md_setattr() for truncate. However, the MDT refuses to - * set the HS_DIRTY flag on released files, so we have to set - * it again if the file has been restored. Please check how - * LLIF_DATA_MODIFIED is set in vvp_io_setattr_fini(). - * - * Please notice that if the file is not released, the previous - * MDS_DATA_MODIFIED has taken effect and usually - * LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()). - * This way we can save an RPC for common open + trunc - * operation. - */ - if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) { - struct hsm_state_set hss = { - .hss_valid = HSS_SETMASK, - .hss_setmask = HS_DIRTY, - }; - int rc2; - - rc2 = ll_hsm_state_set(inode, &hss); - /* - * truncate and write can happen at the same time, so that - * the file can be set modified even though the file is not - * restored from released state, and ll_hsm_state_set() is - * not applicable for the file, and rc2 < 0 is normal in this - * case. 
- */ - if (rc2 < 0) - CDEBUG(D_INFO, DFID "HSM set dirty failed: rc2 = %d\n", - PFID(ll_inode2fid(inode)), rc2); - } - -out: - if (op_data) - ll_finish_md_op_data(op_data); - - if (S_ISREG(inode->i_mode)) { - inode_lock(inode); - if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) - inode_dio_wait(inode); - } - - ll_stats_ops_tally(ll_i2sbi(inode), (attr->ia_valid & ATTR_SIZE) ? - LPROC_LL_TRUNC : LPROC_LL_SETATTR, 1); - - return rc; -} - -int ll_setattr(struct dentry *de, struct iattr *attr) -{ - int mode = d_inode(de)->i_mode; - - if ((attr->ia_valid & (ATTR_CTIME | ATTR_SIZE | ATTR_MODE)) == - (ATTR_CTIME | ATTR_SIZE | ATTR_MODE)) - attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; - - if (((attr->ia_valid & (ATTR_MODE | ATTR_FORCE | ATTR_SIZE)) == - (ATTR_SIZE | ATTR_MODE)) && - (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) || - (((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) && - !(attr->ia_mode & S_ISGID)))) - attr->ia_valid |= ATTR_FORCE; - - if ((attr->ia_valid & ATTR_MODE) && - (mode & S_ISUID) && - !(attr->ia_mode & S_ISUID) && - !(attr->ia_valid & ATTR_KILL_SUID)) - attr->ia_valid |= ATTR_KILL_SUID; - - if ((attr->ia_valid & ATTR_MODE) && - ((mode & (S_ISGID | 0010)) == (S_ISGID | 0010)) && - !(attr->ia_mode & S_ISGID) && - !(attr->ia_valid & ATTR_KILL_SGID)) - attr->ia_valid |= ATTR_KILL_SGID; - - return ll_setattr_raw(de, attr, false); -} - -int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, - __u64 max_age, __u32 flags) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_statfs obd_osfs; - int rc; - - rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags); - if (rc) { - CERROR("md_statfs fails: rc = %d\n", rc); - return rc; - } - - osfs->os_type = sb->s_magic; - - CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n", - osfs->os_bavail, osfs->os_blocks, osfs->os_ffree, - osfs->os_files); - - if (sbi->ll_flags & LL_SBI_LAZYSTATFS) - flags |= OBD_STATFS_NODELAY; - - rc = obd_statfs_rqset(sbi->ll_dt_exp, &obd_osfs, max_age, 
flags); - if (rc) { - CERROR("obd_statfs fails: rc = %d\n", rc); - return rc; - } - - CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n", - obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree, - obd_osfs.os_files); - - osfs->os_bsize = obd_osfs.os_bsize; - osfs->os_blocks = obd_osfs.os_blocks; - osfs->os_bfree = obd_osfs.os_bfree; - osfs->os_bavail = obd_osfs.os_bavail; - - /* If we don't have as many objects free on the OST as inodes - * on the MDS, we reduce the total number of inodes to - * compensate, so that the "inodes in use" number is correct. - */ - if (obd_osfs.os_ffree < osfs->os_ffree) { - osfs->os_files = (osfs->os_files - osfs->os_ffree) + - obd_osfs.os_ffree; - osfs->os_ffree = obd_osfs.os_ffree; - } - - return rc; -} - -int ll_statfs(struct dentry *de, struct kstatfs *sfs) -{ - struct super_block *sb = de->d_sb; - struct obd_statfs osfs; - int rc; - - CDEBUG(D_VFSTRACE, "VFS Op: at %llu jiffies\n", get_jiffies_64()); - ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STAFS, 1); - - /* Some amount of caching on the client is allowed */ - rc = ll_statfs_internal(sb, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - 0); - if (rc) - return rc; - - statfs_unpack(sfs, &osfs); - - /* We need to downshift for all 32-bit kernels, because we can't - * tell if the kernel is being called via sys_statfs64() or not. - * Stop before overflowing f_bsize - in which case it is better - * to just risk EOVERFLOW if caller is using old sys_statfs(). 
- */ - if (sizeof(long) < 8) { - while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) { - sfs->f_bsize <<= 1; - - osfs.os_blocks >>= 1; - osfs.os_bfree >>= 1; - osfs.os_bavail >>= 1; - } - } - - sfs->f_blocks = osfs.os_blocks; - sfs->f_bfree = osfs.os_bfree; - sfs->f_bavail = osfs.os_bavail; - sfs->f_fsid = ll_s2sbi(sb)->ll_fsid; - return 0; -} - -void ll_inode_size_lock(struct inode *inode) -{ - struct ll_inode_info *lli; - - LASSERT(!S_ISDIR(inode->i_mode)); - - lli = ll_i2info(inode); - mutex_lock(&lli->lli_size_mutex); -} - -void ll_inode_size_unlock(struct inode *inode) -{ - struct ll_inode_info *lli; - - lli = ll_i2info(inode); - mutex_unlock(&lli->lli_size_mutex); -} - -int ll_update_inode(struct inode *inode, struct lustre_md *md) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct mdt_body *body = md->body; - struct ll_sb_info *sbi = ll_i2sbi(inode); - - if (body->mbo_valid & OBD_MD_FLEASIZE) - cl_file_inode_init(inode, md); - - if (S_ISDIR(inode->i_mode)) { - int rc; - - rc = ll_update_lsm_md(inode, md); - if (rc) - return rc; - } - -#ifdef CONFIG_FS_POSIX_ACL - if (body->mbo_valid & OBD_MD_FLACL) { - spin_lock(&lli->lli_lock); - if (lli->lli_posix_acl) - posix_acl_release(lli->lli_posix_acl); - lli->lli_posix_acl = md->posix_acl; - spin_unlock(&lli->lli_lock); - } -#endif - inode->i_ino = cl_fid_build_ino(&body->mbo_fid1, - sbi->ll_flags & LL_SBI_32BIT_API); - inode->i_generation = cl_fid_build_gen(&body->mbo_fid1); - - if (body->mbo_valid & OBD_MD_FLATIME) { - if (body->mbo_atime > LTIME_S(inode->i_atime)) - LTIME_S(inode->i_atime) = body->mbo_atime; - lli->lli_atime = body->mbo_atime; - } - if (body->mbo_valid & OBD_MD_FLMTIME) { - if (body->mbo_mtime > LTIME_S(inode->i_mtime)) { - CDEBUG(D_INODE, - "setting ino %lu mtime from %lu to %llu\n", - inode->i_ino, LTIME_S(inode->i_mtime), - body->mbo_mtime); - LTIME_S(inode->i_mtime) = body->mbo_mtime; - } - lli->lli_mtime = body->mbo_mtime; - } - if (body->mbo_valid & OBD_MD_FLCTIME) { - if 
(body->mbo_ctime > LTIME_S(inode->i_ctime)) - LTIME_S(inode->i_ctime) = body->mbo_ctime; - lli->lli_ctime = body->mbo_ctime; - } - if (body->mbo_valid & OBD_MD_FLMODE) - inode->i_mode = (inode->i_mode & S_IFMT) | - (body->mbo_mode & ~S_IFMT); - if (body->mbo_valid & OBD_MD_FLTYPE) - inode->i_mode = (inode->i_mode & ~S_IFMT) | - (body->mbo_mode & S_IFMT); - LASSERT(inode->i_mode != 0); - if (S_ISREG(inode->i_mode)) - inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, - LL_MAX_BLKSIZE_BITS); - else - inode->i_blkbits = inode->i_sb->s_blocksize_bits; - if (body->mbo_valid & OBD_MD_FLUID) - inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid); - if (body->mbo_valid & OBD_MD_FLGID) - inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid); - if (body->mbo_valid & OBD_MD_FLFLAGS) - inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags); - if (body->mbo_valid & OBD_MD_FLNLINK) - set_nlink(inode, body->mbo_nlink); - if (body->mbo_valid & OBD_MD_FLRDEV) - inode->i_rdev = old_decode_dev(body->mbo_rdev); - - if (body->mbo_valid & OBD_MD_FLID) { - /* FID shouldn't be changed! 
*/ - if (fid_is_sane(&lli->lli_fid)) { - LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1), - "Trying to change FID " DFID " to the " DFID ", inode " DFID "(%p)\n", - PFID(&lli->lli_fid), PFID(&body->mbo_fid1), - PFID(ll_inode2fid(inode)), inode); - } else { - lli->lli_fid = body->mbo_fid1; - } - } - - LASSERT(fid_seq(&lli->lli_fid) != 0); - - if (body->mbo_valid & OBD_MD_FLSIZE) { - i_size_write(inode, body->mbo_size); - - CDEBUG(D_VFSTRACE, "inode=" DFID ", updating i_size %llu\n", - PFID(ll_inode2fid(inode)), - (unsigned long long)body->mbo_size); - - if (body->mbo_valid & OBD_MD_FLBLOCKS) - inode->i_blocks = body->mbo_blocks; - } - - if (body->mbo_valid & OBD_MD_TSTATE) { - if (body->mbo_t_state & MS_RESTORE) - set_bit(LLIF_FILE_RESTORING, &lli->lli_flags); - } - - return 0; -} - -int ll_read_inode2(struct inode *inode, void *opaque) -{ - struct lustre_md *md = opaque; - struct ll_inode_info *lli = ll_i2info(inode); - int rc; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n", - PFID(&lli->lli_fid), inode); - - /* Core attributes from the MDS first. This is a new inode, and - * the VFS doesn't zero times in the core inode so we have to do - * it ourselves. They will be overwritten by either MDS or OST - * attributes - we just need to make sure they aren't newer. 
- */ - LTIME_S(inode->i_mtime) = 0; - LTIME_S(inode->i_atime) = 0; - LTIME_S(inode->i_ctime) = 0; - inode->i_rdev = 0; - rc = ll_update_inode(inode, md); - if (rc) - return rc; - - /* OIDEBUG(inode); */ - - if (S_ISREG(inode->i_mode)) { - struct ll_sb_info *sbi = ll_i2sbi(inode); - - inode->i_op = &ll_file_inode_operations; - inode->i_fop = sbi->ll_fop; - inode->i_mapping->a_ops = (struct address_space_operations *)&ll_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ll_dir_inode_operations; - inode->i_fop = &ll_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &ll_fast_symlink_inode_operations; - } else { - inode->i_op = &ll_special_inode_operations; - - init_special_inode(inode, inode->i_mode, - inode->i_rdev); - } - - return 0; -} - -void ll_delete_inode(struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - - if (S_ISREG(inode->i_mode) && lli->lli_clob) - /* discard all dirty pages before truncating them, required by - * osc_extent implementation at LU-1030. 
- */ - cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, - CL_FSYNC_LOCAL, 1); - - truncate_inode_pages_final(&inode->i_data); - - LASSERTF(!inode->i_data.nrpages, - "inode=" DFID "(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n", - PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages); - - ll_clear_inode(inode); - clear_inode(inode); -} - -int ll_iocontrol(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *req = NULL; - int rc, flags = 0; - - switch (cmd) { - case FSFILT_IOC_GETFLAGS: { - struct mdt_body *body; - struct md_op_data *op_data; - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, - 0, 0, LUSTRE_OPC_ANY, - NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - op_data->op_valid = OBD_MD_FLFLAGS; - rc = md_getattr(sbi->ll_md_exp, op_data, &req); - ll_finish_md_op_data(op_data); - if (rc) { - CERROR("%s: failure inode " DFID ": rc = %d\n", - sbi->ll_md_exp->exp_obd->obd_name, - PFID(ll_inode2fid(inode)), rc); - return -abs(rc); - } - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - - flags = body->mbo_flags; - - ptlrpc_req_finished(req); - - return put_user(flags, (int __user *)arg); - } - case FSFILT_IOC_SETFLAGS: { - struct md_op_data *op_data; - struct cl_object *obj; - struct iattr *attr; - - if (get_user(flags, (int __user *)arg)) - return -EFAULT; - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - op_data->op_attr_flags = flags; - op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; - rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &req); - ll_finish_md_op_data(op_data); - ptlrpc_req_finished(req); - if (rc) - return rc; - - inode->i_flags = ll_ext_to_inode_flags(flags); - - obj = ll_i2info(inode)->lli_clob; - if (!obj) - return 0; - - attr = kzalloc(sizeof(*attr), GFP_NOFS); - if (!attr) - return -ENOMEM; - - attr->ia_valid 
= ATTR_ATTR_FLAG; - rc = cl_setattr_ost(obj, attr, flags); - kfree(attr); - return rc; - } - default: - return -ENOSYS; - } - - return 0; -} - -int ll_flush_ctx(struct inode *inode) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - - CDEBUG(D_SEC, "flush context for user %d\n", - from_kuid(&init_user_ns, current_uid())); - - obd_set_info_async(NULL, sbi->ll_md_exp, - sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX, - 0, NULL, NULL); - obd_set_info_async(NULL, sbi->ll_dt_exp, - sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX, - 0, NULL, NULL); - return 0; -} - -/* umount -f client means force down, don't save state */ -void ll_umount_begin(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; - struct obd_ioctl_data *ioc_data; - int cnt = 0; - - CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb, - sb->s_count, atomic_read(&sb->s_active)); - - obd = class_exp2obd(sbi->ll_md_exp); - if (!obd) { - CERROR("Invalid MDC connection handle %#llx\n", - sbi->ll_md_exp->exp_handle.h_cookie); - return; - } - obd->obd_force = 1; - - obd = class_exp2obd(sbi->ll_dt_exp); - if (!obd) { - CERROR("Invalid LOV connection handle %#llx\n", - sbi->ll_dt_exp->exp_handle.h_cookie); - return; - } - obd->obd_force = 1; - - ioc_data = kzalloc(sizeof(*ioc_data), GFP_NOFS); - if (ioc_data) { - obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp, - sizeof(*ioc_data), ioc_data, NULL); - - obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp, - sizeof(*ioc_data), ioc_data, NULL); - - kfree(ioc_data); - } - - /* Really, we'd like to wait until there are no requests outstanding, - * and then continue. For now, we just periodically checking for vfs - * to decrement mnt_cnt and hope to finish it within 10sec. 
- */ - while (cnt < 10 && !may_umount(sbi->ll_mnt.mnt)) { - schedule_timeout_uninterruptible(HZ); - cnt++; - } - - schedule(); -} - -int ll_remount_fs(struct super_block *sb, int *flags, char *data) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - char *profilenm = get_profile_name(sb); - int err; - __u32 read_only; - - if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { - read_only = *flags & SB_RDONLY; - err = obd_set_info_async(NULL, sbi->ll_md_exp, - sizeof(KEY_READ_ONLY), - KEY_READ_ONLY, sizeof(read_only), - &read_only, NULL); - if (err) { - LCONSOLE_WARN("Failed to remount %s %s (%d)\n", - profilenm, read_only ? - "read-only" : "read-write", err); - return err; - } - - if (read_only) - sb->s_flags |= SB_RDONLY; - else - sb->s_flags &= ~SB_RDONLY; - - if (sbi->ll_flags & LL_SBI_VERBOSE) - LCONSOLE_WARN("Remounted %s %s\n", profilenm, - read_only ? "read-only" : "read-write"); - } - return 0; -} - -/** - * Cleanup the open handle that is cached on MDT-side. - * - * For open case, the client side open handling thread may hit error - * after the MDT grant the open. Under such case, the client should - * send close RPC to the MDT as cleanup; otherwise, the open handle - * on the MDT will be leaked there until the client umount or evicted. - * - * In further, if someone unlinked the file, because the open handle - * holds the reference on such file/object, then it will block the - * subsequent threads that want to locate such object via FID. 
- * - * \param[in] sb super block for this file-system - * \param[in] open_req pointer to the original open request - */ -void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req) -{ - struct mdt_body *body; - struct md_op_data *op_data; - struct ptlrpc_request *close_req = NULL; - struct obd_export *exp = ll_s2sbi(sb)->ll_md_exp; - - body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY); - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - if (!op_data) - return; - - op_data->op_fid1 = body->mbo_fid1; - op_data->op_handle = body->mbo_handle; - op_data->op_mod_time = get_seconds(); - md_close(exp, op_data, NULL, &close_req); - ptlrpc_req_finished(close_req); - ll_finish_md_op_data(op_data); -} - -int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, - struct super_block *sb, struct lookup_intent *it) -{ - struct ll_sb_info *sbi = NULL; - struct lustre_md md = { NULL }; - int rc; - - LASSERT(*inode || sb); - sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode); - rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp, - sbi->ll_md_exp, &md); - if (rc) - goto cleanup; - - if (*inode) { - rc = ll_update_inode(*inode, &md); - if (rc) - goto out; - } else { - LASSERT(sb); - - /* - * At this point server returns to client's same fid as client - * generated for creating. So using ->fid1 is okay here. - */ - if (!fid_is_sane(&md.body->mbo_fid1)) { - CERROR("%s: Fid is insane " DFID "\n", - ll_get_fsname(sb, NULL, 0), - PFID(&md.body->mbo_fid1)); - rc = -EINVAL; - goto out; - } - - *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1, - sbi->ll_flags & LL_SBI_32BIT_API), - &md); - if (IS_ERR(*inode)) { -#ifdef CONFIG_FS_POSIX_ACL - if (md.posix_acl) { - posix_acl_release(md.posix_acl); - md.posix_acl = NULL; - } -#endif - rc = PTR_ERR(*inode); - CERROR("new_inode -fatal: rc %d\n", rc); - goto out; - } - } - - /* Handling piggyback layout lock. - * Layout lock can be piggybacked by getattr and open request. 
- * The lsm can be applied to inode only if it comes with a layout lock - * otherwise correct layout may be overwritten, for example: - * 1. proc1: mdt returns a lsm but not granting layout - * 2. layout was changed by another client - * 3. proc2: refresh layout and layout lock granted - * 4. proc1: to apply a stale layout - */ - if (it && it->it_lock_mode != 0) { - struct lustre_handle lockh; - struct ldlm_lock *lock; - - lockh.cookie = it->it_lock_handle; - lock = ldlm_handle2lock(&lockh); - LASSERT(lock); - if (ldlm_has_layout(lock)) { - struct cl_object_conf conf; - - memset(&conf, 0, sizeof(conf)); - conf.coc_opc = OBJECT_CONF_SET; - conf.coc_inode = *inode; - conf.coc_lock = lock; - conf.u.coc_layout = md.layout; - (void)ll_layout_conf(*inode, &conf); - } - LDLM_LOCK_PUT(lock); - } - -out: - md_free_lustre_md(sbi->ll_md_exp, &md); -cleanup: - if (rc != 0 && it && it->it_op & IT_OPEN) - ll_open_cleanup(sb ? sb : (*inode)->i_sb, req); - - return rc; -} - -int ll_obd_statfs(struct inode *inode, void __user *arg) -{ - struct ll_sb_info *sbi = NULL; - struct obd_export *exp; - char *buf = NULL; - struct obd_ioctl_data *data = NULL; - __u32 type; - int len = 0, rc; - - if (!inode) { - rc = -EINVAL; - goto out_statfs; - } - - sbi = ll_i2sbi(inode); - if (!sbi) { - rc = -EINVAL; - goto out_statfs; - } - - rc = obd_ioctl_getdata(&buf, &len, arg); - if (rc) - goto out_statfs; - - data = (void *)buf; - if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 || - !data->ioc_pbuf1 || !data->ioc_pbuf2) { - rc = -EINVAL; - goto out_statfs; - } - - if (data->ioc_inllen1 != sizeof(__u32) || - data->ioc_inllen2 != sizeof(__u32) || - data->ioc_plen1 != sizeof(struct obd_statfs) || - data->ioc_plen2 != sizeof(struct obd_uuid)) { - rc = -EINVAL; - goto out_statfs; - } - - memcpy(&type, data->ioc_inlbuf1, sizeof(__u32)); - if (type & LL_STATFS_LMV) { - exp = sbi->ll_md_exp; - } else if (type & LL_STATFS_LOV) { - exp = sbi->ll_dt_exp; - } else { - rc = -ENODEV; - goto out_statfs; - } - - rc = 
obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, NULL); - if (rc) - goto out_statfs; -out_statfs: - kvfree(buf); - return rc; -} - -int ll_process_config(struct lustre_cfg *lcfg) -{ - char *ptr; - void *sb; - struct lprocfs_static_vars lvars; - unsigned long x; - int rc = 0; - - lprocfs_llite_init_vars(&lvars); - - /* The instance name contains the sb: lustre-client-aacfe000 */ - ptr = strrchr(lustre_cfg_string(lcfg, 0), '-'); - if (!ptr || !*(++ptr)) - return -EINVAL; - rc = kstrtoul(ptr, 16, &x); - if (rc != 0) - return -EINVAL; - sb = (void *)x; - /* This better be a real Lustre superblock! */ - LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic == - LMD_MAGIC); - - /* Note we have not called client_common_fill_super yet, so - * proc fns must be able to handle that! - */ - rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars, - lcfg, sb); - if (rc > 0) - rc = 0; - return rc; -} - -/* this function prepares md_op_data hint for passing ot down to MD stack. */ -struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, - struct inode *i1, struct inode *i2, - const char *name, size_t namelen, - u32 mode, __u32 opc, void *data) -{ - if (!name) { - /* Do not reuse namelen for something else. 
*/ - if (namelen) - return ERR_PTR(-EINVAL); - } else { - if (namelen > ll_i2sbi(i1)->ll_namelen) - return ERR_PTR(-ENAMETOOLONG); - - if (!lu_name_is_valid_2(name, namelen)) - return ERR_PTR(-EINVAL); - } - - if (!op_data) - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - - if (!op_data) - return ERR_PTR(-ENOMEM); - - ll_i2gids(op_data->op_suppgids, i1, i2); - op_data->op_fid1 = *ll_inode2fid(i1); - op_data->op_default_stripe_offset = -1; - if (S_ISDIR(i1->i_mode)) { - op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md; - if (opc == LUSTRE_OPC_MKDIR) - op_data->op_default_stripe_offset = - ll_i2info(i1)->lli_def_stripe_offset; - } - - if (i2) { - op_data->op_fid2 = *ll_inode2fid(i2); - if (S_ISDIR(i2->i_mode)) - op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md; - } else { - fid_zero(&op_data->op_fid2); - } - - if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH) - op_data->op_cli_flags |= CLI_HASH64; - - if (ll_need_32bit_api(ll_i2sbi(i1))) - op_data->op_cli_flags |= CLI_API32; - - op_data->op_name = name; - op_data->op_namelen = namelen; - op_data->op_mode = mode; - op_data->op_mod_time = ktime_get_real_seconds(); - op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); - op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); - op_data->op_cap = current_cap(); - if ((opc == LUSTRE_OPC_CREATE) && name && - filename_is_volatile(name, namelen, &op_data->op_mds)) - op_data->op_bias |= MDS_CREATE_VOLATILE; - else - op_data->op_mds = 0; - op_data->op_data = data; - - return op_data; -} - -void ll_finish_md_op_data(struct md_op_data *op_data) -{ - kfree(op_data); -} - -int ll_show_options(struct seq_file *seq, struct dentry *dentry) -{ - struct ll_sb_info *sbi; - - LASSERT(seq && dentry); - sbi = ll_s2sbi(dentry->d_sb); - - if (sbi->ll_flags & LL_SBI_NOLCK) - seq_puts(seq, ",nolock"); - - if (sbi->ll_flags & LL_SBI_FLOCK) - seq_puts(seq, ",flock"); - - if (sbi->ll_flags & LL_SBI_LOCALFLOCK) - seq_puts(seq, ",localflock"); - - if (sbi->ll_flags & LL_SBI_USER_XATTR) - 
seq_puts(seq, ",user_xattr"); - - if (sbi->ll_flags & LL_SBI_LAZYSTATFS) - seq_puts(seq, ",lazystatfs"); - - if (sbi->ll_flags & LL_SBI_USER_FID2PATH) - seq_puts(seq, ",user_fid2path"); - - if (sbi->ll_flags & LL_SBI_ALWAYS_PING) - seq_puts(seq, ",always_ping"); - - return 0; -} - -/** - * Get obd name by cmd, and copy out to user space - */ -int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct obd_device *obd; - - if (cmd == OBD_IOC_GETDTNAME) - obd = class_exp2obd(sbi->ll_dt_exp); - else if (cmd == OBD_IOC_GETMDNAME) - obd = class_exp2obd(sbi->ll_md_exp); - else - return -EINVAL; - - if (!obd) - return -ENOENT; - - if (copy_to_user((void __user *)arg, obd->obd_name, - strlen(obd->obd_name) + 1)) - return -EFAULT; - - return 0; -} - -/** - * Get lustre file system name by \a sbi. If \a buf is provided(non-NULL), the - * fsname will be returned in this buffer; otherwise, a static buffer will be - * used to store the fsname and returned to caller. - */ -char *ll_get_fsname(struct super_block *sb, char *buf, int buflen) -{ - static char fsname_static[MTI_NAME_MAXLEN]; - struct lustre_sb_info *lsi = s2lsi(sb); - char *ptr; - int len; - - if (!buf) { - /* this means the caller wants to use static buffer - * and it doesn't care about race. Usually this is - * in error reporting path - */ - buf = fsname_static; - buflen = sizeof(fsname_static); - } - - len = strlen(lsi->lsi_lmd->lmd_profile); - ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-'); - if (ptr && (strcmp(ptr, "-client") == 0)) - len -= 7; - - if (unlikely(len >= buflen)) - len = buflen - 1; - strncpy(buf, lsi->lsi_lmd->lmd_profile, len); - buf[len] = '\0'; - - return buf; -} - -void ll_dirty_page_discard_warn(struct page *page, int ioret) -{ - char *buf, *path = NULL; - struct dentry *dentry = NULL; - struct vvp_object *obj = cl_inode2vvp(page->mapping->host); - - /* this can be called inside spin lock so use GFP_ATOMIC. 
*/ - buf = (char *)__get_free_page(GFP_ATOMIC); - if (buf) { - dentry = d_find_alias(page->mapping->host); - if (dentry) - path = dentry_path_raw(dentry, buf, PAGE_SIZE); - } - - CDEBUG(D_WARNING, - "%s: dirty page discard: %s/fid: " DFID "/%s may get corrupted (rc %d)\n", - ll_get_fsname(page->mapping->host->i_sb, NULL, 0), - s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev, - PFID(&obj->vob_header.coh_lu.loh_fid), - (path && !IS_ERR(path)) ? path : "", ioret); - - if (dentry) - dput(dentry); - - if (buf) - free_page((unsigned long)buf); -} - -ssize_t ll_copy_user_md(const struct lov_user_md __user *md, - struct lov_user_md **kbuf) -{ - struct lov_user_md lum; - ssize_t lum_size; - - if (copy_from_user(&lum, md, sizeof(lum))) { - lum_size = -EFAULT; - goto no_kbuf; - } - - lum_size = ll_lov_user_md_size(&lum); - if (lum_size < 0) - goto no_kbuf; - - *kbuf = kzalloc(lum_size, GFP_NOFS); - if (!*kbuf) { - lum_size = -ENOMEM; - goto no_kbuf; - } - - if (copy_from_user(*kbuf, md, lum_size) != 0) { - kfree(*kbuf); - *kbuf = NULL; - lum_size = -EFAULT; - } -no_kbuf: - return lum_size; -} - -/* - * Compute llite root squash state after a change of root squash - * configuration setting or add/remove of a lnet nid - */ -void ll_compute_rootsquash_state(struct ll_sb_info *sbi) -{ - struct root_squash_info *squash = &sbi->ll_squash; - struct lnet_process_id id; - bool matched; - int i; - - /* Update norootsquash flag */ - down_write(&squash->rsi_sem); - if (list_empty(&squash->rsi_nosquash_nids)) { - sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH; - } else { - /* - * Do not apply root squash as soon as one of our NIDs is - * in the nosquash_nids list - */ - matched = false; - i = 0; - - while (LNetGetId(i++, &id) != -ENOENT) { - if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND) - continue; - if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) { - matched = true; - break; - } - } - if (matched) - sbi->ll_flags |= LL_SBI_NOROOTSQUASH; - else - sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH; - 
} - up_write(&squash->rsi_sem); -} - -/** - * Parse linkea content to extract information about a given hardlink - * - * \param[in] ldata - Initialized linkea data - * \param[in] linkno - Link identifier - * \param[out] parent_fid - The entry's parent FID - * \param[in] size - Entry name destination buffer - * - * \retval 0 on success - * \retval Appropriate negative error code on failure - */ -static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno, - struct lu_fid *parent_fid, struct lu_name *ln) -{ - unsigned int idx; - int rc; - - rc = linkea_init_with_rec(ldata); - if (rc < 0) - return rc; - - if (linkno >= ldata->ld_leh->leh_reccount) - /* beyond last link */ - return -ENODATA; - - linkea_first_entry(ldata); - for (idx = 0; ldata->ld_lee; idx++) { - linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln, - parent_fid); - if (idx == linkno) - break; - - linkea_next_entry(ldata); - } - - if (idx < linkno) - return -ENODATA; - - return 0; -} - -/** - * Get parent FID and name of an identified link. Operation is performed for - * a given link number, letting the caller iterate over linkno to list one or - * all links of an entry. - * - * \param[in] file - File descriptor against which to perform the operation - * \param[in,out] arg - User-filled structure containing the linkno to operate - * on and the available size. 
It is eventually filled - * with the requested information or left untouched on - * error - * - * \retval - 0 on success - * \retval - Appropriate negative error code on failure - */ -int ll_getparent(struct file *file, struct getparent __user *arg) -{ - struct inode *inode = file_inode(file); - struct linkea_data *ldata; - struct lu_fid parent_fid; - struct lu_buf buf = { - .lb_buf = NULL, - .lb_len = 0 - }; - struct lu_name ln; - u32 name_size; - u32 linkno; - int rc; - - if (!capable(CAP_DAC_READ_SEARCH) && - !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) - return -EPERM; - - if (get_user(name_size, &arg->gp_name_size)) - return -EFAULT; - - if (get_user(linkno, &arg->gp_linkno)) - return -EFAULT; - - if (name_size > PATH_MAX) - return -EINVAL; - - ldata = kzalloc(sizeof(*ldata), GFP_NOFS); - if (!ldata) - return -ENOMEM; - - rc = linkea_data_new(ldata, &buf); - if (rc < 0) - goto ldata_free; - - rc = ll_xattr_list(inode, XATTR_NAME_LINK, XATTR_TRUSTED_T, buf.lb_buf, - buf.lb_len, OBD_MD_FLXATTR); - if (rc < 0) - goto lb_free; - - rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln); - if (rc < 0) - goto lb_free; - - if (ln.ln_namelen >= name_size) { - rc = -EOVERFLOW; - goto lb_free; - } - - if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid))) { - rc = -EFAULT; - goto lb_free; - } - - if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen)) { - rc = -EFAULT; - goto lb_free; - } - - if (put_user('\0', arg->gp_name + ln.ln_namelen)) { - rc = -EFAULT; - goto lb_free; - } - -lb_free: - kvfree(buf.lb_buf); -ldata_free: - kfree(ldata); - return rc; -} diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c deleted file mode 100644 index d7fb5533f707..000000000000 --- a/drivers/staging/lustre/lustre/llite/llite_mmap.c +++ /dev/null @@ -1,480 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define DEBUG_SUBSYSTEM S_LLITE - -#include "llite_internal.h" - -static const struct vm_operations_struct ll_file_vm_ops; - -void policy_from_vma(union ldlm_policy_data *policy, - struct vm_area_struct *vma, unsigned long addr, - size_t count) -{ - policy->l_extent.start = ((addr - vma->vm_start) & PAGE_MASK) + - (vma->vm_pgoff << PAGE_SHIFT); - policy->l_extent.end = (policy->l_extent.start + count - 1) | - ~PAGE_MASK; -} - -struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr, - size_t count) -{ - struct vm_area_struct *vma, *ret = NULL; - - /* mmap_sem must have been held by caller. 
*/ - LASSERT(!down_write_trylock(&mm->mmap_sem)); - - for (vma = find_vma(mm, addr); - vma && vma->vm_start < (addr + count); vma = vma->vm_next) { - if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops && - vma->vm_flags & VM_SHARED) { - ret = vma; - break; - } - } - return ret; -} - -/** - * API independent part for page fault initialization. - * \param vma - virtual memory area addressed to page fault - * \param env - corespondent lu_env to processing - * \param index - page index corespondent to fault. - * \parm ra_flags - vma readahead flags. - * - * \return error codes from cl_io_init. - */ -static struct cl_io * -ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma, - pgoff_t index, unsigned long *ra_flags) -{ - struct file *file = vma->vm_file; - struct inode *inode = file_inode(file); - struct cl_io *io; - struct cl_fault_io *fio; - int rc; - - if (ll_file_nolock(file)) - return ERR_PTR(-EOPNOTSUPP); - -restart: - io = vvp_env_thread_io(env); - io->ci_obj = ll_i2info(inode)->lli_clob; - LASSERT(io->ci_obj); - - fio = &io->u.ci_fault; - fio->ft_index = index; - fio->ft_executable = vma->vm_flags & VM_EXEC; - - /* - * disable VM_SEQ_READ and use VM_RAND_READ to make sure that - * the kernel will not read other pages not covered by ldlm in - * filemap_nopage. we do our readahead in ll_readpage. - */ - if (ra_flags) - *ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ); - vma->vm_flags &= ~VM_SEQ_READ; - vma->vm_flags |= VM_RAND_READ; - - CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags, - fio->ft_index, fio->ft_executable); - - rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj); - if (rc == 0) { - struct vvp_io *vio = vvp_env_io(env); - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - - LASSERT(vio->vui_cl.cis_io == io); - - /* mmap lock must be MANDATORY it has to cache pages. 
*/ - io->ci_lockreq = CILR_MANDATORY; - vio->vui_fd = fd; - } else { - LASSERT(rc < 0); - cl_io_fini(env, io); - if (io->ci_need_restart) - goto restart; - - io = ERR_PTR(rc); - } - - return io; -} - -/* Sharing code of page_mkwrite method for rhel5 and rhel6 */ -static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage, - bool *retry) -{ - struct lu_env *env; - struct cl_io *io; - struct vvp_io *vio; - int result; - u16 refcheck; - sigset_t old, new; - struct inode *inode; - struct ll_inode_info *lli; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - io = ll_fault_io_init(env, vma, vmpage->index, NULL); - if (IS_ERR(io)) { - result = PTR_ERR(io); - goto out; - } - - result = io->ci_result; - if (result < 0) - goto out_io; - - io->u.ci_fault.ft_mkwrite = 1; - io->u.ci_fault.ft_writable = 1; - - vio = vvp_env_io(env); - vio->u.fault.ft_vma = vma; - vio->u.fault.ft_vmpage = vmpage; - - siginitsetinv(&new, sigmask(SIGKILL) | sigmask(SIGTERM)); - sigprocmask(SIG_BLOCK, &new, &old); - - inode = vvp_object_inode(io->ci_obj); - lli = ll_i2info(inode); - - result = cl_io_loop(env, io); - - sigprocmask(SIG_SETMASK, &old, NULL); - - if (result == 0) { - struct inode *inode = file_inode(vma->vm_file); - struct ll_inode_info *lli = ll_i2info(inode); - - lock_page(vmpage); - if (!vmpage->mapping) { - unlock_page(vmpage); - - /* page was truncated and lock was cancelled, return - * ENODATA so that VM_FAULT_NOPAGE will be returned - * to handle_mm_fault(). - */ - if (result == 0) - result = -ENODATA; - } else if (!PageDirty(vmpage)) { - /* race, the page has been cleaned by ptlrpcd after - * it was unlocked, it has to be added into dirty - * cache again otherwise this soon-to-dirty page won't - * consume any grants, even worse if this page is being - * transferred because it will break RPC checksum. 
- */ - unlock_page(vmpage); - - CDEBUG(D_MMAP, - "Race on page_mkwrite %p/%lu, page has been written out, retry.\n", - vmpage, vmpage->index); - - *retry = true; - result = -EAGAIN; - } - - if (!result) - set_bit(LLIF_DATA_MODIFIED, &lli->lli_flags); - } - -out_io: - cl_io_fini(env, io); -out: - cl_env_put(env, &refcheck); - CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result); - LASSERT(ergo(result == 0, PageLocked(vmpage))); - - return result; -} - -static inline int to_fault_error(int result) -{ - switch (result) { - case 0: - result = VM_FAULT_LOCKED; - break; - case -EFAULT: - result = VM_FAULT_NOPAGE; - break; - case -ENOMEM: - result = VM_FAULT_OOM; - break; - default: - result = VM_FAULT_SIGBUS; - break; - } - return result; -} - -/** - * Lustre implementation of a vm_operations_struct::fault() method, called by - * VM to server page fault (both in kernel and user space). - * - * \param vma - is virtual area struct related to page fault - * \param vmf - structure which describe type and address where hit fault - * - * \return allocated and filled _locked_ page for address - * \retval VM_FAULT_ERROR on general error - * \retval NOPAGE_OOM not have memory for allocate new page - */ -static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct lu_env *env; - struct cl_io *io; - struct vvp_io *vio = NULL; - struct page *vmpage; - unsigned long ra_flags; - int result = 0; - int fault_ret = 0; - u16 refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags); - if (IS_ERR(io)) { - result = to_fault_error(PTR_ERR(io)); - goto out; - } - - result = io->ci_result; - if (result == 0) { - vio = vvp_env_io(env); - vio->u.fault.ft_vma = vma; - vio->u.fault.ft_vmpage = NULL; - vio->u.fault.ft_vmf = vmf; - vio->u.fault.ft_flags = 0; - vio->u.fault.ft_flags_valid = false; - - /* May call ll_readpage() */ - ll_cl_add(vma->vm_file, env, io); - - result = 
cl_io_loop(env, io); - - ll_cl_remove(vma->vm_file, env); - - /* ft_flags are only valid if we reached - * the call to filemap_fault - */ - if (vio->u.fault.ft_flags_valid) - fault_ret = vio->u.fault.ft_flags; - - vmpage = vio->u.fault.ft_vmpage; - if (result != 0 && vmpage) { - put_page(vmpage); - vmf->page = NULL; - } - } - cl_io_fini(env, io); - - vma->vm_flags |= ra_flags; - -out: - cl_env_put(env, &refcheck); - if (result != 0 && !(fault_ret & VM_FAULT_RETRY)) - fault_ret |= to_fault_error(result); - - CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result); - return fault_ret; -} - -static int ll_fault(struct vm_fault *vmf) -{ - int count = 0; - bool printed = false; - int result; - sigset_t old, new; - - /* Only SIGKILL and SIGTERM are allowed for fault/nopage/mkwrite - * so that it can be killed by admin but not cause segfault by - * other signals. - */ - siginitsetinv(&new, sigmask(SIGKILL) | sigmask(SIGTERM)); - sigprocmask(SIG_BLOCK, &new, &old); - -restart: - result = ll_fault0(vmf->vma, vmf); - LASSERT(!(result & VM_FAULT_LOCKED)); - if (result == 0) { - struct page *vmpage = vmf->page; - - /* check if this page has been truncated */ - lock_page(vmpage); - if (unlikely(!vmpage->mapping)) { /* unlucky */ - unlock_page(vmpage); - put_page(vmpage); - vmf->page = NULL; - - if (!printed && ++count > 16) { - CWARN("the page is under heavy contention, maybe your app(%s) needs revising :-)\n", - current->comm); - printed = true; - } - - goto restart; - } - - result = VM_FAULT_LOCKED; - } - sigprocmask(SIG_SETMASK, &old, NULL); - return result; -} - -static int ll_page_mkwrite(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - int count = 0; - bool printed = false; - bool retry; - int result; - - file_update_time(vma->vm_file); - do { - retry = false; - result = ll_page_mkwrite0(vma, vmf->page, &retry); - - if (!printed && ++count > 16) { - const struct dentry *de = vma->vm_file->f_path.dentry; - - CWARN("app(%s): the page %lu of 
file " DFID " is under heavy contention\n", - current->comm, vmf->pgoff, - PFID(ll_inode2fid(de->d_inode))); - printed = true; - } - } while (retry); - - switch (result) { - case 0: - LASSERT(PageLocked(vmf->page)); - result = VM_FAULT_LOCKED; - break; - case -ENODATA: - case -EAGAIN: - case -EFAULT: - result = VM_FAULT_NOPAGE; - break; - case -ENOMEM: - result = VM_FAULT_OOM; - break; - default: - result = VM_FAULT_SIGBUS; - break; - } - - return result; -} - -/** - * To avoid cancel the locks covering mmapped region for lock cache pressure, - * we track the mapped vma count in vvp_object::vob_mmap_cnt. - */ -static void ll_vm_open(struct vm_area_struct *vma) -{ - struct inode *inode = file_inode(vma->vm_file); - struct vvp_object *vob = cl_inode2vvp(inode); - - LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0); - atomic_inc(&vob->vob_mmap_cnt); -} - -/** - * Dual to ll_vm_open(). - */ -static void ll_vm_close(struct vm_area_struct *vma) -{ - struct inode *inode = file_inode(vma->vm_file); - struct vvp_object *vob = cl_inode2vvp(inode); - - atomic_dec(&vob->vob_mmap_cnt); - LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0); -} - -/* XXX put nice comment here. 
talk about __free_pte -> dirty pages and - * nopage's reference passing to the pte - */ -int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last) -{ - int rc = -ENOENT; - - LASSERTF(last > first, "last %llu first %llu\n", last, first); - if (mapping_mapped(mapping)) { - rc = 0; - unmap_mapping_range(mapping, first + PAGE_SIZE - 1, - last - first + 1, 0); - } - - return rc; -} - -static const struct vm_operations_struct ll_file_vm_ops = { - .fault = ll_fault, - .page_mkwrite = ll_page_mkwrite, - .open = ll_vm_open, - .close = ll_vm_close, -}; - -int ll_file_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct inode *inode = file_inode(file); - int rc; - - if (ll_file_nolock(file)) - return -EOPNOTSUPP; - - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_MAP, 1); - rc = generic_file_mmap(file, vma); - if (rc == 0) { - vma->vm_ops = &ll_file_vm_ops; - vma->vm_ops->open(vma); - /* update the inode's size and mtime */ - rc = ll_glimpse_size(inode); - } - - return rc; -} diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c deleted file mode 100644 index 14172688d55f..000000000000 --- a/drivers/staging/lustre/lustre/llite/llite_nfs.c +++ /dev/null @@ -1,375 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/lustre/llite/llite_nfs.c - * - * NFS export of Lustre Light File System - * - * Author: Yury Umanets - * Author: Huang Hua - */ - -#define DEBUG_SUBSYSTEM S_LLITE -#include "llite_internal.h" -#include - -__u32 get_uuid2int(const char *name, int len) -{ - __u32 key0 = 0x12a3fe2d, key1 = 0x37abe8f9; - - while (len--) { - __u32 key = key1 + (key0 ^ (*name++ * 7152373)); - - if (key & 0x80000000) - key -= 0x7fffffff; - key1 = key0; - key0 = key; - } - return (key0 << 1); -} - -void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid) -{ - __u64 key = 0, key0 = 0x12a3fe2d, key1 = 0x37abe8f9; - - while (len--) { - key = key1 + (key0 ^ (*name++ * 7152373)); - if (key & 0x8000000000000000ULL) - key -= 0x7fffffffffffffffULL; - key1 = key0; - key0 = key; - } - - fsid->val[0] = key; - fsid->val[1] = key >> 32; -} - -struct inode *search_inode_for_lustre(struct super_block *sb, - const struct lu_fid *fid) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct ptlrpc_request *req = NULL; - struct inode *inode = NULL; - int eadatalen = 0; - unsigned long hash = cl_fid_build_ino(fid, - ll_need_32bit_api(sbi)); - struct md_op_data *op_data; - int rc; - - CDEBUG(D_INFO, "searching inode for:(%lu," DFID ")\n", hash, PFID(fid)); - - inode = ilookup5(sb, hash, ll_test_inode_by_fid, (void *)fid); - if (inode) - return inode; - - rc = ll_get_default_mdsize(sbi, &eadatalen); - if (rc) - return ERR_PTR(rc); - - /* Because inode is NULL, ll_prep_md_op_data can not - * be used here. 
So we allocate op_data ourselves - */ - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - if (!op_data) - return ERR_PTR(-ENOMEM); - - op_data->op_fid1 = *fid; - op_data->op_mode = eadatalen; - op_data->op_valid = OBD_MD_FLEASIZE; - - /* mds_fid2dentry ignores f_type */ - rc = md_getattr(sbi->ll_md_exp, op_data, &req); - kfree(op_data); - if (rc) { - CDEBUG(D_INFO, "can't get object attrs, fid " DFID ", rc %d\n", - PFID(fid), rc); - return ERR_PTR(rc); - } - rc = ll_prep_inode(&inode, req, sb, NULL); - ptlrpc_req_finished(req); - if (rc) - return ERR_PTR(rc); - - return inode; -} - -struct lustre_nfs_fid { - struct lu_fid lnf_child; - struct lu_fid lnf_parent; -}; - -static struct dentry * -ll_iget_for_nfs(struct super_block *sb, - struct lu_fid *fid, struct lu_fid *parent) -{ - struct inode *inode; - struct dentry *result; - - if (!fid_is_sane(fid)) - return ERR_PTR(-ESTALE); - - CDEBUG(D_INFO, "Get dentry for fid: " DFID "\n", PFID(fid)); - - inode = search_inode_for_lustre(sb, fid); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - if (is_bad_inode(inode)) { - /* we didn't find the right inode.. */ - iput(inode); - return ERR_PTR(-ESTALE); - } - - result = d_obtain_alias(inode); - if (IS_ERR(result)) { - iput(inode); - return result; - } - - /** - * In case d_obtain_alias() found a disconnected dentry, always update - * lli_pfid to allow later operation (normally open) have parent fid, - * which may be used by MDS to create data. - */ - if (parent) { - struct ll_inode_info *lli = ll_i2info(inode); - - spin_lock(&lli->lli_lock); - lli->lli_pfid = *parent; - spin_unlock(&lli->lli_lock); - } - - /* N.B. 
d_obtain_alias() drops inode ref on error */ - result = d_obtain_alias(inode); - if (!IS_ERR(result)) { - /* - * Need to signal to the ll_intent_file_open that - * we came from NFS and so opencache needs to be - * enabled for this one - */ - ll_d2d(result)->lld_nfs_dentry = 1; - } - - return result; -} - -/** - * \a connectable - is nfsd will connect himself or this should be done - * at lustre - * - * The return value is file handle type: - * 1 -- contains child file handle; - * 2 -- contains child file handle and parent file handle; - * 255 -- error. - */ -static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen, - struct inode *parent) -{ - int fileid_len = sizeof(struct lustre_nfs_fid) / 4; - struct lustre_nfs_fid *nfs_fid = (void *)fh; - - CDEBUG(D_INFO, "%s: encoding for (" DFID ") maxlen=%d minlen=%d\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), *plen, fileid_len); - - if (*plen < fileid_len) { - *plen = fileid_len; - return FILEID_INVALID; - } - - nfs_fid->lnf_child = *ll_inode2fid(inode); - if (parent) - nfs_fid->lnf_parent = *ll_inode2fid(parent); - else - fid_zero(&nfs_fid->lnf_parent); - *plen = fileid_len; - - return FILEID_LUSTRE; -} - -static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name, - int namelen, loff_t hash, u64 ino, - unsigned int type) -{ - /* It is hack to access lde_fid for comparison with lgd_fid. - * So the input 'name' must be part of the 'lu_dirent'. 
- */ - struct lu_dirent *lde = container_of((void*)name, struct lu_dirent, lde_name); - struct ll_getname_data *lgd = - container_of(ctx, struct ll_getname_data, ctx); - struct lu_fid fid; - - fid_le_to_cpu(&fid, &lde->lde_fid); - if (lu_fid_eq(&fid, &lgd->lgd_fid)) { - memcpy(lgd->lgd_name, name, namelen); - lgd->lgd_name[namelen] = 0; - lgd->lgd_found = 1; - } - return lgd->lgd_found; -} - -static int ll_get_name(struct dentry *dentry, char *name, - struct dentry *child) -{ - struct inode *dir = d_inode(dentry); - int rc; - struct ll_getname_data lgd = { - .lgd_name = name, - .lgd_fid = ll_i2info(d_inode(child))->lli_fid, - .ctx.actor = ll_nfs_get_name_filldir, - }; - struct md_op_data *op_data; - __u64 pos = 0; - - if (!dir || !S_ISDIR(dir->i_mode)) { - rc = -ENOTDIR; - goto out; - } - - if (!dir->i_fop) { - rc = -EINVAL; - goto out; - } - - op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0, - LUSTRE_OPC_ANY, dir); - if (IS_ERR(op_data)) { - rc = PTR_ERR(op_data); - goto out; - } - - op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages; - inode_lock(dir); - rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx); - inode_unlock(dir); - ll_finish_md_op_data(op_data); - if (!rc && !lgd.lgd_found) - rc = -ENOENT; -out: - return rc; -} - -static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid; - - if (fh_type != FILEID_LUSTRE) - return ERR_PTR(-EPROTO); - - return ll_iget_for_nfs(sb, &nfs_fid->lnf_child, &nfs_fid->lnf_parent); -} - -static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid; - - if (fh_type != FILEID_LUSTRE) - return ERR_PTR(-EPROTO); - - return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL); -} - -int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid) -{ - struct ptlrpc_request *req = NULL; - struct 
ll_sb_info *sbi; - struct mdt_body *body; - static const char dotdot[] = ".."; - struct md_op_data *op_data; - int rc; - int lmmsize; - - LASSERT(dir && S_ISDIR(dir->i_mode)); - - sbi = ll_s2sbi(dir->i_sb); - - CDEBUG(D_INFO, "%s: getting parent for (" DFID ")\n", - ll_get_fsname(dir->i_sb, NULL, 0), - PFID(ll_inode2fid(dir))); - - rc = ll_get_default_mdsize(sbi, &lmmsize); - if (rc != 0) - return rc; - - op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot, - strlen(dotdot), lmmsize, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - rc = md_getattr_name(sbi->ll_md_exp, op_data, &req); - ll_finish_md_op_data(op_data); - if (rc) { - CERROR("%s: failure inode " DFID " get parent: rc = %d\n", - ll_get_fsname(dir->i_sb, NULL, 0), - PFID(ll_inode2fid(dir)), rc); - return rc; - } - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - /* - * LU-3952: MDT may lost the FID of its parent, we should not crash - * the NFS server, ll_iget_for_nfs() will handle the error. 
- */ - if (body->mbo_valid & OBD_MD_FLID) { - CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n", - PFID(ll_inode2fid(dir)), PFID(&body->mbo_fid1)); - *parent_fid = body->mbo_fid1; - } - - ptlrpc_req_finished(req); - return 0; -} - -static struct dentry *ll_get_parent(struct dentry *dchild) -{ - struct lu_fid parent_fid = { 0 }; - struct dentry *dentry; - int rc; - - rc = ll_dir_get_parent_fid(dchild->d_inode, &parent_fid); - if (rc) - return ERR_PTR(rc); - - dentry = ll_iget_for_nfs(dchild->d_inode->i_sb, &parent_fid, NULL); - - return dentry; -} - -const struct export_operations lustre_export_operations = { - .get_parent = ll_get_parent, - .encode_fh = ll_encode_fh, - .get_name = ll_get_name, - .fh_to_dentry = ll_fh_to_dentry, - .fh_to_parent = ll_fh_to_parent, -}; diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c deleted file mode 100644 index 49bf1b7ee311..000000000000 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ /dev/null @@ -1,1659 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include - -#include "llite_internal.h" -#include "vvp_internal.h" - -/* debugfs llite mount point registration */ -static const struct file_operations ll_rw_extents_stats_fops; -static const struct file_operations ll_rw_extents_stats_pp_fops; -static const struct file_operations ll_rw_offset_stats_fops; - -static ssize_t blocksize_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - struct obd_statfs osfs; - int rc; - - rc = ll_statfs_internal(sbi->ll_sb, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) - return sprintf(buf, "%u\n", osfs.os_bsize); - - return rc; -} -LUSTRE_RO_ATTR(blocksize); - -static ssize_t kbytestotal_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - struct obd_statfs osfs; - int rc; - - rc = ll_statfs_internal(sbi->ll_sb, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) { - __u32 blk_size = osfs.os_bsize >> 10; - __u64 result = osfs.os_blocks; - - while (blk_size >>= 1) - result <<= 1; - - rc = sprintf(buf, "%llu\n", result); - } - - return rc; -} -LUSTRE_RO_ATTR(kbytestotal); - -static ssize_t kbytesfree_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - struct obd_statfs osfs; - int rc; - - rc = ll_statfs_internal(sbi->ll_sb, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) { - __u32 blk_size = osfs.os_bsize >> 10; - __u64 result = osfs.os_bfree; - - while (blk_size 
>>= 1) - result <<= 1; - - rc = sprintf(buf, "%llu\n", result); - } - - return rc; -} -LUSTRE_RO_ATTR(kbytesfree); - -static ssize_t kbytesavail_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - struct obd_statfs osfs; - int rc; - - rc = ll_statfs_internal(sbi->ll_sb, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) { - __u32 blk_size = osfs.os_bsize >> 10; - __u64 result = osfs.os_bavail; - - while (blk_size >>= 1) - result <<= 1; - - rc = sprintf(buf, "%llu\n", result); - } - - return rc; -} -LUSTRE_RO_ATTR(kbytesavail); - -static ssize_t filestotal_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - struct obd_statfs osfs; - int rc; - - rc = ll_statfs_internal(sbi->ll_sb, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) - return sprintf(buf, "%llu\n", osfs.os_files); - - return rc; -} -LUSTRE_RO_ATTR(filestotal); - -static ssize_t filesfree_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - struct obd_statfs osfs; - int rc; - - rc = ll_statfs_internal(sbi->ll_sb, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) - return sprintf(buf, "%llu\n", osfs.os_ffree); - - return rc; -} -LUSTRE_RO_ATTR(filesfree); - -static ssize_t client_type_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return sprintf(buf, "local client\n"); -} -LUSTRE_RO_ATTR(client_type); - -static ssize_t fstype_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - - return sprintf(buf, "%s\n", sbi->ll_sb->s_type->name); -} -LUSTRE_RO_ATTR(fstype); - -static ssize_t 
uuid_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - - return sprintf(buf, "%s\n", sbi->ll_sb_uuid.uuid); -} -LUSTRE_RO_ATTR(uuid); - -static int ll_site_stats_seq_show(struct seq_file *m, void *v) -{ - struct super_block *sb = m->private; - - /* - * See description of statistical counters in struct cl_site, and - * struct lu_site. - */ - return cl_site_stats_print(lu2cl_site(ll_s2sbi(sb)->ll_site), m); -} - -LPROC_SEQ_FOPS_RO(ll_site_stats); - -static ssize_t max_read_ahead_mb_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - long pages_number; - int mult; - - spin_lock(&sbi->ll_lock); - pages_number = sbi->ll_ra_info.ra_max_pages; - spin_unlock(&sbi->ll_lock); - - mult = 1 << (20 - PAGE_SHIFT); - return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult); -} - -static ssize_t max_read_ahead_mb_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - int rc; - unsigned long pages_number; - - rc = kstrtoul(buffer, 10, &pages_number); - if (rc) - return rc; - - pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */ - - if (pages_number > totalram_pages / 2) { - CERROR("can't set file readahead more than %lu MB\n", - totalram_pages >> (20 - PAGE_SHIFT + 1)); /*1/2 of RAM*/ - return -ERANGE; - } - - spin_lock(&sbi->ll_lock); - sbi->ll_ra_info.ra_max_pages = pages_number; - spin_unlock(&sbi->ll_lock); - - return count; -} -LUSTRE_RW_ATTR(max_read_ahead_mb); - -static ssize_t max_read_ahead_per_file_mb_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - long pages_number; - int mult; - - spin_lock(&sbi->ll_lock); - pages_number = 
sbi->ll_ra_info.ra_max_pages_per_file; - spin_unlock(&sbi->ll_lock); - - mult = 1 << (20 - PAGE_SHIFT); - return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult); -} - -static ssize_t max_read_ahead_per_file_mb_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - int rc; - unsigned long pages_number; - - rc = kstrtoul(buffer, 10, &pages_number); - if (rc) - return rc; - - if (pages_number > sbi->ll_ra_info.ra_max_pages) { - CERROR("can't set file readahead more than max_read_ahead_mb %lu MB\n", - sbi->ll_ra_info.ra_max_pages); - return -ERANGE; - } - - spin_lock(&sbi->ll_lock); - sbi->ll_ra_info.ra_max_pages_per_file = pages_number; - spin_unlock(&sbi->ll_lock); - - return count; -} -LUSTRE_RW_ATTR(max_read_ahead_per_file_mb); - -static ssize_t max_read_ahead_whole_mb_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - long pages_number; - int mult; - - spin_lock(&sbi->ll_lock); - pages_number = sbi->ll_ra_info.ra_max_read_ahead_whole_pages; - spin_unlock(&sbi->ll_lock); - - mult = 1 << (20 - PAGE_SHIFT); - return lprocfs_read_frac_helper(buf, PAGE_SIZE, pages_number, mult); -} - -static ssize_t max_read_ahead_whole_mb_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - int rc; - unsigned long pages_number; - - rc = kstrtoul(buffer, 10, &pages_number); - if (rc) - return rc; - - /* Cap this at the current max readahead window size, the readahead - * algorithm does this anyway so it's pointless to set it larger. 
- */ - if (pages_number > sbi->ll_ra_info.ra_max_pages_per_file) { - CERROR("can't set max_read_ahead_whole_mb more than max_read_ahead_per_file_mb: %lu\n", - sbi->ll_ra_info.ra_max_pages_per_file >> (20 - PAGE_SHIFT)); - return -ERANGE; - } - - spin_lock(&sbi->ll_lock); - sbi->ll_ra_info.ra_max_read_ahead_whole_pages = pages_number; - spin_unlock(&sbi->ll_lock); - - return count; -} -LUSTRE_RW_ATTR(max_read_ahead_whole_mb); - -static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v) -{ - struct super_block *sb = m->private; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct cl_client_cache *cache = sbi->ll_cache; - int shift = 20 - PAGE_SHIFT; - long max_cached_mb; - long unused_mb; - - max_cached_mb = cache->ccc_lru_max >> shift; - unused_mb = atomic_long_read(&cache->ccc_lru_left) >> shift; - seq_printf(m, - "users: %d\n" - "max_cached_mb: %ld\n" - "used_mb: %ld\n" - "unused_mb: %ld\n" - "reclaim_count: %u\n", - atomic_read(&cache->ccc_users), - max_cached_mb, - max_cached_mb - unused_mb, - unused_mb, - cache->ccc_lru_shrinkers); - return 0; -} - -static ssize_t ll_max_cached_mb_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct super_block *sb = ((struct seq_file *)file->private_data)->private; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct cl_client_cache *cache = sbi->ll_cache; - struct lu_env *env; - long diff = 0; - long nrpages = 0; - u16 refcheck; - long pages_number; - int mult; - long rc; - u64 val; - char kernbuf[128]; - - if (count >= sizeof(kernbuf)) - return -EINVAL; - - if (copy_from_user(kernbuf, buffer, count)) - return -EFAULT; - kernbuf[count] = 0; - - mult = 1 << (20 - PAGE_SHIFT); - buffer += lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count) - - kernbuf; - rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult); - if (rc) - return rc; - - if (val > LONG_MAX) - return -ERANGE; - pages_number = (long)val; - - if (pages_number < 0 || pages_number > totalram_pages) { - 
CERROR("%s: can't set max cache more than %lu MB\n", - ll_get_fsname(sb, NULL, 0), - totalram_pages >> (20 - PAGE_SHIFT)); - return -ERANGE; - } - - spin_lock(&sbi->ll_lock); - diff = pages_number - cache->ccc_lru_max; - spin_unlock(&sbi->ll_lock); - - /* easy - add more LRU slots. */ - if (diff >= 0) { - atomic_long_add(diff, &cache->ccc_lru_left); - rc = 0; - goto out; - } - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return 0; - - diff = -diff; - while (diff > 0) { - long tmp; - - /* reduce LRU budget from free slots. */ - do { - long ov, nv; - - ov = atomic_long_read(&cache->ccc_lru_left); - if (ov == 0) - break; - - nv = ov > diff ? ov - diff : 0; - rc = atomic_long_cmpxchg(&cache->ccc_lru_left, ov, nv); - if (likely(ov == rc)) { - diff -= ov - nv; - nrpages += ov - nv; - break; - } - } while (1); - - if (diff <= 0) - break; - - if (!sbi->ll_dt_exp) { /* being initialized */ - rc = 0; - goto out; - } - - /* difficult - have to ask OSCs to drop LRU slots. */ - tmp = diff << 1; - rc = obd_set_info_async(env, sbi->ll_dt_exp, - sizeof(KEY_CACHE_LRU_SHRINK), - KEY_CACHE_LRU_SHRINK, - sizeof(tmp), &tmp, NULL); - if (rc < 0) - break; - } - cl_env_put(env, &refcheck); - -out: - if (rc >= 0) { - spin_lock(&sbi->ll_lock); - cache->ccc_lru_max = pages_number; - spin_unlock(&sbi->ll_lock); - rc = count; - } else { - atomic_long_add(nrpages, &cache->ccc_lru_left); - } - return rc; -} - -LPROC_SEQ_FOPS(ll_max_cached_mb); - -static ssize_t checksum_pages_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - - return sprintf(buf, "%u\n", (sbi->ll_flags & LL_SBI_CHECKSUM) ? 
1 : 0); -} - -static ssize_t checksum_pages_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - int rc; - unsigned long val; - - if (!sbi->ll_dt_exp) - /* Not set up yet */ - return -EAGAIN; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - if (val) - sbi->ll_flags |= LL_SBI_CHECKSUM; - else - sbi->ll_flags &= ~LL_SBI_CHECKSUM; - - rc = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM), - KEY_CHECKSUM, sizeof(val), &val, NULL); - if (rc) - CWARN("Failed to set OSC checksum flags: %d\n", rc); - - return count; -} -LUSTRE_RW_ATTR(checksum_pages); - -static ssize_t ll_rd_track_id(struct kobject *kobj, char *buf, - enum stats_track_type type) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - - if (sbi->ll_stats_track_type == type) - return sprintf(buf, "%d\n", sbi->ll_stats_track_id); - else if (sbi->ll_stats_track_type == STATS_TRACK_ALL) - return sprintf(buf, "0 (all)\n"); - else - return sprintf(buf, "untracked\n"); -} - -static ssize_t ll_wr_track_id(struct kobject *kobj, const char *buffer, - size_t count, - enum stats_track_type type) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - int rc; - unsigned long pid; - - rc = kstrtoul(buffer, 10, &pid); - if (rc) - return rc; - sbi->ll_stats_track_id = pid; - if (pid == 0) - sbi->ll_stats_track_type = STATS_TRACK_ALL; - else - sbi->ll_stats_track_type = type; - lprocfs_clear_stats(sbi->ll_stats); - return count; -} - -static ssize_t stats_track_pid_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - return ll_rd_track_id(kobj, buf, STATS_TRACK_PID); -} - -static ssize_t stats_track_pid_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_PID); -} -LUSTRE_RW_ATTR(stats_track_pid); - -static 
ssize_t stats_track_ppid_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - return ll_rd_track_id(kobj, buf, STATS_TRACK_PPID); -} - -static ssize_t stats_track_ppid_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_PPID); -} -LUSTRE_RW_ATTR(stats_track_ppid); - -static ssize_t stats_track_gid_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - return ll_rd_track_id(kobj, buf, STATS_TRACK_GID); -} - -static ssize_t stats_track_gid_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - return ll_wr_track_id(kobj, buffer, count, STATS_TRACK_GID); -} -LUSTRE_RW_ATTR(stats_track_gid); - -static ssize_t statahead_max_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - - return sprintf(buf, "%u\n", sbi->ll_sa_max); -} - -static ssize_t statahead_max_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - if (val <= LL_SA_RPC_MAX) - sbi->ll_sa_max = val; - else - CERROR("Bad statahead_max value %lu. Valid values are in the range [0, %d]\n", - val, LL_SA_RPC_MAX); - - return count; -} -LUSTRE_RW_ATTR(statahead_max); - -static ssize_t statahead_agl_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - - return sprintf(buf, "%u\n", sbi->ll_flags & LL_SBI_AGL_ENABLED ? 
1 : 0); -} - -static ssize_t statahead_agl_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - if (val) - sbi->ll_flags |= LL_SBI_AGL_ENABLED; - else - sbi->ll_flags &= ~LL_SBI_AGL_ENABLED; - - return count; -} -LUSTRE_RW_ATTR(statahead_agl); - -static int ll_statahead_stats_seq_show(struct seq_file *m, void *v) -{ - struct super_block *sb = m->private; - struct ll_sb_info *sbi = ll_s2sbi(sb); - - seq_printf(m, - "statahead total: %u\n" - "statahead wrong: %u\n" - "agl total: %u\n", - atomic_read(&sbi->ll_sa_total), - atomic_read(&sbi->ll_sa_wrong), - atomic_read(&sbi->ll_agl_total)); - return 0; -} - -LPROC_SEQ_FOPS_RO(ll_statahead_stats); - -static ssize_t lazystatfs_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - - return sprintf(buf, "%u\n", sbi->ll_flags & LL_SBI_LAZYSTATFS ? 1 : 0); -} - -static ssize_t lazystatfs_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - if (val) - sbi->ll_flags |= LL_SBI_LAZYSTATFS; - else - sbi->ll_flags &= ~LL_SBI_LAZYSTATFS; - - return count; -} -LUSTRE_RW_ATTR(lazystatfs); - -static ssize_t max_easize_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - unsigned int ealen; - int rc; - - rc = ll_get_max_mdsize(sbi, &ealen); - if (rc) - return rc; - - return sprintf(buf, "%u\n", ealen); -} -LUSTRE_RO_ATTR(max_easize); - -/** - * Get default_easize. 
- * - * \see client_obd::cl_default_mds_easize - * - * \param[in] kobj kernel object for sysfs tree - * \param[in] attr attribute of this kernel object - * \param[in] buf buffer to write data into - * - * \retval positive \a count on success - * \retval negative negated errno on failure - */ -static ssize_t default_easize_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - unsigned int ealen; - int rc; - - rc = ll_get_default_mdsize(sbi, &ealen); - if (rc) - return rc; - - return sprintf(buf, "%u\n", ealen); -} - -/** - * Set default_easize. - * - * Range checking on the passed value is handled by - * ll_set_default_mdsize(). - * - * \see client_obd::cl_default_mds_easize - * - * \param[in] kobj kernel object for sysfs tree - * \param[in] attr attribute of this kernel object - * \param[in] buffer string passed from user space - * \param[in] count \a buffer length - * - * \retval positive \a count on success - * \retval negative negated errno on failure - */ -static ssize_t default_easize_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - unsigned long val; - int rc; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - rc = ll_set_default_mdsize(sbi, val); - if (rc) - return rc; - - return count; -} -LUSTRE_RW_ATTR(default_easize); - -static int ll_sbi_flags_seq_show(struct seq_file *m, void *v) -{ - const char *str[] = LL_SBI_FLAGS; - struct super_block *sb = m->private; - int flags = ll_s2sbi(sb)->ll_flags; - int i = 0; - - while (flags != 0) { - if (ARRAY_SIZE(str) <= i) { - CERROR("%s: Revise array LL_SBI_FLAGS to match sbi flags please.\n", - ll_get_fsname(sb, NULL, 0)); - return -EINVAL; - } - - if (flags & 0x1) - seq_printf(m, "%s ", str[i]); - flags >>= 1; - ++i; - } - seq_puts(m, "\b\n"); - return 0; -} - 
-LPROC_SEQ_FOPS_RO(ll_sbi_flags); - -static ssize_t xattr_cache_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - - return sprintf(buf, "%u\n", sbi->ll_xattr_cache_enabled); -} - -static ssize_t xattr_cache_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - if (val != 0 && val != 1) - return -ERANGE; - - if (val == 1 && !(sbi->ll_flags & LL_SBI_XATTR_CACHE)) - return -ENOTSUPP; - - sbi->ll_xattr_cache_enabled = val; - - return count; -} -LUSTRE_RW_ATTR(xattr_cache); - -static int ll_unstable_stats_seq_show(struct seq_file *m, void *v) -{ - struct super_block *sb = m->private; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct cl_client_cache *cache = sbi->ll_cache; - long pages; - int mb; - - pages = atomic_long_read(&cache->ccc_unstable_nr); - mb = (pages * PAGE_SIZE) >> 20; - - seq_printf(m, - "unstable_check: %8d\n" - "unstable_pages: %12ld\n" - "unstable_mb: %8d\n", - cache->ccc_unstable_check, pages, mb); - - return 0; -} - -static ssize_t ll_unstable_stats_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct super_block *sb = ((struct seq_file *)file->private_data)->private; - struct ll_sb_info *sbi = ll_s2sbi(sb); - char kernbuf[128]; - int val, rc; - - if (!count) - return 0; - if (count >= sizeof(kernbuf)) - return -EINVAL; - - if (copy_from_user(kernbuf, buffer, count)) - return -EFAULT; - kernbuf[count] = 0; - - buffer += lprocfs_find_named_value(kernbuf, "unstable_check:", &count) - - kernbuf; - rc = lprocfs_write_helper(buffer, count, &val); - if (rc < 0) - return rc; - - /* borrow lru lock to set the value */ - spin_lock(&sbi->ll_cache->ccc_lru_lock); - sbi->ll_cache->ccc_unstable_check = !!val; - 
spin_unlock(&sbi->ll_cache->ccc_lru_lock); - - return count; -} -LPROC_SEQ_FOPS(ll_unstable_stats); - -static int ll_root_squash_seq_show(struct seq_file *m, void *v) -{ - struct super_block *sb = m->private; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct root_squash_info *squash = &sbi->ll_squash; - - seq_printf(m, "%u:%u\n", squash->rsi_uid, squash->rsi_gid); - return 0; -} - -static ssize_t ll_root_squash_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct seq_file *m = file->private_data; - struct super_block *sb = m->private; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct root_squash_info *squash = &sbi->ll_squash; - - return lprocfs_wr_root_squash(buffer, count, squash, - ll_get_fsname(sb, NULL, 0)); -} -LPROC_SEQ_FOPS(ll_root_squash); - -static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v) -{ - struct super_block *sb = m->private; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct root_squash_info *squash = &sbi->ll_squash; - int len; - - down_read(&squash->rsi_sem); - if (!list_empty(&squash->rsi_nosquash_nids)) { - len = cfs_print_nidlist(m->buf + m->count, m->size - m->count, - &squash->rsi_nosquash_nids); - m->count += len; - seq_puts(m, "\n"); - } else { - seq_puts(m, "NONE\n"); - } - up_read(&squash->rsi_sem); - - return 0; -} - -static ssize_t ll_nosquash_nids_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct seq_file *m = file->private_data; - struct super_block *sb = m->private; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct root_squash_info *squash = &sbi->ll_squash; - int rc; - - rc = lprocfs_wr_nosquash_nids(buffer, count, squash, - ll_get_fsname(sb, NULL, 0)); - if (rc < 0) - return rc; - - ll_compute_rootsquash_state(sbi); - - return rc; -} - -LPROC_SEQ_FOPS(ll_nosquash_nids); - -static struct lprocfs_vars lprocfs_llite_obd_vars[] = { - /* { "mntpt_path", ll_rd_path, 0, 0 }, */ - { "site", &ll_site_stats_fops, NULL, 0 }, - /* { 
"filegroups", lprocfs_rd_filegroups, 0, 0 }, */ - { "max_cached_mb", &ll_max_cached_mb_fops, NULL }, - { "statahead_stats", &ll_statahead_stats_fops, NULL, 0 }, - { "unstable_stats", &ll_unstable_stats_fops, NULL }, - { "sbi_flags", &ll_sbi_flags_fops, NULL, 0 }, - { .name = "root_squash", - .fops = &ll_root_squash_fops }, - { .name = "nosquash_nids", - .fops = &ll_nosquash_nids_fops }, - { NULL } -}; - -#define MAX_STRING_SIZE 128 - -static struct attribute *llite_attrs[] = { - &lustre_attr_blocksize.attr, - &lustre_attr_kbytestotal.attr, - &lustre_attr_kbytesfree.attr, - &lustre_attr_kbytesavail.attr, - &lustre_attr_filestotal.attr, - &lustre_attr_filesfree.attr, - &lustre_attr_client_type.attr, - &lustre_attr_fstype.attr, - &lustre_attr_uuid.attr, - &lustre_attr_max_read_ahead_mb.attr, - &lustre_attr_max_read_ahead_per_file_mb.attr, - &lustre_attr_max_read_ahead_whole_mb.attr, - &lustre_attr_checksum_pages.attr, - &lustre_attr_stats_track_pid.attr, - &lustre_attr_stats_track_ppid.attr, - &lustre_attr_stats_track_gid.attr, - &lustre_attr_statahead_max.attr, - &lustre_attr_statahead_agl.attr, - &lustre_attr_lazystatfs.attr, - &lustre_attr_max_easize.attr, - &lustre_attr_default_easize.attr, - &lustre_attr_xattr_cache.attr, - NULL, -}; - -static void llite_sb_release(struct kobject *kobj) -{ - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); - complete(&sbi->ll_kobj_unregister); -} - -static struct kobj_type llite_ktype = { - .default_attrs = llite_attrs, - .sysfs_ops = &lustre_sysfs_ops, - .release = llite_sb_release, -}; - -static const struct llite_file_opcode { - __u32 opcode; - __u32 type; - const char *opname; -} llite_opcode_table[LPROC_LL_FILE_OPCODES] = { - /* file operation */ - { LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" }, - { LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" }, - { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES, - "read_bytes" }, - { LPROC_LL_WRITE_BYTES, 
LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES, - "write_bytes" }, - { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES, - "brw_read" }, - { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES, - "brw_write" }, - { LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" }, - { LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" }, - { LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" }, - { LPROC_LL_MAP, LPROCFS_TYPE_REGS, "mmap" }, - { LPROC_LL_LLSEEK, LPROCFS_TYPE_REGS, "seek" }, - { LPROC_LL_FSYNC, LPROCFS_TYPE_REGS, "fsync" }, - { LPROC_LL_READDIR, LPROCFS_TYPE_REGS, "readdir" }, - /* inode operation */ - { LPROC_LL_SETATTR, LPROCFS_TYPE_REGS, "setattr" }, - { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "truncate" }, - { LPROC_LL_FLOCK, LPROCFS_TYPE_REGS, "flock" }, - { LPROC_LL_GETATTR, LPROCFS_TYPE_REGS, "getattr" }, - /* dir inode operation */ - { LPROC_LL_CREATE, LPROCFS_TYPE_REGS, "create" }, - { LPROC_LL_LINK, LPROCFS_TYPE_REGS, "link" }, - { LPROC_LL_UNLINK, LPROCFS_TYPE_REGS, "unlink" }, - { LPROC_LL_SYMLINK, LPROCFS_TYPE_REGS, "symlink" }, - { LPROC_LL_MKDIR, LPROCFS_TYPE_REGS, "mkdir" }, - { LPROC_LL_RMDIR, LPROCFS_TYPE_REGS, "rmdir" }, - { LPROC_LL_MKNOD, LPROCFS_TYPE_REGS, "mknod" }, - { LPROC_LL_RENAME, LPROCFS_TYPE_REGS, "rename" }, - /* special inode operation */ - { LPROC_LL_STAFS, LPROCFS_TYPE_REGS, "statfs" }, - { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" }, - { LPROC_LL_SETXATTR, LPROCFS_TYPE_REGS, "setxattr" }, - { LPROC_LL_GETXATTR, LPROCFS_TYPE_REGS, "getxattr" }, - { LPROC_LL_GETXATTR_HITS, LPROCFS_TYPE_REGS, "getxattr_hits" }, - { LPROC_LL_LISTXATTR, LPROCFS_TYPE_REGS, "listxattr" }, - { LPROC_LL_REMOVEXATTR, LPROCFS_TYPE_REGS, "removexattr" }, - { LPROC_LL_INODE_PERM, LPROCFS_TYPE_REGS, "inode_permission" }, -}; - -void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) -{ - if (!sbi->ll_stats) - return; - if (sbi->ll_stats_track_type == STATS_TRACK_ALL) - lprocfs_counter_add(sbi->ll_stats, op, count); - else if 
(sbi->ll_stats_track_type == STATS_TRACK_PID && - sbi->ll_stats_track_id == current->pid) - lprocfs_counter_add(sbi->ll_stats, op, count); - else if (sbi->ll_stats_track_type == STATS_TRACK_PPID && - sbi->ll_stats_track_id == current->real_parent->pid) - lprocfs_counter_add(sbi->ll_stats, op, count); - else if (sbi->ll_stats_track_type == STATS_TRACK_GID && - sbi->ll_stats_track_id == - from_kgid(&init_user_ns, current_gid())) - lprocfs_counter_add(sbi->ll_stats, op, count); -} -EXPORT_SYMBOL(ll_stats_ops_tally); - -static const char *ra_stat_string[] = { - [RA_STAT_HIT] = "hits", - [RA_STAT_MISS] = "misses", - [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive", - [RA_STAT_MISS_IN_WINDOW] = "miss inside window", - [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page", - [RA_STAT_FAILED_MATCH] = "failed lock match", - [RA_STAT_DISCARDED] = "read but discarded", - [RA_STAT_ZERO_LEN] = "zero length file", - [RA_STAT_ZERO_WINDOW] = "zero size window", - [RA_STAT_EOF] = "read-ahead to EOF", - [RA_STAT_MAX_IN_FLIGHT] = "hit max r-a issue", - [RA_STAT_WRONG_GRAB_PAGE] = "wrong page from grab_cache_page", - [RA_STAT_FAILED_REACH_END] = "failed to reach end" -}; - -int ldebugfs_register_mountpoint(struct dentry *parent, - struct super_block *sb, char *osc, char *mdc) -{ - struct lustre_sb_info *lsi = s2lsi(sb); - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; - struct dentry *dir; - char name[MAX_STRING_SIZE + 1], *ptr; - int err, id, len; - - name[MAX_STRING_SIZE] = '\0'; - - LASSERT(sbi); - LASSERT(mdc); - LASSERT(osc); - - /* Get fsname */ - len = strlen(lsi->lsi_lmd->lmd_profile); - ptr = strrchr(lsi->lsi_lmd->lmd_profile, '-'); - if (ptr && (strcmp(ptr, "-client") == 0)) - len -= 7; - - /* Mount info */ - snprintf(name, MAX_STRING_SIZE, "%.*s-%p", len, - lsi->lsi_lmd->lmd_profile, sb); - - dir = debugfs_create_dir(name, parent); - sbi->ll_debugfs_entry = dir; - - debugfs_create_file("dump_page_cache", 0444, dir, sbi, - 
&vvp_dump_pgcache_file_ops); - debugfs_create_file("extents_stats", 0644, dir, sbi, - &ll_rw_extents_stats_fops); - debugfs_create_file("extents_stats_per_process", 0644, - dir, sbi, &ll_rw_extents_stats_pp_fops); - debugfs_create_file("offset_stats", 0644, dir, sbi, - &ll_rw_offset_stats_fops); - - /* File operations stats */ - sbi->ll_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES, - LPROCFS_STATS_FLAG_NONE); - if (!sbi->ll_stats) { - err = -ENOMEM; - goto out; - } - /* do counter init */ - for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) { - __u32 type = llite_opcode_table[id].type; - void *ptr = NULL; - - if (type & LPROCFS_TYPE_REGS) - ptr = "regs"; - else if (type & LPROCFS_TYPE_BYTES) - ptr = "bytes"; - else if (type & LPROCFS_TYPE_PAGES) - ptr = "pages"; - lprocfs_counter_init(sbi->ll_stats, - llite_opcode_table[id].opcode, - (type & LPROCFS_CNTR_AVGMINMAX), - llite_opcode_table[id].opname, ptr); - } - - debugfs_create_file("stats", 0644, sbi->ll_debugfs_entry, sbi->ll_stats, - &lprocfs_stats_seq_fops); - - sbi->ll_ra_stats = lprocfs_alloc_stats(ARRAY_SIZE(ra_stat_string), - LPROCFS_STATS_FLAG_NONE); - if (!sbi->ll_ra_stats) { - err = -ENOMEM; - goto out; - } - - for (id = 0; id < ARRAY_SIZE(ra_stat_string); id++) - lprocfs_counter_init(sbi->ll_ra_stats, id, 0, - ra_stat_string[id], "pages"); - - debugfs_create_file("stats", 0644, sbi->ll_debugfs_entry, - sbi->ll_ra_stats, &lprocfs_stats_seq_fops); - - ldebugfs_add_vars(sbi->ll_debugfs_entry, lprocfs_llite_obd_vars, sb); - - sbi->ll_kobj.kset = llite_kset; - init_completion(&sbi->ll_kobj_unregister); - err = kobject_init_and_add(&sbi->ll_kobj, &llite_ktype, NULL, - "%s", name); - if (err) - goto out; - - /* MDC info */ - obd = class_name2obd(mdc); - - err = sysfs_create_link(&sbi->ll_kobj, &obd->obd_kobj, - obd->obd_type->typ_name); - if (err) - goto out; - - /* OSC */ - obd = class_name2obd(osc); - - err = sysfs_create_link(&sbi->ll_kobj, &obd->obd_kobj, - obd->obd_type->typ_name); -out: - if (err) { - 
debugfs_remove_recursive(sbi->ll_debugfs_entry); - lprocfs_free_stats(&sbi->ll_ra_stats); - lprocfs_free_stats(&sbi->ll_stats); - } - return err; -} - -void ldebugfs_unregister_mountpoint(struct ll_sb_info *sbi) -{ - debugfs_remove_recursive(sbi->ll_debugfs_entry); - kobject_put(&sbi->ll_kobj); - wait_for_completion(&sbi->ll_kobj_unregister); - lprocfs_free_stats(&sbi->ll_ra_stats); - lprocfs_free_stats(&sbi->ll_stats); -} - -#undef MAX_STRING_SIZE - -#define pct(a, b) (b ? a * 100 / b : 0) - -static void ll_display_extents_info(struct ll_rw_extents_info *io_extents, - struct seq_file *seq, int which) -{ - unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; - unsigned long start, end, r, w; - char *unitp = "KMGTPEZY"; - int i, units = 10; - struct per_process_info *pp_info = &io_extents->pp_extents[which]; - - read_cum = 0; - write_cum = 0; - start = 0; - - for (i = 0; i < LL_HIST_MAX; i++) { - read_tot += pp_info->pp_r_hist.oh_buckets[i]; - write_tot += pp_info->pp_w_hist.oh_buckets[i]; - } - - for (i = 0; i < LL_HIST_MAX; i++) { - r = pp_info->pp_r_hist.oh_buckets[i]; - w = pp_info->pp_w_hist.oh_buckets[i]; - read_cum += r; - write_cum += w; - end = 1 << (i + LL_HIST_START - units); - seq_printf(seq, "%4lu%c - %4lu%c%c: %14lu %4lu %4lu | %14lu %4lu %4lu\n", - start, *unitp, end, *unitp, - (i == LL_HIST_MAX - 1) ? 
'+' : ' ', - r, pct(r, read_tot), pct(read_cum, read_tot), - w, pct(w, write_tot), pct(write_cum, write_tot)); - start = end; - if (start == 1024) { - start = 1; - units += 10; - unitp++; - } - if (read_cum == read_tot && write_cum == write_tot) - break; - } -} - -static int ll_rw_extents_stats_pp_seq_show(struct seq_file *seq, void *v) -{ - struct timespec64 now; - struct ll_sb_info *sbi = seq->private; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; - int k; - - ktime_get_real_ts64(&now); - - if (!sbi->ll_rw_stats_on) { - seq_printf(seq, "disabled\n" - "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n"); - return 0; - } - seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n", - (s64)now.tv_sec, (unsigned long)now.tv_nsec); - seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); - seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", - "extents", "calls", "%", "cum%", - "calls", "%", "cum%"); - spin_lock(&sbi->ll_pp_extent_lock); - for (k = 0; k < LL_PROCESS_HIST_MAX; k++) { - if (io_extents->pp_extents[k].pid != 0) { - seq_printf(seq, "\nPID: %d\n", - io_extents->pp_extents[k].pid); - ll_display_extents_info(io_extents, seq, k); - } - } - spin_unlock(&sbi->ll_pp_extent_lock); - return 0; -} - -static ssize_t ll_rw_extents_stats_pp_seq_write(struct file *file, - const char __user *buf, - size_t len, - loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct ll_sb_info *sbi = seq->private; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; - int i; - int value = 1, rc = 0; - - if (len == 0) - return -EINVAL; - - rc = lprocfs_write_helper(buf, len, &value); - if (rc < 0 && len < 16) { - char kernbuf[16]; - - if (copy_from_user(kernbuf, buf, len)) - return -EFAULT; - kernbuf[len] = 0; - - if (kernbuf[len - 1] == '\n') - kernbuf[len - 1] = 0; - - if (strcmp(kernbuf, "disabled") == 0 || - strcmp(kernbuf, "Disabled") == 0) - value = 0; - } - - if (value == 0) - 
sbi->ll_rw_stats_on = 0; - else - sbi->ll_rw_stats_on = 1; - - spin_lock(&sbi->ll_pp_extent_lock); - for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { - io_extents->pp_extents[i].pid = 0; - lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist); - lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist); - } - spin_unlock(&sbi->ll_pp_extent_lock); - return len; -} - -LPROC_SEQ_FOPS(ll_rw_extents_stats_pp); - -static int ll_rw_extents_stats_seq_show(struct seq_file *seq, void *v) -{ - struct timespec64 now; - struct ll_sb_info *sbi = seq->private; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; - - ktime_get_real_ts64(&now); - - if (!sbi->ll_rw_stats_on) { - seq_printf(seq, "disabled\n" - "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n"); - return 0; - } - seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n", - (u64)now.tv_sec, (unsigned long)now.tv_nsec); - - seq_printf(seq, "%15s %19s | %20s\n", " ", "read", "write"); - seq_printf(seq, "%13s %14s %4s %4s | %14s %4s %4s\n", - "extents", "calls", "%", "cum%", - "calls", "%", "cum%"); - spin_lock(&sbi->ll_lock); - ll_display_extents_info(io_extents, seq, LL_PROCESS_HIST_MAX); - spin_unlock(&sbi->ll_lock); - - return 0; -} - -static ssize_t ll_rw_extents_stats_seq_write(struct file *file, - const char __user *buf, - size_t len, loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct ll_sb_info *sbi = seq->private; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; - int i; - int value = 1, rc = 0; - - if (len == 0) - return -EINVAL; - - rc = lprocfs_write_helper(buf, len, &value); - if (rc < 0 && len < 16) { - char kernbuf[16]; - - if (copy_from_user(kernbuf, buf, len)) - return -EFAULT; - kernbuf[len] = 0; - - if (kernbuf[len - 1] == '\n') - kernbuf[len - 1] = 0; - - if (strcmp(kernbuf, "disabled") == 0 || - strcmp(kernbuf, "Disabled") == 0) - value = 0; - } - - if (value == 0) - sbi->ll_rw_stats_on = 0; - else - sbi->ll_rw_stats_on 
= 1; - - spin_lock(&sbi->ll_pp_extent_lock); - for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { - io_extents->pp_extents[i].pid = 0; - lprocfs_oh_clear(&io_extents->pp_extents[i].pp_r_hist); - lprocfs_oh_clear(&io_extents->pp_extents[i].pp_w_hist); - } - spin_unlock(&sbi->ll_pp_extent_lock); - - return len; -} - -LPROC_SEQ_FOPS(ll_rw_extents_stats); - -void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, - struct ll_file_data *file, loff_t pos, - size_t count, int rw) -{ - int i, cur = -1; - struct ll_rw_process_info *process; - struct ll_rw_process_info *offset; - int *off_count = &sbi->ll_rw_offset_entry_count; - int *process_count = &sbi->ll_offset_process_count; - struct ll_rw_extents_info *io_extents = &sbi->ll_rw_extents_info; - - if (!sbi->ll_rw_stats_on) - return; - process = sbi->ll_rw_process_info; - offset = sbi->ll_rw_offset_info; - - spin_lock(&sbi->ll_pp_extent_lock); - /* Extent statistics */ - for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { - if (io_extents->pp_extents[i].pid == pid) { - cur = i; - break; - } - } - - if (cur == -1) { - /* new process */ - sbi->ll_extent_process_count = - (sbi->ll_extent_process_count + 1) % LL_PROCESS_HIST_MAX; - cur = sbi->ll_extent_process_count; - io_extents->pp_extents[cur].pid = pid; - lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_r_hist); - lprocfs_oh_clear(&io_extents->pp_extents[cur].pp_w_hist); - } - - for (i = 0; (count >= (1 << LL_HIST_START << i)) && - (i < (LL_HIST_MAX - 1)); i++) - ; - if (rw == 0) { - io_extents->pp_extents[cur].pp_r_hist.oh_buckets[i]++; - io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_r_hist.oh_buckets[i]++; - } else { - io_extents->pp_extents[cur].pp_w_hist.oh_buckets[i]++; - io_extents->pp_extents[LL_PROCESS_HIST_MAX].pp_w_hist.oh_buckets[i]++; - } - spin_unlock(&sbi->ll_pp_extent_lock); - - spin_lock(&sbi->ll_process_lock); - /* Offset statistics */ - for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { - if (process[i].rw_pid == pid) { - if (process[i].rw_last_file != file) { - 
process[i].rw_range_start = pos; - process[i].rw_last_file_pos = pos + count; - process[i].rw_smallest_extent = count; - process[i].rw_largest_extent = count; - process[i].rw_offset = 0; - process[i].rw_last_file = file; - spin_unlock(&sbi->ll_process_lock); - return; - } - if (process[i].rw_last_file_pos != pos) { - *off_count = - (*off_count + 1) % LL_OFFSET_HIST_MAX; - offset[*off_count].rw_op = process[i].rw_op; - offset[*off_count].rw_pid = pid; - offset[*off_count].rw_range_start = - process[i].rw_range_start; - offset[*off_count].rw_range_end = - process[i].rw_last_file_pos; - offset[*off_count].rw_smallest_extent = - process[i].rw_smallest_extent; - offset[*off_count].rw_largest_extent = - process[i].rw_largest_extent; - offset[*off_count].rw_offset = - process[i].rw_offset; - process[i].rw_op = rw; - process[i].rw_range_start = pos; - process[i].rw_smallest_extent = count; - process[i].rw_largest_extent = count; - process[i].rw_offset = pos - - process[i].rw_last_file_pos; - } - if (process[i].rw_smallest_extent > count) - process[i].rw_smallest_extent = count; - if (process[i].rw_largest_extent < count) - process[i].rw_largest_extent = count; - process[i].rw_last_file_pos = pos + count; - spin_unlock(&sbi->ll_process_lock); - return; - } - } - *process_count = (*process_count + 1) % LL_PROCESS_HIST_MAX; - process[*process_count].rw_pid = pid; - process[*process_count].rw_op = rw; - process[*process_count].rw_range_start = pos; - process[*process_count].rw_last_file_pos = pos + count; - process[*process_count].rw_smallest_extent = count; - process[*process_count].rw_largest_extent = count; - process[*process_count].rw_offset = 0; - process[*process_count].rw_last_file = file; - spin_unlock(&sbi->ll_process_lock); -} - -static int ll_rw_offset_stats_seq_show(struct seq_file *seq, void *v) -{ - struct timespec64 now; - struct ll_sb_info *sbi = seq->private; - struct ll_rw_process_info *offset = sbi->ll_rw_offset_info; - struct ll_rw_process_info *process = 
sbi->ll_rw_process_info; - int i; - - ktime_get_real_ts64(&now); - - if (!sbi->ll_rw_stats_on) { - seq_printf(seq, "disabled\n" - "write anything in this file to activate, then 0 or \"[D/d]isabled\" to deactivate\n"); - return 0; - } - spin_lock(&sbi->ll_process_lock); - - seq_printf(seq, "snapshot_time: %llu.%09lu (secs.usecs)\n", - (s64)now.tv_sec, (unsigned long)now.tv_nsec); - seq_printf(seq, "%3s %10s %14s %14s %17s %17s %14s\n", - "R/W", "PID", "RANGE START", "RANGE END", - "SMALLEST EXTENT", "LARGEST EXTENT", "OFFSET"); - /* We stored the discontiguous offsets here; print them first */ - for (i = 0; i < LL_OFFSET_HIST_MAX; i++) { - if (offset[i].rw_pid != 0) - seq_printf(seq, - "%3c %10d %14Lu %14Lu %17lu %17lu %14Lu", - offset[i].rw_op == READ ? 'R' : 'W', - offset[i].rw_pid, - offset[i].rw_range_start, - offset[i].rw_range_end, - (unsigned long)offset[i].rw_smallest_extent, - (unsigned long)offset[i].rw_largest_extent, - offset[i].rw_offset); - } - /* Then print the current offsets for each process */ - for (i = 0; i < LL_PROCESS_HIST_MAX; i++) { - if (process[i].rw_pid != 0) - seq_printf(seq, - "%3c %10d %14Lu %14Lu %17lu %17lu %14Lu", - process[i].rw_op == READ ? 
'R' : 'W', - process[i].rw_pid, - process[i].rw_range_start, - process[i].rw_last_file_pos, - (unsigned long)process[i].rw_smallest_extent, - (unsigned long)process[i].rw_largest_extent, - process[i].rw_offset); - } - spin_unlock(&sbi->ll_process_lock); - - return 0; -} - -static ssize_t ll_rw_offset_stats_seq_write(struct file *file, - const char __user *buf, - size_t len, loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct ll_sb_info *sbi = seq->private; - struct ll_rw_process_info *process_info = sbi->ll_rw_process_info; - struct ll_rw_process_info *offset_info = sbi->ll_rw_offset_info; - int value = 1, rc = 0; - - if (len == 0) - return -EINVAL; - - rc = lprocfs_write_helper(buf, len, &value); - - if (rc < 0 && len < 16) { - char kernbuf[16]; - - if (copy_from_user(kernbuf, buf, len)) - return -EFAULT; - kernbuf[len] = 0; - - if (kernbuf[len - 1] == '\n') - kernbuf[len - 1] = 0; - - if (strcmp(kernbuf, "disabled") == 0 || - strcmp(kernbuf, "Disabled") == 0) - value = 0; - } - - if (value == 0) - sbi->ll_rw_stats_on = 0; - else - sbi->ll_rw_stats_on = 1; - - spin_lock(&sbi->ll_process_lock); - sbi->ll_offset_process_count = 0; - sbi->ll_rw_offset_entry_count = 0; - memset(process_info, 0, sizeof(struct ll_rw_process_info) * - LL_PROCESS_HIST_MAX); - memset(offset_info, 0, sizeof(struct ll_rw_process_info) * - LL_OFFSET_HIST_MAX); - spin_unlock(&sbi->ll_process_lock); - - return len; -} - -LPROC_SEQ_FOPS(ll_rw_offset_stats); - -void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars) -{ - lvars->obd_vars = lprocfs_llite_obd_vars; -} diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c deleted file mode 100644 index d5f6d20afe8c..000000000000 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ /dev/null @@ -1,1207 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include -#include "llite_internal.h" - -static int ll_create_it(struct inode *dir, struct dentry *dentry, - struct lookup_intent *it); - -/* called from iget5_locked->find_inode() under inode_hash_lock spinlock */ -static int ll_test_inode(struct inode *inode, void *opaque) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct lustre_md *md = opaque; - - if (unlikely(!(md->body->mbo_valid & OBD_MD_FLID))) { - CERROR("MDS body missing FID\n"); - return 0; - } - - if (!lu_fid_eq(&lli->lli_fid, &md->body->mbo_fid1)) - return 0; - - return 1; -} - -static int ll_set_inode(struct inode *inode, void *opaque) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct mdt_body *body = ((struct lustre_md *)opaque)->body; - - if (unlikely(!(body->mbo_valid & OBD_MD_FLID))) { - CERROR("MDS body missing FID\n"); - return -EINVAL; - } - - lli->lli_fid = body->mbo_fid1; - if (unlikely(!(body->mbo_valid & OBD_MD_FLTYPE))) { - CERROR("Can not initialize inode " DFID - " without object type: valid = %#llx\n", - PFID(&lli->lli_fid), body->mbo_valid); - return -EINVAL; - } - - inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mbo_mode & S_IFMT); - if (unlikely(inode->i_mode == 0)) { - CERROR("Invalid inode " DFID " type\n", PFID(&lli->lli_fid)); - return -EINVAL; - } - - ll_lli_init(lli); - - return 0; -} - -/** - * Get an inode by inode number(@hash), which is already instantiated by - * the intent lookup). 
- */ -struct inode *ll_iget(struct super_block *sb, ino_t hash, - struct lustre_md *md) -{ - struct inode *inode; - int rc = 0; - - LASSERT(hash != 0); - inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md); - if (!inode) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) { - rc = ll_read_inode2(inode, md); - if (!rc && S_ISREG(inode->i_mode) && - !ll_i2info(inode)->lli_clob) - rc = cl_file_inode_init(inode, md); - - if (rc) { - /* - * Let's clear directory lsm here, otherwise - * make_bad_inode() will reset the inode mode - * to regular, then ll_clear_inode will not - * be able to clear lsm_md - */ - if (S_ISDIR(inode->i_mode)) - ll_dir_clear_lsm_md(inode); - make_bad_inode(inode); - unlock_new_inode(inode); - iput(inode); - inode = ERR_PTR(rc); - } else { - unlock_new_inode(inode); - } - } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) { - rc = ll_update_inode(inode, md); - CDEBUG(D_VFSTRACE, "got inode: " DFID "(%p): rc = %d\n", - PFID(&md->body->mbo_fid1), inode, rc); - if (rc) { - if (S_ISDIR(inode->i_mode)) - ll_dir_clear_lsm_md(inode); - iput(inode); - inode = ERR_PTR(rc); - } - } - return inode; -} - -static void ll_invalidate_negative_children(struct inode *dir) -{ - struct dentry *dentry, *tmp_subdir; - - spin_lock(&dir->i_lock); - hlist_for_each_entry(dentry, &dir->i_dentry, d_u.d_alias) { - spin_lock(&dentry->d_lock); - if (!list_empty(&dentry->d_subdirs)) { - struct dentry *child; - - list_for_each_entry_safe(child, tmp_subdir, - &dentry->d_subdirs, - d_child) { - if (d_really_is_negative(child)) - d_lustre_invalidate(child, 1); - } - } - spin_unlock(&dentry->d_lock); - } - spin_unlock(&dir->i_lock); -} - -int ll_test_inode_by_fid(struct inode *inode, void *opaque) -{ - return lu_fid_eq(&ll_i2info(inode)->lli_fid, opaque); -} - -int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag) -{ - struct lustre_handle lockh; - int rc; - - switch (flag) { - case LDLM_CB_BLOCKING: - 
ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh, LCF_ASYNC); - if (rc < 0) { - CDEBUG(D_INODE, "ldlm_cli_cancel: rc = %d\n", rc); - return rc; - } - break; - case LDLM_CB_CANCELING: { - struct inode *inode = ll_inode_from_resource_lock(lock); - __u64 bits = lock->l_policy_data.l_inodebits.bits; - - /* Inode is set to lock->l_resource->lr_lvb_inode - * for mdc - bug 24555 - */ - LASSERT(!lock->l_ast_data); - - if (!inode) - break; - - /* Invalidate all dentries associated with this inode */ - LASSERT(ldlm_is_canceling(lock)); - - if (!fid_res_name_eq(ll_inode2fid(inode), - &lock->l_resource->lr_name)) { - LDLM_ERROR(lock, - "data mismatch with object " DFID "(%p)", - PFID(ll_inode2fid(inode)), inode); - LBUG(); - } - - if (bits & MDS_INODELOCK_XATTR) { - if (S_ISDIR(inode->i_mode)) - ll_i2info(inode)->lli_def_stripe_offset = -1; - ll_xattr_cache_destroy(inode); - bits &= ~MDS_INODELOCK_XATTR; - } - - /* For OPEN locks we differentiate between lock modes - * LCK_CR, LCK_CW, LCK_PR - bug 22891 - */ - if (bits & MDS_INODELOCK_OPEN) - ll_have_md_lock(inode, &bits, lock->l_req_mode); - - if (bits & MDS_INODELOCK_OPEN) { - fmode_t fmode; - - switch (lock->l_req_mode) { - case LCK_CW: - fmode = FMODE_WRITE; - break; - case LCK_PR: - fmode = FMODE_EXEC; - break; - case LCK_CR: - fmode = FMODE_READ; - break; - default: - LDLM_ERROR(lock, "bad lock mode for OPEN lock"); - LBUG(); - } - - ll_md_real_close(inode, fmode); - } - - if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE | - MDS_INODELOCK_LAYOUT | MDS_INODELOCK_PERM)) - ll_have_md_lock(inode, &bits, LCK_MINMODE); - - if (bits & MDS_INODELOCK_LAYOUT) { - struct cl_object_conf conf = { - .coc_opc = OBJECT_CONF_INVALIDATE, - .coc_inode = inode, - }; - - rc = ll_layout_conf(inode, &conf); - if (rc < 0) - CDEBUG(D_INODE, "cannot invalidate layout of " - DFID ": rc = %d\n", - PFID(ll_inode2fid(inode)), rc); - } - - if (bits & MDS_INODELOCK_UPDATE) { - struct ll_inode_info *lli = ll_i2info(inode); - - 
spin_lock(&lli->lli_lock); - LTIME_S(inode->i_mtime) = 0; - LTIME_S(inode->i_atime) = 0; - LTIME_S(inode->i_ctime) = 0; - spin_unlock(&lli->lli_lock); - } - - if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) { - struct ll_inode_info *lli = ll_i2info(inode); - - CDEBUG(D_INODE, "invalidating inode " DFID " lli = %p, pfid = " DFID "\n", - PFID(ll_inode2fid(inode)), lli, - PFID(&lli->lli_pfid)); - - truncate_inode_pages(inode->i_mapping, 0); - - if (unlikely(!fid_is_zero(&lli->lli_pfid))) { - struct inode *master_inode = NULL; - unsigned long hash; - - /* - * This is slave inode, since all of the child - * dentry is connected on the master inode, so - * we have to invalidate the negative children - * on master inode - */ - CDEBUG(D_INODE, - "Invalidate s" DFID " m" DFID "\n", - PFID(ll_inode2fid(inode)), - PFID(&lli->lli_pfid)); - - hash = cl_fid_build_ino(&lli->lli_pfid, - ll_need_32bit_api(ll_i2sbi(inode))); - /* - * Do not lookup the inode with ilookup5, - * otherwise it will cause dead lock, - * - * 1. Client1 send chmod req to the MDT0, then - * on MDT0, it enqueues master and all of its - * slaves lock, (mdt_attr_set() -> - * mdt_lock_slaves()), after gets master and - * stripe0 lock, it will send the enqueue req - * (for stripe1) to MDT1, then MDT1 finds the - * lock has been granted to client2. Then MDT1 - * sends blocking ast to client2. - * - * 2. At the same time, client2 tries to unlink - * the striped dir (rm -rf striped_dir), and - * during lookup, it will hold the master inode - * of the striped directory, whose inode state - * is NEW, then tries to revalidate all of its - * slaves, (ll_prep_inode()->ll_iget()-> - * ll_read_inode2()-> ll_update_inode().). And - * it will be blocked on the server side because - * of 1. - * - * 3. Then the client get the blocking_ast req, - * cancel the lock, but being blocked if using - * ->ilookup5()), because master inode state is - * NEW. 
- */ - master_inode = ilookup5_nowait(inode->i_sb, - hash, - ll_test_inode_by_fid, - (void *)&lli->lli_pfid); - if (master_inode) { - ll_invalidate_negative_children(master_inode); - iput(master_inode); - } - } else { - ll_invalidate_negative_children(inode); - } - } - - if ((bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)) && - inode->i_sb->s_root && - !is_root_inode(inode)) - ll_invalidate_aliases(inode); - - iput(inode); - break; - } - default: - LBUG(); - } - - return 0; -} - -__u32 ll_i2suppgid(struct inode *i) -{ - if (in_group_p(i->i_gid)) - return (__u32)from_kgid(&init_user_ns, i->i_gid); - else - return (__u32)(-1); -} - -/* Pack the required supplementary groups into the supplied groups array. - * If we don't need to use the groups from the target inode(s) then we - * instead pack one or more groups from the user's supplementary group - * array in case it might be useful. Not needed if doing an MDS-side upcall. - */ -void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) -{ - LASSERT(i1); - - suppgids[0] = ll_i2suppgid(i1); - - if (i2) - suppgids[1] = ll_i2suppgid(i2); - else - suppgids[1] = -1; -} - -/* - * Try to reuse unhashed or invalidated dentries. - * This is very similar to d_exact_alias(), and any changes in one should be - * considered for inclusion in the other. The differences are that we don't - * need an unhashed alias, and we don't want d_compare to be used for - * comparison. - */ -static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry) -{ - struct dentry *alias; - - if (hlist_empty(&inode->i_dentry)) - return NULL; - - spin_lock(&inode->i_lock); - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { - LASSERT(alias != dentry); - /* - * Don't need alias->d_lock here, because aliases with - * d_parent == entry->d_parent are not subject to name or - * parent changes, because the parent inode i_mutex is held. 
- */ - - if (alias->d_parent != dentry->d_parent) - continue; - if (alias->d_name.hash != dentry->d_name.hash) - continue; - if (alias->d_name.len != dentry->d_name.len || - memcmp(alias->d_name.name, dentry->d_name.name, - dentry->d_name.len) != 0) - continue; - spin_lock(&alias->d_lock); - dget_dlock(alias); - spin_unlock(&alias->d_lock); - spin_unlock(&inode->i_lock); - return alias; - } - spin_unlock(&inode->i_lock); - - return NULL; -} - -/* - * Similar to d_splice_alias(), but lustre treats invalid alias - * similar to DCACHE_DISCONNECTED, and tries to use it anyway. - */ -struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de) -{ - if (inode && !S_ISDIR(inode->i_mode)) { - struct dentry *new = ll_find_alias(inode, de); - - if (new) { - d_move(new, de); - iput(inode); - CDEBUG(D_DENTRY, - "Reuse dentry %p inode %p refc %d flags %#x\n", - new, d_inode(new), d_count(new), new->d_flags); - return new; - } - d_add(de, inode); - } else { - struct dentry *new = d_splice_alias(inode, de); - - if (IS_ERR(new)) - CDEBUG(D_DENTRY, - "splice inode %p as %pd gives error %lu\n", - inode, de, PTR_ERR(new)); - if (new) - de = new; - } - if (!IS_ERR(de)) - CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n", - de, d_inode(de), d_count(de), de->d_flags); - return de; -} - -static int ll_lookup_it_finish(struct ptlrpc_request *request, - struct lookup_intent *it, - struct inode *parent, struct dentry **de) -{ - struct inode *inode = NULL; - __u64 bits = 0; - int rc = 0; - struct dentry *alias; - - /* NB 1 request reference will be taken away by ll_intent_lock() - * when I return - */ - CDEBUG(D_DENTRY, "it %p it_disposition %x\n", it, - it->it_disposition); - if (!it_disposition(it, DISP_LOOKUP_NEG)) { - rc = ll_prep_inode(&inode, request, (*de)->d_sb, it); - if (rc) - return rc; - - ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits); - - /* We used to query real size from OSTs here, but actually - * this is not needed. 
For stat() calls size would be updated - * from subsequent do_revalidate()->ll_inode_revalidate_it() in - * 2.4 and - * vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6 - * Everybody else who needs correct file size would call - * ll_glimpse_size or some equivalent themselves anyway. - * Also see bug 7198. - */ - } - - alias = ll_splice_alias(inode, *de); - if (IS_ERR(alias)) { - rc = PTR_ERR(alias); - goto out; - } - *de = alias; - - if (!it_disposition(it, DISP_LOOKUP_NEG)) { - /* We have the "lookup" lock, so unhide dentry */ - if (bits & MDS_INODELOCK_LOOKUP) - d_lustre_revalidate(*de); - } else if (!it_disposition(it, DISP_OPEN_CREATE)) { - /* If file created on server, don't depend on parent UPDATE - * lock to unhide it. It is left hidden and next lookup can - * find it in ll_splice_alias. - */ - /* Check that parent has UPDATE lock. */ - struct lookup_intent parent_it = { - .it_op = IT_GETATTR, - .it_lock_handle = 0 }; - struct lu_fid fid = ll_i2info(parent)->lli_fid; - - /* If it is striped directory, get the real stripe parent */ - if (unlikely(ll_i2info(parent)->lli_lsm_md)) { - rc = md_get_fid_from_lsm(ll_i2mdexp(parent), - ll_i2info(parent)->lli_lsm_md, - (*de)->d_name.name, - (*de)->d_name.len, &fid); - if (rc) - return rc; - } - - if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it, &fid, - NULL)) { - d_lustre_revalidate(*de); - ll_intent_release(&parent_it); - } - } - -out: - if (rc != 0 && it->it_op & IT_OPEN) - ll_open_cleanup((*de)->d_sb, request); - - return rc; -} - -static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, - struct lookup_intent *it) -{ - struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; - struct dentry *save = dentry, *retval; - struct ptlrpc_request *req = NULL; - struct md_op_data *op_data = NULL; - struct inode *inode; - __u32 opc; - int rc; - - if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen) - return ERR_PTR(-ENAMETOOLONG); - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" 
DFID "(%p),intent=%s\n", - dentry, PFID(ll_inode2fid(parent)), parent, LL_IT2STR(it)); - - if (d_mountpoint(dentry)) - CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it)); - - if (!it || it->it_op == IT_GETXATTR) - it = &lookup_it; - - if (it->it_op == IT_GETATTR && dentry_may_statahead(parent, dentry)) { - rc = ll_statahead(parent, &dentry, 0); - if (rc == 1) { - if (dentry == save) - retval = NULL; - else - retval = dentry; - goto out; - } - } - - if (it->it_op & IT_OPEN && it->it_flags & FMODE_WRITE && sb_rdonly(dentry->d_sb)) - return ERR_PTR(-EROFS); - - if (it->it_op & IT_CREAT) - opc = LUSTRE_OPC_CREATE; - else - opc = LUSTRE_OPC_ANY; - - op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name, - dentry->d_name.len, 0, opc, NULL); - if (IS_ERR(op_data)) - return (void *)op_data; - - /* enforce umask if acl disabled or MDS doesn't support umask */ - if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent))) - it->it_create_mode &= ~current_umask(); - - rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req, - &ll_md_blocking_ast, 0); - /* - * If the MDS allows the client to chgrp (CFS_SETGRP_PERM), but the - * client does not know which suppgid should be sent to the MDS, or - * some other(s) changed the target file's GID after this RPC sent - * to the MDS with the suppgid as the original GID, then we should - * try again with right suppgid. 
- */ - if (rc == -EACCES && it->it_op & IT_OPEN && - it_disposition(it, DISP_OPEN_DENY)) { - struct mdt_body *body; - - LASSERT(req); - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (op_data->op_suppgids[0] == body->mbo_gid || - op_data->op_suppgids[1] == body->mbo_gid || - !in_group_p(make_kgid(&init_user_ns, body->mbo_gid))) { - retval = ERR_PTR(-EACCES); - goto out; - } - - fid_zero(&op_data->op_fid2); - op_data->op_suppgids[1] = body->mbo_gid; - ptlrpc_req_finished(req); - req = NULL; - ll_intent_release(it); - rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req, - ll_md_blocking_ast, 0); - } - - if (rc < 0) { - retval = ERR_PTR(rc); - goto out; - } - - rc = ll_lookup_it_finish(req, it, parent, &dentry); - if (rc != 0) { - ll_intent_release(it); - retval = ERR_PTR(rc); - goto out; - } - - inode = d_inode(dentry); - if ((it->it_op & IT_OPEN) && inode && - !S_ISREG(inode->i_mode) && - !S_ISDIR(inode->i_mode)) { - ll_release_openhandle(inode, it); - } - ll_lookup_finish_locks(it, inode); - - if (dentry == save) - retval = NULL; - else - retval = dentry; -out: - if (op_data && !IS_ERR(op_data)) - ll_finish_md_op_data(op_data); - - ptlrpc_req_finished(req); - return retval; -} - -static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, - unsigned int flags) -{ - struct lookup_intent *itp, it = { .it_op = IT_GETATTR }; - struct dentry *de; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),flags=%u\n", - dentry, PFID(ll_inode2fid(parent)), parent, flags); - - /* Optimize away (CREATE && !OPEN). Let .create handle the race. - * but only if we have write permissions there, otherwise we need - * to proceed with lookup. 
LU-4185 - */ - if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) && - (inode_permission(parent, MAY_WRITE | MAY_EXEC) == 0)) - return NULL; - - if (flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE)) - itp = NULL; - else - itp = ⁢ - de = ll_lookup_it(parent, dentry, itp); - - if (itp) - ll_intent_release(itp); - - return de; -} - -/* - * For cached negative dentry and new dentry, handle lookup/create/open - * together. - */ -static int ll_atomic_open(struct inode *dir, struct dentry *dentry, - struct file *file, unsigned int open_flags, - umode_t mode, int *opened) -{ - struct lookup_intent *it; - struct dentry *de; - int rc = 0; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),file %p,open_flags %x,mode %x opened %d\n", - dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode, - *opened); - - /* Only negative dentries enter here */ - LASSERT(!d_inode(dentry)); - - if (!d_in_lookup(dentry)) { - /* A valid negative dentry that just passed revalidation, - * there's little point to try and open it server-side, - * even though there's a minuscle chance it might succeed. - * Either way it's a valid race to just return -ENOENT here. - */ - if (!(open_flags & O_CREAT)) - return -ENOENT; - - /* Otherwise we just unhash it to be rehashed afresh via - * lookup if necessary - */ - d_drop(dentry); - } - - it = kzalloc(sizeof(*it), GFP_NOFS); - if (!it) - return -ENOMEM; - - it->it_op = IT_OPEN; - if (open_flags & O_CREAT) - it->it_op |= IT_CREAT; - it->it_create_mode = (mode & S_IALLUGO) | S_IFREG; - it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags); - it->it_flags &= ~MDS_OPEN_FL_INTERNAL; - - /* Dentry added to dcache tree in ll_lookup_it */ - de = ll_lookup_it(dir, dentry, it); - if (IS_ERR(de)) - rc = PTR_ERR(de); - else if (de) - dentry = de; - - if (!rc) { - if (it_disposition(it, DISP_OPEN_CREATE)) { - /* Dentry instantiated in ll_create_it. */ - rc = ll_create_it(dir, dentry, it); - if (rc) { - /* We dget in ll_splice_alias. 
*/ - if (de) - dput(de); - goto out_release; - } - - *opened |= FILE_CREATED; - } - if (d_really_is_positive(dentry) && - it_disposition(it, DISP_OPEN_OPEN)) { - /* Open dentry. */ - if (S_ISFIFO(d_inode(dentry)->i_mode)) { - /* We cannot call open here as it might - * deadlock. This case is unreachable in - * practice because of OBD_CONNECT_NODEVOH. - */ - rc = finish_no_open(file, de); - } else { - file->private_data = it; - rc = finish_open(file, dentry, NULL, opened); - /* We dget in ll_splice_alias. finish_open takes - * care of dget for fd open. - */ - if (de) - dput(de); - } - } else { - rc = finish_no_open(file, de); - } - } - -out_release: - ll_intent_release(it); - kfree(it); - - return rc; -} - -/* We depend on "mode" being set with the proper file type/umask by now */ -static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it) -{ - struct inode *inode = NULL; - struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(dir); - int rc; - - LASSERT(it && it->it_disposition); - - LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF)); - request = it->it_request; - it_clear_disposition(it, DISP_ENQ_CREATE_REF); - rc = ll_prep_inode(&inode, request, dir->i_sb, it); - if (rc) { - inode = ERR_PTR(rc); - goto out; - } - - LASSERT(hlist_empty(&inode->i_dentry)); - - /* We asked for a lock on the directory, but were granted a - * lock on the inode. Since we finally have an inode pointer, - * stuff it in the lock. - */ - CDEBUG(D_DLMTRACE, "setting l_ast_data to inode " DFID "(%p)\n", - PFID(ll_inode2fid(dir)), inode); - ll_set_lock_data(sbi->ll_md_exp, inode, it, NULL); - out: - ptlrpc_req_finished(request); - return inode; -} - -/* - * By the time this is called, we already have created the directory cache - * entry for the new file, but it is so far negative - it has no inode. 
- * - * We defer creating the OBD object(s) until open, to keep the intent and - * non-intent code paths similar, and also because we do not have the MDS - * inode number before calling ll_create_node() (which is needed for LOV), - * so we would need to do yet another RPC to the MDS to store the LOV EA - * data on the MDS. If needed, we would pass the PACKED lmm as data and - * lmm_size in datalen (the MDS still has code which will handle that). - * - * If the create succeeds, we fill in the inode information - * with d_instantiate(). - */ -static int ll_create_it(struct inode *dir, struct dentry *dentry, - struct lookup_intent *it) -{ - struct inode *inode; - int rc = 0; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p), intent=%s\n", - dentry, PFID(ll_inode2fid(dir)), dir, LL_IT2STR(it)); - - rc = it_open_error(DISP_OPEN_CREATE, it); - if (rc) - return rc; - - inode = ll_create_node(dir, it); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - d_instantiate(dentry, inode); - - return ll_init_security(dentry, inode, dir); -} - -void ll_update_times(struct ptlrpc_request *request, struct inode *inode) -{ - struct mdt_body *body = req_capsule_server_get(&request->rq_pill, - &RMF_MDT_BODY); - - LASSERT(body); - if (body->mbo_valid & OBD_MD_FLMTIME && - body->mbo_mtime > LTIME_S(inode->i_mtime)) { - CDEBUG(D_INODE, "setting fid " DFID " mtime from %lu to %llu\n", - PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime), - body->mbo_mtime); - LTIME_S(inode->i_mtime) = body->mbo_mtime; - } - if (body->mbo_valid & OBD_MD_FLCTIME && - body->mbo_ctime > LTIME_S(inode->i_ctime)) - LTIME_S(inode->i_ctime) = body->mbo_ctime; -} - -static int ll_new_node(struct inode *dir, struct dentry *dentry, - const char *tgt, umode_t mode, int rdev, - __u32 opc) -{ - struct ptlrpc_request *request = NULL; - struct md_op_data *op_data; - struct inode *inode = NULL; - struct ll_sb_info *sbi = ll_i2sbi(dir); - int tgt_len = 0; - int err; - - if (unlikely(tgt)) - tgt_len = strlen(tgt) + 1; 
-again: - op_data = ll_prep_md_op_data(NULL, dir, NULL, - dentry->d_name.name, - dentry->d_name.len, - 0, opc, NULL); - if (IS_ERR(op_data)) { - err = PTR_ERR(op_data); - goto err_exit; - } - - err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode, - from_kuid(&init_user_ns, current_fsuid()), - from_kgid(&init_user_ns, current_fsgid()), - current_cap(), rdev, &request); - ll_finish_md_op_data(op_data); - if (err < 0 && err != -EREMOTE) - goto err_exit; - - /* - * If the client doesn't know where to create a subdirectory (or - * in case of a race that sends the RPC to the wrong MDS), the - * MDS will return -EREMOTE and the client will fetch the layout - * of the directory, then create the directory on the right MDT. - */ - if (unlikely(err == -EREMOTE)) { - struct ll_inode_info *lli = ll_i2info(dir); - struct lmv_user_md *lum; - int lumsize, err2; - - ptlrpc_req_finished(request); - request = NULL; - - err2 = ll_dir_getstripe(dir, (void **)&lum, &lumsize, &request, - OBD_MD_DEFAULT_MEA); - if (!err2) { - /* Update stripe_offset and retry */ - lli->lli_def_stripe_offset = lum->lum_stripe_offset; - } else if (err2 == -ENODATA && - lli->lli_def_stripe_offset != -1) { - /* - * If there are no default stripe EA on the MDT, but the - * client has default stripe, then it probably means - * default stripe EA has just been deleted. 
- */ - lli->lli_def_stripe_offset = -1; - } else { - goto err_exit; - } - - ptlrpc_req_finished(request); - request = NULL; - goto again; - } - - ll_update_times(request, dir); - - err = ll_prep_inode(&inode, request, dir->i_sb, NULL); - if (err) - goto err_exit; - - d_instantiate(dentry, inode); - - err = ll_init_security(dentry, inode, dir); -err_exit: - if (request) - ptlrpc_req_finished(request); - - return err; -} - -static int ll_mknod(struct inode *dir, struct dentry *dchild, - umode_t mode, dev_t rdev) -{ - int err; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p) mode %o dev %x\n", - dchild, PFID(ll_inode2fid(dir)), dir, mode, - old_encode_dev(rdev)); - - if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) - mode &= ~current_umask(); - - switch (mode & S_IFMT) { - case 0: - mode |= S_IFREG; - /* for mode = 0 case */ - /* fall through */ - case S_IFREG: - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: - err = ll_new_node(dir, dchild, NULL, mode, - old_encode_dev(rdev), - LUSTRE_OPC_MKNOD); - break; - case S_IFDIR: - err = -EPERM; - break; - default: - err = -EINVAL; - } - - if (!err) - ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD, 1); - - return err; -} - -/* - * Plain create. Intent create is handled in atomic_open. 
- */ -static int ll_create_nd(struct inode *dir, struct dentry *dentry, - umode_t mode, bool want_excl) -{ - int rc; - - CDEBUG(D_VFSTRACE, - "VFS Op:name=%pd, dir=" DFID "(%p), flags=%u, excl=%d\n", - dentry, PFID(ll_inode2fid(dir)), dir, mode, want_excl); - - rc = ll_mknod(dir, dentry, mode, 0); - - ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, 1); - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, unhashed %d\n", - dentry, d_unhashed(dentry)); - - return rc; -} - -static int ll_unlink(struct inode *dir, struct dentry *dchild) -{ - struct ptlrpc_request *request = NULL; - struct md_op_data *op_data; - int rc; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd,dir=%lu/%u(%p)\n", - dchild, dir->i_ino, dir->i_generation, dir); - - op_data = ll_prep_md_op_data(NULL, dir, NULL, - dchild->d_name.name, - dchild->d_name.len, - 0, LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - if (dchild->d_inode) - op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); - - op_data->op_fid2 = op_data->op_fid3; - rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); - ll_finish_md_op_data(op_data); - if (rc) - goto out; - - ll_update_times(request, dir); - ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, 1); - - out: - ptlrpc_req_finished(request); - return rc; -} - -static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - int err; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir" DFID "(%p)\n", - dentry, PFID(ll_inode2fid(dir)), dir); - - if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir))) - mode &= ~current_umask(); - mode = (mode & (0777 | S_ISVTX)) | S_IFDIR; - - err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR); - if (!err) - ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, 1); - - return err; -} - -static int ll_rmdir(struct inode *dir, struct dentry *dchild) -{ - struct ptlrpc_request *request = NULL; - struct md_op_data *op_data; - int rc; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p)\n", - dchild, 
PFID(ll_inode2fid(dir)), dir); - - op_data = ll_prep_md_op_data(NULL, dir, NULL, - dchild->d_name.name, - dchild->d_name.len, - S_IFDIR, LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - if (dchild->d_inode) - op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); - - op_data->op_fid2 = op_data->op_fid3; - rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); - ll_finish_md_op_data(op_data); - if (rc == 0) { - ll_update_times(request, dir); - ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_RMDIR, 1); - } - - ptlrpc_req_finished(request); - return rc; -} - -static int ll_symlink(struct inode *dir, struct dentry *dentry, - const char *oldname) -{ - int err; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir=" DFID "(%p),target=%.*s\n", - dentry, PFID(ll_inode2fid(dir)), dir, 3000, oldname); - - err = ll_new_node(dir, dentry, oldname, S_IFLNK | 0777, - 0, LUSTRE_OPC_SYMLINK); - - if (!err) - ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK, 1); - - return err; -} - -static int ll_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *new_dentry) -{ - struct inode *src = d_inode(old_dentry); - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct ptlrpc_request *request = NULL; - struct md_op_data *op_data; - int err; - - CDEBUG(D_VFSTRACE, - "VFS Op: inode=" DFID "(%p), dir=" DFID "(%p), target=%pd\n", - PFID(ll_inode2fid(src)), src, PFID(ll_inode2fid(dir)), dir, - new_dentry); - - op_data = ll_prep_md_op_data(NULL, src, dir, new_dentry->d_name.name, - new_dentry->d_name.len, - 0, LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - err = md_link(sbi->ll_md_exp, op_data, &request); - ll_finish_md_op_data(op_data); - if (err) - goto out; - - ll_update_times(request, dir); - ll_stats_ops_tally(sbi, LPROC_LL_LINK, 1); -out: - ptlrpc_req_finished(request); - return err; -} - -static int ll_rename(struct inode *src, struct dentry *src_dchild, - struct inode *tgt, struct dentry *tgt_dchild, - unsigned int flags) -{ - 
struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(src); - struct md_op_data *op_data; - int err; - - if (flags) - return -EINVAL; - - CDEBUG(D_VFSTRACE, - "VFS Op:oldname=%pd, src_dir=" DFID "(%p), newname=%pd, tgt_dir=" DFID "(%p)\n", - src_dchild, PFID(ll_inode2fid(src)), src, - tgt_dchild, PFID(ll_inode2fid(tgt)), tgt); - - op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - if (src_dchild->d_inode) - op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode); - if (tgt_dchild->d_inode) - op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode); - - err = md_rename(sbi->ll_md_exp, op_data, - src_dchild->d_name.name, - src_dchild->d_name.len, - tgt_dchild->d_name.name, - tgt_dchild->d_name.len, &request); - ll_finish_md_op_data(op_data); - if (!err) { - ll_update_times(request, src); - ll_update_times(request, tgt); - ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1); - } - - ptlrpc_req_finished(request); - if (!err) - d_move(src_dchild, tgt_dchild); - return err; -} - -const struct inode_operations ll_dir_inode_operations = { - .mknod = ll_mknod, - .atomic_open = ll_atomic_open, - .lookup = ll_lookup_nd, - .create = ll_create_nd, - /* We need all these non-raw things for NFSD, to not patch it. 
*/ - .unlink = ll_unlink, - .mkdir = ll_mkdir, - .rmdir = ll_rmdir, - .symlink = ll_symlink, - .link = ll_link, - .rename = ll_rename, - .setattr = ll_setattr, - .getattr = ll_getattr, - .permission = ll_inode_permission, - .listxattr = ll_listxattr, - .get_acl = ll_get_acl, -}; - -const struct inode_operations ll_special_inode_operations = { - .setattr = ll_setattr, - .getattr = ll_getattr, - .permission = ll_inode_permission, - .listxattr = ll_listxattr, - .get_acl = ll_get_acl, -}; diff --git a/drivers/staging/lustre/lustre/llite/range_lock.c b/drivers/staging/lustre/lustre/llite/range_lock.c deleted file mode 100644 index 008a8874118d..000000000000 --- a/drivers/staging/lustre/lustre/llite/range_lock.c +++ /dev/null @@ -1,241 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Range lock is used to allow multiple threads writing a single shared - * file given each thread is writing to a non-overlapping portion of the - * file. - * - * Refer to the possible upstream kernel version of range lock by - * Jan Kara : https://lkml.org/lkml/2013/1/31/480 - * - * This file could later replaced by the upstream kernel version. 
- */ -/* - * Author: Prakash Surya - * Author: Bobi Jam - */ -#include "range_lock.h" -#include -#include - -/** - * Initialize a range lock tree - * - * \param tree [in] an empty range lock tree - * - * Pre: Caller should have allocated the range lock tree. - * Post: The range lock tree is ready to function. - */ -void range_lock_tree_init(struct range_lock_tree *tree) -{ - tree->rlt_root = NULL; - tree->rlt_sequence = 0; - spin_lock_init(&tree->rlt_lock); -} - -/** - * Initialize a range lock node - * - * \param lock [in] an empty range lock node - * \param start [in] start of the covering region - * \param end [in] end of the covering region - * - * Pre: Caller should have allocated the range lock node. - * Post: The range lock node is meant to cover [start, end] region - */ -int range_lock_init(struct range_lock *lock, __u64 start, __u64 end) -{ - int rc; - - memset(&lock->rl_node, 0, sizeof(lock->rl_node)); - if (end != LUSTRE_EOF) - end >>= PAGE_SHIFT; - rc = interval_set(&lock->rl_node, start >> PAGE_SHIFT, end); - if (rc) - return rc; - - INIT_LIST_HEAD(&lock->rl_next_lock); - lock->rl_task = NULL; - lock->rl_lock_count = 0; - lock->rl_blocking_ranges = 0; - lock->rl_sequence = 0; - return rc; -} - -static inline struct range_lock *next_lock(struct range_lock *lock) -{ - return list_entry(lock->rl_next_lock.next, typeof(*lock), rl_next_lock); -} - -/** - * Helper function of range_unlock() - * - * \param node [in] a range lock found overlapped during interval node - * search - * \param arg [in] the range lock to be tested - * - * \retval INTERVAL_ITER_CONT indicate to continue the search for next - * overlapping range node - * \retval INTERVAL_ITER_STOP indicate to stop the search - */ -static enum interval_iter range_unlock_cb(struct interval_node *node, void *arg) -{ - struct range_lock *lock = arg; - struct range_lock *overlap = node2rangelock(node); - struct range_lock *iter; - - list_for_each_entry(iter, &overlap->rl_next_lock, rl_next_lock) { - if 
(iter->rl_sequence > lock->rl_sequence) { - --iter->rl_blocking_ranges; - LASSERT(iter->rl_blocking_ranges > 0); - } - } - if (overlap->rl_sequence > lock->rl_sequence) { - --overlap->rl_blocking_ranges; - if (overlap->rl_blocking_ranges == 0) - wake_up_process(overlap->rl_task); - } - return INTERVAL_ITER_CONT; -} - -/** - * Unlock a range lock, wake up locks blocked by this lock. - * - * \param tree [in] range lock tree - * \param lock [in] range lock to be deleted - * - * If this lock has been granted, relase it; if not, just delete it from - * the tree or the same region lock list. Wake up those locks only blocked - * by this lock through range_unlock_cb(). - */ -void range_unlock(struct range_lock_tree *tree, struct range_lock *lock) -{ - spin_lock(&tree->rlt_lock); - if (!list_empty(&lock->rl_next_lock)) { - struct range_lock *next; - - if (interval_is_intree(&lock->rl_node)) { /* first lock */ - /* Insert the next same range lock into the tree */ - next = next_lock(lock); - next->rl_lock_count = lock->rl_lock_count - 1; - interval_erase(&lock->rl_node, &tree->rlt_root); - interval_insert(&next->rl_node, &tree->rlt_root); - } else { - /* find the first lock in tree */ - list_for_each_entry(next, &lock->rl_next_lock, - rl_next_lock) { - if (!interval_is_intree(&next->rl_node)) - continue; - - LASSERT(next->rl_lock_count > 0); - next->rl_lock_count--; - break; - } - } - list_del_init(&lock->rl_next_lock); - } else { - LASSERT(interval_is_intree(&lock->rl_node)); - interval_erase(&lock->rl_node, &tree->rlt_root); - } - - interval_search(tree->rlt_root, &lock->rl_node.in_extent, - range_unlock_cb, lock); - spin_unlock(&tree->rlt_lock); -} - -/** - * Helper function of range_lock() - * - * \param node [in] a range lock found overlapped during interval node - * search - * \param arg [in] the range lock to be tested - * - * \retval INTERVAL_ITER_CONT indicate to continue the search for next - * overlapping range node - * \retval INTERVAL_ITER_STOP indicate to stop 
the search - */ -static enum interval_iter range_lock_cb(struct interval_node *node, void *arg) -{ - struct range_lock *lock = arg; - struct range_lock *overlap = node2rangelock(node); - - lock->rl_blocking_ranges += overlap->rl_lock_count + 1; - return INTERVAL_ITER_CONT; -} - -/** - * Lock a region - * - * \param tree [in] range lock tree - * \param lock [in] range lock node containing the region span - * - * \retval 0 get the range lock - * \retval <0 error code while not getting the range lock - * - * If there exists overlapping range lock, the new lock will wait and - * retry, if later it find that it is not the chosen one to wake up, - * it wait again. - */ -int range_lock(struct range_lock_tree *tree, struct range_lock *lock) -{ - struct interval_node *node; - int rc = 0; - - spin_lock(&tree->rlt_lock); - /* - * We need to check for all conflicting intervals - * already in the tree. - */ - interval_search(tree->rlt_root, &lock->rl_node.in_extent, - range_lock_cb, lock); - /* - * Insert to the tree if I am unique, otherwise I've been linked to - * the rl_next_lock of another lock which has the same range as mine - * in range_lock_cb(). 
- */ - node = interval_insert(&lock->rl_node, &tree->rlt_root); - if (node) { - struct range_lock *tmp = node2rangelock(node); - - list_add_tail(&lock->rl_next_lock, &tmp->rl_next_lock); - tmp->rl_lock_count++; - } - lock->rl_sequence = ++tree->rlt_sequence; - - while (lock->rl_blocking_ranges > 0) { - lock->rl_task = current; - __set_current_state(TASK_INTERRUPTIBLE); - spin_unlock(&tree->rlt_lock); - schedule(); - - if (signal_pending(current)) { - range_unlock(tree, lock); - rc = -EINTR; - goto out; - } - spin_lock(&tree->rlt_lock); - } - spin_unlock(&tree->rlt_lock); -out: - return rc; -} diff --git a/drivers/staging/lustre/lustre/llite/range_lock.h b/drivers/staging/lustre/lustre/llite/range_lock.h deleted file mode 100644 index 9ebac09160f2..000000000000 --- a/drivers/staging/lustre/lustre/llite/range_lock.h +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Range lock is used to allow multiple threads writing a single shared - * file given each thread is writing to a non-overlapping portion of the - * file. 
- * - * Refer to the possible upstream kernel version of range lock by - * Jan Kara : https://lkml.org/lkml/2013/1/31/480 - * - * This file could later replaced by the upstream kernel version. - */ -/* - * Author: Prakash Surya - * Author: Bobi Jam - */ -#ifndef _RANGE_LOCK_H -#define _RANGE_LOCK_H - -#include -#include - -struct range_lock { - struct interval_node rl_node; - /** - * Process to enqueue this lock. - */ - struct task_struct *rl_task; - /** - * List of locks with the same range. - */ - struct list_head rl_next_lock; - /** - * Number of locks in the list rl_next_lock - */ - unsigned int rl_lock_count; - /** - * Number of ranges which are blocking acquisition of the lock - */ - unsigned int rl_blocking_ranges; - /** - * Sequence number of range lock. This number is used to get to know - * the order the locks are queued; this is required for range_cancel(). - */ - __u64 rl_sequence; -}; - -static inline struct range_lock *node2rangelock(const struct interval_node *n) -{ - return container_of(n, struct range_lock, rl_node); -} - -struct range_lock_tree { - struct interval_node *rlt_root; - spinlock_t rlt_lock; /* protect range lock tree */ - __u64 rlt_sequence; -}; - -void range_lock_tree_init(struct range_lock_tree *tree); -int range_lock_init(struct range_lock *lock, __u64 start, __u64 end); -int range_lock(struct range_lock_tree *tree, struct range_lock *lock); -void range_unlock(struct range_lock_tree *tree, struct range_lock *lock); -#endif diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c deleted file mode 100644 index 3e008ce7275d..000000000000 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ /dev/null @@ -1,1214 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/llite/rw.c - * - * Lustre Lite I/O page cache routines shared by different kernel revs - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -/* current_is_kswapd() */ -#include -#include - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include "llite_internal.h" - -static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which); - -/** - * Get readahead pages from the filesystem readahead pool of the client for a - * thread. - * - * /param sbi superblock for filesystem readahead state ll_ra_info - * /param ria per-thread readahead state - * /param pages number of pages requested for readahead for the thread. - * - * WARNING: This algorithm is used to reduce contention on sbi->ll_lock. - * It should work well if the ra_max_pages is much greater than the single - * file's read-ahead window, and not too many threads contending for - * these readahead pages. 
- * - * TODO: There may be a 'global sync problem' if many threads are trying - * to get an ra budget that is larger than the remaining readahead pages - * and reach here at exactly the same time. They will compute /a ret to - * consume the remaining pages, but will fail at atomic_add_return() and - * get a zero ra window, although there is still ra space remaining. - Jay - */ -static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, - struct ra_io_arg *ria, - unsigned long pages, unsigned long min) -{ - struct ll_ra_info *ra = &sbi->ll_ra_info; - long ret; - - /* If read-ahead pages left are less than 1M, do not do read-ahead, - * otherwise it will form small read RPC(< 1M), which hurt server - * performance a lot. - */ - ret = min(ra->ra_max_pages - atomic_read(&ra->ra_cur_pages), pages); - if (ret < 0 || ret < min_t(long, PTLRPC_MAX_BRW_PAGES, pages)) { - ret = 0; - goto out; - } - - if (atomic_add_return(ret, &ra->ra_cur_pages) > ra->ra_max_pages) { - atomic_sub(ret, &ra->ra_cur_pages); - ret = 0; - } - -out: - if (ret < min) { - /* override ra limit for maximum performance */ - atomic_add(min - ret, &ra->ra_cur_pages); - ret = min; - } - return ret; -} - -void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len) -{ - struct ll_ra_info *ra = &sbi->ll_ra_info; - - atomic_sub(len, &ra->ra_cur_pages); -} - -static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which) -{ - LASSERTF(which < _NR_RA_STAT, "which: %u\n", which); - lprocfs_counter_incr(sbi->ll_ra_stats, which); -} - -void ll_ra_stats_inc(struct inode *inode, enum ra_stat which) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - - ll_ra_stats_inc_sbi(sbi, which); -} - -#define RAS_CDEBUG(ras) \ - CDEBUG(D_READA, \ - "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu rpc %lu " \ - "r %lu ri %lu csr %lu sf %lu sp %lu sl %lu\n", \ - ras->ras_last_readpage, ras->ras_consecutive_requests, \ - ras->ras_consecutive_pages, ras->ras_window_start, \ - ras->ras_window_len, ras->ras_next_readahead, 
\ - ras->ras_rpc_size, \ - ras->ras_requests, ras->ras_request_index, \ - ras->ras_consecutive_stride_requests, ras->ras_stride_offset, \ - ras->ras_stride_pages, ras->ras_stride_length) - -static int index_in_window(unsigned long index, unsigned long point, - unsigned long before, unsigned long after) -{ - unsigned long start = point - before, end = point + after; - - if (start > point) - start = 0; - if (end < point) - end = ~0; - - return start <= index && index <= end; -} - -void ll_ras_enter(struct file *f) -{ - struct ll_file_data *fd = LUSTRE_FPRIVATE(f); - struct ll_readahead_state *ras = &fd->fd_ras; - - spin_lock(&ras->ras_lock); - ras->ras_requests++; - ras->ras_request_index = 0; - ras->ras_consecutive_requests++; - spin_unlock(&ras->ras_lock); -} - -/** - * Initiates read-ahead of a page with given index. - * - * \retval +ve: page was already uptodate so it will be skipped - * from being added; - * \retval -ve: page wasn't added to \a queue for error; - * \retval 0: page was added into \a queue for read ahead. 
- */ -static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, pgoff_t index) -{ - enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */ - struct cl_object *clob = io->ci_obj; - struct inode *inode = vvp_object_inode(clob); - const char *msg = NULL; - struct cl_page *page; - struct vvp_page *vpg; - struct page *vmpage; - int rc = 0; - - vmpage = grab_cache_page_nowait(inode->i_mapping, index); - if (!vmpage) { - which = RA_STAT_FAILED_GRAB_PAGE; - msg = "g_c_p_n failed"; - rc = -EBUSY; - goto out; - } - - /* Check if vmpage was truncated or reclaimed */ - if (vmpage->mapping != inode->i_mapping) { - which = RA_STAT_WRONG_GRAB_PAGE; - msg = "g_c_p_n returned invalid page"; - rc = -EBUSY; - goto out; - } - - page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); - if (IS_ERR(page)) { - which = RA_STAT_FAILED_GRAB_PAGE; - msg = "cl_page_find failed"; - rc = PTR_ERR(page); - goto out; - } - - lu_ref_add(&page->cp_reference, "ra", current); - cl_page_assume(env, io, page); - vpg = cl2vvp_page(cl_object_page_slice(clob, page)); - if (!vpg->vpg_defer_uptodate && !PageUptodate(vmpage)) { - vpg->vpg_defer_uptodate = 1; - vpg->vpg_ra_used = 0; - cl_page_list_add(queue, page); - } else { - /* skip completed pages */ - cl_page_unassume(env, io, page); - /* This page is already uptodate, returning a positive number - * to tell the callers about this - */ - rc = 1; - } - - lu_ref_del(&page->cp_reference, "ra", current); - cl_page_put(env, page); -out: - if (vmpage) { - if (rc) - unlock_page(vmpage); - put_page(vmpage); - } - if (msg) { - ll_ra_stats_inc(inode, which); - CDEBUG(D_READA, "%s\n", msg); - } - return rc; -} - -#define RIA_DEBUG(ria) \ - CDEBUG(D_READA, "rs %lu re %lu ro %lu rl %lu rp %lu\n", \ - ria->ria_start, ria->ria_end, ria->ria_stoff, ria->ria_length,\ - ria->ria_pages) - -static inline int stride_io_mode(struct ll_readahead_state *ras) -{ - return ras->ras_consecutive_stride_requests > 1; -} - -/* 
The function calculates how much pages will be read in - * [off, off + length], in such stride IO area, - * stride_offset = st_off, stride_length = st_len, - * stride_pages = st_pgs - * - * |------------------|*****|------------------|*****|------------|*****|.... - * st_off - * |--- st_pgs ---| - * |----- st_len -----| - * - * How many pages it should read in such pattern - * |-------------------------------------------------------------| - * off - * |<------ length ------->| - * - * = |<----->| + |-------------------------------------| + |---| - * start_left st_pgs * i end_left - */ -static unsigned long -stride_pg_count(pgoff_t st_off, unsigned long st_len, unsigned long st_pgs, - unsigned long off, unsigned long length) -{ - __u64 start = off > st_off ? off - st_off : 0; - __u64 end = off + length > st_off ? off + length - st_off : 0; - unsigned long start_left = 0; - unsigned long end_left = 0; - unsigned long pg_count; - - if (st_len == 0 || length == 0 || end == 0) - return length; - - start_left = do_div(start, st_len); - if (start_left < st_pgs) - start_left = st_pgs - start_left; - else - start_left = 0; - - end_left = do_div(end, st_len); - if (end_left > st_pgs) - end_left = st_pgs; - - CDEBUG(D_READA, "start %llu, end %llu start_left %lu end_left %lu\n", - start, end, start_left, end_left); - - if (start == end) - pg_count = end_left - (st_pgs - start_left); - else - pg_count = start_left + st_pgs * (end - start - 1) + end_left; - - CDEBUG(D_READA, - "st_off %lu, st_len %lu st_pgs %lu off %lu length %lu pgcount %lu\n", - st_off, st_len, st_pgs, off, length, pg_count); - - return pg_count; -} - -static int ria_page_count(struct ra_io_arg *ria) -{ - __u64 length = ria->ria_end >= ria->ria_start ? 
- ria->ria_end - ria->ria_start + 1 : 0; - - return stride_pg_count(ria->ria_stoff, ria->ria_length, - ria->ria_pages, ria->ria_start, - length); -} - -static unsigned long ras_align(struct ll_readahead_state *ras, - unsigned long index, - unsigned long *remainder) -{ - unsigned long rem = index % ras->ras_rpc_size; - - if (remainder) - *remainder = rem; - return index - rem; -} - -/*Check whether the index is in the defined ra-window */ -static int ras_inside_ra_window(unsigned long idx, struct ra_io_arg *ria) -{ - /* If ria_length == ria_pages, it means non-stride I/O mode, - * idx should always inside read-ahead window in this case - * For stride I/O mode, just check whether the idx is inside - * the ria_pages. - */ - return ria->ria_length == 0 || ria->ria_length == ria->ria_pages || - (idx >= ria->ria_stoff && (idx - ria->ria_stoff) % - ria->ria_length < ria->ria_pages); -} - -static unsigned long -ll_read_ahead_pages(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, struct ll_readahead_state *ras, - struct ra_io_arg *ria) -{ - struct cl_read_ahead ra = { 0 }; - unsigned long ra_end = 0; - bool stride_ria; - pgoff_t page_idx; - int rc; - - LASSERT(ria); - RIA_DEBUG(ria); - - stride_ria = ria->ria_length > ria->ria_pages && ria->ria_pages > 0; - for (page_idx = ria->ria_start; - page_idx <= ria->ria_end && ria->ria_reserved > 0; page_idx++) { - if (ras_inside_ra_window(page_idx, ria)) { - if (!ra.cra_end || ra.cra_end < page_idx) { - unsigned long end; - - cl_read_ahead_release(env, &ra); - - rc = cl_io_read_ahead(env, io, page_idx, &ra); - if (rc < 0) - break; - - CDEBUG(D_READA, "idx: %lu, ra: %lu, rpc: %lu\n", - page_idx, ra.cra_end, ra.cra_rpc_size); - LASSERTF(ra.cra_end >= page_idx, - "object: %p, indcies %lu / %lu\n", - io->ci_obj, ra.cra_end, page_idx); - /* - * update read ahead RPC size. 
- * NB: it's racy but doesn't matter - */ - if (ras->ras_rpc_size > ra.cra_rpc_size && - ra.cra_rpc_size > 0) - ras->ras_rpc_size = ra.cra_rpc_size; - /* trim it to align with optimal RPC size */ - end = ras_align(ras, ria->ria_end + 1, NULL); - if (end > 0 && !ria->ria_eof) - ria->ria_end = end - 1; - if (ria->ria_end < ria->ria_end_min) - ria->ria_end = ria->ria_end_min; - if (ria->ria_end > ra.cra_end) - ria->ria_end = ra.cra_end; - } - - /* If the page is inside the read-ahead window */ - rc = ll_read_ahead_page(env, io, queue, page_idx); - if (rc < 0) - break; - - ra_end = page_idx; - if (!rc) - ria->ria_reserved--; - } else if (stride_ria) { - /* If it is not in the read-ahead window, and it is - * read-ahead mode, then check whether it should skip - * the stride gap - */ - pgoff_t offset; - /* FIXME: This assertion only is valid when it is for - * forward read-ahead, it will be fixed when backward - * read-ahead is implemented - */ - LASSERTF(page_idx >= ria->ria_stoff, - "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n", - page_idx, - ria->ria_start, ria->ria_end, ria->ria_stoff, - ria->ria_length, ria->ria_pages); - offset = page_idx - ria->ria_stoff; - offset = offset % (ria->ria_length); - if (offset > ria->ria_pages) { - page_idx += ria->ria_length - offset; - CDEBUG(D_READA, "i %lu skip %lu\n", page_idx, - ria->ria_length - offset); - continue; - } - } - } - cl_read_ahead_release(env, &ra); - - return ra_end; -} - -static int ll_readahead(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, - struct ll_readahead_state *ras, bool hit) -{ - struct vvp_io *vio = vvp_env_io(env); - struct ll_thread_info *lti = ll_env_info(env); - struct cl_attr *attr = vvp_env_thread_attr(env); - unsigned long len, mlen = 0; - pgoff_t ra_end, start = 0, end = 0; - struct inode *inode; - struct ra_io_arg *ria = <i->lti_ria; - struct cl_object *clob; - int ret = 0; - __u64 kms; - - clob = io->ci_obj; - inode = vvp_object_inode(clob); - - 
memset(ria, 0, sizeof(*ria)); - - cl_object_attr_lock(clob); - ret = cl_object_attr_get(env, clob, attr); - cl_object_attr_unlock(clob); - - if (ret != 0) - return ret; - kms = attr->cat_kms; - if (kms == 0) { - ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN); - return 0; - } - - spin_lock(&ras->ras_lock); - - /** - * Note: other thread might rollback the ras_next_readahead, - * if it can not get the full size of prepared pages, see the - * end of this function. For stride read ahead, it needs to - * make sure the offset is no less than ras_stride_offset, - * so that stride read ahead can work correctly. - */ - if (stride_io_mode(ras)) - start = max(ras->ras_next_readahead, ras->ras_stride_offset); - else - start = ras->ras_next_readahead; - - if (ras->ras_window_len > 0) - end = ras->ras_window_start + ras->ras_window_len - 1; - - /* Enlarge the RA window to encompass the full read */ - if (vio->vui_ra_valid && - end < vio->vui_ra_start + vio->vui_ra_count - 1) - end = vio->vui_ra_start + vio->vui_ra_count - 1; - - if (end) { - unsigned long end_index; - - /* Truncate RA window to end of file */ - end_index = (unsigned long)((kms - 1) >> PAGE_SHIFT); - if (end_index <= end) { - end = end_index; - ria->ria_eof = true; - } - - ras->ras_next_readahead = max(end, end + 1); - RAS_CDEBUG(ras); - } - ria->ria_start = start; - ria->ria_end = end; - /* If stride I/O mode is detected, get stride window*/ - if (stride_io_mode(ras)) { - ria->ria_stoff = ras->ras_stride_offset; - ria->ria_length = ras->ras_stride_length; - ria->ria_pages = ras->ras_stride_pages; - } - spin_unlock(&ras->ras_lock); - - if (end == 0) { - ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW); - return 0; - } - len = ria_page_count(ria); - if (len == 0) { - ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW); - return 0; - } - - CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n", - PFID(lu_object_fid(&clob->co_lu)), - ria->ria_start, ria->ria_end, - vio->vui_ra_valid ? 
vio->vui_ra_start : 0, - vio->vui_ra_valid ? vio->vui_ra_count : 0, - hit); - - /* at least to extend the readahead window to cover current read */ - if (!hit && vio->vui_ra_valid && - vio->vui_ra_start + vio->vui_ra_count > ria->ria_start) { - unsigned long remainder; - - /* to the end of current read window. */ - mlen = vio->vui_ra_start + vio->vui_ra_count - ria->ria_start; - /* trim to RPC boundary */ - ras_align(ras, ria->ria_start, &remainder); - mlen = min(mlen, ras->ras_rpc_size - remainder); - ria->ria_end_min = ria->ria_start + mlen; - } - - ria->ria_reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen); - if (ria->ria_reserved < len) - ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT); - - CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n", - ria->ria_reserved, len, mlen, - atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages), - ll_i2sbi(inode)->ll_ra_info.ra_max_pages); - - ra_end = ll_read_ahead_pages(env, io, queue, ras, ria); - - if (ria->ria_reserved) - ll_ra_count_put(ll_i2sbi(inode), ria->ria_reserved); - - if (ra_end == end && ra_end == (kms >> PAGE_SHIFT)) - ll_ra_stats_inc(inode, RA_STAT_EOF); - - /* if we didn't get to the end of the region we reserved from - * the ras we need to go back and update the ras so that the - * next read-ahead tries from where we left off. 
we only do so - * if the region we failed to issue read-ahead on is still ahead - * of the app and behind the next index to start read-ahead from - */ - CDEBUG(D_READA, "ra_end = %lu end = %lu stride end = %lu pages = %d\n", - ra_end, end, ria->ria_end, ret); - - if (ra_end > 0 && ra_end != end) { - ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END); - spin_lock(&ras->ras_lock); - if (ra_end <= ras->ras_next_readahead && - index_in_window(ra_end, ras->ras_window_start, 0, - ras->ras_window_len)) { - ras->ras_next_readahead = ra_end + 1; - RAS_CDEBUG(ras); - } - spin_unlock(&ras->ras_lock); - } - - return ret; -} - -static void ras_set_start(struct inode *inode, struct ll_readahead_state *ras, - unsigned long index) -{ - ras->ras_window_start = ras_align(ras, index, NULL); -} - -/* called with the ras_lock held or from places where it doesn't matter */ -static void ras_reset(struct inode *inode, struct ll_readahead_state *ras, - unsigned long index) -{ - ras->ras_last_readpage = index; - ras->ras_consecutive_requests = 0; - ras->ras_consecutive_pages = 0; - ras->ras_window_len = 0; - ras_set_start(inode, ras, index); - ras->ras_next_readahead = max(ras->ras_window_start, index + 1); - - RAS_CDEBUG(ras); -} - -/* called with the ras_lock held or from places where it doesn't matter */ -static void ras_stride_reset(struct ll_readahead_state *ras) -{ - ras->ras_consecutive_stride_requests = 0; - ras->ras_stride_length = 0; - ras->ras_stride_pages = 0; - RAS_CDEBUG(ras); -} - -void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras) -{ - spin_lock_init(&ras->ras_lock); - ras->ras_rpc_size = PTLRPC_MAX_BRW_PAGES; - ras_reset(inode, ras, 0); - ras->ras_requests = 0; -} - -/* - * Check whether the read request is in the stride window. - * If it is in the stride window, return 1, otherwise return 0. 
- */ -static int index_in_stride_window(struct ll_readahead_state *ras, - unsigned long index) -{ - unsigned long stride_gap; - - if (ras->ras_stride_length == 0 || ras->ras_stride_pages == 0 || - ras->ras_stride_pages == ras->ras_stride_length) - return 0; - - stride_gap = index - ras->ras_last_readpage - 1; - - /* If it is contiguous read */ - if (stride_gap == 0) - return ras->ras_consecutive_pages + 1 <= ras->ras_stride_pages; - - /* Otherwise check the stride by itself */ - return (ras->ras_stride_length - ras->ras_stride_pages) == stride_gap && - ras->ras_consecutive_pages == ras->ras_stride_pages; -} - -static void ras_update_stride_detector(struct ll_readahead_state *ras, - unsigned long index) -{ - unsigned long stride_gap = index - ras->ras_last_readpage - 1; - - if ((stride_gap != 0 || ras->ras_consecutive_stride_requests == 0) && - !stride_io_mode(ras)) { - ras->ras_stride_pages = ras->ras_consecutive_pages; - ras->ras_stride_length = ras->ras_consecutive_pages + - stride_gap; - } - LASSERT(ras->ras_request_index == 0); - LASSERT(ras->ras_consecutive_stride_requests == 0); - - if (index <= ras->ras_last_readpage) { - /*Reset stride window for forward read*/ - ras_stride_reset(ras); - return; - } - - ras->ras_stride_pages = ras->ras_consecutive_pages; - ras->ras_stride_length = stride_gap + ras->ras_consecutive_pages; - - RAS_CDEBUG(ras); -} - -/* Stride Read-ahead window will be increased inc_len according to - * stride I/O pattern - */ -static void ras_stride_increase_window(struct ll_readahead_state *ras, - struct ll_ra_info *ra, - unsigned long inc_len) -{ - unsigned long left, step, window_len; - unsigned long stride_len; - - LASSERT(ras->ras_stride_length > 0); - LASSERTF(ras->ras_window_start + ras->ras_window_len >= - ras->ras_stride_offset, - "window_start %lu, window_len %lu stride_offset %lu\n", - ras->ras_window_start, - ras->ras_window_len, ras->ras_stride_offset); - - stride_len = ras->ras_window_start + ras->ras_window_len - - 
ras->ras_stride_offset; - - left = stride_len % ras->ras_stride_length; - window_len = ras->ras_window_len - left; - - if (left < ras->ras_stride_pages) - left += inc_len; - else - left = ras->ras_stride_pages + inc_len; - - LASSERT(ras->ras_stride_pages != 0); - - step = left / ras->ras_stride_pages; - left %= ras->ras_stride_pages; - - window_len += step * ras->ras_stride_length + left; - - if (stride_pg_count(ras->ras_stride_offset, ras->ras_stride_length, - ras->ras_stride_pages, ras->ras_stride_offset, - window_len) <= ra->ra_max_pages_per_file) - ras->ras_window_len = window_len; - - RAS_CDEBUG(ras); -} - -static void ras_increase_window(struct inode *inode, - struct ll_readahead_state *ras, - struct ll_ra_info *ra) -{ - /* The stretch of ra-window should be aligned with max rpc_size - * but current clio architecture does not support retrieve such - * information from lower layer. FIXME later - */ - if (stride_io_mode(ras)) { - ras_stride_increase_window(ras, ra, ras->ras_rpc_size); - } else { - unsigned long wlen; - - wlen = min(ras->ras_window_len + ras->ras_rpc_size, - ra->ra_max_pages_per_file); - ras->ras_window_len = ras_align(ras, wlen, NULL); - } -} - -static void ras_update(struct ll_sb_info *sbi, struct inode *inode, - struct ll_readahead_state *ras, unsigned long index, - enum ras_update_flags flags) -{ - struct ll_ra_info *ra = &sbi->ll_ra_info; - int zero = 0, stride_detect = 0, ra_miss = 0; - bool hit = flags & LL_RAS_HIT; - - spin_lock(&ras->ras_lock); - - if (!hit) - CDEBUG(D_READA, DFID " pages at %lu miss.\n", - PFID(ll_inode2fid(inode)), index); - - ll_ra_stats_inc_sbi(sbi, hit ? RA_STAT_HIT : RA_STAT_MISS); - - /* reset the read-ahead window in two cases. First when the app seeks - * or reads to some other part of the file. Secondly if we get a - * read-ahead miss that we think we've previously issued. This can - * be a symptom of there being so many read-ahead pages that the VM is - * reclaiming it before we get to it. 
- */ - if (!index_in_window(index, ras->ras_last_readpage, 8, 8)) { - zero = 1; - ll_ra_stats_inc_sbi(sbi, RA_STAT_DISTANT_READPAGE); - } else if (!hit && ras->ras_window_len && - index < ras->ras_next_readahead && - index_in_window(index, ras->ras_window_start, 0, - ras->ras_window_len)) { - ra_miss = 1; - ll_ra_stats_inc_sbi(sbi, RA_STAT_MISS_IN_WINDOW); - } - - /* On the second access to a file smaller than the tunable - * ra_max_read_ahead_whole_pages trigger RA on all pages in the - * file up to ra_max_pages_per_file. This is simply a best effort - * and only occurs once per open file. Normal RA behavior is reverted - * to for subsequent IO. The mmap case does not increment - * ras_requests and thus can never trigger this behavior. - */ - if (ras->ras_requests >= 2 && !ras->ras_request_index) { - __u64 kms_pages; - - kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >> - PAGE_SHIFT; - - CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages, - ra->ra_max_read_ahead_whole_pages, - ra->ra_max_pages_per_file); - - if (kms_pages && - kms_pages <= ra->ra_max_read_ahead_whole_pages) { - ras->ras_window_start = 0; - ras->ras_next_readahead = index + 1; - ras->ras_window_len = min(ra->ra_max_pages_per_file, - ra->ra_max_read_ahead_whole_pages); - goto out_unlock; - } - } - if (zero) { - /* check whether it is in stride I/O mode*/ - if (!index_in_stride_window(ras, index)) { - if (ras->ras_consecutive_stride_requests == 0 && - ras->ras_request_index == 0) { - ras_update_stride_detector(ras, index); - ras->ras_consecutive_stride_requests++; - } else { - ras_stride_reset(ras); - } - ras_reset(inode, ras, index); - ras->ras_consecutive_pages++; - goto out_unlock; - } else { - ras->ras_consecutive_pages = 0; - ras->ras_consecutive_requests = 0; - if (++ras->ras_consecutive_stride_requests > 1) - stride_detect = 1; - RAS_CDEBUG(ras); - } - } else { - if (ra_miss) { - if (index_in_stride_window(ras, index) && - stride_io_mode(ras)) { - if (index != ras->ras_last_readpage + 
1) - ras->ras_consecutive_pages = 0; - ras_reset(inode, ras, index); - - /* If stride-RA hit cache miss, the stride - * detector will not be reset to avoid the - * overhead of redetecting read-ahead mode, - * but on the condition that the stride window - * is still intersect with normal sequential - * read-ahead window. - */ - if (ras->ras_window_start < - ras->ras_stride_offset) - ras_stride_reset(ras); - RAS_CDEBUG(ras); - } else { - /* Reset both stride window and normal RA - * window - */ - ras_reset(inode, ras, index); - ras->ras_consecutive_pages++; - ras_stride_reset(ras); - goto out_unlock; - } - } else if (stride_io_mode(ras)) { - /* If this is contiguous read but in stride I/O mode - * currently, check whether stride step still is valid, - * if invalid, it will reset the stride ra window - */ - if (!index_in_stride_window(ras, index)) { - /* Shrink stride read-ahead window to be zero */ - ras_stride_reset(ras); - ras->ras_window_len = 0; - ras->ras_next_readahead = index; - } - } - } - ras->ras_consecutive_pages++; - ras->ras_last_readpage = index; - ras_set_start(inode, ras, index); - - if (stride_io_mode(ras)) { - /* Since stride readahead is sensitive to the offset - * of read-ahead, so we use original offset here, - * instead of ras_window_start, which is RPC aligned - */ - ras->ras_next_readahead = max(index, ras->ras_next_readahead); - ras->ras_window_start = max(ras->ras_stride_offset, - ras->ras_window_start); - } else { - if (ras->ras_next_readahead < ras->ras_window_start) - ras->ras_next_readahead = ras->ras_window_start; - if (!hit) - ras->ras_next_readahead = index + 1; - } - RAS_CDEBUG(ras); - - /* Trigger RA in the mmap case where ras_consecutive_requests - * is not incremented and thus can't be used to trigger RA - */ - if (ras->ras_consecutive_pages >= 4 && flags & LL_RAS_MMAP) { - ras_increase_window(inode, ras, ra); - /* - * reset consecutive pages so that the readahead window can - * grow gradually. 
- */ - ras->ras_consecutive_pages = 0; - goto out_unlock; - } - - /* Initially reset the stride window offset to next_readahead*/ - if (ras->ras_consecutive_stride_requests == 2 && stride_detect) { - /** - * Once stride IO mode is detected, next_readahead should be - * reset to make sure next_readahead > stride offset - */ - ras->ras_next_readahead = max(index, ras->ras_next_readahead); - ras->ras_stride_offset = index; - ras->ras_window_start = max(index, ras->ras_window_start); - } - - /* The initial ras_window_len is set to the request size. To avoid - * uselessly reading and discarding pages for random IO the window is - * only increased once per consecutive request received. - */ - if ((ras->ras_consecutive_requests > 1 || stride_detect) && - !ras->ras_request_index) - ras_increase_window(inode, ras, ra); -out_unlock: - RAS_CDEBUG(ras); - ras->ras_request_index++; - spin_unlock(&ras->ras_lock); -} - -int ll_writepage(struct page *vmpage, struct writeback_control *wbc) -{ - struct inode *inode = vmpage->mapping->host; - struct ll_inode_info *lli = ll_i2info(inode); - struct lu_env *env; - struct cl_io *io; - struct cl_page *page; - struct cl_object *clob; - bool redirtied = false; - bool unlocked = false; - int result; - u16 refcheck; - - LASSERT(PageLocked(vmpage)); - LASSERT(!PageWriteback(vmpage)); - - LASSERT(ll_i2dtexp(inode)); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) { - result = PTR_ERR(env); - goto out; - } - - clob = ll_i2info(inode)->lli_clob; - LASSERT(clob); - - io = vvp_env_thread_io(env); - io->ci_obj = clob; - io->ci_ignore_layout = 1; - result = cl_io_init(env, io, CIT_MISC, clob); - if (result == 0) { - page = cl_page_find(env, clob, vmpage->index, - vmpage, CPT_CACHEABLE); - if (!IS_ERR(page)) { - lu_ref_add(&page->cp_reference, "writepage", - current); - cl_page_assume(env, io, page); - result = cl_page_flush(env, io, page); - if (result != 0) { - /* - * Re-dirty page on error so it retries write, - * but not in case when IO has 
actually - * occurred and completed with an error. - */ - if (!PageError(vmpage)) { - redirty_page_for_writepage(wbc, vmpage); - result = 0; - redirtied = true; - } - } - cl_page_disown(env, io, page); - unlocked = true; - lu_ref_del(&page->cp_reference, - "writepage", current); - cl_page_put(env, page); - } else { - result = PTR_ERR(page); - } - } - cl_io_fini(env, io); - - if (redirtied && wbc->sync_mode == WB_SYNC_ALL) { - loff_t offset = cl_offset(clob, vmpage->index); - - /* Flush page failed because the extent is being written out. - * Wait for the write of extent to be finished to avoid - * breaking kernel which assumes ->writepage should mark - * PageWriteback or clean the page. - */ - result = cl_sync_file_range(inode, offset, - offset + PAGE_SIZE - 1, - CL_FSYNC_LOCAL, 1); - if (result > 0) { - /* actually we may have written more than one page. - * decreasing this page because the caller will count - * it. - */ - wbc->nr_to_write -= result - 1; - result = 0; - } - } - - cl_env_put(env, &refcheck); - goto out; - -out: - if (result < 0) { - if (!lli->lli_async_rc) - lli->lli_async_rc = result; - SetPageError(vmpage); - if (!unlocked) - unlock_page(vmpage); - } - return result; -} - -int ll_writepages(struct address_space *mapping, struct writeback_control *wbc) -{ - struct inode *inode = mapping->host; - struct ll_sb_info *sbi = ll_i2sbi(inode); - loff_t start; - loff_t end; - enum cl_fsync_mode mode; - int range_whole = 0; - int result; - int ignore_layout = 0; - - if (wbc->range_cyclic) { - start = mapping->writeback_index << PAGE_SHIFT; - end = OBD_OBJECT_EOF; - } else { - start = wbc->range_start; - end = wbc->range_end; - if (end == LLONG_MAX) { - end = OBD_OBJECT_EOF; - range_whole = start == 0; - } - } - - mode = CL_FSYNC_NONE; - if (wbc->sync_mode == WB_SYNC_ALL) - mode = CL_FSYNC_LOCAL; - - if (sbi->ll_umounting) - /* if the mountpoint is being umounted, all pages have to be - * evicted to avoid hitting LBUG when truncate_inode_pages() - * is 
called later on. - */ - ignore_layout = 1; - - if (!ll_i2info(inode)->lli_clob) - return 0; - - result = cl_sync_file_range(inode, start, end, mode, ignore_layout); - if (result > 0) { - wbc->nr_to_write -= result; - result = 0; - } - - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) { - if (end == OBD_OBJECT_EOF) - mapping->writeback_index = 0; - else - mapping->writeback_index = (end >> PAGE_SHIFT) + 1; - } - return result; -} - -struct ll_cl_context *ll_cl_find(struct file *file) -{ - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ll_cl_context *lcc; - struct ll_cl_context *found = NULL; - - read_lock(&fd->fd_lock); - list_for_each_entry(lcc, &fd->fd_lccs, lcc_list) { - if (lcc->lcc_cookie == current) { - found = lcc; - break; - } - } - read_unlock(&fd->fd_lock); - - return found; -} - -void ll_cl_add(struct file *file, const struct lu_env *env, struct cl_io *io) -{ - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx; - - memset(lcc, 0, sizeof(*lcc)); - INIT_LIST_HEAD(&lcc->lcc_list); - lcc->lcc_cookie = current; - lcc->lcc_env = env; - lcc->lcc_io = io; - - write_lock(&fd->fd_lock); - list_add(&lcc->lcc_list, &fd->fd_lccs); - write_unlock(&fd->fd_lock); -} - -void ll_cl_remove(struct file *file, const struct lu_env *env) -{ - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct ll_cl_context *lcc = &ll_env_info(env)->lti_io_ctx; - - write_lock(&fd->fd_lock); - list_del_init(&lcc->lcc_list); - write_unlock(&fd->fd_lock); -} - -static int ll_io_read_page(const struct lu_env *env, struct cl_io *io, - struct cl_page *page) -{ - struct inode *inode = vvp_object_inode(page->cp_obj); - struct ll_file_data *fd = vvp_env_io(env)->vui_fd; - struct ll_readahead_state *ras = &fd->fd_ras; - struct cl_2queue *queue = &io->ci_queue; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct vvp_page *vpg; - bool uptodate; - int rc = 0; - - vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, 
page)); - uptodate = vpg->vpg_defer_uptodate; - - if (sbi->ll_ra_info.ra_max_pages_per_file > 0 && - sbi->ll_ra_info.ra_max_pages > 0) { - struct vvp_io *vio = vvp_env_io(env); - enum ras_update_flags flags = 0; - - if (uptodate) - flags |= LL_RAS_HIT; - if (!vio->vui_ra_valid) - flags |= LL_RAS_MMAP; - ras_update(sbi, inode, ras, vvp_index(vpg), flags); - } - - cl_2queue_init(queue); - if (uptodate) { - vpg->vpg_ra_used = 1; - cl_page_export(env, page, 1); - cl_page_disown(env, io, page); - } else { - cl_page_list_add(&queue->c2_qin, page); - } - - if (sbi->ll_ra_info.ra_max_pages_per_file > 0 && - sbi->ll_ra_info.ra_max_pages > 0) { - int rc2; - - rc2 = ll_readahead(env, io, &queue->c2_qin, ras, - uptodate); - CDEBUG(D_READA, DFID "%d pages read ahead at %lu\n", - PFID(ll_inode2fid(inode)), rc2, vvp_index(vpg)); - } - - if (queue->c2_qin.pl_nr > 0) - rc = cl_io_submit_rw(env, io, CRT_READ, queue); - - /* - * Unlock unsent pages in case of error. - */ - cl_page_list_disown(env, io, &queue->c2_qin); - cl_2queue_fini(env, queue); - - return rc; -} - -int ll_readpage(struct file *file, struct page *vmpage) -{ - struct cl_object *clob = ll_i2info(file_inode(file))->lli_clob; - struct ll_cl_context *lcc; - const struct lu_env *env; - struct cl_io *io; - struct cl_page *page; - int result; - - lcc = ll_cl_find(file); - if (!lcc) { - unlock_page(vmpage); - return -EIO; - } - - env = lcc->lcc_env; - io = lcc->lcc_io; - LASSERT(io->ci_state == CIS_IO_GOING); - page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); - if (!IS_ERR(page)) { - LASSERT(page->cp_type == CPT_CACHEABLE); - if (likely(!PageUptodate(vmpage))) { - cl_page_assume(env, io, page); - result = ll_io_read_page(env, io, page); - } else { - /* Page from a non-object file. 
*/ - unlock_page(vmpage); - result = 0; - } - cl_page_put(env, page); - } else { - unlock_page(vmpage); - result = PTR_ERR(page); - } - return result; -} - -int ll_page_sync_io(const struct lu_env *env, struct cl_io *io, - struct cl_page *page, enum cl_req_type crt) -{ - struct cl_2queue *queue; - int result; - - LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - - queue = &io->ci_queue; - cl_2queue_init_page(queue, page); - - result = cl_io_submit_sync(env, io, crt, queue, 0); - LASSERT(cl_page_is_owned(page, io)); - - if (crt == CRT_READ) - /* - * in CRT_WRITE case page is left locked even in case of - * error. - */ - cl_page_list_disown(env, io, &queue->c2_qin); - cl_2queue_fini(env, queue); - - return result; -} diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c deleted file mode 100644 index 722e5ea1af5f..000000000000 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ /dev/null @@ -1,641 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/lustre/llite/rw26.c - * - * Lustre Lite I/O page cache routines for the 2.5/2.6 kernel version - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_LLITE - -#include "llite_internal.h" - -/** - * Implements Linux VM address_space::invalidatepage() method. This method is - * called when the page is truncate from a file, either as a result of - * explicit truncate, or when inode is removed from memory (as a result of - * final iput(), umount, or memory pressure induced icache shrinking). - * - * [0, offset] bytes of the page remain valid (this is for a case of not-page - * aligned truncate). Lustre leaves partially truncated page in the cache, - * relying on struct inode::i_size to limit further accesses. 
- */ -static void ll_invalidatepage(struct page *vmpage, unsigned int offset, - unsigned int length) -{ - struct inode *inode; - struct lu_env *env; - struct cl_page *page; - struct cl_object *obj; - - LASSERT(PageLocked(vmpage)); - LASSERT(!PageWriteback(vmpage)); - - /* - * It is safe to not check anything in invalidatepage/releasepage - * below because they are run with page locked and all our io is - * happening with locked page too - */ - if (offset == 0 && length == PAGE_SIZE) { - /* See the comment in ll_releasepage() */ - env = cl_env_percpu_get(); - LASSERT(!IS_ERR(env)); - inode = vmpage->mapping->host; - obj = ll_i2info(inode)->lli_clob; - if (obj) { - page = cl_vmpage_page(vmpage, obj); - if (page) { - cl_page_delete(env, page); - cl_page_put(env, page); - } - } else { - LASSERT(vmpage->private == 0); - } - cl_env_percpu_put(env); - } -} - -static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask) -{ - struct lu_env *env; - struct cl_object *obj; - struct cl_page *page; - struct address_space *mapping; - int result = 0; - - LASSERT(PageLocked(vmpage)); - if (PageWriteback(vmpage) || PageDirty(vmpage)) - return 0; - - mapping = vmpage->mapping; - if (!mapping) - return 1; - - obj = ll_i2info(mapping->host)->lli_clob; - if (!obj) - return 1; - - /* 1 for caller, 1 for cl_page and 1 for page cache */ - if (page_count(vmpage) > 3) - return 0; - - page = cl_vmpage_page(vmpage, obj); - if (!page) - return 1; - - env = cl_env_percpu_get(); - LASSERT(!IS_ERR(env)); - - if (!cl_page_in_use(page)) { - result = 1; - cl_page_delete(env, page); - } - - /* To use percpu env array, the call path can not be rescheduled; - * otherwise percpu array will be messed if ll_releaspage() called - * again on the same CPU. - * - * If this page holds the last refc of cl_object, the following - * call path may cause reschedule: - * cl_page_put -> cl_page_free -> cl_object_put -> - * lu_object_put -> lu_object_free -> lov_delete_raid0. 
- * - * However, the kernel can't get rid of this inode until all pages have - * been cleaned up. Now that we hold page lock here, it's pretty safe - * that we won't get into object delete path. - */ - LASSERT(cl_object_refc(obj) > 1); - cl_page_put(env, page); - - cl_env_percpu_put(env); - return result; -} - -#define MAX_DIRECTIO_SIZE (2 * 1024 * 1024 * 1024UL) - -/* ll_free_user_pages - tear down page struct array - * @pages: array of page struct pointers underlying target buffer - */ -static void ll_free_user_pages(struct page **pages, int npages, int do_dirty) -{ - int i; - - for (i = 0; i < npages; i++) { - if (do_dirty) - set_page_dirty_lock(pages[i]); - put_page(pages[i]); - } - kvfree(pages); -} - -ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, - int rw, struct inode *inode, - struct ll_dio_pages *pv) -{ - struct cl_page *clp; - struct cl_2queue *queue; - struct cl_object *obj = io->ci_obj; - int i; - ssize_t rc = 0; - loff_t file_offset = pv->ldp_start_offset; - size_t size = pv->ldp_size; - int page_count = pv->ldp_nr; - struct page **pages = pv->ldp_pages; - size_t page_size = cl_page_size(obj); - bool do_io; - int io_pages = 0; - - queue = &io->ci_queue; - cl_2queue_init(queue); - for (i = 0; i < page_count; i++) { - if (pv->ldp_offsets) - file_offset = pv->ldp_offsets[i]; - - LASSERT(!(file_offset & (page_size - 1))); - clp = cl_page_find(env, obj, cl_index(obj, file_offset), - pv->ldp_pages[i], CPT_TRANSIENT); - if (IS_ERR(clp)) { - rc = PTR_ERR(clp); - break; - } - - rc = cl_page_own(env, io, clp); - if (rc) { - LASSERT(clp->cp_state == CPS_FREEING); - cl_page_put(env, clp); - break; - } - - do_io = true; - - /* check the page type: if the page is a host page, then do - * write directly - */ - if (clp->cp_type == CPT_CACHEABLE) { - struct page *vmpage = cl_page_vmpage(clp); - struct page *src_page; - struct page *dst_page; - void *src; - void *dst; - - src_page = (rw == WRITE) ? pages[i] : vmpage; - dst_page = (rw == WRITE) ? 
vmpage : pages[i]; - - src = kmap_atomic(src_page); - dst = kmap_atomic(dst_page); - memcpy(dst, src, min(page_size, size)); - kunmap_atomic(dst); - kunmap_atomic(src); - - /* make sure page will be added to the transfer by - * cl_io_submit()->...->vvp_page_prep_write(). - */ - if (rw == WRITE) - set_page_dirty(vmpage); - - if (rw == READ) { - /* do not issue the page for read, since it - * may reread a ra page which has NOT uptodate - * bit set. - */ - cl_page_disown(env, io, clp); - do_io = false; - } - } - - if (likely(do_io)) { - /* - * Add a page to the incoming page list of 2-queue. - */ - cl_page_list_add(&queue->c2_qin, clp); - - /* - * Set page clip to tell transfer formation engine - * that page has to be sent even if it is beyond KMS. - */ - cl_page_clip(env, clp, 0, min(size, page_size)); - - ++io_pages; - } - - /* drop the reference count for cl_page_find */ - cl_page_put(env, clp); - size -= page_size; - file_offset += page_size; - } - - if (rc == 0 && io_pages) { - rc = cl_io_submit_sync(env, io, - rw == READ ? CRT_READ : CRT_WRITE, - queue, 0); - } - if (rc == 0) - rc = pv->ldp_size; - - cl_2queue_discard(env, io, queue); - cl_2queue_disown(env, io, queue); - cl_2queue_fini(env, queue); - return rc; -} -EXPORT_SYMBOL(ll_direct_rw_pages); - -static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, - int rw, struct inode *inode, - struct address_space *mapping, - size_t size, loff_t file_offset, - struct page **pages, int page_count) -{ - struct ll_dio_pages pvec = { - .ldp_pages = pages, - .ldp_nr = page_count, - .ldp_size = size, - .ldp_offsets = NULL, - .ldp_start_offset = file_offset - }; - - return ll_direct_rw_pages(env, io, rw, inode, &pvec); -} - -/* This is the maximum size of a single O_DIRECT request, based on the - * kmalloc limit. We need to fit all of the brw_page structs, each one - * representing PAGE_SIZE worth of user data, into a single buffer, and - * then truncate this to be a full-sized RPC. 
For 4kB PAGE_SIZE this is - * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. - */ -#define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \ - PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1)) -static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter) -{ - struct ll_cl_context *lcc; - const struct lu_env *env; - struct cl_io *io; - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - loff_t file_offset = iocb->ki_pos; - ssize_t count = iov_iter_count(iter); - ssize_t tot_bytes = 0, result = 0; - long size = MAX_DIO_SIZE; - - /* Check EOF by ourselves */ - if (iov_iter_rw(iter) == READ && file_offset >= i_size_read(inode)) - return 0; - - /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */ - if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK)) - return -EINVAL; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n", - PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE, - file_offset, file_offset, count >> PAGE_SHIFT, - MAX_DIO_SIZE >> PAGE_SHIFT); - - /* Check that all user buffers are aligned as well */ - if (iov_iter_alignment(iter) & ~PAGE_MASK) - return -EINVAL; - - lcc = ll_cl_find(file); - if (!lcc) - return -EIO; - - env = lcc->lcc_env; - LASSERT(!IS_ERR(env)); - io = lcc->lcc_io; - LASSERT(io); - - while (iov_iter_count(iter)) { - struct page **pages; - size_t offs; - - count = min_t(size_t, iov_iter_count(iter), size); - if (iov_iter_rw(iter) == READ) { - if (file_offset >= i_size_read(inode)) - break; - if (file_offset + count > i_size_read(inode)) - count = i_size_read(inode) - file_offset; - } - - result = iov_iter_get_pages_alloc(iter, &pages, count, &offs); - if (likely(result > 0)) { - int n = DIV_ROUND_UP(result + offs, PAGE_SIZE); - - result = ll_direct_IO_26_seg(env, io, iov_iter_rw(iter), - inode, file->f_mapping, - result, file_offset, pages, - n); - ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ); - } - if 
(unlikely(result <= 0)) { - /* If we can't allocate a large enough buffer - * for the request, shrink it to a smaller - * PAGE_SIZE multiple and try again. - * We should always be able to kmalloc for a - * page worth of page pointers = 4MB on i386. - */ - if (result == -ENOMEM && - size > (PAGE_SIZE / sizeof(*pages)) * - PAGE_SIZE) { - size = ((((size / 2) - 1) | - ~PAGE_MASK) + 1) & - PAGE_MASK; - CDEBUG(D_VFSTRACE, "DIO size now %lu\n", - size); - continue; - } - - goto out; - } - iov_iter_advance(iter, result); - tot_bytes += result; - file_offset += result; - } -out: - if (tot_bytes > 0) { - struct vvp_io *vio = vvp_env_io(env); - - /* no commit async for direct IO */ - vio->u.write.vui_written += tot_bytes; - } - - return tot_bytes ? tot_bytes : result; -} - -/** - * Prepare partially written-to page for a write. - */ -static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg) -{ - struct cl_attr *attr = vvp_env_thread_attr(env); - struct cl_object *obj = io->ci_obj; - struct vvp_page *vpg = cl_object_page_slice(obj, pg); - loff_t offset = cl_offset(obj, vvp_index(vpg)); - int result; - - cl_object_attr_lock(obj); - result = cl_object_attr_get(env, obj, attr); - cl_object_attr_unlock(obj); - if (result == 0) { - /* - * If are writing to a new page, no need to read old data. - * The extent locking will have updated the KMS, and for our - * purposes here we can treat it like i_size. 
- */ - if (attr->cat_kms <= offset) { - char *kaddr = kmap_atomic(vpg->vpg_page); - - memset(kaddr, 0, cl_page_size(obj)); - kunmap_atomic(kaddr); - } else if (vpg->vpg_defer_uptodate) { - vpg->vpg_ra_used = 1; - } else { - result = ll_page_sync_io(env, io, pg, CRT_READ); - } - } - return result; -} - -static int ll_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned int len, unsigned int flags, - struct page **pagep, void **fsdata) -{ - struct ll_cl_context *lcc; - const struct lu_env *env = NULL; - struct cl_io *io; - struct cl_page *page = NULL; - struct cl_object *clob = ll_i2info(mapping->host)->lli_clob; - pgoff_t index = pos >> PAGE_SHIFT; - struct page *vmpage = NULL; - unsigned int from = pos & (PAGE_SIZE - 1); - unsigned int to = from + len; - int result = 0; - - CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len); - - lcc = ll_cl_find(file); - if (!lcc) { - io = NULL; - result = -EIO; - goto out; - } - - env = lcc->lcc_env; - io = lcc->lcc_io; - - /* To avoid deadlock, try to lock page first. */ - vmpage = grab_cache_page_nowait(mapping, index); - if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) { - struct vvp_io *vio = vvp_env_io(env); - struct cl_page_list *plist = &vio->u.write.vui_queue; - - /* if the page is already in dirty cache, we have to commit - * the pages right now; otherwise, it may cause deadlock - * because it holds page lock of a dirty page and request for - * more grants. It's okay for the dirty page to be the first - * one in commit page list, though. 
- */ - if (vmpage && plist->pl_nr > 0) { - unlock_page(vmpage); - put_page(vmpage); - vmpage = NULL; - } - - /* commit pages and then wait for page lock */ - result = vvp_io_write_commit(env, io); - if (result < 0) - goto out; - - if (!vmpage) { - vmpage = grab_cache_page_write_begin(mapping, index, - flags); - if (!vmpage) { - result = -ENOMEM; - goto out; - } - } - } - - page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); - if (IS_ERR(page)) { - result = PTR_ERR(page); - goto out; - } - - lcc->lcc_page = page; - lu_ref_add(&page->cp_reference, "cl_io", io); - - cl_page_assume(env, io, page); - if (!PageUptodate(vmpage)) { - /* - * We're completely overwriting an existing page, - * so _don't_ set it up to date until commit_write - */ - if (from == 0 && to == PAGE_SIZE) { - CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n"); - POISON_PAGE(vmpage, 0x11); - } else { - /* TODO: can be optimized at OSC layer to check if it - * is a lockless IO. In that case, it's not necessary - * to read the data. 
- */ - result = ll_prepare_partial_page(env, io, page); - if (result == 0) - SetPageUptodate(vmpage); - } - } - if (result < 0) - cl_page_unassume(env, io, page); -out: - if (result < 0) { - if (vmpage) { - unlock_page(vmpage); - put_page(vmpage); - } - if (!IS_ERR_OR_NULL(page)) { - lu_ref_del(&page->cp_reference, "cl_io", io); - cl_page_put(env, page); - } - if (io) - io->ci_result = result; - } else { - *pagep = vmpage; - *fsdata = lcc; - } - return result; -} - -static int ll_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned int len, unsigned int copied, - struct page *vmpage, void *fsdata) -{ - struct ll_cl_context *lcc = fsdata; - const struct lu_env *env; - struct cl_io *io; - struct vvp_io *vio; - struct cl_page *page; - unsigned int from = pos & (PAGE_SIZE - 1); - bool unplug = false; - int result = 0; - - put_page(vmpage); - - env = lcc->lcc_env; - page = lcc->lcc_page; - io = lcc->lcc_io; - vio = vvp_env_io(env); - - LASSERT(cl_page_is_owned(page, io)); - if (copied > 0) { - struct cl_page_list *plist = &vio->u.write.vui_queue; - - lcc->lcc_page = NULL; /* page will be queued */ - - /* Add it into write queue */ - cl_page_list_add(plist, page); - if (plist->pl_nr == 1) /* first page */ - vio->u.write.vui_from = from; - else - LASSERT(from == 0); - vio->u.write.vui_to = from + copied; - - /* - * To address the deadlock in balance_dirty_pages() where - * this dirty page may be written back in the same thread. 
- */ - if (PageDirty(vmpage)) - unplug = true; - - /* We may have one full RPC, commit it soon */ - if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES) - unplug = true; - - CL_PAGE_DEBUG(D_VFSTRACE, env, page, - "queued page: %d.\n", plist->pl_nr); - } else { - cl_page_disown(env, io, page); - - lcc->lcc_page = NULL; - lu_ref_del(&page->cp_reference, "cl_io", io); - cl_page_put(env, page); - - /* page list is not contiguous now, commit it now */ - unplug = true; - } - - if (unplug || - file->f_flags & O_SYNC || IS_SYNC(file_inode(file))) - result = vvp_io_write_commit(env, io); - - if (result < 0) - io->ci_result = result; - return result >= 0 ? copied : result; -} - -#ifdef CONFIG_MIGRATION -static int ll_migratepage(struct address_space *mapping, - struct page *newpage, struct page *page, - enum migrate_mode mode - ) -{ - /* Always fail page migration until we have a proper implementation */ - return -EIO; -} -#endif - -const struct address_space_operations ll_aops = { - .readpage = ll_readpage, - .direct_IO = ll_direct_IO_26, - .writepage = ll_writepage, - .writepages = ll_writepages, - .set_page_dirty = __set_page_dirty_nobuffers, - .write_begin = ll_write_begin, - .write_end = ll_write_end, - .invalidatepage = ll_invalidatepage, - .releasepage = (void *)ll_releasepage, -#ifdef CONFIG_MIGRATION - .migratepage = ll_migratepage, -#endif -}; diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c deleted file mode 100644 index d864f5f36d85..000000000000 --- a/drivers/staging/lustre/lustre/llite/statahead.c +++ /dev/null @@ -1,1577 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include "llite_internal.h" - -#define SA_OMITTED_ENTRY_MAX 8ULL - -enum se_stat { - /** negative values are for error cases */ - SA_ENTRY_INIT = 0, /** init entry */ - SA_ENTRY_SUCC = 1, /** stat succeed */ - SA_ENTRY_INVA = 2, /** invalid entry */ -}; - -/* - * sa_entry is not refcounted: statahead thread allocates it and do async stat, - * and in async stat callback ll_statahead_interpret() will add it into - * sai_interim_entries, later statahead thread will call sa_handle_callback() to - * instantiate entry and move it into sai_entries, and then only scanner process - * can access and free it. 
- */ -struct sa_entry { - /* link into sai_interim_entries or sai_entries */ - struct list_head se_list; - /* link into sai hash table locally */ - struct list_head se_hash; - /* entry index in the sai */ - __u64 se_index; - /* low layer ldlm lock handle */ - __u64 se_handle; - /* entry status */ - enum se_stat se_state; - /* entry size, contains name */ - int se_size; - /* pointer to async getattr enqueue info */ - struct md_enqueue_info *se_minfo; - /* pointer to the async getattr request */ - struct ptlrpc_request *se_req; - /* pointer to the target inode */ - struct inode *se_inode; - /* entry name */ - struct qstr se_qstr; - /* entry fid */ - struct lu_fid se_fid; -}; - -static unsigned int sai_generation; -static DEFINE_SPINLOCK(sai_generation_lock); - -/* sa_entry is ready to use */ -static inline int sa_ready(struct sa_entry *entry) -{ - smp_rmb(); - return (entry->se_state != SA_ENTRY_INIT); -} - -/* hash value to put in sai_cache */ -static inline int sa_hash(int val) -{ - return val & LL_SA_CACHE_MASK; -} - -/* hash entry into sai_cache */ -static inline void -sa_rehash(struct ll_statahead_info *sai, struct sa_entry *entry) -{ - int i = sa_hash(entry->se_qstr.hash); - - spin_lock(&sai->sai_cache_lock[i]); - list_add_tail(&entry->se_hash, &sai->sai_cache[i]); - spin_unlock(&sai->sai_cache_lock[i]); -} - -/* - * Remove entry from SA table. 
- */ -static inline void -sa_unhash(struct ll_statahead_info *sai, struct sa_entry *entry) -{ - int i = sa_hash(entry->se_qstr.hash); - - spin_lock(&sai->sai_cache_lock[i]); - list_del_init(&entry->se_hash); - spin_unlock(&sai->sai_cache_lock[i]); -} - -static inline int agl_should_run(struct ll_statahead_info *sai, - struct inode *inode) -{ - return (inode && S_ISREG(inode->i_mode) && sai->sai_agl_valid); -} - -/* statahead window is full */ -static inline int sa_sent_full(struct ll_statahead_info *sai) -{ - return atomic_read(&sai->sai_cache_count) >= sai->sai_max; -} - -/* got async stat replies */ -static inline int sa_has_callback(struct ll_statahead_info *sai) -{ - return !list_empty(&sai->sai_interim_entries); -} - -static inline int agl_list_empty(struct ll_statahead_info *sai) -{ - return list_empty(&sai->sai_agls); -} - -/** - * (1) hit ratio less than 80% - * or - * (2) consecutive miss more than 8 - * then means low hit. - */ -static inline int sa_low_hit(struct ll_statahead_info *sai) -{ - return ((sai->sai_hit > 7 && sai->sai_hit < 4 * sai->sai_miss) || - (sai->sai_consecutive_miss > 8)); -} - -/* - * if the given index is behind of statahead window more than - * SA_OMITTED_ENTRY_MAX, then it is old. 
- */ -static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index) -{ - return ((__u64)sai->sai_max + index + SA_OMITTED_ENTRY_MAX < - sai->sai_index); -} - -/* allocate sa_entry and hash it to allow scanner process to find it */ -static struct sa_entry * -sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index, - const char *name, int len, const struct lu_fid *fid) -{ - struct ll_inode_info *lli; - struct sa_entry *entry; - int entry_size; - char *dname; - - entry_size = sizeof(struct sa_entry) + (len & ~3) + 4; - entry = kzalloc(entry_size, GFP_NOFS); - if (unlikely(!entry)) - return ERR_PTR(-ENOMEM); - - CDEBUG(D_READA, "alloc sa entry %.*s(%p) index %llu\n", - len, name, entry, index); - - entry->se_index = index; - entry->se_state = SA_ENTRY_INIT; - entry->se_size = entry_size; - dname = (char *)entry + sizeof(struct sa_entry); - memcpy(dname, name, len); - dname[len] = 0; - - entry->se_qstr.hash = full_name_hash(parent, name, len); - entry->se_qstr.len = len; - entry->se_qstr.name = dname; - entry->se_fid = *fid; - - lli = ll_i2info(sai->sai_dentry->d_inode); - spin_lock(&lli->lli_sa_lock); - INIT_LIST_HEAD(&entry->se_list); - sa_rehash(sai, entry); - spin_unlock(&lli->lli_sa_lock); - - atomic_inc(&sai->sai_cache_count); - - return entry; -} - -/* free sa_entry, which should have been unhashed and not in any list */ -static void sa_free(struct ll_statahead_info *sai, struct sa_entry *entry) -{ - CDEBUG(D_READA, "free sa entry %.*s(%p) index %llu\n", - entry->se_qstr.len, entry->se_qstr.name, entry, - entry->se_index); - - LASSERT(list_empty(&entry->se_list)); - LASSERT(list_empty(&entry->se_hash)); - - kfree(entry); - atomic_dec(&sai->sai_cache_count); -} - -/* - * find sa_entry by name, used by directory scanner, lock is not needed because - * only scanner can remove the entry from cache. 
- */ -static struct sa_entry * -sa_get(struct ll_statahead_info *sai, const struct qstr *qstr) -{ - struct sa_entry *entry; - int i = sa_hash(qstr->hash); - - list_for_each_entry(entry, &sai->sai_cache[i], se_hash) { - if (entry->se_qstr.hash == qstr->hash && - entry->se_qstr.len == qstr->len && - memcmp(entry->se_qstr.name, qstr->name, qstr->len) == 0) - return entry; - } - return NULL; -} - -/* unhash and unlink sa_entry, and then free it */ -static inline void -sa_kill(struct ll_statahead_info *sai, struct sa_entry *entry) -{ - struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode); - - LASSERT(!list_empty(&entry->se_hash)); - LASSERT(!list_empty(&entry->se_list)); - LASSERT(sa_ready(entry)); - - sa_unhash(sai, entry); - - spin_lock(&lli->lli_sa_lock); - list_del_init(&entry->se_list); - spin_unlock(&lli->lli_sa_lock); - - if (entry->se_inode) - iput(entry->se_inode); - - sa_free(sai, entry); -} - -/* called by scanner after use, sa_entry will be killed */ -static void -sa_put(struct ll_statahead_info *sai, struct sa_entry *entry, struct ll_inode_info *lli) -{ - struct sa_entry *tmp, *next; - - if (entry && entry->se_state == SA_ENTRY_SUCC) { - struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode); - - sai->sai_hit++; - sai->sai_consecutive_miss = 0; - sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max); - } else { - sai->sai_miss++; - sai->sai_consecutive_miss++; - } - - if (entry) - sa_kill(sai, entry); - - /* - * kill old completed entries, only scanner process does this, no need - * to lock - */ - list_for_each_entry_safe(tmp, next, &sai->sai_entries, se_list) { - if (!is_omitted_entry(sai, tmp->se_index)) - break; - sa_kill(sai, tmp); - } - - spin_lock(&lli->lli_sa_lock); - if (sai->sai_task) - wake_up_process(sai->sai_task); - spin_unlock(&lli->lli_sa_lock); - -} - -/* - * update state and sort add entry to sai_entries by index, return true if - * scanner is waiting on this entry. 
- */ -static bool -__sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret) -{ - struct list_head *pos = &sai->sai_entries; - __u64 index = entry->se_index; - struct sa_entry *se; - - LASSERT(!sa_ready(entry)); - LASSERT(list_empty(&entry->se_list)); - - list_for_each_entry_reverse(se, &sai->sai_entries, se_list) { - if (se->se_index < entry->se_index) { - pos = &se->se_list; - break; - } - } - list_add(&entry->se_list, pos); - entry->se_state = ret < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC; - - return (index == sai->sai_index_wait); -} - -/* - * release resources used in async stat RPC, update entry state and wakeup if - * scanner process it waiting on this entry. - */ -static void -sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret) -{ - struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode); - struct md_enqueue_info *minfo = entry->se_minfo; - struct ptlrpc_request *req = entry->se_req; - bool wakeup; - - /* release resources used in RPC */ - if (minfo) { - entry->se_minfo = NULL; - ll_intent_release(&minfo->mi_it); - iput(minfo->mi_dir); - kfree(minfo); - } - - if (req) { - entry->se_req = NULL; - ptlrpc_req_finished(req); - } - - spin_lock(&lli->lli_sa_lock); - wakeup = __sa_make_ready(sai, entry, ret); - spin_unlock(&lli->lli_sa_lock); - - if (wakeup) - wake_up(&sai->sai_waitq); -} - -/* Insert inode into the list of sai_agls. 
*/ -static void ll_agl_add(struct ll_statahead_info *sai, - struct inode *inode, int index) -{ - struct ll_inode_info *child = ll_i2info(inode); - struct ll_inode_info *parent = ll_i2info(sai->sai_dentry->d_inode); - int added = 0; - - spin_lock(&child->lli_agl_lock); - if (child->lli_agl_index == 0) { - child->lli_agl_index = index; - spin_unlock(&child->lli_agl_lock); - - LASSERT(list_empty(&child->lli_agl_list)); - - igrab(inode); - spin_lock(&parent->lli_agl_lock); - if (list_empty(&sai->sai_agls)) - added = 1; - list_add_tail(&child->lli_agl_list, &sai->sai_agls); - spin_unlock(&parent->lli_agl_lock); - } else { - spin_unlock(&child->lli_agl_lock); - } - - if (added > 0) - wake_up_process(sai->sai_agl_task); -} - -/* allocate sai */ -static struct ll_statahead_info *ll_sai_alloc(struct dentry *dentry) -{ - struct ll_inode_info *lli = ll_i2info(dentry->d_inode); - struct ll_statahead_info *sai; - int i; - - sai = kzalloc(sizeof(*sai), GFP_NOFS); - if (!sai) - return NULL; - - sai->sai_dentry = dget(dentry); - atomic_set(&sai->sai_refcount, 1); - - sai->sai_max = LL_SA_RPC_MIN; - sai->sai_index = 1; - init_waitqueue_head(&sai->sai_waitq); - - INIT_LIST_HEAD(&sai->sai_interim_entries); - INIT_LIST_HEAD(&sai->sai_entries); - INIT_LIST_HEAD(&sai->sai_agls); - - for (i = 0; i < LL_SA_CACHE_SIZE; i++) { - INIT_LIST_HEAD(&sai->sai_cache[i]); - spin_lock_init(&sai->sai_cache_lock[i]); - } - atomic_set(&sai->sai_cache_count, 0); - - spin_lock(&sai_generation_lock); - lli->lli_sa_generation = ++sai_generation; - if (unlikely(!sai_generation)) - lli->lli_sa_generation = ++sai_generation; - spin_unlock(&sai_generation_lock); - - return sai; -} - -/* free sai */ -static inline void ll_sai_free(struct ll_statahead_info *sai) -{ - LASSERT(sai->sai_dentry); - dput(sai->sai_dentry); - kfree(sai); -} - -/* - * take refcount of sai if sai for @dir exists, which means statahead is on for - * this directory. 
- */ -static inline struct ll_statahead_info *ll_sai_get(struct inode *dir) -{ - struct ll_inode_info *lli = ll_i2info(dir); - struct ll_statahead_info *sai = NULL; - - spin_lock(&lli->lli_sa_lock); - sai = lli->lli_sai; - if (sai) - atomic_inc(&sai->sai_refcount); - spin_unlock(&lli->lli_sa_lock); - - return sai; -} - -/* - * put sai refcount after use, if refcount reaches zero, free sai and sa_entries - * attached to it. - */ -static void ll_sai_put(struct ll_statahead_info *sai) -{ - struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode); - - if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_sa_lock)) { - struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode); - struct sa_entry *entry, *next; - - lli->lli_sai = NULL; - spin_unlock(&lli->lli_sa_lock); - - LASSERT(sai->sai_task == NULL); - LASSERT(sai->sai_agl_task == NULL); - LASSERT(sai->sai_sent == sai->sai_replied); - LASSERT(!sa_has_callback(sai)); - - list_for_each_entry_safe(entry, next, &sai->sai_entries, - se_list) - sa_kill(sai, entry); - - LASSERT(atomic_read(&sai->sai_cache_count) == 0); - LASSERT(list_empty(&sai->sai_agls)); - - ll_sai_free(sai); - atomic_dec(&sbi->ll_sa_running); - } -} - -/* Do NOT forget to drop inode refcount when into sai_agls. */ -static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai) -{ - struct ll_inode_info *lli = ll_i2info(inode); - __u64 index = lli->lli_agl_index; - int rc; - - LASSERT(list_empty(&lli->lli_agl_list)); - - /* AGL maybe fall behind statahead with one entry */ - if (is_omitted_entry(sai, index + 1)) { - lli->lli_agl_index = 0; - iput(inode); - return; - } - - /* Someone is in glimpse (sync or async), do nothing. */ - rc = down_write_trylock(&lli->lli_glimpse_sem); - if (rc == 0) { - lli->lli_agl_index = 0; - iput(inode); - return; - } - - /* - * Someone triggered glimpse within 1 sec before. 
- * 1) The former glimpse succeeded with glimpse lock granted by OST, and - * if the lock is still cached on client, AGL needs to do nothing. If - * it is cancelled by other client, AGL maybe cannot obtain new lock - * for no glimpse callback triggered by AGL. - * 2) The former glimpse succeeded, but OST did not grant glimpse lock. - * Under such case, it is quite possible that the OST will not grant - * glimpse lock for AGL also. - * 3) The former glimpse failed, compared with other two cases, it is - * relative rare. AGL can ignore such case, and it will not muchly - * affect the performance. - */ - if (lli->lli_glimpse_time != 0 && - time_before(jiffies - 1 * HZ, lli->lli_glimpse_time)) { - up_write(&lli->lli_glimpse_sem); - lli->lli_agl_index = 0; - iput(inode); - return; - } - - CDEBUG(D_READA, "Handling (init) async glimpse: inode = " - DFID ", idx = %llu\n", PFID(&lli->lli_fid), index); - - cl_agl(inode); - lli->lli_agl_index = 0; - lli->lli_glimpse_time = jiffies; - up_write(&lli->lli_glimpse_sem); - - CDEBUG(D_READA, "Handled (init) async glimpse: inode= " - DFID ", idx = %llu, rc = %d\n", - PFID(&lli->lli_fid), index, rc); - - iput(inode); -} - -/* - * prepare inode for sa entry, add it into agl list, now sa_entry is ready - * to be used by scanner process. 
- */ -static void sa_instantiate(struct ll_statahead_info *sai, - struct sa_entry *entry) -{ - struct inode *dir = sai->sai_dentry->d_inode; - struct inode *child; - struct md_enqueue_info *minfo; - struct lookup_intent *it; - struct ptlrpc_request *req; - struct mdt_body *body; - int rc = 0; - - LASSERT(entry->se_handle != 0); - - minfo = entry->se_minfo; - it = &minfo->mi_it; - req = entry->se_req; - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (!body) { - rc = -EFAULT; - goto out; - } - - child = entry->se_inode; - if (child) { - /* revalidate; unlinked and re-created with the same name */ - if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->mbo_fid1))) { - entry->se_inode = NULL; - iput(child); - child = NULL; - } - } - - it->it_lock_handle = entry->se_handle; - rc = md_revalidate_lock(ll_i2mdexp(dir), it, ll_inode2fid(dir), NULL); - if (rc != 1) { - rc = -EAGAIN; - goto out; - } - - rc = ll_prep_inode(&child, req, dir->i_sb, it); - if (rc) - goto out; - - CDEBUG(D_READA, "%s: setting %.*s" DFID " l_data to inode %p\n", - ll_get_fsname(child->i_sb, NULL, 0), - entry->se_qstr.len, entry->se_qstr.name, - PFID(ll_inode2fid(child)), child); - ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL); - - entry->se_inode = child; - - if (agl_should_run(sai, child)) - ll_agl_add(sai, child, entry->se_index); - -out: - /* - * sa_make_ready() will drop ldlm ibits lock refcount by calling - * ll_intent_drop_lock() in spite of failures. Do not worry about - * calling ll_intent_drop_lock() more than once. 
- */ - sa_make_ready(sai, entry, rc); -} - -/* once there are async stat replies, instantiate sa_entry from replies */ -static void sa_handle_callback(struct ll_statahead_info *sai) -{ - struct ll_inode_info *lli; - - lli = ll_i2info(sai->sai_dentry->d_inode); - - while (sa_has_callback(sai)) { - struct sa_entry *entry; - - spin_lock(&lli->lli_sa_lock); - if (unlikely(!sa_has_callback(sai))) { - spin_unlock(&lli->lli_sa_lock); - break; - } - entry = list_entry(sai->sai_interim_entries.next, - struct sa_entry, se_list); - list_del_init(&entry->se_list); - spin_unlock(&lli->lli_sa_lock); - - sa_instantiate(sai, entry); - } -} - -/* - * callback for async stat, because this is called in ptlrpcd context, we only - * put sa_entry in sai_cb_entries list, and let sa_handle_callback() to really - * prepare inode and instantiate sa_entry later. - */ -static int ll_statahead_interpret(struct ptlrpc_request *req, - struct md_enqueue_info *minfo, int rc) -{ - struct lookup_intent *it = &minfo->mi_it; - struct inode *dir = minfo->mi_dir; - struct ll_inode_info *lli = ll_i2info(dir); - struct ll_statahead_info *sai = lli->lli_sai; - struct sa_entry *entry = (struct sa_entry *)minfo->mi_cbdata; - __u64 handle = 0; - - if (it_disposition(it, DISP_LOOKUP_NEG)) - rc = -ENOENT; - - /* - * because statahead thread will wait for all inflight RPC to finish, - * sai should be always valid, no need to refcount - */ - LASSERT(sai); - LASSERT(entry); - - CDEBUG(D_READA, "sa_entry %.*s rc %d\n", - entry->se_qstr.len, entry->se_qstr.name, rc); - - if (rc) { - ll_intent_release(it); - iput(dir); - kfree(minfo); - } else { - /* - * release ibits lock ASAP to avoid deadlock when statahead - * thread enqueues lock on parent in readdir and another - * process enqueues lock on child with parent lock held, eg. - * unlink. 
- */ - handle = it->it_lock_handle; - ll_intent_drop_lock(it); - } - - spin_lock(&lli->lli_sa_lock); - if (rc) { - if (__sa_make_ready(sai, entry, rc)) - wake_up(&sai->sai_waitq); - } else { - int first = 0; - entry->se_minfo = minfo; - entry->se_req = ptlrpc_request_addref(req); - /* - * Release the async ibits lock ASAP to avoid deadlock - * when statahead thread tries to enqueue lock on parent - * for readpage and other tries to enqueue lock on child - * with parent's lock held, for example: unlink. - */ - entry->se_handle = handle; - if (!sa_has_callback(sai)) - first = 1; - - list_add_tail(&entry->se_list, &sai->sai_interim_entries); - - if (first && sai->sai_task) - wake_up_process(sai->sai_task); - } - sai->sai_replied++; - - spin_unlock(&lli->lli_sa_lock); - - return rc; -} - -/* finish async stat RPC arguments */ -static void sa_fini_data(struct md_enqueue_info *minfo) -{ - iput(minfo->mi_dir); - kfree(minfo); -} - -/** - * prepare arguments for async stat RPC. - */ -static struct md_enqueue_info * -sa_prep_data(struct inode *dir, struct inode *child, struct sa_entry *entry) -{ - struct md_enqueue_info *minfo; - struct ldlm_enqueue_info *einfo; - struct md_op_data *op_data; - - minfo = kzalloc(sizeof(*minfo), GFP_NOFS); - if (!minfo) - return ERR_PTR(-ENOMEM); - - op_data = ll_prep_md_op_data(&minfo->mi_data, dir, child, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) { - kfree(minfo); - return (struct md_enqueue_info *)op_data; - } - - if (!child) - op_data->op_fid2 = entry->se_fid; - - minfo->mi_it.it_op = IT_GETATTR; - minfo->mi_dir = igrab(dir); - minfo->mi_cb = ll_statahead_interpret; - minfo->mi_cbdata = entry; - - einfo = &minfo->mi_einfo; - einfo->ei_type = LDLM_IBITS; - einfo->ei_mode = it_to_lock_mode(&minfo->mi_it); - einfo->ei_cb_bl = ll_md_blocking_ast; - einfo->ei_cb_cp = ldlm_completion_ast; - einfo->ei_cb_gl = NULL; - einfo->ei_cbdata = NULL; - - return minfo; -} - -/* async stat for file not found in dcache */ -static int 
sa_lookup(struct inode *dir, struct sa_entry *entry) -{ - struct md_enqueue_info *minfo; - int rc; - - minfo = sa_prep_data(dir, NULL, entry); - if (IS_ERR(minfo)) - return PTR_ERR(minfo); - - rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo); - if (rc) - sa_fini_data(minfo); - - return rc; -} - -/** - * async stat for file found in dcache, similar to .revalidate - * - * \retval 1 dentry valid, no RPC sent - * \retval 0 dentry invalid, will send async stat RPC - * \retval negative number upon error - */ -static int sa_revalidate(struct inode *dir, struct sa_entry *entry, - struct dentry *dentry) -{ - struct inode *inode = d_inode(dentry); - struct lookup_intent it = { .it_op = IT_GETATTR, - .it_lock_handle = 0 }; - struct md_enqueue_info *minfo; - int rc; - - if (unlikely(!inode)) - return 1; - - if (d_mountpoint(dentry)) - return 1; - - entry->se_inode = igrab(inode); - rc = md_revalidate_lock(ll_i2mdexp(dir), &it, ll_inode2fid(inode), - NULL); - if (rc == 1) { - entry->se_handle = it.it_lock_handle; - ll_intent_release(&it); - return 1; - } - - minfo = sa_prep_data(dir, inode, entry); - if (IS_ERR(minfo)) { - entry->se_inode = NULL; - iput(inode); - return PTR_ERR(minfo); - } - - rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo); - if (rc) { - entry->se_inode = NULL; - iput(inode); - sa_fini_data(minfo); - } - - return rc; -} - -/* async stat for file with @name */ -static void sa_statahead(struct dentry *parent, const char *name, int len, - const struct lu_fid *fid) -{ - struct inode *dir = d_inode(parent); - struct ll_inode_info *lli = ll_i2info(dir); - struct ll_statahead_info *sai = lli->lli_sai; - struct dentry *dentry = NULL; - struct sa_entry *entry; - int rc; - - entry = sa_alloc(parent, sai, sai->sai_index, name, len, fid); - if (IS_ERR(entry)) - return; - - dentry = d_lookup(parent, &entry->se_qstr); - if (!dentry) { - rc = sa_lookup(dir, entry); - } else { - rc = sa_revalidate(dir, entry, dentry); - if (rc == 1 && agl_should_run(sai, 
d_inode(dentry))) - ll_agl_add(sai, d_inode(dentry), entry->se_index); - } - - if (dentry) - dput(dentry); - - if (rc) - sa_make_ready(sai, entry, rc); - else - sai->sai_sent++; - - sai->sai_index++; -} - -/* async glimpse (agl) thread main function */ -static int ll_agl_thread(void *arg) -{ - struct dentry *parent = arg; - struct inode *dir = d_inode(parent); - struct ll_inode_info *plli = ll_i2info(dir); - struct ll_inode_info *clli; - /* We already own this reference, so it is safe to take it without a lock. */ - struct ll_statahead_info *sai = plli->lli_sai; - - CDEBUG(D_READA, "agl thread started: sai %p, parent %pd\n", - sai, parent); - - while (!kthread_should_stop()) { - - spin_lock(&plli->lli_agl_lock); - /* The statahead thread maybe help to process AGL entries, - * so check whether list empty again. - */ - if (!list_empty(&sai->sai_agls)) { - clli = list_entry(sai->sai_agls.next, - struct ll_inode_info, lli_agl_list); - list_del_init(&clli->lli_agl_list); - spin_unlock(&plli->lli_agl_lock); - ll_agl_trigger(&clli->lli_vfs_inode, sai); - } else { - spin_unlock(&plli->lli_agl_lock); - } - - set_current_state(TASK_IDLE); - if (list_empty(&sai->sai_agls) && - !kthread_should_stop()) - schedule(); - __set_current_state(TASK_RUNNING); - } - - spin_lock(&plli->lli_agl_lock); - sai->sai_agl_valid = 0; - while (!list_empty(&sai->sai_agls)) { - clli = list_entry(sai->sai_agls.next, - struct ll_inode_info, lli_agl_list); - list_del_init(&clli->lli_agl_list); - spin_unlock(&plli->lli_agl_lock); - clli->lli_agl_index = 0; - iput(&clli->lli_vfs_inode); - spin_lock(&plli->lli_agl_lock); - } - spin_unlock(&plli->lli_agl_lock); - CDEBUG(D_READA, "agl thread stopped: sai %p, parent %pd\n", - sai, parent); - ll_sai_put(sai); - return 0; -} - -/* start agl thread */ -static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai) -{ - struct ll_inode_info *plli; - struct task_struct *task; - - CDEBUG(D_READA, "start agl thread: sai %p, parent %pd\n", - sai, 
parent); - - plli = ll_i2info(d_inode(parent)); - task = kthread_create(ll_agl_thread, parent, "ll_agl_%u", - plli->lli_opendir_pid); - if (IS_ERR(task)) { - CERROR("can't start ll_agl thread, rc: %ld\n", PTR_ERR(task)); - return; - } - - sai->sai_agl_task = task; - atomic_inc(&ll_i2sbi(d_inode(parent))->ll_agl_total); - spin_lock(&plli->lli_agl_lock); - sai->sai_agl_valid = 1; - spin_unlock(&plli->lli_agl_lock); - /* Get an extra reference that the thread holds */ - ll_sai_get(d_inode(parent)); - - wake_up_process(task); -} - -/* statahead thread main function */ -static int ll_statahead_thread(void *arg) -{ - struct dentry *parent = arg; - struct inode *dir = d_inode(parent); - struct ll_inode_info *lli = ll_i2info(dir); - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct ll_statahead_info *sai = lli->lli_sai; - struct page *page = NULL; - __u64 pos = 0; - int first = 0; - int rc = 0; - struct md_op_data *op_data; - - CDEBUG(D_READA, "statahead thread starting: sai %p, parent %pd\n", - sai, parent); - - op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0, - LUSTRE_OPC_ANY, dir); - if (IS_ERR(op_data)) { - rc = PTR_ERR(op_data); - goto out; - } - - op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages; - - while (pos != MDS_DIR_END_OFF && sai->sai_task) { - struct lu_dirpage *dp; - struct lu_dirent *ent; - - sai->sai_in_readpage = 1; - page = ll_get_dir_page(dir, op_data, pos); - sai->sai_in_readpage = 0; - if (IS_ERR(page)) { - rc = PTR_ERR(page); - CDEBUG(D_READA, "error reading dir " DFID " at %llu/%llu: opendir_pid = %u: rc = %d\n", - PFID(ll_inode2fid(dir)), pos, sai->sai_index, - lli->lli_opendir_pid, rc); - break; - } - - dp = page_address(page); - for (ent = lu_dirent_start(dp); - ent && sai->sai_task && !sa_low_hit(sai); - ent = lu_dirent_next(ent)) { - struct lu_fid fid; - __u64 hash; - int namelen; - char *name; - - hash = le64_to_cpu(ent->lde_hash); - if (unlikely(hash < pos)) - /* - * Skip until we find target hash value. 
- */ - continue; - - namelen = le16_to_cpu(ent->lde_namelen); - if (unlikely(namelen == 0)) - /* - * Skip dummy record. - */ - continue; - - name = ent->lde_name; - if (name[0] == '.') { - if (namelen == 1) { - /* - * skip "." - */ - continue; - } else if (name[1] == '.' && namelen == 2) { - /* - * skip ".." - */ - continue; - } else if (!sai->sai_ls_all) { - /* - * skip hidden files. - */ - sai->sai_skip_hidden++; - continue; - } - } - - /* - * don't stat-ahead first entry. - */ - if (unlikely(++first == 1)) - continue; - - fid_le_to_cpu(&fid, &ent->lde_fid); - - do { - sa_handle_callback(sai); - - spin_lock(&lli->lli_agl_lock); - while (sa_sent_full(sai) && - !agl_list_empty(sai)) { - struct ll_inode_info *clli; - - clli = list_entry(sai->sai_agls.next, - struct ll_inode_info, - lli_agl_list); - list_del_init(&clli->lli_agl_list); - spin_unlock(&lli->lli_agl_lock); - - ll_agl_trigger(&clli->lli_vfs_inode, - sai); - - spin_lock(&lli->lli_agl_lock); - } - spin_unlock(&lli->lli_agl_lock); - - set_current_state(TASK_IDLE); - if (sa_sent_full(sai) && - !sa_has_callback(sai) && - agl_list_empty(sai) && - sai->sai_task) - /* wait for spare statahead window */ - schedule(); - __set_current_state(TASK_RUNNING); - } while (sa_sent_full(sai) && sai->sai_task); - - sa_statahead(parent, name, namelen, &fid); - } - - pos = le64_to_cpu(dp->ldp_hash_end); - ll_release_page(dir, page, - le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE); - - if (sa_low_hit(sai)) { - rc = -EFAULT; - atomic_inc(&sbi->ll_sa_wrong); - CDEBUG(D_READA, "Statahead for dir " DFID " hit ratio too low: hit/miss %llu/%llu, sent/replied %llu/%llu, stopping statahead thread: pid %d\n", - PFID(&lli->lli_fid), sai->sai_hit, - sai->sai_miss, sai->sai_sent, - sai->sai_replied, current->pid); - break; - } - } - ll_finish_md_op_data(op_data); - - if (rc < 0) { - spin_lock(&lli->lli_sa_lock); - sai->sai_task = NULL; - lli->lli_sa_enabled = 0; - spin_unlock(&lli->lli_sa_lock); - } - - /* - * statahead is finished, but 
statahead entries need to be cached, wait - * for file release to stop me. - */ - while (sai->sai_task) { - sa_handle_callback(sai); - - set_current_state(TASK_IDLE); - if (!sa_has_callback(sai) && - sai->sai_task) - schedule(); - __set_current_state(TASK_RUNNING); - } -out: - if (sai->sai_agl_task) { - kthread_stop(sai->sai_agl_task); - - CDEBUG(D_READA, "stop agl thread: sai %p pid %u\n", - sai, (unsigned int)sai->sai_agl_task->pid); - sai->sai_agl_task = NULL; - } - /* - * wait for inflight statahead RPCs to finish, and then we can free sai - * safely because statahead RPC will access sai data - */ - while (sai->sai_sent != sai->sai_replied) { - /* in case we're not woken up, timeout wait */ - schedule_timeout_idle(HZ>>3); - } - - /* release resources held by statahead RPCs */ - sa_handle_callback(sai); - - CDEBUG(D_READA, "statahead thread stopped: sai %p, parent %pd\n", - sai, parent); - - spin_lock(&lli->lli_sa_lock); - sai->sai_task = NULL; - spin_unlock(&lli->lli_sa_lock); - - wake_up(&sai->sai_waitq); - ll_sai_put(sai); - - do_exit(rc); -} - -/* authorize opened dir handle @key to statahead */ -void ll_authorize_statahead(struct inode *dir, void *key) -{ - struct ll_inode_info *lli = ll_i2info(dir); - - spin_lock(&lli->lli_sa_lock); - if (!lli->lli_opendir_key && !lli->lli_sai) { - /* - * if lli_sai is not NULL, it means previous statahead is not - * finished yet, we'd better not start a new statahead for now. - */ - LASSERT(!lli->lli_opendir_pid); - lli->lli_opendir_key = key; - lli->lli_opendir_pid = current->pid; - lli->lli_sa_enabled = 1; - } - spin_unlock(&lli->lli_sa_lock); -} - -/* - * deauthorize opened dir handle @key to statahead, but statahead thread may - * still be running, notify it to quit. 
- */ -void ll_deauthorize_statahead(struct inode *dir, void *key) -{ - struct ll_inode_info *lli = ll_i2info(dir); - struct ll_statahead_info *sai; - - LASSERT(lli->lli_opendir_key == key); - LASSERT(lli->lli_opendir_pid); - - CDEBUG(D_READA, "deauthorize statahead for " DFID "\n", - PFID(&lli->lli_fid)); - - spin_lock(&lli->lli_sa_lock); - lli->lli_opendir_key = NULL; - lli->lli_opendir_pid = 0; - lli->lli_sa_enabled = 0; - sai = lli->lli_sai; - if (sai && sai->sai_task) { - /* - * statahead thread may not quit yet because it needs to cache - * entries, now it's time to tell it to quit. - */ - wake_up_process(sai->sai_task); - sai->sai_task = NULL; - } - spin_unlock(&lli->lli_sa_lock); -} - -enum { - /** - * not first dirent, or is "." - */ - LS_NOT_FIRST_DE = 0, - /** - * the first non-hidden dirent - */ - LS_FIRST_DE, - /** - * the first hidden dirent, that is "." - */ - LS_FIRST_DOT_DE -}; - -/* file is first dirent under @dir */ -static int is_first_dirent(struct inode *dir, struct dentry *dentry) -{ - const struct qstr *target = &dentry->d_name; - struct md_op_data *op_data; - struct page *page; - __u64 pos = 0; - int dot_de; - int rc = LS_NOT_FIRST_DE; - - op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0, - LUSTRE_OPC_ANY, dir); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - /** - * FIXME choose the start offset of the readdir - */ - op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages; - - page = ll_get_dir_page(dir, op_data, pos); - - while (1) { - struct lu_dirpage *dp; - struct lu_dirent *ent; - - if (IS_ERR(page)) { - struct ll_inode_info *lli = ll_i2info(dir); - - rc = PTR_ERR(page); - CERROR("%s: error reading dir " DFID " at %llu: opendir_pid = %u : rc = %d\n", - ll_get_fsname(dir->i_sb, NULL, 0), - PFID(ll_inode2fid(dir)), pos, - lli->lli_opendir_pid, rc); - break; - } - - dp = page_address(page); - for (ent = lu_dirent_start(dp); ent; - ent = lu_dirent_next(ent)) { - __u64 hash; - int namelen; - char *name; - - hash = 
le64_to_cpu(ent->lde_hash); - /* The ll_get_dir_page() can return any page containing - * the given hash which may be not the start hash. - */ - if (unlikely(hash < pos)) - continue; - - namelen = le16_to_cpu(ent->lde_namelen); - if (unlikely(namelen == 0)) - /* - * skip dummy record. - */ - continue; - - name = ent->lde_name; - if (name[0] == '.') { - if (namelen == 1) - /* - * skip "." - */ - continue; - else if (name[1] == '.' && namelen == 2) - /* - * skip ".." - */ - continue; - else - dot_de = 1; - } else { - dot_de = 0; - } - - if (dot_de && target->name[0] != '.') { - CDEBUG(D_READA, "%.*s skip hidden file %.*s\n", - target->len, target->name, - namelen, name); - continue; - } - - if (target->len != namelen || - memcmp(target->name, name, namelen) != 0) - rc = LS_NOT_FIRST_DE; - else if (!dot_de) - rc = LS_FIRST_DE; - else - rc = LS_FIRST_DOT_DE; - - ll_release_page(dir, page, false); - goto out; - } - pos = le64_to_cpu(dp->ldp_hash_end); - if (pos == MDS_DIR_END_OFF) { - /* - * End of directory reached. - */ - ll_release_page(dir, page, false); - goto out; - } else { - /* - * chain is exhausted - * Normal case: continue to the next page. 
- */ - ll_release_page(dir, page, - le32_to_cpu(dp->ldp_flags) & - LDF_COLLIDE); - page = ll_get_dir_page(dir, op_data, pos); - } - } -out: - ll_finish_md_op_data(op_data); - return rc; -} - -/** - * revalidate @dentryp from statahead cache - * - * \param[in] dir parent directory - * \param[in] sai sai structure - * \param[out] dentryp pointer to dentry which will be revalidated - * \param[in] unplug unplug statahead window only (normally for negative - * dentry) - * \retval 1 on success, dentry is saved in @dentryp - * \retval 0 if revalidation failed (no proper lock on client) - * \retval negative number upon error - */ -static int revalidate_statahead_dentry(struct inode *dir, - struct ll_statahead_info *sai, - struct dentry **dentryp, - bool unplug) -{ - struct ll_inode_info *lli = ll_i2info(dir); - struct sa_entry *entry = NULL; - struct ll_dentry_data *ldd; - int rc = 0; - - if ((*dentryp)->d_name.name[0] == '.') { - if (sai->sai_ls_all || - sai->sai_miss_hidden >= sai->sai_skip_hidden) { - /* - * Hidden dentry is the first one, or statahead - * thread does not skip so many hidden dentries - * before "sai_ls_all" enabled as below. - */ - } else { - if (!sai->sai_ls_all) - /* - * It maybe because hidden dentry is not - * the first one, "sai_ls_all" was not - * set, then "ls -al" missed. Enable - * "sai_ls_all" for such case. - */ - sai->sai_ls_all = 1; - - /* - * Such "getattr" has been skipped before - * "sai_ls_all" enabled as above. 
- */ - sai->sai_miss_hidden++; - return -EAGAIN; - } - } - - if (unplug) { - rc = 1; - goto out_unplug; - } - - entry = sa_get(sai, &(*dentryp)->d_name); - if (!entry) { - rc = -EAGAIN; - goto out_unplug; - } - - /* if statahead is busy in readdir, help it do post-work */ - if (!sa_ready(entry) && sai->sai_in_readpage) - sa_handle_callback(sai); - - if (!sa_ready(entry)) { - spin_lock(&lli->lli_sa_lock); - sai->sai_index_wait = entry->se_index; - spin_unlock(&lli->lli_sa_lock); - if (0 == wait_event_idle_timeout(sai->sai_waitq, - sa_ready(entry), 30 * HZ)) { - /* - * entry may not be ready, so it may be used by inflight - * statahead RPC, don't free it. - */ - entry = NULL; - rc = -EAGAIN; - goto out_unplug; - } - } - - if (entry->se_state == SA_ENTRY_SUCC && entry->se_inode) { - struct inode *inode = entry->se_inode; - struct lookup_intent it = { .it_op = IT_GETATTR, - .it_lock_handle = entry->se_handle }; - __u64 bits; - - rc = md_revalidate_lock(ll_i2mdexp(dir), &it, - ll_inode2fid(inode), &bits); - if (rc == 1) { - if (!(*dentryp)->d_inode) { - struct dentry *alias; - - alias = ll_splice_alias(inode, *dentryp); - if (IS_ERR(alias)) { - ll_intent_release(&it); - rc = PTR_ERR(alias); - goto out_unplug; - } - *dentryp = alias; - /** - * statahead prepared this inode, transfer inode - * refcount from sa_entry to dentry - */ - entry->se_inode = NULL; - } else if ((*dentryp)->d_inode != inode) { - /* revalidate, but inode is recreated */ - CDEBUG(D_READA, - "%s: stale dentry %pd inode " DFID ", statahead inode " DFID "\n", - ll_get_fsname((*dentryp)->d_inode->i_sb, - NULL, 0), - *dentryp, - PFID(ll_inode2fid((*dentryp)->d_inode)), - PFID(ll_inode2fid(inode))); - ll_intent_release(&it); - rc = -ESTALE; - goto out_unplug; - } - - if ((bits & MDS_INODELOCK_LOOKUP) && - d_lustre_invalid(*dentryp)) - d_lustre_revalidate(*dentryp); - ll_intent_release(&it); - } - } -out_unplug: - /* - * statahead cached sa_entry can be used only once, and will be killed - * right after 
use, so if lookup/revalidate accessed statahead cache, - * set dentry ldd_sa_generation to parent lli_sa_generation, later if we - * stat this file again, we know we've done statahead before, see - * dentry_may_statahead(). - */ - ldd = ll_d2d(*dentryp); - ldd->lld_sa_generation = lli->lli_sa_generation; - sa_put(sai, entry, lli); - return rc; -} - -/** - * start statahead thread - * - * \param[in] dir parent directory - * \param[in] dentry dentry that triggers statahead, normally the first - * dirent under @dir - * \retval -EAGAIN on success, because when this function is - * called, it's already in lookup call, so client should - * do it itself instead of waiting for statahead thread - * to do it asynchronously. - * \retval negative number upon error - */ -static int start_statahead_thread(struct inode *dir, struct dentry *dentry) -{ - struct ll_inode_info *lli = ll_i2info(dir); - struct ll_statahead_info *sai = NULL; - struct task_struct *task; - struct dentry *parent = dentry->d_parent; - int rc; - - /* I am the "lli_opendir_pid" owner, only me can set "lli_sai". */ - rc = is_first_dirent(dir, dentry); - if (rc == LS_NOT_FIRST_DE) { - /* It is not "ls -{a}l" operation, no need statahead for it. */ - rc = -EFAULT; - goto out; - } - - sai = ll_sai_alloc(parent); - if (!sai) { - rc = -ENOMEM; - goto out; - } - - sai->sai_ls_all = (rc == LS_FIRST_DOT_DE); - /* - * if current lli_opendir_key was deauthorized, or dir re-opened by - * another process, don't start statahead, otherwise the newly spawned - * statahead thread won't be notified to quit. 
- */ - spin_lock(&lli->lli_sa_lock); - if (unlikely(lli->lli_sai || lli->lli_opendir_key || - lli->lli_opendir_pid != current->pid)) { - spin_unlock(&lli->lli_sa_lock); - rc = -EPERM; - goto out; - } - lli->lli_sai = sai; - spin_unlock(&lli->lli_sa_lock); - - atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_running); - - CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %pd]\n", - current->pid, parent); - - task = kthread_create(ll_statahead_thread, parent, "ll_sa_%u", - lli->lli_opendir_pid); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - CERROR("can't start ll_sa thread, rc : %d\n", rc); - goto out; - } - - if (ll_i2sbi(parent->d_inode)->ll_flags & LL_SBI_AGL_ENABLED) - ll_start_agl(parent, sai); - - atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_total); - sai->sai_task = task; - - wake_up_process(task); - - /* - * We don't stat-ahead for the first dirent since we are already in - * lookup. - */ - return -EAGAIN; - -out: - /* - * once we start statahead thread failed, disable statahead so - * that subsequent stat won't waste time to try it. - */ - spin_lock(&lli->lli_sa_lock); - lli->lli_sa_enabled = 0; - lli->lli_sai = NULL; - spin_unlock(&lli->lli_sa_lock); - if (sai) - ll_sai_free(sai); - return rc; -} - -/** - * statahead entry function, this is called when client getattr on a file, it - * will start statahead thread if this is the first dir entry, else revalidate - * dentry from statahead cache. 
- * - * \param[in] dir parent directory - * \param[out] dentryp dentry to getattr - * \param[in] unplug unplug statahead window only (normally for negative - * dentry) - * \retval 1 on success - * \retval 0 revalidation from statahead cache failed, caller needs - * to getattr from server directly - * \retval negative number on error, caller often ignores this and - * then getattr from server - */ -int ll_statahead(struct inode *dir, struct dentry **dentryp, bool unplug) -{ - struct ll_statahead_info *sai; - - sai = ll_sai_get(dir); - if (sai) { - int rc; - - rc = revalidate_statahead_dentry(dir, sai, dentryp, unplug); - CDEBUG(D_READA, "revalidate statahead %pd: %d.\n", - *dentryp, rc); - ll_sai_put(sai); - return rc; - } - return start_statahead_thread(dir, *dentryp); -} diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c deleted file mode 100644 index d335f29556c2..000000000000 --- a/drivers/staging/lustre/lustre/llite/super25.c +++ /dev/null @@ -1,189 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include -#include -#include -#include -#include -#include "llite_internal.h" - -static struct kmem_cache *ll_inode_cachep; - -static struct inode *ll_alloc_inode(struct super_block *sb) -{ - struct ll_inode_info *lli; - - ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_ALLOC_INODE, 1); - lli = kmem_cache_zalloc(ll_inode_cachep, GFP_NOFS); - if (!lli) - return NULL; - - inode_init_once(&lli->lli_vfs_inode); - return &lli->lli_vfs_inode; -} - -static void ll_inode_destroy_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - struct ll_inode_info *ptr = ll_i2info(inode); - - kmem_cache_free(ll_inode_cachep, ptr); -} - -static void ll_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, ll_inode_destroy_callback); -} - -/* exported operations */ -struct super_operations lustre_super_operations = { - .alloc_inode = ll_alloc_inode, - .destroy_inode = ll_destroy_inode, - .evict_inode = ll_delete_inode, - .put_super = ll_put_super, - .statfs = ll_statfs, - .umount_begin = ll_umount_begin, - .remount_fs = ll_remount_fs, - .show_options = ll_show_options, -}; -MODULE_ALIAS_FS("lustre"); - -static int __init lustre_init(void) -{ - int rc; - - BUILD_BUG_ON(sizeof(LUSTRE_VOLATILE_HDR) != - LUSTRE_VOLATILE_HDR_LEN + 1); - - rc = libcfs_setup(); - if (rc) - return rc; - - /* print an address of _any_ initialized kernel symbol from this - * module, to allow debugging with gdb that doesn't support data - * symbols from modules. 
- */ - CDEBUG(D_INFO, "Lustre client module (%p).\n", - &lustre_super_operations); - - rc = -ENOMEM; - ll_inode_cachep = kmem_cache_create("lustre_inode_cache", - sizeof(struct ll_inode_info), 0, - SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, - NULL); - if (!ll_inode_cachep) - goto out_cache; - - ll_file_data_slab = kmem_cache_create("ll_file_data", - sizeof(struct ll_file_data), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!ll_file_data_slab) - goto out_cache; - - llite_root = debugfs_create_dir("llite", debugfs_lustre_root); - if (IS_ERR_OR_NULL(llite_root)) { - rc = llite_root ? PTR_ERR(llite_root) : -ENOMEM; - llite_root = NULL; - goto out_cache; - } - - llite_kset = kset_create_and_add("llite", NULL, lustre_kobj); - if (!llite_kset) { - rc = -ENOMEM; - goto out_debugfs; - } - - rc = vvp_global_init(); - if (rc != 0) - goto out_sysfs; - - cl_inode_fini_env = cl_env_alloc(&cl_inode_fini_refcheck, - LCT_REMEMBER | LCT_NOREF); - if (IS_ERR(cl_inode_fini_env)) { - rc = PTR_ERR(cl_inode_fini_env); - goto out_vvp; - } - - cl_inode_fini_env->le_ctx.lc_cookie = 0x4; - - rc = ll_xattr_init(); - if (rc != 0) - goto out_inode_fini_env; - - lustre_register_super_ops(THIS_MODULE, ll_fill_super, ll_kill_super); - lustre_register_client_process_config(ll_process_config); - - return 0; - -out_inode_fini_env: - cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck); -out_vvp: - vvp_global_fini(); -out_sysfs: - kset_unregister(llite_kset); -out_debugfs: - debugfs_remove(llite_root); -out_cache: - kmem_cache_destroy(ll_inode_cachep); - kmem_cache_destroy(ll_file_data_slab); - return rc; -} - -static void __exit lustre_exit(void) -{ - lustre_register_super_ops(NULL, NULL, NULL); - lustre_register_client_process_config(NULL); - - debugfs_remove(llite_root); - kset_unregister(llite_kset); - - ll_xattr_fini(); - cl_env_put(cl_inode_fini_env, &cl_inode_fini_refcheck); - vvp_global_fini(); - - kmem_cache_destroy(ll_inode_cachep); - kmem_cache_destroy(ll_file_data_slab); -} - -MODULE_AUTHOR("OpenSFS, 
Inc. "); -MODULE_DESCRIPTION("Lustre Client File System"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(lustre_init); -module_exit(lustre_exit); diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c deleted file mode 100644 index 0690fdbf49f5..000000000000 --- a/drivers/staging/lustre/lustre/llite/symlink.c +++ /dev/null @@ -1,159 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#include -#include -#include -#define DEBUG_SUBSYSTEM S_LLITE - -#include "llite_internal.h" - -static int ll_readlink_internal(struct inode *inode, - struct ptlrpc_request **request, char **symname) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc, symlen = i_size_read(inode) + 1; - struct mdt_body *body; - struct md_op_data *op_data; - - *request = NULL; - - if (lli->lli_symlink_name) { - int print_limit = min_t(int, PAGE_SIZE - 128, symlen); - - *symname = lli->lli_symlink_name; - /* If the total CDEBUG() size is larger than a page, it - * will print a warning to the console, avoid this by - * printing just the last part of the symlink. - */ - CDEBUG(D_INODE, "using cached symlink %s%.*s, len = %d\n", - print_limit < symlen ? "..." : "", print_limit, - (*symname) + symlen - print_limit, symlen); - return 0; - } - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, symlen, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) - return PTR_ERR(op_data); - - op_data->op_valid = OBD_MD_LINKNAME; - rc = md_getattr(sbi->ll_md_exp, op_data, request); - ll_finish_md_op_data(op_data); - if (rc) { - if (rc != -ENOENT) - CERROR("%s: inode " DFID ": rc = %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), rc); - goto failed; - } - - body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); - if ((body->mbo_valid & OBD_MD_LINKNAME) == 0) { - CERROR("OBD_MD_LINKNAME not set on reply\n"); - rc = -EPROTO; - goto failed; - } - - LASSERT(symlen != 0); - if (body->mbo_eadatasize != symlen) { - CERROR("%s: inode " DFID ": symlink length %d not expected %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), body->mbo_eadatasize - 1, - symlen - 1); - rc = -EPROTO; - goto failed; - } - - *symname = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_MD); - if (!*symname || - strnlen(*symname, symlen) != symlen - 1) { - /* not full/NULL terminated */ - CERROR("inode 
%lu: symlink not NULL terminated string of length %d\n", - inode->i_ino, symlen - 1); - rc = -EPROTO; - goto failed; - } - - lli->lli_symlink_name = kzalloc(symlen, GFP_NOFS); - /* do not return an error if we cannot cache the symlink locally */ - if (lli->lli_symlink_name) { - memcpy(lli->lli_symlink_name, *symname, symlen); - *symname = lli->lli_symlink_name; - } - return 0; - -failed: - return rc; -} - -static void ll_put_link(void *p) -{ - ptlrpc_req_finished(p); -} - -static const char *ll_get_link(struct dentry *dentry, - struct inode *inode, - struct delayed_call *done) -{ - struct ptlrpc_request *request = NULL; - int rc; - char *symname = NULL; - - if (!dentry) - return ERR_PTR(-ECHILD); - - CDEBUG(D_VFSTRACE, "VFS Op\n"); - ll_inode_size_lock(inode); - rc = ll_readlink_internal(inode, &request, &symname); - ll_inode_size_unlock(inode); - if (rc) { - ptlrpc_req_finished(request); - return ERR_PTR(rc); - } - - /* symname may contain a pointer to the request message buffer, - * we delay request releasing then. - */ - set_delayed_call(done, ll_put_link, request); - return symname; -} - -const struct inode_operations ll_fast_symlink_inode_operations = { - .setattr = ll_setattr, - .get_link = ll_get_link, - .getattr = ll_getattr, - .permission = ll_inode_permission, - .listxattr = ll_listxattr, -}; diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c deleted file mode 100644 index 31dc3c0ade01..000000000000 --- a/drivers/staging/lustre/lustre/llite/vvp_dev.c +++ /dev/null @@ -1,640 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * cl_device and cl_device_type implementation for VVP layer. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include "llite_internal.h" -#include "vvp_internal.h" - -/***************************************************************************** - * - * Vvp device and device type functions. - * - */ - -/* - * vvp_ prefix stands for "Vfs Vm Posix". It corresponds to historical - * "llite_" (var. "ll_") prefix. 
- */ - -static struct kmem_cache *ll_thread_kmem; -struct kmem_cache *vvp_lock_kmem; -struct kmem_cache *vvp_object_kmem; -static struct kmem_cache *vvp_session_kmem; -static struct kmem_cache *vvp_thread_kmem; - -static struct lu_kmem_descr vvp_caches[] = { - { - .ckd_cache = &ll_thread_kmem, - .ckd_name = "ll_thread_kmem", - .ckd_size = sizeof(struct ll_thread_info), - }, - { - .ckd_cache = &vvp_lock_kmem, - .ckd_name = "vvp_lock_kmem", - .ckd_size = sizeof(struct vvp_lock), - }, - { - .ckd_cache = &vvp_object_kmem, - .ckd_name = "vvp_object_kmem", - .ckd_size = sizeof(struct vvp_object), - }, - { - .ckd_cache = &vvp_session_kmem, - .ckd_name = "vvp_session_kmem", - .ckd_size = sizeof(struct vvp_session) - }, - { - .ckd_cache = &vvp_thread_kmem, - .ckd_name = "vvp_thread_kmem", - .ckd_size = sizeof(struct vvp_thread_info), - }, - { - .ckd_cache = NULL - } -}; - -static void *ll_thread_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct vvp_thread_info *info; - - info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS); - if (!info) - info = ERR_PTR(-ENOMEM); - return info; -} - -static void ll_thread_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct vvp_thread_info *info = data; - - kmem_cache_free(ll_thread_kmem, info); -} - -struct lu_context_key ll_thread_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = ll_thread_key_init, - .lct_fini = ll_thread_key_fini -}; - -static void *vvp_session_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct vvp_session *session; - - session = kmem_cache_zalloc(vvp_session_kmem, GFP_NOFS); - if (!session) - session = ERR_PTR(-ENOMEM); - return session; -} - -static void vvp_session_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct vvp_session *session = data; - - kmem_cache_free(vvp_session_kmem, session); -} - -struct lu_context_key vvp_session_key = { - .lct_tags = LCT_SESSION, - .lct_init = 
vvp_session_key_init, - .lct_fini = vvp_session_key_fini -}; - -static void *vvp_thread_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct vvp_thread_info *vti; - - vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS); - if (!vti) - vti = ERR_PTR(-ENOMEM); - return vti; -} - -static void vvp_thread_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct vvp_thread_info *vti = data; - - kmem_cache_free(vvp_thread_kmem, vti); -} - -struct lu_context_key vvp_thread_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = vvp_thread_key_init, - .lct_fini = vvp_thread_key_fini -}; - -/* type constructor/destructor: vvp_type_{init,fini,start,stop}(). */ -LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key); - -static const struct lu_device_operations vvp_lu_ops = { - .ldo_object_alloc = vvp_object_alloc -}; - -static struct lu_device *vvp_device_free(const struct lu_env *env, - struct lu_device *d) -{ - struct vvp_device *vdv = lu2vvp_dev(d); - struct cl_site *site = lu2cl_site(d->ld_site); - struct lu_device *next = cl2lu_dev(vdv->vdv_next); - - if (d->ld_site) { - cl_site_fini(site); - kfree(site); - } - cl_device_fini(lu2cl_dev(d)); - kfree(vdv); - return next; -} - -static struct lu_device *vvp_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg) -{ - struct vvp_device *vdv; - struct lu_device *lud; - struct cl_site *site; - int rc; - - vdv = kzalloc(sizeof(*vdv), GFP_NOFS); - if (!vdv) - return ERR_PTR(-ENOMEM); - - lud = &vdv->vdv_cl.cd_lu_dev; - cl_device_init(&vdv->vdv_cl, t); - vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops; - - site = kzalloc(sizeof(*site), GFP_NOFS); - if (site) { - rc = cl_site_init(site, &vdv->vdv_cl); - if (rc == 0) { - rc = lu_site_init_finish(&site->cs_lu); - } else { - LASSERT(!lud->ld_site); - CERROR("Cannot init lu_site, rc %d.\n", rc); - kfree(site); - } - } else { - rc = -ENOMEM; - } - if (rc != 0) { - vvp_device_free(env, 
lud); - lud = ERR_PTR(rc); - } - return lud; -} - -static int vvp_device_init(const struct lu_env *env, struct lu_device *d, - const char *name, struct lu_device *next) -{ - struct vvp_device *vdv; - int rc; - - vdv = lu2vvp_dev(d); - vdv->vdv_next = lu2cl_dev(next); - - LASSERT(d->ld_site && next->ld_type); - next->ld_site = d->ld_site; - rc = next->ld_type->ldt_ops->ldto_device_init(env, next, - next->ld_type->ldt_name, - NULL); - if (rc == 0) { - lu_device_get(next); - lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init); - } - return rc; -} - -static struct lu_device *vvp_device_fini(const struct lu_env *env, - struct lu_device *d) -{ - return cl2lu_dev(lu2vvp_dev(d)->vdv_next); -} - -static const struct lu_device_type_operations vvp_device_type_ops = { - .ldto_init = vvp_type_init, - .ldto_fini = vvp_type_fini, - - .ldto_start = vvp_type_start, - .ldto_stop = vvp_type_stop, - - .ldto_device_alloc = vvp_device_alloc, - .ldto_device_free = vvp_device_free, - .ldto_device_init = vvp_device_init, - .ldto_device_fini = vvp_device_fini, -}; - -struct lu_device_type vvp_device_type = { - .ldt_tags = LU_DEVICE_CL, - .ldt_name = LUSTRE_VVP_NAME, - .ldt_ops = &vvp_device_type_ops, - .ldt_ctx_tags = LCT_CL_THREAD -}; - -/** - * A mutex serializing calls to vvp_inode_fini() under extreme memory - * pressure, when environments cannot be allocated. - */ -int vvp_global_init(void) -{ - int rc; - - rc = lu_kmem_init(vvp_caches); - if (rc != 0) - return rc; - - rc = lu_device_type_init(&vvp_device_type); - if (rc != 0) - goto out_kmem; - - return 0; - -out_kmem: - lu_kmem_fini(vvp_caches); - - return rc; -} - -void vvp_global_fini(void) -{ - lu_device_type_fini(&vvp_device_type); - lu_kmem_fini(vvp_caches); -} - -/***************************************************************************** - * - * mirror obd-devices into cl devices. 
- * - */ - -int cl_sb_init(struct super_block *sb) -{ - struct ll_sb_info *sbi; - struct cl_device *cl; - struct lu_env *env; - int rc = 0; - u16 refcheck; - - sbi = ll_s2sbi(sb); - env = cl_env_get(&refcheck); - if (!IS_ERR(env)) { - cl = cl_type_setup(env, NULL, &vvp_device_type, - sbi->ll_dt_exp->exp_obd->obd_lu_dev); - if (!IS_ERR(cl)) { - sbi->ll_cl = cl; - sbi->ll_site = cl2lu_dev(cl)->ld_site; - } - cl_env_put(env, &refcheck); - } else { - rc = PTR_ERR(env); - } - return rc; -} - -int cl_sb_fini(struct super_block *sb) -{ - struct ll_sb_info *sbi; - struct lu_env *env; - struct cl_device *cld; - u16 refcheck; - int result; - - sbi = ll_s2sbi(sb); - env = cl_env_get(&refcheck); - if (!IS_ERR(env)) { - cld = sbi->ll_cl; - - if (cld) { - cl_stack_fini(env, cld); - sbi->ll_cl = NULL; - sbi->ll_site = NULL; - } - cl_env_put(env, &refcheck); - result = 0; - } else { - CERROR("Cannot cleanup cl-stack due to memory shortage.\n"); - result = PTR_ERR(env); - } - return result; -} - -/**************************************************************************** - * - * debugfs/lustre/llite/$MNT/dump_page_cache - * - ****************************************************************************/ - -/* - * To represent contents of a page cache as a byte stream, following - * information if encoded in 64bit offset: - * - * - file hash bucket in lu_site::ls_hash[] 28bits - * - * - how far file is from bucket head 4bits - * - * - page index 32bits - * - * First two data identify a file in the cache uniquely. 
- */ - -#define PGC_OBJ_SHIFT (32 + 4) -#define PGC_DEPTH_SHIFT (32) - -struct vvp_pgcache_id { - unsigned int vpi_bucket; - unsigned int vpi_depth; - u32 vpi_index; - - unsigned int vpi_curdep; - struct lu_object_header *vpi_obj; -}; - -struct seq_private { - struct ll_sb_info *sbi; - struct lu_env *env; - u16 refcheck; - struct cl_object *clob; -}; - -static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id) -{ - BUILD_BUG_ON(sizeof(pos) != sizeof(__u64)); - - id->vpi_index = pos & 0xffffffff; - id->vpi_depth = (pos >> PGC_DEPTH_SHIFT) & 0xf; - id->vpi_bucket = (unsigned long long)pos >> PGC_OBJ_SHIFT; -} - -static loff_t vvp_pgcache_id_pack(struct vvp_pgcache_id *id) -{ - return - ((__u64)id->vpi_index) | - ((__u64)id->vpi_depth << PGC_DEPTH_SHIFT) | - ((__u64)id->vpi_bucket << PGC_OBJ_SHIFT); -} - -static int vvp_pgcache_obj_get(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *data) -{ - struct vvp_pgcache_id *id = data; - struct lu_object_header *hdr = cfs_hash_object(hs, hnode); - - if (id->vpi_curdep-- > 0) - return 0; /* continue */ - - if (lu_object_is_dying(hdr)) - return 1; - - cfs_hash_get(hs, hnode); - id->vpi_obj = hdr; - return 1; -} - -static struct cl_object *vvp_pgcache_obj(const struct lu_env *env, - struct lu_device *dev, - struct vvp_pgcache_id *id) -{ - LASSERT(lu_device_is_cl(dev)); - - id->vpi_depth &= 0xf; - id->vpi_obj = NULL; - id->vpi_curdep = id->vpi_depth; - - cfs_hash_hlist_for_each(dev->ld_site->ls_obj_hash, id->vpi_bucket, - vvp_pgcache_obj_get, id); - if (id->vpi_obj) { - struct lu_object *lu_obj; - - lu_obj = lu_object_locate(id->vpi_obj, dev->ld_type); - if (lu_obj) { - lu_object_ref_add(lu_obj, "dump", current); - return lu2cl(lu_obj); - } - lu_object_put(env, lu_object_top(id->vpi_obj)); - - } else if (id->vpi_curdep > 0) { - id->vpi_depth = 0xf; - } - return NULL; -} - -static struct page *vvp_pgcache_find(const struct lu_env *env, - struct lu_device *dev, - struct cl_object 
**clobp, loff_t *pos) -{ - struct cl_object *clob; - struct lu_site *site; - struct vvp_pgcache_id id; - - site = dev->ld_site; - vvp_pgcache_id_unpack(*pos, &id); - - while (1) { - if (id.vpi_bucket >= CFS_HASH_NHLIST(site->ls_obj_hash)) - return NULL; - clob = vvp_pgcache_obj(env, dev, &id); - if (clob) { - struct inode *inode = vvp_object_inode(clob); - struct page *vmpage; - int nr; - - nr = find_get_pages_contig(inode->i_mapping, - id.vpi_index, 1, &vmpage); - if (nr > 0) { - id.vpi_index = vmpage->index; - /* Cant support over 16T file */ - if (vmpage->index <= 0xffffffff) { - *clobp = clob; - *pos = vvp_pgcache_id_pack(&id); - return vmpage; - } - put_page(vmpage); - } - - lu_object_ref_del(&clob->co_lu, "dump", current); - cl_object_put(env, clob); - } - /* to the next object. */ - ++id.vpi_depth; - id.vpi_depth &= 0xf; - if (id.vpi_depth == 0 && ++id.vpi_bucket == 0) - return NULL; - id.vpi_index = 0; - } -} - -#define seq_page_flag(seq, page, flag, has_flags) do { \ - if (test_bit(PG_##flag, &(page)->flags)) { \ - seq_printf(seq, "%s"#flag, has_flags ? "|" : ""); \ - has_flags = 1; \ - } \ -} while (0) - -static void vvp_pgcache_page_show(const struct lu_env *env, - struct seq_file *seq, struct cl_page *page) -{ - struct vvp_page *vpg; - struct page *vmpage; - int has_flags; - - vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type)); - vmpage = vpg->vpg_page; - seq_printf(seq, " %5i | %p %p %s %s %s | %p " DFID "(%p) %lu %u [", - 0 /* gen */, - vpg, page, - "none", - vpg->vpg_defer_uptodate ? "du" : "- ", - PageWriteback(vmpage) ? 
"wb" : "-", - vmpage, PFID(ll_inode2fid(vmpage->mapping->host)), - vmpage->mapping->host, vmpage->index, - page_count(vmpage)); - has_flags = 0; - seq_page_flag(seq, vmpage, locked, has_flags); - seq_page_flag(seq, vmpage, error, has_flags); - seq_page_flag(seq, vmpage, referenced, has_flags); - seq_page_flag(seq, vmpage, uptodate, has_flags); - seq_page_flag(seq, vmpage, dirty, has_flags); - seq_page_flag(seq, vmpage, writeback, has_flags); - seq_printf(seq, "%s]\n", has_flags ? "" : "-"); -} - -static int vvp_pgcache_show(struct seq_file *f, void *v) -{ - struct seq_private *priv = f->private; - struct page *vmpage = v; - struct cl_page *page; - - seq_printf(f, "%8lx@" DFID ": ", vmpage->index, - PFID(lu_object_fid(&priv->clob->co_lu))); - lock_page(vmpage); - page = cl_vmpage_page(vmpage, priv->clob); - unlock_page(vmpage); - put_page(vmpage); - - if (page) { - vvp_pgcache_page_show(priv->env, f, page); - cl_page_put(priv->env, page); - } else { - seq_puts(f, "missing\n"); - } - lu_object_ref_del(&priv->clob->co_lu, "dump", current); - cl_object_put(priv->env, priv->clob); - - return 0; -} - -static void *vvp_pgcache_start(struct seq_file *f, loff_t *pos) -{ - struct seq_private *priv = f->private; - struct page *ret; - - if (priv->sbi->ll_site->ls_obj_hash->hs_cur_bits > - 64 - PGC_OBJ_SHIFT) - ret = ERR_PTR(-EFBIG); - else - ret = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev, - &priv->clob, pos); - - return ret; -} - -static void *vvp_pgcache_next(struct seq_file *f, void *v, loff_t *pos) -{ - struct seq_private *priv = f->private; - struct page *ret; - - *pos += 1; - ret = vvp_pgcache_find(priv->env, &priv->sbi->ll_cl->cd_lu_dev, - &priv->clob, pos); - return ret; -} - -static void vvp_pgcache_stop(struct seq_file *f, void *v) -{ - /* Nothing to do */ -} - -static const struct seq_operations vvp_pgcache_ops = { - .start = vvp_pgcache_start, - .next = vvp_pgcache_next, - .stop = vvp_pgcache_stop, - .show = vvp_pgcache_show -}; - -static int 
vvp_dump_pgcache_seq_open(struct inode *inode, struct file *filp) -{ - struct seq_private *priv; - - priv = __seq_open_private(filp, &vvp_pgcache_ops, sizeof(*priv)); - if (!priv) - return -ENOMEM; - - priv->sbi = inode->i_private; - priv->env = cl_env_get(&priv->refcheck); - if (IS_ERR(priv->env)) { - int err = PTR_ERR(priv->env); - - seq_release_private(inode, filp); - return err; - } - return 0; -} - -static int vvp_dump_pgcache_seq_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct seq_private *priv = seq->private; - - cl_env_put(priv->env, &priv->refcheck); - return seq_release_private(inode, file); -} - -const struct file_operations vvp_dump_pgcache_file_ops = { - .owner = THIS_MODULE, - .open = vvp_dump_pgcache_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = vvp_dump_pgcache_seq_release, -}; diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h deleted file mode 100644 index 7d3abb43584a..000000000000 --- a/drivers/staging/lustre/lustre/llite/vvp_internal.h +++ /dev/null @@ -1,321 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2013, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Internal definitions for VVP layer. - * - * Author: Nikita Danilov - */ - -#ifndef VVP_INTERNAL_H -#define VVP_INTERNAL_H - -#include -#include - -enum obd_notify_event; -struct inode; -struct lustre_md; -struct obd_device; -struct obd_export; -struct page; - -/** - * IO state private to IO state private to VVP layer. - */ -struct vvp_io { - /** super class */ - struct cl_io_slice vui_cl; - struct cl_io_lock_link vui_link; - /** - * I/O vector information to or from which read/write is going. - */ - struct iov_iter *vui_iter; - /** - * Total size for the left IO. - */ - size_t vui_tot_count; - - union { - struct vvp_fault_io { - /** - * Inode modification time that is checked across DLM - * lock request. - */ - time64_t ft_mtime; - struct vm_area_struct *ft_vma; - /** - * locked page returned from vvp_io - */ - struct page *ft_vmpage; - /** - * kernel fault info - */ - struct vm_fault *ft_vmf; - /** - * fault API used bitflags for return code. - */ - unsigned int ft_flags; - /** - * check that flags are from filemap_fault - */ - bool ft_flags_valid; - } fault; - struct { - struct cl_page_list vui_queue; - unsigned long vui_written; - int vui_from; - int vui_to; - } write; - } u; - - /** - * Layout version when this IO is initialized - */ - __u32 vui_layout_gen; - /** - * File descriptor against which IO is done. - */ - struct ll_file_data *vui_fd; - struct kiocb *vui_iocb; - - /* Readahead state. 
*/ - pgoff_t vui_ra_start; - pgoff_t vui_ra_count; - /* Set when vui_ra_{start,count} have been initialized. */ - bool vui_ra_valid; -}; - -extern struct lu_device_type vvp_device_type; - -extern struct lu_context_key vvp_session_key; -extern struct lu_context_key vvp_thread_key; - -extern struct kmem_cache *vvp_lock_kmem; -extern struct kmem_cache *vvp_object_kmem; - -struct vvp_thread_info { - struct cl_lock vti_lock; - struct cl_lock_descr vti_descr; - struct cl_io vti_io; - struct cl_attr vti_attr; -}; - -static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env) -{ - struct vvp_thread_info *vti; - - vti = lu_context_key_get(&env->le_ctx, &vvp_thread_key); - LASSERT(vti); - - return vti; -} - -static inline struct cl_lock *vvp_env_lock(const struct lu_env *env) -{ - struct cl_lock *lock = &vvp_env_info(env)->vti_lock; - - memset(lock, 0, sizeof(*lock)); - return lock; -} - -static inline struct cl_attr *vvp_env_thread_attr(const struct lu_env *env) -{ - struct cl_attr *attr = &vvp_env_info(env)->vti_attr; - - memset(attr, 0, sizeof(*attr)); - - return attr; -} - -static inline struct cl_io *vvp_env_thread_io(const struct lu_env *env) -{ - struct cl_io *io = &vvp_env_info(env)->vti_io; - - memset(io, 0, sizeof(*io)); - - return io; -} - -struct vvp_session { - struct vvp_io cs_ios; -}; - -static inline struct vvp_session *vvp_env_session(const struct lu_env *env) -{ - struct vvp_session *ses; - - ses = lu_context_key_get(env->le_ses, &vvp_session_key); - LASSERT(ses); - - return ses; -} - -static inline struct vvp_io *vvp_env_io(const struct lu_env *env) -{ - return &vvp_env_session(env)->cs_ios; -} - -/** - * ccc-private object state. - */ -struct vvp_object { - struct cl_object_header vob_header; - struct cl_object vob_cl; - struct inode *vob_inode; - - /** - * Number of transient pages. This is no longer protected by i_sem, - * and needs to be atomic. This is not actually used for anything, - * and can probably be removed. 
- */ - atomic_t vob_transient_pages; - - /** - * Number of outstanding mmaps on this file. - * - * \see ll_vm_open(), ll_vm_close(). - */ - atomic_t vob_mmap_cnt; - - /** - * various flags - * vob_discard_page_warned - * if pages belonging to this object are discarded when a client - * is evicted, some debug info will be printed, this flag will be set - * during processing the first discarded page, then avoid flooding - * debug message for lots of discarded pages. - * - * \see ll_dirty_page_discard_warn. - */ - unsigned int vob_discard_page_warned:1; -}; - -/** - * VVP-private page state. - */ -struct vvp_page { - struct cl_page_slice vpg_cl; - unsigned int vpg_defer_uptodate:1, - vpg_ra_used:1; - /** VM page */ - struct page *vpg_page; -}; - -static inline struct vvp_page *cl2vvp_page(const struct cl_page_slice *slice) -{ - return container_of(slice, struct vvp_page, vpg_cl); -} - -static inline pgoff_t vvp_index(struct vvp_page *vvp) -{ - return vvp->vpg_cl.cpl_index; -} - -struct vvp_device { - struct cl_device vdv_cl; - struct cl_device *vdv_next; -}; - -struct vvp_lock { - struct cl_lock_slice vlk_cl; -}; - -void *ccc_key_init(const struct lu_context *ctx, - struct lu_context_key *key); -void ccc_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data); - -void ccc_umount(const struct lu_env *env, struct cl_device *dev); - -static inline struct lu_device *vvp2lu_dev(struct vvp_device *vdv) -{ - return &vdv->vdv_cl.cd_lu_dev; -} - -static inline struct vvp_device *lu2vvp_dev(const struct lu_device *d) -{ - return container_of_safe(d, struct vvp_device, vdv_cl.cd_lu_dev); -} - -static inline struct vvp_device *cl2vvp_dev(const struct cl_device *d) -{ - return container_of_safe(d, struct vvp_device, vdv_cl); -} - -static inline struct vvp_object *cl2vvp(const struct cl_object *obj) -{ - return container_of_safe(obj, struct vvp_object, vob_cl); -} - -static inline struct vvp_object *lu2vvp(const struct lu_object *obj) -{ - return 
container_of_safe(obj, struct vvp_object, vob_cl.co_lu); -} - -static inline struct inode *vvp_object_inode(const struct cl_object *obj) -{ - return cl2vvp(obj)->vob_inode; -} - -int vvp_object_invariant(const struct cl_object *obj); -struct vvp_object *cl_inode2vvp(struct inode *inode); - -static inline struct page *cl2vm_page(const struct cl_page_slice *slice) -{ - return cl2vvp_page(slice)->vpg_page; -} - -static inline struct vvp_lock *cl2vvp_lock(const struct cl_lock_slice *slice) -{ - return container_of(slice, struct vvp_lock, vlk_cl); -} - -# define CLOBINVRNT(env, clob, expr) \ - ((void)sizeof(env), (void)sizeof(clob), (void)sizeof(!!(expr))) - -int vvp_io_init(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io); -int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io); -int vvp_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io); -int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); -struct lu_object *vvp_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *dev); - -int vvp_global_init(void); -void vvp_global_fini(void); - -extern const struct file_operations vvp_dump_pgcache_file_ops; - -#endif /* VVP_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c deleted file mode 100644 index e7a4778e02e4..000000000000 --- a/drivers/staging/lustre/lustre/llite/vvp_io.c +++ /dev/null @@ -1,1374 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_io for VVP layer. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include - -#include "llite_internal.h" -#include "vvp_internal.h" - -static struct vvp_io *cl2vvp_io(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct vvp_io *vio; - - vio = container_of(slice, struct vvp_io, vui_cl); - LASSERT(vio == vvp_env_io(env)); - - return vio; -} - -/** - * For swapping layout. The file's layout may have changed. - * To avoid populating pages to a wrong stripe, we have to verify the - * correctness of layout. It works because swapping layout processes - * have to acquire group lock. 
- */ -static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, - struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct vvp_io *vio = vvp_env_io(env); - bool rc = true; - - switch (io->ci_type) { - case CIT_READ: - case CIT_WRITE: - /* don't need lock here to check lli_layout_gen as we have held - * extent lock and GROUP lock has to hold to swap layout - */ - if (ll_layout_version_get(lli) != vio->vui_layout_gen || - OBD_FAIL_CHECK_RESET(OBD_FAIL_LLITE_LOST_LAYOUT, 0)) { - io->ci_need_restart = 1; - /* this will cause a short read/write */ - io->ci_continue = 0; - rc = false; - } - case CIT_FAULT: - /* fault is okay because we've already had a page. */ - default: - break; - } - - return rc; -} - -static void vvp_object_size_lock(struct cl_object *obj) -{ - struct inode *inode = vvp_object_inode(obj); - - ll_inode_size_lock(inode); - cl_object_attr_lock(obj); -} - -static void vvp_object_size_unlock(struct cl_object *obj) -{ - struct inode *inode = vvp_object_inode(obj); - - cl_object_attr_unlock(obj); - ll_inode_size_unlock(inode); -} - -/** - * Helper function that if necessary adjusts file size (inode->i_size), when - * position at the offset \a pos is accessed. File size can be arbitrary stale - * on a Lustre client, but client at least knows KMS. If accessed area is - * inside [0, KMS], set file size to KMS, otherwise glimpse file size. - * - * Locking: cl_isize_lock is used to serialize changes to inode size and to - * protect consistency between inode size and cl_object - * attributes. cl_object_size_lock() protects consistency between cl_attr's of - * top-object and sub-objects. 
- */ -static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io, loff_t start, size_t count, - int *exceed) -{ - struct cl_attr *attr = vvp_env_thread_attr(env); - struct inode *inode = vvp_object_inode(obj); - loff_t pos = start + count - 1; - loff_t kms; - int result; - - /* - * Consistency guarantees: following possibilities exist for the - * relation between region being accessed and real file size at this - * moment: - * - * (A): the region is completely inside of the file; - * - * (B-x): x bytes of region are inside of the file, the rest is - * outside; - * - * (C): the region is completely outside of the file. - * - * This classification is stable under DLM lock already acquired by - * the caller, because to change the class, other client has to take - * DLM lock conflicting with our lock. Also, any updates to ->i_size - * by other threads on this client are serialized by - * ll_inode_size_lock(). This guarantees that short reads are handled - * correctly in the face of concurrent writes and truncates. - */ - vvp_object_size_lock(obj); - result = cl_object_attr_get(env, obj, attr); - if (result == 0) { - kms = attr->cat_kms; - if (pos > kms) { - /* - * A glimpse is necessary to determine whether we - * return a short read (B) or some zeroes at the end - * of the buffer (C) - */ - vvp_object_size_unlock(obj); - result = cl_glimpse_lock(env, io, inode, obj, 0); - if (result == 0 && exceed) { - /* If objective page index exceed end-of-file - * page index, return directly. Do not expect - * kernel will check such case correctly. - * linux-2.6.18-128.1.1 miss to do that. - * --bug 17336 - */ - loff_t size = i_size_read(inode); - loff_t cur_index = start >> PAGE_SHIFT; - loff_t size_index = (size - 1) >> PAGE_SHIFT; - - if ((size == 0 && cur_index != 0) || - size_index < cur_index) - *exceed = 1; - } - return result; - } - /* - * region is within kms and, hence, within real file - * size (A). 
We need to increase i_size to cover the - * read region so that generic_file_read() will do its - * job, but that doesn't mean the kms size is - * _correct_, it is only the _minimum_ size. If - * someone does a stat they will get the correct size - * which will always be >= the kms value here. - * b=11081 - */ - if (i_size_read(inode) < kms) { - i_size_write(inode, kms); - CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n", - PFID(lu_object_fid(&obj->co_lu)), - (__u64)i_size_read(inode)); - } - } - - vvp_object_size_unlock(obj); - - return result; -} - -/***************************************************************************** - * - * io operations. - * - */ - -static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - pgoff_t start, pgoff_t end) -{ - struct vvp_io *vio = vvp_env_io(env); - struct cl_lock_descr *descr = &vio->vui_link.cill_descr; - struct cl_object *obj = io->ci_obj; - - CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - - CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end); - - memset(&vio->vui_link, 0, sizeof(vio->vui_link)); - - if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { - descr->cld_mode = CLM_GROUP; - descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid; - enqflags |= CEF_LOCK_MATCH; - } else { - descr->cld_mode = mode; - } - descr->cld_obj = obj; - descr->cld_start = start; - descr->cld_end = end; - descr->cld_enq_flags = enqflags; - - cl_io_lock_add(env, io, &vio->vui_link); - return 0; -} - -static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io, - __u32 enqflags, enum cl_lock_mode mode, - loff_t start, loff_t end) -{ - struct cl_object *obj = io->ci_obj; - - return vvp_io_one_lock_index(env, io, enqflags, mode, - cl_index(obj, start), cl_index(obj, end)); -} - -static int vvp_io_write_iter_init(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct vvp_io *vio = cl2vvp_io(env, ios); - - 
cl_page_list_init(&vio->u.write.vui_queue); - vio->u.write.vui_written = 0; - vio->u.write.vui_from = 0; - vio->u.write.vui_to = PAGE_SIZE; - - return 0; -} - -static void vvp_io_write_iter_fini(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct vvp_io *vio = cl2vvp_io(env, ios); - - LASSERT(vio->u.write.vui_queue.pl_nr == 0); -} - -static int vvp_io_fault_iter_init(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct vvp_io *vio = cl2vvp_io(env, ios); - struct inode *inode = vvp_object_inode(ios->cis_obj); - - LASSERT(inode == file_inode(vio->vui_fd->fd_file)); - vio->u.fault.ft_mtime = inode->i_mtime.tv_sec; - return 0; -} - -static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - struct cl_object *obj = io->ci_obj; - struct vvp_io *vio = cl2vvp_io(env, ios); - struct inode *inode = vvp_object_inode(obj); - - CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - - CDEBUG(D_VFSTRACE, DFID - " ignore/verify layout %d/%d, layout version %d restore needed %d\n", - PFID(lu_object_fid(&obj->co_lu)), - io->ci_ignore_layout, io->ci_verify_layout, - vio->vui_layout_gen, io->ci_restore_needed); - - if (io->ci_restore_needed) { - int rc; - - /* file was detected release, we need to restore it - * before finishing the io - */ - rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF); - /* if restore registration failed, no restart, - * we will return -ENODATA - */ - /* The layout will change after restore, so we need to - * block on layout lock hold by the MDT - * as MDT will not send new layout in lvb (see LU-3124) - * we have to explicitly fetch it, all this will be done - * by ll_layout_refresh() - */ - if (rc == 0) { - io->ci_restore_needed = 0; - io->ci_need_restart = 1; - io->ci_verify_layout = 1; - } else { - io->ci_restore_needed = 1; - io->ci_need_restart = 0; - io->ci_verify_layout = 0; - io->ci_result = rc; - } - } - - if (!io->ci_ignore_layout && io->ci_verify_layout) { - 
__u32 gen = 0; - - /* check layout version */ - ll_layout_refresh(inode, &gen); - io->ci_need_restart = vio->vui_layout_gen != gen; - if (io->ci_need_restart) { - CDEBUG(D_VFSTRACE, - DFID " layout changed from %d to %d.\n", - PFID(lu_object_fid(&obj->co_lu)), - vio->vui_layout_gen, gen); - /* today successful restore is the only possible case */ - /* restore was done, clear restoring state */ - clear_bit(LLIF_FILE_RESTORING, - &ll_i2info(inode)->lli_flags); - } - } -} - -static void vvp_io_fault_fini(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - struct cl_page *page = io->u.ci_fault.ft_page; - - CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj)); - - if (page) { - lu_ref_del(&page->cp_reference, "fault", io); - cl_page_put(env, page); - io->u.ci_fault.ft_page = NULL; - } - vvp_io_fini(env, ios); -} - -static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma) -{ - /* - * we only want to hold PW locks if the mmap() can generate - * writes back to the file and that only happens in shared - * writable vmas - */ - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) - return CLM_WRITE; - return CLM_READ; -} - -static int vvp_mmap_locks(const struct lu_env *env, - struct vvp_io *vio, struct cl_io *io) -{ - struct vvp_thread_info *cti = vvp_env_info(env); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - struct cl_lock_descr *descr = &cti->vti_descr; - union ldlm_policy_data policy; - unsigned long addr; - ssize_t count; - int result = 0; - struct iov_iter i; - struct iovec iov; - - LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - - if (!vio->vui_iter) /* nfs or loop back device write */ - return 0; - - /* No MM (e.g. NFS)? No vmas too. 
*/ - if (!mm) - return 0; - - iov_for_each(iov, i, *vio->vui_iter) { - addr = (unsigned long)iov.iov_base; - count = iov.iov_len; - if (count == 0) - continue; - - count += addr & (~PAGE_MASK); - addr &= PAGE_MASK; - - down_read(&mm->mmap_sem); - while ((vma = our_vma(mm, addr, count)) != NULL) { - struct inode *inode = file_inode(vma->vm_file); - int flags = CEF_MUST; - - if (ll_file_nolock(vma->vm_file)) { - /* - * For no lock case is not allowed for mmap - */ - result = -EINVAL; - break; - } - - /* - * XXX: Required lock mode can be weakened: CIT_WRITE - * io only ever reads user level buffer, and CIT_READ - * only writes on it. - */ - policy_from_vma(&policy, vma, addr, count); - descr->cld_mode = vvp_mode_from_vma(vma); - descr->cld_obj = ll_i2info(inode)->lli_clob; - descr->cld_start = cl_index(descr->cld_obj, - policy.l_extent.start); - descr->cld_end = cl_index(descr->cld_obj, - policy.l_extent.end); - descr->cld_enq_flags = flags; - result = cl_io_lock_alloc_add(env, io, descr); - - CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n", - descr->cld_mode, descr->cld_start, - descr->cld_end); - - if (result < 0) - break; - - if (vma->vm_end - addr >= count) - break; - - count -= vma->vm_end - addr; - addr = vma->vm_end; - } - up_read(&mm->mmap_sem); - if (result < 0) - break; - } - return result; -} - -static void vvp_io_advance(const struct lu_env *env, - const struct cl_io_slice *ios, - size_t nob) -{ - struct cl_object *obj = ios->cis_io->ci_obj; - struct vvp_io *vio = cl2vvp_io(env, ios); - - CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - - vio->vui_tot_count -= nob; - iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count); -} - -static void vvp_io_update_iov(const struct lu_env *env, - struct vvp_io *vio, struct cl_io *io) -{ - size_t size = io->u.ci_rw.crw_count; - - if (!vio->vui_iter) - return; - - iov_iter_truncate(vio->vui_iter, size); -} - -static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io, - enum cl_lock_mode mode, loff_t start, 
loff_t end) -{ - struct vvp_io *vio = vvp_env_io(env); - int result; - int ast_flags = 0; - - LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - - vvp_io_update_iov(env, vio, io); - - if (io->u.ci_rw.crw_nonblock) - ast_flags |= CEF_NONBLOCK; - result = vvp_mmap_locks(env, vio, io); - if (result == 0) - result = vvp_io_one_lock(env, io, ast_flags, mode, start, end); - return result; -} - -static int vvp_io_read_lock(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - struct cl_io_rw_common *rd = &io->u.ci_rd.rd; - int result; - - result = vvp_io_rw_lock(env, io, CLM_READ, rd->crw_pos, - rd->crw_pos + rd->crw_count - 1); - - return result; -} - -static int vvp_io_fault_lock(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - struct vvp_io *vio = cl2vvp_io(env, ios); - /* - * XXX LDLM_FL_CBPENDING - */ - return vvp_io_one_lock_index(env, - io, 0, - vvp_mode_from_vma(vio->u.fault.ft_vma), - io->u.ci_fault.ft_index, - io->u.ci_fault.ft_index); -} - -static int vvp_io_write_lock(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - loff_t start; - loff_t end; - - if (io->u.ci_wr.wr_append) { - start = 0; - end = OBD_OBJECT_EOF; - } else { - start = io->u.ci_wr.wr.crw_pos; - end = start + io->u.ci_wr.wr.crw_count - 1; - } - return vvp_io_rw_lock(env, io, CLM_WRITE, start, end); -} - -static int vvp_io_setattr_iter_init(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - return 0; -} - -/** - * Implementation of cl_io_operations::vio_lock() method for CIT_SETATTR io. - * - * Handles "lockless io" mode when extent locking is done by server. 
- */ -static int vvp_io_setattr_lock(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - __u64 new_size; - __u32 enqflags = 0; - - if (cl_io_is_trunc(io)) { - new_size = io->u.ci_setattr.sa_attr.lvb_size; - if (new_size == 0) - enqflags = CEF_DISCARD_DATA; - } else { - unsigned int valid = io->u.ci_setattr.sa_valid; - - if (!(valid & TIMES_SET_FLAGS)) - return 0; - - if ((!(valid & ATTR_MTIME) || - io->u.ci_setattr.sa_attr.lvb_mtime >= - io->u.ci_setattr.sa_attr.lvb_ctime) && - (!(valid & ATTR_ATIME) || - io->u.ci_setattr.sa_attr.lvb_atime >= - io->u.ci_setattr.sa_attr.lvb_ctime)) - return 0; - new_size = 0; - } - - return vvp_io_one_lock(env, io, enqflags, CLM_WRITE, - new_size, OBD_OBJECT_EOF); -} - -static int vvp_do_vmtruncate(struct inode *inode, size_t size) -{ - int result; - /* - * Only ll_inode_size_lock is taken at this level. - */ - ll_inode_size_lock(inode); - result = inode_newsize_ok(inode, size); - if (result < 0) { - ll_inode_size_unlock(inode); - return result; - } - truncate_setsize(inode, size); - ll_inode_size_unlock(inode); - return result; -} - -static int vvp_io_setattr_time(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - struct cl_object *obj = io->ci_obj; - struct cl_attr *attr = vvp_env_thread_attr(env); - int result; - unsigned valid = CAT_CTIME; - - cl_object_attr_lock(obj); - attr->cat_ctime = io->u.ci_setattr.sa_attr.lvb_ctime; - if (io->u.ci_setattr.sa_valid & ATTR_ATIME_SET) { - attr->cat_atime = io->u.ci_setattr.sa_attr.lvb_atime; - valid |= CAT_ATIME; - } - if (io->u.ci_setattr.sa_valid & ATTR_MTIME_SET) { - attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime; - valid |= CAT_MTIME; - } - result = cl_object_attr_update(env, obj, attr, valid); - cl_object_attr_unlock(obj); - - return result; -} - -static int vvp_io_setattr_start(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - struct inode 
*inode = vvp_object_inode(io->ci_obj); - struct ll_inode_info *lli = ll_i2info(inode); - - if (cl_io_is_trunc(io)) { - down_write(&lli->lli_trunc_sem); - inode_lock(inode); - inode_dio_wait(inode); - } else { - inode_lock(inode); - } - - if (io->u.ci_setattr.sa_valid & TIMES_SET_FLAGS) - return vvp_io_setattr_time(env, ios); - - return 0; -} - -static void vvp_io_setattr_end(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - struct inode *inode = vvp_object_inode(io->ci_obj); - struct ll_inode_info *lli = ll_i2info(inode); - - if (cl_io_is_trunc(io)) { - /* Truncate in memory pages - they must be clean pages - * because osc has already notified to destroy osc_extents. - */ - vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size); - inode_unlock(inode); - up_write(&lli->lli_trunc_sem); - } else { - inode_unlock(inode); - } -} - -static void vvp_io_setattr_fini(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - bool restore_needed = ios->cis_io->ci_restore_needed; - struct inode *inode = vvp_object_inode(ios->cis_obj); - - vvp_io_fini(env, ios); - - if (restore_needed && !ios->cis_io->ci_restore_needed) { - /* restore finished, set data modified flag for HSM */ - set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags); - } -} - -static int vvp_io_read_start(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct vvp_io *vio = cl2vvp_io(env, ios); - struct cl_io *io = ios->cis_io; - struct cl_object *obj = io->ci_obj; - struct inode *inode = vvp_object_inode(obj); - struct ll_inode_info *lli = ll_i2info(inode); - struct file *file = vio->vui_fd->fd_file; - - int result; - loff_t pos = io->u.ci_rd.rd.crw_pos; - long cnt = io->u.ci_rd.rd.crw_count; - long tot = vio->vui_tot_count; - int exceed = 0; - - CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - - CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt); - - down_read(&lli->lli_trunc_sem); - - if (!can_populate_pages(env, io, 
inode)) - return 0; - - result = vvp_prep_size(env, obj, io, pos, tot, &exceed); - if (result != 0) - return result; - if (exceed != 0) - goto out; - - LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, - "Read ino %lu, %lu bytes, offset %lld, size %llu\n", - inode->i_ino, cnt, pos, i_size_read(inode)); - - /* turn off the kernel's read-ahead */ - vio->vui_fd->fd_file->f_ra.ra_pages = 0; - - /* initialize read-ahead window once per syscall */ - if (!vio->vui_ra_valid) { - vio->vui_ra_valid = true; - vio->vui_ra_start = cl_index(obj, pos); - vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1); - ll_ras_enter(file); - } - - /* BUG: 5972 */ - file_accessed(file); - LASSERT(vio->vui_iocb->ki_pos == pos); - result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter); - -out: - if (result >= 0) { - if (result < cnt) - io->ci_continue = 0; - io->ci_nob += result; - ll_rw_stats_tally(ll_i2sbi(inode), current->pid, - vio->vui_fd, pos, result, READ); - result = 0; - } - return result; -} - -static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *plist, int from, int to) -{ - struct cl_2queue *queue = &io->ci_queue; - struct cl_page *page; - unsigned int bytes = 0; - int rc = 0; - - if (plist->pl_nr == 0) - return 0; - - if (from > 0 || to != PAGE_SIZE) { - page = cl_page_list_first(plist); - if (plist->pl_nr == 1) { - cl_page_clip(env, page, from, to); - } else { - if (from > 0) - cl_page_clip(env, page, from, PAGE_SIZE); - if (to != PAGE_SIZE) { - page = cl_page_list_last(plist); - cl_page_clip(env, page, 0, to); - } - } - } - - cl_2queue_init(queue); - cl_page_list_splice(plist, &queue->c2_qin); - rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0); - - /* plist is not sorted any more */ - cl_page_list_splice(&queue->c2_qin, plist); - cl_page_list_splice(&queue->c2_qout, plist); - cl_2queue_fini(env, queue); - - if (rc == 0) { - /* calculate bytes */ - bytes = plist->pl_nr << PAGE_SHIFT; - bytes -= from + PAGE_SIZE - to; - - while 
(plist->pl_nr > 0) { - page = cl_page_list_first(plist); - cl_page_list_del(env, plist, page); - - cl_page_clip(env, page, 0, PAGE_SIZE); - - SetPageUptodate(cl_page_vmpage(page)); - cl_page_disown(env, io, page); - - /* held in ll_cl_init() */ - lu_ref_del(&page->cp_reference, "cl_io", io); - cl_page_put(env, page); - } - } - - return bytes > 0 ? bytes : rc; -} - -static void write_commit_callback(const struct lu_env *env, struct cl_io *io, - struct cl_page *page) -{ - struct page *vmpage = page->cp_vmpage; - - SetPageUptodate(vmpage); - set_page_dirty(vmpage); - - cl_page_disown(env, io, page); - - /* held in ll_cl_init() */ - lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io)); - cl_page_put(env, page); -} - -/* make sure the page list is contiguous */ -static bool page_list_sanity_check(struct cl_object *obj, - struct cl_page_list *plist) -{ - struct cl_page *page; - pgoff_t index = CL_PAGE_EOF; - - cl_page_list_for_each(page, plist) { - struct vvp_page *vpg = cl_object_page_slice(obj, page); - - if (index == CL_PAGE_EOF) { - index = vvp_index(vpg); - continue; - } - - ++index; - if (index == vvp_index(vpg)) - continue; - - return false; - } - return true; -} - -/* Return how many bytes have queued or written */ -int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io) -{ - struct cl_object *obj = io->ci_obj; - struct inode *inode = vvp_object_inode(obj); - struct vvp_io *vio = vvp_env_io(env); - struct cl_page_list *queue = &vio->u.write.vui_queue; - struct cl_page *page; - int rc = 0; - int bytes = 0; - unsigned int npages = vio->u.write.vui_queue.pl_nr; - - if (npages == 0) - return 0; - - CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n", - npages, vio->u.write.vui_from, vio->u.write.vui_to); - - LASSERT(page_list_sanity_check(obj, queue)); - - /* submit IO with async write */ - rc = cl_io_commit_async(env, io, queue, - vio->u.write.vui_from, vio->u.write.vui_to, - write_commit_callback); - npages -= queue->pl_nr; /* already 
committed pages */ - if (npages > 0) { - /* calculate how many bytes were written */ - bytes = npages << PAGE_SHIFT; - - /* first page */ - bytes -= vio->u.write.vui_from; - if (queue->pl_nr == 0) /* last page */ - bytes -= PAGE_SIZE - vio->u.write.vui_to; - LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages); - - vio->u.write.vui_written += bytes; - - CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n", - npages, bytes, vio->u.write.vui_written); - - /* the first page must have been written. */ - vio->u.write.vui_from = 0; - } - LASSERT(page_list_sanity_check(obj, queue)); - LASSERT(ergo(rc == 0, queue->pl_nr == 0)); - - /* out of quota, try sync write */ - if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) { - rc = vvp_io_commit_sync(env, io, queue, - vio->u.write.vui_from, - vio->u.write.vui_to); - if (rc > 0) { - vio->u.write.vui_written += rc; - rc = 0; - } - } - - /* update inode size */ - ll_merge_attr(env, inode); - - /* Now the pages in queue were failed to commit, discard them - * unless they were dirtied before. 
- */ - while (queue->pl_nr > 0) { - page = cl_page_list_first(queue); - cl_page_list_del(env, queue, page); - - if (!PageDirty(cl_page_vmpage(page))) - cl_page_discard(env, io, page); - - cl_page_disown(env, io, page); - - /* held in ll_cl_init() */ - lu_ref_del(&page->cp_reference, "cl_io", io); - cl_page_put(env, page); - } - cl_page_list_fini(env, queue); - - return rc; -} - -static int vvp_io_write_start(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct vvp_io *vio = cl2vvp_io(env, ios); - struct cl_io *io = ios->cis_io; - struct cl_object *obj = io->ci_obj; - struct inode *inode = vvp_object_inode(obj); - struct ll_inode_info *lli = ll_i2info(inode); - ssize_t result = 0; - loff_t pos = io->u.ci_wr.wr.crw_pos; - size_t cnt = io->u.ci_wr.wr.crw_count; - - down_read(&lli->lli_trunc_sem); - - if (!can_populate_pages(env, io, inode)) - return 0; - - if (cl_io_is_append(io)) { - /* - * PARALLEL IO This has to be changed for parallel IO doing - * out-of-order writes. - */ - ll_merge_attr(env, inode); - pos = i_size_read(inode); - io->u.ci_wr.wr.crw_pos = pos; - vio->vui_iocb->ki_pos = pos; - } else { - LASSERT(vio->vui_iocb->ki_pos == pos); - } - - CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt); - - /* - * The maximum Lustre file size is variable, based on the OST maximum - * object size and number of stripes. This needs another check in - * addition to the VFS checks earlier. - */ - if (pos + cnt > ll_file_maxbytes(inode)) { - CDEBUG(D_INODE, - "%s: file " DFID " offset %llu > maxbytes %llu\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(ll_inode2fid(inode)), pos + cnt, - ll_file_maxbytes(inode)); - return -EFBIG; - } - - if (!vio->vui_iter) { - /* from a temp io in ll_cl_init(). */ - result = 0; - } else { - /* - * When using the locked AIO function (generic_file_aio_write()) - * testing has shown the inode mutex to be a limiting factor - * with multi-threaded single shared file performance. 
To get - * around this, we now use the lockless version. To maintain - * consistency, proper locking to protect against writes, - * trucates, etc. is handled in the higher layers of lustre. - */ - bool lock_node = !IS_NOSEC(inode); - - if (lock_node) - inode_lock(inode); - result = __generic_file_write_iter(vio->vui_iocb, - vio->vui_iter); - if (lock_node) - inode_unlock(inode); - - if (result > 0 || result == -EIOCBQUEUED) - result = generic_write_sync(vio->vui_iocb, result); - } - - if (result > 0) { - result = vvp_io_write_commit(env, io); - if (vio->u.write.vui_written > 0) { - result = vio->u.write.vui_written; - io->ci_nob += result; - - CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n", - io->ci_nob, result); - } - } - if (result > 0) { - set_bit(LLIF_DATA_MODIFIED, &(ll_i2info(inode))->lli_flags); - - if (result < cnt) - io->ci_continue = 0; - ll_rw_stats_tally(ll_i2sbi(inode), current->pid, - vio->vui_fd, pos, result, WRITE); - result = 0; - } - return result; -} - -static void vvp_io_rw_end(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct inode *inode = vvp_object_inode(ios->cis_obj); - struct ll_inode_info *lli = ll_i2info(inode); - - up_read(&lli->lli_trunc_sem); -} - -static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) -{ - struct vm_fault *vmf = cfio->ft_vmf; - - cfio->ft_flags = filemap_fault(vmf); - cfio->ft_flags_valid = 1; - - if (vmf->page) { - CDEBUG(D_PAGE, - "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n", - vmf->page, vmf->page->mapping, vmf->page->index, - (long)vmf->page->flags, page_count(vmf->page), - page_private(vmf->page), (void *)vmf->address); - if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) { - lock_page(vmf->page); - cfio->ft_flags |= VM_FAULT_LOCKED; - } - - cfio->ft_vmpage = vmf->page; - return 0; - } - - if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { - CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", (void *)vmf->address); - return -EFAULT; - } - - if 
(cfio->ft_flags & VM_FAULT_OOM) { - CDEBUG(D_PAGE, "got addr %p - OOM\n", (void *)vmf->address); - return -ENOMEM; - } - - if (cfio->ft_flags & VM_FAULT_RETRY) - return -EAGAIN; - - CERROR("Unknown error in page fault %d!\n", cfio->ft_flags); - return -EINVAL; -} - -static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io, - struct cl_page *page) -{ - set_page_dirty(page->cp_vmpage); -} - -static int vvp_io_fault_start(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct vvp_io *vio = cl2vvp_io(env, ios); - struct cl_io *io = ios->cis_io; - struct cl_object *obj = io->ci_obj; - struct inode *inode = vvp_object_inode(obj); - struct ll_inode_info *lli = ll_i2info(inode); - struct cl_fault_io *fio = &io->u.ci_fault; - struct vvp_fault_io *cfio = &vio->u.fault; - loff_t offset; - int result = 0; - struct page *vmpage = NULL; - struct cl_page *page; - loff_t size; - pgoff_t last_index; - - down_read(&lli->lli_trunc_sem); - - /* offset of the last byte on the page */ - offset = cl_offset(obj, fio->ft_index + 1) - 1; - LASSERT(cl_index(obj, offset) == fio->ft_index); - result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL); - if (result != 0) - return result; - - /* must return locked page */ - if (fio->ft_mkwrite) { - LASSERT(cfio->ft_vmpage); - lock_page(cfio->ft_vmpage); - } else { - result = vvp_io_kernel_fault(cfio); - if (result != 0) - return result; - } - - vmpage = cfio->ft_vmpage; - LASSERT(PageLocked(vmpage)); - - if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE)) - ll_invalidate_page(vmpage); - - size = i_size_read(inode); - /* Though we have already held a cl_lock upon this page, but - * it still can be truncated locally. - */ - if (unlikely((vmpage->mapping != inode->i_mapping) || - (page_offset(vmpage) > size))) { - CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n"); - - /* return +1 to stop cl_io_loop() and ll_fault() will catch - * and retry. 
- */ - result = 1; - goto out; - } - - last_index = cl_index(obj, size - 1); - - if (fio->ft_mkwrite) { - /* - * Capture the size while holding the lli_trunc_sem from above - * we want to make sure that we complete the mkwrite action - * while holding this lock. We need to make sure that we are - * not past the end of the file. - */ - if (last_index < fio->ft_index) { - CDEBUG(D_PAGE, - "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n", - vmpage->mapping, fio->ft_index, last_index); - /* - * We need to return if we are - * passed the end of the file. This will propagate - * up the call stack to ll_page_mkwrite where - * we will return VM_FAULT_NOPAGE. Any non-negative - * value returned here will be silently - * converted to 0. If the vmpage->mapping is null - * the error code would be converted back to ENODATA - * in ll_page_mkwrite0. Thus we return -ENODATA - * to handle both cases - */ - result = -ENODATA; - goto out; - } - } - - page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE); - if (IS_ERR(page)) { - result = PTR_ERR(page); - goto out; - } - - /* if page is going to be written, we should add this page into cache - * earlier. - */ - if (fio->ft_mkwrite) { - wait_on_page_writeback(vmpage); - if (!PageDirty(vmpage)) { - struct cl_page_list *plist = &io->ci_queue.c2_qin; - struct vvp_page *vpg = cl_object_page_slice(obj, page); - int to = PAGE_SIZE; - - /* vvp_page_assume() calls wait_on_page_writeback(). */ - cl_page_assume(env, io, page); - - cl_page_list_init(plist); - cl_page_list_add(plist, page); - - /* size fixup */ - if (last_index == vvp_index(vpg)) - to = size & ~PAGE_MASK; - - /* Do not set Dirty bit here so that in case IO is - * started before the page is really made dirty, we - * still have chance to detect it. 
- */ - result = cl_io_commit_async(env, io, plist, 0, to, - mkwrite_commit_callback); - LASSERT(cl_page_is_owned(page, io)); - cl_page_list_fini(env, plist); - - vmpage = NULL; - if (result < 0) { - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - - cl_page_put(env, page); - - /* we're in big trouble, what can we do now? */ - if (result == -EDQUOT) - result = -ENOSPC; - goto out; - } else { - cl_page_disown(env, io, page); - } - } - } - - /* - * The ft_index is only used in the case of - * a mkwrite action. We need to check - * our assertions are correct, since - * we should have caught this above - */ - LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index); - if (fio->ft_index == last_index) - /* - * Last page is mapped partially. - */ - fio->ft_nob = size - cl_offset(obj, fio->ft_index); - else - fio->ft_nob = cl_page_size(obj); - - lu_ref_add(&page->cp_reference, "fault", io); - fio->ft_page = page; - -out: - /* return unlocked vmpage to avoid deadlocking */ - if (vmpage) - unlock_page(vmpage); - - cfio->ft_flags &= ~VM_FAULT_LOCKED; - - return result; -} - -static void vvp_io_fault_end(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct inode *inode = vvp_object_inode(ios->cis_obj); - struct ll_inode_info *lli = ll_i2info(inode); - - CLOBINVRNT(env, ios->cis_io->ci_obj, - vvp_object_invariant(ios->cis_io->ci_obj)); - up_read(&lli->lli_trunc_sem); -} - -static int vvp_io_fsync_start(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - /* we should mark TOWRITE bit to each dirty page in radix tree to - * verify pages have been written, but this is difficult because of - * race. 
- */ - return 0; -} - -static int vvp_io_read_ahead(const struct lu_env *env, - const struct cl_io_slice *ios, - pgoff_t start, struct cl_read_ahead *ra) -{ - int result = 0; - - if (ios->cis_io->ci_type == CIT_READ || - ios->cis_io->ci_type == CIT_FAULT) { - struct vvp_io *vio = cl2vvp_io(env, ios); - - if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) { - ra->cra_end = CL_PAGE_EOF; - result = 1; /* no need to call down */ - } - } - - return result; -} - -static const struct cl_io_operations vvp_io_ops = { - .op = { - [CIT_READ] = { - .cio_fini = vvp_io_fini, - .cio_lock = vvp_io_read_lock, - .cio_start = vvp_io_read_start, - .cio_end = vvp_io_rw_end, - .cio_advance = vvp_io_advance, - }, - [CIT_WRITE] = { - .cio_fini = vvp_io_fini, - .cio_iter_init = vvp_io_write_iter_init, - .cio_iter_fini = vvp_io_write_iter_fini, - .cio_lock = vvp_io_write_lock, - .cio_start = vvp_io_write_start, - .cio_end = vvp_io_rw_end, - .cio_advance = vvp_io_advance, - }, - [CIT_SETATTR] = { - .cio_fini = vvp_io_setattr_fini, - .cio_iter_init = vvp_io_setattr_iter_init, - .cio_lock = vvp_io_setattr_lock, - .cio_start = vvp_io_setattr_start, - .cio_end = vvp_io_setattr_end - }, - [CIT_FAULT] = { - .cio_fini = vvp_io_fault_fini, - .cio_iter_init = vvp_io_fault_iter_init, - .cio_lock = vvp_io_fault_lock, - .cio_start = vvp_io_fault_start, - .cio_end = vvp_io_fault_end, - }, - [CIT_FSYNC] = { - .cio_start = vvp_io_fsync_start, - .cio_fini = vvp_io_fini - }, - [CIT_MISC] = { - .cio_fini = vvp_io_fini - } - }, - .cio_read_ahead = vvp_io_read_ahead, -}; - -int vvp_io_init(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io) -{ - struct vvp_io *vio = vvp_env_io(env); - struct inode *inode = vvp_object_inode(obj); - int result; - - CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - - CDEBUG(D_VFSTRACE, DFID - " ignore/verify layout %d/%d, layout version %d restore needed %d\n", - PFID(lu_object_fid(&obj->co_lu)), - io->ci_ignore_layout, io->ci_verify_layout, - 
vio->vui_layout_gen, io->ci_restore_needed); - - CL_IO_SLICE_CLEAN(vio, vui_cl); - cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops); - vio->vui_ra_valid = false; - result = 0; - if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) { - size_t count; - struct ll_inode_info *lli = ll_i2info(inode); - - count = io->u.ci_rw.crw_count; - /* "If nbyte is 0, read() will return 0 and have no other - * results." -- Single Unix Spec - */ - if (count == 0) - result = 1; - else - vio->vui_tot_count = count; - - /* for read/write, we store the jobid in the inode, and - * it'll be fetched by osc when building RPC. - * - * it's not accurate if the file is shared by different - * jobs. - */ - lustre_get_jobid(lli->lli_jobid); - } else if (io->ci_type == CIT_SETATTR) { - if (!cl_io_is_trunc(io)) - io->ci_lockreq = CILR_MANDATORY; - } - - /* Enqueue layout lock and get layout version. We need to do this - * even for operations requiring to open file, such as read and write, - * because it might not grant layout lock in IT_OPEN. - */ - if (result == 0 && !io->ci_ignore_layout) { - result = ll_layout_refresh(inode, &vio->vui_layout_gen); - if (result == -ENOENT) - /* If the inode on MDS has been removed, but the objects - * on OSTs haven't been destroyed (async unlink), layout - * fetch will return -ENOENT, we'd ignore this error - * and continue with dirty flush. LU-3230. - */ - result = 0; - if (result < 0) - CERROR("%s: refresh file layout " DFID " error %d.\n", - ll_get_fsname(inode->i_sb, NULL, 0), - PFID(lu_object_fid(&obj->co_lu)), result); - } - - return result; -} diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c deleted file mode 100644 index 4b6c7143bd2c..000000000000 --- a/drivers/staging/lustre/lustre/llite/vvp_lock.c +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2014, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_lock for VVP layer. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include - -#include "vvp_internal.h" - -/***************************************************************************** - * - * Vvp lock functions. 
- * - */ - -static void vvp_lock_fini(const struct lu_env *env, struct cl_lock_slice *slice) -{ - struct vvp_lock *vlk = cl2vvp_lock(slice); - - kmem_cache_free(vvp_lock_kmem, vlk); -} - -static int vvp_lock_enqueue(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *unused, struct cl_sync_io *anchor) -{ - CLOBINVRNT(env, slice->cls_obj, vvp_object_invariant(slice->cls_obj)); - - return 0; -} - -static const struct cl_lock_operations vvp_lock_ops = { - .clo_fini = vvp_lock_fini, - .clo_enqueue = vvp_lock_enqueue, -}; - -int vvp_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *unused) -{ - struct vvp_lock *vlk; - int result; - - CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - - vlk = kmem_cache_zalloc(vvp_lock_kmem, GFP_NOFS); - if (vlk) { - cl_lock_slice_add(lock, &vlk->vlk_cl, obj, &vvp_lock_ops); - result = 0; - } else { - result = -ENOMEM; - } - return result; -} diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c deleted file mode 100644 index b2cb51c8f7f4..000000000000 --- a/drivers/staging/lustre/lustre/llite/vvp_object.c +++ /dev/null @@ -1,303 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * cl_object implementation for VVP layer. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include - -#include "llite_internal.h" -#include "vvp_internal.h" - -/***************************************************************************** - * - * Object operations. - * - */ - -int vvp_object_invariant(const struct cl_object *obj) -{ - struct inode *inode = vvp_object_inode(obj); - struct ll_inode_info *lli = ll_i2info(inode); - - return (S_ISREG(inode->i_mode) || inode->i_mode == 0) && - lli->lli_clob == obj; -} - -static int vvp_object_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o) -{ - struct vvp_object *obj = lu2vvp(o); - struct inode *inode = obj->vob_inode; - struct ll_inode_info *lli; - - (*p)(env, cookie, "(%d %d) inode: %p ", - atomic_read(&obj->vob_transient_pages), - atomic_read(&obj->vob_mmap_cnt), inode); - if (inode) { - lli = ll_i2info(inode); - (*p)(env, cookie, "%lu/%u %o %u %d %p " DFID, - inode->i_ino, inode->i_generation, inode->i_mode, - inode->i_nlink, atomic_read(&inode->i_count), - lli->lli_clob, PFID(&lli->lli_fid)); - } - return 0; -} - -static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr) -{ - struct inode *inode = vvp_object_inode(obj); - - /* - * lov overwrites most of these fields in - * lov_attr_get()->...lov_merge_lvb_kms(), except when inode - * attributes are newer. 
- */ - - attr->cat_size = i_size_read(inode); - attr->cat_mtime = inode->i_mtime.tv_sec; - attr->cat_atime = inode->i_atime.tv_sec; - attr->cat_ctime = inode->i_ctime.tv_sec; - attr->cat_blocks = inode->i_blocks; - attr->cat_uid = from_kuid(&init_user_ns, inode->i_uid); - attr->cat_gid = from_kgid(&init_user_ns, inode->i_gid); - /* KMS is not known by this layer */ - return 0; /* layers below have to fill in the rest */ -} - -static int vvp_attr_update(const struct lu_env *env, struct cl_object *obj, - const struct cl_attr *attr, unsigned int valid) -{ - struct inode *inode = vvp_object_inode(obj); - - if (valid & CAT_UID) - inode->i_uid = make_kuid(&init_user_ns, attr->cat_uid); - if (valid & CAT_GID) - inode->i_gid = make_kgid(&init_user_ns, attr->cat_gid); - if (valid & CAT_ATIME) - inode->i_atime.tv_sec = attr->cat_atime; - if (valid & CAT_MTIME) - inode->i_mtime.tv_sec = attr->cat_mtime; - if (valid & CAT_CTIME) - inode->i_ctime.tv_sec = attr->cat_ctime; - if (0 && valid & CAT_SIZE) - i_size_write(inode, attr->cat_size); - /* not currently necessary */ - if (0 && valid & (CAT_UID | CAT_GID | CAT_SIZE)) - mark_inode_dirty(inode); - return 0; -} - -static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, - const struct cl_object_conf *conf) -{ - struct ll_inode_info *lli = ll_i2info(conf->coc_inode); - - if (conf->coc_opc == OBJECT_CONF_INVALIDATE) { - CDEBUG(D_VFSTRACE, DFID ": losing layout lock\n", - PFID(&lli->lli_fid)); - - ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE); - - /* Clean up page mmap for this inode. - * The reason for us to do this is that if the page has - * already been installed into memory space, the process - * can access it without interacting with lustre, so this - * page may be stale due to layout change, and the process - * will never be notified. - * This operation is expensive but mmap processes have to pay - * a price themselves. 
- */ - unmap_mapping_range(conf->coc_inode->i_mapping, - 0, OBD_OBJECT_EOF, 0); - } - - return 0; -} - -static int vvp_prune(const struct lu_env *env, struct cl_object *obj) -{ - struct inode *inode = vvp_object_inode(obj); - int rc; - - rc = cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1); - if (rc < 0) { - CDEBUG(D_VFSTRACE, DFID ": writeback failed: %d\n", - PFID(lu_object_fid(&obj->co_lu)), rc); - return rc; - } - - truncate_inode_pages(inode->i_mapping, 0); - return 0; -} - -static int vvp_object_glimpse(const struct lu_env *env, - const struct cl_object *obj, struct ost_lvb *lvb) -{ - struct inode *inode = vvp_object_inode(obj); - - lvb->lvb_mtime = LTIME_S(inode->i_mtime); - lvb->lvb_atime = LTIME_S(inode->i_atime); - lvb->lvb_ctime = LTIME_S(inode->i_ctime); - /* - * LU-417: Add dirty pages block count lest i_blocks reports 0, some - * "cp" or "tar" on remote node may think it's a completely sparse file - * and skip it. - */ - if (lvb->lvb_size > 0 && lvb->lvb_blocks == 0) - lvb->lvb_blocks = dirty_cnt(inode); - return 0; -} - -static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj, - struct cl_req_attr *attr) -{ - u64 valid_flags = OBD_MD_FLTYPE; - struct inode *inode; - struct obdo *oa; - - oa = attr->cra_oa; - inode = vvp_object_inode(obj); - - if (attr->cra_type == CRT_WRITE) - valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLUID | OBD_MD_FLGID; - obdo_from_inode(oa, inode, valid_flags & attr->cra_flags); - obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid); - if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID)) - oa->o_parent_oid++; - memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid, LUSTRE_JOBID_SIZE); -} - -static const struct cl_object_operations vvp_ops = { - .coo_page_init = vvp_page_init, - .coo_lock_init = vvp_lock_init, - .coo_io_init = vvp_io_init, - .coo_attr_get = vvp_attr_get, - .coo_attr_update = vvp_attr_update, - .coo_conf_set = vvp_conf_set, - .coo_prune = vvp_prune, - .coo_glimpse = 
vvp_object_glimpse, - .coo_req_attr_set = vvp_req_attr_set -}; - -static int vvp_object_init0(const struct lu_env *env, - struct vvp_object *vob, - const struct cl_object_conf *conf) -{ - vob->vob_inode = conf->coc_inode; - atomic_set(&vob->vob_transient_pages, 0); - cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page)); - return 0; -} - -static int vvp_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf) -{ - struct vvp_device *dev = lu2vvp_dev(obj->lo_dev); - struct vvp_object *vob = lu2vvp(obj); - struct lu_object *below; - struct lu_device *under; - int result; - - under = &dev->vdv_next->cd_lu_dev; - below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under); - if (below) { - const struct cl_object_conf *cconf; - - cconf = lu2cl_conf(conf); - lu_object_add(obj, below); - result = vvp_object_init0(env, vob, cconf); - } else { - result = -ENOMEM; - } - - return result; -} - -static void vvp_object_free(const struct lu_env *env, struct lu_object *obj) -{ - struct vvp_object *vob = lu2vvp(obj); - - lu_object_fini(obj); - lu_object_header_fini(obj->lo_header); - kmem_cache_free(vvp_object_kmem, vob); -} - -static const struct lu_object_operations vvp_lu_obj_ops = { - .loo_object_init = vvp_object_init, - .loo_object_free = vvp_object_free, - .loo_object_print = vvp_object_print, -}; - -struct vvp_object *cl_inode2vvp(struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct cl_object *obj = lli->lli_clob; - struct lu_object *lu; - - lu = lu_object_locate(obj->co_lu.lo_header, &vvp_device_type); - LASSERT(lu); - return lu2vvp(lu); -} - -struct lu_object *vvp_object_alloc(const struct lu_env *env, - const struct lu_object_header *unused, - struct lu_device *dev) -{ - struct vvp_object *vob; - struct lu_object *obj; - - vob = kmem_cache_zalloc(vvp_object_kmem, GFP_NOFS); - if (vob) { - struct cl_object_header *hdr; - - obj = &vob->vob_cl.co_lu; - hdr = &vob->vob_header; - 
cl_object_header_init(hdr); - hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page)); - - lu_object_init(obj, &hdr->coh_lu, dev); - lu_object_add_top(&hdr->coh_lu, obj); - - vob->vob_cl.co_ops = &vvp_ops; - obj->lo_ops = &vvp_lu_obj_ops; - } else { - obj = NULL; - } - return obj; -} diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c deleted file mode 100644 index 6eb0565ddc22..000000000000 --- a/drivers/staging/lustre/lustre/llite/vvp_page.c +++ /dev/null @@ -1,523 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_page for VVP layer. 
- * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include -#include -#include -#include - -#include "llite_internal.h" -#include "vvp_internal.h" - -/***************************************************************************** - * - * Page operations. - * - */ - -static void vvp_page_fini_common(struct vvp_page *vpg) -{ - struct page *vmpage = vpg->vpg_page; - - LASSERT(vmpage); - put_page(vmpage); -} - -static void vvp_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ - struct vvp_page *vpg = cl2vvp_page(slice); - struct page *vmpage = vpg->vpg_page; - - /* - * vmpage->private was already cleared when page was moved into - * VPG_FREEING state. - */ - LASSERT((struct cl_page *)vmpage->private != slice->cpl_page); - vvp_page_fini_common(vpg); -} - -static int vvp_page_own(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io, - int nonblock) -{ - struct vvp_page *vpg = cl2vvp_page(slice); - struct page *vmpage = vpg->vpg_page; - - LASSERT(vmpage); - if (nonblock) { - if (!trylock_page(vmpage)) - return -EAGAIN; - - if (unlikely(PageWriteback(vmpage))) { - unlock_page(vmpage); - return -EAGAIN; - } - - return 0; - } - - lock_page(vmpage); - wait_on_page_writeback(vmpage); - - return 0; -} - -static void vvp_page_assume(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - struct page *vmpage = cl2vm_page(slice); - - LASSERT(vmpage); - LASSERT(PageLocked(vmpage)); - wait_on_page_writeback(vmpage); -} - -static void vvp_page_unassume(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - struct page *vmpage = cl2vm_page(slice); - - LASSERT(vmpage); - LASSERT(PageLocked(vmpage)); -} - -static void vvp_page_disown(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io) -{ - struct page *vmpage = cl2vm_page(slice); - - LASSERT(vmpage); - 
LASSERT(PageLocked(vmpage)); - - unlock_page(cl2vm_page(slice)); -} - -static void vvp_page_discard(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - struct page *vmpage = cl2vm_page(slice); - struct vvp_page *vpg = cl2vvp_page(slice); - - LASSERT(vmpage); - LASSERT(PageLocked(vmpage)); - - if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used) - ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED); - - ll_invalidate_page(vmpage); -} - -static void vvp_page_delete(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - struct page *vmpage = cl2vm_page(slice); - struct inode *inode = vmpage->mapping->host; - struct cl_object *obj = slice->cpl_obj; - struct cl_page *page = slice->cpl_page; - int refc; - - LASSERT(PageLocked(vmpage)); - LASSERT((struct cl_page *)vmpage->private == page); - LASSERT(inode == vvp_object_inode(obj)); - - /* Drop the reference count held in vvp_page_init */ - refc = atomic_dec_return(&page->cp_ref); - LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc); - - ClearPagePrivate(vmpage); - vmpage->private = 0; - /* - * Reference from vmpage to cl_page is removed, but the reference back - * is still here. It is removed later in vvp_page_fini(). - */ -} - -static void vvp_page_export(const struct lu_env *env, - const struct cl_page_slice *slice, - int uptodate) -{ - struct page *vmpage = cl2vm_page(slice); - - LASSERT(vmpage); - LASSERT(PageLocked(vmpage)); - if (uptodate) - SetPageUptodate(vmpage); - else - ClearPageUptodate(vmpage); -} - -static int vvp_page_is_vmlocked(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - return PageLocked(cl2vm_page(slice)) ? -EBUSY : -ENODATA; -} - -static int vvp_page_prep_read(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - /* Skip the page already marked as PG_uptodate. */ - return PageUptodate(cl2vm_page(slice)) ? 
-EALREADY : 0; -} - -static int vvp_page_prep_write(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - struct page *vmpage = cl2vm_page(slice); - struct cl_page *pg = slice->cpl_page; - - LASSERT(PageLocked(vmpage)); - LASSERT(!PageDirty(vmpage)); - - /* ll_writepage path is not a sync write, so need to set page writeback - * flag - */ - if (!pg->cp_sync_io) - set_page_writeback(vmpage); - - return 0; -} - -/** - * Handles page transfer errors at VM level. - * - * This takes inode as a separate argument, because inode on which error is to - * be set can be different from \a vmpage inode in case of direct-io. - */ -static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, - int ioret) -{ - struct vvp_object *obj = cl_inode2vvp(inode); - - if (ioret == 0) { - ClearPageError(vmpage); - obj->vob_discard_page_warned = 0; - } else { - SetPageError(vmpage); - mapping_set_error(inode->i_mapping, ioret); - - if ((ioret == -ESHUTDOWN || ioret == -EINTR) && - obj->vob_discard_page_warned == 0) { - obj->vob_discard_page_warned = 1; - ll_dirty_page_discard_warn(vmpage, ioret); - } - } -} - -static void vvp_page_completion_read(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret) -{ - struct vvp_page *vpg = cl2vvp_page(slice); - struct page *vmpage = vpg->vpg_page; - struct cl_page *page = slice->cpl_page; - struct inode *inode = vvp_object_inode(page->cp_obj); - - LASSERT(PageLocked(vmpage)); - CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret); - - if (vpg->vpg_defer_uptodate) - ll_ra_count_put(ll_i2sbi(inode), 1); - - if (ioret == 0) { - if (!vpg->vpg_defer_uptodate) - cl_page_export(env, page, 1); - } else { - vpg->vpg_defer_uptodate = 0; - } - - if (!page->cp_sync_io) - unlock_page(vmpage); -} - -static void vvp_page_completion_write(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret) -{ - struct vvp_page *vpg = cl2vvp_page(slice); - struct cl_page *pg = 
slice->cpl_page; - struct page *vmpage = vpg->vpg_page; - - CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret); - - if (pg->cp_sync_io) { - LASSERT(PageLocked(vmpage)); - LASSERT(!PageWriteback(vmpage)); - } else { - LASSERT(PageWriteback(vmpage)); - /* - * Only mark the page error only when it's an async write - * because applications won't wait for IO to finish. - */ - vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret); - - end_page_writeback(vmpage); - } -} - -/** - * Implements cl_page_operations::cpo_make_ready() method. - * - * This is called to yank a page from the transfer cache and to send it out as - * a part of transfer. This function try-locks the page. If try-lock failed, - * page is owned by some concurrent IO, and should be skipped (this is bad, - * but hopefully rare situation, as it usually results in transfer being - * shorter than possible). - * - * \retval 0 success, page can be placed into transfer - * - * \retval -EAGAIN page is either used by concurrent IO has been - * truncated. Skip it. - */ -static int vvp_page_make_ready(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - struct page *vmpage = cl2vm_page(slice); - struct cl_page *pg = slice->cpl_page; - int result = 0; - - lock_page(vmpage); - if (clear_page_dirty_for_io(vmpage)) { - LASSERT(pg->cp_state == CPS_CACHED); - /* This actually clears the dirty bit in the radix tree. */ - set_page_writeback(vmpage); - CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n"); - } else if (pg->cp_state == CPS_PAGEOUT) { - /* is it possible for osc_flush_async_page() to already - * make it ready? 
- */ - result = -EALREADY; - } else { - CL_PAGE_DEBUG(D_ERROR, env, pg, "Unexpecting page state %d.\n", - pg->cp_state); - LBUG(); - } - unlock_page(vmpage); - return result; -} - -static int vvp_page_print(const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t printer) -{ - struct vvp_page *vpg = cl2vvp_page(slice); - struct page *vmpage = vpg->vpg_page; - - (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d) vm@%p ", - vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used, vmpage); - if (vmpage) { - (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru", - (long)vmpage->flags, page_count(vmpage), - page_mapcount(vmpage), vmpage->private, - vmpage->index, - list_empty(&vmpage->lru) ? "not-" : ""); - } - - (*printer)(env, cookie, "\n"); - - return 0; -} - -static int vvp_page_fail(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - /* - * Cached read? - */ - LBUG(); - - return 0; -} - -static const struct cl_page_operations vvp_page_ops = { - .cpo_own = vvp_page_own, - .cpo_assume = vvp_page_assume, - .cpo_unassume = vvp_page_unassume, - .cpo_disown = vvp_page_disown, - .cpo_discard = vvp_page_discard, - .cpo_delete = vvp_page_delete, - .cpo_export = vvp_page_export, - .cpo_is_vmlocked = vvp_page_is_vmlocked, - .cpo_fini = vvp_page_fini, - .cpo_print = vvp_page_print, - .io = { - [CRT_READ] = { - .cpo_prep = vvp_page_prep_read, - .cpo_completion = vvp_page_completion_read, - .cpo_make_ready = vvp_page_fail, - }, - [CRT_WRITE] = { - .cpo_prep = vvp_page_prep_write, - .cpo_completion = vvp_page_completion_write, - .cpo_make_ready = vvp_page_make_ready, - }, - }, -}; - -static int vvp_transient_page_prep(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - /* transient page should always be sent. 
*/ - return 0; -} - -static int vvp_transient_page_own(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused, int nonblock) -{ - return 0; -} - -static void vvp_transient_page_assume(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ -} - -static void vvp_transient_page_unassume(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ -} - -static void vvp_transient_page_disown(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ -} - -static void vvp_transient_page_discard(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - struct cl_page *page = slice->cpl_page; - - /* - * For transient pages, remove it from the radix tree. - */ - cl_page_delete(env, page); -} - -static int vvp_transient_page_is_vmlocked(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - struct inode *inode = vvp_object_inode(slice->cpl_obj); - int locked; - - locked = !inode_trylock(inode); - if (!locked) - inode_unlock(inode); - return locked ? 
-EBUSY : -ENODATA; -} - -static void -vvp_transient_page_completion(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret) -{ -} - -static void vvp_transient_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ - struct vvp_page *vpg = cl2vvp_page(slice); - struct cl_page *clp = slice->cpl_page; - struct vvp_object *clobj = cl2vvp(clp->cp_obj); - - vvp_page_fini_common(vpg); - atomic_dec(&clobj->vob_transient_pages); -} - -static const struct cl_page_operations vvp_transient_page_ops = { - .cpo_own = vvp_transient_page_own, - .cpo_assume = vvp_transient_page_assume, - .cpo_unassume = vvp_transient_page_unassume, - .cpo_disown = vvp_transient_page_disown, - .cpo_discard = vvp_transient_page_discard, - .cpo_fini = vvp_transient_page_fini, - .cpo_is_vmlocked = vvp_transient_page_is_vmlocked, - .cpo_print = vvp_page_print, - .io = { - [CRT_READ] = { - .cpo_prep = vvp_transient_page_prep, - .cpo_completion = vvp_transient_page_completion, - }, - [CRT_WRITE] = { - .cpo_prep = vvp_transient_page_prep, - .cpo_completion = vvp_transient_page_completion, - } - } -}; - -int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) -{ - struct vvp_page *vpg = cl_object_page_slice(obj, page); - struct page *vmpage = page->cp_vmpage; - - CLOBINVRNT(env, obj, vvp_object_invariant(obj)); - - vpg->vpg_page = vmpage; - get_page(vmpage); - - if (page->cp_type == CPT_CACHEABLE) { - /* in cache, decref in vvp_page_delete */ - atomic_inc(&page->cp_ref); - SetPagePrivate(vmpage); - vmpage->private = (unsigned long)page; - cl_page_slice_add(page, &vpg->vpg_cl, obj, index, - &vvp_page_ops); - } else { - struct vvp_object *clobj = cl2vvp(obj); - - cl_page_slice_add(page, &vpg->vpg_cl, obj, index, - &vvp_transient_page_ops); - atomic_inc(&clobj->vob_transient_pages); - } - return 0; -} diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c deleted file mode 100644 
index 7fa0a419c094..000000000000 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ /dev/null @@ -1,665 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include - -#include "llite_internal.h" - -const struct xattr_handler *get_xattr_type(const char *name) -{ - int i; - - for (i = 0; ll_xattr_handlers[i]; i++) { - const char *prefix = xattr_prefix(ll_xattr_handlers[i]); - size_t prefix_len = strlen(prefix); - - if (!strncmp(prefix, name, prefix_len)) - return ll_xattr_handlers[i]; - } - - return NULL; -} - -static int xattr_type_filter(struct ll_sb_info *sbi, - const struct xattr_handler *handler) -{ - /* No handler means XATTR_OTHER_T */ - if (!handler) - return -EOPNOTSUPP; - - if ((handler->flags == XATTR_ACL_ACCESS_T || - handler->flags == XATTR_ACL_DEFAULT_T) && - !(sbi->ll_flags & LL_SBI_ACL)) - return -EOPNOTSUPP; - - if (handler->flags == XATTR_USER_T && - !(sbi->ll_flags & LL_SBI_USER_XATTR)) - return -EOPNOTSUPP; - - if (handler->flags == XATTR_TRUSTED_T && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - - return 0; -} - -static int ll_xattr_set_common(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, size_t size, - int flags) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *req = NULL; - const char *pv = value; - char *fullname; - u64 valid; - int rc; - - /* When setxattr() is called with a size of 0 the value is - * unconditionally replaced by "". When removexattr() is - * called we get a NULL value and XATTR_REPLACE for flags. 
- */ - if (!value && flags == XATTR_REPLACE) { - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1); - valid = OBD_MD_FLXATTRRM; - } else { - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1); - valid = OBD_MD_FLXATTR; - } - - rc = xattr_type_filter(sbi, handler); - if (rc) - return rc; - - if ((handler->flags == XATTR_ACL_ACCESS_T || - handler->flags == XATTR_ACL_DEFAULT_T) && - !inode_owner_or_capable(inode)) - return -EPERM; - - /* b10667: ignore lustre special xattr for now */ - if (!strcmp(name, "hsm") || - ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) || - (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov")))) - return 0; - - /* LU-549: Disable security.selinux when selinux is disabled */ - if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() && - strcmp(name, "selinux") == 0) - return -EOPNOTSUPP; - - /*FIXME: enable IMA when the conditions are ready */ - if (handler->flags == XATTR_SECURITY_T && - (!strcmp(name, "ima") || !strcmp(name, "evm"))) - return -EOPNOTSUPP; - - /* - * In user.* namespace, only regular files and directories can have - * extended attributes. 
- */ - if (handler->flags == XATTR_USER_T) { - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - return -EPERM; - } - - fullname = kasprintf(GFP_KERNEL, "%s%s", handler->prefix, name); - if (!fullname) - return -ENOMEM; - - rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname, - pv, size, flags, ll_i2suppgid(inode), &req); - kfree(fullname); - if (rc) { - if (rc == -EOPNOTSUPP && handler->flags == XATTR_USER_T) { - LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n"); - sbi->ll_flags &= ~LL_SBI_USER_XATTR; - } - return rc; - } - - ptlrpc_req_finished(req); - return 0; -} - -static int get_hsm_state(struct inode *inode, u32 *hus_states) -{ - struct md_op_data *op_data; - struct hsm_user_state *hus; - int rc; - - hus = kzalloc(sizeof(*hus), GFP_NOFS); - if (!hus) - return -ENOMEM; - - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, hus); - if (!IS_ERR(op_data)) { - rc = obd_iocontrol(LL_IOC_HSM_STATE_GET, ll_i2mdexp(inode), - sizeof(*op_data), op_data, NULL); - if (!rc) - *hus_states = hus->hus_states; - else - CDEBUG(D_VFSTRACE, "obd_iocontrol failed. rc = %d\n", - rc); - - ll_finish_md_op_data(op_data); - } else { - rc = PTR_ERR(op_data); - CDEBUG(D_VFSTRACE, "Could not prepare the opdata. rc = %d\n", - rc); - } - kfree(hus); - return rc; -} - -static int ll_adjust_lum(struct inode *inode, struct lov_user_md *lump) -{ - int rc = 0; - - if (!lump) - return 0; - - /* Attributes that are saved via getxattr will always have - * the stripe_offset as 0. Instead, the MDS should be - * allowed to pick the starting OST index. b=17846 - */ - if (lump->lmm_stripe_offset == 0) - lump->lmm_stripe_offset = -1; - - /* Avoid anyone directly setting the RELEASED flag. */ - if (lump->lmm_pattern & LOV_PATTERN_F_RELEASED) { - /* Only if we have a released flag check if the file - * was indeed archived. 
- */ - u32 state = HS_NONE; - - rc = get_hsm_state(inode, &state); - if (rc) - return rc; - - if (!(state & HS_ARCHIVED)) { - CDEBUG(D_VFSTRACE, - "hus_states state = %x, pattern = %x\n", - state, lump->lmm_pattern); - /* - * Here the state is: real file is not - * archived but user is requesting to set - * the RELEASED flag so we mask off the - * released flag from the request - */ - lump->lmm_pattern ^= LOV_PATTERN_F_RELEASED; - } - } - - return rc; -} - -static int ll_setstripe_ea(struct dentry *dentry, struct lov_user_md *lump, - size_t size) -{ - struct inode *inode = d_inode(dentry); - int rc = 0; - - /* - * It is possible to set an xattr to a "" value of zero size. - * For this case we are going to treat it as a removal. - */ - if (!size && lump) - lump = NULL; - - rc = ll_adjust_lum(inode, lump); - if (rc) - return rc; - - if (lump && S_ISREG(inode->i_mode)) { - u64 it_flags = FMODE_WRITE; - ssize_t lum_size; - - lum_size = ll_lov_user_md_size(lump); - if (lum_size < 0 || size < lum_size) - return -ERANGE; - - rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags, lump, - lum_size); - /** - * b=10667: ignore -EEXIST. - * Silently eat error on setting trusted.lov/lustre.lov - * attribute for platforms that added the default option - * to copy all attributes in 'cp' command. Both rsync and - * tar --xattrs also will try to set LOVEA for existing - * files. 
- */ - if (rc == -EEXIST) - rc = 0; - } else if (S_ISDIR(inode->i_mode)) { - if (size != 0 && size < sizeof(struct lov_user_md)) - return -EINVAL; - - rc = ll_dir_setstripe(inode, lump, 0); - } - - return rc; -} - -static int ll_xattr_set(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, const void *value, size_t size, - int flags) -{ - LASSERT(inode); - LASSERT(name); - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n", - PFID(ll_inode2fid(inode)), inode, name); - - /* lustre/trusted.lov.xxx would be passed through xattr API */ - if (!strcmp(name, "lov")) { - int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR : - LPROC_LL_SETXATTR; - - ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1); - - return ll_setstripe_ea(dentry, (struct lov_user_md *)value, - size); - } else if (!strcmp(name, "lma") || !strcmp(name, "link")) { - int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR : - LPROC_LL_SETXATTR; - - ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1); - return 0; - } - - return ll_xattr_set_common(handler, dentry, inode, name, value, size, - flags); -} - -int ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer, - size_t size, u64 valid) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *req = NULL; - struct mdt_body *body; - void *xdata; - int rc; - - if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T && - (type != XATTR_SECURITY_T || strcmp(name, "security.selinux"))) { - rc = ll_xattr_cache_get(inode, name, buffer, size, valid); - if (rc == -EAGAIN) - goto getxattr_nocache; - if (rc < 0) - goto out_xattr; - - /* Add "system.posix_acl_access" to the list */ - if (lli->lli_posix_acl && valid & OBD_MD_FLXATTRLS) { - if (size == 0) { - rc += sizeof(XATTR_NAME_ACL_ACCESS); - } else if (size - rc >= sizeof(XATTR_NAME_ACL_ACCESS)) { - memcpy(buffer + rc, XATTR_NAME_ACL_ACCESS, - 
sizeof(XATTR_NAME_ACL_ACCESS)); - rc += sizeof(XATTR_NAME_ACL_ACCESS); - } else { - rc = -ERANGE; - goto out_xattr; - } - } - } else { -getxattr_nocache: - rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, - name, size, &req); - if (rc < 0) - goto out_xattr; - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - LASSERT(body); - - /* only detect the xattr size */ - if (size == 0) { - rc = body->mbo_eadatasize; - goto out; - } - - if (size < body->mbo_eadatasize) { - CERROR("server bug: replied size %u > %u\n", - body->mbo_eadatasize, (int)size); - rc = -ERANGE; - goto out; - } - - if (body->mbo_eadatasize == 0) { - rc = -ENODATA; - goto out; - } - - /* do not need swab xattr data */ - xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, - body->mbo_eadatasize); - if (!xdata) { - rc = -EFAULT; - goto out; - } - - memcpy(buffer, xdata, body->mbo_eadatasize); - rc = body->mbo_eadatasize; - } - -out_xattr: - if (rc == -EOPNOTSUPP && type == XATTR_USER_T) { - LCONSOLE_INFO( - "%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n", - ll_get_fsname(inode->i_sb, NULL, 0), rc); - sbi->ll_flags &= ~LL_SBI_USER_XATTR; - } -out: - ptlrpc_req_finished(req); - return rc; -} - -static int ll_xattr_get_common(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - char *fullname; - int rc; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n", - PFID(ll_inode2fid(inode)), inode); - - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1); - - rc = xattr_type_filter(sbi, handler); - if (rc) - return rc; - - /* LU-549: Disable security.selinux when selinux is disabled */ - if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() && - !strcmp(name, "selinux")) - return -EOPNOTSUPP; - -#ifdef CONFIG_FS_POSIX_ACL - /* posix acl is under protection of LOOKUP lock. 
when calling to this, - * we just have path resolution to the target inode, so we have great - * chance that cached ACL is uptodate. - */ - if (handler->flags == XATTR_ACL_ACCESS_T) { - struct ll_inode_info *lli = ll_i2info(inode); - struct posix_acl *acl; - - spin_lock(&lli->lli_lock); - acl = posix_acl_dup(lli->lli_posix_acl); - spin_unlock(&lli->lli_lock); - - if (!acl) - return -ENODATA; - - rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); - posix_acl_release(acl); - return rc; - } - if (handler->flags == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode)) - return -ENODATA; -#endif - fullname = kasprintf(GFP_KERNEL, "%s%s", handler->prefix, name); - if (!fullname) - return -ENOMEM; - - rc = ll_xattr_list(inode, fullname, handler->flags, buffer, size, - OBD_MD_FLXATTR); - kfree(fullname); - return rc; -} - -static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size) -{ - ssize_t rc; - - if (S_ISREG(inode->i_mode)) { - struct cl_object *obj = ll_i2info(inode)->lli_clob; - struct cl_layout cl = { - .cl_buf.lb_buf = buf, - .cl_buf.lb_len = buf_size, - }; - struct lu_env *env; - u16 refcheck; - - if (!obj) - return -ENODATA; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - rc = cl_object_layout_get(env, obj, &cl); - if (rc < 0) - goto out_env; - - if (!cl.cl_size) { - rc = -ENODATA; - goto out_env; - } - - rc = cl.cl_size; - - if (!buf_size) - goto out_env; - - LASSERT(buf && rc <= buf_size); - - /* - * Do not return layout gen for getxattr() since - * otherwise it would confuse tar --xattr by - * recognizing layout gen as stripe offset when the - * file is restored. See LU-2809. 
- */ - ((struct lov_mds_md *)buf)->lmm_layout_gen = 0; -out_env: - cl_env_put(env, &refcheck); - - return rc; - } else if (S_ISDIR(inode->i_mode)) { - struct ptlrpc_request *req = NULL; - struct lov_mds_md *lmm = NULL; - int lmm_size = 0; - - rc = ll_dir_getstripe(inode, (void **)&lmm, &lmm_size, - &req, 0); - if (rc < 0) - goto out_req; - - if (!buf_size) { - rc = lmm_size; - goto out_req; - } - - if (buf_size < lmm_size) { - rc = -ERANGE; - goto out_req; - } - - memcpy(buf, lmm, lmm_size); - rc = lmm_size; -out_req: - if (req) - ptlrpc_req_finished(req); - - return rc; - } else { - return -ENODATA; - } -} - -static int ll_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - LASSERT(inode); - LASSERT(name); - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n", - PFID(ll_inode2fid(inode)), inode, name); - - if (!strcmp(name, "lov")) { - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1); - - return ll_getxattr_lov(inode, buffer, size); - } - - return ll_xattr_get_common(handler, dentry, inode, name, buffer, size); -} - -ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - struct inode *inode = d_inode(dentry); - struct ll_sb_info *sbi = ll_i2sbi(inode); - char *xattr_name; - ssize_t rc, rc2; - size_t len, rem; - - LASSERT(inode); - - CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p)\n", - PFID(ll_inode2fid(inode)), inode); - - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1); - - rc = ll_xattr_list(inode, NULL, XATTR_OTHER_T, buffer, size, - OBD_MD_FLXATTRLS); - if (rc < 0) - return rc; - - /* - * If we're being called to get the size of the xattr list - * (size == 0) then just assume that a lustre.lov xattr - * exists. 
- */ - if (!size) - return rc + sizeof(XATTR_LUSTRE_LOV); - - xattr_name = buffer; - rem = rc; - - while (rem > 0) { - len = strnlen(xattr_name, rem - 1) + 1; - rem -= len; - if (!xattr_type_filter(sbi, get_xattr_type(xattr_name))) { - /* Skip OK xattr type, leave it in buffer. */ - xattr_name += len; - continue; - } - - /* - * Move up remaining xattrs in buffer - * removing the xattr that is not OK. - */ - memmove(xattr_name, xattr_name + len, rem); - rc -= len; - } - - rc2 = ll_getxattr_lov(inode, NULL, 0); - if (rc2 == -ENODATA) - return rc; - - if (rc2 < 0) - return rc2; - - if (size < rc + sizeof(XATTR_LUSTRE_LOV)) - return -ERANGE; - - memcpy(buffer + rc, XATTR_LUSTRE_LOV, sizeof(XATTR_LUSTRE_LOV)); - - return rc + sizeof(XATTR_LUSTRE_LOV); -} - -static const struct xattr_handler ll_user_xattr_handler = { - .prefix = XATTR_USER_PREFIX, - .flags = XATTR_USER_T, - .get = ll_xattr_get_common, - .set = ll_xattr_set_common, -}; - -static const struct xattr_handler ll_trusted_xattr_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .flags = XATTR_TRUSTED_T, - .get = ll_xattr_get, - .set = ll_xattr_set, -}; - -static const struct xattr_handler ll_security_xattr_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .flags = XATTR_SECURITY_T, - .get = ll_xattr_get_common, - .set = ll_xattr_set_common, -}; - -static const struct xattr_handler ll_acl_access_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_ACCESS, - .flags = XATTR_ACL_ACCESS_T, - .get = ll_xattr_get_common, - .set = ll_xattr_set_common, -}; - -static const struct xattr_handler ll_acl_default_xattr_handler = { - .name = XATTR_NAME_POSIX_ACL_DEFAULT, - .flags = XATTR_ACL_DEFAULT_T, - .get = ll_xattr_get_common, - .set = ll_xattr_set_common, -}; - -static const struct xattr_handler ll_lustre_xattr_handler = { - .prefix = XATTR_LUSTRE_PREFIX, - .flags = XATTR_LUSTRE_T, - .get = ll_xattr_get, - .set = ll_xattr_set, -}; - -const struct xattr_handler *ll_xattr_handlers[] = { - &ll_user_xattr_handler, - 
&ll_trusted_xattr_handler, - &ll_security_xattr_handler, -#ifdef CONFIG_FS_POSIX_ACL - &ll_acl_access_xattr_handler, - &ll_acl_default_xattr_handler, -#endif - &ll_lustre_xattr_handler, - NULL, -}; diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c deleted file mode 100644 index 5da69ba088c4..000000000000 --- a/drivers/staging/lustre/lustre/llite/xattr_cache.c +++ /dev/null @@ -1,504 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright 2012 Xyratex Technology Limited - * - * Copyright (c) 2013, 2015, Intel Corporation. - * - * Author: Andrew Perepechko - * - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include -#include -#include -#include "llite_internal.h" - -/* If we ever have hundreds of extended attributes, we might want to consider - * using a hash or a tree structure instead of list for faster lookups. - */ -struct ll_xattr_entry { - struct list_head xe_list; /* protected with - * lli_xattrs_list_rwsem - */ - char *xe_name; /* xattr name, \0-terminated */ - char *xe_value; /* xattr value */ - unsigned int xe_namelen; /* strlen(xe_name) + 1 */ - unsigned int xe_vallen; /* xattr value length */ -}; - -static struct kmem_cache *xattr_kmem; -static struct lu_kmem_descr xattr_caches[] = { - { - .ckd_cache = &xattr_kmem, - .ckd_name = "xattr_kmem", - .ckd_size = sizeof(struct ll_xattr_entry) - }, - { - .ckd_cache = NULL - } -}; - -int ll_xattr_init(void) -{ - return lu_kmem_init(xattr_caches); -} - -void ll_xattr_fini(void) -{ - lu_kmem_fini(xattr_caches); -} - -/** - * Initializes xattr cache for an inode. - * - * This initializes the xattr list and marks cache presence. - */ -static void ll_xattr_cache_init(struct ll_inode_info *lli) -{ - INIT_LIST_HEAD(&lli->lli_xattrs); - set_bit(LLIF_XATTR_CACHE, &lli->lli_flags); -} - -/** - * This looks for a specific extended attribute. 
- * - * Find in @cache and return @xattr_name attribute in @xattr, - * for the NULL @xattr_name return the first cached @xattr. - * - * \retval 0 success - * \retval -ENODATA if not found - */ -static int ll_xattr_cache_find(struct list_head *cache, - const char *xattr_name, - struct ll_xattr_entry **xattr) -{ - struct ll_xattr_entry *entry; - - list_for_each_entry(entry, cache, xe_list) { - /* xattr_name == NULL means look for any entry */ - if (!xattr_name || strcmp(xattr_name, entry->xe_name) == 0) { - *xattr = entry; - CDEBUG(D_CACHE, "find: [%s]=%.*s\n", - entry->xe_name, entry->xe_vallen, - entry->xe_value); - return 0; - } - } - - return -ENODATA; -} - -/** - * This adds an xattr. - * - * Add @xattr_name attr with @xattr_val value and @xattr_val_len length, - * - * \retval 0 success - * \retval -ENOMEM if no memory could be allocated for the cached attr - * \retval -EPROTO if duplicate xattr is being added - */ -static int ll_xattr_cache_add(struct list_head *cache, - const char *xattr_name, - const char *xattr_val, - unsigned int xattr_val_len) -{ - struct ll_xattr_entry *xattr; - - if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { - CDEBUG(D_CACHE, "duplicate xattr: [%s]\n", xattr_name); - return -EPROTO; - } - - xattr = kmem_cache_zalloc(xattr_kmem, GFP_NOFS); - if (!xattr) { - CDEBUG(D_CACHE, "failed to allocate xattr\n"); - return -ENOMEM; - } - - xattr->xe_name = kstrdup(xattr_name, GFP_NOFS); - if (!xattr->xe_name) { - CDEBUG(D_CACHE, "failed to alloc xattr name %s\n", - xattr_name); - goto err_name; - } - xattr->xe_namelen = strlen(xattr_name) + 1; - - xattr->xe_value = kmemdup(xattr_val, xattr_val_len, GFP_NOFS); - if (!xattr->xe_value) - goto err_value; - - xattr->xe_vallen = xattr_val_len; - list_add(&xattr->xe_list, cache); - - CDEBUG(D_CACHE, "set: [%s]=%.*s\n", xattr_name, xattr_val_len, - xattr_val); - - return 0; -err_value: - kfree(xattr->xe_name); -err_name: - kmem_cache_free(xattr_kmem, xattr); - - return -ENOMEM; -} - -/** - * 
This removes an extended attribute from cache. - * - * Remove @xattr_name attribute from @cache. - * - * \retval 0 success - * \retval -ENODATA if @xattr_name is not cached - */ -static int ll_xattr_cache_del(struct list_head *cache, - const char *xattr_name) -{ - struct ll_xattr_entry *xattr; - - CDEBUG(D_CACHE, "del xattr: %s\n", xattr_name); - - if (ll_xattr_cache_find(cache, xattr_name, &xattr) == 0) { - list_del(&xattr->xe_list); - kfree(xattr->xe_name); - kfree(xattr->xe_value); - kmem_cache_free(xattr_kmem, xattr); - - return 0; - } - - return -ENODATA; -} - -/** - * This iterates cached extended attributes. - * - * Walk over cached attributes in @cache and - * fill in @xld_buffer or only calculate buffer - * size if @xld_buffer is NULL. - * - * \retval >= 0 buffer list size - * \retval -ENODATA if the list cannot fit @xld_size buffer - */ -static int ll_xattr_cache_list(struct list_head *cache, - char *xld_buffer, - int xld_size) -{ - struct ll_xattr_entry *xattr, *tmp; - int xld_tail = 0; - - list_for_each_entry_safe(xattr, tmp, cache, xe_list) { - CDEBUG(D_CACHE, "list: buffer=%p[%d] name=%s\n", - xld_buffer, xld_tail, xattr->xe_name); - - if (xld_buffer) { - xld_size -= xattr->xe_namelen; - if (xld_size < 0) - break; - memcpy(&xld_buffer[xld_tail], - xattr->xe_name, xattr->xe_namelen); - } - xld_tail += xattr->xe_namelen; - } - - if (xld_size < 0) - return -ERANGE; - - return xld_tail; -} - -/** - * Check if the xattr cache is initialized (filled). - * - * \retval 0 @cache is not initialized - * \retval 1 @cache is initialized - */ -static int ll_xattr_cache_valid(struct ll_inode_info *lli) -{ - return test_bit(LLIF_XATTR_CACHE, &lli->lli_flags); -} - -/** - * This finalizes the xattr cache. - * - * Free all xattr memory. @lli is the inode info pointer. 
- * - * \retval 0 no error occurred - */ -static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) -{ - if (!ll_xattr_cache_valid(lli)) - return 0; - - while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0) - ; /* empty loop */ - - clear_bit(LLIF_XATTR_CACHE, &lli->lli_flags); - - return 0; -} - -int ll_xattr_cache_destroy(struct inode *inode) -{ - struct ll_inode_info *lli = ll_i2info(inode); - int rc; - - down_write(&lli->lli_xattrs_list_rwsem); - rc = ll_xattr_cache_destroy_locked(lli); - up_write(&lli->lli_xattrs_list_rwsem); - - return rc; -} - -/** - * Match or enqueue a PR lock. - * - * Find or request an LDLM lock with xattr data. - * Since LDLM does not provide API for atomic match_or_enqueue, - * the function handles it with a separate enq lock. - * If successful, the function exits with the list lock held. - * - * \retval 0 no error occurred - * \retval -ENOMEM not enough memory - */ -static int ll_xattr_find_get_lock(struct inode *inode, - struct lookup_intent *oit, - struct ptlrpc_request **req) -{ - enum ldlm_mode mode; - struct lustre_handle lockh = { 0 }; - struct md_op_data *op_data; - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct obd_export *exp = sbi->ll_md_exp; - int rc; - - mutex_lock(&lli->lli_xattrs_enq_lock); - /* inode may have been shrunk and recreated, so data is gone, match lock - * only when data exists. - */ - if (ll_xattr_cache_valid(lli)) { - /* Try matching first. */ - mode = ll_take_md_lock(inode, MDS_INODELOCK_XATTR, &lockh, 0, - LCK_PR); - if (mode != 0) { - /* fake oit in mdc_revalidate_lock() manner */ - oit->it_lock_handle = lockh.cookie; - oit->it_lock_mode = mode; - goto out; - } - } - - /* Enqueue if the lock isn't cached locally. 
*/ - op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, - LUSTRE_OPC_ANY, NULL); - if (IS_ERR(op_data)) { - mutex_unlock(&lli->lli_xattrs_enq_lock); - return PTR_ERR(op_data); - } - - op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS; - - rc = md_intent_lock(exp, op_data, oit, req, &ll_md_blocking_ast, 0); - ll_finish_md_op_data(op_data); - *req = oit->it_request; - - if (rc < 0) { - CDEBUG(D_CACHE, - "md_intent_lock failed with %d for fid " DFID "\n", - rc, PFID(ll_inode2fid(inode))); - mutex_unlock(&lli->lli_xattrs_enq_lock); - return rc; - } - -out: - down_write(&lli->lli_xattrs_list_rwsem); - mutex_unlock(&lli->lli_xattrs_enq_lock); - - return 0; -} - -/** - * Refill the xattr cache. - * - * Fetch and cache the whole of xattrs for @inode, acquiring a read lock. - * - * \retval 0 no error occurred - * \retval -EPROTO network protocol error - * \retval -ENOMEM not enough memory for the cache - */ -static int ll_xattr_cache_refill(struct inode *inode) -{ - struct lookup_intent oit = { .it_op = IT_GETXATTR }; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *req = NULL; - const char *xdata, *xval, *xtail, *xvtail; - struct ll_inode_info *lli = ll_i2info(inode); - struct mdt_body *body; - __u32 *xsizes; - int rc, i; - - rc = ll_xattr_find_get_lock(inode, &oit, &req); - if (rc) - goto err_req; - - /* Do we have the data at this point? */ - if (ll_xattr_cache_valid(lli)) { - ll_stats_ops_tally(sbi, LPROC_LL_GETXATTR_HITS, 1); - ll_intent_drop_lock(&oit); - rc = 0; - goto err_req; - } - - /* Matched but no cache? Cancelled on error by a parallel refill. 
*/ - if (unlikely(!req)) { - CDEBUG(D_CACHE, "cancelled by a parallel getxattr\n"); - ll_intent_drop_lock(&oit); - rc = -EAGAIN; - goto err_unlock; - } - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (!body) { - CERROR("no MDT BODY in the refill xattr reply\n"); - rc = -EPROTO; - goto err_cancel; - } - /* do not need swab xattr data */ - xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA, - body->mbo_eadatasize); - xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS, - body->mbo_aclsize); - xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS, - body->mbo_max_mdsize * sizeof(__u32)); - if (!xdata || !xval || !xsizes) { - CERROR("wrong setxattr reply\n"); - rc = -EPROTO; - goto err_cancel; - } - - xtail = xdata + body->mbo_eadatasize; - xvtail = xval + body->mbo_aclsize; - - CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail); - - ll_xattr_cache_init(lli); - - for (i = 0; i < body->mbo_max_mdsize; i++) { - CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval); - /* Perform consistency checks: attr names and vals in pill */ - if (!memchr(xdata, 0, xtail - xdata)) { - CERROR("xattr protocol violation (names are broken)\n"); - rc = -EPROTO; - } else if (xval + *xsizes > xvtail) { - CERROR("xattr protocol violation (vals are broken)\n"); - rc = -EPROTO; - } else if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_XATTR_ENOMEM)) { - rc = -ENOMEM; - } else if (!strcmp(xdata, XATTR_NAME_ACL_ACCESS)) { - /* Filter out ACL ACCESS since it's cached separately */ - CDEBUG(D_CACHE, "not caching %s\n", - XATTR_NAME_ACL_ACCESS); - rc = 0; - } else if (!strcmp(xdata, "security.selinux")) { - /* Filter out security.selinux, it is cached in slab */ - CDEBUG(D_CACHE, "not caching security.selinux\n"); - rc = 0; - } else { - rc = ll_xattr_cache_add(&lli->lli_xattrs, xdata, xval, - *xsizes); - } - if (rc < 0) { - ll_xattr_cache_destroy_locked(lli); - goto err_cancel; - } - xdata += strlen(xdata) + 1; - xval += *xsizes; - 
xsizes++; - } - - if (xdata != xtail || xval != xvtail) - CERROR("a hole in xattr data\n"); - - ll_set_lock_data(sbi->ll_md_exp, inode, &oit, NULL); - ll_intent_drop_lock(&oit); - - ptlrpc_req_finished(req); - return rc; - -err_cancel: - ldlm_lock_decref_and_cancel((struct lustre_handle *) - &oit.it_lock_handle, - oit.it_lock_mode); -err_unlock: - up_write(&lli->lli_xattrs_list_rwsem); -err_req: - if (rc == -ERANGE) - rc = -EAGAIN; - - ptlrpc_req_finished(req); - return rc; -} - -/** - * Get an xattr value or list xattrs using the write-through cache. - * - * Get the xattr value (@valid has OBD_MD_FLXATTR set) of @name or - * list xattr names (@valid has OBD_MD_FLXATTRLS set) for @inode. - * The resulting value/list is stored in @buffer if the former - * is not larger than @size. - * - * \retval 0 no error occurred - * \retval -EPROTO network protocol error - * \retval -ENOMEM not enough memory for the cache - * \retval -ERANGE the buffer is not large enough - * \retval -ENODATA no such attr or the list is empty - */ -int ll_xattr_cache_get(struct inode *inode, const char *name, char *buffer, - size_t size, __u64 valid) -{ - struct ll_inode_info *lli = ll_i2info(inode); - int rc = 0; - - LASSERT(!!(valid & OBD_MD_FLXATTR) ^ !!(valid & OBD_MD_FLXATTRLS)); - - down_read(&lli->lli_xattrs_list_rwsem); - if (!ll_xattr_cache_valid(lli)) { - up_read(&lli->lli_xattrs_list_rwsem); - rc = ll_xattr_cache_refill(inode); - if (rc) - return rc; - downgrade_write(&lli->lli_xattrs_list_rwsem); - } else { - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR_HITS, 1); - } - - if (valid & OBD_MD_FLXATTR) { - struct ll_xattr_entry *xattr; - - rc = ll_xattr_cache_find(&lli->lli_xattrs, name, &xattr); - if (rc == 0) { - rc = xattr->xe_vallen; - /* zero size means we are only requested size in rc */ - if (size != 0) { - if (size >= xattr->xe_vallen) - memcpy(buffer, xattr->xe_value, - xattr->xe_vallen); - else - rc = -ERANGE; - } - } - } else if (valid & OBD_MD_FLXATTRLS) { - rc = 
ll_xattr_cache_list(&lli->lli_xattrs, - size ? buffer : NULL, size); - } - - goto out; -out: - up_read(&lli->lli_xattrs_list_rwsem); - - return rc; -} diff --git a/drivers/staging/lustre/lustre/llite/xattr_security.c b/drivers/staging/lustre/lustre/llite/xattr_security.c deleted file mode 100644 index 93ec07531ac7..000000000000 --- a/drivers/staging/lustre/lustre/llite/xattr_security.c +++ /dev/null @@ -1,96 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licenses - * - * GPL HEADER END - */ - -/* - * Copyright (c) 2014 Bull SAS - * Author: Sebastien Buisson sebastien.buisson@bull.net - */ - -/* - * lustre/llite/xattr_security.c - * Handler for storing security labels as extended attributes. - */ - -#include -#include -#include -#include -#include "llite_internal.h" - -/** - * A helper function for ll_security_inode_init_security() - * that takes care of setting xattrs - * - * Get security context of @inode from @xattr_array, - * and put it in 'security.xxx' xattr of dentry - * stored in @fs_info. 
- * - * \retval 0 success - * \retval -ENOMEM if no memory could be allocated for xattr name - * \retval < 0 failure to set xattr - */ -static int -ll_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *fs_info) -{ - struct dentry *dentry = fs_info; - const struct xattr *xattr; - int err = 0; - - for (xattr = xattr_array; xattr->name; xattr++) { - char *full_name; - - full_name = kasprintf(GFP_KERNEL, "%s%s", - XATTR_SECURITY_PREFIX, xattr->name); - if (!full_name) { - err = -ENOMEM; - break; - } - - err = __vfs_setxattr(dentry, inode, full_name, xattr->value, - xattr->value_len, XATTR_CREATE); - kfree(full_name); - if (err < 0) - break; - } - return err; -} - -/** - * Initializes security context - * - * Get security context of @inode in @dir, - * and put it in 'security.xxx' xattr of @dentry. - * - * \retval 0 success, or SELinux is disabled - * \retval -ENOMEM if no memory could be allocated for xattr name - * \retval < 0 failure to get security context or set xattr - */ -int -ll_init_security(struct dentry *dentry, struct inode *inode, struct inode *dir) -{ - if (!selinux_is_enabled()) - return 0; - - return security_inode_init_security(inode, dir, NULL, - &ll_initxattrs, dentry); -} diff --git a/drivers/staging/lustre/lustre/lmv/Makefile b/drivers/staging/lustre/lustre/lmv/Makefile deleted file mode 100644 index 91c99114aa13..000000000000 --- a/drivers/staging/lustre/lustre/lmv/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LUSTRE_FS) += lmv.o -lmv-y := lmv_obd.o lmv_intent.o lmv_fld.o lproc_lmv.o diff --git a/drivers/staging/lustre/lustre/lmv/lmv_fld.c b/drivers/staging/lustre/lustre/lmv/lmv_fld.c deleted file mode 100644 index 00dc858c10c9..000000000000 --- a/drivers/staging/lustre/lustre/lmv/lmv_fld.c +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - 
* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2013, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LMV -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "lmv_internal.h" - -int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, u32 *mds) -{ - struct obd_device *obd = lmv2obd_dev(lmv); - int rc; - - /* - * FIXME: Currently ZFS still use local seq for ROOT unfortunately, and - * this fid_is_local check should be removed once LU-2240 is fixed - */ - if (!fid_is_sane(fid) || !(fid_seq_in_fldb(fid_seq(fid)) || - fid_seq_is_local_file(fid_seq(fid)))) { - CERROR("%s: invalid FID " DFID "\n", obd->obd_name, PFID(fid)); - return -EINVAL; - } - - rc = fld_client_lookup(&lmv->lmv_fld, fid_seq(fid), mds, - LU_SEQ_RANGE_MDT, NULL); - if (rc) { - CERROR("Error while looking for mds number. 
Seq %#llx, err = %d\n", - fid_seq(fid), rc); - return rc; - } - - CDEBUG(D_INODE, "FLD lookup got mds #%x for fid=" DFID "\n", - *mds, PFID(fid)); - - if (*mds >= lmv->desc.ld_tgt_count) { - CERROR("FLD lookup got invalid mds #%x (max: %x) for fid=" DFID "\n", *mds, lmv->desc.ld_tgt_count, - PFID(fid)); - rc = -EINVAL; - } - return rc; -} diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c deleted file mode 100644 index 1e850fdbc623..000000000000 --- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c +++ /dev/null @@ -1,521 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LMV -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "lmv_internal.h" - -static int lmv_intent_remote(struct obd_export *exp, struct lookup_intent *it, - const struct lu_fid *parent_fid, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - __u64 extra_lock_flags) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct ptlrpc_request *req = NULL; - struct lustre_handle plock; - struct md_op_data *op_data; - struct lmv_tgt_desc *tgt; - struct mdt_body *body; - int pmode; - int rc = 0; - - body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); - if (!body) - return -EPROTO; - - LASSERT((body->mbo_valid & OBD_MD_MDS)); - - /* - * Unfortunately, we have to lie to MDC/MDS to retrieve - * attributes llite needs and provideproper locking. - */ - if (it->it_op & IT_LOOKUP) - it->it_op = IT_GETATTR; - - /* - * We got LOOKUP lock, but we really need attrs. 
- */ - pmode = it->it_lock_mode; - if (pmode) { - plock.cookie = it->it_lock_handle; - it->it_lock_mode = 0; - it->it_request = NULL; - } - - LASSERT(fid_is_sane(&body->mbo_fid1)); - - tgt = lmv_find_target(lmv, &body->mbo_fid1); - if (IS_ERR(tgt)) { - rc = PTR_ERR(tgt); - goto out; - } - - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - if (!op_data) { - rc = -ENOMEM; - goto out; - } - - op_data->op_fid1 = body->mbo_fid1; - /* Sent the parent FID to the remote MDT */ - if (parent_fid) { - /* The parent fid is only for remote open to - * check whether the open is from OBF, - * see mdt_cross_open - */ - LASSERT(it->it_op & IT_OPEN); - op_data->op_fid2 = *parent_fid; - } - - op_data->op_bias = MDS_CROSS_REF; - CDEBUG(D_INODE, "REMOTE_INTENT with fid=" DFID " -> mds #%u\n", - PFID(&body->mbo_fid1), tgt->ltd_idx); - - rc = md_intent_lock(tgt->ltd_exp, op_data, it, &req, cb_blocking, - extra_lock_flags); - if (rc) - goto out_free_op_data; - - /* - * LLite needs LOOKUP lock to track dentry revocation in order to - * maintain dcache consistency. Thus drop UPDATE|PERM lock here - * and put LOOKUP in request. 
- */ - if (it->it_lock_mode != 0) { - it->it_remote_lock_handle = - it->it_lock_handle; - it->it_remote_lock_mode = it->it_lock_mode; - } - - if (pmode) { - it->it_lock_handle = plock.cookie; - it->it_lock_mode = pmode; - } - -out_free_op_data: - kfree(op_data); -out: - if (rc && pmode) - ldlm_lock_decref(&plock, pmode); - - ptlrpc_req_finished(*reqp); - *reqp = req; - return rc; -} - -int lmv_revalidate_slaves(struct obd_export *exp, - const struct lmv_stripe_md *lsm, - ldlm_blocking_callback cb_blocking, - int extra_lock_flags) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct ptlrpc_request *req = NULL; - struct mdt_body *body; - struct md_op_data *op_data; - int rc = 0, i; - - /** - * revalidate slaves has some problems, temporarily return, - * we may not need that - */ - op_data = kzalloc(sizeof(*op_data), GFP_NOFS); - if (!op_data) - return -ENOMEM; - - /** - * Loop over the stripe information, check validity and update them - * from MDS if needed. - */ - for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - struct lookup_intent it = { .it_op = IT_GETATTR }; - struct lustre_handle *lockh = NULL; - struct lmv_tgt_desc *tgt = NULL; - struct inode *inode; - struct lu_fid fid; - - fid = lsm->lsm_md_oinfo[i].lmo_fid; - inode = lsm->lsm_md_oinfo[i].lmo_root; - - /* - * Prepare op_data for revalidating. Note that @fid2 shluld be - * defined otherwise it will go to server and take new lock - * which is not needed here. 
- */ - memset(op_data, 0, sizeof(*op_data)); - op_data->op_fid1 = fid; - op_data->op_fid2 = fid; - - tgt = lmv_locate_mds(lmv, op_data, &fid); - if (IS_ERR(tgt)) { - rc = PTR_ERR(tgt); - goto cleanup; - } - - CDEBUG(D_INODE, "Revalidate slave " DFID " -> mds #%u\n", - PFID(&fid), tgt->ltd_idx); - - if (req) { - ptlrpc_req_finished(req); - req = NULL; - } - - rc = md_intent_lock(tgt->ltd_exp, op_data, &it, &req, - cb_blocking, extra_lock_flags); - if (rc < 0) - goto cleanup; - - lockh = (struct lustre_handle *)&it.it_lock_handle; - if (rc > 0 && !req) { - /* slave inode is still valid */ - CDEBUG(D_INODE, "slave " DFID " is still valid.\n", - PFID(&fid)); - rc = 0; - } else { - /* refresh slave from server */ - body = req_capsule_server_get(&req->rq_pill, - &RMF_MDT_BODY); - if (!body) { - if (it.it_lock_mode && lockh) { - ldlm_lock_decref(lockh, it.it_lock_mode); - it.it_lock_mode = 0; - } - - rc = -ENOENT; - goto cleanup; - } - - i_size_write(inode, body->mbo_size); - inode->i_blocks = body->mbo_blocks; - set_nlink(inode, body->mbo_nlink); - LTIME_S(inode->i_atime) = body->mbo_atime; - LTIME_S(inode->i_ctime) = body->mbo_ctime; - LTIME_S(inode->i_mtime) = body->mbo_mtime; - } - - md_set_lock_data(tgt->ltd_exp, lockh, inode, NULL); - - if (it.it_lock_mode && lockh) { - ldlm_lock_decref(lockh, it.it_lock_mode); - it.it_lock_mode = 0; - } - } - -cleanup: - if (req) - ptlrpc_req_finished(req); - - kfree(op_data); - return rc; -} - -/* - * IT_OPEN is intended to open (and create, possible) an object. Parent (pid) - * may be split dir. 
- */ -static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, - struct lookup_intent *it, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - __u64 extra_lock_flags) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - struct mdt_body *body; - int rc; - - if (it->it_flags & MDS_OPEN_BY_FID) { - LASSERT(fid_is_sane(&op_data->op_fid2)); - - /* - * for striped directory, we can't know parent stripe fid - * without name, but we can set it to child fid, and MDT - * will obtain it from linkea in open in such case. - */ - if (op_data->op_mea1) - op_data->op_fid1 = op_data->op_fid2; - - tgt = lmv_find_target(lmv, &op_data->op_fid2); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - op_data->op_mds = tgt->ltd_idx; - } else { - LASSERT(fid_is_sane(&op_data->op_fid1)); - LASSERT(fid_is_zero(&op_data->op_fid2)); - LASSERT(op_data->op_name); - - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - } - - /* If it is ready to open the file by FID, do not need - * allocate FID at all, otherwise it will confuse MDT - */ - if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) { - /* - * For lookup(IT_CREATE) cases allocate new fid and setup FLD - * for it. - */ - rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); - if (rc != 0) - return rc; - } - - CDEBUG(D_INODE, "OPEN_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%u\n", - PFID(&op_data->op_fid1), - PFID(&op_data->op_fid2), op_data->op_name, tgt->ltd_idx); - - rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking, - extra_lock_flags); - if (rc != 0) - return rc; - /* - * Nothing is found, do not access body->mbo_fid1 as it is zero and thus - * pointless. 
- */ - if ((it->it_disposition & DISP_LOOKUP_NEG) && - !(it->it_disposition & DISP_OPEN_CREATE) && - !(it->it_disposition & DISP_OPEN_OPEN)) - return rc; - - body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); - if (!body) - return -EPROTO; - - /* Not cross-ref case, just get out of here. */ - if (unlikely((body->mbo_valid & OBD_MD_MDS))) { - rc = lmv_intent_remote(exp, it, &op_data->op_fid1, reqp, - cb_blocking, extra_lock_flags); - if (rc != 0) - return rc; - - body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); - if (!body) - return -EPROTO; - } - - return rc; -} - -/* - * Handler for: getattr, lookup and revalidate cases. - */ -static int lmv_intent_lookup(struct obd_export *exp, - struct md_op_data *op_data, - struct lookup_intent *it, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - __u64 extra_lock_flags) -{ - struct lmv_stripe_md *lsm = op_data->op_mea1; - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt = NULL; - struct mdt_body *body; - int rc = 0; - - /* - * If it returns ERR_PTR(-EBADFD) then it is an unknown hash type - * it will try all stripes to locate the object - */ - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); - if (IS_ERR(tgt) && (PTR_ERR(tgt) != -EBADFD)) - return PTR_ERR(tgt); - - /* - * Both migrating dir and unknown hash dir need to try - * all of sub-stripes - */ - if (lsm && !lmv_is_known_hash_type(lsm->lsm_md_hash_type)) { - struct lmv_oinfo *oinfo = &lsm->lsm_md_oinfo[0]; - - op_data->op_fid1 = oinfo->lmo_fid; - op_data->op_mds = oinfo->lmo_mds; - tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - } - - if (!fid_is_sane(&op_data->op_fid2)) - fid_zero(&op_data->op_fid2); - - CDEBUG(D_INODE, "LOOKUP_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%u lsm=%p lsm_magic=%x\n", - PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), - op_data->op_name ? 
op_data->op_name : "", - tgt->ltd_idx, lsm, !lsm ? -1 : lsm->lsm_md_magic); - - op_data->op_bias &= ~MDS_CROSS_REF; - - rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking, - extra_lock_flags); - if (rc < 0) - return rc; - - if (!*reqp) { - /* - * If RPC happens, lsm information will be revalidated - * during update_inode process (see ll_update_lsm_md) - */ - if (op_data->op_mea2) { - rc = lmv_revalidate_slaves(exp, op_data->op_mea2, - cb_blocking, - extra_lock_flags); - if (rc != 0) - return rc; - } - return rc; - } else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm && - lmv_need_try_all_stripes(lsm)) { - /* - * For migrating and unknown hash type directory, it will - * try to target the entry on other stripes - */ - int stripe_index; - - for (stripe_index = 1; - stripe_index < lsm->lsm_md_stripe_count && - it_disposition(it, DISP_LOOKUP_NEG); stripe_index++) { - struct lmv_oinfo *oinfo; - - /* release the previous request */ - ptlrpc_req_finished(*reqp); - it->it_request = NULL; - *reqp = NULL; - - oinfo = &lsm->lsm_md_oinfo[stripe_index]; - tgt = lmv_find_target(lmv, &oinfo->lmo_fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - CDEBUG(D_INODE, "Try other stripes " DFID "\n", - PFID(&oinfo->lmo_fid)); - - op_data->op_fid1 = oinfo->lmo_fid; - it->it_disposition &= ~DISP_ENQ_COMPLETE; - rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, - cb_blocking, extra_lock_flags); - if (rc) - return rc; - } - } - - if (!it_has_reply_body(it)) - return 0; - - /* - * MDS has returned success. Probably name has been resolved in - * remote inode. Let's check this. - */ - body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); - if (!body) - return -EPROTO; - - /* Not cross-ref case, just get out of here. 
*/ - if (unlikely((body->mbo_valid & OBD_MD_MDS))) { - rc = lmv_intent_remote(exp, it, NULL, reqp, cb_blocking, - extra_lock_flags); - if (rc != 0) - return rc; - body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY); - if (!body) - return -EPROTO; - } - - return rc; -} - -int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data, - struct lookup_intent *it, struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - __u64 extra_lock_flags) -{ - int rc; - - LASSERT(fid_is_sane(&op_data->op_fid1)); - - CDEBUG(D_INODE, "INTENT LOCK '%s' for " DFID " '%*s' on " DFID "\n", - LL_IT2STR(it), PFID(&op_data->op_fid2), - (int)op_data->op_namelen, op_data->op_name, - PFID(&op_data->op_fid1)); - - if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_LAYOUT | IT_GETXATTR)) - rc = lmv_intent_lookup(exp, op_data, it, reqp, cb_blocking, - extra_lock_flags); - else if (it->it_op & IT_OPEN) - rc = lmv_intent_open(exp, op_data, it, reqp, cb_blocking, - extra_lock_flags); - else - LBUG(); - - if (rc < 0) { - struct lustre_handle lock_handle; - - if (it->it_lock_mode) { - lock_handle.cookie = it->it_lock_handle; - ldlm_lock_decref_and_cancel(&lock_handle, - it->it_lock_mode); - } - - it->it_lock_handle = 0; - it->it_lock_mode = 0; - - if (it->it_remote_lock_mode) { - lock_handle.cookie = it->it_remote_lock_handle; - ldlm_lock_decref_and_cancel(&lock_handle, - it->it_remote_lock_mode); - } - - it->it_remote_lock_handle = 0; - it->it_remote_lock_mode = 0; - } - - return rc; -} diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h deleted file mode 100644 index 68a99170c424..000000000000 --- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h +++ /dev/null @@ -1,164 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef _LMV_INTERNAL_H_ -#define _LMV_INTERNAL_H_ - -#include -#include -#include - -#define LMV_MAX_TGT_COUNT 128 - -#define LL_IT2STR(it) \ - ((it) ? 
ldlm_it2str((it)->it_op) : "0") - -int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data, - struct lookup_intent *it, struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - __u64 extra_lock_flags); - -int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, u32 *mds); -int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds); -int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp, - struct lu_fid *fid, struct md_op_data *op_data); - -int lmv_revalidate_slaves(struct obd_export *exp, - const struct lmv_stripe_md *lsm, - ldlm_blocking_callback cb_blocking, - int extra_lock_flags); - -static inline struct obd_device *lmv2obd_dev(struct lmv_obd *lmv) -{ - return container_of_safe(lmv, struct obd_device, u.lmv); -} - -static inline struct lmv_tgt_desc * -lmv_get_target(struct lmv_obd *lmv, u32 mdt_idx, int *index) -{ - int i; - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i]) - continue; - - if (lmv->tgts[i]->ltd_idx == mdt_idx) { - if (index) - *index = i; - return lmv->tgts[i]; - } - } - - return ERR_PTR(-ENODEV); -} - -static inline int -lmv_find_target_index(struct lmv_obd *lmv, const struct lu_fid *fid) -{ - struct lmv_tgt_desc *ltd; - u32 mdt_idx = 0; - int index = 0; - - if (lmv->desc.ld_tgt_count > 1) { - int rc; - - rc = lmv_fld_lookup(lmv, fid, &mdt_idx); - if (rc < 0) - return rc; - } - - ltd = lmv_get_target(lmv, mdt_idx, &index); - if (IS_ERR(ltd)) - return PTR_ERR(ltd); - - return index; -} - -static inline struct lmv_tgt_desc * -lmv_find_target(struct lmv_obd *lmv, const struct lu_fid *fid) -{ - int index; - - index = lmv_find_target_index(lmv, fid); - if (index < 0) - return ERR_PTR(index); - - return lmv->tgts[index]; -} - -static inline int lmv_stripe_md_size(int stripe_count) -{ - struct lmv_stripe_md *lsm; - - return sizeof(*lsm) + stripe_count * sizeof(lsm->lsm_md_oinfo[0]); -} - -int lmv_name_to_stripe_index(enum lmv_hash_type hashtype, - unsigned int 
max_mdt_index, - const char *name, int namelen); - -static inline const struct lmv_oinfo * -lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name, - int namelen) -{ - int stripe_index; - - stripe_index = lmv_name_to_stripe_index(lsm->lsm_md_hash_type, - lsm->lsm_md_stripe_count, - name, namelen); - if (stripe_index < 0) - return ERR_PTR(stripe_index); - - LASSERTF(stripe_index < lsm->lsm_md_stripe_count, - "stripe_index = %d, stripe_count = %d hash_type = %x name = %.*s\n", - stripe_index, lsm->lsm_md_stripe_count, - lsm->lsm_md_hash_type, namelen, name); - - return &lsm->lsm_md_oinfo[stripe_index]; -} - -static inline bool lmv_need_try_all_stripes(const struct lmv_stripe_md *lsm) -{ - return !lmv_is_known_hash_type(lsm->lsm_md_hash_type) || - lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION; -} - -struct lmv_tgt_desc -*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, - struct lu_fid *fid); -/* lproc_lmv.c */ -void lprocfs_lmv_init_vars(struct lprocfs_static_vars *lvars); - -extern const struct file_operations lmv_proc_target_fops; - -#endif diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c deleted file mode 100644 index 65f94e6ecaad..000000000000 --- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c +++ /dev/null @@ -1,3131 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LMV -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "lmv_internal.h" - -static int lmv_check_connect(struct obd_device *obd); - -static void lmv_activate_target(struct lmv_obd *lmv, - struct lmv_tgt_desc *tgt, - int activate) -{ - if (tgt->ltd_active == activate) - return; - - tgt->ltd_active = activate; - lmv->desc.ld_active_tgt_count += (activate ? 1 : -1); - tgt->ltd_exp->exp_obd->obd_inactive = !activate; -} - -/** - * Error codes: - * - * -EINVAL : UUID can't be found in the LMV's target list - * -ENOTCONN: The UUID is found, but the target connection is bad (!) - * -EBADF : The UUID is found, but the OBD of the wrong type (!) 
- */ -static int lmv_set_mdc_active(struct lmv_obd *lmv, const struct obd_uuid *uuid, - int activate) -{ - struct lmv_tgt_desc *tgt = NULL; - struct obd_device *obd; - u32 i; - int rc = 0; - - CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n", - lmv, uuid->uuid, activate); - - spin_lock(&lmv->lmv_lock); - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - tgt = lmv->tgts[i]; - if (!tgt || !tgt->ltd_exp) - continue; - - CDEBUG(D_INFO, "Target idx %d is %s conn %#llx\n", i, - tgt->ltd_uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie); - - if (obd_uuid_equals(uuid, &tgt->ltd_uuid)) - break; - } - - if (i == lmv->desc.ld_tgt_count) { - rc = -EINVAL; - goto out_lmv_lock; - } - - obd = class_exp2obd(tgt->ltd_exp); - if (!obd) { - rc = -ENOTCONN; - goto out_lmv_lock; - } - - CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LMV idx %d\n", - obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd, - obd->obd_type->typ_name, i); - LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0); - - if (tgt->ltd_active == activate) { - CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd, - activate ? "" : "in"); - goto out_lmv_lock; - } - - CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, - activate ? "" : "in"); - lmv_activate_target(lmv, tgt, activate); - - out_lmv_lock: - spin_unlock(&lmv->lmv_lock); - return rc; -} - -static struct obd_uuid *lmv_get_uuid(struct obd_export *exp) -{ - struct lmv_obd *lmv = &exp->exp_obd->u.lmv; - struct lmv_tgt_desc *tgt = lmv->tgts[0]; - - return tgt ? 
obd_get_uuid(tgt->ltd_exp) : NULL; -} - -static int lmv_notify(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data) -{ - struct obd_connect_data *conn_data; - struct lmv_obd *lmv = &obd->u.lmv; - struct obd_uuid *uuid; - int rc = 0; - - if (strcmp(watched->obd_type->typ_name, LUSTRE_MDC_NAME)) { - CERROR("unexpected notification of %s %s!\n", - watched->obd_type->typ_name, - watched->obd_name); - return -EINVAL; - } - - uuid = &watched->u.cli.cl_target_uuid; - if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE) { - /* - * Set MDC as active before notifying the observer, so the - * observer can use the MDC normally. - */ - rc = lmv_set_mdc_active(lmv, uuid, - ev == OBD_NOTIFY_ACTIVE); - if (rc) { - CERROR("%sactivation of %s failed: %d\n", - ev == OBD_NOTIFY_ACTIVE ? "" : "de", - uuid->uuid, rc); - return rc; - } - } else if (ev == OBD_NOTIFY_OCD) { - conn_data = &watched->u.cli.cl_import->imp_connect_data; - /* - * XXX: Make sure that ocd_connect_flags from all targets are - * the same. Otherwise one of MDTs runs wrong version or - * something like this. --umka - */ - obd->obd_self_export->exp_connect_data = *conn_data; - } - - /* - * Pass the notification up the chain. 
- */ - if (obd->obd_observer) - rc = obd_notify(obd->obd_observer, watched, ev, data); - - return rc; -} - -static int lmv_connect(const struct lu_env *env, - struct obd_export **pexp, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *data, - void *localdata) -{ - struct lmv_obd *lmv = &obd->u.lmv; - struct lustre_handle conn = { 0 }; - struct obd_export *exp; - int rc = 0; - - rc = class_connect(&conn, obd, cluuid); - if (rc) { - CERROR("class_connection() returned %d\n", rc); - return rc; - } - - exp = class_conn2export(&conn); - - lmv->connected = 0; - lmv->cluuid = *cluuid; - lmv->conn_data = *data; - - lmv->lmv_tgts_kobj = kobject_create_and_add("target_obds", - &obd->obd_kobj); - rc = lmv_check_connect(obd); - if (rc) - goto out_sysfs; - - *pexp = exp; - - return rc; - -out_sysfs: - if (lmv->lmv_tgts_kobj) - kobject_put(lmv->lmv_tgts_kobj); - - class_disconnect(exp); - - return rc; -} - -static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - u32 i; - int rc = 0; - int change = 0; - - if (lmv->max_easize < easize) { - lmv->max_easize = easize; - change = 1; - } - if (lmv->max_def_easize < def_easize) { - lmv->max_def_easize = def_easize; - change = 1; - } - - if (change == 0) - return 0; - - if (lmv->connected == 0) - return 0; - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - struct lmv_tgt_desc *tgt = lmv->tgts[i]; - - if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) { - CWARN("%s: NULL export for %d\n", obd->obd_name, i); - continue; - } - - rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize); - if (rc) { - CERROR("%s: obd_init_ea_size() failed on MDT target %d: rc = %d\n", - obd->obd_name, i, rc); - break; - } - } - return rc; -} - -#define MAX_STRING_SIZE 128 - -static int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) -{ - struct lmv_obd *lmv = &obd->u.lmv; - struct obd_uuid *cluuid = &lmv->cluuid; - 
struct obd_uuid lmv_mdc_uuid = { "LMV_MDC_UUID" }; - struct obd_device *mdc_obd; - struct obd_export *mdc_exp; - struct lu_fld_target target; - int rc; - - mdc_obd = class_find_client_obd(&tgt->ltd_uuid, LUSTRE_MDC_NAME, - &obd->obd_uuid); - if (!mdc_obd) { - CERROR("target %s not attached\n", tgt->ltd_uuid.uuid); - return -EINVAL; - } - - CDEBUG(D_CONFIG, "connect to %s(%s) - %s, %s FOR %s\n", - mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, - tgt->ltd_uuid.uuid, obd->obd_uuid.uuid, cluuid->uuid); - - if (!mdc_obd->obd_set_up) { - CERROR("target %s is not set up\n", tgt->ltd_uuid.uuid); - return -EINVAL; - } - - rc = obd_connect(NULL, &mdc_exp, mdc_obd, &lmv_mdc_uuid, - &lmv->conn_data, NULL); - if (rc) { - CERROR("target %s connect error %d\n", tgt->ltd_uuid.uuid, rc); - return rc; - } - - /* - * Init fid sequence client for this mdc and add new fld target. - */ - rc = obd_fid_init(mdc_obd, mdc_exp, LUSTRE_SEQ_METADATA); - if (rc) - return rc; - - target.ft_srv = NULL; - target.ft_exp = mdc_exp; - target.ft_idx = tgt->ltd_idx; - - fld_client_add_target(&lmv->lmv_fld, &target); - - rc = obd_register_observer(mdc_obd, obd); - if (rc) { - obd_disconnect(mdc_exp); - CERROR("target %s register_observer error %d\n", - tgt->ltd_uuid.uuid, rc); - return rc; - } - - if (obd->obd_observer) { - /* - * Tell the observer about the new target. 
- */ - rc = obd_notify(obd->obd_observer, mdc_exp->exp_obd, - OBD_NOTIFY_ACTIVE, - (void *)(tgt - lmv->tgts[0])); - if (rc) { - obd_disconnect(mdc_exp); - return rc; - } - } - - tgt->ltd_active = 1; - tgt->ltd_exp = mdc_exp; - lmv->desc.ld_active_tgt_count++; - - md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize); - - CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n", - mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, - atomic_read(&obd->obd_refcount)); - - if (lmv->lmv_tgts_kobj) - /* Even if we failed to create the link, that's fine */ - rc = sysfs_create_link(lmv->lmv_tgts_kobj, &mdc_obd->obd_kobj, - mdc_obd->obd_name); - return 0; -} - -static void lmv_del_target(struct lmv_obd *lmv, int index) -{ - if (!lmv->tgts[index]) - return; - - kfree(lmv->tgts[index]); - lmv->tgts[index] = NULL; -} - -static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, - __u32 index, int gen) -{ - struct lmv_obd *lmv = &obd->u.lmv; - struct obd_device *mdc_obd; - struct lmv_tgt_desc *tgt; - int orig_tgt_count = 0; - int rc = 0; - - CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index); - - mdc_obd = class_find_client_obd(uuidp, LUSTRE_MDC_NAME, - &obd->obd_uuid); - if (!mdc_obd) { - CERROR("%s: Target %s not attached: rc = %d\n", - obd->obd_name, uuidp->uuid, -EINVAL); - return -EINVAL; - } - - mutex_lock(&lmv->lmv_init_mutex); - - if ((index < lmv->tgts_size) && lmv->tgts[index]) { - tgt = lmv->tgts[index]; - CERROR("%s: UUID %s already assigned at LOV target index %d: rc = %d\n", - obd->obd_name, - obd_uuid2str(&tgt->ltd_uuid), index, -EEXIST); - mutex_unlock(&lmv->lmv_init_mutex); - return -EEXIST; - } - - if (index >= lmv->tgts_size) { - /* We need to reallocate the lmv target array. 
*/ - struct lmv_tgt_desc **newtgts, **old = NULL; - __u32 newsize = 1; - __u32 oldsize = 0; - - while (newsize < index + 1) - newsize <<= 1; - newtgts = kcalloc(newsize, sizeof(*newtgts), GFP_NOFS); - if (!newtgts) { - mutex_unlock(&lmv->lmv_init_mutex); - return -ENOMEM; - } - - if (lmv->tgts_size) { - memcpy(newtgts, lmv->tgts, - sizeof(*newtgts) * lmv->tgts_size); - old = lmv->tgts; - oldsize = lmv->tgts_size; - } - - lmv->tgts = newtgts; - lmv->tgts_size = newsize; - smp_rmb(); - kfree(old); - - CDEBUG(D_CONFIG, "tgts: %p size: %d\n", lmv->tgts, - lmv->tgts_size); - } - - tgt = kzalloc(sizeof(*tgt), GFP_NOFS); - if (!tgt) { - mutex_unlock(&lmv->lmv_init_mutex); - return -ENOMEM; - } - - mutex_init(&tgt->ltd_fid_mutex); - tgt->ltd_idx = index; - tgt->ltd_uuid = *uuidp; - tgt->ltd_active = 0; - lmv->tgts[index] = tgt; - if (index >= lmv->desc.ld_tgt_count) { - orig_tgt_count = lmv->desc.ld_tgt_count; - lmv->desc.ld_tgt_count = index + 1; - } - - if (!lmv->connected) { - /* lmv_check_connect() will connect this target. 
*/ - mutex_unlock(&lmv->lmv_init_mutex); - return rc; - } - - /* Otherwise let's connect it ourselves */ - mutex_unlock(&lmv->lmv_init_mutex); - rc = lmv_connect_mdc(obd, tgt); - if (rc) { - spin_lock(&lmv->lmv_lock); - if (lmv->desc.ld_tgt_count == index + 1) - lmv->desc.ld_tgt_count = orig_tgt_count; - memset(tgt, 0, sizeof(*tgt)); - spin_unlock(&lmv->lmv_lock); - } else { - int easize = sizeof(struct lmv_stripe_md) + - lmv->desc.ld_tgt_count * sizeof(struct lu_fid); - lmv_init_ea_size(obd->obd_self_export, easize, 0); - } - - return rc; -} - -static int lmv_check_connect(struct obd_device *obd) -{ - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - u32 i; - int rc; - int easize; - - if (lmv->connected) - return 0; - - mutex_lock(&lmv->lmv_init_mutex); - if (lmv->connected) { - mutex_unlock(&lmv->lmv_init_mutex); - return 0; - } - - if (lmv->desc.ld_tgt_count == 0) { - mutex_unlock(&lmv->lmv_init_mutex); - CERROR("%s: no targets configured.\n", obd->obd_name); - return -EINVAL; - } - - LASSERT(lmv->tgts); - - if (!lmv->tgts[0]) { - mutex_unlock(&lmv->lmv_init_mutex); - CERROR("%s: no target configured for index 0.\n", - obd->obd_name); - return -EINVAL; - } - - CDEBUG(D_CONFIG, "Time to connect %s to %s\n", - lmv->cluuid.uuid, obd->obd_name); - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - tgt = lmv->tgts[i]; - if (!tgt) - continue; - rc = lmv_connect_mdc(obd, tgt); - if (rc) - goto out_disc; - } - - lmv->connected = 1; - easize = lmv_mds_md_size(lmv->desc.ld_tgt_count, LMV_MAGIC); - lmv_init_ea_size(obd->obd_self_export, easize, 0); - mutex_unlock(&lmv->lmv_init_mutex); - return 0; - - out_disc: - while (i-- > 0) { - int rc2; - - tgt = lmv->tgts[i]; - if (!tgt) - continue; - tgt->ltd_active = 0; - if (tgt->ltd_exp) { - --lmv->desc.ld_active_tgt_count; - rc2 = obd_disconnect(tgt->ltd_exp); - if (rc2) { - CERROR("LMV target %s disconnect on MDC idx %d: error %d\n", - tgt->ltd_uuid.uuid, i, rc2); - } - } - } - - 
mutex_unlock(&lmv->lmv_init_mutex); - return rc; -} - -static int lmv_disconnect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) -{ - struct lmv_obd *lmv = &obd->u.lmv; - struct obd_device *mdc_obd; - int rc; - - mdc_obd = class_exp2obd(tgt->ltd_exp); - - if (mdc_obd) { - mdc_obd->obd_force = obd->obd_force; - mdc_obd->obd_fail = obd->obd_fail; - mdc_obd->obd_no_recov = obd->obd_no_recov; - - if (lmv->lmv_tgts_kobj) - sysfs_remove_link(lmv->lmv_tgts_kobj, - mdc_obd->obd_name); - } - - rc = obd_fid_fini(tgt->ltd_exp->exp_obd); - if (rc) - CERROR("Can't finalize fids factory\n"); - - CDEBUG(D_INFO, "Disconnected from %s(%s) successfully\n", - tgt->ltd_exp->exp_obd->obd_name, - tgt->ltd_exp->exp_obd->obd_uuid.uuid); - - obd_register_observer(tgt->ltd_exp->exp_obd, NULL); - rc = obd_disconnect(tgt->ltd_exp); - if (rc) { - if (tgt->ltd_active) { - CERROR("Target %s disconnect error %d\n", - tgt->ltd_uuid.uuid, rc); - } - } - - lmv_activate_target(lmv, tgt, 0); - tgt->ltd_exp = NULL; - return 0; -} - -static int lmv_disconnect(struct obd_export *exp) -{ - struct obd_device *obd = class_exp2obd(exp); - struct lmv_obd *lmv = &obd->u.lmv; - int rc; - u32 i; - - if (!lmv->tgts) - goto out_local; - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp) - continue; - - lmv_disconnect_mdc(obd, lmv->tgts[i]); - } - - if (lmv->lmv_tgts_kobj) - kobject_put(lmv->lmv_tgts_kobj); - -out_local: - /* - * This is the case when no real connection is established by - * lmv_check_connect(). 
- */ - if (!lmv->connected) - class_export_put(exp); - rc = class_disconnect(exp); - lmv->connected = 0; - return rc; -} - -static int lmv_fid2path(struct obd_export *exp, int len, void *karg, - void __user *uarg) -{ - struct obd_device *obddev = class_exp2obd(exp); - struct lmv_obd *lmv = &obddev->u.lmv; - struct getinfo_fid2path *gf; - struct lmv_tgt_desc *tgt; - struct getinfo_fid2path *remote_gf = NULL; - int remote_gf_size = 0; - int rc; - - gf = karg; - tgt = lmv_find_target(lmv, &gf->gf_fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - -repeat_fid2path: - rc = obd_iocontrol(OBD_IOC_FID2PATH, tgt->ltd_exp, len, gf, uarg); - if (rc != 0 && rc != -EREMOTE) - goto out_fid2path; - - /* If remote_gf != NULL, it means just building the - * path on the remote MDT, copy this path segment to gf - */ - if (remote_gf) { - struct getinfo_fid2path *ori_gf; - char *ptr; - - ori_gf = karg; - if (strlen(ori_gf->gf_path) + 1 + - strlen(gf->gf_path) + 1 > ori_gf->gf_pathlen) { - rc = -EOVERFLOW; - goto out_fid2path; - } - - ptr = ori_gf->gf_path; - - memmove(ptr + strlen(gf->gf_path) + 1, ptr, - strlen(ori_gf->gf_path)); - - strncpy(ptr, gf->gf_path, strlen(gf->gf_path)); - ptr += strlen(gf->gf_path); - *ptr = '/'; - } - - CDEBUG(D_INFO, "%s: get path %s " DFID " rec: %llu ln: %u\n", - tgt->ltd_exp->exp_obd->obd_name, - gf->gf_path, PFID(&gf->gf_fid), gf->gf_recno, - gf->gf_linkno); - - if (rc == 0) - goto out_fid2path; - - /* sigh, has to go to another MDT to do path building further */ - if (!remote_gf) { - remote_gf_size = sizeof(*remote_gf) + PATH_MAX; - remote_gf = kzalloc(remote_gf_size, GFP_NOFS); - if (!remote_gf) { - rc = -ENOMEM; - goto out_fid2path; - } - remote_gf->gf_pathlen = PATH_MAX; - } - - if (!fid_is_sane(&gf->gf_fid)) { - CERROR("%s: invalid FID " DFID ": rc = %d\n", - tgt->ltd_exp->exp_obd->obd_name, - PFID(&gf->gf_fid), -EINVAL); - rc = -EINVAL; - goto out_fid2path; - } - - tgt = lmv_find_target(lmv, &gf->gf_fid); - if (IS_ERR(tgt)) { - rc = -EINVAL; - 
goto out_fid2path; - } - - remote_gf->gf_fid = gf->gf_fid; - remote_gf->gf_recno = -1; - remote_gf->gf_linkno = -1; - memset(remote_gf->gf_path, 0, remote_gf->gf_pathlen); - gf = remote_gf; - goto repeat_fid2path; - -out_fid2path: - kfree(remote_gf); - return rc; -} - -static int lmv_hsm_req_count(struct lmv_obd *lmv, - const struct hsm_user_request *hur, - const struct lmv_tgt_desc *tgt_mds) -{ - u32 i, nr = 0; - struct lmv_tgt_desc *curr_tgt; - - /* count how many requests must be sent to the given target */ - for (i = 0; i < hur->hur_request.hr_itemcount; i++) { - curr_tgt = lmv_find_target(lmv, &hur->hur_user_item[i].hui_fid); - if (IS_ERR(curr_tgt)) - return PTR_ERR(curr_tgt); - if (obd_uuid_equals(&curr_tgt->ltd_uuid, &tgt_mds->ltd_uuid)) - nr++; - } - return nr; -} - -static int lmv_hsm_req_build(struct lmv_obd *lmv, - struct hsm_user_request *hur_in, - const struct lmv_tgt_desc *tgt_mds, - struct hsm_user_request *hur_out) -{ - int i, nr_out; - struct lmv_tgt_desc *curr_tgt; - - /* build the hsm_user_request for the given target */ - hur_out->hur_request = hur_in->hur_request; - nr_out = 0; - for (i = 0; i < hur_in->hur_request.hr_itemcount; i++) { - curr_tgt = lmv_find_target(lmv, - &hur_in->hur_user_item[i].hui_fid); - if (IS_ERR(curr_tgt)) - return PTR_ERR(curr_tgt); - if (obd_uuid_equals(&curr_tgt->ltd_uuid, &tgt_mds->ltd_uuid)) { - hur_out->hur_user_item[nr_out] = - hur_in->hur_user_item[i]; - nr_out++; - } - } - hur_out->hur_request.hr_itemcount = nr_out; - memcpy(hur_data(hur_out), hur_data(hur_in), - hur_in->hur_request.hr_data_len); - - return 0; -} - -static int lmv_hsm_ct_unregister(struct lmv_obd *lmv, unsigned int cmd, int len, - struct lustre_kernelcomm *lk, - void __user *uarg) -{ - __u32 i; - - /* unregister request (call from llapi_hsm_copytool_fini) */ - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - struct lmv_tgt_desc *tgt = lmv->tgts[i]; - - if (!tgt || !tgt->ltd_exp) - continue; - - /* best effort: try to clean as much as possible - 
* (continue on error) - */ - obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp, len, lk, uarg); - } - - /* Whatever the result, remove copytool from kuc groups. - * Unreached coordinators will get EPIPE on next requests - * and will unregister automatically. - */ - return libcfs_kkuc_group_rem(lk->lk_uid, lk->lk_group); -} - -static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len, - struct lustre_kernelcomm *lk, void __user *uarg) -{ - struct file *filp; - __u32 i, j; - int err, rc = 0; - bool any_set = false; - struct kkuc_ct_data kcd = { 0 }; - - /* All or nothing: try to register to all MDS. - * In case of failure, unregister from previous MDS, - * except if it because of inactive target. - */ - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - struct lmv_tgt_desc *tgt = lmv->tgts[i]; - - if (!tgt || !tgt->ltd_exp) - continue; - - err = obd_iocontrol(cmd, tgt->ltd_exp, len, lk, uarg); - if (err) { - if (tgt->ltd_active) { - /* permanent error */ - CERROR("error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n", - tgt->ltd_uuid.uuid, i, cmd, err); - rc = err; - lk->lk_flags |= LK_FLG_STOP; - /* unregister from previous MDS */ - for (j = 0; j < i; j++) { - tgt = lmv->tgts[j]; - - if (!tgt || !tgt->ltd_exp) - continue; - obd_iocontrol(cmd, tgt->ltd_exp, len, - lk, uarg); - } - return rc; - } - /* else: transient error. 
- * kuc will register to the missing MDT when it is back - */ - } else { - any_set = true; - } - } - - if (!any_set) - /* no registration done: return error */ - return -ENOTCONN; - - /* at least one registration done, with no failure */ - filp = fget(lk->lk_wfd); - if (!filp) - return -EBADF; - - kcd.kcd_magic = KKUC_CT_DATA_MAGIC; - kcd.kcd_uuid = lmv->cluuid; - kcd.kcd_archive = lk->lk_data; - - rc = libcfs_kkuc_group_add(filp, lk->lk_uid, lk->lk_group, - &kcd, sizeof(kcd)); - if (rc) - fput(filp); - - return rc; -} - -static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp, - int len, void *karg, void __user *uarg) -{ - struct obd_device *obddev = class_exp2obd(exp); - struct lmv_obd *lmv = &obddev->u.lmv; - struct lmv_tgt_desc *tgt = NULL; - u32 i = 0; - int rc = 0; - int set = 0; - u32 count = lmv->desc.ld_tgt_count; - - if (count == 0) - return -ENOTTY; - - switch (cmd) { - case IOC_OBD_STATFS: { - struct obd_ioctl_data *data = karg; - struct obd_device *mdc_obd; - struct obd_statfs stat_buf = {0}; - __u32 index; - - memcpy(&index, data->ioc_inlbuf2, sizeof(__u32)); - if (index >= count) - return -ENODEV; - - tgt = lmv->tgts[index]; - if (!tgt || !tgt->ltd_active) - return -ENODATA; - - mdc_obd = class_exp2obd(tgt->ltd_exp); - if (!mdc_obd) - return -EINVAL; - - /* copy UUID */ - if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(mdc_obd), - min((int)data->ioc_plen2, - (int)sizeof(struct obd_uuid)))) - return -EFAULT; - - rc = obd_statfs(NULL, tgt->ltd_exp, &stat_buf, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - 0); - if (rc) - return rc; - if (copy_to_user(data->ioc_pbuf1, &stat_buf, - min((int)data->ioc_plen1, - (int)sizeof(stat_buf)))) - return -EFAULT; - break; - } - case OBD_IOC_QUOTACTL: { - struct if_quotactl *qctl = karg; - struct obd_quotactl *oqctl; - - if (qctl->qc_valid == QC_MDTIDX) { - if (count <= qctl->qc_idx) - return -EINVAL; - - tgt = lmv->tgts[qctl->qc_idx]; - if (!tgt || !tgt->ltd_exp) - return -EINVAL; - } else if 
(qctl->qc_valid == QC_UUID) { - for (i = 0; i < count; i++) { - tgt = lmv->tgts[i]; - if (!tgt) - continue; - if (!obd_uuid_equals(&tgt->ltd_uuid, - &qctl->obd_uuid)) - continue; - - if (!tgt->ltd_exp) - return -EINVAL; - - break; - } - } else { - return -EINVAL; - } - - if (i >= count) - return -EAGAIN; - - LASSERT(tgt && tgt->ltd_exp); - oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS); - if (!oqctl) - return -ENOMEM; - - QCTL_COPY(oqctl, qctl); - rc = obd_quotactl(tgt->ltd_exp, oqctl); - if (rc == 0) { - QCTL_COPY(qctl, oqctl); - qctl->qc_valid = QC_MDTIDX; - qctl->obd_uuid = tgt->ltd_uuid; - } - kfree(oqctl); - break; - } - case OBD_IOC_CHANGELOG_SEND: - case OBD_IOC_CHANGELOG_CLEAR: { - struct ioc_changelog *icc = karg; - - if (icc->icc_mdtindex >= count) - return -ENODEV; - - tgt = lmv->tgts[icc->icc_mdtindex]; - if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) - return -ENODEV; - rc = obd_iocontrol(cmd, tgt->ltd_exp, sizeof(*icc), icc, NULL); - break; - } - case LL_IOC_GET_CONNECT_FLAGS: { - tgt = lmv->tgts[0]; - - if (!tgt || !tgt->ltd_exp) - return -ENODATA; - rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); - break; - } - case LL_IOC_FID2MDTIDX: { - struct lu_fid *fid = karg; - int mdt_index; - - rc = lmv_fld_lookup(lmv, fid, &mdt_index); - if (rc) - return rc; - - /* - * Note: this is from llite(see ll_dir_ioctl()), @uarg does not - * point to user space memory for FID2MDTIDX. 
- */ - *(__u32 *)uarg = mdt_index; - break; - } - case OBD_IOC_FID2PATH: { - rc = lmv_fid2path(exp, len, karg, uarg); - break; - } - case LL_IOC_HSM_STATE_GET: - case LL_IOC_HSM_STATE_SET: - case LL_IOC_HSM_ACTION: { - struct md_op_data *op_data = karg; - - tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - if (!tgt->ltd_exp) - return -EINVAL; - - rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); - break; - } - case LL_IOC_HSM_PROGRESS: { - const struct hsm_progress_kernel *hpk = karg; - - tgt = lmv_find_target(lmv, &hpk->hpk_fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); - break; - } - case LL_IOC_HSM_REQUEST: { - struct hsm_user_request *hur = karg; - unsigned int reqcount = hur->hur_request.hr_itemcount; - - if (reqcount == 0) - return 0; - - /* if the request is about a single fid - * or if there is a single MDS, no need to split - * the request. - */ - if (reqcount == 1 || count == 1) { - tgt = lmv_find_target(lmv, - &hur->hur_user_item[0].hui_fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); - } else { - /* split fid list to their respective MDS */ - for (i = 0; i < count; i++) { - struct hsm_user_request *req; - size_t reqlen; - int nr, rc1; - - tgt = lmv->tgts[i]; - if (!tgt || !tgt->ltd_exp) - continue; - - nr = lmv_hsm_req_count(lmv, hur, tgt); - if (nr < 0) - return nr; - if (nr == 0) /* nothing for this MDS */ - continue; - - /* build a request with fids for this MDS */ - reqlen = offsetof(typeof(*hur), - hur_user_item[nr]) - + hur->hur_request.hr_data_len; - req = kvzalloc(reqlen, GFP_NOFS); - if (!req) - return -ENOMEM; - - rc1 = lmv_hsm_req_build(lmv, hur, tgt, req); - if (rc1 < 0) - goto hsm_req_err; - - rc1 = obd_iocontrol(cmd, tgt->ltd_exp, reqlen, - req, uarg); -hsm_req_err: - if (rc1 != 0 && rc == 0) - rc = rc1; - kvfree(req); - } - } - break; - } - case LL_IOC_LOV_SWAP_LAYOUTS: { - 
struct md_op_data *op_data = karg; - struct lmv_tgt_desc *tgt1, *tgt2; - - tgt1 = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(tgt1)) - return PTR_ERR(tgt1); - - tgt2 = lmv_find_target(lmv, &op_data->op_fid2); - if (IS_ERR(tgt2)) - return PTR_ERR(tgt2); - - if (!tgt1->ltd_exp || !tgt2->ltd_exp) - return -EINVAL; - - /* only files on same MDT can have their layouts swapped */ - if (tgt1->ltd_idx != tgt2->ltd_idx) - return -EPERM; - - rc = obd_iocontrol(cmd, tgt1->ltd_exp, len, karg, uarg); - break; - } - case LL_IOC_HSM_CT_START: { - struct lustre_kernelcomm *lk = karg; - - if (lk->lk_flags & LK_FLG_STOP) - rc = lmv_hsm_ct_unregister(lmv, cmd, len, lk, uarg); - else - rc = lmv_hsm_ct_register(lmv, cmd, len, lk, uarg); - break; - } - default: - for (i = 0; i < count; i++) { - struct obd_device *mdc_obd; - int err; - - tgt = lmv->tgts[i]; - if (!tgt || !tgt->ltd_exp) - continue; - /* ll_umount_begin() sets force flag but for lmv, not - * mdc. Let's pass it through - */ - mdc_obd = class_exp2obd(tgt->ltd_exp); - mdc_obd->obd_force = obddev->obd_force; - err = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg); - if (err) { - if (tgt->ltd_active) { - CERROR("%s: error: iocontrol MDC %s on MDTidx %d cmd %x: err = %d\n", - lmv2obd_dev(lmv)->obd_name, - tgt->ltd_uuid.uuid, i, cmd, err); - if (!rc) - rc = err; - } - } else { - set = 1; - } - } - if (!set && !rc) - rc = -EIO; - } - return rc; -} - -/** - * This is _inode_ placement policy function (not name). - */ -static int lmv_placement_policy(struct obd_device *obd, - struct md_op_data *op_data, u32 *mds) -{ - struct lmv_obd *lmv = &obd->u.lmv; - - LASSERT(mds); - - if (lmv->desc.ld_tgt_count == 1) { - *mds = 0; - return 0; - } - - if (op_data->op_default_stripe_offset != -1) { - *mds = op_data->op_default_stripe_offset; - return 0; - } - - /** - * If stripe_offset is provided during setdirstripe - * (setdirstripe -i xx), xx MDS will be chosen. 
- */ - if (op_data->op_cli_flags & CLI_SET_MEA && op_data->op_data) { - struct lmv_user_md *lum; - - lum = op_data->op_data; - if (le32_to_cpu(lum->lum_stripe_offset) != (__u32)-1) { - *mds = le32_to_cpu(lum->lum_stripe_offset); - } else { - /* - * -1 means default, which will be in the same MDT with - * the stripe - */ - *mds = op_data->op_mds; - lum->lum_stripe_offset = cpu_to_le32(op_data->op_mds); - } - } else { - /* - * Allocate new fid on target according to operation type and - * parent home mds. - */ - *mds = op_data->op_mds; - } - - return 0; -} - -int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds) -{ - struct lmv_tgt_desc *tgt; - int rc; - - tgt = lmv_get_target(lmv, mds, NULL); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - /* - * New seq alloc and FLD setup should be atomic. Otherwise we may find - * on server that seq in new allocated fid is not yet known. - */ - mutex_lock(&tgt->ltd_fid_mutex); - - if (tgt->ltd_active == 0 || !tgt->ltd_exp) { - rc = -ENODEV; - goto out; - } - - /* - * Asking underlaying tgt layer to allocate new fid. 
- */ - rc = obd_fid_alloc(NULL, tgt->ltd_exp, fid, NULL); - if (rc > 0) { - LASSERT(fid_is_sane(fid)); - rc = 0; - } - -out: - mutex_unlock(&tgt->ltd_fid_mutex); - return rc; -} - -int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp, - struct lu_fid *fid, struct md_op_data *op_data) -{ - struct obd_device *obd = class_exp2obd(exp); - struct lmv_obd *lmv = &obd->u.lmv; - u32 mds = 0; - int rc; - - LASSERT(op_data); - LASSERT(fid); - - rc = lmv_placement_policy(obd, op_data, &mds); - if (rc) { - CERROR("Can't get target for allocating fid, rc %d\n", - rc); - return rc; - } - - rc = __lmv_fid_alloc(lmv, fid, mds); - if (rc) { - CERROR("Can't alloc new fid, rc %d\n", rc); - return rc; - } - - return rc; -} - -static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - struct lmv_obd *lmv = &obd->u.lmv; - struct lprocfs_static_vars lvars = { NULL }; - struct lmv_desc *desc; - int rc; - - if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { - CERROR("LMV setup requires a descriptor\n"); - return -EINVAL; - } - - desc = (struct lmv_desc *)lustre_cfg_buf(lcfg, 1); - if (sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) { - CERROR("Lmv descriptor size wrong: %d > %d\n", - (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1)); - return -EINVAL; - } - - lmv->tgts_size = 32U; - lmv->tgts = kcalloc(lmv->tgts_size, sizeof(*lmv->tgts), GFP_NOFS); - if (!lmv->tgts) - return -ENOMEM; - - obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid); - lmv->desc.ld_tgt_count = 0; - lmv->desc.ld_active_tgt_count = 0; - lmv->max_def_easize = 0; - lmv->max_easize = 0; - - spin_lock_init(&lmv->lmv_lock); - mutex_init(&lmv->lmv_init_mutex); - - lprocfs_lmv_init_vars(&lvars); - - lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars); - debugfs_create_file("target_obd", 0444, obd->obd_debugfs_entry, obd, - &lmv_proc_target_fops); - rc = fld_client_init(&lmv->lmv_fld, obd->obd_name, - LUSTRE_CLI_FLD_HASH_DHT); - if (rc) { - CERROR("Can't init FLD, err %d\n", rc); - goto out; - } - - return 0; 
- -out: - return rc; -} - -static int lmv_cleanup(struct obd_device *obd) -{ - struct lmv_obd *lmv = &obd->u.lmv; - - fld_client_fini(&lmv->lmv_fld); - if (lmv->tgts) { - int i; - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i]) - continue; - lmv_del_target(lmv, i); - } - kfree(lmv->tgts); - lmv->tgts_size = 0; - } - return 0; -} - -static int lmv_process_config(struct obd_device *obd, u32 len, void *buf) -{ - struct lustre_cfg *lcfg = buf; - struct obd_uuid obd_uuid; - int gen; - __u32 index; - int rc; - - switch (lcfg->lcfg_command) { - case LCFG_ADD_MDC: - /* modify_mdc_tgts add 0:lustre-clilmv 1:lustre-MDT0000_UUID - * 2:0 3:1 4:lustre-MDT0000-mdc_UUID - */ - if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid)) { - rc = -EINVAL; - goto out; - } - - obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1)); - - if (sscanf(lustre_cfg_buf(lcfg, 2), "%u", &index) != 1) { - rc = -EINVAL; - goto out; - } - if (sscanf(lustre_cfg_buf(lcfg, 3), "%d", &gen) != 1) { - rc = -EINVAL; - goto out; - } - rc = lmv_add_target(obd, &obd_uuid, index, gen); - goto out; - default: - CERROR("Unknown command: %d\n", lcfg->lcfg_command); - rc = -EINVAL; - goto out; - } -out: - return rc; -} - -static int lmv_statfs(const struct lu_env *env, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, __u32 flags) -{ - struct obd_device *obd = class_exp2obd(exp); - struct lmv_obd *lmv = &obd->u.lmv; - struct obd_statfs *temp; - int rc = 0; - u32 i; - - temp = kzalloc(sizeof(*temp), GFP_NOFS); - if (!temp) - return -ENOMEM; - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp) - continue; - - rc = obd_statfs(env, lmv->tgts[i]->ltd_exp, temp, - max_age, flags); - if (rc) { - CERROR("can't stat MDS #%d (%s), error %d\n", i, - lmv->tgts[i]->ltd_exp->exp_obd->obd_name, - rc); - goto out_free_temp; - } - - if (i == 0) { - *osfs = *temp; - /* If the statfs is from mount, it will needs - * retrieve necessary information from MDT0. 
- * i.e. mount does not need the merged osfs - * from all of MDT. - * And also clients can be mounted as long as - * MDT0 is in service - */ - if (flags & OBD_STATFS_FOR_MDT0) - goto out_free_temp; - } else { - osfs->os_bavail += temp->os_bavail; - osfs->os_blocks += temp->os_blocks; - osfs->os_ffree += temp->os_ffree; - osfs->os_files += temp->os_files; - } - } - -out_free_temp: - kfree(temp); - return rc; -} - -static int lmv_getstatus(struct obd_export *exp, - struct lu_fid *fid) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - - return md_getstatus(lmv->tgts[0]->ltd_exp, fid); -} - -static int lmv_getxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 obd_md_valid, const char *name, size_t buf_size, - struct ptlrpc_request **req) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - return md_getxattr(tgt->ltd_exp, fid, obd_md_valid, name, buf_size, - req); -} - -static int lmv_setxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 obd_md_valid, const char *name, - const void *value, size_t value_size, - unsigned int xattr_flags, u32 suppgid, - struct ptlrpc_request **req) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - return md_setxattr(tgt->ltd_exp, fid, obd_md_valid, name, - value, value_size, xattr_flags, suppgid, req); -} - -static int lmv_getattr(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - if (op_data->op_flags & MF_GET_MDT_IDX) { - op_data->op_mds = tgt->ltd_idx; - return 0; - } 
- - return md_getattr(tgt->ltd_exp, op_data, request); -} - -static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - u32 i; - - CDEBUG(D_INODE, "CBDATA for " DFID "\n", PFID(fid)); - - /* - * With DNE every object can have two locks in different namespaces: - * lookup lock in space of MDT storing direntry and update/open lock in - * space of MDT storing inode. - */ - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp) - continue; - md_null_inode(lmv->tgts[i]->ltd_exp, fid); - } - - return 0; -} - -static int lmv_close(struct obd_export *exp, struct md_op_data *op_data, - struct md_open_data *mod, struct ptlrpc_request **request) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - CDEBUG(D_INODE, "CLOSE " DFID "\n", PFID(&op_data->op_fid1)); - return md_close(tgt->ltd_exp, op_data, mod, request); -} - -/** - * Choosing the MDT by name or FID in @op_data. - * For non-striped directory, it will locate MDT by fid. - * For striped-directory, it will locate MDT by name. And also - * it will reset op_fid1 with the FID of the chosen stripe. 
- **/ -static struct lmv_tgt_desc * -lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm, - const char *name, int namelen, struct lu_fid *fid, - u32 *mds) -{ - const struct lmv_oinfo *oinfo; - struct lmv_tgt_desc *tgt; - - if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_NAME_HASH)) { - if (cfs_fail_val >= lsm->lsm_md_stripe_count) - return ERR_PTR(-EBADF); - oinfo = &lsm->lsm_md_oinfo[cfs_fail_val]; - } else { - oinfo = lsm_name_to_stripe_info(lsm, name, namelen); - if (IS_ERR(oinfo)) - return ERR_CAST(oinfo); - } - - if (fid) - *fid = oinfo->lmo_fid; - if (mds) - *mds = oinfo->lmo_mds; - - tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL); - - CDEBUG(D_INFO, "locate on mds %u " DFID "\n", oinfo->lmo_mds, - PFID(&oinfo->lmo_fid)); - return tgt; -} - -/** - * Locate mds by fid or name - * - * For striped directory (lsm != NULL), it will locate the stripe - * by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type - * is unknown, it will return -EBADFD, and lmv_intent_lookup might need - * walk through all of stripes to locate the entry. - * - * For normal direcotry, it will locate MDS by FID directly. - * \param[in] lmv LMV device - * \param[in] op_data client MD stack parameters, name, namelen - * mds_num etc. - * \param[in] fid object FID used to locate MDS. - * - * retval pointer to the lmv_tgt_desc if succeed. - * ERR_PTR(errno) if failed. - */ -struct lmv_tgt_desc* -lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, - struct lu_fid *fid) -{ - struct lmv_stripe_md *lsm = op_data->op_mea1; - struct lmv_tgt_desc *tgt; - - /* - * During creating VOLATILE file, it should honor the mdt - * index if the file under striped dir is being restored, see - * ct_restore(). 
- */ - if (op_data->op_bias & MDS_CREATE_VOLATILE && - (int)op_data->op_mds != -1) { - int i; - - tgt = lmv_get_target(lmv, op_data->op_mds, NULL); - if (IS_ERR(tgt)) - return tgt; - - if (lsm) { - /* refill the right parent fid */ - for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - struct lmv_oinfo *oinfo; - - oinfo = &lsm->lsm_md_oinfo[i]; - if (oinfo->lmo_mds == op_data->op_mds) { - *fid = oinfo->lmo_fid; - break; - } - } - - if (i == lsm->lsm_md_stripe_count) - *fid = lsm->lsm_md_oinfo[0].lmo_fid; - } - - return tgt; - } - - if (!lsm || !op_data->op_namelen) { - tgt = lmv_find_target(lmv, fid); - if (IS_ERR(tgt)) - return tgt; - - op_data->op_mds = tgt->ltd_idx; - - return tgt; - } - - return lmv_locate_target_for_name(lmv, lsm, op_data->op_name, - op_data->op_namelen, fid, - &op_data->op_mds); -} - -static int lmv_create(struct obd_export *exp, struct md_op_data *op_data, - const void *data, size_t datalen, umode_t mode, - uid_t uid, gid_t gid, kernel_cap_t cap_effective, - __u64 rdev, struct ptlrpc_request **request) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - int rc; - - if (!lmv->desc.ld_active_tgt_count) - return -EIO; - - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - CDEBUG(D_INODE, "CREATE name '%.*s' on " DFID " -> mds #%x\n", - (int)op_data->op_namelen, op_data->op_name, - PFID(&op_data->op_fid1), op_data->op_mds); - - rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); - if (rc) - return rc; - - if (exp_connect_flags(exp) & OBD_CONNECT_DIR_STRIPE) { - /* - * Send the create request to the MDT where the object - * will be located - */ - tgt = lmv_find_target(lmv, &op_data->op_fid2); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - op_data->op_mds = tgt->ltd_idx; - } else { - CDEBUG(D_CONFIG, "Server doesn't support striped dirs\n"); - } - - CDEBUG(D_INODE, "CREATE obj " DFID " -> mds #%x\n", - PFID(&op_data->op_fid1), 
op_data->op_mds); - - op_data->op_flags |= MF_MDC_CANCEL_FID1; - rc = md_create(tgt->ltd_exp, op_data, data, datalen, mode, uid, gid, - cap_effective, rdev, request); - - if (rc == 0) { - if (!*request) - return rc; - CDEBUG(D_INODE, "Created - " DFID "\n", PFID(&op_data->op_fid2)); - } - return rc; -} - -static int -lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, - const union ldlm_policy_data *policy, struct md_op_data *op_data, - struct lustre_handle *lockh, __u64 extra_lock_flags) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - CDEBUG(D_INODE, "ENQUEUE on " DFID "\n", PFID(&op_data->op_fid1)); - - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - CDEBUG(D_INODE, "ENQUEUE on " DFID " -> mds #%u\n", - PFID(&op_data->op_fid1), tgt->ltd_idx); - - return md_enqueue(tgt->ltd_exp, einfo, policy, op_data, lockh, - extra_lock_flags); -} - -static int -lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **preq) -{ - struct ptlrpc_request *req = NULL; - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - struct mdt_body *body; - int rc; - - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - CDEBUG(D_INODE, "GETATTR_NAME for %*s on " DFID " -> mds #%u\n", - (int)op_data->op_namelen, op_data->op_name, - PFID(&op_data->op_fid1), tgt->ltd_idx); - - rc = md_getattr_name(tgt->ltd_exp, op_data, preq); - if (rc != 0) - return rc; - - body = req_capsule_server_get(&(*preq)->rq_pill, &RMF_MDT_BODY); - if (body->mbo_valid & OBD_MD_MDS) { - struct lu_fid rid = body->mbo_fid1; - - CDEBUG(D_INODE, "Request attrs for " DFID "\n", - PFID(&rid)); - - tgt = lmv_find_target(lmv, &rid); - if (IS_ERR(tgt)) { - ptlrpc_req_finished(*preq); - *preq = NULL; - return PTR_ERR(tgt); - } - - op_data->op_fid1 = rid; - 
op_data->op_valid |= OBD_MD_FLCROSSREF; - op_data->op_namelen = 0; - op_data->op_name = NULL; - rc = md_getattr_name(tgt->ltd_exp, op_data, &req); - ptlrpc_req_finished(*preq); - *preq = req; - } - - return rc; -} - -#define md_op_data_fid(op_data, fl) \ - (fl == MF_MDC_CANCEL_FID1 ? &op_data->op_fid1 : \ - fl == MF_MDC_CANCEL_FID2 ? &op_data->op_fid2 : \ - fl == MF_MDC_CANCEL_FID3 ? &op_data->op_fid3 : \ - fl == MF_MDC_CANCEL_FID4 ? &op_data->op_fid4 : \ - NULL) - -static int lmv_early_cancel(struct obd_export *exp, struct lmv_tgt_desc *tgt, - struct md_op_data *op_data, int op_tgt, - enum ldlm_mode mode, int bits, int flag) -{ - struct lu_fid *fid = md_op_data_fid(op_data, flag); - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - union ldlm_policy_data policy = { { 0 } }; - int rc = 0; - - if (!fid_is_sane(fid)) - return 0; - - if (!tgt) { - tgt = lmv_find_target(lmv, fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - } - - if (tgt->ltd_idx != op_tgt) { - CDEBUG(D_INODE, "EARLY_CANCEL on " DFID "\n", PFID(fid)); - policy.l_inodebits.bits = bits; - rc = md_cancel_unused(tgt->ltd_exp, fid, &policy, - mode, LCF_ASYNC, NULL); - } else { - CDEBUG(D_INODE, - "EARLY_CANCEL skip operation target %d on " DFID "\n", - op_tgt, PFID(fid)); - op_data->op_flags |= flag; - rc = 0; - } - - return rc; -} - -/* - * llite passes fid of an target inode in op_data->op_fid1 and id of directory in - * op_data->op_fid2 - */ -static int lmv_link(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - int rc; - - LASSERT(op_data->op_namelen != 0); - - CDEBUG(D_INODE, "LINK " DFID ":%*s to " DFID "\n", - PFID(&op_data->op_fid2), (int)op_data->op_namelen, - op_data->op_name, PFID(&op_data->op_fid1)); - - op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); - op_data->op_fsgid = from_kgid(&init_user_ns, 
current_fsgid()); - op_data->op_cap = current_cap(); - if (op_data->op_mea2) { - struct lmv_stripe_md *lsm = op_data->op_mea2; - const struct lmv_oinfo *oinfo; - - oinfo = lsm_name_to_stripe_info(lsm, op_data->op_name, - op_data->op_namelen); - if (IS_ERR(oinfo)) - return PTR_ERR(oinfo); - - op_data->op_fid2 = oinfo->lmo_fid; - } - - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - /* - * Cancel UPDATE lock on child (fid1). - */ - op_data->op_flags |= MF_MDC_CANCEL_FID2; - rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX, - MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1); - if (rc != 0) - return rc; - - return md_link(tgt->ltd_exp, op_data, request); -} - -static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, - const char *old, size_t oldlen, - const char *new, size_t newlen, - struct ptlrpc_request **request) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct obd_export *target_exp; - struct lmv_tgt_desc *src_tgt; - struct lmv_tgt_desc *tgt_tgt; - struct mdt_body *body; - int rc; - - LASSERT(oldlen != 0); - - CDEBUG(D_INODE, "RENAME %.*s in " DFID ":%d to %.*s in " DFID ":%d\n", - (int)oldlen, old, PFID(&op_data->op_fid1), - op_data->op_mea1 ? op_data->op_mea1->lsm_md_stripe_count : 0, - (int)newlen, new, PFID(&op_data->op_fid2), - op_data->op_mea2 ? 
op_data->op_mea2->lsm_md_stripe_count : 0); - - op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); - op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); - op_data->op_cap = current_cap(); - - if (op_data->op_cli_flags & CLI_MIGRATE) { - LASSERTF(fid_is_sane(&op_data->op_fid3), "invalid FID " DFID "\n", - PFID(&op_data->op_fid3)); - - if (op_data->op_mea1) { - struct lmv_stripe_md *lsm = op_data->op_mea1; - struct lmv_tgt_desc *tmp; - - /* Fix the parent fid for striped dir */ - tmp = lmv_locate_target_for_name(lmv, lsm, old, - oldlen, - &op_data->op_fid1, - NULL); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - } - - rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); - if (rc) - return rc; - src_tgt = lmv_find_target(lmv, &op_data->op_fid3); - if (IS_ERR(src_tgt)) - return PTR_ERR(src_tgt); - - target_exp = src_tgt->ltd_exp; - } else { - if (op_data->op_mea1) { - struct lmv_stripe_md *lsm = op_data->op_mea1; - - src_tgt = lmv_locate_target_for_name(lmv, lsm, old, - oldlen, - &op_data->op_fid1, - &op_data->op_mds); - } else { - src_tgt = lmv_find_target(lmv, &op_data->op_fid1); - } - if (IS_ERR(src_tgt)) - return PTR_ERR(src_tgt); - - if (op_data->op_mea2) { - struct lmv_stripe_md *lsm = op_data->op_mea2; - - tgt_tgt = lmv_locate_target_for_name(lmv, lsm, new, - newlen, - &op_data->op_fid2, - &op_data->op_mds); - } else { - tgt_tgt = lmv_find_target(lmv, &op_data->op_fid2); - } - if (IS_ERR(tgt_tgt)) - return PTR_ERR(tgt_tgt); - - target_exp = tgt_tgt->ltd_exp; - } - - /* - * LOOKUP lock on src child (fid3) should also be cancelled for - * src_tgt in mdc_rename. - */ - op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3; - - /* - * Cancel UPDATE locks on tgt parent (fid2), tgt_tgt is its - * own target. - */ - rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx, - LCK_EX, MDS_INODELOCK_UPDATE, - MF_MDC_CANCEL_FID2); - if (rc) - return rc; - /* - * Cancel LOOKUP locks on source child (fid3) for parent tgt_tgt. 
- */ - if (fid_is_sane(&op_data->op_fid3)) { - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - /* Cancel LOOKUP lock on its parent */ - rc = lmv_early_cancel(exp, tgt, op_data, src_tgt->ltd_idx, - LCK_EX, MDS_INODELOCK_LOOKUP, - MF_MDC_CANCEL_FID3); - if (rc) - return rc; - - rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx, - LCK_EX, MDS_INODELOCK_FULL, - MF_MDC_CANCEL_FID3); - if (rc) - return rc; - } - -retry_rename: - /* - * Cancel all the locks on tgt child (fid4). - */ - if (fid_is_sane(&op_data->op_fid4)) { - struct lmv_tgt_desc *tgt; - - rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx, - LCK_EX, MDS_INODELOCK_FULL, - MF_MDC_CANCEL_FID4); - if (rc) - return rc; - - tgt = lmv_find_target(lmv, &op_data->op_fid4); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - /* - * Since the target child might be destroyed, and it might - * become orphan, and we can only check orphan on the local - * MDT right now, so we send rename request to the MDT where - * target child is located. If target child does not exist, - * then it will send the request to the target parent - */ - target_exp = tgt->ltd_exp; - } - - rc = md_rename(target_exp, op_data, old, oldlen, new, newlen, request); - if (rc && rc != -EREMOTE) - return rc; - - body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); - if (!body) - return -EPROTO; - - /* Not cross-ref case, just get out of here. 
*/ - if (likely(!(body->mbo_valid & OBD_MD_MDS))) - return rc; - - CDEBUG(D_INODE, "%s: try rename to another MDT for " DFID "\n", - exp->exp_obd->obd_name, PFID(&body->mbo_fid1)); - - op_data->op_fid4 = body->mbo_fid1; - ptlrpc_req_finished(*request); - *request = NULL; - goto retry_rename; -} - -static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, struct ptlrpc_request **request) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - CDEBUG(D_INODE, "SETATTR for " DFID ", valid 0x%x\n", - PFID(&op_data->op_fid1), op_data->op_attr.ia_valid); - - op_data->op_flags |= MF_MDC_CANCEL_FID1; - tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - return md_setattr(tgt->ltd_exp, op_data, ea, ealen, request); -} - -static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid, - struct ptlrpc_request **request) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - return md_sync(tgt->ltd_exp, fid, request); -} - -/** - * Get current minimum entry from striped directory - * - * This function will search the dir entry, whose hash value is the - * closest(>=) to @hash_offset, from all of sub-stripes, and it is - * only being called for striped directory. - * - * \param[in] exp export of LMV - * \param[in] op_data parameters transferred beween client MD stack - * stripe_information will be included in this - * parameter - * \param[in] cb_op ldlm callback being used in enqueue in - * mdc_read_page - * \param[in] hash_offset the hash value, which is used to locate - * minum(closet) dir entry - * \param[in|out] stripe_offset the caller use this to indicate the stripe - * index of last entry, so to avoid hash conflict - * between stripes. 
It will also be used to - * return the stripe index of current dir entry. - * \param[in|out] entp the minum entry and it also is being used - * to input the last dir entry to resolve the - * hash conflict - * - * \param[out] ppage the page which holds the minum entry - * - * \retval = 0 get the entry successfully - * negative errno (< 0) does not get the entry - */ -static int lmv_get_min_striped_entry(struct obd_export *exp, - struct md_op_data *op_data, - struct md_callback *cb_op, - __u64 hash_offset, int *stripe_offset, - struct lu_dirent **entp, - struct page **ppage) -{ - struct lmv_stripe_md *lsm = op_data->op_mea1; - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lu_dirent *min_ent = NULL; - struct page *min_page = NULL; - struct lmv_tgt_desc *tgt; - int stripe_count; - int min_idx = 0; - int rc = 0; - int i; - - stripe_count = lsm->lsm_md_stripe_count; - for (i = 0; i < stripe_count; i++) { - __u64 stripe_hash = hash_offset; - struct lu_dirent *ent = NULL; - struct page *page = NULL; - struct lu_dirpage *dp; - - tgt = lmv_get_target(lmv, lsm->lsm_md_oinfo[i].lmo_mds, NULL); - if (IS_ERR(tgt)) { - rc = PTR_ERR(tgt); - goto out; - } - - /* - * op_data will be shared by each stripe, so we need - * reset these value for each stripe - */ - op_data->op_fid1 = lsm->lsm_md_oinfo[i].lmo_fid; - op_data->op_fid2 = lsm->lsm_md_oinfo[i].lmo_fid; - op_data->op_data = lsm->lsm_md_oinfo[i].lmo_root; -next: - rc = md_read_page(tgt->ltd_exp, op_data, cb_op, stripe_hash, - &page); - if (rc) - goto out; - - dp = page_address(page); - for (ent = lu_dirent_start(dp); ent; - ent = lu_dirent_next(ent)) { - /* Skip dummy entry */ - if (!le16_to_cpu(ent->lde_namelen)) - continue; - - if (le64_to_cpu(ent->lde_hash) < hash_offset) - continue; - - if (le64_to_cpu(ent->lde_hash) == hash_offset && - (*entp == ent || i < *stripe_offset)) - continue; - - /* skip . and .. 
for other stripes */ - if (i && (!strncmp(ent->lde_name, ".", - le16_to_cpu(ent->lde_namelen)) || - !strncmp(ent->lde_name, "..", - le16_to_cpu(ent->lde_namelen)))) - continue; - break; - } - - if (!ent) { - stripe_hash = le64_to_cpu(dp->ldp_hash_end); - - kunmap(page); - put_page(page); - page = NULL; - - /* - * reach the end of current stripe, go to next stripe - */ - if (stripe_hash == MDS_DIR_END_OFF) - continue; - else - goto next; - } - - if (min_ent) { - if (le64_to_cpu(min_ent->lde_hash) > - le64_to_cpu(ent->lde_hash)) { - min_ent = ent; - kunmap(min_page); - put_page(min_page); - min_idx = i; - min_page = page; - } else { - kunmap(page); - put_page(page); - page = NULL; - } - } else { - min_ent = ent; - min_page = page; - min_idx = i; - } - } - -out: - if (*ppage) { - kunmap(*ppage); - put_page(*ppage); - } - *stripe_offset = min_idx; - *entp = min_ent; - *ppage = min_page; - return rc; -} - -/** - * Build dir entry page from a striped directory - * - * This function gets one entry by @offset from a striped directory. It will - * read entries from all of stripes, and choose one closest to the required - * offset(&offset). A few notes - * 1. skip . and .. for non-zero stripes, because there can only have one . - * and .. in a directory. - * 2. op_data will be shared by all of stripes, instead of allocating new - * one, so need to restore before reusing. - * 3. release the entry page if that is not being chosen. - * - * \param[in] exp obd export refer to LMV - * \param[in] op_data hold those MD parameters of read_entry - * \param[in] cb_op ldlm callback being used in enqueue in mdc_read_entry - * \param[out] ldp the entry being read - * \param[out] ppage the page holding the entry. Note: because the entry - * will be accessed in upper layer, so we need hold the - * page until the usages of entry is finished, see - * ll_dir_entry_next. 
- * - * retval =0 if get entry successfully - * <0 cannot get entry - */ -static int lmv_read_striped_page(struct obd_export *exp, - struct md_op_data *op_data, - struct md_callback *cb_op, - __u64 offset, struct page **ppage) -{ - struct inode *master_inode = op_data->op_data; - struct lu_fid master_fid = op_data->op_fid1; - __u64 hash_offset = offset; - __u32 ldp_flags; - struct page *min_ent_page = NULL; - struct page *ent_page = NULL; - struct lu_dirent *min_ent = NULL; - struct lu_dirent *last_ent; - struct lu_dirent *ent; - struct lu_dirpage *dp; - size_t left_bytes; - int ent_idx = 0; - void *area; - int rc; - - /* - * Allocate a page and read entries from all of stripes and fill - * the page by hash order - */ - ent_page = alloc_page(GFP_KERNEL); - if (!ent_page) - return -ENOMEM; - - /* Initialize the entry page */ - dp = kmap(ent_page); - memset(dp, 0, sizeof(*dp)); - dp->ldp_hash_start = cpu_to_le64(offset); - ldp_flags = LDF_COLLIDE; - - area = dp + 1; - left_bytes = PAGE_SIZE - sizeof(*dp); - ent = area; - last_ent = ent; - do { - __u16 ent_size; - - /* Find the minum entry from all sub-stripes */ - rc = lmv_get_min_striped_entry(exp, op_data, cb_op, hash_offset, - &ent_idx, &min_ent, - &min_ent_page); - if (rc) - goto out; - - /* - * If it can not get minum entry, it means it already reaches - * the end of this directory - */ - if (!min_ent) { - last_ent->lde_reclen = 0; - hash_offset = MDS_DIR_END_OFF; - goto out; - } - - ent_size = le16_to_cpu(min_ent->lde_reclen); - - /* - * the last entry lde_reclen is 0, but it might not - * the end of this entry of this temporay entry - */ - if (!ent_size) - ent_size = lu_dirent_calc_size( - le16_to_cpu(min_ent->lde_namelen), - le32_to_cpu(min_ent->lde_attrs)); - if (ent_size > left_bytes) { - last_ent->lde_reclen = cpu_to_le16(0); - hash_offset = le64_to_cpu(min_ent->lde_hash); - goto out; - } - - memcpy(ent, min_ent, ent_size); - - /* - * Replace . with master FID and Replace .. 
with the parent FID - * of master object - */ - if (!strncmp(ent->lde_name, ".", - le16_to_cpu(ent->lde_namelen)) && - le16_to_cpu(ent->lde_namelen) == 1) - fid_cpu_to_le(&ent->lde_fid, &master_fid); - else if (!strncmp(ent->lde_name, "..", - le16_to_cpu(ent->lde_namelen)) && - le16_to_cpu(ent->lde_namelen) == 2) - fid_cpu_to_le(&ent->lde_fid, &op_data->op_fid3); - - left_bytes -= ent_size; - ent->lde_reclen = cpu_to_le16(ent_size); - last_ent = ent; - ent = (void *)ent + ent_size; - hash_offset = le64_to_cpu(min_ent->lde_hash); - if (hash_offset == MDS_DIR_END_OFF) { - last_ent->lde_reclen = 0; - break; - } - } while (1); -out: - if (min_ent_page) { - kunmap(min_ent_page); - put_page(min_ent_page); - } - - if (unlikely(rc)) { - __free_page(ent_page); - ent_page = NULL; - } else { - if (ent == area) - ldp_flags |= LDF_EMPTY; - dp->ldp_flags |= cpu_to_le32(ldp_flags); - dp->ldp_hash_end = cpu_to_le64(hash_offset); - } - - /* - * We do not want to allocate md_op_data during each - * dir entry reading, so op_data will be shared by every stripe, - * then we need to restore it back to original value before - * return to the upper layer - */ - op_data->op_fid1 = master_fid; - op_data->op_fid2 = master_fid; - op_data->op_data = master_inode; - - *ppage = ent_page; - - return rc; -} - -static int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data, - struct md_callback *cb_op, __u64 offset, - struct page **ppage) -{ - struct lmv_stripe_md *lsm = op_data->op_mea1; - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - if (unlikely(lsm)) - return lmv_read_striped_page(exp, op_data, cb_op, offset, ppage); - - tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - return md_read_page(tgt->ltd_exp, op_data, cb_op, offset, ppage); -} - -/** - * Unlink a file/directory - * - * Unlink a file or directory under the parent dir. 
The unlink request - * usually will be sent to the MDT where the child is located, but if - * the client does not have the child FID then request will be sent to the - * MDT where the parent is located. - * - * If the parent is a striped directory then it also needs to locate which - * stripe the name of the child is located, and replace the parent FID - * (@op->op_fid1) with the stripe FID. Note: if the stripe is unknown, - * it will walk through all of sub-stripes until the child is being - * unlinked finally. - * - * \param[in] exp export refer to LMV - * \param[in] op_data different parameters transferred beween client - * MD stacks, name, namelen, FIDs etc. - * op_fid1 is the parent FID, op_fid2 is the child - * FID. - * \param[out] request point to the request of unlink. - * - * retval 0 if succeed - * negative errno if failed. - */ -static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - struct lmv_stripe_md *lsm = op_data->op_mea1; - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *parent_tgt = NULL; - struct lmv_tgt_desc *tgt = NULL; - struct mdt_body *body; - int stripe_index = 0; - int rc; - -retry_unlink: - /* For striped dir, we need to locate the parent as well */ - if (lsm) { - struct lmv_tgt_desc *tmp; - - LASSERT(op_data->op_name && op_data->op_namelen); - - tmp = lmv_locate_target_for_name(lmv, lsm, - op_data->op_name, - op_data->op_namelen, - &op_data->op_fid1, - &op_data->op_mds); - - /* - * return -EBADFD means unknown hash type, might - * need try all sub-stripe here - */ - if (IS_ERR(tmp) && PTR_ERR(tmp) != -EBADFD) - return PTR_ERR(tmp); - - /* - * Note: both migrating dir and unknown hash dir need to - * try all of sub-stripes, so we need start search the - * name from stripe 0, but migrating dir is already handled - * inside lmv_locate_target_for_name(), so we only check - * unknown hash type directory here - */ - if 
(!lmv_is_known_hash_type(lsm->lsm_md_hash_type)) { - struct lmv_oinfo *oinfo; - - oinfo = &lsm->lsm_md_oinfo[stripe_index]; - - op_data->op_fid1 = oinfo->lmo_fid; - op_data->op_mds = oinfo->lmo_mds; - } - } - -try_next_stripe: - /* Send unlink requests to the MDT where the child is located */ - if (likely(!fid_is_zero(&op_data->op_fid2))) - tgt = lmv_find_target(lmv, &op_data->op_fid2); - else if (lsm) - tgt = lmv_get_target(lmv, op_data->op_mds, NULL); - else - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); - - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); - op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); - op_data->op_cap = current_cap(); - - /* - * If child's fid is given, cancel unused locks for it if it is from - * another export than parent. - * - * LOOKUP lock for child (fid3) should also be cancelled on parent - * tgt_tgt in mdc_unlink(). - */ - op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3; - - /* - * Cancel FULL locks on child (fid3). - */ - parent_tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(parent_tgt)) - return PTR_ERR(parent_tgt); - - if (parent_tgt != tgt) { - rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_idx, - LCK_EX, MDS_INODELOCK_LOOKUP, - MF_MDC_CANCEL_FID3); - } - - rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX, - MDS_INODELOCK_FULL, MF_MDC_CANCEL_FID3); - if (rc != 0) - return rc; - - CDEBUG(D_INODE, "unlink with fid=" DFID "/" DFID " -> mds #%u\n", - PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx); - - rc = md_unlink(tgt->ltd_exp, op_data, request); - if (rc != 0 && rc != -EREMOTE && rc != -ENOENT) - return rc; - - /* Try next stripe if it is needed. 
*/ - if (rc == -ENOENT && lsm && lmv_need_try_all_stripes(lsm)) { - struct lmv_oinfo *oinfo; - - stripe_index++; - if (stripe_index >= lsm->lsm_md_stripe_count) - return rc; - - oinfo = &lsm->lsm_md_oinfo[stripe_index]; - - op_data->op_fid1 = oinfo->lmo_fid; - op_data->op_mds = oinfo->lmo_mds; - - ptlrpc_req_finished(*request); - *request = NULL; - - goto try_next_stripe; - } - - body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); - if (!body) - return -EPROTO; - - /* Not cross-ref case, just get out of here. */ - if (likely(!(body->mbo_valid & OBD_MD_MDS))) - return rc; - - CDEBUG(D_INODE, "%s: try unlink to another MDT for " DFID "\n", - exp->exp_obd->obd_name, PFID(&body->mbo_fid1)); - - /* This is a remote object, try remote MDT, Note: it may - * try more than 1 time here, Considering following case - * /mnt/lustre is root on MDT0, remote1 is on MDT1 - * 1. Initially A does not know where remote1 is, it send - * unlink RPC to MDT0, MDT0 return -EREMOTE, it will - * resend unlink RPC to MDT1 (retry 1st time). - * - * 2. During the unlink RPC in flight, - * client B mv /mnt/lustre/remote1 /mnt/lustre/remote2 - * and create new remote1, but on MDT0 - * - * 3. MDT1 get unlink RPC(from A), then do remote lock on - * /mnt/lustre, then lookup get fid of remote1, and find - * it is remote dir again, and replay -EREMOTE again. - * - * 4. Then A will resend unlink RPC to MDT0. (retry 2nd times). - * - * In theory, it might try unlimited time here, but it should - * be very rare case. - */ - op_data->op_fid2 = body->mbo_fid1; - ptlrpc_req_finished(*request); - *request = NULL; - - goto retry_unlink; -} - -static int lmv_precleanup(struct obd_device *obd) -{ - fld_client_debugfs_fini(&obd->u.lmv.lmv_fld); - lprocfs_obd_cleanup(obd); - return 0; -} - -/** - * Get by key a value associated with a LMV device. - * - * Dispatch request to lower-layer devices as needed. 
- * - * \param[in] env execution environment for this thread - * \param[in] exp export for the LMV device - * \param[in] keylen length of key identifier - * \param[in] key identifier of key to get value for - * \param[in] vallen size of \a val - * \param[out] val pointer to storage location for value - * - * \retval 0 on success - * \retval negative negated errno on failure - */ -static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val) -{ - struct obd_device *obd; - struct lmv_obd *lmv; - int rc = 0; - - obd = class_exp2obd(exp); - if (!obd) { - CDEBUG(D_IOCTL, "Invalid client cookie %#llx\n", - exp->exp_handle.h_cookie); - return -EINVAL; - } - - lmv = &obd->u.lmv; - if (keylen >= strlen("remote_flag") && !strcmp(key, "remote_flag")) { - int i; - - LASSERT(*vallen == sizeof(__u32)); - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - struct lmv_tgt_desc *tgt = lmv->tgts[i]; - - /* - * All tgts should be connected when this gets called. - */ - if (!tgt || !tgt->ltd_exp) - continue; - - if (!obd_get_info(env, tgt->ltd_exp, keylen, key, - vallen, val)) - return 0; - } - return -EINVAL; - } else if (KEY_IS(KEY_MAX_EASIZE) || - KEY_IS(KEY_DEFAULT_EASIZE) || - KEY_IS(KEY_CONN_DATA)) { - /* - * Forwarding this request to first MDS, it should know LOV - * desc. - */ - rc = obd_get_info(env, lmv->tgts[0]->ltd_exp, keylen, key, - vallen, val); - if (!rc && KEY_IS(KEY_CONN_DATA)) - exp->exp_connect_data = *(struct obd_connect_data *)val; - return rc; - } else if (KEY_IS(KEY_TGT_COUNT)) { - *((int *)val) = lmv->desc.ld_tgt_count; - return 0; - } - - CDEBUG(D_IOCTL, "Invalid key\n"); - return -EINVAL; -} - -/** - * Asynchronously set by key a value associated with a LMV device. - * - * Dispatch request to lower-layer devices as needed. 
- * - * \param[in] env execution environment for this thread - * \param[in] exp export for the LMV device - * \param[in] keylen length of key identifier - * \param[in] key identifier of key to store value for - * \param[in] vallen size of value to store - * \param[in] val pointer to data to be stored - * \param[in] set optional list of related ptlrpc requests - * - * \retval 0 on success - * \retval negative negated errno on failure - */ -static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp, - u32 keylen, void *key, u32 vallen, - void *val, struct ptlrpc_request_set *set) -{ - struct lmv_tgt_desc *tgt; - struct obd_device *obd; - struct lmv_obd *lmv; - int rc = 0; - - obd = class_exp2obd(exp); - if (!obd) { - CDEBUG(D_IOCTL, "Invalid client cookie %#llx\n", - exp->exp_handle.h_cookie); - return -EINVAL; - } - lmv = &obd->u.lmv; - - if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX) || - KEY_IS(KEY_DEFAULT_EASIZE)) { - int i, err = 0; - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - tgt = lmv->tgts[i]; - - if (!tgt || !tgt->ltd_exp) - continue; - - err = obd_set_info_async(env, tgt->ltd_exp, - keylen, key, vallen, val, set); - if (err && rc == 0) - rc = err; - } - - return rc; - } - - return -EINVAL; -} - -static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm, - const struct lmv_mds_md_v1 *lmm1) -{ - struct lmv_obd *lmv = &exp->exp_obd->u.lmv; - int stripe_count; - int rc = 0; - int cplen; - int i; - - lsm->lsm_md_magic = le32_to_cpu(lmm1->lmv_magic); - lsm->lsm_md_stripe_count = le32_to_cpu(lmm1->lmv_stripe_count); - lsm->lsm_md_master_mdt_index = le32_to_cpu(lmm1->lmv_master_mdt_index); - if (OBD_FAIL_CHECK(OBD_FAIL_UNKNOWN_LMV_STRIPE)) - lsm->lsm_md_hash_type = LMV_HASH_TYPE_UNKNOWN; - else - lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type); - lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version); - cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name, - 
sizeof(lsm->lsm_md_pool_name)); - - if (cplen >= sizeof(lsm->lsm_md_pool_name)) - return -E2BIG; - - CDEBUG(D_INFO, "unpack lsm count %d, master %d hash_type %d layout_version %d\n", - lsm->lsm_md_stripe_count, lsm->lsm_md_master_mdt_index, - lsm->lsm_md_hash_type, lsm->lsm_md_layout_version); - - stripe_count = le32_to_cpu(lmm1->lmv_stripe_count); - for (i = 0; i < stripe_count; i++) { - fid_le_to_cpu(&lsm->lsm_md_oinfo[i].lmo_fid, - &lmm1->lmv_stripe_fids[i]); - rc = lmv_fld_lookup(lmv, &lsm->lsm_md_oinfo[i].lmo_fid, - &lsm->lsm_md_oinfo[i].lmo_mds); - if (rc) - return rc; - CDEBUG(D_INFO, "unpack fid #%d " DFID "\n", i, - PFID(&lsm->lsm_md_oinfo[i].lmo_fid)); - } - - return rc; -} - -static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp, - const union lmv_mds_md *lmm, size_t lmm_size) -{ - struct lmv_stripe_md *lsm; - bool allocated = false; - int lsm_size, rc; - - LASSERT(lsmp); - - lsm = *lsmp; - /* Free memmd */ - if (lsm && !lmm) { - int i; - - for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - /* - * For migrating inode, the master stripe and master - * object will be the same, so do not need iput, see - * ll_update_lsm_md - */ - if (!(lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && - !i) && lsm->lsm_md_oinfo[i].lmo_root) - iput(lsm->lsm_md_oinfo[i].lmo_root); - } - - kvfree(lsm); - *lsmp = NULL; - return 0; - } - - if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_STRIPE) - return -EPERM; - - /* Unpack memmd */ - if (le32_to_cpu(lmm->lmv_magic) != LMV_MAGIC_V1 && - le32_to_cpu(lmm->lmv_magic) != LMV_USER_MAGIC) { - CERROR("%s: invalid lmv magic %x: rc = %d\n", - exp->exp_obd->obd_name, le32_to_cpu(lmm->lmv_magic), - -EIO); - return -EIO; - } - - if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_V1) - lsm_size = lmv_stripe_md_size(lmv_mds_md_stripe_count_get(lmm)); - else - /** - * Unpack default dirstripe(lmv_user_md) to lmv_stripe_md, - * stripecount should be 0 then. 
- */ - lsm_size = lmv_stripe_md_size(0); - - if (!lsm) { - lsm = kvzalloc(lsm_size, GFP_NOFS); - if (!lsm) - return -ENOMEM; - allocated = true; - *lsmp = lsm; - } - - switch (le32_to_cpu(lmm->lmv_magic)) { - case LMV_MAGIC_V1: - rc = lmv_unpack_md_v1(exp, lsm, &lmm->lmv_md_v1); - break; - default: - CERROR("%s: unrecognized magic %x\n", exp->exp_obd->obd_name, - le32_to_cpu(lmm->lmv_magic)); - rc = -EINVAL; - break; - } - - if (rc && allocated) { - kvfree(lsm); - *lsmp = NULL; - lsm_size = rc; - } - return lsm_size; -} - -void lmv_free_memmd(struct lmv_stripe_md *lsm) -{ - lmv_unpackmd(NULL, &lsm, NULL, 0); -} -EXPORT_SYMBOL(lmv_free_memmd); - -static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, - union ldlm_policy_data *policy, - enum ldlm_mode mode, enum ldlm_cancel_flags flags, - void *opaque) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - int rc = 0; - int err; - u32 i; - - LASSERT(fid); - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - struct lmv_tgt_desc *tgt = lmv->tgts[i]; - - if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) - continue; - - err = md_cancel_unused(tgt->ltd_exp, fid, policy, mode, flags, - opaque); - if (!rc) - rc = err; - } - return rc; -} - -static int lmv_set_lock_data(struct obd_export *exp, - const struct lustre_handle *lockh, - void *data, __u64 *bits) -{ - struct lmv_obd *lmv = &exp->exp_obd->u.lmv; - struct lmv_tgt_desc *tgt = lmv->tgts[0]; - - if (!tgt || !tgt->ltd_exp) - return -EINVAL; - - return md_set_lock_data(tgt->ltd_exp, lockh, data, bits); -} - -static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags, - const struct lu_fid *fid, - enum ldlm_type type, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - struct lustre_handle *lockh) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - enum ldlm_mode rc; - int tgt; - u32 i; - - CDEBUG(D_INODE, "Lock match for " DFID "\n", PFID(fid)); - - /* - * With DNE every 
object can have two locks in different namespaces: - * lookup lock in space of MDT storing direntry and update/open lock in - * space of MDT storing inode. Try the MDT that the FID maps to first, - * since this can be easily found, and only try others if that fails. - */ - for (i = 0, tgt = lmv_find_target_index(lmv, fid); - i < lmv->desc.ld_tgt_count; - i++, tgt = (tgt + 1) % lmv->desc.ld_tgt_count) { - if (tgt < 0) { - CDEBUG(D_HA, "%s: " DFID " is inaccessible: rc = %d\n", - obd->obd_name, PFID(fid), tgt); - tgt = 0; - } - - if (!lmv->tgts[tgt] || !lmv->tgts[tgt]->ltd_exp || - !lmv->tgts[tgt]->ltd_active) - continue; - - rc = md_lock_match(lmv->tgts[tgt]->ltd_exp, flags, fid, - type, policy, mode, lockh); - if (rc) - return rc; - } - - return 0; -} - -static int lmv_get_lustre_md(struct obd_export *exp, - struct ptlrpc_request *req, - struct obd_export *dt_exp, - struct obd_export *md_exp, - struct lustre_md *md) -{ - struct lmv_obd *lmv = &exp->exp_obd->u.lmv; - struct lmv_tgt_desc *tgt = lmv->tgts[0]; - - if (!tgt || !tgt->ltd_exp) - return -EINVAL; - return md_get_lustre_md(tgt->ltd_exp, req, dt_exp, md_exp, md); -} - -static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt = lmv->tgts[0]; - - if (md->lmv) { - lmv_free_memmd(md->lmv); - md->lmv = NULL; - } - if (!tgt || !tgt->ltd_exp) - return -EINVAL; - return md_free_lustre_md(tgt->ltd_exp, md); -} - -static int lmv_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct lookup_intent *it) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, &och->och_fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - return md_set_open_replay_data(tgt->ltd_exp, och, it); -} - -static int lmv_clear_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och) -{ - 
struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, &och->och_fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - return md_clear_open_replay_data(tgt->ltd_exp, och); -} - -static int lmv_intent_getattr_async(struct obd_export *exp, - struct md_enqueue_info *minfo) -{ - struct md_op_data *op_data = &minfo->mi_data; - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *ptgt = NULL; - struct lmv_tgt_desc *ctgt = NULL; - - if (!fid_is_sane(&op_data->op_fid2)) - return -EINVAL; - - ptgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); - if (IS_ERR(ptgt)) - return PTR_ERR(ptgt); - - ctgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2); - if (IS_ERR(ctgt)) - return PTR_ERR(ctgt); - - /* - * if child is on remote MDT, we need 2 async RPCs to fetch both LOOKUP - * lock on parent, and UPDATE lock on child MDT, which makes all - * complicated. Considering remote dir is rare case, and not supporting - * it in statahead won't cause any issue, drop its support for now. 
- */ - if (ptgt != ctgt) - return -ENOTSUPP; - - return md_intent_getattr_async(ptgt->ltd_exp, minfo); -} - -static int lmv_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, - struct lu_fid *fid, __u64 *bits) -{ - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, fid); - if (IS_ERR(tgt)) - return PTR_ERR(tgt); - - return md_revalidate_lock(tgt->ltd_exp, it, fid, bits); -} - -static int -lmv_get_fid_from_lsm(struct obd_export *exp, - const struct lmv_stripe_md *lsm, - const char *name, int namelen, struct lu_fid *fid) -{ - const struct lmv_oinfo *oinfo; - - LASSERT(lsm); - oinfo = lsm_name_to_stripe_info(lsm, name, namelen); - if (IS_ERR(oinfo)) - return PTR_ERR(oinfo); - - *fid = oinfo->lmo_fid; - - return 0; -} - -/** - * For lmv, only need to send request to master MDT, and the master MDT will - * process with other slave MDTs. The only exception is Q_GETOQUOTA for which - * we directly fetch data from the slave MDTs. - */ -static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - struct obd_device *obd = class_exp2obd(exp); - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt = lmv->tgts[0]; - int rc = 0; - __u64 curspace = 0, curinodes = 0; - u32 i; - - if (!tgt || !tgt->ltd_exp || !tgt->ltd_active || - !lmv->desc.ld_tgt_count) { - CERROR("master lmv inactive\n"); - return -EIO; - } - - if (oqctl->qc_cmd != Q_GETOQUOTA) - return obd_quotactl(tgt->ltd_exp, oqctl); - - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - int err; - - tgt = lmv->tgts[i]; - - if (!tgt || !tgt->ltd_exp || !tgt->ltd_active) - continue; - - err = obd_quotactl(tgt->ltd_exp, oqctl); - if (err) { - CERROR("getquota on mdt %d failed. 
%d\n", i, err); - if (!rc) - rc = err; - } else { - curspace += oqctl->qc_dqblk.dqb_curspace; - curinodes += oqctl->qc_dqblk.dqb_curinodes; - } - } - oqctl->qc_dqblk.dqb_curspace = curspace; - oqctl->qc_dqblk.dqb_curinodes = curinodes; - - return rc; -} - -static int lmv_merge_attr(struct obd_export *exp, - const struct lmv_stripe_md *lsm, - struct cl_attr *attr, - ldlm_blocking_callback cb_blocking) -{ - int rc, i; - - rc = lmv_revalidate_slaves(exp, lsm, cb_blocking, 0); - if (rc < 0) - return rc; - - for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - struct inode *inode = lsm->lsm_md_oinfo[i].lmo_root; - - CDEBUG(D_INFO, "" DFID " size %llu, blocks %llu nlink %u, atime %lu ctime %lu, mtime %lu.\n", - PFID(&lsm->lsm_md_oinfo[i].lmo_fid), - i_size_read(inode), (unsigned long long)inode->i_blocks, - inode->i_nlink, LTIME_S(inode->i_atime), - LTIME_S(inode->i_ctime), LTIME_S(inode->i_mtime)); - - /* for slave stripe, it needs to subtract nlink for . and .. */ - if (i) - attr->cat_nlink += inode->i_nlink - 2; - else - attr->cat_nlink = inode->i_nlink; - - attr->cat_size += i_size_read(inode); - attr->cat_blocks += inode->i_blocks; - - if (attr->cat_atime < LTIME_S(inode->i_atime)) - attr->cat_atime = LTIME_S(inode->i_atime); - - if (attr->cat_ctime < LTIME_S(inode->i_ctime)) - attr->cat_ctime = LTIME_S(inode->i_ctime); - - if (attr->cat_mtime < LTIME_S(inode->i_mtime)) - attr->cat_mtime = LTIME_S(inode->i_mtime); - } - return 0; -} - -static struct obd_ops lmv_obd_ops = { - .owner = THIS_MODULE, - .setup = lmv_setup, - .cleanup = lmv_cleanup, - .precleanup = lmv_precleanup, - .process_config = lmv_process_config, - .connect = lmv_connect, - .disconnect = lmv_disconnect, - .statfs = lmv_statfs, - .get_info = lmv_get_info, - .set_info_async = lmv_set_info_async, - .notify = lmv_notify, - .get_uuid = lmv_get_uuid, - .iocontrol = lmv_iocontrol, - .quotactl = lmv_quotactl -}; - -static struct md_ops lmv_md_ops = { - .getstatus = lmv_getstatus, - .null_inode = 
lmv_null_inode, - .close = lmv_close, - .create = lmv_create, - .enqueue = lmv_enqueue, - .getattr = lmv_getattr, - .getxattr = lmv_getxattr, - .getattr_name = lmv_getattr_name, - .intent_lock = lmv_intent_lock, - .link = lmv_link, - .rename = lmv_rename, - .setattr = lmv_setattr, - .setxattr = lmv_setxattr, - .sync = lmv_sync, - .read_page = lmv_read_page, - .unlink = lmv_unlink, - .init_ea_size = lmv_init_ea_size, - .cancel_unused = lmv_cancel_unused, - .set_lock_data = lmv_set_lock_data, - .lock_match = lmv_lock_match, - .get_lustre_md = lmv_get_lustre_md, - .free_lustre_md = lmv_free_lustre_md, - .merge_attr = lmv_merge_attr, - .set_open_replay_data = lmv_set_open_replay_data, - .clear_open_replay_data = lmv_clear_open_replay_data, - .intent_getattr_async = lmv_intent_getattr_async, - .revalidate_lock = lmv_revalidate_lock, - .get_fid_from_lsm = lmv_get_fid_from_lsm, - .unpackmd = lmv_unpackmd, -}; - -static int __init lmv_init(void) -{ - struct lprocfs_static_vars lvars; - int rc; - - lprocfs_lmv_init_vars(&lvars); - - rc = libcfs_setup(); - if (rc) - return rc; - - return class_register_type(&lmv_obd_ops, &lmv_md_ops, - LUSTRE_LMV_NAME, NULL); -} - -static void lmv_exit(void) -{ - class_unregister_type(LUSTRE_LMV_NAME); -} - -MODULE_AUTHOR("OpenSFS, Inc. "); -MODULE_DESCRIPTION("Lustre Logical Metadata Volume"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(lmv_init); -module_exit(lmv_exit); diff --git a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c deleted file mode 100644 index 30727b7acccc..000000000000 --- a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c +++ /dev/null @@ -1,173 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include -#include "lmv_internal.h" - -static ssize_t numobd_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct lmv_desc *desc; - - desc = &dev->u.lmv.desc; - return sprintf(buf, "%u\n", desc->ld_tgt_count); -} -LUSTRE_RO_ATTR(numobd); - -static ssize_t activeobd_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct lmv_desc *desc; - - desc = &dev->u.lmv.desc; - return sprintf(buf, "%u\n", desc->ld_active_tgt_count); -} -LUSTRE_RO_ATTR(activeobd); - -static int lmv_desc_uuid_seq_show(struct seq_file *m, void *v) -{ - struct obd_device *dev = (struct obd_device *)m->private; - struct lmv_obd *lmv; - - LASSERT(dev); - lmv = &dev->u.lmv; - seq_printf(m, "%s\n", lmv->desc.ld_uuid.uuid); - return 0; -} - -LPROC_SEQ_FOPS_RO(lmv_desc_uuid); - -static void *lmv_tgt_seq_start(struct seq_file *p, loff_t *pos) -{ - struct obd_device *dev = p->private; - struct lmv_obd *lmv = &dev->u.lmv; - - while (*pos < lmv->tgts_size) { - if (lmv->tgts[*pos]) - return lmv->tgts[*pos]; - ++*pos; - } - - return NULL; -} - -static void lmv_tgt_seq_stop(struct seq_file *p, void *v) -{ -} - -static void *lmv_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos) -{ - struct obd_device *dev = p->private; - struct lmv_obd *lmv = &dev->u.lmv; - - ++*pos; - while (*pos < lmv->tgts_size) { - if (lmv->tgts[*pos]) - return lmv->tgts[*pos]; - ++*pos; - } - - return NULL; -} - -static int lmv_tgt_seq_show(struct seq_file *p, void *v) -{ - struct lmv_tgt_desc *tgt = v; - - if (!tgt) - return 0; - seq_printf(p, "%u: %s %sACTIVE\n", - tgt->ltd_idx, tgt->ltd_uuid.uuid, - tgt->ltd_active ? 
"" : "IN"); - return 0; -} - -static const struct seq_operations lmv_tgt_sops = { - .start = lmv_tgt_seq_start, - .stop = lmv_tgt_seq_stop, - .next = lmv_tgt_seq_next, - .show = lmv_tgt_seq_show, -}; - -static int lmv_target_seq_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int rc; - - rc = seq_open(file, &lmv_tgt_sops); - if (rc) - return rc; - - seq = file->private_data; - seq->private = inode->i_private; - - return 0; -} - -static struct lprocfs_vars lprocfs_lmv_obd_vars[] = { - { "desc_uuid", &lmv_desc_uuid_fops, NULL, 0 }, - { NULL } -}; - -const struct file_operations lmv_proc_target_fops = { - .owner = THIS_MODULE, - .open = lmv_target_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static struct attribute *lmv_attrs[] = { - &lustre_attr_activeobd.attr, - &lustre_attr_numobd.attr, - NULL, -}; - -static const struct attribute_group lmv_attr_group = { - .attrs = lmv_attrs, -}; - -void lprocfs_lmv_init_vars(struct lprocfs_static_vars *lvars) -{ - lvars->sysfs_vars = &lmv_attr_group; - lvars->obd_vars = lprocfs_lmv_obd_vars; -} diff --git a/drivers/staging/lustre/lustre/lov/Makefile b/drivers/staging/lustre/lustre/lov/Makefile deleted file mode 100644 index 1ebf0193f61a..000000000000 --- a/drivers/staging/lustre/lustre/lov/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LUSTRE_FS) += lov.o -lov-y := lov_obd.o lov_pack.o lov_offset.o lov_merge.o \ - lov_request.o lov_ea.o lov_dev.o lov_object.o lov_page.o \ - lov_lock.o lov_io.o lovsub_dev.o lovsub_object.o lovsub_page.o \ - lovsub_lock.o lov_pool.o lproc_lov.o diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h deleted file mode 100644 index e4f762137a4a..000000000000 --- 
a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h +++ /dev/null @@ -1,639 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Internal interfaces of LOV layer. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#ifndef LOV_CL_INTERNAL_H -#define LOV_CL_INTERNAL_H - -#include -#include -#include "lov_internal.h" - -/** \defgroup lov lov - * Logical object volume layer. This layer implements data striping (raid0). 
- * - * At the lov layer top-entity (object, page, lock, io) is connected to one or - * more sub-entities: top-object, representing a file is connected to a set of - * sub-objects, each representing a stripe, file-level top-lock is connected - * to a set of per-stripe sub-locks, top-page is connected to a (single) - * sub-page, and a top-level IO is connected to a set of (potentially - * concurrent) sub-IO's. - * - * Sub-object, sub-page, and sub-io have well-defined top-object and top-page - * respectively, while a single sub-lock can be part of multiple top-locks. - * - * Reference counting models are different for different types of entities: - * - * - top-object keeps a reference to its sub-objects, and destroys them - * when it is destroyed. - * - * - top-page keeps a reference to its sub-page, and destroys it when it - * is destroyed. - * - * - IO's are not reference counted. - * - * To implement a connection between top and sub entities, lov layer is split - * into two pieces: lov ("upper half"), and lovsub ("bottom half"), both - * implementing full set of cl-interfaces. For example, top-object has vvp and - * lov layers, and it's sub-object has lovsub and osc layers. lovsub layer is - * used to track child-parent relationship. - * - * @{ - */ - -struct lovsub_device; -struct lovsub_object; -struct lovsub_lock; - -enum lov_device_flags { - LOV_DEV_INITIALIZED = 1 << 0 -}; - -/* - * Upper half. - */ - -struct lov_device { - /* - * XXX Locking of lov-private data is missing. - */ - struct cl_device ld_cl; - struct lov_obd *ld_lov; - /** size of lov_device::ld_target[] array */ - __u32 ld_target_nr; - struct lovsub_device **ld_target; - __u32 ld_flags; -}; - -/** - * Layout type. 
- */ -enum lov_layout_type { - LLT_EMPTY, /** empty file without body (mknod + truncate) */ - LLT_RAID0, /** striped file */ - LLT_RELEASED, /** file with no objects (data in HSM) */ - LLT_NR -}; - -static inline char *llt2str(enum lov_layout_type llt) -{ - switch (llt) { - case LLT_EMPTY: - return "EMPTY"; - case LLT_RAID0: - return "RAID0"; - case LLT_RELEASED: - return "RELEASED"; - case LLT_NR: - LBUG(); - } - LBUG(); - return ""; -} - -/** - * lov-specific file state. - * - * lov object has particular layout type, determining how top-object is built - * on top of sub-objects. Layout type can change dynamically. When this - * happens, lov_object::lo_type_guard semaphore is taken in exclusive mode, - * all state pertaining to the old layout type is destroyed, and new state is - * constructed. All object methods take said semaphore in the shared mode, - * providing serialization against transition between layout types. - * - * To avoid multiple `if' or `switch' statements, selecting behavior for the - * current layout type, object methods perform double-dispatch, invoking - * function corresponding to the current layout type. - */ -struct lov_object { - struct cl_object lo_cl; - /** - * Serializes object operations with transitions between layout types. - * - * This semaphore is taken in shared mode by all object methods, and - * is taken in exclusive mode when object type is changed. - * - * \see lov_object::lo_type - */ - struct rw_semaphore lo_type_guard; - /** - * Type of an object. Protected by lov_object::lo_type_guard. - */ - enum lov_layout_type lo_type; - /** - * True if layout is invalid. This bit is cleared when layout lock - * is lost. - */ - bool lo_layout_invalid; - /** - * How many IOs are on going on this object. Layout can be changed - * only if there is no active IO. - */ - atomic_t lo_active_ios; - /** - * Waitq - wait for no one else is using lo_lsm - */ - wait_queue_head_t lo_waitq; - /** - * Layout metadata. NULL if empty layout. 
- */ - struct lov_stripe_md *lo_lsm; - - union lov_layout_state { - struct lov_layout_raid0 { - unsigned int lo_nr; - /** - * When this is true, lov_object::lo_attr contains - * valid up to date attributes for a top-level - * object. This field is reset to 0 when attributes of - * any sub-object change. - */ - int lo_attr_valid; - /** - * Array of sub-objects. Allocated when top-object is - * created (lov_init_raid0()). - * - * Top-object is a strict master of its sub-objects: - * it is created before them, and outlives its - * children (this later is necessary so that basic - * functions like cl_object_top() always - * work). Top-object keeps a reference on every - * sub-object. - * - * When top-object is destroyed (lov_delete_raid0()) - * it releases its reference to a sub-object and waits - * until the latter is finally destroyed. - */ - struct lovsub_object **lo_sub; - /** - * protect lo_sub - */ - spinlock_t lo_sub_lock; - /** - * Cached object attribute, built from sub-object - * attributes. - */ - struct cl_attr lo_attr; - } raid0; - struct lov_layout_state_empty { - } empty; - struct lov_layout_state_released { - } released; - } u; - /** - * Thread that acquired lov_object::lo_type_guard in an exclusive - * mode. - */ - struct task_struct *lo_owner; -}; - -/** - * State lov_lock keeps for each sub-lock. - */ -struct lov_lock_sub { - /** sub-lock itself */ - struct cl_lock sub_lock; - /** Set if the sublock has ever been enqueued, meaning it may - * hold resources of underlying layers - */ - unsigned int sub_is_enqueued:1, - sub_initialized:1; - int sub_stripe; -}; - -/** - * lov-specific lock state. - */ -struct lov_lock { - struct cl_lock_slice lls_cl; - /** Number of sub-locks in this lock */ - int lls_nr; - /** sublock array */ - struct lov_lock_sub lls_sub[0]; -}; - -struct lov_page { - struct cl_page_slice lps_cl; - unsigned int lps_stripe; /* stripe index */ -}; - -/* - * Bottom half. 
- */ - -struct lovsub_device { - struct cl_device acid_cl; - struct cl_device *acid_next; -}; - -struct lovsub_object { - struct cl_object_header lso_header; - struct cl_object lso_cl; - struct lov_object *lso_super; - int lso_index; -}; - -/** - * Lock state at lovsub layer. - */ -struct lovsub_lock { - struct cl_lock_slice lss_cl; -}; - -/** - * Describe the environment settings for sublocks. - */ -struct lov_sublock_env { - const struct lu_env *lse_env; - struct cl_io *lse_io; -}; - -struct lovsub_page { - struct cl_page_slice lsb_cl; -}; - -struct lov_thread_info { - struct cl_object_conf lti_stripe_conf; - struct lu_fid lti_fid; - struct ost_lvb lti_lvb; - struct cl_2queue lti_cl2q; - struct cl_page_list lti_plist; - wait_queue_entry_t lti_waiter; -}; - -/** - * State that lov_io maintains for every sub-io. - */ -struct lov_io_sub { - u16 sub_stripe; - /** - * environment's refcheck. - * - * \see cl_env_get() - */ - u16 sub_refcheck; - /** - * true, iff cl_io_init() was successfully executed against - * lov_io_sub::sub_io. - */ - u16 sub_io_initialized:1, - /** - * True, iff lov_io_sub::sub_io and lov_io_sub::sub_env weren't - * allocated, but borrowed from a per-device emergency pool. - */ - sub_borrowed:1; - /** - * Linkage into a list (hanging off lov_io::lis_active) of all - * sub-io's active for the current IO iteration. - */ - struct list_head sub_linkage; - /** - * sub-io for a stripe. Ideally sub-io's can be stopped and resumed - * independently, with lov acting as a scheduler to maximize overall - * throughput. - */ - struct cl_io *sub_io; - /** - * environment, in which sub-io executes. - */ - struct lu_env *sub_env; -}; - -/** - * IO state private for LOV. - */ -struct lov_io { - /** super-class */ - struct cl_io_slice lis_cl; - /** - * Pointer to the object slice. This is a duplicate of - * lov_io::lis_cl::cis_object. 
- */ - struct lov_object *lis_object; - /** - * Original end-of-io position for this IO, set by the upper layer as - * cl_io::u::ci_rw::pos + cl_io::u::ci_rw::count. lov remembers this, - * changes pos and count to fit IO into a single stripe and uses saved - * value to determine when IO iterations have to stop. - * - * This is used only for CIT_READ and CIT_WRITE io's. - */ - loff_t lis_io_endpos; - - /** - * starting position within a file, for the current io loop iteration - * (stripe), used by ci_io_loop(). - */ - u64 lis_pos; - /** - * end position with in a file, for the current stripe io. This is - * exclusive (i.e., next offset after last byte affected by io). - */ - u64 lis_endpos; - - int lis_stripe_count; - int lis_active_subios; - - /** - * the index of ls_single_subio in ls_subios array - */ - int lis_single_subio_index; - struct cl_io lis_single_subio; - - /** - * size of ls_subios array, actually the highest stripe # - */ - int lis_nr_subios; - struct lov_io_sub *lis_subs; - /** - * List of active sub-io's. 
- */ - struct list_head lis_active; -}; - -struct lov_session { - struct lov_io ls_io; - struct lov_sublock_env ls_subenv; -}; - -extern struct lu_device_type lov_device_type; -extern struct lu_device_type lovsub_device_type; - -extern struct lu_context_key lov_key; -extern struct lu_context_key lov_session_key; - -extern struct kmem_cache *lov_lock_kmem; -extern struct kmem_cache *lov_object_kmem; -extern struct kmem_cache *lov_thread_kmem; -extern struct kmem_cache *lov_session_kmem; - -extern struct kmem_cache *lovsub_lock_kmem; -extern struct kmem_cache *lovsub_object_kmem; - -int lov_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf); -int lovsub_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf); -int lov_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io); -int lov_io_init(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io); -int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io); - -int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io); -int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io); -int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io); -int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io); -int lov_io_init_released(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io); - -struct lov_io_sub *lov_sub_get(const struct lu_env *env, struct lov_io *lio, - int stripe); - -int lov_page_init(const struct lu_env *env, struct cl_object *ob, - struct cl_page *page, pgoff_t index); -int lovsub_page_init(const struct lu_env *env, struct cl_object *ob, - struct cl_page *page, pgoff_t index); -int 
lov_page_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); -int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); -struct lu_object *lov_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *dev); -struct lu_object *lovsub_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *dev); - -struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov); -int lov_page_stripe(const struct cl_page *page); - -#define lov_foreach_target(lov, var) \ - for (var = 0; var < lov_targets_nr(lov); ++var) - -/***************************************************************************** - * - * Type conversions. - * - * Accessors. - * - */ - -static inline struct lov_session *lov_env_session(const struct lu_env *env) -{ - struct lov_session *ses; - - ses = lu_context_key_get(env->le_ses, &lov_session_key); - LASSERT(ses); - return ses; -} - -static inline struct lov_io *lov_env_io(const struct lu_env *env) -{ - return &lov_env_session(env)->ls_io; -} - -static inline int lov_is_object(const struct lu_object *obj) -{ - return obj->lo_dev->ld_type == &lov_device_type; -} - -static inline int lovsub_is_object(const struct lu_object *obj) -{ - return obj->lo_dev->ld_type == &lovsub_device_type; -} - -static inline struct lu_device *lov2lu_dev(struct lov_device *lov) -{ - return &lov->ld_cl.cd_lu_dev; -} - -static inline struct lov_device *lu2lov_dev(const struct lu_device *d) -{ - LINVRNT(d->ld_type == &lov_device_type); - return container_of(d, struct lov_device, ld_cl.cd_lu_dev); -} - -static inline struct cl_device *lovsub2cl_dev(struct lovsub_device *lovsub) -{ - return &lovsub->acid_cl; -} - -static inline struct lu_device *lovsub2lu_dev(struct lovsub_device *lovsub) -{ - return &lovsub2cl_dev(lovsub)->cd_lu_dev; -} - -static inline struct lovsub_device *lu2lovsub_dev(const struct lu_device *d) 
-{ - LINVRNT(d->ld_type == &lovsub_device_type); - return container_of(d, struct lovsub_device, acid_cl.cd_lu_dev); -} - -static inline struct lovsub_device *cl2lovsub_dev(const struct cl_device *d) -{ - LINVRNT(d->cd_lu_dev.ld_type == &lovsub_device_type); - return container_of(d, struct lovsub_device, acid_cl); -} - -static inline struct lu_object *lov2lu(struct lov_object *lov) -{ - return &lov->lo_cl.co_lu; -} - -static inline struct cl_object *lov2cl(struct lov_object *lov) -{ - return &lov->lo_cl; -} - -static inline struct lov_object *lu2lov(const struct lu_object *obj) -{ - LINVRNT(lov_is_object(obj)); - return container_of(obj, struct lov_object, lo_cl.co_lu); -} - -static inline struct lov_object *cl2lov(const struct cl_object *obj) -{ - LINVRNT(lov_is_object(&obj->co_lu)); - return container_of(obj, struct lov_object, lo_cl); -} - -static inline struct lu_object *lovsub2lu(struct lovsub_object *los) -{ - return &los->lso_cl.co_lu; -} - -static inline struct cl_object *lovsub2cl(struct lovsub_object *los) -{ - return &los->lso_cl; -} - -static inline struct lovsub_object *cl2lovsub(const struct cl_object *obj) -{ - LINVRNT(lovsub_is_object(&obj->co_lu)); - return container_of(obj, struct lovsub_object, lso_cl); -} - -static inline struct lovsub_object *lu2lovsub(const struct lu_object *obj) -{ - LINVRNT(lovsub_is_object(obj)); - return container_of(obj, struct lovsub_object, lso_cl.co_lu); -} - -static inline struct lovsub_lock * -cl2lovsub_lock(const struct cl_lock_slice *slice) -{ - LINVRNT(lovsub_is_object(&slice->cls_obj->co_lu)); - return container_of(slice, struct lovsub_lock, lss_cl); -} - -static inline struct lovsub_lock *cl2sub_lock(const struct cl_lock *lock) -{ - const struct cl_lock_slice *slice; - - slice = cl_lock_at(lock, &lovsub_device_type); - LASSERT(slice); - return cl2lovsub_lock(slice); -} - -static inline struct lov_lock *cl2lov_lock(const struct cl_lock_slice *slice) -{ - LINVRNT(lov_is_object(&slice->cls_obj->co_lu)); - return 
container_of(slice, struct lov_lock, lls_cl); -} - -static inline struct lov_page *cl2lov_page(const struct cl_page_slice *slice) -{ - LINVRNT(lov_is_object(&slice->cpl_obj->co_lu)); - return container_of(slice, struct lov_page, lps_cl); -} - -static inline struct lovsub_page * -cl2lovsub_page(const struct cl_page_slice *slice) -{ - LINVRNT(lovsub_is_object(&slice->cpl_obj->co_lu)); - return container_of(slice, struct lovsub_page, lsb_cl); -} - -static inline struct lov_io *cl2lov_io(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct lov_io *lio; - - lio = container_of(ios, struct lov_io, lis_cl); - LASSERT(lio == lov_env_io(env)); - return lio; -} - -static inline int lov_targets_nr(const struct lov_device *lov) -{ - return lov->ld_lov->desc.ld_tgt_count; -} - -static inline struct lov_thread_info *lov_env_info(const struct lu_env *env) -{ - struct lov_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &lov_key); - LASSERT(info); - return info; -} - -static inline struct lov_layout_raid0 *lov_r0(struct lov_object *lov) -{ - LASSERT(lov->lo_type == LLT_RAID0); - LASSERT(lov->lo_lsm->lsm_magic == LOV_MAGIC || - lov->lo_lsm->lsm_magic == LOV_MAGIC_V3); - return &lov->u.raid0; -} - -/* lov_pack.c */ -int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm, - struct lov_user_md __user *lump); - -/** @} lov */ - -#endif diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c deleted file mode 100644 index c7db23472346..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_dev.c +++ /dev/null @@ -1,384 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_device and cl_device_type for LOV layer. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LOV - -/* class_name2obd() */ -#include - -#include "lov_cl_internal.h" -#include "lov_internal.h" - -struct kmem_cache *lov_lock_kmem; -struct kmem_cache *lov_object_kmem; -struct kmem_cache *lov_thread_kmem; -struct kmem_cache *lov_session_kmem; - -struct kmem_cache *lovsub_lock_kmem; -struct kmem_cache *lovsub_object_kmem; - -struct lu_kmem_descr lov_caches[] = { - { - .ckd_cache = &lov_lock_kmem, - .ckd_name = "lov_lock_kmem", - .ckd_size = sizeof(struct lov_lock) - }, - { - .ckd_cache = &lov_object_kmem, - .ckd_name = "lov_object_kmem", - .ckd_size = sizeof(struct lov_object) - }, - { - .ckd_cache = &lov_thread_kmem, - .ckd_name = "lov_thread_kmem", - .ckd_size = sizeof(struct lov_thread_info) - }, - { - .ckd_cache = &lov_session_kmem, - .ckd_name = "lov_session_kmem", - .ckd_size = sizeof(struct lov_session) - }, - { - .ckd_cache = &lovsub_lock_kmem, - .ckd_name = "lovsub_lock_kmem", - .ckd_size = sizeof(struct lovsub_lock) - }, - { - .ckd_cache = &lovsub_object_kmem, - .ckd_name = "lovsub_object_kmem", - .ckd_size = 
sizeof(struct lovsub_object) - }, - { - .ckd_cache = NULL - } -}; - -/***************************************************************************** - * - * Lov device and device type functions. - * - */ - -static void *lov_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct lov_thread_info *info; - - info = kmem_cache_zalloc(lov_thread_kmem, GFP_NOFS); - if (!info) - info = ERR_PTR(-ENOMEM); - return info; -} - -static void lov_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct lov_thread_info *info = data; - - kmem_cache_free(lov_thread_kmem, info); -} - -struct lu_context_key lov_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = lov_key_init, - .lct_fini = lov_key_fini -}; - -static void *lov_session_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct lov_session *info; - - info = kmem_cache_zalloc(lov_session_kmem, GFP_NOFS); - if (!info) - info = ERR_PTR(-ENOMEM); - return info; -} - -static void lov_session_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct lov_session *info = data; - - kmem_cache_free(lov_session_kmem, info); -} - -struct lu_context_key lov_session_key = { - .lct_tags = LCT_SESSION, - .lct_init = lov_session_key_init, - .lct_fini = lov_session_key_fini -}; - -/* type constructor/destructor: lov_type_{init,fini,start,stop}() */ -LU_TYPE_INIT_FINI(lov, &lov_key, &lov_session_key); - -static struct lu_device *lov_device_fini(const struct lu_env *env, - struct lu_device *d) -{ - int i; - struct lov_device *ld = lu2lov_dev(d); - - LASSERT(ld->ld_lov); - if (!ld->ld_target) - return NULL; - - lov_foreach_target(ld, i) { - struct lovsub_device *lsd; - - lsd = ld->ld_target[i]; - if (lsd) { - cl_stack_fini(env, lovsub2cl_dev(lsd)); - ld->ld_target[i] = NULL; - } - } - return NULL; -} - -static int lov_device_init(const struct lu_env *env, struct lu_device *d, - const char *name, struct lu_device *next) -{ - 
struct lov_device *ld = lu2lov_dev(d); - int i; - int rc = 0; - - LASSERT(d->ld_site); - if (!ld->ld_target) - return rc; - - lov_foreach_target(ld, i) { - struct lovsub_device *lsd; - struct cl_device *cl; - struct lov_tgt_desc *desc; - - desc = ld->ld_lov->lov_tgts[i]; - if (!desc) - continue; - - cl = cl_type_setup(env, d->ld_site, &lovsub_device_type, - desc->ltd_obd->obd_lu_dev); - if (IS_ERR(cl)) { - rc = PTR_ERR(cl); - break; - } - lsd = cl2lovsub_dev(cl); - ld->ld_target[i] = lsd; - } - - if (rc) - lov_device_fini(env, d); - else - ld->ld_flags |= LOV_DEV_INITIALIZED; - - return rc; -} - -static struct lu_device *lov_device_free(const struct lu_env *env, - struct lu_device *d) -{ - struct lov_device *ld = lu2lov_dev(d); - - cl_device_fini(lu2cl_dev(d)); - kfree(ld->ld_target); - kfree(ld); - return NULL; -} - -static void lov_cl_del_target(const struct lu_env *env, struct lu_device *dev, - __u32 index) -{ - struct lov_device *ld = lu2lov_dev(dev); - - if (ld->ld_target[index]) { - cl_stack_fini(env, lovsub2cl_dev(ld->ld_target[index])); - ld->ld_target[index] = NULL; - } -} - -static int lov_expand_targets(const struct lu_env *env, struct lov_device *dev) -{ - int result; - __u32 tgt_size; - __u32 sub_size; - - result = 0; - tgt_size = dev->ld_lov->lov_tgt_size; - sub_size = dev->ld_target_nr; - if (sub_size < tgt_size) { - struct lovsub_device **newd; - const size_t sz = sizeof(newd[0]); - - newd = kcalloc(tgt_size, sz, GFP_NOFS); - if (newd) { - if (sub_size > 0) { - memcpy(newd, dev->ld_target, sub_size * sz); - kfree(dev->ld_target); - } - dev->ld_target = newd; - dev->ld_target_nr = tgt_size; - } else { - result = -ENOMEM; - } - } - return result; -} - -static int lov_cl_add_target(const struct lu_env *env, struct lu_device *dev, - __u32 index) -{ - struct obd_device *obd = dev->ld_obd; - struct lov_device *ld = lu2lov_dev(dev); - struct lov_tgt_desc *tgt; - struct lovsub_device *lsd; - struct cl_device *cl; - int rc; - - obd_getref(obd); - - tgt = 
obd->u.lov.lov_tgts[index]; - - if (!tgt->ltd_obd->obd_set_up) { - CERROR("Target %s not set up\n", obd_uuid2str(&tgt->ltd_uuid)); - return -EINVAL; - } - - rc = lov_expand_targets(env, ld); - if (rc == 0 && ld->ld_flags & LOV_DEV_INITIALIZED) { - LASSERT(dev->ld_site); - - cl = cl_type_setup(env, dev->ld_site, &lovsub_device_type, - tgt->ltd_obd->obd_lu_dev); - if (!IS_ERR(cl)) { - lsd = cl2lovsub_dev(cl); - ld->ld_target[index] = lsd; - } else { - CERROR("add failed (%d), deleting %s\n", rc, - obd_uuid2str(&tgt->ltd_uuid)); - lov_cl_del_target(env, dev, index); - rc = PTR_ERR(cl); - } - } - obd_putref(obd); - return rc; -} - -static int lov_process_config(const struct lu_env *env, - struct lu_device *d, struct lustre_cfg *cfg) -{ - struct obd_device *obd = d->ld_obd; - int cmd; - int rc; - int gen; - __u32 index; - - obd_getref(obd); - - cmd = cfg->lcfg_command; - rc = lov_process_config_base(d->ld_obd, cfg, &index, &gen); - if (rc == 0) { - switch (cmd) { - case LCFG_LOV_ADD_OBD: - case LCFG_LOV_ADD_INA: - rc = lov_cl_add_target(env, d, index); - if (rc != 0) - lov_del_target(d->ld_obd, index, NULL, 0); - break; - case LCFG_LOV_DEL_OBD: - lov_cl_del_target(env, d, index); - break; - } - } - obd_putref(obd); - return rc; -} - -static const struct lu_device_operations lov_lu_ops = { - .ldo_object_alloc = lov_object_alloc, - .ldo_process_config = lov_process_config, -}; - -static struct lu_device *lov_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg) -{ - struct lu_device *d; - struct lov_device *ld; - struct obd_device *obd; - int rc; - - ld = kzalloc(sizeof(*ld), GFP_NOFS); - if (!ld) - return ERR_PTR(-ENOMEM); - - cl_device_init(&ld->ld_cl, t); - d = lov2lu_dev(ld); - d->ld_ops = &lov_lu_ops; - - /* setup the LOV OBD */ - obd = class_name2obd(lustre_cfg_string(cfg, 0)); - LASSERT(obd); - rc = lov_setup(obd, cfg); - if (rc) { - lov_device_free(env, d); - return ERR_PTR(rc); - } - - ld->ld_lov = &obd->u.lov; - return d; 
-} - -static const struct lu_device_type_operations lov_device_type_ops = { - .ldto_init = lov_type_init, - .ldto_fini = lov_type_fini, - - .ldto_start = lov_type_start, - .ldto_stop = lov_type_stop, - - .ldto_device_alloc = lov_device_alloc, - .ldto_device_free = lov_device_free, - - .ldto_device_init = lov_device_init, - .ldto_device_fini = lov_device_fini -}; - -struct lu_device_type lov_device_type = { - .ldt_tags = LU_DEVICE_CL, - .ldt_name = LUSTRE_LOV_NAME, - .ldt_ops = &lov_device_type_ops, - .ldt_ctx_tags = LCT_CL_THREAD -}; - -/** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c deleted file mode 100644 index c80320ab0858..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_ea.c +++ /dev/null @@ -1,331 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/lov/lov_ea.c - * - * Author: Wang Di - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include - -#include -#include - -#include "lov_internal.h" - -static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes, - __u16 stripe_count) -{ - if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) { - CERROR("bad stripe count %d\n", stripe_count); - lov_dump_lmm_common(D_WARNING, lmm); - return -EINVAL; - } - - if (lmm_oi_id(&lmm->lmm_oi) == 0) { - CERROR("zero object id\n"); - lov_dump_lmm_common(D_WARNING, lmm); - return -EINVAL; - } - - if (lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_RAID0) { - CERROR("bad striping pattern\n"); - lov_dump_lmm_common(D_WARNING, lmm); - return -EINVAL; - } - - if (lmm->lmm_stripe_size == 0 || - (le32_to_cpu(lmm->lmm_stripe_size) & - (LOV_MIN_STRIPE_SIZE - 1)) != 0) { - CERROR("bad stripe size %u\n", - le32_to_cpu(lmm->lmm_stripe_size)); - lov_dump_lmm_common(D_WARNING, lmm); - return -EINVAL; - } - return 0; -} - -struct lov_stripe_md *lsm_alloc_plain(u16 stripe_count) -{ - size_t oinfo_ptrs_size, lsm_size; - struct lov_stripe_md *lsm; - struct lov_oinfo *loi; - int i; - - LASSERT(stripe_count <= LOV_MAX_STRIPE_COUNT); - - oinfo_ptrs_size = sizeof(struct lov_oinfo *) * stripe_count; - lsm_size = sizeof(*lsm) + oinfo_ptrs_size; - - lsm = kvzalloc(lsm_size, GFP_NOFS); - if (!lsm) - return NULL; - - for (i = 0; i < stripe_count; i++) { - loi = kmem_cache_zalloc(lov_oinfo_slab, GFP_NOFS); - if (!loi) - goto err; - lsm->lsm_oinfo[i] = loi; - } - lsm->lsm_stripe_count = stripe_count; - return lsm; - -err: - while (--i >= 0) - kmem_cache_free(lov_oinfo_slab, lsm->lsm_oinfo[i]); - kvfree(lsm); - return NULL; -} - -void lsm_free_plain(struct lov_stripe_md *lsm) -{ - __u16 stripe_count = lsm->lsm_stripe_count; - int i; - - for (i = 0; i < stripe_count; i++) - kmem_cache_free(lov_oinfo_slab, lsm->lsm_oinfo[i]); - kvfree(lsm); -} - -/* - * Find minimum stripe maxbytes value. 
For inactive or - * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES. - */ -static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt) -{ - loff_t maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES; - struct obd_import *imp; - - if (!tgt->ltd_active) - return maxbytes; - - imp = tgt->ltd_obd->u.cli.cl_import; - if (!imp) - return maxbytes; - - spin_lock(&imp->imp_lock); - if (imp->imp_state == LUSTRE_IMP_FULL && - (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES) && - imp->imp_connect_data.ocd_maxbytes > 0) - maxbytes = imp->imp_connect_data.ocd_maxbytes; - - spin_unlock(&imp->imp_lock); - - return maxbytes; -} - -static int lsm_unpackmd_common(struct lov_obd *lov, - struct lov_stripe_md *lsm, - struct lov_mds_md *lmm, - struct lov_ost_data_v1 *objects) -{ - loff_t min_stripe_maxbytes = 0; - unsigned int stripe_count; - struct lov_oinfo *loi; - loff_t lov_bytes; - unsigned int i; - - /* - * This supposes lov_mds_md_v1/v3 first fields are - * are the same - */ - lmm_oi_le_to_cpu(&lsm->lsm_oi, &lmm->lmm_oi); - lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size); - lsm->lsm_pattern = le32_to_cpu(lmm->lmm_pattern); - lsm->lsm_layout_gen = le16_to_cpu(lmm->lmm_layout_gen); - lsm->lsm_pool_name[0] = '\0'; - - stripe_count = lsm_is_released(lsm) ? 
0 : lsm->lsm_stripe_count; - - for (i = 0; i < stripe_count; i++) { - loi = lsm->lsm_oinfo[i]; - ostid_le_to_cpu(&objects[i].l_ost_oi, &loi->loi_oi); - loi->loi_ost_idx = le32_to_cpu(objects[i].l_ost_idx); - loi->loi_ost_gen = le32_to_cpu(objects[i].l_ost_gen); - if (lov_oinfo_is_dummy(loi)) - continue; - - if (loi->loi_ost_idx >= lov->desc.ld_tgt_count && - !lov2obd(lov)->obd_process_conf) { - CERROR("%s: OST index %d more than OST count %d\n", - (char *)lov->desc.ld_uuid.uuid, - loi->loi_ost_idx, lov->desc.ld_tgt_count); - lov_dump_lmm_v1(D_WARNING, lmm); - return -EINVAL; - } - - if (!lov->lov_tgts[loi->loi_ost_idx]) { - CERROR("%s: OST index %d missing\n", - (char *)lov->desc.ld_uuid.uuid, - loi->loi_ost_idx); - lov_dump_lmm_v1(D_WARNING, lmm); - continue; - } - - lov_bytes = lov_tgt_maxbytes(lov->lov_tgts[loi->loi_ost_idx]); - if (min_stripe_maxbytes == 0 || lov_bytes < min_stripe_maxbytes) - min_stripe_maxbytes = lov_bytes; - } - - if (min_stripe_maxbytes == 0) - min_stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES; - - stripe_count = lsm->lsm_stripe_count ?: lov->desc.ld_tgt_count; - lov_bytes = min_stripe_maxbytes * stripe_count; - - if (lov_bytes < min_stripe_maxbytes) /* handle overflow */ - lsm->lsm_maxbytes = MAX_LFS_FILESIZE; - else - lsm->lsm_maxbytes = lov_bytes; - - return 0; -} - -static void -lsm_stripe_by_index_plain(struct lov_stripe_md *lsm, int *stripeno, - loff_t *lov_off, loff_t *swidth) -{ - if (swidth) - *swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count; -} - -static void -lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno, - loff_t *lov_off, loff_t *swidth) -{ - if (swidth) - *swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count; -} - -static int lsm_lmm_verify_v1(struct lov_mds_md_v1 *lmm, int lmm_bytes, - __u16 *stripe_count) -{ - if (lmm_bytes < sizeof(*lmm)) { - CERROR("lov_mds_md_v1 too small: %d, need at least %d\n", - lmm_bytes, (int)sizeof(*lmm)); - return -EINVAL; - } - - *stripe_count = 
le16_to_cpu(lmm->lmm_stripe_count); - if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED) - *stripe_count = 0; - - if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V1)) { - CERROR("LOV EA V1 too small: %d, need %d\n", - lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V1)); - lov_dump_lmm_common(D_WARNING, lmm); - return -EINVAL; - } - - return lsm_lmm_verify_common(lmm, lmm_bytes, *stripe_count); -} - -static int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm, - struct lov_mds_md_v1 *lmm) -{ - return lsm_unpackmd_common(lov, lsm, lmm, lmm->lmm_objects); -} - -const struct lsm_operations lsm_v1_ops = { - .lsm_free = lsm_free_plain, - .lsm_stripe_by_index = lsm_stripe_by_index_plain, - .lsm_stripe_by_offset = lsm_stripe_by_offset_plain, - .lsm_lmm_verify = lsm_lmm_verify_v1, - .lsm_unpackmd = lsm_unpackmd_v1, -}; - -static int lsm_lmm_verify_v3(struct lov_mds_md *lmmv1, int lmm_bytes, - __u16 *stripe_count) -{ - struct lov_mds_md_v3 *lmm; - - lmm = (struct lov_mds_md_v3 *)lmmv1; - - if (lmm_bytes < sizeof(*lmm)) { - CERROR("lov_mds_md_v3 too small: %d, need at least %d\n", - lmm_bytes, (int)sizeof(*lmm)); - return -EINVAL; - } - - *stripe_count = le16_to_cpu(lmm->lmm_stripe_count); - if (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_F_RELEASED) - *stripe_count = 0; - - if (lmm_bytes < lov_mds_md_size(*stripe_count, LOV_MAGIC_V3)) { - CERROR("LOV EA V3 too small: %d, need %d\n", - lmm_bytes, lov_mds_md_size(*stripe_count, LOV_MAGIC_V3)); - lov_dump_lmm_common(D_WARNING, lmm); - return -EINVAL; - } - - return lsm_lmm_verify_common((struct lov_mds_md_v1 *)lmm, lmm_bytes, - *stripe_count); -} - -static int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm, - struct lov_mds_md *lmm) -{ - struct lov_mds_md_v3 *lmm_v3 = (struct lov_mds_md_v3 *)lmm; - size_t cplen = 0; - int rc; - - rc = lsm_unpackmd_common(lov, lsm, lmm, lmm_v3->lmm_objects); - if (rc) - return rc; - - cplen = strlcpy(lsm->lsm_pool_name, lmm_v3->lmm_pool_name, 
- sizeof(lsm->lsm_pool_name)); - if (cplen >= sizeof(lsm->lsm_pool_name)) - return -E2BIG; - - return 0; -} - -const struct lsm_operations lsm_v3_ops = { - .lsm_free = lsm_free_plain, - .lsm_stripe_by_index = lsm_stripe_by_index_plain, - .lsm_stripe_by_offset = lsm_stripe_by_offset_plain, - .lsm_lmm_verify = lsm_lmm_verify_v3, - .lsm_unpackmd = lsm_unpackmd_v3, -}; - -void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm) -{ - CDEBUG(level, "lsm %p, objid " DOSTID ", maxbytes %#llx, magic 0x%08X, stripe_size %u, stripe_count %u, refc: %d, layout_gen %u, pool [" LOV_POOLNAMEF "]\n", - lsm, - POSTID(&lsm->lsm_oi), lsm->lsm_maxbytes, lsm->lsm_magic, - lsm->lsm_stripe_size, lsm->lsm_stripe_count, - atomic_read(&lsm->lsm_refc), lsm->lsm_layout_gen, - lsm->lsm_pool_name); -} diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h deleted file mode 100644 index 47042f27ca90..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_internal.h +++ /dev/null @@ -1,286 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef LOV_INTERNAL_H -#define LOV_INTERNAL_H - -#include -#include - -/* - * If we are unable to get the maximum object size from the OST in - * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using - * the old maximum object size from ext3. - */ -#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL - -struct lov_stripe_md { - atomic_t lsm_refc; - spinlock_t lsm_lock; - pid_t lsm_lock_owner; /* debugging */ - - /* - * maximum possible file size, might change as OSTs status changes, - * e.g. disconnected, deactivated - */ - loff_t lsm_maxbytes; - struct ost_id lsm_oi; - u32 lsm_magic; - u32 lsm_stripe_size; - u32 lsm_pattern; /* RAID0, RAID1, released, ... */ - u16 lsm_stripe_count; - u16 lsm_layout_gen; - char lsm_pool_name[LOV_MAXPOOLNAME + 1]; - struct lov_oinfo *lsm_oinfo[0]; -}; - -static inline bool lsm_is_released(struct lov_stripe_md *lsm) -{ - return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED); -} - -static inline bool lsm_has_objects(struct lov_stripe_md *lsm) -{ - if (!lsm) - return false; - - if (lsm_is_released(lsm)) - return false; - - return true; -} - -struct lsm_operations { - void (*lsm_free)(struct lov_stripe_md *); - void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, loff_t *, - loff_t *); - void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, loff_t *, - loff_t *); - int (*lsm_lmm_verify)(struct lov_mds_md *lmm, int lmm_bytes, - u16 *stripe_count); - int (*lsm_unpackmd)(struct lov_obd *lov, struct lov_stripe_md *lsm, - struct lov_mds_md *lmm); -}; - -extern const struct lsm_operations lsm_v1_ops; -extern const struct lsm_operations lsm_v3_ops; - -static inline const struct lsm_operations *lsm_op_find(int magic) -{ - switch (magic) { - case LOV_MAGIC_V1: - return &lsm_v1_ops; - case LOV_MAGIC_V3: - 
return &lsm_v3_ops; - default: - CERROR("unrecognized lsm_magic %08x\n", magic); - return NULL; - } -} - -/* lov_do_div64(a, b) returns a % b, and a = a / b. - * The 32-bit code is LOV-specific due to knowing about stripe limits in - * order to reduce the divisor to a 32-bit number. If the divisor is - * already a 32-bit value the compiler handles this directly. - */ -#if BITS_PER_LONG == 64 -# define lov_do_div64(n, base) ({ \ - u64 __base = (base); \ - u64 __rem; \ - __rem = ((u64)(n)) % __base; \ - (n) = ((u64)(n)) / __base; \ - __rem; \ -}) -#elif BITS_PER_LONG == 32 -# define lov_do_div64(n, base) ({ \ - u64 __rem; \ - if ((sizeof(base) > 4) && (((base) & 0xffffffff00000000ULL) != 0)) { \ - int __remainder; \ - LASSERTF(!((base) & (LOV_MIN_STRIPE_SIZE - 1)), "64 bit lov " \ - "division %llu / %llu\n", (n), (u64)(base)); \ - __remainder = (n) & (LOV_MIN_STRIPE_SIZE - 1); \ - (n) >>= LOV_MIN_STRIPE_BITS; \ - __rem = do_div(n, (base) >> LOV_MIN_STRIPE_BITS); \ - __rem <<= LOV_MIN_STRIPE_BITS; \ - __rem += __remainder; \ - } else { \ - __rem = do_div(n, base); \ - } \ - __rem; \ -}) -#endif - -#define pool_tgt_size(p) ((p)->pool_obds.op_size) -#define pool_tgt_count(p) ((p)->pool_obds.op_count) -#define pool_tgt_array(p) ((p)->pool_obds.op_array) -#define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem) - -struct pool_desc { - char pool_name[LOV_MAXPOOLNAME + 1]; - struct ost_pool pool_obds; - atomic_t pool_refcount; - struct rhash_head pool_hash; /* access by poolname */ - union { - struct list_head pool_list; /* serial access */ - struct rcu_head rcu; /* delayed free */ - }; - struct dentry *pool_debugfs_entry; /* file in debugfs */ - struct obd_device *pool_lobd; /* owner */ -}; -int lov_pool_hash_init(struct rhashtable *tbl); -void lov_pool_hash_destroy(struct rhashtable *tbl); - -struct lov_request { - struct obd_info rq_oi; - struct lov_request_set *rq_rqset; - - struct list_head rq_link; - - int rq_idx; /* index in lov->tgts array */ -}; - -struct 
lov_request_set { - struct obd_info *set_oi; - struct obd_device *set_obd; - int set_count; - atomic_t set_completes; - atomic_t set_success; - struct list_head set_list; -}; - -extern struct kmem_cache *lov_oinfo_slab; - -extern struct lu_kmem_descr lov_caches[]; - -#define lov_uuid2str(lv, index) \ - (char *)((lv)->lov_tgts[index]->ltd_uuid.uuid) - -/* lov_merge.c */ -int lov_merge_lvb_kms(struct lov_stripe_md *lsm, - struct ost_lvb *lvb, __u64 *kms_place); - -/* lov_offset.c */ -u64 lov_stripe_size(struct lov_stripe_md *lsm, u64 ost_size, int stripeno); -int lov_stripe_offset(struct lov_stripe_md *lsm, u64 lov_off, - int stripeno, u64 *u64); -u64 lov_size_to_stripe(struct lov_stripe_md *lsm, u64 file_size, int stripeno); -int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno, - u64 start, u64 end, - u64 *obd_start, u64 *obd_end); -int lov_stripe_number(struct lov_stripe_md *lsm, u64 lov_off); -pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index, - int stripe); - -/* lov_request.c */ -int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo, - struct lov_request_set **reqset); -int lov_fini_statfs_set(struct lov_request_set *set); - -/* lov_obd.c */ -void lov_stripe_lock(struct lov_stripe_md *md); -void lov_stripe_unlock(struct lov_stripe_md *md); -void lov_fix_desc(struct lov_desc *desc); -void lov_fix_desc_stripe_size(__u64 *val); -void lov_fix_desc_stripe_count(__u32 *val); -void lov_fix_desc_pattern(__u32 *val); -void lov_fix_desc_qos_maxage(__u32 *val); -__u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count); -int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, - struct obd_connect_data *data); -int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg); -int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg, - __u32 *indexp, int *genp); -int lov_del_target(struct obd_device *obd, __u32 index, - struct obd_uuid *uuidp, int gen); - -/* lov_pack.c 
*/ -ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf, - size_t buf_size); -struct lov_stripe_md *lov_unpackmd(struct lov_obd *lov, struct lov_mds_md *lmm, - size_t lmm_size); -int lov_free_memmd(struct lov_stripe_md **lsmp); - -void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm); -void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm); -void lov_dump_lmm_common(int level, void *lmmp); - -/* lov_ea.c */ -struct lov_stripe_md *lsm_alloc_plain(u16 stripe_count); -void lsm_free_plain(struct lov_stripe_md *lsm); -void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm); - -/* lproc_lov.c */ -extern const struct file_operations lov_proc_target_fops; -void lprocfs_lov_init_vars(struct lprocfs_static_vars *lvars); - -/* lov_cl.c */ -extern struct lu_device_type lov_device_type; - -/* ost_pool methods */ -int lov_ost_pool_init(struct ost_pool *op, unsigned int count); -int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count); -int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count); -int lov_ost_pool_remove(struct ost_pool *op, __u32 idx); -int lov_ost_pool_free(struct ost_pool *op); - -/* high level pool methods */ -int lov_pool_new(struct obd_device *obd, char *poolname); -int lov_pool_del(struct obd_device *obd, char *poolname); -int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname); -int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname); -void lov_pool_putref(struct pool_desc *pool); - -static inline struct lov_stripe_md *lsm_addref(struct lov_stripe_md *lsm) -{ - LASSERT(atomic_read(&lsm->lsm_refc) > 0); - atomic_inc(&lsm->lsm_refc); - return lsm; -} - -static inline bool lov_oinfo_is_dummy(const struct lov_oinfo *loi) -{ - if (unlikely(loi->loi_oi.oi.oi_id == 0 && - loi->loi_oi.oi.oi_seq == 0 && - loi->loi_ost_idx == 0 && - loi->loi_ost_gen == 0)) - return true; - - return false; -} - -static inline struct obd_device *lov2obd(const struct lov_obd *lov) -{ - 
return container_of_safe(lov, struct obd_device, u.lov); -} - -#endif diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c deleted file mode 100644 index b823f8a21856..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_io.c +++ /dev/null @@ -1,1023 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_io for LOV layer. 
- * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include "lov_cl_internal.h" - -/** \addtogroup lov - * @{ - */ - -static void lov_io_sub_fini(const struct lu_env *env, struct lov_io *lio, - struct lov_io_sub *sub) -{ - if (sub->sub_io) { - if (sub->sub_io_initialized) { - cl_io_fini(sub->sub_env, sub->sub_io); - sub->sub_io_initialized = 0; - lio->lis_active_subios--; - } - if (sub->sub_stripe == lio->lis_single_subio_index) - lio->lis_single_subio_index = -1; - else if (!sub->sub_borrowed) - kfree(sub->sub_io); - sub->sub_io = NULL; - } - if (!IS_ERR_OR_NULL(sub->sub_env)) { - if (!sub->sub_borrowed) - cl_env_put(sub->sub_env, &sub->sub_refcheck); - sub->sub_env = NULL; - } -} - -static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio, - int stripe, loff_t start, loff_t end) -{ - struct lov_stripe_md *lsm = lio->lis_object->lo_lsm; - struct cl_io *parent = lio->lis_cl.cis_io; - - switch (io->ci_type) { - case CIT_SETATTR: { - io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr; - io->u.ci_setattr.sa_attr_flags = - parent->u.ci_setattr.sa_attr_flags; - io->u.ci_setattr.sa_valid = parent->u.ci_setattr.sa_valid; - io->u.ci_setattr.sa_stripe_index = stripe; - io->u.ci_setattr.sa_parent_fid = - parent->u.ci_setattr.sa_parent_fid; - if (cl_io_is_trunc(io)) { - loff_t new_size = parent->u.ci_setattr.sa_attr.lvb_size; - - new_size = lov_size_to_stripe(lsm, new_size, stripe); - io->u.ci_setattr.sa_attr.lvb_size = new_size; - } - break; - } - case CIT_DATA_VERSION: { - io->u.ci_data_version.dv_data_version = 0; - io->u.ci_data_version.dv_flags = - parent->u.ci_data_version.dv_flags; - break; - } - case CIT_FAULT: { - struct cl_object *obj = parent->ci_obj; - loff_t off = cl_offset(obj, parent->u.ci_fault.ft_index); - - io->u.ci_fault = parent->u.ci_fault; - off = lov_size_to_stripe(lsm, off, stripe); - io->u.ci_fault.ft_index = cl_index(obj, off); - break; - } - case CIT_FSYNC: { - io->u.ci_fsync.fi_start = 
start; - io->u.ci_fsync.fi_end = end; - io->u.ci_fsync.fi_fid = parent->u.ci_fsync.fi_fid; - io->u.ci_fsync.fi_mode = parent->u.ci_fsync.fi_mode; - break; - } - case CIT_READ: - case CIT_WRITE: { - io->u.ci_wr.wr_sync = cl_io_is_sync_write(parent); - if (cl_io_is_append(parent)) { - io->u.ci_wr.wr_append = 1; - } else { - io->u.ci_rw.crw_pos = start; - io->u.ci_rw.crw_count = end - start; - } - break; - } - default: - break; - } -} - -static int lov_io_sub_init(const struct lu_env *env, struct lov_io *lio, - struct lov_io_sub *sub) -{ - struct lov_object *lov = lio->lis_object; - struct cl_io *sub_io; - struct cl_object *sub_obj; - struct cl_io *io = lio->lis_cl.cis_io; - int stripe = sub->sub_stripe; - int rc; - - LASSERT(!sub->sub_io); - LASSERT(!sub->sub_env); - LASSERT(sub->sub_stripe < lio->lis_stripe_count); - - if (unlikely(!lov_r0(lov)->lo_sub[stripe])) - return -EIO; - - sub->sub_io_initialized = 0; - sub->sub_borrowed = 0; - - /* obtain new environment */ - sub->sub_env = cl_env_get(&sub->sub_refcheck); - if (IS_ERR(sub->sub_env)) { - rc = PTR_ERR(sub->sub_env); - goto fini_lov_io; - } - - /* - * First sub-io. Use ->lis_single_subio to - * avoid dynamic allocation. 
- */ - if (lio->lis_active_subios == 0) { - sub->sub_io = &lio->lis_single_subio; - lio->lis_single_subio_index = stripe; - } else { - sub->sub_io = kzalloc(sizeof(*sub->sub_io), - GFP_NOFS); - if (!sub->sub_io) { - rc = -ENOMEM; - goto fini_lov_io; - } - } - - sub_obj = lovsub2cl(lov_r0(lov)->lo_sub[stripe]); - sub_io = sub->sub_io; - - sub_io->ci_obj = sub_obj; - sub_io->ci_result = 0; - sub_io->ci_parent = io; - sub_io->ci_lockreq = io->ci_lockreq; - sub_io->ci_type = io->ci_type; - sub_io->ci_no_srvlock = io->ci_no_srvlock; - sub_io->ci_noatime = io->ci_noatime; - - rc = cl_io_sub_init(sub->sub_env, sub_io, io->ci_type, sub_obj); - if (rc >= 0) { - lio->lis_active_subios++; - sub->sub_io_initialized = 1; - rc = 0; - } -fini_lov_io: - if (rc) - lov_io_sub_fini(env, lio, sub); - return rc; -} - -struct lov_io_sub *lov_sub_get(const struct lu_env *env, - struct lov_io *lio, int stripe) -{ - int rc; - struct lov_io_sub *sub = &lio->lis_subs[stripe]; - - LASSERT(stripe < lio->lis_stripe_count); - - if (!sub->sub_io_initialized) { - sub->sub_stripe = stripe; - rc = lov_io_sub_init(env, lio, sub); - } else { - rc = 0; - } - if (rc < 0) - sub = ERR_PTR(rc); - - return sub; -} - -/***************************************************************************** - * - * Lov io operations. - * - */ - -int lov_page_stripe(const struct cl_page *page) -{ - const struct cl_page_slice *slice; - - slice = cl_page_at(page, &lov_device_type); - LASSERT(slice->cpl_obj); - - return cl2lov_page(slice)->lps_stripe; -} - -static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, - struct cl_io *io) -{ - struct lov_stripe_md *lsm; - int result; - - LASSERT(lio->lis_object); - lsm = lio->lis_object->lo_lsm; - - /* - * Need to be optimized, we can't afford to allocate a piece of memory - * when writing a page. 
-jay - */ - lio->lis_subs = - kvzalloc(lsm->lsm_stripe_count * - sizeof(lio->lis_subs[0]), - GFP_NOFS); - if (lio->lis_subs) { - lio->lis_nr_subios = lio->lis_stripe_count; - lio->lis_single_subio_index = -1; - lio->lis_active_subios = 0; - result = 0; - } else { - result = -ENOMEM; - } - return result; -} - -static int lov_io_slice_init(struct lov_io *lio, struct lov_object *obj, - struct cl_io *io) -{ - io->ci_result = 0; - lio->lis_object = obj; - - lio->lis_stripe_count = obj->lo_lsm->lsm_stripe_count; - - switch (io->ci_type) { - case CIT_READ: - case CIT_WRITE: - lio->lis_pos = io->u.ci_rw.crw_pos; - lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count; - lio->lis_io_endpos = lio->lis_endpos; - if (cl_io_is_append(io)) { - LASSERT(io->ci_type == CIT_WRITE); - - /* - * If there is LOV EA hole, then we may cannot locate - * the current file-tail exactly. - */ - if (unlikely(obj->lo_lsm->lsm_pattern & - LOV_PATTERN_F_HOLE)) - return -EIO; - - lio->lis_pos = 0; - lio->lis_endpos = OBD_OBJECT_EOF; - } - break; - - case CIT_SETATTR: - if (cl_io_is_trunc(io)) - lio->lis_pos = io->u.ci_setattr.sa_attr.lvb_size; - else - lio->lis_pos = 0; - lio->lis_endpos = OBD_OBJECT_EOF; - break; - - case CIT_DATA_VERSION: - lio->lis_pos = 0; - lio->lis_endpos = OBD_OBJECT_EOF; - break; - - case CIT_FAULT: { - pgoff_t index = io->u.ci_fault.ft_index; - - lio->lis_pos = cl_offset(io->ci_obj, index); - lio->lis_endpos = cl_offset(io->ci_obj, index + 1); - break; - } - - case CIT_FSYNC: { - lio->lis_pos = io->u.ci_fsync.fi_start; - lio->lis_endpos = io->u.ci_fsync.fi_end; - break; - } - - case CIT_MISC: - lio->lis_pos = 0; - lio->lis_endpos = OBD_OBJECT_EOF; - break; - - default: - LBUG(); - } - return 0; -} - -static void lov_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_object *lov = cl2lov(ios->cis_obj); - int i; - - if (lio->lis_subs) { - for (i = 0; i < lio->lis_nr_subios; i++) - 
lov_io_sub_fini(env, lio, &lio->lis_subs[i]); - kvfree(lio->lis_subs); - lio->lis_nr_subios = 0; - } - - LASSERT(atomic_read(&lov->lo_active_ios) > 0); - if (atomic_dec_and_test(&lov->lo_active_ios)) - wake_up_all(&lov->lo_waitq); -} - -static u64 lov_offset_mod(u64 val, int delta) -{ - if (val != OBD_OBJECT_EOF) - val += delta; - return val; -} - -static int lov_io_iter_init(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_stripe_md *lsm = lio->lis_object->lo_lsm; - struct lov_io_sub *sub; - u64 endpos; - u64 start; - u64 end; - int stripe; - int rc = 0; - - endpos = lov_offset_mod(lio->lis_endpos, -1); - for (stripe = 0; stripe < lio->lis_stripe_count; stripe++) { - if (!lov_stripe_intersects(lsm, stripe, lio->lis_pos, - endpos, &start, &end)) - continue; - - if (unlikely(!lov_r0(lio->lis_object)->lo_sub[stripe])) { - if (ios->cis_io->ci_type == CIT_READ || - ios->cis_io->ci_type == CIT_WRITE || - ios->cis_io->ci_type == CIT_FAULT) - return -EIO; - - continue; - } - - end = lov_offset_mod(end, 1); - sub = lov_sub_get(env, lio, stripe); - if (IS_ERR(sub)) { - rc = PTR_ERR(sub); - break; - } - - lov_io_sub_inherit(sub->sub_io, lio, stripe, start, end); - rc = cl_io_iter_init(sub->sub_env, sub->sub_io); - if (rc) { - cl_io_iter_fini(sub->sub_env, sub->sub_io); - break; - } - CDEBUG(D_VFSTRACE, "shrink: %d [%llu, %llu)\n", - stripe, start, end); - - list_add_tail(&sub->sub_linkage, &lio->lis_active); - } - return rc; -} - -static int lov_io_rw_iter_init(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct cl_io *io = ios->cis_io; - struct lov_stripe_md *lsm = lio->lis_object->lo_lsm; - __u64 start = io->u.ci_rw.crw_pos; - loff_t next; - unsigned long ssize = lsm->lsm_stripe_size; - - LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); - - /* fast path for common case. 
*/ - if (lio->lis_nr_subios != 1 && !cl_io_is_append(io)) { - lov_do_div64(start, ssize); - next = (start + 1) * ssize; - if (next <= start * ssize) - next = ~0ull; - - io->ci_continue = next < lio->lis_io_endpos; - io->u.ci_rw.crw_count = min_t(loff_t, lio->lis_io_endpos, - next) - io->u.ci_rw.crw_pos; - lio->lis_pos = io->u.ci_rw.crw_pos; - lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count; - CDEBUG(D_VFSTRACE, "stripe: %llu chunk: [%llu, %llu) %llu\n", - (__u64)start, lio->lis_pos, lio->lis_endpos, - (__u64)lio->lis_io_endpos); - } - /* - * XXX The following call should be optimized: we know, that - * [lio->lis_pos, lio->lis_endpos) intersects with exactly one stripe. - */ - return lov_io_iter_init(env, ios); -} - -static int lov_io_call(const struct lu_env *env, struct lov_io *lio, - int (*iofunc)(const struct lu_env *, struct cl_io *)) -{ - struct cl_io *parent = lio->lis_cl.cis_io; - struct lov_io_sub *sub; - int rc = 0; - - list_for_each_entry(sub, &lio->lis_active, sub_linkage) { - rc = iofunc(sub->sub_env, sub->sub_io); - if (rc) - break; - - if (parent->ci_result == 0) - parent->ci_result = sub->sub_io->ci_result; - } - return rc; -} - -static int lov_io_lock(const struct lu_env *env, const struct cl_io_slice *ios) -{ - return lov_io_call(env, cl2lov_io(env, ios), cl_io_lock); -} - -static int lov_io_start(const struct lu_env *env, const struct cl_io_slice *ios) -{ - return lov_io_call(env, cl2lov_io(env, ios), cl_io_start); -} - -static int lov_io_end_wrapper(const struct lu_env *env, struct cl_io *io) -{ - /* - * It's possible that lov_io_start() wasn't called against this - * sub-io, either because previous sub-io failed, or upper layer - * completed IO. 
- */ - if (io->ci_state == CIS_IO_GOING) - cl_io_end(env, io); - else - io->ci_state = CIS_IO_FINISHED; - return 0; -} - -static void -lov_io_data_version_end(const struct lu_env *env, const struct cl_io_slice *ios) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct cl_io *parent = lio->lis_cl.cis_io; - struct lov_io_sub *sub; - - list_for_each_entry(sub, &lio->lis_active, sub_linkage) { - lov_io_end_wrapper(sub->sub_env, sub->sub_io); - - parent->u.ci_data_version.dv_data_version += - sub->sub_io->u.ci_data_version.dv_data_version; - - if (!parent->ci_result) - parent->ci_result = sub->sub_io->ci_result; - } -} - -static int lov_io_iter_fini_wrapper(const struct lu_env *env, struct cl_io *io) -{ - cl_io_iter_fini(env, io); - return 0; -} - -static int lov_io_unlock_wrapper(const struct lu_env *env, struct cl_io *io) -{ - cl_io_unlock(env, io); - return 0; -} - -static void lov_io_end(const struct lu_env *env, const struct cl_io_slice *ios) -{ - int rc; - - rc = lov_io_call(env, cl2lov_io(env, ios), lov_io_end_wrapper); - LASSERT(rc == 0); -} - -static void lov_io_iter_fini(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct lov_io *lio = cl2lov_io(env, ios); - int rc; - - rc = lov_io_call(env, lio, lov_io_iter_fini_wrapper); - LASSERT(rc == 0); - while (!list_empty(&lio->lis_active)) - list_del_init(lio->lis_active.next); -} - -static void lov_io_unlock(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - int rc; - - rc = lov_io_call(env, cl2lov_io(env, ios), lov_io_unlock_wrapper); - LASSERT(rc == 0); -} - -static int lov_io_read_ahead(const struct lu_env *env, - const struct cl_io_slice *ios, - pgoff_t start, struct cl_read_ahead *ra) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_object *loo = lio->lis_object; - struct cl_object *obj = lov2cl(loo); - struct lov_layout_raid0 *r0 = lov_r0(loo); - unsigned int pps; /* pages per stripe */ - struct lov_io_sub *sub; - pgoff_t ra_end; - loff_t suboff; - int stripe; - int 
rc; - - stripe = lov_stripe_number(loo->lo_lsm, cl_offset(obj, start)); - if (unlikely(!r0->lo_sub[stripe])) - return -EIO; - - sub = lov_sub_get(env, lio, stripe); - if (IS_ERR(sub)) - return PTR_ERR(sub); - - lov_stripe_offset(loo->lo_lsm, cl_offset(obj, start), stripe, &suboff); - rc = cl_io_read_ahead(sub->sub_env, sub->sub_io, - cl_index(lovsub2cl(r0->lo_sub[stripe]), suboff), - ra); - - CDEBUG(D_READA, DFID " cra_end = %lu, stripes = %d, rc = %d\n", - PFID(lu_object_fid(lov2lu(loo))), ra->cra_end, r0->lo_nr, rc); - if (rc) - return rc; - - /** - * Adjust the stripe index by layout of raid0. ra->cra_end is - * the maximum page index covered by an underlying DLM lock. - * This function converts cra_end from stripe level to file - * level, and make sure it's not beyond stripe boundary. - */ - if (r0->lo_nr == 1) /* single stripe file */ - return 0; - - /* cra_end is stripe level, convert it into file level */ - ra_end = ra->cra_end; - if (ra_end != CL_PAGE_EOF) - ra_end = lov_stripe_pgoff(loo->lo_lsm, ra_end, stripe); - - pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT; - - CDEBUG(D_READA, DFID " max_index = %lu, pps = %u, stripe_size = %u, stripe no = %u, start index = %lu\n", - PFID(lu_object_fid(lov2lu(loo))), ra_end, pps, - loo->lo_lsm->lsm_stripe_size, stripe, start); - - /* never exceed the end of the stripe */ - ra->cra_end = min_t(pgoff_t, ra_end, start + pps - start % pps - 1); - return 0; -} - -/** - * lov implementation of cl_operations::cio_submit() method. It takes a list - * of pages in \a queue, splits it into per-stripe sub-lists, invokes - * cl_io_submit() on underlying devices to submit sub-lists, and then splices - * everything back. - * - * Major complication of this function is a need to handle memory cleansing: - * cl_io_submit() is called to write out pages as a part of VM memory - * reclamation, and hence it may not fail due to memory shortages (system - * dead-locks otherwise). 
To deal with this, some resources (sub-lists, - * sub-environment, etc.) are allocated per-device on "startup" (i.e., in a - * not-memory cleansing context), and in case of memory shortage, these - * pre-allocated resources are used by lov_io_submit() under - * lov_device::ld_mutex mutex. - */ -static int lov_io_submit(const struct lu_env *env, - const struct cl_io_slice *ios, - enum cl_req_type crt, struct cl_2queue *queue) -{ - struct cl_page_list *qin = &queue->c2_qin; - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_io_sub *sub; - struct cl_page_list *plist = &lov_env_info(env)->lti_plist; - struct cl_page *page; - int stripe; - - int rc = 0; - - if (lio->lis_active_subios == 1) { - int idx = lio->lis_single_subio_index; - - LASSERT(idx < lio->lis_nr_subios); - sub = lov_sub_get(env, lio, idx); - LASSERT(!IS_ERR(sub)); - LASSERT(sub->sub_io == &lio->lis_single_subio); - rc = cl_io_submit_rw(sub->sub_env, sub->sub_io, - crt, queue); - return rc; - } - - LASSERT(lio->lis_subs); - - cl_page_list_init(plist); - while (qin->pl_nr > 0) { - struct cl_2queue *cl2q = &lov_env_info(env)->lti_cl2q; - - cl_2queue_init(cl2q); - - page = cl_page_list_first(qin); - cl_page_list_move(&cl2q->c2_qin, qin, page); - - stripe = lov_page_stripe(page); - while (qin->pl_nr > 0) { - page = cl_page_list_first(qin); - if (stripe != lov_page_stripe(page)) - break; - - cl_page_list_move(&cl2q->c2_qin, qin, page); - } - - sub = lov_sub_get(env, lio, stripe); - if (!IS_ERR(sub)) { - rc = cl_io_submit_rw(sub->sub_env, sub->sub_io, - crt, cl2q); - } else { - rc = PTR_ERR(sub); - } - - cl_page_list_splice(&cl2q->c2_qin, plist); - cl_page_list_splice(&cl2q->c2_qout, &queue->c2_qout); - cl_2queue_fini(env, cl2q); - - if (rc != 0) - break; - } - - cl_page_list_splice(plist, qin); - cl_page_list_fini(env, plist); - - return rc; -} - -static int lov_io_commit_async(const struct lu_env *env, - const struct cl_io_slice *ios, - struct cl_page_list *queue, int from, int to, - cl_commit_cbt cb) 
-{ - struct cl_page_list *plist = &lov_env_info(env)->lti_plist; - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_io_sub *sub; - struct cl_page *page; - int rc = 0; - - if (lio->lis_active_subios == 1) { - int idx = lio->lis_single_subio_index; - - LASSERT(idx < lio->lis_nr_subios); - sub = lov_sub_get(env, lio, idx); - LASSERT(!IS_ERR(sub)); - LASSERT(sub->sub_io == &lio->lis_single_subio); - rc = cl_io_commit_async(sub->sub_env, sub->sub_io, queue, - from, to, cb); - return rc; - } - - LASSERT(lio->lis_subs); - - cl_page_list_init(plist); - while (queue->pl_nr > 0) { - int stripe_to = to; - int stripe; - - LASSERT(plist->pl_nr == 0); - page = cl_page_list_first(queue); - cl_page_list_move(plist, queue, page); - - stripe = lov_page_stripe(page); - while (queue->pl_nr > 0) { - page = cl_page_list_first(queue); - if (stripe != lov_page_stripe(page)) - break; - - cl_page_list_move(plist, queue, page); - } - - if (queue->pl_nr > 0) /* still has more pages */ - stripe_to = PAGE_SIZE; - - sub = lov_sub_get(env, lio, stripe); - if (!IS_ERR(sub)) { - rc = cl_io_commit_async(sub->sub_env, sub->sub_io, - plist, from, stripe_to, cb); - } else { - rc = PTR_ERR(sub); - break; - } - - if (plist->pl_nr > 0) /* short write */ - break; - - from = 0; - } - - /* for error case, add the page back into the qin list */ - LASSERT(ergo(rc == 0, plist->pl_nr == 0)); - while (plist->pl_nr > 0) { - /* error occurred, add the uncommitted pages back into queue */ - page = cl_page_list_last(plist); - cl_page_list_move_head(queue, plist, page); - } - - return rc; -} - -static int lov_io_fault_start(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_fault_io *fio; - struct lov_io *lio; - struct lov_io_sub *sub; - - fio = &ios->cis_io->u.ci_fault; - lio = cl2lov_io(env, ios); - sub = lov_sub_get(env, lio, lov_page_stripe(fio->ft_page)); - if (IS_ERR(sub)) - return PTR_ERR(sub); - sub->sub_io->u.ci_fault.ft_nob = fio->ft_nob; - return lov_io_start(env, ios); -} - 
-static void lov_io_fsync_end(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct lov_io *lio = cl2lov_io(env, ios); - struct lov_io_sub *sub; - unsigned int *written = &ios->cis_io->u.ci_fsync.fi_nr_written; - - *written = 0; - list_for_each_entry(sub, &lio->lis_active, sub_linkage) { - struct cl_io *subio = sub->sub_io; - - lov_io_end_wrapper(sub->sub_env, subio); - - if (subio->ci_result == 0) - *written += subio->u.ci_fsync.fi_nr_written; - } -} - -static const struct cl_io_operations lov_io_ops = { - .op = { - [CIT_READ] = { - .cio_fini = lov_io_fini, - .cio_iter_init = lov_io_rw_iter_init, - .cio_iter_fini = lov_io_iter_fini, - .cio_lock = lov_io_lock, - .cio_unlock = lov_io_unlock, - .cio_start = lov_io_start, - .cio_end = lov_io_end - }, - [CIT_WRITE] = { - .cio_fini = lov_io_fini, - .cio_iter_init = lov_io_rw_iter_init, - .cio_iter_fini = lov_io_iter_fini, - .cio_lock = lov_io_lock, - .cio_unlock = lov_io_unlock, - .cio_start = lov_io_start, - .cio_end = lov_io_end - }, - [CIT_SETATTR] = { - .cio_fini = lov_io_fini, - .cio_iter_init = lov_io_iter_init, - .cio_iter_fini = lov_io_iter_fini, - .cio_lock = lov_io_lock, - .cio_unlock = lov_io_unlock, - .cio_start = lov_io_start, - .cio_end = lov_io_end - }, - [CIT_DATA_VERSION] = { - .cio_fini = lov_io_fini, - .cio_iter_init = lov_io_iter_init, - .cio_iter_fini = lov_io_iter_fini, - .cio_lock = lov_io_lock, - .cio_unlock = lov_io_unlock, - .cio_start = lov_io_start, - .cio_end = lov_io_data_version_end, - }, - [CIT_FAULT] = { - .cio_fini = lov_io_fini, - .cio_iter_init = lov_io_iter_init, - .cio_iter_fini = lov_io_iter_fini, - .cio_lock = lov_io_lock, - .cio_unlock = lov_io_unlock, - .cio_start = lov_io_fault_start, - .cio_end = lov_io_end - }, - [CIT_FSYNC] = { - .cio_fini = lov_io_fini, - .cio_iter_init = lov_io_iter_init, - .cio_iter_fini = lov_io_iter_fini, - .cio_lock = lov_io_lock, - .cio_unlock = lov_io_unlock, - .cio_start = lov_io_start, - .cio_end = lov_io_fsync_end - }, - 
[CIT_MISC] = { - .cio_fini = lov_io_fini - } - }, - .cio_read_ahead = lov_io_read_ahead, - .cio_submit = lov_io_submit, - .cio_commit_async = lov_io_commit_async, -}; - -/***************************************************************************** - * - * Empty lov io operations. - * - */ - -static void lov_empty_io_fini(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct lov_object *lov = cl2lov(ios->cis_obj); - - if (atomic_dec_and_test(&lov->lo_active_ios)) - wake_up_all(&lov->lo_waitq); -} - -static int lov_empty_io_submit(const struct lu_env *env, - const struct cl_io_slice *ios, - enum cl_req_type crt, struct cl_2queue *queue) -{ - return -EBADF; -} - -static void lov_empty_impossible(const struct lu_env *env, - struct cl_io_slice *ios) -{ - LBUG(); -} - -#define LOV_EMPTY_IMPOSSIBLE ((void *)lov_empty_impossible) - -/** - * An io operation vector for files without stripes. - */ -static const struct cl_io_operations lov_empty_io_ops = { - .op = { - [CIT_READ] = { - .cio_fini = lov_empty_io_fini, - }, - [CIT_WRITE] = { - .cio_fini = lov_empty_io_fini, - .cio_iter_init = LOV_EMPTY_IMPOSSIBLE, - .cio_lock = LOV_EMPTY_IMPOSSIBLE, - .cio_start = LOV_EMPTY_IMPOSSIBLE, - .cio_end = LOV_EMPTY_IMPOSSIBLE - }, - [CIT_SETATTR] = { - .cio_fini = lov_empty_io_fini, - .cio_iter_init = LOV_EMPTY_IMPOSSIBLE, - .cio_lock = LOV_EMPTY_IMPOSSIBLE, - .cio_start = LOV_EMPTY_IMPOSSIBLE, - .cio_end = LOV_EMPTY_IMPOSSIBLE - }, - [CIT_FAULT] = { - .cio_fini = lov_empty_io_fini, - .cio_iter_init = LOV_EMPTY_IMPOSSIBLE, - .cio_lock = LOV_EMPTY_IMPOSSIBLE, - .cio_start = LOV_EMPTY_IMPOSSIBLE, - .cio_end = LOV_EMPTY_IMPOSSIBLE - }, - [CIT_FSYNC] = { - .cio_fini = lov_empty_io_fini - }, - [CIT_MISC] = { - .cio_fini = lov_empty_io_fini - } - }, - .cio_submit = lov_empty_io_submit, - .cio_commit_async = LOV_EMPTY_IMPOSSIBLE -}; - -int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io) -{ - struct lov_io *lio = lov_env_io(env); - struct 
lov_object *lov = cl2lov(obj); - - INIT_LIST_HEAD(&lio->lis_active); - io->ci_result = lov_io_slice_init(lio, lov, io); - if (io->ci_result == 0) { - io->ci_result = lov_io_subio_init(env, lio, io); - if (io->ci_result == 0) { - cl_io_slice_add(io, &lio->lis_cl, obj, &lov_io_ops); - atomic_inc(&lov->lo_active_ios); - } - } - return io->ci_result; -} - -int lov_io_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io) -{ - struct lov_object *lov = cl2lov(obj); - struct lov_io *lio = lov_env_io(env); - int result; - - lio->lis_object = lov; - switch (io->ci_type) { - default: - LBUG(); - case CIT_MISC: - case CIT_READ: - result = 0; - break; - case CIT_FSYNC: - case CIT_SETATTR: - case CIT_DATA_VERSION: - result = 1; - break; - case CIT_WRITE: - result = -EBADF; - break; - case CIT_FAULT: - result = -EFAULT; - CERROR("Page fault on a file without stripes: " DFID "\n", - PFID(lu_object_fid(&obj->co_lu))); - break; - } - if (result == 0) { - cl_io_slice_add(io, &lio->lis_cl, obj, &lov_empty_io_ops); - atomic_inc(&lov->lo_active_ios); - } - - io->ci_result = result < 0 ? 
result : 0; - return result; -} - -int lov_io_init_released(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io) -{ - struct lov_object *lov = cl2lov(obj); - struct lov_io *lio = lov_env_io(env); - int result; - - LASSERT(lov->lo_lsm); - lio->lis_object = lov; - - switch (io->ci_type) { - default: - LASSERTF(0, "invalid type %d\n", io->ci_type); - result = -EOPNOTSUPP; - break; - case CIT_MISC: - case CIT_FSYNC: - case CIT_DATA_VERSION: - result = 1; - break; - case CIT_SETATTR: - /* the truncate to 0 is managed by MDT: - * - in open, for open O_TRUNC - * - in setattr, for truncate - */ - /* the truncate is for size > 0 so triggers a restore */ - if (cl_io_is_trunc(io)) { - io->ci_restore_needed = 1; - result = -ENODATA; - } else { - result = 1; - } - break; - case CIT_READ: - case CIT_WRITE: - case CIT_FAULT: - io->ci_restore_needed = 1; - result = -ENODATA; - break; - } - if (result == 0) { - cl_io_slice_add(io, &lio->lis_cl, obj, &lov_empty_io_ops); - atomic_inc(&lov->lo_active_ios); - } - - io->ci_result = result < 0 ? result : 0; - return result; -} - -/** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lov_lock.c b/drivers/staging/lustre/lustre/lov/lov_lock.c deleted file mode 100644 index b0292100bf26..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_lock.c +++ /dev/null @@ -1,348 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_lock for LOV layer. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include "lov_cl_internal.h" - -/** \addtogroup lov - * @{ - */ - -/***************************************************************************** - * - * Lov lock operations. - * - */ - -static struct lov_sublock_env *lov_sublock_env_get(const struct lu_env *env, - const struct cl_lock *parent, - struct lov_lock_sub *lls) -{ - struct lov_sublock_env *subenv; - struct lov_io *lio = lov_env_io(env); - struct cl_io *io = lio->lis_cl.cis_io; - struct lov_io_sub *sub; - - subenv = &lov_env_session(env)->ls_subenv; - - /* - * FIXME: We tend to use the subio's env & io to call the sublock - * lock operations because osc lock sometimes stores some control - * variables in thread's IO information(Now only lockless information). - * However, if the lock's host(object) is different from the object - * for current IO, we have no way to get the subenv and subio because - * they are not initialized at all. As a temp fix, in this case, - * we still borrow the parent's env to call sublock operations. 
- */ - if (!io || !cl_object_same(io->ci_obj, parent->cll_descr.cld_obj)) { - subenv->lse_env = env; - subenv->lse_io = io; - } else { - sub = lov_sub_get(env, lio, lls->sub_stripe); - if (!IS_ERR(sub)) { - subenv->lse_env = sub->sub_env; - subenv->lse_io = sub->sub_io; - } else { - subenv = (void *)sub; - } - } - return subenv; -} - -static int lov_sublock_init(const struct lu_env *env, - const struct cl_lock *parent, - struct lov_lock_sub *lls) -{ - struct lov_sublock_env *subenv; - int result; - - subenv = lov_sublock_env_get(env, parent, lls); - if (!IS_ERR(subenv)) { - result = cl_lock_init(subenv->lse_env, &lls->sub_lock, - subenv->lse_io); - } else { - /* error occurs. */ - result = PTR_ERR(subenv); - } - return result; -} - -/** - * Creates sub-locks for a given lov_lock for the first time. - * - * Goes through all sub-objects of top-object, and creates sub-locks on every - * sub-object intersecting with top-lock extent. This is complicated by the - * fact that top-lock (that is being created) can be accessed concurrently - * through already created sub-locks (possibly shared with other top-locks). - */ -static struct lov_lock *lov_lock_sub_init(const struct lu_env *env, - const struct cl_object *obj, - struct cl_lock *lock) -{ - int result = 0; - int i; - int nr; - u64 start; - u64 end; - u64 file_start; - u64 file_end; - - struct lov_object *loo = cl2lov(obj); - struct lov_layout_raid0 *r0 = lov_r0(loo); - struct lov_lock *lovlck; - - CDEBUG(D_INODE, "%p: lock/io FID " DFID "/" DFID ", lock/io clobj %p/%p\n", - loo, PFID(lu_object_fid(lov2lu(loo))), - PFID(lu_object_fid(&obj->co_lu)), - lov2cl(loo), obj); - - file_start = cl_offset(lov2cl(loo), lock->cll_descr.cld_start); - file_end = cl_offset(lov2cl(loo), lock->cll_descr.cld_end + 1) - 1; - - for (i = 0, nr = 0; i < r0->lo_nr; i++) { - /* - * XXX for wide striping smarter algorithm is desirable, - * breaking out of the loop, early. 
- */ - if (likely(r0->lo_sub[i]) && /* spare layout */ - lov_stripe_intersects(loo->lo_lsm, i, - file_start, file_end, &start, &end)) - nr++; - } - LASSERT(nr > 0); - lovlck = kvzalloc(offsetof(struct lov_lock, lls_sub[nr]), - GFP_NOFS); - if (!lovlck) - return ERR_PTR(-ENOMEM); - - lovlck->lls_nr = nr; - for (i = 0, nr = 0; i < r0->lo_nr; ++i) { - if (likely(r0->lo_sub[i]) && - lov_stripe_intersects(loo->lo_lsm, i, - file_start, file_end, &start, &end)) { - struct lov_lock_sub *lls = &lovlck->lls_sub[nr]; - struct cl_lock_descr *descr; - - descr = &lls->sub_lock.cll_descr; - - LASSERT(!descr->cld_obj); - descr->cld_obj = lovsub2cl(r0->lo_sub[i]); - descr->cld_start = cl_index(descr->cld_obj, start); - descr->cld_end = cl_index(descr->cld_obj, end); - descr->cld_mode = lock->cll_descr.cld_mode; - descr->cld_gid = lock->cll_descr.cld_gid; - descr->cld_enq_flags = lock->cll_descr.cld_enq_flags; - lls->sub_stripe = i; - - /* initialize sub lock */ - result = lov_sublock_init(env, lock, lls); - if (result < 0) - break; - - lls->sub_initialized = 1; - nr++; - } - } - LASSERT(ergo(result == 0, nr == lovlck->lls_nr)); - - if (result != 0) { - for (i = 0; i < nr; ++i) { - if (!lovlck->lls_sub[i].sub_initialized) - break; - - cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock); - } - kvfree(lovlck); - lovlck = ERR_PTR(result); - } - - return lovlck; -} - -static void lov_lock_fini(const struct lu_env *env, - struct cl_lock_slice *slice) -{ - struct lov_lock *lovlck; - int i; - - lovlck = cl2lov_lock(slice); - for (i = 0; i < lovlck->lls_nr; ++i) { - LASSERT(!lovlck->lls_sub[i].sub_is_enqueued); - if (lovlck->lls_sub[i].sub_initialized) - cl_lock_fini(env, &lovlck->lls_sub[i].sub_lock); - } - kvfree(lovlck); -} - -/** - * Implementation of cl_lock_operations::clo_enqueue() for lov layer. 
This - * function is rather subtle, as it enqueues top-lock (i.e., advances top-lock - * state machine from CLS_QUEUING to CLS_ENQUEUED states) by juggling sub-lock - * state machines in the face of sub-locks sharing (by multiple top-locks), - * and concurrent sub-lock cancellations. - */ -static int lov_lock_enqueue(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *io, struct cl_sync_io *anchor) -{ - struct cl_lock *lock = slice->cls_lock; - struct lov_lock *lovlck = cl2lov_lock(slice); - int i; - int rc = 0; - - for (i = 0; i < lovlck->lls_nr; ++i) { - struct lov_lock_sub *lls = &lovlck->lls_sub[i]; - struct lov_sublock_env *subenv; - - subenv = lov_sublock_env_get(env, lock, lls); - if (IS_ERR(subenv)) { - rc = PTR_ERR(subenv); - break; - } - rc = cl_lock_enqueue(subenv->lse_env, subenv->lse_io, - &lls->sub_lock, anchor); - if (rc != 0) - break; - - lls->sub_is_enqueued = 1; - } - return rc; -} - -static void lov_lock_cancel(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct cl_lock *lock = slice->cls_lock; - struct lov_lock *lovlck = cl2lov_lock(slice); - int i; - - for (i = 0; i < lovlck->lls_nr; ++i) { - struct lov_lock_sub *lls = &lovlck->lls_sub[i]; - struct cl_lock *sublock = &lls->sub_lock; - struct lov_sublock_env *subenv; - - if (!lls->sub_is_enqueued) - continue; - - lls->sub_is_enqueued = 0; - subenv = lov_sublock_env_get(env, lock, lls); - if (!IS_ERR(subenv)) { - cl_lock_cancel(subenv->lse_env, sublock); - } else { - CL_LOCK_DEBUG(D_ERROR, env, slice->cls_lock, - "%s fails with %ld.\n", - __func__, PTR_ERR(subenv)); - } - } -} - -static int lov_lock_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - int i; - - (*p)(env, cookie, "%d\n", lck->lls_nr); - for (i = 0; i < lck->lls_nr; ++i) { - struct lov_lock_sub *sub; - - sub = &lck->lls_sub[i]; - (*p)(env, cookie, " %d %x: ", i, sub->sub_is_enqueued); - 
cl_lock_print(env, cookie, p, &sub->sub_lock); - } - return 0; -} - -static const struct cl_lock_operations lov_lock_ops = { - .clo_fini = lov_lock_fini, - .clo_enqueue = lov_lock_enqueue, - .clo_cancel = lov_lock_cancel, - .clo_print = lov_lock_print -}; - -int lov_lock_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io) -{ - struct lov_lock *lck; - int result = 0; - - lck = lov_lock_sub_init(env, obj, lock); - if (!IS_ERR(lck)) - cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_lock_ops); - else - result = PTR_ERR(lck); - return result; -} - -static void lov_empty_lock_fini(const struct lu_env *env, - struct cl_lock_slice *slice) -{ - struct lov_lock *lck = cl2lov_lock(slice); - - kmem_cache_free(lov_lock_kmem, lck); -} - -static int lov_empty_lock_print(const struct lu_env *env, void *cookie, - lu_printer_t p, - const struct cl_lock_slice *slice) -{ - (*p)(env, cookie, "empty\n"); - return 0; -} - -/* XXX: more methods will be added later. */ -static const struct cl_lock_operations lov_empty_lock_ops = { - .clo_fini = lov_empty_lock_fini, - .clo_print = lov_empty_lock_print -}; - -int lov_lock_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io) -{ - struct lov_lock *lck; - int result = -ENOMEM; - - lck = kmem_cache_zalloc(lov_lock_kmem, GFP_NOFS); - if (lck) { - cl_lock_slice_add(lock, &lck->lls_cl, obj, &lov_empty_lock_ops); - result = 0; - } - return result; -} - -/** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c deleted file mode 100644 index 006717cf7a41..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_merge.c +++ /dev/null @@ -1,105 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include -#include "lov_internal.h" - -/** Merge the lock value block(&lvb) attributes and KMS from each of the - * stripes in a file into a single lvb. It is expected that the caller - * initializes the current atime, mtime, ctime to avoid regressing a more - * uptodate time on the local client. 
- */ -int lov_merge_lvb_kms(struct lov_stripe_md *lsm, - struct ost_lvb *lvb, __u64 *kms_place) -{ - __u64 size = 0; - __u64 kms = 0; - __u64 blocks = 0; - s64 current_mtime = lvb->lvb_mtime; - s64 current_atime = lvb->lvb_atime; - s64 current_ctime = lvb->lvb_ctime; - int i; - int rc = 0; - - assert_spin_locked(&lsm->lsm_lock); - LASSERT(lsm->lsm_lock_owner == current->pid); - - CDEBUG(D_INODE, "MDT ID " DOSTID " initial value: s=%llu m=%llu a=%llu c=%llu b=%llu\n", - POSTID(&lsm->lsm_oi), lvb->lvb_size, lvb->lvb_mtime, - lvb->lvb_atime, lvb->lvb_ctime, lvb->lvb_blocks); - for (i = 0; i < lsm->lsm_stripe_count; i++) { - struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - u64 lov_size, tmpsize; - - if (OST_LVB_IS_ERR(loi->loi_lvb.lvb_blocks)) { - rc = OST_LVB_GET_ERR(loi->loi_lvb.lvb_blocks); - continue; - } - - tmpsize = loi->loi_kms; - lov_size = lov_stripe_size(lsm, tmpsize, i); - if (lov_size > kms) - kms = lov_size; - - if (loi->loi_lvb.lvb_size > tmpsize) - tmpsize = loi->loi_lvb.lvb_size; - - lov_size = lov_stripe_size(lsm, tmpsize, i); - if (lov_size > size) - size = lov_size; - /* merge blocks, mtime, atime */ - blocks += loi->loi_lvb.lvb_blocks; - if (loi->loi_lvb.lvb_mtime > current_mtime) - current_mtime = loi->loi_lvb.lvb_mtime; - if (loi->loi_lvb.lvb_atime > current_atime) - current_atime = loi->loi_lvb.lvb_atime; - if (loi->loi_lvb.lvb_ctime > current_ctime) - current_ctime = loi->loi_lvb.lvb_ctime; - - CDEBUG(D_INODE, "MDT ID " DOSTID " on OST[%u]: s=%llu m=%llu a=%llu c=%llu b=%llu\n", - POSTID(&lsm->lsm_oi), loi->loi_ost_idx, - loi->loi_lvb.lvb_size, loi->loi_lvb.lvb_mtime, - loi->loi_lvb.lvb_atime, loi->loi_lvb.lvb_ctime, - loi->loi_lvb.lvb_blocks); - } - - *kms_place = kms; - lvb->lvb_size = size; - lvb->lvb_blocks = blocks; - lvb->lvb_mtime = current_mtime; - lvb->lvb_atime = current_atime; - lvb->lvb_ctime = current_ctime; - return rc; -} diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c deleted 
file mode 100644 index 344ff4b20168..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_obd.c +++ /dev/null @@ -1,1444 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/lov/lov_obd.c - * - * Author: Phil Schwan - * Author: Peter Braam - * Author: Mike Shaver - * Author: Nathan Rutman - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "lov_internal.h" - -/* Keep a refcount of lov->tgt usage to prevent racing with addition/deletion. - * Any function that expects lov_tgts to remain stationary must take a ref. 
- */ -static void lov_getref(struct obd_device *obd) -{ - struct lov_obd *lov = &obd->u.lov; - - /* nobody gets through here until lov_putref is done */ - mutex_lock(&lov->lov_lock); - atomic_inc(&lov->lov_refcount); - mutex_unlock(&lov->lov_lock); -} - -static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt); - -static void lov_putref(struct obd_device *obd) -{ - struct lov_obd *lov = &obd->u.lov; - - mutex_lock(&lov->lov_lock); - /* ok to dec to 0 more than once -- ltd_exp's will be null */ - if (atomic_dec_and_test(&lov->lov_refcount) && lov->lov_death_row) { - LIST_HEAD(kill); - int i; - struct lov_tgt_desc *tgt, *n; - - CDEBUG(D_CONFIG, "destroying %d lov targets\n", - lov->lov_death_row); - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - tgt = lov->lov_tgts[i]; - - if (!tgt || !tgt->ltd_reap) - continue; - list_add(&tgt->ltd_kill, &kill); - /* XXX - right now there is a dependency on ld_tgt_count - * being the maximum tgt index for computing the - * mds_max_easize. So we can't shrink it. 
- */ - lov_ost_pool_remove(&lov->lov_packed, i); - lov->lov_tgts[i] = NULL; - lov->lov_death_row--; - } - mutex_unlock(&lov->lov_lock); - - list_for_each_entry_safe(tgt, n, &kill, ltd_kill) { - list_del(&tgt->ltd_kill); - /* Disconnect */ - __lov_del_obd(obd, tgt); - } - - if (lov->lov_tgts_kobj) - kobject_put(lov->lov_tgts_kobj); - - } else { - mutex_unlock(&lov->lov_lock); - } -} - -static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, - enum obd_notify_event ev); -static int lov_notify(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data); - -int lov_connect_obd(struct obd_device *obd, __u32 index, int activate, - struct obd_connect_data *data) -{ - struct lov_obd *lov = &obd->u.lov; - struct obd_uuid *tgt_uuid; - struct obd_device *tgt_obd; - static struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" }; - struct obd_import *imp; - int rc; - - if (!lov->lov_tgts[index]) - return -EINVAL; - - tgt_uuid = &lov->lov_tgts[index]->ltd_uuid; - tgt_obd = lov->lov_tgts[index]->ltd_obd; - - if (!tgt_obd->obd_set_up) { - CERROR("Target %s not set up\n", obd_uuid2str(tgt_uuid)); - return -EINVAL; - } - - /* override the sp_me from lov */ - tgt_obd->u.cli.cl_sp_me = lov->lov_sp_me; - - if (data && (data->ocd_connect_flags & OBD_CONNECT_INDEX)) - data->ocd_index = index; - - /* - * Divine LOV knows that OBDs under it are OSCs. - */ - imp = tgt_obd->u.cli.cl_import; - - if (activate) { - tgt_obd->obd_no_recov = 0; - /* FIXME this is probably supposed to be - * ptlrpc_set_import_active. Horrible naming. 
- */ - ptlrpc_activate_import(imp); - } - - rc = obd_register_observer(tgt_obd, obd); - if (rc) { - CERROR("Target %s register_observer error %d\n", - obd_uuid2str(tgt_uuid), rc); - return rc; - } - - if (imp->imp_invalid) { - CDEBUG(D_CONFIG, "not connecting OSC %s; administratively disabled\n", - obd_uuid2str(tgt_uuid)); - return 0; - } - - rc = obd_connect(NULL, &lov->lov_tgts[index]->ltd_exp, tgt_obd, - &lov_osc_uuid, data, NULL); - if (rc || !lov->lov_tgts[index]->ltd_exp) { - CERROR("Target %s connect error %d\n", - obd_uuid2str(tgt_uuid), rc); - return -ENODEV; - } - - lov->lov_tgts[index]->ltd_reap = 0; - - CDEBUG(D_CONFIG, "Connected tgt idx %d %s (%s) %sactive\n", index, - obd_uuid2str(tgt_uuid), tgt_obd->obd_name, activate ? "":"in"); - - if (lov->lov_tgts_kobj) - /* Even if we failed, that's ok */ - rc = sysfs_create_link(lov->lov_tgts_kobj, &tgt_obd->obd_kobj, - tgt_obd->obd_name); - - return 0; -} - -static int lov_connect(const struct lu_env *env, - struct obd_export **exp, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *data, - void *localdata) -{ - struct lov_obd *lov = &obd->u.lov; - struct lov_tgt_desc *tgt; - struct lustre_handle conn; - int i, rc; - - CDEBUG(D_CONFIG, "connect #%d\n", lov->lov_connects); - - rc = class_connect(&conn, obd, cluuid); - if (rc) - return rc; - - *exp = class_conn2export(&conn); - - /* Why should there ever be more than 1 connect? 
*/ - lov->lov_connects++; - LASSERT(lov->lov_connects == 1); - - memset(&lov->lov_ocd, 0, sizeof(lov->lov_ocd)); - if (data) - lov->lov_ocd = *data; - - obd_getref(obd); - - lov->lov_tgts_kobj = kobject_create_and_add("target_obds", - &obd->obd_kobj); - - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - tgt = lov->lov_tgts[i]; - if (!tgt || obd_uuid_empty(&tgt->ltd_uuid)) - continue; - /* Flags will be lowest common denominator */ - rc = lov_connect_obd(obd, i, tgt->ltd_activate, &lov->lov_ocd); - if (rc) { - CERROR("%s: lov connect tgt %d failed: %d\n", - obd->obd_name, i, rc); - continue; - } - /* connect to administrative disabled ost */ - if (!lov->lov_tgts[i]->ltd_exp) - continue; - - rc = lov_notify(obd, lov->lov_tgts[i]->ltd_exp->exp_obd, - OBD_NOTIFY_CONNECT, (void *)&i); - if (rc) { - CERROR("%s error sending notify %d\n", - obd->obd_name, rc); - } - } - obd_putref(obd); - - return 0; -} - -static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) -{ - struct lov_obd *lov = &obd->u.lov; - struct obd_device *osc_obd; - int rc; - - osc_obd = class_exp2obd(tgt->ltd_exp); - CDEBUG(D_CONFIG, "%s: disconnecting target %s\n", - obd->obd_name, osc_obd ? osc_obd->obd_name : "NULL"); - - if (tgt->ltd_active) { - tgt->ltd_active = 0; - lov->desc.ld_active_tgt_count--; - tgt->ltd_exp->exp_obd->obd_inactive = 1; - } - - if (osc_obd) { - if (lov->lov_tgts_kobj) - sysfs_remove_link(lov->lov_tgts_kobj, - osc_obd->obd_name); - - /* Pass it on to our clients. - * XXX This should be an argument to disconnect, - * XXX not a back-door flag on the OBD. Ah well. 
- */ - osc_obd->obd_force = obd->obd_force; - osc_obd->obd_fail = obd->obd_fail; - osc_obd->obd_no_recov = obd->obd_no_recov; - } - - obd_register_observer(osc_obd, NULL); - - rc = obd_disconnect(tgt->ltd_exp); - if (rc) { - CERROR("Target %s disconnect error %d\n", - tgt->ltd_uuid.uuid, rc); - rc = 0; - } - - tgt->ltd_exp = NULL; - return 0; -} - -static int lov_disconnect(struct obd_export *exp) -{ - struct obd_device *obd = class_exp2obd(exp); - struct lov_obd *lov = &obd->u.lov; - int i, rc; - - if (!lov->lov_tgts) - goto out; - - /* Only disconnect the underlying layers on the final disconnect. */ - lov->lov_connects--; - if (lov->lov_connects != 0) { - /* why should there be more than 1 connect? */ - CERROR("disconnect #%d\n", lov->lov_connects); - goto out; - } - - /* Let's hold another reference so lov_del_obd doesn't spin through - * putref every time - */ - obd_getref(obd); - - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (lov->lov_tgts[i] && lov->lov_tgts[i]->ltd_exp) { - /* Disconnection is the last we know about an obd */ - lov_del_target(obd, i, NULL, lov->lov_tgts[i]->ltd_gen); - } - } - - obd_putref(obd); - -out: - rc = class_disconnect(exp); /* bz 9811 */ - return rc; -} - -/* Error codes: - * - * -EINVAL : UUID can't be found in the LOV's target list - * -ENOTCONN: The UUID is found, but the target connection is bad (!) - * -EBADF : The UUID is found, but the OBD is the wrong type (!) - * any >= 0 : is log target index - */ -static int lov_set_osc_active(struct obd_device *obd, struct obd_uuid *uuid, - enum obd_notify_event ev) -{ - struct lov_obd *lov = &obd->u.lov; - struct lov_tgt_desc *tgt; - int index, activate, active; - - CDEBUG(D_INFO, "Searching in lov %p for uuid %s event(%d)\n", - lov, uuid->uuid, ev); - - obd_getref(obd); - for (index = 0; index < lov->desc.ld_tgt_count; index++) { - tgt = lov->lov_tgts[index]; - if (!tgt) - continue; - /* - * LU-642, initially inactive OSC could miss the obd_connect, - * we make up for it here. 
- */ - if (ev == OBD_NOTIFY_ACTIVATE && !tgt->ltd_exp && - obd_uuid_equals(uuid, &tgt->ltd_uuid)) { - struct obd_uuid lov_osc_uuid = {"LOV_OSC_UUID"}; - - obd_connect(NULL, &tgt->ltd_exp, tgt->ltd_obd, - &lov_osc_uuid, &lov->lov_ocd, NULL); - } - if (!tgt->ltd_exp) - continue; - - CDEBUG(D_INFO, "lov idx %d is %s conn %#llx\n", - index, obd_uuid2str(&tgt->ltd_uuid), - tgt->ltd_exp->exp_handle.h_cookie); - if (obd_uuid_equals(uuid, &tgt->ltd_uuid)) - break; - } - - if (index == lov->desc.ld_tgt_count) { - index = -EINVAL; - goto out; - } - - if (ev == OBD_NOTIFY_DEACTIVATE || ev == OBD_NOTIFY_ACTIVATE) { - activate = (ev == OBD_NOTIFY_ACTIVATE) ? 1 : 0; - - if (lov->lov_tgts[index]->ltd_activate == activate) { - CDEBUG(D_INFO, "OSC %s already %sactivate!\n", - uuid->uuid, activate ? "" : "de"); - } else { - lov->lov_tgts[index]->ltd_activate = activate; - CDEBUG(D_CONFIG, "%sactivate OSC %s\n", - activate ? "" : "de", obd_uuid2str(uuid)); - } - - } else if (ev == OBD_NOTIFY_INACTIVE || ev == OBD_NOTIFY_ACTIVE) { - active = (ev == OBD_NOTIFY_ACTIVE) ? 1 : 0; - - if (lov->lov_tgts[index]->ltd_active == active) { - CDEBUG(D_INFO, "OSC %s already %sactive!\n", - uuid->uuid, active ? "" : "in"); - goto out; - } - CDEBUG(D_CONFIG, "Marking OSC %s %sactive\n", - obd_uuid2str(uuid), active ? 
"" : "in"); - - lov->lov_tgts[index]->ltd_active = active; - if (active) { - lov->desc.ld_active_tgt_count++; - lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 0; - } else { - lov->desc.ld_active_tgt_count--; - lov->lov_tgts[index]->ltd_exp->exp_obd->obd_inactive = 1; - } - } else { - CERROR("Unknown event(%d) for uuid %s", ev, uuid->uuid); - } - - out: - obd_putref(obd); - return index; -} - -static int lov_notify(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data) -{ - int rc = 0; - struct lov_obd *lov = &obd->u.lov; - - down_read(&lov->lov_notify_lock); - if (!lov->lov_connects) { - up_read(&lov->lov_notify_lock); - return rc; - } - - if (ev == OBD_NOTIFY_ACTIVE || ev == OBD_NOTIFY_INACTIVE || - ev == OBD_NOTIFY_ACTIVATE || ev == OBD_NOTIFY_DEACTIVATE) { - struct obd_uuid *uuid; - - LASSERT(watched); - - if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { - up_read(&lov->lov_notify_lock); - CERROR("unexpected notification of %s %s!\n", - watched->obd_type->typ_name, - watched->obd_name); - return -EINVAL; - } - uuid = &watched->u.cli.cl_target_uuid; - - /* Set OSC as active before notifying the observer, so the - * observer can use the OSC normally. - */ - rc = lov_set_osc_active(obd, uuid, ev); - if (rc < 0) { - up_read(&lov->lov_notify_lock); - CERROR("event(%d) of %s failed: %d\n", ev, - obd_uuid2str(uuid), rc); - return rc; - } - /* active event should be pass lov target index as data */ - data = &rc; - } - - /* Pass the notification up the chain. 
*/ - if (watched) { - rc = obd_notify_observer(obd, watched, ev, data); - } else { - /* NULL watched means all osc's in the lov (only for syncs) */ - /* sync event should be send lov idx as data */ - struct lov_obd *lov = &obd->u.lov; - int i, is_sync; - - data = &i; - is_sync = (ev == OBD_NOTIFY_SYNC) || - (ev == OBD_NOTIFY_SYNC_NONBLOCK); - - obd_getref(obd); - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i]) - continue; - - /* don't send sync event if target not - * connected/activated - */ - if (is_sync && !lov->lov_tgts[i]->ltd_active) - continue; - - rc = obd_notify_observer(obd, lov->lov_tgts[i]->ltd_obd, - ev, data); - if (rc) { - CERROR("%s: notify %s of %s failed %d\n", - obd->obd_name, - obd->obd_observer->obd_name, - lov->lov_tgts[i]->ltd_obd->obd_name, - rc); - } - } - obd_putref(obd); - } - - up_read(&lov->lov_notify_lock); - return rc; -} - -static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, - __u32 index, int gen, int active) -{ - struct lov_obd *lov = &obd->u.lov; - struct lov_tgt_desc *tgt; - struct obd_device *tgt_obd; - int rc; - - CDEBUG(D_CONFIG, "uuid:%s idx:%d gen:%d active:%d\n", - uuidp->uuid, index, gen, active); - - if (gen <= 0) { - CERROR("request to add OBD %s with invalid generation: %d\n", - uuidp->uuid, gen); - return -EINVAL; - } - - tgt_obd = class_find_client_obd(uuidp, LUSTRE_OSC_NAME, - &obd->obd_uuid); - if (!tgt_obd) - return -EINVAL; - - mutex_lock(&lov->lov_lock); - - if ((index < lov->lov_tgt_size) && lov->lov_tgts[index]) { - tgt = lov->lov_tgts[index]; - CERROR("UUID %s already assigned at LOV target index %d\n", - obd_uuid2str(&tgt->ltd_uuid), index); - mutex_unlock(&lov->lov_lock); - return -EEXIST; - } - - if (index >= lov->lov_tgt_size) { - /* We need to reallocate the lov target array. 
*/ - struct lov_tgt_desc **newtgts, **old = NULL; - __u32 newsize, oldsize = 0; - - newsize = max_t(__u32, lov->lov_tgt_size, 2); - while (newsize < index + 1) - newsize <<= 1; - newtgts = kcalloc(newsize, sizeof(*newtgts), GFP_NOFS); - if (!newtgts) { - mutex_unlock(&lov->lov_lock); - return -ENOMEM; - } - - if (lov->lov_tgt_size) { - memcpy(newtgts, lov->lov_tgts, sizeof(*newtgts) * - lov->lov_tgt_size); - old = lov->lov_tgts; - oldsize = lov->lov_tgt_size; - } - - lov->lov_tgts = newtgts; - lov->lov_tgt_size = newsize; - smp_rmb(); - kfree(old); - - CDEBUG(D_CONFIG, "tgts: %p size: %d\n", - lov->lov_tgts, lov->lov_tgt_size); - } - - tgt = kzalloc(sizeof(*tgt), GFP_NOFS); - if (!tgt) { - mutex_unlock(&lov->lov_lock); - return -ENOMEM; - } - - rc = lov_ost_pool_add(&lov->lov_packed, index, lov->lov_tgt_size); - if (rc) { - mutex_unlock(&lov->lov_lock); - kfree(tgt); - return rc; - } - - tgt->ltd_uuid = *uuidp; - tgt->ltd_obd = tgt_obd; - /* XXX - add a sanity check on the generation number. */ - tgt->ltd_gen = gen; - tgt->ltd_index = index; - tgt->ltd_activate = active; - lov->lov_tgts[index] = tgt; - if (index >= lov->desc.ld_tgt_count) - lov->desc.ld_tgt_count = index + 1; - - mutex_unlock(&lov->lov_lock); - - CDEBUG(D_CONFIG, "idx=%d ltd_gen=%d ld_tgt_count=%d\n", - index, tgt->ltd_gen, lov->desc.ld_tgt_count); - - if (lov->lov_connects == 0) { - /* lov_connect hasn't been called yet. We'll do the - * lov_connect_obd on this target when that fn first runs, - * because we don't know the connect flags yet. 
- */ - return 0; - } - - obd_getref(obd); - - rc = lov_connect_obd(obd, index, active, &lov->lov_ocd); - if (rc) - goto out; - - /* connect to administrative disabled ost */ - if (!tgt->ltd_exp) { - rc = 0; - goto out; - } - - if (lov->lov_cache) { - rc = obd_set_info_async(NULL, tgt->ltd_exp, - sizeof(KEY_CACHE_SET), KEY_CACHE_SET, - sizeof(struct cl_client_cache), - lov->lov_cache, NULL); - if (rc < 0) - goto out; - } - - rc = lov_notify(obd, tgt->ltd_exp->exp_obd, - active ? OBD_NOTIFY_CONNECT : OBD_NOTIFY_INACTIVE, - (void *)&index); - -out: - if (rc) { - CERROR("add failed (%d), deleting %s\n", rc, - obd_uuid2str(&tgt->ltd_uuid)); - lov_del_target(obd, index, NULL, 0); - } - obd_putref(obd); - return rc; -} - -/* Schedule a target for deletion */ -int lov_del_target(struct obd_device *obd, __u32 index, - struct obd_uuid *uuidp, int gen) -{ - struct lov_obd *lov = &obd->u.lov; - int count = lov->desc.ld_tgt_count; - int rc = 0; - - if (index >= count) { - CERROR("LOV target index %d >= number of LOV OBDs %d.\n", - index, count); - return -EINVAL; - } - - /* to make sure there's no ongoing lov_notify() now */ - down_write(&lov->lov_notify_lock); - obd_getref(obd); - - if (!lov->lov_tgts[index]) { - CERROR("LOV target at index %d is not setup.\n", index); - rc = -EINVAL; - goto out; - } - - if (uuidp && !obd_uuid_equals(uuidp, &lov->lov_tgts[index]->ltd_uuid)) { - CERROR("LOV target UUID %s at index %d doesn't match %s.\n", - lov_uuid2str(lov, index), index, - obd_uuid2str(uuidp)); - rc = -EINVAL; - goto out; - } - - CDEBUG(D_CONFIG, "uuid: %s idx: %d gen: %d exp: %p active: %d\n", - lov_uuid2str(lov, index), index, - lov->lov_tgts[index]->ltd_gen, lov->lov_tgts[index]->ltd_exp, - lov->lov_tgts[index]->ltd_active); - - lov->lov_tgts[index]->ltd_reap = 1; - lov->lov_death_row++; - /* we really delete it from obd_putref */ -out: - obd_putref(obd); - up_write(&lov->lov_notify_lock); - - return rc; -} - -static void __lov_del_obd(struct obd_device *obd, struct 
lov_tgt_desc *tgt) -{ - struct obd_device *osc_obd; - - LASSERT(tgt); - LASSERT(tgt->ltd_reap); - - osc_obd = class_exp2obd(tgt->ltd_exp); - - CDEBUG(D_CONFIG, "Removing tgt %s : %s\n", - tgt->ltd_uuid.uuid, - osc_obd ? osc_obd->obd_name : ""); - - if (tgt->ltd_exp) - lov_disconnect_obd(obd, tgt); - - kfree(tgt); - - /* Manual cleanup - no cleanup logs to clean up the osc's. We must - * do it ourselves. And we can't do it from lov_cleanup, - * because we just lost our only reference to it. - */ - if (osc_obd) - class_manual_cleanup(osc_obd); -} - -void lov_fix_desc_stripe_size(__u64 *val) -{ - if (*val < LOV_MIN_STRIPE_SIZE) { - if (*val != 0) - LCONSOLE_INFO("Increasing default stripe size to minimum %u\n", - LOV_DESC_STRIPE_SIZE_DEFAULT); - *val = LOV_DESC_STRIPE_SIZE_DEFAULT; - } else if (*val & (LOV_MIN_STRIPE_SIZE - 1)) { - *val &= ~(LOV_MIN_STRIPE_SIZE - 1); - LCONSOLE_WARN("Changing default stripe size to %llu (a multiple of %u)\n", - *val, LOV_MIN_STRIPE_SIZE); - } -} - -void lov_fix_desc_stripe_count(__u32 *val) -{ - if (*val == 0) - *val = 1; -} - -void lov_fix_desc_pattern(__u32 *val) -{ - /* from lov_setstripe */ - if ((*val != 0) && (*val != LOV_PATTERN_RAID0)) { - LCONSOLE_WARN("Unknown stripe pattern: %#x\n", *val); - *val = 0; - } -} - -void lov_fix_desc_qos_maxage(__u32 *val) -{ - if (*val == 0) - *val = LOV_DESC_QOS_MAXAGE_DEFAULT; -} - -void lov_fix_desc(struct lov_desc *desc) -{ - lov_fix_desc_stripe_size(&desc->ld_default_stripe_size); - lov_fix_desc_stripe_count(&desc->ld_default_stripe_count); - lov_fix_desc_pattern(&desc->ld_pattern); - lov_fix_desc_qos_maxage(&desc->ld_qos_maxage); -} - -int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - struct lprocfs_static_vars lvars = { NULL }; - struct lov_desc *desc; - struct lov_obd *lov = &obd->u.lov; - int rc; - - if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { - CERROR("LOV setup requires a descriptor\n"); - return -EINVAL; - } - - desc = (struct lov_desc *)lustre_cfg_buf(lcfg, 1); - - if 
(sizeof(*desc) > LUSTRE_CFG_BUFLEN(lcfg, 1)) { - CERROR("descriptor size wrong: %d > %d\n", - (int)sizeof(*desc), LUSTRE_CFG_BUFLEN(lcfg, 1)); - return -EINVAL; - } - - if (desc->ld_magic != LOV_DESC_MAGIC) { - if (desc->ld_magic == __swab32(LOV_DESC_MAGIC)) { - CDEBUG(D_OTHER, "%s: Swabbing lov desc %p\n", - obd->obd_name, desc); - lustre_swab_lov_desc(desc); - } else { - CERROR("%s: Bad lov desc magic: %#x\n", - obd->obd_name, desc->ld_magic); - return -EINVAL; - } - } - - lov_fix_desc(desc); - - desc->ld_active_tgt_count = 0; - lov->desc = *desc; - lov->lov_tgt_size = 0; - - mutex_init(&lov->lov_lock); - atomic_set(&lov->lov_refcount, 0); - lov->lov_sp_me = LUSTRE_SP_CLI; - - init_rwsem(&lov->lov_notify_lock); - - INIT_LIST_HEAD(&lov->lov_pool_list); - lov->lov_pool_count = 0; - rc = lov_pool_hash_init(&lov->lov_pools_hash_body); - if (rc) - goto out; - rc = lov_ost_pool_init(&lov->lov_packed, 0); - if (rc) - goto out; - - lprocfs_lov_init_vars(&lvars); - lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars); - - debugfs_create_file("target_obd", 0444, obd->obd_debugfs_entry, obd, - &lov_proc_target_fops); - - lov->lov_pool_debugfs_entry = debugfs_create_dir("pools", - obd->obd_debugfs_entry); - return 0; - -out: - return rc; -} - -static int lov_cleanup(struct obd_device *obd) -{ - struct lov_obd *lov = &obd->u.lov; - struct pool_desc *pool, *tmp; - - list_for_each_entry_safe(pool, tmp, &lov->lov_pool_list, pool_list) { - /* free pool structs */ - CDEBUG(D_INFO, "delete pool %p\n", pool); - /* In the function below, .hs_keycmp resolves to - * pool_hashkey_keycmp() - */ - /* coverity[overrun-buffer-val] */ - lov_pool_del(obd, pool->pool_name); - } - lov_pool_hash_destroy(&lov->lov_pools_hash_body); - lov_ost_pool_free(&lov->lov_packed); - - lprocfs_obd_cleanup(obd); - if (lov->lov_tgts) { - int i; - - obd_getref(obd); - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i]) - continue; - - /* Inactive targets may never have connected */ - if 
(lov->lov_tgts[i]->ltd_active || - atomic_read(&lov->lov_refcount)) - /* We should never get here - these - * should have been removed in the - * disconnect. - */ - CERROR("lov tgt %d not cleaned! deathrow=%d, lovrc=%d\n", - i, lov->lov_death_row, - atomic_read(&lov->lov_refcount)); - lov_del_target(obd, i, NULL, 0); - } - obd_putref(obd); - kfree(lov->lov_tgts); - lov->lov_tgt_size = 0; - } - - if (lov->lov_cache) { - cl_cache_decref(lov->lov_cache); - lov->lov_cache = NULL; - } - - return 0; -} - -int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg, - __u32 *indexp, int *genp) -{ - struct obd_uuid obd_uuid; - int cmd; - int rc = 0; - - switch (cmd = lcfg->lcfg_command) { - case LCFG_LOV_ADD_OBD: - case LCFG_LOV_ADD_INA: - case LCFG_LOV_DEL_OBD: { - __u32 index; - int gen; - /* lov_modify_tgts add 0:lov_mdsA 1:ost1_UUID 2:0 3:1 */ - if (LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(obd_uuid.uuid)) { - rc = -EINVAL; - goto out; - } - - obd_str2uuid(&obd_uuid, lustre_cfg_buf(lcfg, 1)); - - rc = kstrtoint(lustre_cfg_buf(lcfg, 2), 10, indexp); - if (rc < 0) - goto out; - rc = kstrtoint(lustre_cfg_buf(lcfg, 3), 10, genp); - if (rc < 0) - goto out; - index = *indexp; - gen = *genp; - if (cmd == LCFG_LOV_ADD_OBD) - rc = lov_add_target(obd, &obd_uuid, index, gen, 1); - else if (cmd == LCFG_LOV_ADD_INA) - rc = lov_add_target(obd, &obd_uuid, index, gen, 0); - else - rc = lov_del_target(obd, index, &obd_uuid, gen); - goto out; - } - case LCFG_PARAM: { - struct lprocfs_static_vars lvars = { NULL }; - struct lov_desc *desc = &obd->u.lov.desc; - - if (!desc) { - rc = -EINVAL; - goto out; - } - - lprocfs_lov_init_vars(&lvars); - - rc = class_process_proc_param(PARAM_LOV, lvars.obd_vars, - lcfg, obd); - if (rc > 0) - rc = 0; - goto out; - } - case LCFG_POOL_NEW: - case LCFG_POOL_ADD: - case LCFG_POOL_DEL: - case LCFG_POOL_REM: - goto out; - - default: { - CERROR("Unknown command: %d\n", lcfg->lcfg_command); - rc = -EINVAL; - goto out; - } - } -out: - return rc; -} - 
-static int -lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc) -{ - struct lov_request_set *lovset = (struct lov_request_set *)data; - int err; - - if (rc) - atomic_set(&lovset->set_completes, 0); - - err = lov_fini_statfs_set(lovset); - return rc ? rc : err; -} - -static int lov_statfs_async(struct obd_export *exp, struct obd_info *oinfo, - __u64 max_age, struct ptlrpc_request_set *rqset) -{ - struct obd_device *obd = class_exp2obd(exp); - struct lov_request_set *set; - struct lov_request *req; - struct lov_obd *lov; - int rc = 0; - - LASSERT(oinfo->oi_osfs); - - lov = &obd->u.lov; - rc = lov_prep_statfs_set(obd, oinfo, &set); - if (rc) - return rc; - - list_for_each_entry(req, &set->set_list, rq_link) { - rc = obd_statfs_async(lov->lov_tgts[req->rq_idx]->ltd_exp, - &req->rq_oi, max_age, rqset); - if (rc) - break; - } - - if (rc || list_empty(&rqset->set_requests)) { - int err; - - if (rc) - atomic_set(&set->set_completes, 0); - err = lov_fini_statfs_set(set); - return rc ? 
rc : err; - } - - LASSERT(!rqset->set_interpret); - rqset->set_interpret = lov_statfs_interpret; - rqset->set_arg = (void *)set; - return 0; -} - -static int lov_statfs(const struct lu_env *env, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, __u32 flags) -{ - struct ptlrpc_request_set *set = NULL; - struct obd_info oinfo = { - .oi_osfs = osfs, - .oi_flags = flags, - }; - int rc = 0; - - /* for obdclass we forbid using obd_statfs_rqset, but prefer using async - * statfs requests - */ - set = ptlrpc_prep_set(); - if (!set) - return -ENOMEM; - - rc = lov_statfs_async(exp, &oinfo, max_age, set); - if (rc == 0) - rc = ptlrpc_set_wait(set); - ptlrpc_set_destroy(set); - - return rc; -} - -static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void __user *uarg) -{ - struct obd_device *obddev = class_exp2obd(exp); - struct lov_obd *lov = &obddev->u.lov; - int i = 0, rc = 0, count = lov->desc.ld_tgt_count; - struct obd_uuid *uuidp; - - switch (cmd) { - case IOC_OBD_STATFS: { - struct obd_ioctl_data *data = karg; - struct obd_device *osc_obd; - struct obd_statfs stat_buf = {0}; - __u32 index; - __u32 flags; - - memcpy(&index, data->ioc_inlbuf2, sizeof(__u32)); - if (index >= count) - return -ENODEV; - - if (!lov->lov_tgts[index]) - /* Try again with the next index */ - return -EAGAIN; - if (!lov->lov_tgts[index]->ltd_active) - return -ENODATA; - - osc_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp); - if (!osc_obd) - return -EINVAL; - - /* copy UUID */ - if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(osc_obd), - min_t(unsigned long, data->ioc_plen2, - sizeof(struct obd_uuid)))) - return -EFAULT; - - memcpy(&flags, data->ioc_inlbuf1, sizeof(__u32)); - flags = flags & LL_STATFS_NODELAY ? 
OBD_STATFS_NODELAY : 0; - - /* got statfs data */ - rc = obd_statfs(NULL, lov->lov_tgts[index]->ltd_exp, &stat_buf, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - flags); - if (rc) - return rc; - if (copy_to_user(data->ioc_pbuf1, &stat_buf, - min_t(unsigned long, data->ioc_plen1, - sizeof(stat_buf)))) - return -EFAULT; - break; - } - case OBD_IOC_LOV_GET_CONFIG: { - struct obd_ioctl_data *data; - struct lov_desc *desc; - char *buf = NULL; - __u32 *genp; - - len = 0; - if (obd_ioctl_getdata(&buf, &len, uarg)) - return -EINVAL; - - data = (struct obd_ioctl_data *)buf; - - if (sizeof(*desc) > data->ioc_inllen1) { - kvfree(buf); - return -EINVAL; - } - - if (sizeof(uuidp->uuid) * count > data->ioc_inllen2) { - kvfree(buf); - return -EINVAL; - } - - if (sizeof(__u32) * count > data->ioc_inllen3) { - kvfree(buf); - return -EINVAL; - } - - desc = (struct lov_desc *)data->ioc_inlbuf1; - memcpy(desc, &lov->desc, sizeof(*desc)); - - uuidp = (struct obd_uuid *)data->ioc_inlbuf2; - genp = (__u32 *)data->ioc_inlbuf3; - /* the uuid will be empty for deleted OSTs */ - for (i = 0; i < count; i++, uuidp++, genp++) { - if (!lov->lov_tgts[i]) - continue; - *uuidp = lov->lov_tgts[i]->ltd_uuid; - *genp = lov->lov_tgts[i]->ltd_gen; - } - - if (copy_to_user(uarg, buf, len)) - rc = -EFAULT; - kvfree(buf); - break; - } - case OBD_IOC_QUOTACTL: { - struct if_quotactl *qctl = karg; - struct lov_tgt_desc *tgt = NULL; - struct obd_quotactl *oqctl; - - if (qctl->qc_valid == QC_OSTIDX) { - if (count <= qctl->qc_idx) - return -EINVAL; - - tgt = lov->lov_tgts[qctl->qc_idx]; - if (!tgt || !tgt->ltd_exp) - return -EINVAL; - } else if (qctl->qc_valid == QC_UUID) { - for (i = 0; i < count; i++) { - tgt = lov->lov_tgts[i]; - if (!tgt || - !obd_uuid_equals(&tgt->ltd_uuid, - &qctl->obd_uuid)) - continue; - - if (!tgt->ltd_exp) - return -EINVAL; - - break; - } - } else { - return -EINVAL; - } - - if (i >= count) - return -EAGAIN; - - LASSERT(tgt && tgt->ltd_exp); - oqctl = kzalloc(sizeof(*oqctl), 
GFP_NOFS); - if (!oqctl) - return -ENOMEM; - - QCTL_COPY(oqctl, qctl); - rc = obd_quotactl(tgt->ltd_exp, oqctl); - if (rc == 0) { - QCTL_COPY(qctl, oqctl); - qctl->qc_valid = QC_OSTIDX; - qctl->obd_uuid = tgt->ltd_uuid; - } - kfree(oqctl); - break; - } - default: { - int set = 0; - - if (count == 0) - return -ENOTTY; - - for (i = 0; i < count; i++) { - int err; - struct obd_device *osc_obd; - - /* OST was disconnected */ - if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_exp) - continue; - - /* ll_umount_begin() sets force flag but for lov, not - * osc. Let's pass it through - */ - osc_obd = class_exp2obd(lov->lov_tgts[i]->ltd_exp); - osc_obd->obd_force = obddev->obd_force; - err = obd_iocontrol(cmd, lov->lov_tgts[i]->ltd_exp, - len, karg, uarg); - if (err) { - if (lov->lov_tgts[i]->ltd_active) { - CDEBUG(err == -ENOTTY ? - D_IOCTL : D_WARNING, - "iocontrol OSC %s on OST idx %d cmd %x: err = %d\n", - lov_uuid2str(lov, i), - i, cmd, err); - if (!rc) - rc = err; - } - } else { - set = 1; - } - } - if (!set && !rc) - rc = -EIO; - } - } - - return rc; -} - -static int lov_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val) -{ - struct obd_device *obddev = class_exp2obd(exp); - struct lov_obd *lov = &obddev->u.lov; - struct lov_desc *ld = &lov->desc; - int rc = 0; - - if (!vallen || !val) - return -EFAULT; - - obd_getref(obddev); - - if (KEY_IS(KEY_MAX_EASIZE)) { - u32 max_stripe_count = min_t(u32, ld->ld_active_tgt_count, - LOV_MAX_STRIPE_COUNT); - - *((u32 *)val) = lov_mds_md_size(max_stripe_count, LOV_MAGIC_V3); - } else if (KEY_IS(KEY_DEFAULT_EASIZE)) { - u32 def_stripe_count = min_t(u32, ld->ld_default_stripe_count, - LOV_MAX_STRIPE_COUNT); - - *((u32 *)val) = lov_mds_md_size(def_stripe_count, LOV_MAGIC_V3); - } else if (KEY_IS(KEY_TGT_COUNT)) { - *((int *)val) = lov->desc.ld_tgt_count; - } else { - rc = -EINVAL; - } - - obd_putref(obddev); - return rc; -} - -static int lov_set_info_async(const struct 
lu_env *env, struct obd_export *exp, - u32 keylen, void *key, u32 vallen, - void *val, struct ptlrpc_request_set *set) -{ - struct obd_device *obddev = class_exp2obd(exp); - struct lov_obd *lov = &obddev->u.lov; - u32 count; - int i, rc = 0, err; - struct lov_tgt_desc *tgt; - int do_inactive = 0, no_set = 0; - - if (!set) { - no_set = 1; - set = ptlrpc_prep_set(); - if (!set) - return -ENOMEM; - } - - obd_getref(obddev); - count = lov->desc.ld_tgt_count; - - if (KEY_IS(KEY_CHECKSUM)) { - do_inactive = 1; - } else if (KEY_IS(KEY_CACHE_SET)) { - LASSERT(!lov->lov_cache); - lov->lov_cache = val; - do_inactive = 1; - cl_cache_incref(lov->lov_cache); - } - - for (i = 0; i < count; i++) { - tgt = lov->lov_tgts[i]; - - /* OST was disconnected */ - if (!tgt || !tgt->ltd_exp) - continue; - - /* OST is inactive and we don't want inactive OSCs */ - if (!tgt->ltd_active && !do_inactive) - continue; - - err = obd_set_info_async(env, tgt->ltd_exp, keylen, key, - vallen, val, set); - if (!rc) - rc = err; - } - - obd_putref(obddev); - if (no_set) { - err = ptlrpc_set_wait(set); - if (!rc) - rc = err; - ptlrpc_set_destroy(set); - } - return rc; -} - -void lov_stripe_lock(struct lov_stripe_md *md) - __acquires(&md->lsm_lock) -{ - LASSERT(md->lsm_lock_owner != current->pid); - spin_lock(&md->lsm_lock); - LASSERT(md->lsm_lock_owner == 0); - md->lsm_lock_owner = current->pid; -} - -void lov_stripe_unlock(struct lov_stripe_md *md) - __releases(&md->lsm_lock) -{ - LASSERT(md->lsm_lock_owner == current->pid); - md->lsm_lock_owner = 0; - spin_unlock(&md->lsm_lock); -} - -static int lov_quotactl(struct obd_device *obd, struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - struct lov_obd *lov = &obd->u.lov; - struct lov_tgt_desc *tgt; - __u64 curspace = 0; - __u64 bhardlimit = 0; - int i, rc = 0; - - if (oqctl->qc_cmd != Q_GETOQUOTA && - oqctl->qc_cmd != LUSTRE_Q_SETQUOTA) { - CERROR("bad quota opc %x for lov obd\n", oqctl->qc_cmd); - return -EFAULT; - } - - /* for lov tgt */ - 
obd_getref(obd); - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - int err; - - tgt = lov->lov_tgts[i]; - - if (!tgt) - continue; - - if (!tgt->ltd_active || tgt->ltd_reap) { - if (oqctl->qc_cmd == Q_GETOQUOTA && - lov->lov_tgts[i]->ltd_activate) { - rc = -EREMOTEIO; - CERROR("ost %d is inactive\n", i); - } else { - CDEBUG(D_HA, "ost %d is inactive\n", i); - } - continue; - } - - err = obd_quotactl(tgt->ltd_exp, oqctl); - if (err) { - if (tgt->ltd_active && !rc) - rc = err; - continue; - } - - if (oqctl->qc_cmd == Q_GETOQUOTA) { - curspace += oqctl->qc_dqblk.dqb_curspace; - bhardlimit += oqctl->qc_dqblk.dqb_bhardlimit; - } - } - obd_putref(obd); - - if (oqctl->qc_cmd == Q_GETOQUOTA) { - oqctl->qc_dqblk.dqb_curspace = curspace; - oqctl->qc_dqblk.dqb_bhardlimit = bhardlimit; - } - return rc; -} - -static struct obd_ops lov_obd_ops = { - .owner = THIS_MODULE, - .setup = lov_setup, - .cleanup = lov_cleanup, - /*.process_config = lov_process_config,*/ - .connect = lov_connect, - .disconnect = lov_disconnect, - .statfs = lov_statfs, - .statfs_async = lov_statfs_async, - .iocontrol = lov_iocontrol, - .get_info = lov_get_info, - .set_info_async = lov_set_info_async, - .notify = lov_notify, - .pool_new = lov_pool_new, - .pool_rem = lov_pool_remove, - .pool_add = lov_pool_add, - .pool_del = lov_pool_del, - .getref = lov_getref, - .putref = lov_putref, - .quotactl = lov_quotactl, -}; - -struct kmem_cache *lov_oinfo_slab; - -static int __init lov_init(void) -{ - struct lprocfs_static_vars lvars = { NULL }; - int rc; - - /* print an address of _any_ initialized kernel symbol from this - * module, to allow debugging with gdb that doesn't support data - * symbols from modules. 
- */ - CDEBUG(D_INFO, "Lustre LOV module (%p).\n", &lov_caches); - - rc = libcfs_setup(); - if (rc) - return rc; - - rc = lu_kmem_init(lov_caches); - if (rc) - return rc; - - lov_oinfo_slab = kmem_cache_create("lov_oinfo", - sizeof(struct lov_oinfo), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!lov_oinfo_slab) { - lu_kmem_fini(lov_caches); - return -ENOMEM; - } - lprocfs_lov_init_vars(&lvars); - - rc = class_register_type(&lov_obd_ops, NULL, - LUSTRE_LOV_NAME, &lov_device_type); - - if (rc) { - kmem_cache_destroy(lov_oinfo_slab); - lu_kmem_fini(lov_caches); - } - - return rc; -} - -static void /*__exit*/ lov_exit(void) -{ - class_unregister_type(LUSTRE_LOV_NAME); - kmem_cache_destroy(lov_oinfo_slab); - - lu_kmem_fini(lov_caches); -} - -MODULE_AUTHOR("OpenSFS, Inc. "); -MODULE_DESCRIPTION("Lustre Logical Object Volume"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(LUSTRE_VERSION_STRING); - -module_init(lov_init); -module_exit(lov_exit); diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c deleted file mode 100644 index adc90f310fd7..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_object.c +++ /dev/null @@ -1,1625 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_object for LOV layer. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include "lov_cl_internal.h" - -static inline struct lov_device *lov_object_dev(struct lov_object *obj) -{ - return lu2lov_dev(obj->lo_cl.co_lu.lo_dev); -} - -/** \addtogroup lov - * @{ - */ - -/***************************************************************************** - * - * Layout operations. - * - */ - -struct lov_layout_operations { - int (*llo_init)(const struct lu_env *env, struct lov_device *dev, - struct lov_object *lov, struct lov_stripe_md *lsm, - const struct cl_object_conf *conf, - union lov_layout_state *state); - int (*llo_delete)(const struct lu_env *env, struct lov_object *lov, - union lov_layout_state *state); - void (*llo_fini)(const struct lu_env *env, struct lov_object *lov, - union lov_layout_state *state); - void (*llo_install)(const struct lu_env *env, struct lov_object *lov, - union lov_layout_state *state); - int (*llo_print)(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o); - int (*llo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); - int (*llo_lock_init)(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *io); - int (*llo_io_init)(const struct lu_env *env, - struct cl_object *obj, struct cl_io *io); - int (*llo_getattr)(const struct lu_env *env, struct cl_object *obj, - struct 
cl_attr *attr); -}; - -static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov); - -static void lov_lsm_put(struct lov_stripe_md *lsm) -{ - if (lsm) - lov_free_memmd(&lsm); -} - -/***************************************************************************** - * - * Lov object layout operations. - * - */ - -static void lov_install_empty(const struct lu_env *env, - struct lov_object *lov, - union lov_layout_state *state) -{ - /* - * File without objects. - */ -} - -static int lov_init_empty(const struct lu_env *env, struct lov_device *dev, - struct lov_object *lov, struct lov_stripe_md *lsm, - const struct cl_object_conf *conf, - union lov_layout_state *state) -{ - return 0; -} - -static void lov_install_raid0(const struct lu_env *env, - struct lov_object *lov, - union lov_layout_state *state) -{ -} - -static struct cl_object *lov_sub_find(const struct lu_env *env, - struct cl_device *dev, - const struct lu_fid *fid, - const struct cl_object_conf *conf) -{ - struct lu_object *o; - - o = lu_object_find_at(env, cl2lu_dev(dev), fid, &conf->coc_lu); - LASSERT(ergo(!IS_ERR(o), o->lo_dev->ld_type == &lovsub_device_type)); - return lu2cl(o); -} - -static int lov_init_sub(const struct lu_env *env, struct lov_object *lov, - struct cl_object *stripe, struct lov_layout_raid0 *r0, - int idx) -{ - struct cl_object_header *hdr; - struct cl_object_header *subhdr; - struct cl_object_header *parent; - struct lov_oinfo *oinfo; - int result; - - if (OBD_FAIL_CHECK(OBD_FAIL_LOV_INIT)) { - /* For sanity:test_206. - * Do not leave the object in cache to avoid accessing - * freed memory. This is because osc_object is referring to - * lov_oinfo of lsm_stripe_data which will be freed due to - * this failure. 
- */ - cl_object_kill(env, stripe); - cl_object_put(env, stripe); - return -EIO; - } - - hdr = cl_object_header(lov2cl(lov)); - subhdr = cl_object_header(stripe); - - oinfo = lov->lo_lsm->lsm_oinfo[idx]; - CDEBUG(D_INODE, DFID "@%p[%d] -> " DFID "@%p: ostid: " DOSTID " idx: %d gen: %d\n", - PFID(&subhdr->coh_lu.loh_fid), subhdr, idx, - PFID(&hdr->coh_lu.loh_fid), hdr, POSTID(&oinfo->loi_oi), - oinfo->loi_ost_idx, oinfo->loi_ost_gen); - - /* reuse ->coh_attr_guard to protect coh_parent change */ - spin_lock(&subhdr->coh_attr_guard); - parent = subhdr->coh_parent; - if (!parent) { - subhdr->coh_parent = hdr; - spin_unlock(&subhdr->coh_attr_guard); - subhdr->coh_nesting = hdr->coh_nesting + 1; - lu_object_ref_add(&stripe->co_lu, "lov-parent", lov); - r0->lo_sub[idx] = cl2lovsub(stripe); - r0->lo_sub[idx]->lso_super = lov; - r0->lo_sub[idx]->lso_index = idx; - result = 0; - } else { - struct lu_object *old_obj; - struct lov_object *old_lov; - unsigned int mask = D_INODE; - - spin_unlock(&subhdr->coh_attr_guard); - old_obj = lu_object_locate(&parent->coh_lu, &lov_device_type); - LASSERT(old_obj); - old_lov = cl2lov(lu2cl(old_obj)); - if (old_lov->lo_layout_invalid) { - /* the object's layout has already changed but isn't - * refreshed - */ - lu_object_unhash(env, &stripe->co_lu); - result = -EAGAIN; - } else { - mask = D_ERROR; - result = -EIO; - } - - LU_OBJECT_DEBUG(mask, env, &stripe->co_lu, - "stripe %d is already owned.", idx); - LU_OBJECT_DEBUG(mask, env, old_obj, "owned."); - LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n"); - cl_object_put(env, stripe); - } - return result; -} - -static int lov_page_slice_fixup(struct lov_object *lov, - struct cl_object *stripe) -{ - struct cl_object_header *hdr = cl_object_header(&lov->lo_cl); - struct cl_object *o; - - if (!stripe) - return hdr->coh_page_bufsize - lov->lo_cl.co_slice_off - - cfs_size_round(sizeof(struct lov_page)); - - cl_object_for_each(o, stripe) - o->co_slice_off += hdr->coh_page_bufsize; - - 
return cl_object_header(stripe)->coh_page_bufsize; -} - -static int lov_init_raid0(const struct lu_env *env, struct lov_device *dev, - struct lov_object *lov, struct lov_stripe_md *lsm, - const struct cl_object_conf *conf, - union lov_layout_state *state) -{ - int result; - int i; - - struct cl_object *stripe; - struct lov_thread_info *lti = lov_env_info(env); - struct cl_object_conf *subconf = &lti->lti_stripe_conf; - struct lu_fid *ofid = &lti->lti_fid; - struct lov_layout_raid0 *r0 = &state->raid0; - - if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3) { - dump_lsm(D_ERROR, lsm); - LASSERTF(0, "magic mismatch, expected %d/%d, actual %d.\n", - LOV_MAGIC_V1, LOV_MAGIC_V3, lsm->lsm_magic); - } - - LASSERT(!lov->lo_lsm); - lov->lo_lsm = lsm_addref(lsm); - lov->lo_layout_invalid = true; - r0->lo_nr = lsm->lsm_stripe_count; - LASSERT(r0->lo_nr <= lov_targets_nr(dev)); - - r0->lo_sub = kvzalloc(r0->lo_nr * sizeof(r0->lo_sub[0]), - GFP_NOFS); - if (r0->lo_sub) { - int psz = 0; - - result = 0; - subconf->coc_inode = conf->coc_inode; - spin_lock_init(&r0->lo_sub_lock); - /* - * Create stripe cl_objects.
- */ - for (i = 0; i < r0->lo_nr && result == 0; ++i) { - struct cl_device *subdev; - struct lov_oinfo *oinfo = lsm->lsm_oinfo[i]; - int ost_idx = oinfo->loi_ost_idx; - - if (lov_oinfo_is_dummy(oinfo)) - continue; - - result = ostid_to_fid(ofid, &oinfo->loi_oi, - oinfo->loi_ost_idx); - if (result != 0) - goto out; - - if (!dev->ld_target[ost_idx]) { - CERROR("%s: OST %04x is not initialized\n", - lov2obd(dev->ld_lov)->obd_name, ost_idx); - result = -EIO; - goto out; - } - - subdev = lovsub2cl_dev(dev->ld_target[ost_idx]); - subconf->u.coc_oinfo = oinfo; - LASSERTF(subdev, "not init ost %d\n", ost_idx); - /* In the function below, .hs_keycmp resolves to - * lu_obj_hop_keycmp() - */ - /* coverity[overrun-buffer-val] */ - stripe = lov_sub_find(env, subdev, ofid, subconf); - if (!IS_ERR(stripe)) { - result = lov_init_sub(env, lov, stripe, r0, i); - if (result == -EAGAIN) { /* try again */ - --i; - result = 0; - continue; - } - } else { - result = PTR_ERR(stripe); - } - - if (result == 0) { - int sz = lov_page_slice_fixup(lov, stripe); - - LASSERT(ergo(psz > 0, psz == sz)); - psz = sz; - } - } - if (result == 0) - cl_object_header(&lov->lo_cl)->coh_page_bufsize += psz; - } else { - result = -ENOMEM; - } -out: - return result; -} - -static int lov_init_released(const struct lu_env *env, struct lov_device *dev, - struct lov_object *lov, struct lov_stripe_md *lsm, - const struct cl_object_conf *conf, - union lov_layout_state *state) -{ - LASSERT(lsm); - LASSERT(lsm_is_released(lsm)); - LASSERT(!lov->lo_lsm); - - lov->lo_lsm = lsm_addref(lsm); - return 0; -} - -static struct cl_object *lov_find_subobj(const struct lu_env *env, - struct lov_object *lov, - struct lov_stripe_md *lsm, - int stripe_idx) -{ - struct lov_device *dev = lu2lov_dev(lov2lu(lov)->lo_dev); - struct lov_oinfo *oinfo = lsm->lsm_oinfo[stripe_idx]; - struct lov_thread_info *lti = lov_env_info(env); - struct lu_fid *ofid = &lti->lti_fid; - struct cl_device *subdev; - struct cl_object *result; - int ost_idx; -
int rc; - - if (lov->lo_type != LLT_RAID0) { - result = NULL; - goto out; - } - - ost_idx = oinfo->loi_ost_idx; - rc = ostid_to_fid(ofid, &oinfo->loi_oi, ost_idx); - if (rc) { - result = NULL; - goto out; - } - - subdev = lovsub2cl_dev(dev->ld_target[ost_idx]); - result = lov_sub_find(env, subdev, ofid, NULL); -out: - if (!result) - result = ERR_PTR(-EINVAL); - return result; -} - -static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov, - union lov_layout_state *state) -{ - LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED); - - lov_layout_wait(env, lov); - return 0; -} - -static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov, - struct lovsub_object *los, int idx) -{ - struct cl_object *sub; - struct lov_layout_raid0 *r0; - struct lu_site *site; - wait_queue_head_t *wq; - wait_queue_entry_t *waiter; - - r0 = &lov->u.raid0; - LASSERT(r0->lo_sub[idx] == los); - - sub = lovsub2cl(los); - site = sub->co_lu.lo_dev->ld_site; - wq = lu_site_wq_from_fid(site, &sub->co_lu.lo_header->loh_fid); - - cl_object_kill(env, sub); - /* release a reference to the sub-object and ... */ - lu_object_ref_del(&sub->co_lu, "lov-parent", lov); - cl_object_put(env, sub); - - /* ... wait until it is actually destroyed---sub-object clears its - * ->lo_sub[] slot in lovsub_object_fini() - */ - if (r0->lo_sub[idx] == los) { - waiter = &lov_env_info(env)->lti_waiter; - init_waitqueue_entry(waiter, current); - add_wait_queue(wq, waiter); - set_current_state(TASK_UNINTERRUPTIBLE); - while (1) { - /* this wait-queue is signaled at the end of - * lu_object_free(). 
- */ - set_current_state(TASK_UNINTERRUPTIBLE); - spin_lock(&r0->lo_sub_lock); - if (r0->lo_sub[idx] == los) { - spin_unlock(&r0->lo_sub_lock); - schedule(); - } else { - spin_unlock(&r0->lo_sub_lock); - set_current_state(TASK_RUNNING); - break; - } - } - remove_wait_queue(wq, waiter); - } - LASSERT(!r0->lo_sub[idx]); -} - -static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, - union lov_layout_state *state) -{ - struct lov_layout_raid0 *r0 = &state->raid0; - struct lov_stripe_md *lsm = lov->lo_lsm; - int i; - - dump_lsm(D_INODE, lsm); - - lov_layout_wait(env, lov); - if (r0->lo_sub) { - for (i = 0; i < r0->lo_nr; ++i) { - struct lovsub_object *los = r0->lo_sub[i]; - - if (los) { - cl_object_prune(env, &los->lso_cl); - /* - * If top-level object is to be evicted from - * the cache, so are its sub-objects. - */ - lov_subobject_kill(env, lov, los, i); - } - } - } - return 0; -} - -static void lov_fini_empty(const struct lu_env *env, struct lov_object *lov, - union lov_layout_state *state) -{ - LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED); -} - -static void lov_fini_raid0(const struct lu_env *env, struct lov_object *lov, - union lov_layout_state *state) -{ - struct lov_layout_raid0 *r0 = &state->raid0; - - if (r0->lo_sub) { - kvfree(r0->lo_sub); - r0->lo_sub = NULL; - } - - dump_lsm(D_INODE, lov->lo_lsm); - lov_free_memmd(&lov->lo_lsm); -} - -static void lov_fini_released(const struct lu_env *env, struct lov_object *lov, - union lov_layout_state *state) -{ - dump_lsm(D_INODE, lov->lo_lsm); - lov_free_memmd(&lov->lo_lsm); -} - -static int lov_print_empty(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o) -{ - (*p)(env, cookie, "empty %d\n", lu2lov(o)->lo_layout_invalid); - return 0; -} - -static int lov_print_raid0(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o) -{ - struct lov_object *lov = lu2lov(o); - struct lov_layout_raid0 *r0 = lov_r0(lov); - 
struct lov_stripe_md *lsm = lov->lo_lsm; - int i; - - (*p)(env, cookie, "stripes: %d, %s, lsm{%p 0x%08X %d %u %u}:\n", - r0->lo_nr, lov->lo_layout_invalid ? "invalid" : "valid", lsm, - lsm->lsm_magic, atomic_read(&lsm->lsm_refc), - lsm->lsm_stripe_count, lsm->lsm_layout_gen); - for (i = 0; i < r0->lo_nr; ++i) { - struct lu_object *sub; - - if (r0->lo_sub[i]) { - sub = lovsub2lu(r0->lo_sub[i]); - lu_object_print(env, cookie, p, sub); - } else { - (*p)(env, cookie, "sub %d absent\n", i); - } - } - return 0; -} - -static int lov_print_released(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o) -{ - struct lov_object *lov = lu2lov(o); - struct lov_stripe_md *lsm = lov->lo_lsm; - - (*p)(env, cookie, - "released: %s, lsm{%p 0x%08X %d %u %u}:\n", - lov->lo_layout_invalid ? "invalid" : "valid", lsm, - lsm->lsm_magic, atomic_read(&lsm->lsm_refc), - lsm->lsm_stripe_count, lsm->lsm_layout_gen); - return 0; -} - -/** - * Implements cl_object_operations::coo_attr_get() method for an object - * without stripes (LLT_EMPTY layout type). - * - * The only attributes this layer is authoritative in this case is - * cl_attr::cat_blocks---it's 0. - */ -static int lov_attr_get_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr) -{ - attr->cat_blocks = 0; - return 0; -} - -static int lov_attr_get_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr) -{ - struct lov_object *lov = cl2lov(obj); - struct lov_layout_raid0 *r0 = lov_r0(lov); - struct cl_attr *lov_attr = &r0->lo_attr; - int result = 0; - - /* this is called w/o holding type guard mutex, so it must be inside - * an on going IO otherwise lsm may be replaced. - * LU-2117: it turns out there exists one exception. For mmaped files, - * the lock of those files may be requested in the other file's IO - * context, and this function is called in ccc_lock_state(), it will - * hit this assertion. 
- * Anyway, it's still okay to call attr_get w/o type guard as layout - * can't go if locks exist. - */ - /* LASSERT(atomic_read(&lsm->lsm_refc) > 1); */ - - if (!r0->lo_attr_valid) { - struct lov_stripe_md *lsm = lov->lo_lsm; - struct ost_lvb *lvb = &lov_env_info(env)->lti_lvb; - __u64 kms = 0; - - memset(lvb, 0, sizeof(*lvb)); - /* XXX: timestamps can be negative by sanity:test_39m, - * how can it be? - */ - lvb->lvb_atime = LLONG_MIN; - lvb->lvb_ctime = LLONG_MIN; - lvb->lvb_mtime = LLONG_MIN; - - /* - * XXX that should be replaced with a loop over sub-objects, - * doing cl_object_attr_get() on them. But for now, let's - * reuse old lov code. - */ - - /* - * XXX take lsm spin-lock to keep lov_merge_lvb_kms() - * happy. It's not needed, because new code uses - * ->coh_attr_guard spin-lock to protect consistency of - * sub-object attributes. - */ - lov_stripe_lock(lsm); - result = lov_merge_lvb_kms(lsm, lvb, &kms); - lov_stripe_unlock(lsm); - if (result == 0) { - cl_lvb2attr(lov_attr, lvb); - lov_attr->cat_kms = kms; - r0->lo_attr_valid = 1; - } - } - if (result == 0) { /* merge results */ - attr->cat_blocks = lov_attr->cat_blocks; - attr->cat_size = lov_attr->cat_size; - attr->cat_kms = lov_attr->cat_kms; - if (attr->cat_atime < lov_attr->cat_atime) - attr->cat_atime = lov_attr->cat_atime; - if (attr->cat_ctime < lov_attr->cat_ctime) - attr->cat_ctime = lov_attr->cat_ctime; - if (attr->cat_mtime < lov_attr->cat_mtime) - attr->cat_mtime = lov_attr->cat_mtime; - } - return result; -} - -static const struct lov_layout_operations lov_dispatch[] = { - [LLT_EMPTY] = { - .llo_init = lov_init_empty, - .llo_delete = lov_delete_empty, - .llo_fini = lov_fini_empty, - .llo_install = lov_install_empty, - .llo_print = lov_print_empty, - .llo_page_init = lov_page_init_empty, - .llo_lock_init = lov_lock_init_empty, - .llo_io_init = lov_io_init_empty, - .llo_getattr = lov_attr_get_empty - }, - [LLT_RAID0] = { - .llo_init = lov_init_raid0, - .llo_delete = lov_delete_raid0, - 
.llo_fini = lov_fini_raid0, - .llo_install = lov_install_raid0, - .llo_print = lov_print_raid0, - .llo_page_init = lov_page_init_raid0, - .llo_lock_init = lov_lock_init_raid0, - .llo_io_init = lov_io_init_raid0, - .llo_getattr = lov_attr_get_raid0 - }, - [LLT_RELEASED] = { - .llo_init = lov_init_released, - .llo_delete = lov_delete_empty, - .llo_fini = lov_fini_released, - .llo_install = lov_install_empty, - .llo_print = lov_print_released, - .llo_page_init = lov_page_init_empty, - .llo_lock_init = lov_lock_init_empty, - .llo_io_init = lov_io_init_released, - .llo_getattr = lov_attr_get_empty - } -}; - -/** - * Performs a double-dispatch based on the layout type of an object. - */ -#define LOV_2DISPATCH_NOLOCK(obj, op, ...) \ -({ \ - struct lov_object *__obj = (obj); \ - enum lov_layout_type __llt; \ - \ - __llt = __obj->lo_type; \ - LASSERT(__llt < ARRAY_SIZE(lov_dispatch)); \ - lov_dispatch[__llt].op(__VA_ARGS__); \ -}) - -/** - * Return lov_layout_type associated with a given lsm - */ -static enum lov_layout_type lov_type(struct lov_stripe_md *lsm) -{ - if (!lsm) - return LLT_EMPTY; - if (lsm_is_released(lsm)) - return LLT_RELEASED; - return LLT_RAID0; -} - -static inline void lov_conf_freeze(struct lov_object *lov) -{ - CDEBUG(D_INODE, "To take share lov(%p) owner %p/%p\n", - lov, lov->lo_owner, current); - if (lov->lo_owner != current) - down_read(&lov->lo_type_guard); -} - -static inline void lov_conf_thaw(struct lov_object *lov) -{ - CDEBUG(D_INODE, "To release share lov(%p) owner %p/%p\n", - lov, lov->lo_owner, current); - if (lov->lo_owner != current) - up_read(&lov->lo_type_guard); -} - -#define LOV_2DISPATCH_MAYLOCK(obj, op, lock, ...) 
\ -({ \ - struct lov_object *__obj = (obj); \ - int __lock = !!(lock); \ - typeof(lov_dispatch[0].op(__VA_ARGS__)) __result; \ - \ - if (__lock) \ - lov_conf_freeze(__obj); \ - __result = LOV_2DISPATCH_NOLOCK(obj, op, __VA_ARGS__); \ - if (__lock) \ - lov_conf_thaw(__obj); \ - __result; \ -}) - -/** - * Performs a locked double-dispatch based on the layout type of an object. - */ -#define LOV_2DISPATCH(obj, op, ...) \ - LOV_2DISPATCH_MAYLOCK(obj, op, 1, __VA_ARGS__) - -#define LOV_2DISPATCH_VOID(obj, op, ...) \ -do { \ - struct lov_object *__obj = (obj); \ - enum lov_layout_type __llt; \ - \ - lov_conf_freeze(__obj); \ - __llt = __obj->lo_type; \ - LASSERT(__llt < ARRAY_SIZE(lov_dispatch)); \ - lov_dispatch[__llt].op(__VA_ARGS__); \ - lov_conf_thaw(__obj); \ -} while (0) - -static void lov_conf_lock(struct lov_object *lov) -{ - LASSERT(lov->lo_owner != current); - down_write(&lov->lo_type_guard); - LASSERT(!lov->lo_owner); - lov->lo_owner = current; - CDEBUG(D_INODE, "Took exclusive lov(%p) owner %p\n", - lov, lov->lo_owner); -} - -static void lov_conf_unlock(struct lov_object *lov) -{ - CDEBUG(D_INODE, "To release exclusive lov(%p) owner %p\n", - lov, lov->lo_owner); - lov->lo_owner = NULL; - up_write(&lov->lo_type_guard); -} - -static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov) -{ - while (atomic_read(&lov->lo_active_ios) > 0) { - CDEBUG(D_INODE, "file:" DFID " wait for active IO, now: %d.\n", - PFID(lu_object_fid(lov2lu(lov))), - atomic_read(&lov->lo_active_ios)); - - wait_event_idle(lov->lo_waitq, - atomic_read(&lov->lo_active_ios) == 0); - } - return 0; -} - -static int lov_layout_change(const struct lu_env *unused, - struct lov_object *lov, struct lov_stripe_md *lsm, - const struct cl_object_conf *conf) -{ - struct lov_device *lov_dev = lov_object_dev(lov); - enum lov_layout_type llt = lov_type(lsm); - union lov_layout_state *state = &lov->u; - const struct lov_layout_operations *old_ops; - const struct lov_layout_operations 
*new_ops; - struct lu_env *env; - u16 refcheck; - int rc; - - LASSERT(lov->lo_type < ARRAY_SIZE(lov_dispatch)); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - LASSERT(llt < ARRAY_SIZE(lov_dispatch)); - - CDEBUG(D_INODE, DFID " from %s to %s\n", - PFID(lu_object_fid(lov2lu(lov))), - llt2str(lov->lo_type), llt2str(llt)); - - old_ops = &lov_dispatch[lov->lo_type]; - new_ops = &lov_dispatch[llt]; - - rc = cl_object_prune(env, &lov->lo_cl); - if (rc) - goto out; - - rc = old_ops->llo_delete(env, lov, &lov->u); - if (rc) - goto out; - - old_ops->llo_fini(env, lov, &lov->u); - - LASSERT(!atomic_read(&lov->lo_active_ios)); - - CDEBUG(D_INODE, DFID "Apply new layout lov %p, type %d\n", - PFID(lu_object_fid(lov2lu(lov))), lov, llt); - - lov->lo_type = LLT_EMPTY; - - /* page bufsize fixup */ - cl_object_header(&lov->lo_cl)->coh_page_bufsize -= - lov_page_slice_fixup(lov, NULL); - - rc = new_ops->llo_init(env, lov_dev, lov, lsm, conf, state); - if (rc) { - struct obd_device *obd = lov2obd(lov_dev->ld_lov); - - CERROR("%s: cannot apply new layout on " DFID " : rc = %d\n", - obd->obd_name, PFID(lu_object_fid(lov2lu(lov))), rc); - new_ops->llo_delete(env, lov, state); - new_ops->llo_fini(env, lov, state); - /* this file becomes an EMPTY file. */ - goto out; - } - - new_ops->llo_install(env, lov, state); - lov->lo_type = llt; -out: - cl_env_put(env, &refcheck); - return rc; -} - -/***************************************************************************** - * - * Lov object operations. 
- * - */ -int lov_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf) -{ - struct lov_object *lov = lu2lov(obj); - struct lov_device *dev = lov_object_dev(lov); - const struct cl_object_conf *cconf = lu2cl_conf(conf); - union lov_layout_state *set = &lov->u; - const struct lov_layout_operations *ops; - struct lov_stripe_md *lsm = NULL; - int rc; - - init_rwsem(&lov->lo_type_guard); - atomic_set(&lov->lo_active_ios, 0); - init_waitqueue_head(&lov->lo_waitq); - cl_object_page_init(lu2cl(obj), sizeof(struct lov_page)); - - lov->lo_type = LLT_EMPTY; - if (cconf->u.coc_layout.lb_buf) { - lsm = lov_unpackmd(dev->ld_lov, - cconf->u.coc_layout.lb_buf, - cconf->u.coc_layout.lb_len); - if (IS_ERR(lsm)) - return PTR_ERR(lsm); - } - - /* no locking is necessary, as object is being created */ - lov->lo_type = lov_type(lsm); - ops = &lov_dispatch[lov->lo_type]; - rc = ops->llo_init(env, dev, lov, lsm, cconf, set); - if (!rc) - ops->llo_install(env, lov, set); - - lov_lsm_put(lsm); - - return rc; -} - -static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, - const struct cl_object_conf *conf) -{ - struct lov_stripe_md *lsm = NULL; - struct lov_object *lov = cl2lov(obj); - int result = 0; - - if (conf->coc_opc == OBJECT_CONF_SET && - conf->u.coc_layout.lb_buf) { - lsm = lov_unpackmd(lov_object_dev(lov)->ld_lov, - conf->u.coc_layout.lb_buf, - conf->u.coc_layout.lb_len); - if (IS_ERR(lsm)) - return PTR_ERR(lsm); - } - - lov_conf_lock(lov); - if (conf->coc_opc == OBJECT_CONF_INVALIDATE) { - lov->lo_layout_invalid = true; - result = 0; - goto out; - } - - if (conf->coc_opc == OBJECT_CONF_WAIT) { - if (lov->lo_layout_invalid && - atomic_read(&lov->lo_active_ios) > 0) { - lov_conf_unlock(lov); - result = lov_layout_wait(env, lov); - lov_conf_lock(lov); - } - goto out; - } - - LASSERT(conf->coc_opc == OBJECT_CONF_SET); - - if ((!lsm && !lov->lo_lsm) || - ((lsm && lov->lo_lsm) && - (lov->lo_lsm->lsm_layout_gen == 
lsm->lsm_layout_gen) && - (lov->lo_lsm->lsm_pattern == lsm->lsm_pattern))) { - /* same version of layout */ - lov->lo_layout_invalid = false; - result = 0; - goto out; - } - - /* will change layout - check if there still exists active IO. */ - if (atomic_read(&lov->lo_active_ios) > 0) { - lov->lo_layout_invalid = true; - result = -EBUSY; - goto out; - } - - result = lov_layout_change(env, lov, lsm, conf); - lov->lo_layout_invalid = result != 0; - -out: - lov_conf_unlock(lov); - lov_lsm_put(lsm); - CDEBUG(D_INODE, DFID " lo_layout_invalid=%d\n", - PFID(lu_object_fid(lov2lu(lov))), lov->lo_layout_invalid); - return result; -} - -static void lov_object_delete(const struct lu_env *env, struct lu_object *obj) -{ - struct lov_object *lov = lu2lov(obj); - - LOV_2DISPATCH_VOID(lov, llo_delete, env, lov, &lov->u); -} - -static void lov_object_free(const struct lu_env *env, struct lu_object *obj) -{ - struct lov_object *lov = lu2lov(obj); - - LOV_2DISPATCH_VOID(lov, llo_fini, env, lov, &lov->u); - lu_object_fini(obj); - kmem_cache_free(lov_object_kmem, lov); -} - -static int lov_object_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o) -{ - return LOV_2DISPATCH_NOLOCK(lu2lov(o), llo_print, env, cookie, p, o); -} - -int lov_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) -{ - return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_page_init, env, obj, page, - index); -} - -/** - * Implements cl_object_operations::clo_io_init() method for lov - * layer. Dispatches to the appropriate layout io initialization method. 
- */ -int lov_io_init(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io) -{ - CL_IO_SLICE_CLEAN(lov_env_io(env), lis_cl); - - CDEBUG(D_INODE, DFID "io %p type %d ignore/verify layout %d/%d\n", - PFID(lu_object_fid(&obj->co_lu)), io, io->ci_type, - io->ci_ignore_layout, io->ci_verify_layout); - - return LOV_2DISPATCH_MAYLOCK(cl2lov(obj), llo_io_init, - !io->ci_ignore_layout, env, obj, io); -} - -/** - * An implementation of cl_object_operations::clo_attr_get() method for lov - * layer. For raid0 layout this collects and merges attributes of all - * sub-objects. - */ -static int lov_attr_get(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr) -{ - /* do not take lock, as this function is called under a - * spin-lock. Layout is protected from changing by ongoing IO. - */ - return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_getattr, env, obj, attr); -} - -static int lov_attr_update(const struct lu_env *env, struct cl_object *obj, - const struct cl_attr *attr, unsigned int valid) -{ - /* - * No dispatch is required here, as no layout implements this. - */ - return 0; -} - -int lov_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io) -{ - /* No need to lock because we've taken one refcount of layout. */ - return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_lock_init, env, obj, lock, - io); -} - -/** - * We calculate on which OST the mapping will end. If the length of mapping - * is greater than (stripe_size * stripe_count) then the last_stripe will - * will be one just before start_stripe. Else we check if the mapping - * intersects each OST and find last_stripe. 
- * This function returns the last_stripe and also sets the stripe_count - * over which the mapping is spread - * - * \param lsm [in] striping information for the file - * \param fm_start [in] logical start of mapping - * \param fm_end [in] logical end of mapping - * \param start_stripe [in] starting stripe of the mapping - * \param stripe_count [out] the number of stripes across which to map is - * returned - * - * \retval last_stripe return the last stripe of the mapping - */ -static int fiemap_calc_last_stripe(struct lov_stripe_md *lsm, - u64 fm_start, u64 fm_end, - int start_stripe, int *stripe_count) -{ - int last_stripe; - u64 obd_start; - u64 obd_end; - int i, j; - - if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) { - last_stripe = (start_stripe < 1 ? lsm->lsm_stripe_count - 1 : - start_stripe - 1); - *stripe_count = lsm->lsm_stripe_count; - } else { - for (j = 0, i = start_stripe; j < lsm->lsm_stripe_count; - i = (i + 1) % lsm->lsm_stripe_count, j++) { - if (!(lov_stripe_intersects(lsm, i, fm_start, fm_end, - &obd_start, &obd_end))) - break; - } - *stripe_count = j; - last_stripe = (start_stripe + j - 1) % lsm->lsm_stripe_count; - } - - return last_stripe; -} - -/** - * Set fe_device and copy extents from local buffer into main return buffer. 
- * - * \param fiemap [out] fiemap to hold all extents - * \param lcl_fm_ext [in] array of fiemap extents get from OSC layer - * \param ost_index [in] OST index to be written into the fm_device - * field for each extent - * \param ext_count [in] number of extents to be copied - * \param current_extent [in] where to start copying in the extent array - */ -static void fiemap_prepare_and_copy_exts(struct fiemap *fiemap, - struct fiemap_extent *lcl_fm_ext, - int ost_index, unsigned int ext_count, - int current_extent) -{ - unsigned int ext; - char *to; - - for (ext = 0; ext < ext_count; ext++) { - lcl_fm_ext[ext].fe_device = ost_index; - lcl_fm_ext[ext].fe_flags |= FIEMAP_EXTENT_NET; - } - - /* Copy fm_extent's from fm_local to return buffer */ - to = (char *)fiemap + fiemap_count_to_size(current_extent); - memcpy(to, lcl_fm_ext, ext_count * sizeof(struct fiemap_extent)); -} - -#define FIEMAP_BUFFER_SIZE 4096 - -/** - * Non-zero fe_logical indicates that this is a continuation FIEMAP - * call. The local end offset and the device are sent in the first - * fm_extent. This function calculates the stripe number from the index. - * This function returns a stripe_no on which mapping is to be restarted. - * - * This function returns fm_end_offset which is the in-OST offset at which - * mapping should be restarted. If fm_end_offset=0 is returned then caller - * will re-calculate proper offset in next stripe. - * Note that the first extent is passed to lov_get_info via the value field. 
- * - * \param fiemap [in] fiemap request header - * \param lsm [in] striping information for the file - * \param fm_start [in] logical start of mapping - * \param fm_end [in] logical end of mapping - * \param start_stripe [out] starting stripe will be returned in this - */ -static u64 fiemap_calc_fm_end_offset(struct fiemap *fiemap, - struct lov_stripe_md *lsm, - u64 fm_start, u64 fm_end, - int *start_stripe) -{ - u64 local_end = fiemap->fm_extents[0].fe_logical; - u64 lun_start, lun_end; - u64 fm_end_offset; - int stripe_no = -1; - int i; - - if (!fiemap->fm_extent_count || !fiemap->fm_extents[0].fe_logical) - return 0; - - /* Find out stripe_no from ost_index saved in the fe_device */ - for (i = 0; i < lsm->lsm_stripe_count; i++) { - struct lov_oinfo *oinfo = lsm->lsm_oinfo[i]; - - if (lov_oinfo_is_dummy(oinfo)) - continue; - - if (oinfo->loi_ost_idx == fiemap->fm_extents[0].fe_device) { - stripe_no = i; - break; - } - } - - if (stripe_no == -1) - return -EINVAL; - - /* - * If we have finished mapping on previous device, shift logical - * offset to start of next device - */ - if (lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end, - &lun_start, &lun_end) && - local_end < lun_end) { - fm_end_offset = local_end; - *start_stripe = stripe_no; - } else { - /* This is a special value to indicate that caller should - * calculate offset in next stripe. 
- */ - fm_end_offset = 0; - *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count; - } - - return fm_end_offset; -} - -struct fiemap_state { - struct fiemap *fs_fm; - u64 fs_start; - u64 fs_length; - u64 fs_end; - u64 fs_end_offset; - int fs_cur_extent; - int fs_cnt_need; - int fs_start_stripe; - int fs_last_stripe; - bool fs_device_done; - bool fs_finish; - bool fs_enough; -}; - -static int fiemap_for_stripe(const struct lu_env *env, struct cl_object *obj, - struct lov_stripe_md *lsm, - struct fiemap *fiemap, size_t *buflen, - struct ll_fiemap_info_key *fmkey, int stripeno, - struct fiemap_state *fs) -{ - struct cl_object *subobj; - struct lov_obd *lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov; - struct fiemap_extent *fm_ext = &fs->fs_fm->fm_extents[0]; - u64 req_fm_len; /* Stores length of required mapping */ - u64 len_mapped_single_call; - u64 lun_start; - u64 lun_end; - u64 obd_object_end; - unsigned int ext_count; - /* EOF for object */ - bool ost_eof = false; - /* done with required mapping for this OST? 
*/ - bool ost_done = false; - int ost_index; - int rc = 0; - - fs->fs_device_done = false; - /* Find out range of mapping on this stripe */ - if ((lov_stripe_intersects(lsm, stripeno, fs->fs_start, fs->fs_end, - &lun_start, &obd_object_end)) == 0) - return 0; - - if (lov_oinfo_is_dummy(lsm->lsm_oinfo[stripeno])) - return -EIO; - - /* If this is a continuation FIEMAP call and we are on - * starting stripe then lun_start needs to be set to - * end_offset - */ - if (fs->fs_end_offset != 0 && stripeno == fs->fs_start_stripe) - lun_start = fs->fs_end_offset; - - lun_end = fs->fs_length; - if (lun_end != ~0ULL) { - /* Handle fs->fs_start + fs->fs_length overflow */ - if (fs->fs_start + fs->fs_length < fs->fs_start) - fs->fs_length = ~0ULL - fs->fs_start; - lun_end = lov_size_to_stripe(lsm, fs->fs_start + fs->fs_length, - stripeno); - } - - if (lun_start == lun_end) - return 0; - - req_fm_len = obd_object_end - lun_start; - fs->fs_fm->fm_length = 0; - len_mapped_single_call = 0; - - /* find lobsub object */ - subobj = lov_find_subobj(env, cl2lov(obj), lsm, stripeno); - if (IS_ERR(subobj)) - return PTR_ERR(subobj); - /* If the output buffer is very large and the objects have many - * extents we may need to loop on a single OST repeatedly - */ - do { - if (fiemap->fm_extent_count > 0) { - /* Don't get too many extents. */ - if (fs->fs_cur_extent + fs->fs_cnt_need > - fiemap->fm_extent_count) - fs->fs_cnt_need = fiemap->fm_extent_count - - fs->fs_cur_extent; - } - - lun_start += len_mapped_single_call; - fs->fs_fm->fm_length = req_fm_len - len_mapped_single_call; - req_fm_len = fs->fs_fm->fm_length; - fs->fs_fm->fm_extent_count = fs->fs_enough ? - 1 : fs->fs_cnt_need; - fs->fs_fm->fm_mapped_extents = 0; - fs->fs_fm->fm_flags = fiemap->fm_flags; - - ost_index = lsm->lsm_oinfo[stripeno]->loi_ost_idx; - - if (ost_index < 0 || ost_index >= lov->desc.ld_tgt_count) { - rc = -EINVAL; - goto obj_put; - } - /* If OST is inactive, return extent with UNKNOWN flag. 
*/ - if (!lov->lov_tgts[ost_index]->ltd_active) { - fs->fs_fm->fm_flags |= FIEMAP_EXTENT_LAST; - fs->fs_fm->fm_mapped_extents = 1; - - fm_ext[0].fe_logical = lun_start; - fm_ext[0].fe_length = obd_object_end - lun_start; - fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN; - - goto inactive_tgt; - } - - fs->fs_fm->fm_start = lun_start; - fs->fs_fm->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER; - memcpy(&fmkey->lfik_fiemap, fs->fs_fm, sizeof(*fs->fs_fm)); - *buflen = fiemap_count_to_size(fs->fs_fm->fm_extent_count); - - rc = cl_object_fiemap(env, subobj, fmkey, fs->fs_fm, buflen); - if (rc) - goto obj_put; -inactive_tgt: - ext_count = fs->fs_fm->fm_mapped_extents; - if (ext_count == 0) { - ost_done = true; - fs->fs_device_done = true; - /* If last stripe has hold at the end, - * we need to return - */ - if (stripeno == fs->fs_last_stripe) { - fiemap->fm_mapped_extents = 0; - fs->fs_finish = true; - goto obj_put; - } - break; - } else if (fs->fs_enough) { - /* - * We've collected enough extents and there are - * more extents after it. - */ - fs->fs_finish = true; - goto obj_put; - } - - /* If we just need num of extents, got to next device */ - if (fiemap->fm_extent_count == 0) { - fs->fs_cur_extent += ext_count; - break; - } - - /* prepare to copy retrived map extents */ - len_mapped_single_call = fm_ext[ext_count - 1].fe_logical + - fm_ext[ext_count - 1].fe_length - - lun_start; - - /* Have we finished mapping on this device? 
*/ - if (req_fm_len <= len_mapped_single_call) { - ost_done = true; - fs->fs_device_done = true; - } - - /* Clear the EXTENT_LAST flag which can be present on - * the last extent - */ - if (fm_ext[ext_count - 1].fe_flags & FIEMAP_EXTENT_LAST) - fm_ext[ext_count - 1].fe_flags &= ~FIEMAP_EXTENT_LAST; - if (lov_stripe_size(lsm, fm_ext[ext_count - 1].fe_logical + - fm_ext[ext_count - 1].fe_length, - stripeno) >= fmkey->lfik_oa.o_size) { - ost_eof = true; - fs->fs_device_done = true; - } - - fiemap_prepare_and_copy_exts(fiemap, fm_ext, ost_index, - ext_count, fs->fs_cur_extent); - fs->fs_cur_extent += ext_count; - - /* Ran out of available extents? */ - if (fs->fs_cur_extent >= fiemap->fm_extent_count) - fs->fs_enough = true; - } while (!ost_done && !ost_eof); - - if (stripeno == fs->fs_last_stripe) - fs->fs_finish = true; -obj_put: - cl_object_put(env, subobj); - - return rc; -} - -/** - * Break down the FIEMAP request and send appropriate calls to individual OSTs. - * This also handles the restarting of FIEMAP calls in case mapping overflows - * the available number of extents in single call. 
- * - * \param env [in] lustre environment - * \param obj [in] file object - * \param fmkey [in] fiemap request header and other info - * \param fiemap [out] fiemap buffer holding retrived map extents - * \param buflen [in/out] max buffer length of @fiemap, when iterate - * each OST, it is used to limit max map needed - * \retval 0 success - * \retval < 0 error - */ -static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj, - struct ll_fiemap_info_key *fmkey, - struct fiemap *fiemap, size_t *buflen) -{ - unsigned int buffer_size = FIEMAP_BUFFER_SIZE; - struct fiemap *fm_local = NULL; - struct lov_stripe_md *lsm; - int rc = 0; - int cur_stripe; - int stripe_count; - struct fiemap_state fs = { NULL }; - - lsm = lov_lsm_addref(cl2lov(obj)); - if (!lsm) - return -ENODATA; - - /** - * If the stripe_count > 1 and the application does not understand - * DEVICE_ORDER flag, it cannot interpret the extents correctly. - */ - if (lsm->lsm_stripe_count > 1 && - !(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) { - rc = -ENOTSUPP; - goto out; - } - - if (lsm_is_released(lsm)) { - if (fiemap->fm_start < fmkey->lfik_oa.o_size) { - /** - * released file, return a minimal FIEMAP if - * request fits in file-size. 
- */ - fiemap->fm_mapped_extents = 1; - fiemap->fm_extents[0].fe_logical = fiemap->fm_start; - if (fiemap->fm_start + fiemap->fm_length < - fmkey->lfik_oa.o_size) - fiemap->fm_extents[0].fe_length = - fiemap->fm_length; - else - fiemap->fm_extents[0].fe_length = - fmkey->lfik_oa.o_size - - fiemap->fm_start; - fiemap->fm_extents[0].fe_flags |= - FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST; - } - rc = 0; - goto out; - } - - if (fiemap_count_to_size(fiemap->fm_extent_count) < buffer_size) - buffer_size = fiemap_count_to_size(fiemap->fm_extent_count); - - fm_local = kvzalloc(buffer_size, GFP_NOFS); - if (!fm_local) { - rc = -ENOMEM; - goto out; - } - fs.fs_fm = fm_local; - fs.fs_cnt_need = fiemap_size_to_count(buffer_size); - - fs.fs_start = fiemap->fm_start; - /* fs_start is beyond the end of the file */ - if (fs.fs_start > fmkey->lfik_oa.o_size) { - rc = -EINVAL; - goto out; - } - /* Calculate start stripe, last stripe and length of mapping */ - fs.fs_start_stripe = lov_stripe_number(lsm, fs.fs_start); - fs.fs_end = (fs.fs_length == ~0ULL) ? fmkey->lfik_oa.o_size : - fs.fs_start + fs.fs_length - 1; - /* If fs_length != ~0ULL but fs_start+fs_length-1 exceeds file size */ - if (fs.fs_end > fmkey->lfik_oa.o_size) { - fs.fs_end = fmkey->lfik_oa.o_size; - fs.fs_length = fs.fs_end - fs.fs_start; - } - - fs.fs_last_stripe = fiemap_calc_last_stripe(lsm, fs.fs_start, fs.fs_end, - fs.fs_start_stripe, - &stripe_count); - fs.fs_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fs.fs_start, - fs.fs_end, - &fs.fs_start_stripe); - if (fs.fs_end_offset == -EINVAL) { - rc = -EINVAL; - goto out; - } - - - /** - * Requested extent count exceeds the fiemap buffer size, shrink our - * ambition. 
- */ - if (fiemap_count_to_size(fiemap->fm_extent_count) > *buflen) - fiemap->fm_extent_count = fiemap_size_to_count(*buflen); - if (!fiemap->fm_extent_count) - fs.fs_cnt_need = 0; - - fs.fs_finish = false; - fs.fs_enough = false; - fs.fs_cur_extent = 0; - - /* Check each stripe */ - for (cur_stripe = fs.fs_start_stripe; stripe_count > 0; - --stripe_count, - cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) { - rc = fiemap_for_stripe(env, obj, lsm, fiemap, buflen, fmkey, - cur_stripe, &fs); - if (rc < 0) - goto out; - if (fs.fs_finish) - break; - } /* for each stripe */ - /* - * Indicate that we are returning device offsets unless file just has - * single stripe - */ - if (lsm->lsm_stripe_count > 1) - fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER; - - if (!fiemap->fm_extent_count) - goto skip_last_device_calc; - - /* - * Check if we have reached the last stripe and whether mapping for that - * stripe is done. - */ - if ((cur_stripe == fs.fs_last_stripe) && fs.fs_device_done) - fiemap->fm_extents[fs.fs_cur_extent - 1].fe_flags |= - FIEMAP_EXTENT_LAST; -skip_last_device_calc: - fiemap->fm_mapped_extents = fs.fs_cur_extent; -out: - kvfree(fm_local); - lov_lsm_put(lsm); - return rc; -} - -static int lov_object_getstripe(const struct lu_env *env, struct cl_object *obj, - struct lov_user_md __user *lum) -{ - struct lov_object *lov = cl2lov(obj); - struct lov_stripe_md *lsm; - int rc = 0; - - lsm = lov_lsm_addref(lov); - if (!lsm) - return -ENODATA; - - rc = lov_getstripe(cl2lov(obj), lsm, lum); - lov_lsm_put(lsm); - return rc; -} - -static int lov_object_layout_get(const struct lu_env *env, - struct cl_object *obj, - struct cl_layout *cl) -{ - struct lov_object *lov = cl2lov(obj); - struct lov_stripe_md *lsm = lov_lsm_addref(lov); - struct lu_buf *buf = &cl->cl_buf; - ssize_t rc; - - if (!lsm) { - cl->cl_size = 0; - cl->cl_layout_gen = CL_LAYOUT_GEN_EMPTY; - return 0; - } - - cl->cl_size = lov_mds_md_size(lsm->lsm_stripe_count, lsm->lsm_magic); - cl->cl_layout_gen = 
lsm->lsm_layout_gen; - - rc = lov_lsm_pack(lsm, buf->lb_buf, buf->lb_len); - lov_lsm_put(lsm); - - return rc < 0 ? rc : 0; -} - -static loff_t lov_object_maxbytes(struct cl_object *obj) -{ - struct lov_object *lov = cl2lov(obj); - struct lov_stripe_md *lsm = lov_lsm_addref(lov); - loff_t maxbytes; - - if (!lsm) - return LLONG_MAX; - - maxbytes = lsm->lsm_maxbytes; - - lov_lsm_put(lsm); - - return maxbytes; -} - -static const struct cl_object_operations lov_ops = { - .coo_page_init = lov_page_init, - .coo_lock_init = lov_lock_init, - .coo_io_init = lov_io_init, - .coo_attr_get = lov_attr_get, - .coo_attr_update = lov_attr_update, - .coo_conf_set = lov_conf_set, - .coo_getstripe = lov_object_getstripe, - .coo_layout_get = lov_object_layout_get, - .coo_maxbytes = lov_object_maxbytes, - .coo_fiemap = lov_object_fiemap, -}; - -static const struct lu_object_operations lov_lu_obj_ops = { - .loo_object_init = lov_object_init, - .loo_object_delete = lov_object_delete, - .loo_object_release = NULL, - .loo_object_free = lov_object_free, - .loo_object_print = lov_object_print, - .loo_object_invariant = NULL -}; - -struct lu_object *lov_object_alloc(const struct lu_env *env, - const struct lu_object_header *unused, - struct lu_device *dev) -{ - struct lov_object *lov; - struct lu_object *obj; - - lov = kmem_cache_zalloc(lov_object_kmem, GFP_NOFS); - if (lov) { - obj = lov2lu(lov); - lu_object_init(obj, NULL, dev); - lov->lo_cl.co_ops = &lov_ops; - lov->lo_type = -1; /* invalid, to catch uninitialized type */ - /* - * object io operation vector (cl_object::co_iop) is installed - * later in lov_object_init(), as different vectors are used - * for object with different layouts. 
- */ - obj->lo_ops = &lov_lu_obj_ops; - } else { - obj = NULL; - } - return obj; -} - -struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov) -{ - struct lov_stripe_md *lsm = NULL; - - lov_conf_freeze(lov); - if (lov->lo_lsm) { - lsm = lsm_addref(lov->lo_lsm); - CDEBUG(D_INODE, "lsm %p addref %d/%d by %p.\n", - lsm, atomic_read(&lsm->lsm_refc), - lov->lo_layout_invalid, current); - } - lov_conf_thaw(lov); - return lsm; -} - -int lov_read_and_clear_async_rc(struct cl_object *clob) -{ - struct lu_object *luobj; - int rc = 0; - - luobj = lu_object_locate(&cl_object_header(clob)->coh_lu, - &lov_device_type); - if (luobj) { - struct lov_object *lov = lu2lov(luobj); - - lov_conf_freeze(lov); - switch (lov->lo_type) { - case LLT_RAID0: { - struct lov_stripe_md *lsm; - int i; - - lsm = lov->lo_lsm; - for (i = 0; i < lsm->lsm_stripe_count; i++) { - struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - - if (lov_oinfo_is_dummy(loi)) - continue; - - if (loi->loi_ar.ar_rc && !rc) - rc = loi->loi_ar.ar_rc; - loi->loi_ar.ar_rc = 0; - } - } - case LLT_RELEASED: - case LLT_EMPTY: - break; - default: - LBUG(); - } - lov_conf_thaw(lov); - } - return rc; -} -EXPORT_SYMBOL(lov_read_and_clear_async_rc); - -/** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lov_offset.c b/drivers/staging/lustre/lustre/lov/lov_offset.c deleted file mode 100644 index a5f00f6ec347..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_offset.c +++ /dev/null @@ -1,269 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include - -#include "lov_internal.h" - -/* compute object size given "stripeno" and the ost size */ -u64 lov_stripe_size(struct lov_stripe_md *lsm, u64 ost_size, int stripeno) -{ - unsigned long ssize = lsm->lsm_stripe_size; - unsigned long stripe_size; - u64 swidth; - u64 lov_size; - int magic = lsm->lsm_magic; - - if (ost_size == 0) - return 0; - - lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, NULL, &swidth); - - /* lov_do_div64(a, b) returns a % b, and a = a / b */ - stripe_size = lov_do_div64(ost_size, ssize); - if (stripe_size) - lov_size = ost_size * swidth + stripeno * ssize + stripe_size; - else - lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize; - - return lov_size; -} - -/** - * Compute file level page index by stripe level page offset - */ -pgoff_t lov_stripe_pgoff(struct lov_stripe_md *lsm, pgoff_t stripe_index, - int stripe) -{ - loff_t offset; - - offset = lov_stripe_size(lsm, (stripe_index << PAGE_SHIFT) + 1, stripe); - return offset >> PAGE_SHIFT; -} - -/* we have an offset in file backed by an lov and want to find out where - * that offset lands in our given stripe of the file. for the easy - * case where the offset is within the stripe, we just have to scale the - * offset down to make it relative to the stripe instead of the lov. 
- * - * the harder case is what to do when the offset doesn't intersect the - * stripe. callers will want start offsets clamped ahead to the start - * of the nearest stripe in the file. end offsets similarly clamped to the - * nearest ending byte of a stripe in the file: - * - * all this function does is move offsets to the nearest region of the - * stripe, and it does its work "mod" the full length of all the stripes. - * consider a file with 3 stripes: - * - * S E - * --------------------------------------------------------------------- - * | 0 | 1 | 2 | 0 | 1 | 2 | - * --------------------------------------------------------------------- - * - * to find stripe 1's offsets for S and E, it divides by the full stripe - * width and does its math in the context of a single set of stripes: - * - * S E - * ----------------------------------- - * | 0 | 1 | 2 | - * ----------------------------------- - * - * it'll notice that E is outside stripe 1 and clamp it to the end of the - * stripe, then multiply it back out by lov_off to give the real offsets in - * the stripe: - * - * S E - * --------------------------------------------------------------------- - * | 1 | 1 | 1 | 1 | 1 | 1 | - * --------------------------------------------------------------------- - * - * it would have done similarly and pulled S forward to the start of a 1 - * stripe if, say, S had landed in a 0 stripe. - * - * this rounding isn't always correct. consider an E lov offset that lands - * on a 0 stripe, the "mod stripe width" math will pull it forward to the - * start of a 1 stripe, when in fact it wanted to be rounded back to the end - * of a previous 1 stripe. this logic is handled by callers and this is why: - * - * this function returns < 0 when the offset was "before" the stripe and - * was moved forward to the start of the stripe in question; 0 when it - * falls in the stripe and no shifting was done; > 0 when the offset - * was outside the stripe and was pulled back to its final byte. 
- */ -int lov_stripe_offset(struct lov_stripe_md *lsm, u64 lov_off, - int stripeno, u64 *obdoff) -{ - unsigned long ssize = lsm->lsm_stripe_size; - u64 stripe_off, this_stripe, swidth; - int magic = lsm->lsm_magic; - int ret = 0; - - if (lov_off == OBD_OBJECT_EOF) { - *obdoff = OBD_OBJECT_EOF; - return 0; - } - - lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, &lov_off, - &swidth); - - /* lov_do_div64(a, b) returns a % b, and a = a / b */ - stripe_off = lov_do_div64(lov_off, swidth); - - this_stripe = (u64)stripeno * ssize; - if (stripe_off < this_stripe) { - stripe_off = 0; - ret = -1; - } else { - stripe_off -= this_stripe; - - if (stripe_off >= ssize) { - stripe_off = ssize; - ret = 1; - } - } - - *obdoff = lov_off * ssize + stripe_off; - return ret; -} - -/* Given a whole-file size and a stripe number, give the file size which - * corresponds to the individual object of that stripe. - * - * This behaves basically in the same was as lov_stripe_offset, except that - * file sizes falling before the beginning of a stripe are clamped to the end - * of the previous stripe, not the beginning of the next: - * - * S - * --------------------------------------------------------------------- - * | 0 | 1 | 2 | 0 | 1 | 2 | - * --------------------------------------------------------------------- - * - * if clamped to stripe 2 becomes: - * - * S - * --------------------------------------------------------------------- - * | 0 | 1 | 2 | 0 | 1 | 2 | - * --------------------------------------------------------------------- - */ -u64 lov_size_to_stripe(struct lov_stripe_md *lsm, u64 file_size, - int stripeno) -{ - unsigned long ssize = lsm->lsm_stripe_size; - u64 stripe_off, this_stripe, swidth; - int magic = lsm->lsm_magic; - - if (file_size == OBD_OBJECT_EOF) - return OBD_OBJECT_EOF; - - lsm_op_find(magic)->lsm_stripe_by_index(lsm, &stripeno, &file_size, - &swidth); - - /* lov_do_div64(a, b) returns a % b, and a = a / b */ - stripe_off = lov_do_div64(file_size, swidth); 
- - this_stripe = (u64)stripeno * ssize; - if (stripe_off < this_stripe) { - /* Move to end of previous stripe, or zero */ - if (file_size > 0) { - file_size--; - stripe_off = ssize; - } else { - stripe_off = 0; - } - } else { - stripe_off -= this_stripe; - - if (stripe_off >= ssize) { - /* Clamp to end of this stripe */ - stripe_off = ssize; - } - } - - return (file_size * ssize + stripe_off); -} - -/* given an extent in an lov and a stripe, calculate the extent of the stripe - * that is contained within the lov extent. this returns true if the given - * stripe does intersect with the lov extent. - */ -int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno, - u64 start, u64 end, u64 *obd_start, u64 *obd_end) -{ - int start_side, end_side; - - start_side = lov_stripe_offset(lsm, start, stripeno, obd_start); - end_side = lov_stripe_offset(lsm, end, stripeno, obd_end); - - CDEBUG(D_INODE, "[%llu->%llu] -> [(%d) %llu->%llu (%d)]\n", - start, end, start_side, *obd_start, *obd_end, end_side); - - /* this stripe doesn't intersect the file extent when neither - * start or the end intersected the stripe and obd_start and - * obd_end got rounded up to the save value. - */ - if (start_side != 0 && end_side != 0 && *obd_start == *obd_end) - return 0; - - /* as mentioned in the lov_stripe_offset commentary, end - * might have been shifted in the wrong direction. This - * happens when an end offset is before the stripe when viewed - * through the "mod stripe size" math. we detect it being shifted - * in the wrong direction and touch it up. - * interestingly, this can't underflow since end must be > start - * if we passed through the previous check. - * (should we assert for that somewhere?) 
- */ - if (end_side != 0) - (*obd_end)--; - - return 1; -} - -/* compute which stripe number "lov_off" will be written into */ -int lov_stripe_number(struct lov_stripe_md *lsm, u64 lov_off) -{ - unsigned long ssize = lsm->lsm_stripe_size; - u64 stripe_off, swidth; - int magic = lsm->lsm_magic; - - lsm_op_find(magic)->lsm_stripe_by_offset(lsm, NULL, &lov_off, &swidth); - - stripe_off = lov_do_div64(lov_off, swidth); - - /* Puts stripe_off/ssize result into stripe_off */ - lov_do_div64(stripe_off, ssize); - - return stripe_off; -} diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c deleted file mode 100644 index b1060d02a164..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_pack.c +++ /dev/null @@ -1,400 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/lov/lov_pack.c - * - * (Un)packing of OST/MDS requests - * - * Author: Andreas Dilger - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include -#include -#include -#include -#include - -#include "lov_cl_internal.h" -#include "lov_internal.h" - -void lov_dump_lmm_common(int level, void *lmmp) -{ - struct lov_mds_md *lmm = lmmp; - struct ost_id oi; - - lmm_oi_le_to_cpu(&oi, &lmm->lmm_oi); - CDEBUG(level, "objid " DOSTID ", magic 0x%08x, pattern %#x\n", - POSTID(&oi), le32_to_cpu(lmm->lmm_magic), - le32_to_cpu(lmm->lmm_pattern)); - CDEBUG(level, "stripe_size %u, stripe_count %u, layout_gen %u\n", - le32_to_cpu(lmm->lmm_stripe_size), - le16_to_cpu(lmm->lmm_stripe_count), - le16_to_cpu(lmm->lmm_layout_gen)); -} - -static void lov_dump_lmm_objects(int level, struct lov_ost_data *lod, - int stripe_count) -{ - int i; - - if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) { - CDEBUG(level, "bad stripe_count %u > max_stripe_count %u\n", - stripe_count, LOV_V1_INSANE_STRIPE_COUNT); - return; - } - - for (i = 0; i < stripe_count; ++i, ++lod) { - struct ost_id oi; - - ostid_le_to_cpu(&lod->l_ost_oi, &oi); - CDEBUG(level, "stripe %u idx %u subobj " DOSTID "\n", i, - le32_to_cpu(lod->l_ost_idx), POSTID(&oi)); - } -} - -void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm) -{ - lov_dump_lmm_common(level, lmm); - lov_dump_lmm_objects(level, lmm->lmm_objects, - le16_to_cpu(lmm->lmm_stripe_count)); -} - -void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm) -{ - lov_dump_lmm_common(level, lmm); - CDEBUG(level, "pool_name " LOV_POOLNAMEF "\n", lmm->lmm_pool_name); - lov_dump_lmm_objects(level, lmm->lmm_objects, - le16_to_cpu(lmm->lmm_stripe_count)); -} - -/** - * Pack LOV striping metadata for disk storage format (in little - * endian byte order). - * - * This follows the getxattr() conventions. If \a buf_size is zero - * then return the size needed. If \a buf_size is too small then - * return -ERANGE. Otherwise return the size of the result. 
- */ -ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf, - size_t buf_size) -{ - struct lov_ost_data_v1 *lmm_objects; - struct lov_mds_md_v1 *lmmv1 = buf; - struct lov_mds_md_v3 *lmmv3 = buf; - size_t lmm_size; - unsigned int i; - - lmm_size = lov_mds_md_size(lsm->lsm_stripe_count, lsm->lsm_magic); - if (!buf_size) - return lmm_size; - - if (buf_size < lmm_size) - return -ERANGE; - - /* - * lmmv1 and lmmv3 point to the same struct and have the - * same first fields - */ - lmmv1->lmm_magic = cpu_to_le32(lsm->lsm_magic); - lmm_oi_cpu_to_le(&lmmv1->lmm_oi, &lsm->lsm_oi); - lmmv1->lmm_stripe_size = cpu_to_le32(lsm->lsm_stripe_size); - lmmv1->lmm_stripe_count = cpu_to_le16(lsm->lsm_stripe_count); - lmmv1->lmm_pattern = cpu_to_le32(lsm->lsm_pattern); - lmmv1->lmm_layout_gen = cpu_to_le16(lsm->lsm_layout_gen); - - if (lsm->lsm_magic == LOV_MAGIC_V3) { - BUILD_BUG_ON(sizeof(lsm->lsm_pool_name) != - sizeof(lmmv3->lmm_pool_name)); - strlcpy(lmmv3->lmm_pool_name, lsm->lsm_pool_name, - sizeof(lmmv3->lmm_pool_name)); - lmm_objects = lmmv3->lmm_objects; - } else { - lmm_objects = lmmv1->lmm_objects; - } - - for (i = 0; i < lsm->lsm_stripe_count; i++) { - struct lov_oinfo *loi = lsm->lsm_oinfo[i]; - - ostid_cpu_to_le(&loi->loi_oi, &lmm_objects[i].l_ost_oi); - lmm_objects[i].l_ost_gen = cpu_to_le32(loi->loi_ost_gen); - lmm_objects[i].l_ost_idx = cpu_to_le32(loi->loi_ost_idx); - } - - return lmm_size; -} - -/* Find the max stripecount we should use */ -__u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count) -{ - __u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD; - - if (!stripe_count) - stripe_count = lov->desc.ld_default_stripe_count; - if (stripe_count > lov->desc.ld_active_tgt_count) - stripe_count = lov->desc.ld_active_tgt_count; - if (!stripe_count) - stripe_count = 1; - - /* stripe count is based on whether ldiskfs can handle - * larger EA sizes - */ - if (lov->lov_ocd.ocd_connect_flags & OBD_CONNECT_MAX_EASIZE && - lov->lov_ocd.ocd_max_easize) 
- max_stripes = lov_mds_md_max_stripe_count( - lov->lov_ocd.ocd_max_easize, magic); - - if (stripe_count > max_stripes) - stripe_count = max_stripes; - - return stripe_count; -} - -static int lov_verify_lmm(void *lmm, int lmm_bytes, __u16 *stripe_count) -{ - int rc; - - if (!lsm_op_find(le32_to_cpu(*(__u32 *)lmm))) { - CERROR("bad disk LOV MAGIC: 0x%08X; dumping LMM (size=%d):\n", - le32_to_cpu(*(__u32 *)lmm), lmm_bytes); - CERROR("%*phN\n", lmm_bytes, lmm); - return -EINVAL; - } - rc = lsm_op_find(le32_to_cpu(*(__u32 *)lmm))->lsm_lmm_verify(lmm, - lmm_bytes, - stripe_count); - return rc; -} - -static struct lov_stripe_md *lov_lsm_alloc(u16 stripe_count, u32 pattern, - u32 magic) -{ - struct lov_stripe_md *lsm; - unsigned int i; - - CDEBUG(D_INFO, "alloc lsm, stripe_count %u\n", stripe_count); - - lsm = lsm_alloc_plain(stripe_count); - if (!lsm) { - CERROR("cannot allocate LSM stripe_count %u\n", stripe_count); - return ERR_PTR(-ENOMEM); - } - - atomic_set(&lsm->lsm_refc, 1); - spin_lock_init(&lsm->lsm_lock); - lsm->lsm_magic = magic; - lsm->lsm_stripe_count = stripe_count; - lsm->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES * stripe_count; - lsm->lsm_pattern = pattern; - lsm->lsm_pool_name[0] = '\0'; - lsm->lsm_layout_gen = 0; - if (stripe_count > 0) - lsm->lsm_oinfo[0]->loi_ost_idx = ~0; - - for (i = 0; i < stripe_count; i++) - loi_init(lsm->lsm_oinfo[i]); - - return lsm; -} - -int lov_free_memmd(struct lov_stripe_md **lsmp) -{ - struct lov_stripe_md *lsm = *lsmp; - int refc; - - *lsmp = NULL; - LASSERT(atomic_read(&lsm->lsm_refc) > 0); - refc = atomic_dec_return(&lsm->lsm_refc); - if (refc == 0) - lsm_op_find(lsm->lsm_magic)->lsm_free(lsm); - - return refc; -} - -/* Unpack LOV object metadata from disk storage. It is packed in LE byte - * order and is opaque to the networking layer. 
- */ -struct lov_stripe_md *lov_unpackmd(struct lov_obd *lov, struct lov_mds_md *lmm, - size_t lmm_size) -{ - struct lov_stripe_md *lsm; - u16 stripe_count; - u32 pattern; - u32 magic; - int rc; - - rc = lov_verify_lmm(lmm, lmm_size, &stripe_count); - if (rc) - return ERR_PTR(rc); - - magic = le32_to_cpu(lmm->lmm_magic); - pattern = le32_to_cpu(lmm->lmm_pattern); - - lsm = lov_lsm_alloc(stripe_count, pattern, magic); - if (IS_ERR(lsm)) - return lsm; - - LASSERT(lsm_op_find(magic)); - rc = lsm_op_find(magic)->lsm_unpackmd(lov, lsm, lmm); - if (rc) { - lov_free_memmd(&lsm); - return ERR_PTR(rc); - } - - return lsm; -} - -/* Retrieve object striping information. - * - * @lump is a pointer to an in-core struct with lmm_ost_count indicating - * the maximum number of OST indices which will fit in the user buffer. - * lmm_magic must be LOV_USER_MAGIC. - */ -int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm, - struct lov_user_md __user *lump) -{ - /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */ - struct lov_user_md_v3 lum; - struct lov_mds_md *lmmk; - u32 stripe_count; - ssize_t lmm_size; - size_t lmmk_size; - size_t lum_size; - int rc; - - if (!lsm) - return -ENODATA; - - if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3) { - CERROR("bad LSM MAGIC: 0x%08X != 0x%08X nor 0x%08X\n", - lsm->lsm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3); - rc = -EIO; - goto out; - } - - if (!lsm_is_released(lsm)) - stripe_count = lsm->lsm_stripe_count; - else - stripe_count = 0; - - /* we only need the header part from user space to get lmm_magic and - * lmm_stripe_count, (the header part is common to v1 and v3) - */ - lum_size = sizeof(struct lov_user_md_v1); - if (copy_from_user(&lum, lump, lum_size)) { - rc = -EFAULT; - goto out; - } - if (lum.lmm_magic != LOV_USER_MAGIC_V1 && - lum.lmm_magic != LOV_USER_MAGIC_V3 && - lum.lmm_magic != LOV_USER_MAGIC_SPECIFIC) { - rc = -EINVAL; - goto out; - } - - if (lum.lmm_stripe_count && - 
(lum.lmm_stripe_count < lsm->lsm_stripe_count)) { - /* Return right size of stripe to user */ - lum.lmm_stripe_count = stripe_count; - rc = copy_to_user(lump, &lum, lum_size); - rc = -EOVERFLOW; - goto out; - } - lmmk_size = lov_mds_md_size(stripe_count, lsm->lsm_magic); - - - lmmk = kvzalloc(lmmk_size, GFP_NOFS); - if (!lmmk) { - rc = -ENOMEM; - goto out; - } - - lmm_size = lov_lsm_pack(lsm, lmmk, lmmk_size); - if (lmm_size < 0) { - rc = lmm_size; - goto out_free; - } - - /* FIXME: Bug 1185 - copy fields properly when structs change */ - /* struct lov_user_md_v3 and struct lov_mds_md_v3 must be the same */ - BUILD_BUG_ON(sizeof(lum) != sizeof(struct lov_mds_md_v3)); - BUILD_BUG_ON(sizeof(lum.lmm_objects[0]) != sizeof(lmmk->lmm_objects[0])); - - if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC && - (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V1) || - lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_V3))) { - lustre_swab_lov_mds_md(lmmk); - lustre_swab_lov_user_md_objects( - (struct lov_user_ost_data *)lmmk->lmm_objects, - lmmk->lmm_stripe_count); - } - - if (lum.lmm_magic == LOV_USER_MAGIC) { - /* User request for v1, we need skip lmm_pool_name */ - if (lmmk->lmm_magic == LOV_MAGIC_V3) { - memmove(((struct lov_mds_md_v1 *)lmmk)->lmm_objects, - ((struct lov_mds_md_v3 *)lmmk)->lmm_objects, - lmmk->lmm_stripe_count * - sizeof(struct lov_ost_data_v1)); - lmm_size -= LOV_MAXPOOLNAME; - } - } else { - /* if v3 we just have to update the lum_size */ - lum_size = sizeof(struct lov_user_md_v3); - } - - /* User wasn't expecting this many OST entries */ - if (lum.lmm_stripe_count == 0) { - lmm_size = lum_size; - } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) { - rc = -EOVERFLOW; - goto out_free; - } - /* - * Have a difference between lov_mds_md & lov_user_md. - * So we have to re-order the data before copy to user. 
- */ - lum.lmm_stripe_count = lmmk->lmm_stripe_count; - lum.lmm_layout_gen = lmmk->lmm_layout_gen; - ((struct lov_user_md *)lmmk)->lmm_layout_gen = lum.lmm_layout_gen; - ((struct lov_user_md *)lmmk)->lmm_stripe_count = lum.lmm_stripe_count; - if (copy_to_user(lump, lmmk, lmm_size)) - rc = -EFAULT; - else - rc = 0; - -out_free: - kvfree(lmmk); -out: - return rc; -} diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c deleted file mode 100644 index cfae1294d77a..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_page.c +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_page for LOV layer. 
- * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include "lov_cl_internal.h" - -/** \addtogroup lov - * @{ - */ - -/***************************************************************************** - * - * Lov page operations. - * - */ - -static int lov_raid0_page_print(const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t printer) -{ - struct lov_page *lp = cl2lov_page(slice); - - return (*printer)(env, cookie, LUSTRE_LOV_NAME "-page@%p, raid0\n", lp); -} - -static const struct cl_page_operations lov_raid0_page_ops = { - .cpo_print = lov_raid0_page_print -}; - -int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) -{ - struct lov_object *loo = cl2lov(obj); - struct lov_layout_raid0 *r0 = lov_r0(loo); - struct lov_io *lio = lov_env_io(env); - struct cl_object *subobj; - struct cl_object *o; - struct lov_io_sub *sub; - struct lov_page *lpg = cl_object_page_slice(obj, page); - loff_t offset; - u64 suboff; - int stripe; - int rc; - - offset = cl_offset(obj, index); - stripe = lov_stripe_number(loo->lo_lsm, offset); - LASSERT(stripe < r0->lo_nr); - rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff); - LASSERT(rc == 0); - - lpg->lps_stripe = stripe; - cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_raid0_page_ops); - - sub = lov_sub_get(env, lio, stripe); - if (IS_ERR(sub)) - return PTR_ERR(sub); - - subobj = lovsub2cl(r0->lo_sub[stripe]); - list_for_each_entry(o, &subobj->co_lu.lo_header->loh_layers, - co_lu.lo_linkage) { - if (o->co_ops->coo_page_init) { - rc = o->co_ops->coo_page_init(sub->sub_env, o, page, - cl_index(subobj, suboff)); - if (rc != 0) - break; - } - } - - return rc; -} - -static int lov_empty_page_print(const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t printer) -{ - struct lov_page *lp = cl2lov_page(slice); - - return (*printer)(env, cookie, 
LUSTRE_LOV_NAME "-page@%p, empty.\n", - lp); -} - -static const struct cl_page_operations lov_empty_page_ops = { - .cpo_print = lov_empty_page_print -}; - -int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) -{ - struct lov_page *lpg = cl_object_page_slice(obj, page); - void *addr; - - cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_empty_page_ops); - addr = kmap(page->cp_vmpage); - memset(addr, 0, cl_page_size(obj)); - kunmap(page->cp_vmpage); - cl_page_export(env, page, 1); - return 0; -} - -/** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c deleted file mode 100644 index b2a88ba72eb2..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_pool.c +++ /dev/null @@ -1,546 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/lov/lov_pool.c - * - * OST pool methods - * - * Author: Jacques-Charles LAFOUCRIERE - * Author: Alex Lyashkov - * Author: Nathaniel Rutman - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include -#include "lov_internal.h" - -#define pool_tgt(_p, _i) \ - _p->pool_lobd->u.lov.lov_tgts[_p->pool_obds.op_array[_i]] - -static u32 pool_hashfh(const void *data, u32 len, u32 seed) -{ - const char *pool_name = data; - return hashlen_hash(hashlen_string((void*)(unsigned long)seed, pool_name)); -} - -static int pool_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) -{ - const struct pool_desc *pool = obj; - const char *pool_name = arg->key; - return strcmp(pool_name, pool->pool_name); -} - -static const struct rhashtable_params pools_hash_params = { - .key_len = 1, /* actually variable */ - .key_offset = offsetof(struct pool_desc, pool_name), - .head_offset = offsetof(struct pool_desc, pool_hash), - .hashfn = pool_hashfh, - .obj_cmpfn = pool_cmpfn, - .automatic_shrinking = true, -}; - -static void lov_pool_getref(struct pool_desc *pool) -{ - CDEBUG(D_INFO, "pool %p\n", pool); - atomic_inc(&pool->pool_refcount); -} - -void lov_pool_putref(struct pool_desc *pool) -{ - CDEBUG(D_INFO, "pool %p\n", pool); - if (atomic_dec_and_test(&pool->pool_refcount)) { - LASSERT(list_empty(&pool->pool_list)); - lov_ost_pool_free(&pool->pool_obds); - kfree_rcu(pool, rcu); - } -} - -/* - * pool debugfs seq_file methods - */ -/* - * iterator is used to go through the target pool entries - * index is the current entry index in the lp_array[] array - * index >= pos returned to the seq_file interface - * pos is from 0 to (pool->pool_obds.op_count - 1) - */ -#define POOL_IT_MAGIC 0xB001CEA0 -struct pool_iterator { - int magic; - struct pool_desc *pool; - int idx; /* from 0 to pool_tgt_size - 1 */ -}; - -static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct pool_iterator *iter = (struct pool_iterator *)s->private; - int prev_idx; - - LASSERTF(iter->magic == 
POOL_IT_MAGIC, "%08X\n", iter->magic); - - /* test if end of file */ - if (*pos >= pool_tgt_count(iter->pool)) - return NULL; - - /* iterate to find a non empty entry */ - prev_idx = iter->idx; - down_read(&pool_tgt_rw_sem(iter->pool)); - iter->idx++; - if (iter->idx == pool_tgt_count(iter->pool)) { - iter->idx = prev_idx; /* we stay on the last entry */ - up_read(&pool_tgt_rw_sem(iter->pool)); - return NULL; - } - up_read(&pool_tgt_rw_sem(iter->pool)); - (*pos)++; - /* return != NULL to continue */ - return iter; -} - -static void *pool_proc_start(struct seq_file *s, loff_t *pos) -{ - struct pool_desc *pool = (struct pool_desc *)s->private; - struct pool_iterator *iter; - - lov_pool_getref(pool); - if ((pool_tgt_count(pool) == 0) || - (*pos >= pool_tgt_count(pool))) { - /* iter is not created, so stop() has no way to - * find pool to dec ref - */ - lov_pool_putref(pool); - return NULL; - } - - iter = kzalloc(sizeof(*iter), GFP_NOFS); - if (!iter) - return ERR_PTR(-ENOMEM); - iter->magic = POOL_IT_MAGIC; - iter->pool = pool; - iter->idx = 0; - - /* we use seq_file private field to memorized iterator so - * we can free it at stop() - */ - /* /!\ do not forget to restore it to pool before freeing it */ - s->private = iter; - if (*pos > 0) { - loff_t i; - void *ptr; - - i = 0; - do { - ptr = pool_proc_next(s, &iter, &i); - } while ((i < *pos) && ptr); - return ptr; - } - return iter; -} - -static void pool_proc_stop(struct seq_file *s, void *v) -{ - struct pool_iterator *iter = (struct pool_iterator *)s->private; - - /* in some cases stop() method is called 2 times, without - * calling start() method (see seq_read() from fs/seq_file.c) - * we have to free only if s->private is an iterator - */ - if ((iter) && (iter->magic == POOL_IT_MAGIC)) { - /* we restore s->private so next call to pool_proc_start() - * will work - */ - s->private = iter->pool; - lov_pool_putref(iter->pool); - kfree(iter); - } -} - -static int pool_proc_show(struct seq_file *s, void *v) -{ - struct 
pool_iterator *iter = (struct pool_iterator *)v; - struct lov_tgt_desc *tgt; - - LASSERTF(iter->magic == POOL_IT_MAGIC, "%08X\n", iter->magic); - LASSERT(iter->pool); - LASSERT(iter->idx <= pool_tgt_count(iter->pool)); - - down_read(&pool_tgt_rw_sem(iter->pool)); - tgt = pool_tgt(iter->pool, iter->idx); - up_read(&pool_tgt_rw_sem(iter->pool)); - if (tgt) - seq_printf(s, "%s\n", obd_uuid2str(&tgt->ltd_uuid)); - - return 0; -} - -static const struct seq_operations pool_proc_ops = { - .start = pool_proc_start, - .next = pool_proc_next, - .stop = pool_proc_stop, - .show = pool_proc_show, -}; - -static int pool_proc_open(struct inode *inode, struct file *file) -{ - int rc; - - rc = seq_open(file, &pool_proc_ops); - if (!rc) { - struct seq_file *s = file->private_data; - - s->private = inode->i_private; - } - return rc; -} - -static const struct file_operations pool_proc_operations = { - .open = pool_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -#define LOV_POOL_INIT_COUNT 2 -int lov_ost_pool_init(struct ost_pool *op, unsigned int count) -{ - if (count == 0) - count = LOV_POOL_INIT_COUNT; - op->op_array = NULL; - op->op_count = 0; - init_rwsem(&op->op_rw_sem); - op->op_size = count; - op->op_array = kcalloc(op->op_size, sizeof(op->op_array[0]), GFP_NOFS); - if (!op->op_array) { - op->op_size = 0; - return -ENOMEM; - } - return 0; -} - -/* Caller must hold write op_rwlock */ -int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count) -{ - __u32 *new; - int new_size; - - LASSERT(min_count != 0); - - if (op->op_count < op->op_size) - return 0; - - new_size = max(min_count, 2 * op->op_size); - new = kcalloc(new_size, sizeof(op->op_array[0]), GFP_NOFS); - if (!new) - return -ENOMEM; - - /* copy old array to new one */ - memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0])); - kfree(op->op_array); - op->op_array = new; - op->op_size = new_size; - return 0; -} - -int lov_ost_pool_add(struct ost_pool *op, __u32 idx, 
unsigned int min_count) -{ - int rc = 0, i; - - down_write(&op->op_rw_sem); - - rc = lov_ost_pool_extend(op, min_count); - if (rc) - goto out; - - /* search ost in pool array */ - for (i = 0; i < op->op_count; i++) { - if (op->op_array[i] == idx) { - rc = -EEXIST; - goto out; - } - } - /* ost not found we add it */ - op->op_array[op->op_count] = idx; - op->op_count++; -out: - up_write(&op->op_rw_sem); - return rc; -} - -int lov_ost_pool_remove(struct ost_pool *op, __u32 idx) -{ - int i; - - down_write(&op->op_rw_sem); - - for (i = 0; i < op->op_count; i++) { - if (op->op_array[i] == idx) { - memmove(&op->op_array[i], &op->op_array[i + 1], - (op->op_count - i - 1) * sizeof(op->op_array[0])); - op->op_count--; - up_write(&op->op_rw_sem); - return 0; - } - } - - up_write(&op->op_rw_sem); - return -EINVAL; -} - -int lov_ost_pool_free(struct ost_pool *op) -{ - if (op->op_size == 0) - return 0; - - down_write(&op->op_rw_sem); - - kfree(op->op_array); - op->op_array = NULL; - op->op_count = 0; - op->op_size = 0; - - up_write(&op->op_rw_sem); - return 0; -} - -static void -pools_hash_exit(void *vpool, void *data) -{ - struct pool_desc *pool = vpool; - lov_pool_putref(pool); -} - -int lov_pool_hash_init(struct rhashtable *tbl) -{ - return rhashtable_init(tbl, &pools_hash_params); -} - -void lov_pool_hash_destroy(struct rhashtable *tbl) -{ - rhashtable_free_and_destroy(tbl, pools_hash_exit, NULL); -} - -int lov_pool_new(struct obd_device *obd, char *poolname) -{ - struct lov_obd *lov; - struct pool_desc *new_pool; - int rc; - - lov = &obd->u.lov; - - if (strlen(poolname) > LOV_MAXPOOLNAME) - return -ENAMETOOLONG; - - new_pool = kzalloc(sizeof(*new_pool), GFP_NOFS); - if (!new_pool) - return -ENOMEM; - - strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name)); - new_pool->pool_lobd = obd; - /* ref count init to 1 because when created a pool is always used - * up to deletion - */ - atomic_set(&new_pool->pool_refcount, 1); - rc = 
lov_ost_pool_init(&new_pool->pool_obds, 0); - if (rc) - goto out_err; - - /* get ref for debugfs file */ - lov_pool_getref(new_pool); - - new_pool->pool_debugfs_entry = debugfs_create_file(poolname, 0444, - lov->lov_pool_debugfs_entry, - new_pool, - &pool_proc_operations); - - spin_lock(&obd->obd_dev_lock); - list_add_tail(&new_pool->pool_list, &lov->lov_pool_list); - lov->lov_pool_count++; - spin_unlock(&obd->obd_dev_lock); - - /* Add to hash table only when it is fully ready. */ - rc = rhashtable_lookup_insert_fast(&lov->lov_pools_hash_body, - &new_pool->pool_hash, pools_hash_params); - if (rc) { - if (rc != -EEXIST) - /* - * Hide -E2BIG and -EBUSY which - * are not helpful. - */ - rc = -ENOMEM; - goto out_err; - } - - CDEBUG(D_CONFIG, LOV_POOLNAMEF " is pool #%d\n", - poolname, lov->lov_pool_count); - - return 0; - -out_err: - spin_lock(&obd->obd_dev_lock); - list_del_init(&new_pool->pool_list); - lov->lov_pool_count--; - spin_unlock(&obd->obd_dev_lock); - debugfs_remove_recursive(new_pool->pool_debugfs_entry); - lov_ost_pool_free(&new_pool->pool_obds); - kfree(new_pool); - - return rc; -} - -int lov_pool_del(struct obd_device *obd, char *poolname) -{ - struct lov_obd *lov; - struct pool_desc *pool; - - lov = &obd->u.lov; - - /* lookup and kill hash reference */ - rcu_read_lock(); - pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname, pools_hash_params); - if (pool) - if (rhashtable_remove_fast(&lov->lov_pools_hash_body, - &pool->pool_hash, pools_hash_params) != 0) - pool = NULL; - rcu_read_unlock(); - if (!pool) - return -ENOENT; - - debugfs_remove_recursive(pool->pool_debugfs_entry); - lov_pool_putref(pool); - - spin_lock(&obd->obd_dev_lock); - list_del_init(&pool->pool_list); - lov->lov_pool_count--; - spin_unlock(&obd->obd_dev_lock); - - /* release last reference */ - lov_pool_putref(pool); - - return 0; -} - -int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname) -{ - struct obd_uuid ost_uuid; - struct lov_obd *lov; - struct 
pool_desc *pool; - unsigned int lov_idx; - int rc; - - lov = &obd->u.lov; - - rcu_read_lock(); - pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname, pools_hash_params); - if (pool && !atomic_inc_not_zero(&pool->pool_refcount)) - pool = NULL; - rcu_read_unlock(); - if (!pool) - return -ENOENT; - - obd_str2uuid(&ost_uuid, ostname); - - /* search ost in lov array */ - obd_getref(obd); - for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) { - if (!lov->lov_tgts[lov_idx]) - continue; - if (obd_uuid_equals(&ost_uuid, - &lov->lov_tgts[lov_idx]->ltd_uuid)) - break; - } - /* test if ost found in lov */ - if (lov_idx == lov->desc.ld_tgt_count) { - rc = -EINVAL; - goto out; - } - - rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size); - if (rc) - goto out; - - CDEBUG(D_CONFIG, "Added %s to " LOV_POOLNAMEF " as member %d\n", - ostname, poolname, pool_tgt_count(pool)); - -out: - obd_putref(obd); - lov_pool_putref(pool); - return rc; -} - -int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname) -{ - struct obd_uuid ost_uuid; - struct lov_obd *lov; - struct pool_desc *pool; - unsigned int lov_idx; - int rc = 0; - - lov = &obd->u.lov; - - rcu_read_lock(); - pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname, pools_hash_params); - if (pool && !atomic_inc_not_zero(&pool->pool_refcount)) - pool = NULL; - rcu_read_unlock(); - if (!pool) - return -ENOENT; - - obd_str2uuid(&ost_uuid, ostname); - - obd_getref(obd); - /* search ost in lov array, to get index */ - for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) { - if (!lov->lov_tgts[lov_idx]) - continue; - - if (obd_uuid_equals(&ost_uuid, - &lov->lov_tgts[lov_idx]->ltd_uuid)) - break; - } - - /* test if ost found in lov */ - if (lov_idx == lov->desc.ld_tgt_count) { - rc = -EINVAL; - goto out; - } - - lov_ost_pool_remove(&pool->pool_obds, lov_idx); - - CDEBUG(D_CONFIG, "%s removed from " LOV_POOLNAMEF "\n", ostname, - poolname); - -out: - obd_putref(obd); - 
lov_pool_putref(pool); - return rc; -} diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c deleted file mode 100644 index cb8567f20ea7..000000000000 --- a/drivers/staging/lustre/lustre/lov/lov_request.c +++ /dev/null @@ -1,354 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include -#include -#include "lov_internal.h" - -static void lov_init_set(struct lov_request_set *set) -{ - set->set_count = 0; - atomic_set(&set->set_completes, 0); - atomic_set(&set->set_success, 0); - INIT_LIST_HEAD(&set->set_list); -} - -static void lov_finish_set(struct lov_request_set *set) -{ - struct lov_request *req; - - LASSERT(set); - while ((req = list_first_entry_or_null(&set->set_list, - struct lov_request, - rq_link)) != NULL) { - list_del_init(&req->rq_link); - kfree(req->rq_oi.oi_osfs); - kfree(req); - } - kfree(set); -} - -static void lov_update_set(struct lov_request_set *set, - struct lov_request *req, int rc) -{ - atomic_inc(&set->set_completes); - if (rc == 0) - atomic_inc(&set->set_success); -} - -static void lov_set_add_req(struct lov_request *req, - struct lov_request_set *set) -{ - list_add_tail(&req->rq_link, &set->set_list); - set->set_count++; - req->rq_rqset = set; -} - -static int lov_check_set(struct lov_obd *lov, int idx) -{ - int rc; - struct lov_tgt_desc *tgt; - - mutex_lock(&lov->lov_lock); - tgt = lov->lov_tgts[idx]; - rc = !tgt || tgt->ltd_active || - (tgt->ltd_exp && - class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried); - mutex_unlock(&lov->lov_lock); - - return rc; -} - -/* Check if the OSC connection exists and is active. - * If the OSC has not yet had a chance to connect to the OST the first time, - * wait once for it to connect instead of returning an error. 
- */ -static int lov_check_and_wait_active(struct lov_obd *lov, int ost_idx) -{ - int cnt = 0; - struct lov_tgt_desc *tgt; - int rc = 0; - - mutex_lock(&lov->lov_lock); - - tgt = lov->lov_tgts[ost_idx]; - - if (unlikely(!tgt)) { - rc = 0; - goto out; - } - - if (likely(tgt->ltd_active)) { - rc = 1; - goto out; - } - - if (tgt->ltd_exp && class_exp2cliimp(tgt->ltd_exp)->imp_connect_tried) { - rc = 0; - goto out; - } - - mutex_unlock(&lov->lov_lock); - - while (cnt < obd_timeout && !lov_check_set(lov, ost_idx)) { - schedule_timeout_uninterruptible(HZ); - cnt++; - } - if (tgt->ltd_active) - return 1; - - return 0; - -out: - mutex_unlock(&lov->lov_lock); - return rc; -} - -#define LOV_U64_MAX ((__u64)~0ULL) -#define LOV_SUM_MAX(tot, add) \ - do { \ - if ((tot) + (add) < (tot)) \ - (tot) = LOV_U64_MAX; \ - else \ - (tot) += (add); \ - } while (0) - -static int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs, - int success) -{ - if (success) { - __u32 expected_stripes = lov_get_stripecnt(&obd->u.lov, - LOV_MAGIC, 0); - if (osfs->os_files != LOV_U64_MAX) - lov_do_div64(osfs->os_files, expected_stripes); - if (osfs->os_ffree != LOV_U64_MAX) - lov_do_div64(osfs->os_ffree, expected_stripes); - - spin_lock(&obd->obd_osfs_lock); - memcpy(&obd->obd_osfs, osfs, sizeof(*osfs)); - obd->obd_osfs_age = get_jiffies_64(); - spin_unlock(&obd->obd_osfs_lock); - return 0; - } - - return -EIO; -} - -int lov_fini_statfs_set(struct lov_request_set *set) -{ - int rc = 0; - - if (!set) - return 0; - - if (atomic_read(&set->set_completes)) { - rc = lov_fini_statfs(set->set_obd, set->set_oi->oi_osfs, - atomic_read(&set->set_success)); - } - - lov_finish_set(set); - - return rc; -} - -static void lov_update_statfs(struct obd_statfs *osfs, - struct obd_statfs *lov_sfs, - int success) -{ - int shift = 0, quit = 0; - __u64 tmp; - - if (success == 0) { - memcpy(osfs, lov_sfs, sizeof(*lov_sfs)); - } else { - if (osfs->os_bsize != lov_sfs->os_bsize) { - /* assume all block sizes are 
always powers of 2 */ - /* get the bits difference */ - tmp = osfs->os_bsize | lov_sfs->os_bsize; - for (shift = 0; shift <= 64; ++shift) { - if (tmp & 1) { - if (quit) - break; - quit = 1; - shift = 0; - } - tmp >>= 1; - } - } - - if (osfs->os_bsize < lov_sfs->os_bsize) { - osfs->os_bsize = lov_sfs->os_bsize; - - osfs->os_bfree >>= shift; - osfs->os_bavail >>= shift; - osfs->os_blocks >>= shift; - } else if (shift != 0) { - lov_sfs->os_bfree >>= shift; - lov_sfs->os_bavail >>= shift; - lov_sfs->os_blocks >>= shift; - } - osfs->os_bfree += lov_sfs->os_bfree; - osfs->os_bavail += lov_sfs->os_bavail; - osfs->os_blocks += lov_sfs->os_blocks; - /* XXX not sure about this one - depends on policy. - * - could be minimum if we always stripe on all OBDs - * (but that would be wrong for any other policy, - * if one of the OBDs has no more objects left) - * - could be sum if we stripe whole objects - * - could be average, just to give a nice number - * - * To give a "reasonable" (if not wholly accurate) - * number, we divide the total number of free objects - * by expected stripe count (watch out for overflow). - */ - LOV_SUM_MAX(osfs->os_files, lov_sfs->os_files); - LOV_SUM_MAX(osfs->os_ffree, lov_sfs->os_ffree); - } -} - -/* The callback for osc_statfs_async that finalizes a request info when a - * response is received. - */ -static int cb_statfs_update(void *cookie, int rc) -{ - struct obd_info *oinfo = cookie; - struct lov_request *lovreq; - struct lov_request_set *set; - struct obd_statfs *osfs, *lov_sfs; - struct lov_obd *lov; - struct lov_tgt_desc *tgt; - struct obd_device *lovobd, *tgtobd; - int success; - - lovreq = container_of(oinfo, struct lov_request, rq_oi); - set = lovreq->rq_rqset; - lovobd = set->set_obd; - lov = &lovobd->u.lov; - osfs = set->set_oi->oi_osfs; - lov_sfs = oinfo->oi_osfs; - success = atomic_read(&set->set_success); - /* XXX: the same is done in lov_update_common_set, however - * lovset->set_exp is not initialized. 
- */ - lov_update_set(set, lovreq, rc); - if (rc) - goto out; - - obd_getref(lovobd); - tgt = lov->lov_tgts[lovreq->rq_idx]; - if (!tgt || !tgt->ltd_active) - goto out_update; - - tgtobd = class_exp2obd(tgt->ltd_exp); - spin_lock(&tgtobd->obd_osfs_lock); - memcpy(&tgtobd->obd_osfs, lov_sfs, sizeof(*lov_sfs)); - if ((oinfo->oi_flags & OBD_STATFS_FROM_CACHE) == 0) - tgtobd->obd_osfs_age = get_jiffies_64(); - spin_unlock(&tgtobd->obd_osfs_lock); - -out_update: - lov_update_statfs(osfs, lov_sfs, success); - obd_putref(lovobd); -out: - return 0; -} - -int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo, - struct lov_request_set **reqset) -{ - struct lov_request_set *set; - struct lov_obd *lov = &obd->u.lov; - int rc = 0, i; - - set = kzalloc(sizeof(*set), GFP_NOFS); - if (!set) - return -ENOMEM; - lov_init_set(set); - - set->set_obd = obd; - set->set_oi = oinfo; - - /* We only get block data from the OBD */ - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - struct lov_request *req; - - if (!lov->lov_tgts[i] || - (oinfo->oi_flags & OBD_STATFS_NODELAY && - !lov->lov_tgts[i]->ltd_active)) { - CDEBUG(D_HA, "lov idx %d inactive\n", i); - continue; - } - - /* skip targets that have been explicitly disabled by the - * administrator - */ - if (!lov->lov_tgts[i]->ltd_exp) { - CDEBUG(D_HA, "lov idx %d administratively disabled\n", i); - continue; - } - - if (!lov->lov_tgts[i]->ltd_active) - lov_check_and_wait_active(lov, i); - - req = kzalloc(sizeof(*req), GFP_NOFS); - if (!req) { - rc = -ENOMEM; - goto out_set; - } - - req->rq_oi.oi_osfs = kzalloc(sizeof(*req->rq_oi.oi_osfs), - GFP_NOFS); - if (!req->rq_oi.oi_osfs) { - kfree(req); - rc = -ENOMEM; - goto out_set; - } - - req->rq_idx = i; - req->rq_oi.oi_cb_up = cb_statfs_update; - req->rq_oi.oi_flags = oinfo->oi_flags; - - lov_set_add_req(req, set); - } - if (!set->set_count) { - rc = -EIO; - goto out_set; - } - *reqset = set; - return rc; -out_set: - lov_fini_statfs_set(set); - return rc; -} diff --git 
a/drivers/staging/lustre/lustre/lov/lovsub_dev.c b/drivers/staging/lustre/lustre/lov/lovsub_dev.c deleted file mode 100644 index 7e89a2e485fc..000000000000 --- a/drivers/staging/lustre/lustre/lov/lovsub_dev.c +++ /dev/null @@ -1,147 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2013, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_device and cl_device_type for LOVSUB layer. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include "lov_cl_internal.h" - -/** \addtogroup lov - * @{ - */ - -/***************************************************************************** - * - * Lov-sub device and device type functions. 
- * - */ - -static int lovsub_device_init(const struct lu_env *env, struct lu_device *d, - const char *name, struct lu_device *next) -{ - struct lovsub_device *lsd = lu2lovsub_dev(d); - struct lu_device_type *ldt; - int rc; - - next->ld_site = d->ld_site; - ldt = next->ld_type; - rc = ldt->ldt_ops->ldto_device_init(env, next, ldt->ldt_name, NULL); - if (rc) { - next->ld_site = NULL; - return rc; - } - - lu_device_get(next); - lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init); - lsd->acid_next = lu2cl_dev(next); - return rc; -} - -static struct lu_device *lovsub_device_fini(const struct lu_env *env, - struct lu_device *d) -{ - struct lu_device *next; - struct lovsub_device *lsd; - - lsd = lu2lovsub_dev(d); - next = cl2lu_dev(lsd->acid_next); - lsd->acid_next = NULL; - return next; -} - -static struct lu_device *lovsub_device_free(const struct lu_env *env, - struct lu_device *d) -{ - struct lovsub_device *lsd = lu2lovsub_dev(d); - struct lu_device *next = cl2lu_dev(lsd->acid_next); - - if (atomic_read(&d->ld_ref) && d->ld_site) { - LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL); - lu_site_print(env, d->ld_site, &msgdata, lu_cdebug_printer); - } - cl_device_fini(lu2cl_dev(d)); - kfree(lsd); - return next; -} - -static const struct lu_device_operations lovsub_lu_ops = { - .ldo_object_alloc = lovsub_object_alloc, - .ldo_process_config = NULL, - .ldo_recovery_complete = NULL -}; - -static struct lu_device *lovsub_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg) -{ - struct lu_device *d; - struct lovsub_device *lsd; - - lsd = kzalloc(sizeof(*lsd), GFP_NOFS); - if (lsd) { - int result; - - result = cl_device_init(&lsd->acid_cl, t); - if (result == 0) { - d = lovsub2lu_dev(lsd); - d->ld_ops = &lovsub_lu_ops; - } else { - d = ERR_PTR(result); - } - } else { - d = ERR_PTR(-ENOMEM); - } - return d; -} - -static const struct lu_device_type_operations lovsub_device_type_ops = { - .ldto_device_alloc = lovsub_device_alloc, - 
.ldto_device_free = lovsub_device_free, - - .ldto_device_init = lovsub_device_init, - .ldto_device_fini = lovsub_device_fini -}; - -#define LUSTRE_LOVSUB_NAME "lovsub" - -struct lu_device_type lovsub_device_type = { - .ldt_tags = LU_DEVICE_CL, - .ldt_name = LUSTRE_LOVSUB_NAME, - .ldt_ops = &lovsub_device_type_ops, - .ldt_ctx_tags = LCT_CL_THREAD -}; - -/** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lovsub_lock.c b/drivers/staging/lustre/lustre/lov/lovsub_lock.c deleted file mode 100644 index ea492be2eef3..000000000000 --- a/drivers/staging/lustre/lustre/lov/lovsub_lock.c +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_lock for LOVSUB layer. 
- * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include "lov_cl_internal.h" - -/** \addtogroup lov - * @{ - */ - -/***************************************************************************** - * - * Lovsub lock operations. - * - */ - -static void lovsub_lock_fini(const struct lu_env *env, - struct cl_lock_slice *slice) -{ - struct lovsub_lock *lsl; - - lsl = cl2lovsub_lock(slice); - kmem_cache_free(lovsub_lock_kmem, lsl); -} - -static const struct cl_lock_operations lovsub_lock_ops = { - .clo_fini = lovsub_lock_fini, -}; - -int lovsub_lock_init(const struct lu_env *env, struct cl_object *obj, - struct cl_lock *lock, const struct cl_io *io) -{ - struct lovsub_lock *lsk; - int result; - - lsk = kmem_cache_zalloc(lovsub_lock_kmem, GFP_NOFS); - if (lsk) { - cl_lock_slice_add(lock, &lsk->lss_cl, obj, &lovsub_lock_ops); - result = 0; - } else { - result = -ENOMEM; - } - return result; -} - -/** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c deleted file mode 100644 index 13d452086b61..000000000000 --- a/drivers/staging/lustre/lustre/lov/lovsub_object.c +++ /dev/null @@ -1,180 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_object for LOVSUB layer. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include "lov_cl_internal.h" - -/** \addtogroup lov - * @{ - */ - -/***************************************************************************** - * - * Lovsub object operations. - * - */ - -int lovsub_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf) -{ - struct lovsub_device *dev = lu2lovsub_dev(obj->lo_dev); - struct lu_object *below; - struct lu_device *under; - - int result; - - under = &dev->acid_next->cd_lu_dev; - below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, under); - if (below) { - lu_object_add(obj, below); - cl_object_page_init(lu2cl(obj), sizeof(struct lovsub_page)); - result = 0; - } else { - result = -ENOMEM; - } - return result; -} - -static void lovsub_object_free(const struct lu_env *env, struct lu_object *obj) -{ - struct lovsub_object *los = lu2lovsub(obj); - struct lov_object *lov = los->lso_super; - - /* We can't assume lov was assigned here, because of the shadow - * object handling in lu_object_find. 
- */ - if (lov) { - LASSERT(lov->lo_type == LLT_RAID0); - LASSERT(lov->u.raid0.lo_sub[los->lso_index] == los); - spin_lock(&lov->u.raid0.lo_sub_lock); - lov->u.raid0.lo_sub[los->lso_index] = NULL; - spin_unlock(&lov->u.raid0.lo_sub_lock); - } - - lu_object_fini(obj); - lu_object_header_fini(&los->lso_header.coh_lu); - kmem_cache_free(lovsub_object_kmem, los); -} - -static int lovsub_object_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *obj) -{ - struct lovsub_object *los = lu2lovsub(obj); - - return (*p)(env, cookie, "[%d]", los->lso_index); -} - -static int lovsub_attr_update(const struct lu_env *env, struct cl_object *obj, - const struct cl_attr *attr, unsigned int valid) -{ - struct lov_object *lov = cl2lovsub(obj)->lso_super; - - lov_r0(lov)->lo_attr_valid = 0; - return 0; -} - -static int lovsub_object_glimpse(const struct lu_env *env, - const struct cl_object *obj, - struct ost_lvb *lvb) -{ - struct lovsub_object *los = cl2lovsub(obj); - - return cl_object_glimpse(env, &los->lso_super->lo_cl, lvb); -} - -/** - * Implementation of struct cl_object_operations::coo_req_attr_set() for lovsub - * layer. Lov and lovsub are responsible only for struct obdo::o_stripe_idx - * field, which is filled there. - */ -static void lovsub_req_attr_set(const struct lu_env *env, struct cl_object *obj, - struct cl_req_attr *attr) -{ - struct lovsub_object *subobj = cl2lovsub(obj); - - cl_req_attr_set(env, &subobj->lso_super->lo_cl, attr); - - /* - * There is no OBD_MD_* flag for obdo::o_stripe_idx, so set it - * unconditionally. It never changes anyway. 
- */ - attr->cra_oa->o_stripe_idx = subobj->lso_index; -} - -static const struct cl_object_operations lovsub_ops = { - .coo_page_init = lovsub_page_init, - .coo_lock_init = lovsub_lock_init, - .coo_attr_update = lovsub_attr_update, - .coo_glimpse = lovsub_object_glimpse, - .coo_req_attr_set = lovsub_req_attr_set -}; - -static const struct lu_object_operations lovsub_lu_obj_ops = { - .loo_object_init = lovsub_object_init, - .loo_object_delete = NULL, - .loo_object_release = NULL, - .loo_object_free = lovsub_object_free, - .loo_object_print = lovsub_object_print, - .loo_object_invariant = NULL -}; - -struct lu_object *lovsub_object_alloc(const struct lu_env *env, - const struct lu_object_header *unused, - struct lu_device *dev) -{ - struct lovsub_object *los; - struct lu_object *obj; - - los = kmem_cache_zalloc(lovsub_object_kmem, GFP_NOFS); - if (los) { - struct cl_object_header *hdr; - - obj = lovsub2lu(los); - hdr = &los->lso_header; - cl_object_header_init(hdr); - lu_object_init(obj, &hdr->coh_lu, dev); - lu_object_add_top(&hdr->coh_lu, obj); - los->lso_cl.co_ops = &lovsub_ops; - obj->lo_ops = &lovsub_lu_obj_ops; - } else { - obj = NULL; - } - return obj; -} - -/** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lovsub_page.c b/drivers/staging/lustre/lustre/lov/lovsub_page.c deleted file mode 100644 index 915520bcdd60..000000000000 --- a/drivers/staging/lustre/lustre/lov/lovsub_page.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_page for LOVSUB layer. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include "lov_cl_internal.h" - -/** \addtogroup lov - * @{ - */ - -/***************************************************************************** - * - * Lovsub page operations. - * - */ - -static void lovsub_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ -} - -static const struct cl_page_operations lovsub_page_ops = { - .cpo_fini = lovsub_page_fini -}; - -int lovsub_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) -{ - struct lovsub_page *lsb = cl_object_page_slice(obj, page); - - cl_page_slice_add(page, &lsb->lsb_cl, obj, index, &lovsub_page_ops); - return 0; -} - -/** @} lov */ diff --git a/drivers/staging/lustre/lustre/lov/lproc_lov.c b/drivers/staging/lustre/lustre/lov/lproc_lov.c deleted file mode 100644 index 721440feef72..000000000000 --- a/drivers/staging/lustre/lustre/lov/lproc_lov.c +++ /dev/null @@ -1,299 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include -#include "lov_internal.h" - -static int lov_stripesize_seq_show(struct seq_file *m, void *v) -{ - struct obd_device *dev = (struct obd_device *)m->private; - struct lov_desc *desc; - - LASSERT(dev); - desc = &dev->u.lov.desc; - seq_printf(m, "%llu\n", desc->ld_default_stripe_size); - return 0; -} - -static ssize_t lov_stripesize_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct obd_device *dev = ((struct seq_file *)file->private_data)->private; - struct lov_desc *desc; - __u64 val; - int rc; - - LASSERT(dev); - desc = &dev->u.lov.desc; - rc = lprocfs_write_u64_helper(buffer, count, &val); - if (rc) - return rc; - - lov_fix_desc_stripe_size(&val); - desc->ld_default_stripe_size = val; - return count; -} - -LPROC_SEQ_FOPS(lov_stripesize); - -static int lov_stripeoffset_seq_show(struct seq_file *m, void *v) -{ - struct obd_device *dev = (struct obd_device *)m->private; - struct lov_desc *desc; - - LASSERT(dev); - desc = &dev->u.lov.desc; - seq_printf(m, "%llu\n", desc->ld_default_stripe_offset); - return 0; -} - -static ssize_t 
lov_stripeoffset_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct obd_device *dev = ((struct seq_file *)file->private_data)->private; - struct lov_desc *desc; - __u64 val; - int rc; - - LASSERT(dev); - desc = &dev->u.lov.desc; - rc = lprocfs_write_u64_helper(buffer, count, &val); - if (rc) - return rc; - - desc->ld_default_stripe_offset = val; - return count; -} - -LPROC_SEQ_FOPS(lov_stripeoffset); - -static int lov_stripetype_seq_show(struct seq_file *m, void *v) -{ - struct obd_device *dev = (struct obd_device *)m->private; - struct lov_desc *desc; - - LASSERT(dev); - desc = &dev->u.lov.desc; - seq_printf(m, "%u\n", desc->ld_pattern); - return 0; -} - -static ssize_t lov_stripetype_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct obd_device *dev = ((struct seq_file *)file->private_data)->private; - struct lov_desc *desc; - int val, rc; - - LASSERT(dev); - desc = &dev->u.lov.desc; - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - lov_fix_desc_pattern(&val); - desc->ld_pattern = val; - return count; -} - -LPROC_SEQ_FOPS(lov_stripetype); - -static int lov_stripecount_seq_show(struct seq_file *m, void *v) -{ - struct obd_device *dev = (struct obd_device *)m->private; - struct lov_desc *desc; - - LASSERT(dev); - desc = &dev->u.lov.desc; - seq_printf(m, "%d\n", (__s16)(desc->ld_default_stripe_count + 1) - 1); - return 0; -} - -static ssize_t lov_stripecount_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct obd_device *dev = ((struct seq_file *)file->private_data)->private; - struct lov_desc *desc; - int val, rc; - - LASSERT(dev); - desc = &dev->u.lov.desc; - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - lov_fix_desc_stripe_count(&val); - desc->ld_default_stripe_count = val; - return count; -} - -LPROC_SEQ_FOPS(lov_stripecount); - -static ssize_t numobd_show(struct 
kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct lov_desc *desc; - - desc = &dev->u.lov.desc; - return sprintf(buf, "%u\n", desc->ld_tgt_count); -} -LUSTRE_RO_ATTR(numobd); - -static ssize_t activeobd_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct lov_desc *desc; - - desc = &dev->u.lov.desc; - return sprintf(buf, "%u\n", desc->ld_active_tgt_count); -} -LUSTRE_RO_ATTR(activeobd); - -static int lov_desc_uuid_seq_show(struct seq_file *m, void *v) -{ - struct obd_device *dev = (struct obd_device *)m->private; - struct lov_obd *lov; - - LASSERT(dev); - lov = &dev->u.lov; - seq_printf(m, "%s\n", lov->desc.ld_uuid.uuid); - return 0; -} - -LPROC_SEQ_FOPS_RO(lov_desc_uuid); - -static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos) -{ - struct obd_device *dev = p->private; - struct lov_obd *lov = &dev->u.lov; - - while (*pos < lov->desc.ld_tgt_count) { - if (lov->lov_tgts[*pos]) - return lov->lov_tgts[*pos]; - ++*pos; - } - return NULL; -} - -static void lov_tgt_seq_stop(struct seq_file *p, void *v) -{ -} - -static void *lov_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos) -{ - struct obd_device *dev = p->private; - struct lov_obd *lov = &dev->u.lov; - - while (++*pos < lov->desc.ld_tgt_count) { - if (lov->lov_tgts[*pos]) - return lov->lov_tgts[*pos]; - } - return NULL; -} - -static int lov_tgt_seq_show(struct seq_file *p, void *v) -{ - struct lov_tgt_desc *tgt = v; - - seq_printf(p, "%d: %s %sACTIVE\n", - tgt->ltd_index, obd_uuid2str(&tgt->ltd_uuid), - tgt->ltd_active ? 
"" : "IN"); - return 0; -} - -static const struct seq_operations lov_tgt_sops = { - .start = lov_tgt_seq_start, - .stop = lov_tgt_seq_stop, - .next = lov_tgt_seq_next, - .show = lov_tgt_seq_show, -}; - -static int lov_target_seq_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int rc; - - rc = seq_open(file, &lov_tgt_sops); - if (rc) - return rc; - - seq = file->private_data; - seq->private = inode->i_private; - return 0; -} - -static struct lprocfs_vars lprocfs_lov_obd_vars[] = { - { "stripesize", &lov_stripesize_fops, NULL }, - { "stripeoffset", &lov_stripeoffset_fops, NULL }, - { "stripecount", &lov_stripecount_fops, NULL }, - { "stripetype", &lov_stripetype_fops, NULL }, - /*{ "filegroups", lprocfs_rd_filegroups, NULL, 0 },*/ - { "desc_uuid", &lov_desc_uuid_fops, NULL, 0 }, - { NULL } -}; - -static struct attribute *lov_attrs[] = { - &lustre_attr_activeobd.attr, - &lustre_attr_numobd.attr, - NULL, -}; - -static const struct attribute_group lov_attr_group = { - .attrs = lov_attrs, -}; - -void lprocfs_lov_init_vars(struct lprocfs_static_vars *lvars) -{ - lvars->sysfs_vars = &lov_attr_group; - lvars->obd_vars = lprocfs_lov_obd_vars; -} - -const struct file_operations lov_proc_target_fops = { - .owner = THIS_MODULE, - .open = lov_target_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = lprocfs_seq_release, -}; diff --git a/drivers/staging/lustre/lustre/mdc/Makefile b/drivers/staging/lustre/lustre/mdc/Makefile deleted file mode 100644 index c7bc3351ccb0..000000000000 --- a/drivers/staging/lustre/lustre/mdc/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LUSTRE_FS) += mdc.o -mdc-y := mdc_request.o mdc_reint.o mdc_lib.o mdc_locks.o lproc_mdc.o diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c deleted file mode 100644 index 
6cce32491eb5..000000000000 --- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c +++ /dev/null @@ -1,231 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include "mdc_internal.h" - -static ssize_t active_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - - return sprintf(buf, "%u\n", !dev->u.cli.cl_import->imp_deactive); -} - -static ssize_t active_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - unsigned long val; - int rc; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - if (val > 1) - return -ERANGE; - - /* opposite senses */ - if (dev->u.cli.cl_import->imp_deactive == val) { - rc = ptlrpc_set_import_active(dev->u.cli.cl_import, val); - if (rc) - count = rc; - } else { - CDEBUG(D_CONFIG, "activate %lu: ignoring repeat request\n", val); - } - return count; -} -LUSTRE_RW_ATTR(active); - -static ssize_t max_rpcs_in_flight_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - int len; - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - __u32 max; - - max = obd_get_max_rpcs_in_flight(&dev->u.cli); - len = sprintf(buf, "%u\n", max); - - return len; -} - -static ssize_t max_rpcs_in_flight_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - rc = obd_set_max_rpcs_in_flight(&dev->u.cli, val); - if (rc) - count = rc; - - return count; -} -LUSTRE_RW_ATTR(max_rpcs_in_flight); - -static ssize_t max_mod_rpcs_in_flight_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - u16 max; - int len; - - max = dev->u.cli.cl_max_mod_rpcs_in_flight; - len = sprintf(buf, "%hu\n", max); - - 
return len; -} - -static ssize_t max_mod_rpcs_in_flight_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - u16 val; - int rc; - - rc = kstrtou16(buffer, 10, &val); - if (rc) - return rc; - - rc = obd_set_max_mod_rpcs_in_flight(&dev->u.cli, val); - if (rc) - count = rc; - - return count; -} -LUSTRE_RW_ATTR(max_mod_rpcs_in_flight); - -static int mdc_rpc_stats_seq_show(struct seq_file *seq, void *v) -{ - struct obd_device *dev = seq->private; - - return obd_mod_rpc_stats_seq_show(&dev->u.cli, seq); -} - -static ssize_t mdc_rpc_stats_seq_write(struct file *file, - const char __user *buf, - size_t len, loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct obd_device *dev = seq->private; - struct client_obd *cli = &dev->u.cli; - - lprocfs_oh_clear(&cli->cl_mod_rpcs_hist); - - return len; -} -LPROC_SEQ_FOPS(mdc_rpc_stats); - -LPROC_SEQ_FOPS_WR_ONLY(mdc, ping); - -LPROC_SEQ_FOPS_RO_TYPE(mdc, connect_flags); -LPROC_SEQ_FOPS_RO_TYPE(mdc, server_uuid); -LPROC_SEQ_FOPS_RO_TYPE(mdc, conn_uuid); -LPROC_SEQ_FOPS_RO_TYPE(mdc, timeouts); -LPROC_SEQ_FOPS_RO_TYPE(mdc, state); - -/* - * Note: below sysfs entry is provided, but not currently in use, instead - * sbi->sb_md_brw_size is used, the per obd variable should be used - * when DNE is enabled, and dir pages are managed in MDC layer. - * Don't forget to enable sysfs store function then. 
- */ -static ssize_t max_pages_per_rpc_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - - return sprintf(buf, "%d\n", cli->cl_max_pages_per_rpc); -} -LUSTRE_RO_ATTR(max_pages_per_rpc); - -LPROC_SEQ_FOPS_RW_TYPE(mdc, import); -LPROC_SEQ_FOPS_RW_TYPE(mdc, pinger_recov); - -static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { - { "ping", &mdc_ping_fops, NULL, 0222 }, - { "connect_flags", &mdc_connect_flags_fops, NULL, 0 }, - /*{ "filegroups", lprocfs_rd_filegroups, NULL, 0 },*/ - { "mds_server_uuid", &mdc_server_uuid_fops, NULL, 0 }, - { "mds_conn_uuid", &mdc_conn_uuid_fops, NULL, 0 }, - { "timeouts", &mdc_timeouts_fops, NULL, 0 }, - { "import", &mdc_import_fops, NULL, 0 }, - { "state", &mdc_state_fops, NULL, 0 }, - { "pinger_recov", &mdc_pinger_recov_fops, NULL, 0 }, - { .name = "rpc_stats", - .fops = &mdc_rpc_stats_fops }, - { NULL } -}; - -static struct attribute *mdc_attrs[] = { - &lustre_attr_active.attr, - &lustre_attr_max_rpcs_in_flight.attr, - &lustre_attr_max_mod_rpcs_in_flight.attr, - &lustre_attr_max_pages_per_rpc.attr, - NULL, -}; - -static const struct attribute_group mdc_attr_group = { - .attrs = mdc_attrs, -}; - -void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars) -{ - lvars->sysfs_vars = &mdc_attr_group; - lvars->obd_vars = lprocfs_mdc_obd_vars; -} diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h deleted file mode 100644 index 28924e927b50..000000000000 --- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h +++ /dev/null @@ -1,144 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef _MDC_INTERNAL_H -#define _MDC_INTERNAL_H - -#include - -void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars); - -void mdc_pack_body(struct ptlrpc_request *req, const struct lu_fid *fid, - __u64 valid, size_t ea_size, __u32 suppgid, u32 flags); -void mdc_swap_layouts_pack(struct ptlrpc_request *req, - struct md_op_data *op_data); -void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size, - const struct lu_fid *fid); -void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags, - struct md_op_data *data, size_t ea_size); -void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - void *ea, size_t ealen); -void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - const void *data, size_t datalen, umode_t mode, uid_t uid, - gid_t gid, kernel_cap_t capability, __u64 rdev); -void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - umode_t mode, __u64 rdev, __u64 flags, const void *data, - size_t datalen); -void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data); -void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data); -void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - const char *old, size_t oldlen, - const char *new, size_t newlen); -void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data); - -/* mdc/mdc_locks.c */ -int mdc_set_lock_data(struct obd_export *exp, - const struct lustre_handle *lockh, - void *data, __u64 *bits); - -int mdc_null_inode(struct obd_export *exp, const struct lu_fid *fid); - -int mdc_intent_lock(struct obd_export *exp, - struct md_op_data *op_data, - struct lookup_intent *it, - struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - __u64 extra_lock_flags); - -int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, - const union ldlm_policy_data *policy, - struct md_op_data *op_data, - struct lustre_handle 
*lockh, u64 extra_lock_flags); - -int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid, - struct list_head *cancels, enum ldlm_mode mode, - __u64 bits); -/* mdc/mdc_request.c */ -int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp, - struct lu_fid *fid, struct md_op_data *op_data); -struct obd_client_handle; - -int mdc_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct lookup_intent *it); - -void mdc_commit_open(struct ptlrpc_request *req); -void mdc_replay_open(struct ptlrpc_request *req); - -int mdc_create(struct obd_export *exp, struct md_op_data *op_data, - const void *data, size_t datalen, umode_t mode, uid_t uid, - gid_t gid, kernel_cap_t capability, __u64 rdev, - struct ptlrpc_request **request); -int mdc_link(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request); -int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, - const char *old, size_t oldlen, - const char *new, size_t newlen, - struct ptlrpc_request **request); -int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, struct ptlrpc_request **request); -int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request); -int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, - union ldlm_policy_data *policy, enum ldlm_mode mode, - enum ldlm_cancel_flags flags, void *opaque); - -int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, - struct lu_fid *fid, __u64 *bits); - -int mdc_intent_getattr_async(struct obd_export *exp, - struct md_enqueue_info *minfo); - -enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags, - const struct lu_fid *fid, enum ldlm_type type, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - struct lustre_handle *lockh); - -static inline int mdc_prep_elc_req(struct obd_export *exp, - struct ptlrpc_request *req, int opc, - struct 
list_head *cancels, int count) -{ - return ldlm_prep_elc_req(exp, req, LUSTRE_MDS_VERSION, opc, 0, cancels, - count); -} - -static inline unsigned long hash_x_index(__u64 hash, int hash64) -{ - if (BITS_PER_LONG == 32 && hash64) - hash >>= 32; - /* save hash 0 with hash 1 */ - return ~0UL - (hash + !hash); -} - -#endif diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c deleted file mode 100644 index d582968987ff..000000000000 --- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c +++ /dev/null @@ -1,498 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_MDC -#include -#include -#include "mdc_internal.h" - -static void set_mrc_cr_flags(struct mdt_rec_create *mrc, u64 flags) -{ - mrc->cr_flags_l = (u32)(flags & 0xFFFFFFFFUll); - mrc->cr_flags_h = (u32)(flags >> 32); -} - -static void __mdc_pack_body(struct mdt_body *b, __u32 suppgid) -{ - b->mbo_suppgid = suppgid; - b->mbo_uid = from_kuid(&init_user_ns, current_uid()); - b->mbo_gid = from_kgid(&init_user_ns, current_gid()); - b->mbo_fsuid = from_kuid(&init_user_ns, current_fsuid()); - b->mbo_fsgid = from_kgid(&init_user_ns, current_fsgid()); - b->mbo_capability = current_cap().cap[0]; -} - -void mdc_swap_layouts_pack(struct ptlrpc_request *req, - struct md_op_data *op_data) -{ - struct mdt_body *b = req_capsule_client_get(&req->rq_pill, - &RMF_MDT_BODY); - - __mdc_pack_body(b, op_data->op_suppgids[0]); - b->mbo_fid1 = op_data->op_fid1; - b->mbo_fid2 = op_data->op_fid2; - b->mbo_valid |= OBD_MD_FLID; -} - -void mdc_pack_body(struct ptlrpc_request *req, const struct lu_fid *fid, - __u64 valid, size_t ea_size, __u32 suppgid, u32 flags) -{ - struct mdt_body *b = req_capsule_client_get(&req->rq_pill, - &RMF_MDT_BODY); - b->mbo_valid = valid; - b->mbo_eadatasize = ea_size; - b->mbo_flags = flags; - __mdc_pack_body(b, suppgid); - if (fid) { - b->mbo_fid1 = *fid; - b->mbo_valid |= OBD_MD_FLID; - } -} - -/** - * Pack a name (path component) into a request - * - * \param[in] req request - * \param[in] field request field (usually RMF_NAME) - * \param[in] name path component - * \param[in] name_len length of path component - * - * \a field must be present in \a req and of size \a name_len + 1. - * - * \a name must be '\0' terminated of length \a name_len and represent - * a single path component (not contain '/'). 
- */ -static void mdc_pack_name(struct ptlrpc_request *req, - const struct req_msg_field *field, - const char *name, size_t name_len) -{ - size_t buf_size; - size_t cpy_len; - char *buf; - - buf = req_capsule_client_get(&req->rq_pill, field); - buf_size = req_capsule_get_size(&req->rq_pill, field, RCL_CLIENT); - - LASSERT(name && name_len && buf && buf_size == name_len + 1); - - cpy_len = strlcpy(buf, name, buf_size); - - LASSERT(cpy_len == name_len && lu_name_is_valid_2(buf, cpy_len)); -} - -void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size, - const struct lu_fid *fid) -{ - struct mdt_body *b = req_capsule_client_get(&req->rq_pill, - &RMF_MDT_BODY); - b->mbo_fid1 = *fid; - b->mbo_valid |= OBD_MD_FLID; - b->mbo_size = pgoff; /* !! */ - b->mbo_nlink = size; /* !! */ - __mdc_pack_body(b, -1); - b->mbo_mode = LUDA_FID | LUDA_TYPE; -} - -/* packing of MDS records */ -void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - const void *data, size_t datalen, umode_t mode, - uid_t uid, gid_t gid, kernel_cap_t cap_effective, - __u64 rdev) -{ - struct mdt_rec_create *rec; - char *tmp; - __u64 flags; - - BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_create)); - rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); - - rec->cr_opcode = REINT_CREATE; - rec->cr_fsuid = uid; - rec->cr_fsgid = gid; - rec->cr_cap = cap_effective.cap[0]; - rec->cr_fid1 = op_data->op_fid1; - rec->cr_fid2 = op_data->op_fid2; - rec->cr_mode = mode; - rec->cr_rdev = rdev; - rec->cr_time = op_data->op_mod_time; - rec->cr_suppgid1 = op_data->op_suppgids[0]; - rec->cr_suppgid2 = op_data->op_suppgids[1]; - flags = 0; - if (op_data->op_bias & MDS_CREATE_VOLATILE) - flags |= MDS_OPEN_VOLATILE; - set_mrc_cr_flags(rec, flags); - rec->cr_bias = op_data->op_bias; - rec->cr_umask = current_umask(); - - mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen); - if (data) { - tmp = req_capsule_client_get(&req->rq_pill, 
&RMF_EADATA); - memcpy(tmp, data, datalen); - } -} - -static inline __u64 mds_pack_open_flags(__u64 flags) -{ - __u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE | - MDS_OPEN_FL_INTERNAL)); - if (flags & O_CREAT) - cr_flags |= MDS_OPEN_CREAT; - if (flags & O_EXCL) - cr_flags |= MDS_OPEN_EXCL; - if (flags & O_TRUNC) - cr_flags |= MDS_OPEN_TRUNC; - if (flags & O_APPEND) - cr_flags |= MDS_OPEN_APPEND; - if (flags & O_SYNC) - cr_flags |= MDS_OPEN_SYNC; - if (flags & O_DIRECTORY) - cr_flags |= MDS_OPEN_DIRECTORY; - if (flags & __FMODE_EXEC) - cr_flags |= MDS_FMODE_EXEC; - if (cl_is_lov_delay_create(flags)) - cr_flags |= MDS_OPEN_DELAY_CREATE; - - if (flags & O_NONBLOCK) - cr_flags |= MDS_OPEN_NORESTORE; - - return cr_flags; -} - -/* packing of MDS records */ -void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - umode_t mode, __u64 rdev, __u64 flags, const void *lmm, - size_t lmmlen) -{ - struct mdt_rec_create *rec; - char *tmp; - __u64 cr_flags; - - BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_create)); - rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); - - /* XXX do something about time, uid, gid */ - rec->cr_opcode = REINT_OPEN; - rec->cr_fsuid = from_kuid(&init_user_ns, current_fsuid()); - rec->cr_fsgid = from_kgid(&init_user_ns, current_fsgid()); - rec->cr_cap = current_cap().cap[0]; - rec->cr_fid1 = op_data->op_fid1; - rec->cr_fid2 = op_data->op_fid2; - - rec->cr_mode = mode; - cr_flags = mds_pack_open_flags(flags); - rec->cr_rdev = rdev; - rec->cr_time = op_data->op_mod_time; - rec->cr_suppgid1 = op_data->op_suppgids[0]; - rec->cr_suppgid2 = op_data->op_suppgids[1]; - rec->cr_bias = op_data->op_bias; - rec->cr_umask = current_umask(); - rec->cr_old_handle = op_data->op_handle; - - if (op_data->op_name) { - mdc_pack_name(req, &RMF_NAME, op_data->op_name, - op_data->op_namelen); - - if (op_data->op_bias & MDS_CREATE_VOLATILE) - cr_flags |= MDS_OPEN_VOLATILE; - } - - if (lmm) { - cr_flags |= MDS_OPEN_HAS_EA; 
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA); - memcpy(tmp, lmm, lmmlen); - } - set_mrc_cr_flags(rec, cr_flags); -} - -static inline __u64 attr_pack(unsigned int ia_valid) -{ - __u64 sa_valid = 0; - - if (ia_valid & ATTR_MODE) - sa_valid |= MDS_ATTR_MODE; - if (ia_valid & ATTR_UID) - sa_valid |= MDS_ATTR_UID; - if (ia_valid & ATTR_GID) - sa_valid |= MDS_ATTR_GID; - if (ia_valid & ATTR_SIZE) - sa_valid |= MDS_ATTR_SIZE; - if (ia_valid & ATTR_ATIME) - sa_valid |= MDS_ATTR_ATIME; - if (ia_valid & ATTR_MTIME) - sa_valid |= MDS_ATTR_MTIME; - if (ia_valid & ATTR_CTIME) - sa_valid |= MDS_ATTR_CTIME; - if (ia_valid & ATTR_ATIME_SET) - sa_valid |= MDS_ATTR_ATIME_SET; - if (ia_valid & ATTR_MTIME_SET) - sa_valid |= MDS_ATTR_MTIME_SET; - if (ia_valid & ATTR_FORCE) - sa_valid |= MDS_ATTR_FORCE; - if (ia_valid & ATTR_ATTR_FLAG) - sa_valid |= MDS_ATTR_ATTR_FLAG; - if (ia_valid & ATTR_KILL_SUID) - sa_valid |= MDS_ATTR_KILL_SUID; - if (ia_valid & ATTR_KILL_SGID) - sa_valid |= MDS_ATTR_KILL_SGID; - if (ia_valid & ATTR_CTIME_SET) - sa_valid |= MDS_ATTR_CTIME_SET; - if (ia_valid & ATTR_OPEN) - sa_valid |= MDS_ATTR_FROM_OPEN; - if (ia_valid & ATTR_BLOCKS) - sa_valid |= MDS_ATTR_BLOCKS; - if (ia_valid & MDS_OPEN_OWNEROVERRIDE) - /* NFSD hack (see bug 5781) */ - sa_valid |= MDS_OPEN_OWNEROVERRIDE; - return sa_valid; -} - -static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec, - struct md_op_data *op_data) -{ - rec->sa_opcode = REINT_SETATTR; - rec->sa_fsuid = from_kuid(&init_user_ns, current_fsuid()); - rec->sa_fsgid = from_kgid(&init_user_ns, current_fsgid()); - rec->sa_cap = current_cap().cap[0]; - rec->sa_suppgid = -1; - - rec->sa_fid = op_data->op_fid1; - rec->sa_valid = attr_pack(op_data->op_attr.ia_valid); - rec->sa_mode = op_data->op_attr.ia_mode; - rec->sa_uid = from_kuid(&init_user_ns, op_data->op_attr.ia_uid); - rec->sa_gid = from_kgid(&init_user_ns, op_data->op_attr.ia_gid); - rec->sa_size = op_data->op_attr.ia_size; - rec->sa_blocks = 
op_data->op_attr_blocks; - rec->sa_atime = LTIME_S(op_data->op_attr.ia_atime); - rec->sa_mtime = LTIME_S(op_data->op_attr.ia_mtime); - rec->sa_ctime = LTIME_S(op_data->op_attr.ia_ctime); - rec->sa_attr_flags = op_data->op_attr_flags; - if ((op_data->op_attr.ia_valid & ATTR_GID) && - in_group_p(op_data->op_attr.ia_gid)) - rec->sa_suppgid = - from_kgid(&init_user_ns, op_data->op_attr.ia_gid); - else - rec->sa_suppgid = op_data->op_suppgids[0]; - - rec->sa_bias = op_data->op_bias; -} - -static void mdc_ioepoch_pack(struct mdt_ioepoch *epoch, - struct md_op_data *op_data) -{ - epoch->mio_handle = op_data->op_handle; - epoch->mio_unused1 = 0; - epoch->mio_unused2 = 0; - epoch->mio_padding = 0; -} - -void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - void *ea, size_t ealen) -{ - struct mdt_rec_setattr *rec; - struct lov_user_md *lum = NULL; - - BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != - sizeof(struct mdt_rec_setattr)); - rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); - mdc_setattr_pack_rec(rec, op_data); - - if (ealen == 0) - return; - - lum = req_capsule_client_get(&req->rq_pill, &RMF_EADATA); - if (!ea) { /* Remove LOV EA */ - lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1); - lum->lmm_stripe_size = 0; - lum->lmm_stripe_count = 0; - lum->lmm_stripe_offset = (typeof(lum->lmm_stripe_offset))(-1); - } else { - memcpy(lum, ea, ealen); - } -} - -void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data) -{ - struct mdt_rec_unlink *rec; - - BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_unlink)); - rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); - - rec->ul_opcode = op_data->op_cli_flags & CLI_RM_ENTRY ? 
- REINT_RMENTRY : REINT_UNLINK; - rec->ul_fsuid = op_data->op_fsuid; - rec->ul_fsgid = op_data->op_fsgid; - rec->ul_cap = op_data->op_cap.cap[0]; - rec->ul_mode = op_data->op_mode; - rec->ul_suppgid1 = op_data->op_suppgids[0]; - rec->ul_suppgid2 = -1; - rec->ul_fid1 = op_data->op_fid1; - rec->ul_fid2 = op_data->op_fid2; - rec->ul_time = op_data->op_mod_time; - rec->ul_bias = op_data->op_bias; - - mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen); -} - -void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data) -{ - struct mdt_rec_link *rec; - - BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_link)); - rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); - - rec->lk_opcode = REINT_LINK; - rec->lk_fsuid = op_data->op_fsuid; /* current->fsuid; */ - rec->lk_fsgid = op_data->op_fsgid; /* current->fsgid; */ - rec->lk_cap = op_data->op_cap.cap[0]; /* current->cap_effective; */ - rec->lk_suppgid1 = op_data->op_suppgids[0]; - rec->lk_suppgid2 = op_data->op_suppgids[1]; - rec->lk_fid1 = op_data->op_fid1; - rec->lk_fid2 = op_data->op_fid2; - rec->lk_time = op_data->op_mod_time; - rec->lk_bias = op_data->op_bias; - - mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen); -} - -static void mdc_intent_close_pack(struct ptlrpc_request *req, - struct md_op_data *op_data) -{ - enum mds_op_bias bias = op_data->op_bias; - struct close_data *data; - struct ldlm_lock *lock; - - if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP | - MDS_RENAME_MIGRATE))) - return; - - data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA); - LASSERT(data); - - lock = ldlm_handle2lock(&op_data->op_lease_handle); - if (lock) { - data->cd_handle = lock->l_remote_handle; - LDLM_LOCK_PUT(lock); - } - ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL); - - data->cd_data_version = op_data->op_data_version; - data->cd_fid = op_data->op_fid2; -} - -void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data, 
- const char *old, size_t oldlen, - const char *new, size_t newlen) -{ - struct mdt_rec_rename *rec; - - BUILD_BUG_ON(sizeof(struct mdt_rec_reint) != sizeof(struct mdt_rec_rename)); - rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); - - /* XXX do something about time, uid, gid */ - rec->rn_opcode = op_data->op_cli_flags & CLI_MIGRATE ? - REINT_MIGRATE : REINT_RENAME; - rec->rn_opcode = REINT_RENAME; - rec->rn_fsuid = op_data->op_fsuid; - rec->rn_fsgid = op_data->op_fsgid; - rec->rn_cap = op_data->op_cap.cap[0]; - rec->rn_suppgid1 = op_data->op_suppgids[0]; - rec->rn_suppgid2 = op_data->op_suppgids[1]; - rec->rn_fid1 = op_data->op_fid1; - rec->rn_fid2 = op_data->op_fid2; - rec->rn_time = op_data->op_mod_time; - rec->rn_mode = op_data->op_mode; - rec->rn_bias = op_data->op_bias; - - mdc_pack_name(req, &RMF_NAME, old, oldlen); - - if (new) - mdc_pack_name(req, &RMF_SYMTGT, new, newlen); - - if (op_data->op_cli_flags & CLI_MIGRATE && - op_data->op_bias & MDS_RENAME_MIGRATE) { - struct mdt_ioepoch *epoch; - - mdc_intent_close_pack(req, op_data); - epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH); - mdc_ioepoch_pack(epoch, op_data); - } -} - -void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags, - struct md_op_data *op_data, size_t ea_size) -{ - struct mdt_body *b = req_capsule_client_get(&req->rq_pill, - &RMF_MDT_BODY); - - b->mbo_valid = valid; - if (op_data->op_bias & MDS_CHECK_SPLIT) - b->mbo_valid |= OBD_MD_FLCKSPLIT; - if (op_data->op_bias & MDS_CROSS_REF) - b->mbo_valid |= OBD_MD_FLCROSSREF; - b->mbo_eadatasize = ea_size; - b->mbo_flags = flags; - __mdc_pack_body(b, op_data->op_suppgids[0]); - - b->mbo_fid1 = op_data->op_fid1; - b->mbo_fid2 = op_data->op_fid2; - b->mbo_valid |= OBD_MD_FLID; - - if (op_data->op_name) - mdc_pack_name(req, &RMF_NAME, op_data->op_name, - op_data->op_namelen); -} - -void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data) -{ - struct mdt_ioepoch *epoch; - struct 
mdt_rec_setattr *rec; - - epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH); - rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); - - mdc_setattr_pack_rec(rec, op_data); - /* - * The client will zero out local timestamps when losing the IBITS lock - * so any new RPC timestamps will update the client inode's timestamps. - * There was a defect on the server side which allowed the atime to be - * overwritten by a zeroed-out atime packed into the close RPC. - * - * Proactively clear the MDS_ATTR_ATIME flag in the RPC in this case - * to avoid zeroing the atime on old unpatched servers. See LU-8041. - */ - if (rec->sa_atime == 0) - rec->sa_valid &= ~MDS_ATTR_ATIME; - - mdc_ioepoch_pack(epoch, op_data); - mdc_intent_close_pack(req, op_data); -} diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c deleted file mode 100644 index a8aa0fa5e87a..000000000000 --- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c +++ /dev/null @@ -1,1239 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_MDC - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mdc_internal.h" - -struct mdc_getattr_args { - struct obd_export *ga_exp; - struct md_enqueue_info *ga_minfo; -}; - -int it_open_error(int phase, struct lookup_intent *it) -{ - if (it_disposition(it, DISP_OPEN_LEASE)) { - if (phase >= DISP_OPEN_LEASE) - return it->it_status; - else - return 0; - } - if (it_disposition(it, DISP_OPEN_OPEN)) { - if (phase >= DISP_OPEN_OPEN) - return it->it_status; - else - return 0; - } - - if (it_disposition(it, DISP_OPEN_CREATE)) { - if (phase >= DISP_OPEN_CREATE) - return it->it_status; - else - return 0; - } - - if (it_disposition(it, DISP_LOOKUP_EXECD)) { - if (phase >= DISP_LOOKUP_EXECD) - return it->it_status; - else - return 0; - } - - if (it_disposition(it, DISP_IT_EXECD)) { - if (phase >= DISP_IT_EXECD) - return it->it_status; - else - return 0; - } - CERROR("it disp: %X, status: %d\n", it->it_disposition, - it->it_status); - LBUG(); - return 0; -} -EXPORT_SYMBOL(it_open_error); - -/* this must be called on a lockh that is known to have a referenced lock */ -int mdc_set_lock_data(struct obd_export *exp, const struct lustre_handle *lockh, - void *data, __u64 *bits) -{ - struct ldlm_lock *lock; - struct inode *new_inode = data; - - if (bits) - *bits = 0; - - if (!lustre_handle_is_used(lockh)) - return 0; - - lock = ldlm_handle2lock(lockh); - - LASSERT(lock); - lock_res_and_lock(lock); - if (lock->l_resource->lr_lvb_inode && - lock->l_resource->lr_lvb_inode != data) { - struct inode *old_inode = lock->l_resource->lr_lvb_inode; - - LASSERTF(old_inode->i_state & I_FREEING, - "Found existing inode %p/%lu/%u state %lu in lock: setting data to %p/%lu/%u\n", - old_inode, 
old_inode->i_ino, old_inode->i_generation, - old_inode->i_state, new_inode, new_inode->i_ino, - new_inode->i_generation); - } - lock->l_resource->lr_lvb_inode = new_inode; - if (bits) - *bits = lock->l_policy_data.l_inodebits.bits; - - unlock_res_and_lock(lock); - LDLM_LOCK_PUT(lock); - - return 0; -} - -enum ldlm_mode mdc_lock_match(struct obd_export *exp, __u64 flags, - const struct lu_fid *fid, enum ldlm_type type, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - struct lustre_handle *lockh) -{ - struct ldlm_res_id res_id; - enum ldlm_mode rc; - - fid_build_reg_res_name(fid, &res_id); - /* LU-4405: Clear bits not supported by server */ - policy->l_inodebits.bits &= exp_connect_ibits(exp); - rc = ldlm_lock_match(class_exp2obd(exp)->obd_namespace, flags, - &res_id, type, policy, mode, lockh, 0); - return rc; -} - -int mdc_cancel_unused(struct obd_export *exp, - const struct lu_fid *fid, - union ldlm_policy_data *policy, - enum ldlm_mode mode, - enum ldlm_cancel_flags flags, - void *opaque) -{ - struct ldlm_res_id res_id; - struct obd_device *obd = class_exp2obd(exp); - int rc; - - fid_build_reg_res_name(fid, &res_id); - rc = ldlm_cli_cancel_unused_resource(obd->obd_namespace, &res_id, - policy, mode, flags, opaque); - return rc; -} - -int mdc_null_inode(struct obd_export *exp, - const struct lu_fid *fid) -{ - struct ldlm_res_id res_id; - struct ldlm_resource *res; - struct ldlm_namespace *ns = class_exp2obd(exp)->obd_namespace; - - LASSERTF(ns, "no namespace passed\n"); - - fid_build_reg_res_name(fid, &res_id); - - res = ldlm_resource_get(ns, NULL, &res_id, 0, 0); - if (IS_ERR(res)) - return 0; - - lock_res(res); - res->lr_lvb_inode = NULL; - unlock_res(res); - - ldlm_resource_putref(res); - return 0; -} - -static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc) -{ - /* Don't hold error requests for replay. 
*/ - if (req->rq_replay) { - spin_lock(&req->rq_lock); - req->rq_replay = 0; - spin_unlock(&req->rq_lock); - } - if (rc && req->rq_transno != 0) { - DEBUG_REQ(D_ERROR, req, "transno returned on error rc %d", rc); - LBUG(); - } -} - -/* Save a large LOV EA into the request buffer so that it is available - * for replay. We don't do this in the initial request because the - * original request doesn't need this buffer (at most it sends just the - * lov_mds_md) and it is a waste of RAM/bandwidth to send the empty - * buffer and may also be difficult to allocate and save a very large - * request buffer for each open. (bug 5707) - * - * OOM here may cause recovery failure if lmm is needed (only for the - * original open if the MDS crashed just when this client also OOM'd) - * but this is incredibly unlikely, and questionable whether the client - * could do MDS recovery under OOM anyways... - */ -static void mdc_realloc_openmsg(struct ptlrpc_request *req, - struct mdt_body *body) -{ - int rc; - - /* FIXME: remove this explicit offset. */ - rc = sptlrpc_cli_enlarge_reqbuf(req, DLM_INTENT_REC_OFF + 4, - body->mbo_eadatasize); - if (rc) { - CERROR("Can't enlarge segment %d size to %d\n", - DLM_INTENT_REC_OFF + 4, body->mbo_eadatasize); - body->mbo_valid &= ~OBD_MD_FLEASIZE; - body->mbo_eadatasize = 0; - } -} - -static struct ptlrpc_request * -mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it, - struct md_op_data *op_data) -{ - struct ptlrpc_request *req; - struct obd_device *obddev = class_exp2obd(exp); - struct ldlm_intent *lit; - const void *lmm = op_data->op_data; - u32 lmmsize = op_data->op_data_size; - LIST_HEAD(cancels); - int count = 0; - int mode; - int rc; - - it->it_create_mode = (it->it_create_mode & ~S_IFMT) | S_IFREG; - - /* XXX: openlock is not cancelled for cross-refs. */ - /* If inode is known, cancel conflicting OPEN locks. 
*/ - if (fid_is_sane(&op_data->op_fid2)) { - if (it->it_flags & MDS_OPEN_LEASE) { /* try to get lease */ - if (it->it_flags & FMODE_WRITE) - mode = LCK_EX; - else - mode = LCK_PR; - } else { - if (it->it_flags & (FMODE_WRITE | MDS_OPEN_TRUNC)) - mode = LCK_CW; - else if (it->it_flags & __FMODE_EXEC) - mode = LCK_PR; - else - mode = LCK_CR; - } - count = mdc_resource_get_unused(exp, &op_data->op_fid2, - &cancels, mode, - MDS_INODELOCK_OPEN); - } - - /* If CREATE, cancel parent's UPDATE lock. */ - if (it->it_op & IT_CREAT) - mode = LCK_EX; - else - mode = LCK_CR; - count += mdc_resource_get_unused(exp, &op_data->op_fid1, - &cancels, mode, - MDS_INODELOCK_UPDATE); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_LDLM_INTENT_OPEN); - if (!req) { - ldlm_lock_list_put(&cancels, l_bl_ast, count); - return ERR_PTR(-ENOMEM); - } - - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - op_data->op_namelen + 1); - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, - max(lmmsize, obddev->u.cli.cl_default_mds_easize)); - - rc = ldlm_prep_enqueue_req(exp, req, &cancels, count); - if (rc < 0) { - ptlrpc_request_free(req); - return ERR_PTR(rc); - } - - spin_lock(&req->rq_lock); - req->rq_replay = req->rq_import->imp_replayable; - spin_unlock(&req->rq_lock); - - /* pack the intent */ - lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT); - lit->opc = (__u64)it->it_op; - - /* pack the intended request */ - mdc_open_pack(req, op_data, it->it_create_mode, 0, it->it_flags, lmm, - lmmsize); - - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obddev->u.cli.cl_max_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - req->rq_import->imp_connect_data.ocd_max_easize); - - ptlrpc_request_set_replen(req); - return req; -} - -#define GA_DEFAULT_EA_NAME_LEN 20 -#define GA_DEFAULT_EA_VAL_LEN 250 -#define GA_DEFAULT_EA_NUM 10 - -static struct ptlrpc_request * -mdc_intent_getxattr_pack(struct obd_export *exp, - struct 
lookup_intent *it, - struct md_op_data *op_data) -{ - struct ptlrpc_request *req; - struct ldlm_intent *lit; - int rc, count = 0; - LIST_HEAD(cancels); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_LDLM_INTENT_GETXATTR); - if (!req) - return ERR_PTR(-ENOMEM); - - rc = ldlm_prep_enqueue_req(exp, req, &cancels, count); - if (rc) { - ptlrpc_request_free(req); - return ERR_PTR(rc); - } - - /* pack the intent */ - lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT); - lit->opc = IT_GETXATTR; - - /* pack the intended request */ - mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, - GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM, -1, 0); - - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, - GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM); - - req_capsule_set_size(&req->rq_pill, &RMF_EAVALS, RCL_SERVER, - GA_DEFAULT_EA_NAME_LEN * GA_DEFAULT_EA_NUM); - - req_capsule_set_size(&req->rq_pill, &RMF_EAVALS_LENS, RCL_SERVER, - sizeof(u32) * GA_DEFAULT_EA_NUM); - - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, 0); - - ptlrpc_request_set_replen(req); - - return req; -} - -static struct ptlrpc_request *mdc_intent_unlink_pack(struct obd_export *exp, - struct lookup_intent *it, - struct md_op_data *op_data) -{ - struct ptlrpc_request *req; - struct obd_device *obddev = class_exp2obd(exp); - struct ldlm_intent *lit; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_LDLM_INTENT_UNLINK); - if (!req) - return ERR_PTR(-ENOMEM); - - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - op_data->op_namelen + 1); - - rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); - if (rc) { - ptlrpc_request_free(req); - return ERR_PTR(rc); - } - - /* pack the intent */ - lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT); - lit->opc = (__u64)it->it_op; - - /* pack the intended request */ - mdc_unlink_pack(req, op_data); - - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obddev->u.cli.cl_default_mds_easize); 
- ptlrpc_request_set_replen(req); - return req; -} - -static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp, - struct lookup_intent *it, - struct md_op_data *op_data) -{ - struct ptlrpc_request *req; - struct obd_device *obddev = class_exp2obd(exp); - u64 valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE | - OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA | - OBD_MD_MEA | OBD_MD_FLACL; - struct ldlm_intent *lit; - int rc; - u32 easize; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_LDLM_INTENT_GETATTR); - if (!req) - return ERR_PTR(-ENOMEM); - - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - op_data->op_namelen + 1); - - rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); - if (rc) { - ptlrpc_request_free(req); - return ERR_PTR(rc); - } - - /* pack the intent */ - lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT); - lit->opc = (__u64)it->it_op; - - if (obddev->u.cli.cl_default_mds_easize > 0) - easize = obddev->u.cli.cl_default_mds_easize; - else - easize = obddev->u.cli.cl_max_mds_easize; - - /* pack the intended request */ - mdc_getattr_pack(req, valid, it->it_flags, op_data, easize); - - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, easize); - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - req->rq_import->imp_connect_data.ocd_max_easize); - ptlrpc_request_set_replen(req); - return req; -} - -static struct ptlrpc_request *mdc_intent_layout_pack(struct obd_export *exp, - struct lookup_intent *it, - struct md_op_data *unused) -{ - struct obd_device *obd = class_exp2obd(exp); - struct ptlrpc_request *req; - struct ldlm_intent *lit; - struct layout_intent *layout; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_LDLM_INTENT_LAYOUT); - if (!req) - return ERR_PTR(-ENOMEM); - - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, 0); - rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); - if (rc) { - ptlrpc_request_free(req); - return ERR_PTR(rc); - } - - /* pack the intent */ - 
lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT); - lit->opc = (__u64)it->it_op; - - /* pack the layout intent request */ - layout = req_capsule_client_get(&req->rq_pill, &RMF_LAYOUT_INTENT); - /* LAYOUT_INTENT_ACCESS is generic, specific operation will be - * set for replication - */ - layout->li_opc = LAYOUT_INTENT_ACCESS; - - req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, - obd->u.cli.cl_default_mds_easize); - ptlrpc_request_set_replen(req); - return req; -} - -static struct ptlrpc_request * -mdc_enqueue_pack(struct obd_export *exp, int lvb_len) -{ - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LDLM_ENQUEUE); - if (!req) - return ERR_PTR(-ENOMEM); - - rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); - if (rc) { - ptlrpc_request_free(req); - return ERR_PTR(rc); - } - - req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, lvb_len); - ptlrpc_request_set_replen(req); - return req; -} - -static int mdc_finish_enqueue(struct obd_export *exp, - struct ptlrpc_request *req, - struct ldlm_enqueue_info *einfo, - struct lookup_intent *it, - struct lustre_handle *lockh, - int rc) -{ - struct req_capsule *pill = &req->rq_pill; - struct ldlm_request *lockreq; - struct ldlm_reply *lockrep; - struct ldlm_lock *lock; - void *lvb_data = NULL; - u32 lvb_len = 0; - - LASSERT(rc >= 0); - /* Similarly, if we're going to replay this request, we don't want to - * actually get a lock, just perform the intent. - */ - if (req->rq_transno || req->rq_replay) { - lockreq = req_capsule_client_get(pill, &RMF_DLM_REQ); - lockreq->lock_flags |= ldlm_flags_to_wire(LDLM_FL_INTENT_ONLY); - } - - if (rc == ELDLM_LOCK_ABORTED) { - einfo->ei_mode = 0; - memset(lockh, 0, sizeof(*lockh)); - rc = 0; - } else { /* rc = 0 */ - lock = ldlm_handle2lock(lockh); - - /* If the server gave us back a different lock mode, we should - * fix up our variables. 
- */ - if (lock->l_req_mode != einfo->ei_mode) { - ldlm_lock_addref(lockh, lock->l_req_mode); - ldlm_lock_decref(lockh, einfo->ei_mode); - einfo->ei_mode = lock->l_req_mode; - } - LDLM_LOCK_PUT(lock); - } - - lockrep = req_capsule_server_get(pill, &RMF_DLM_REP); - - it->it_disposition = (int)lockrep->lock_policy_res1; - it->it_status = (int)lockrep->lock_policy_res2; - it->it_lock_mode = einfo->ei_mode; - it->it_lock_handle = lockh->cookie; - it->it_request = req; - - /* Technically speaking rq_transno must already be zero if - * it_status is in error, so the check is a bit redundant - */ - if ((!req->rq_transno || it->it_status < 0) && req->rq_replay) - mdc_clear_replay_flag(req, it->it_status); - - /* If we're doing an IT_OPEN which did not result in an actual - * successful open, then we need to remove the bit which saves - * this request for unconditional replay. - * - * It's important that we do this first! Otherwise we might exit the - * function without doing so, and try to replay a failed create - * (bug 3440) - */ - if (it->it_op & IT_OPEN && req->rq_replay && - (!it_disposition(it, DISP_OPEN_OPEN) || it->it_status != 0)) - mdc_clear_replay_flag(req, it->it_status); - - DEBUG_REQ(D_RPCTRACE, req, "op: %d disposition: %x, status: %d", - it->it_op, it->it_disposition, it->it_status); - - /* We know what to expect, so we do any byte flipping required here */ - if (it_has_reply_body(it)) { - struct mdt_body *body; - - body = req_capsule_server_get(pill, &RMF_MDT_BODY); - if (!body) { - CERROR("Can't swab mdt_body\n"); - return -EPROTO; - } - - if (it_disposition(it, DISP_OPEN_OPEN) && - !it_open_error(DISP_OPEN_OPEN, it)) { - /* - * If this is a successful OPEN request, we need to set - * replay handler and data early, so that if replay - * happens immediately after swabbing below, new reply - * is swabbed by that handler correctly. 
- */ - mdc_set_open_replay_data(NULL, NULL, it); - } - - if ((body->mbo_valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) { - void *eadata; - - mdc_update_max_ea_from_body(exp, body); - - /* - * The eadata is opaque; just check that it is there. - * Eventually, obd_unpackmd() will check the contents. - */ - eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD, - body->mbo_eadatasize); - if (!eadata) - return -EPROTO; - - /* save lvb data and length in case this is for layout - * lock - */ - lvb_data = eadata; - lvb_len = body->mbo_eadatasize; - - /* - * We save the reply LOV EA in case we have to replay a - * create for recovery. If we didn't allocate a large - * enough request buffer above we need to reallocate it - * here to hold the actual LOV EA. - * - * To not save LOV EA if request is not going to replay - * (for example error one). - */ - if ((it->it_op & IT_OPEN) && req->rq_replay) { - void *lmm; - - if (req_capsule_get_size(pill, &RMF_EADATA, - RCL_CLIENT) < - body->mbo_eadatasize) - mdc_realloc_openmsg(req, body); - else - req_capsule_shrink(pill, &RMF_EADATA, - body->mbo_eadatasize, - RCL_CLIENT); - - req_capsule_set_size(pill, &RMF_EADATA, - RCL_CLIENT, - body->mbo_eadatasize); - - lmm = req_capsule_client_get(pill, &RMF_EADATA); - if (lmm) - memcpy(lmm, eadata, body->mbo_eadatasize); - } - } - } else if (it->it_op & IT_LAYOUT) { - /* maybe the lock was granted right away and layout - * is packed into RMF_DLM_LVB of req - */ - lvb_len = req_capsule_get_size(pill, &RMF_DLM_LVB, RCL_SERVER); - if (lvb_len > 0) { - lvb_data = req_capsule_server_sized_get(pill, - &RMF_DLM_LVB, - lvb_len); - if (!lvb_data) - return -EPROTO; - } - } - - /* fill in stripe data for layout lock */ - lock = ldlm_handle2lock(lockh); - if (lock && ldlm_has_layout(lock) && lvb_data) { - void *lmm; - - LDLM_DEBUG(lock, "layout lock returned by: %s, lvb_len: %d", - ldlm_it2str(it->it_op), lvb_len); - - lmm = kvzalloc(lvb_len, GFP_NOFS); - if (!lmm) { - LDLM_LOCK_PUT(lock); - return 
-ENOMEM; - } - memcpy(lmm, lvb_data, lvb_len); - - /* install lvb_data */ - lock_res_and_lock(lock); - if (!lock->l_lvb_data) { - lock->l_lvb_type = LVB_T_LAYOUT; - lock->l_lvb_data = lmm; - lock->l_lvb_len = lvb_len; - lmm = NULL; - } - unlock_res_and_lock(lock); - if (lmm) - kvfree(lmm); - } - if (lock) - LDLM_LOCK_PUT(lock); - - return rc; -} - -/* We always reserve enough space in the reply packet for a stripe MD, because - * we don't know in advance the file type. - */ -int mdc_enqueue_base(struct obd_export *exp, struct ldlm_enqueue_info *einfo, - const union ldlm_policy_data *policy, - struct lookup_intent *it, struct md_op_data *op_data, - struct lustre_handle *lockh, u64 extra_lock_flags) -{ - static const union ldlm_policy_data lookup_policy = { - .l_inodebits = { MDS_INODELOCK_LOOKUP } - }; - static const union ldlm_policy_data update_policy = { - .l_inodebits = { MDS_INODELOCK_UPDATE } - }; - static const union ldlm_policy_data layout_policy = { - .l_inodebits = { MDS_INODELOCK_LAYOUT } - }; - static const union ldlm_policy_data getxattr_policy = { - .l_inodebits = { MDS_INODELOCK_XATTR } - }; - struct obd_device *obddev = class_exp2obd(exp); - struct ptlrpc_request *req = NULL; - u64 flags, saved_flags = extra_lock_flags; - struct ldlm_res_id res_id; - int generation, resends = 0; - struct ldlm_reply *lockrep; - enum lvb_type lvb_type = LVB_T_NONE; - int rc; - - LASSERTF(!it || einfo->ei_type == LDLM_IBITS, "lock type %d\n", - einfo->ei_type); - fid_build_reg_res_name(&op_data->op_fid1, &res_id); - - if (it) { - LASSERT(!policy); - - saved_flags |= LDLM_FL_HAS_INTENT; - if (it->it_op & (IT_UNLINK | IT_GETATTR | IT_READDIR)) - policy = &update_policy; - else if (it->it_op & IT_LAYOUT) - policy = &layout_policy; - else if (it->it_op & (IT_GETXATTR | IT_SETXATTR)) - policy = &getxattr_policy; - else - policy = &lookup_policy; - } - - generation = obddev->u.cli.cl_import->imp_generation; -resend: - flags = saved_flags; - if (!it) { - /* The only way right 
now is FLOCK. */ - LASSERTF(einfo->ei_type == LDLM_FLOCK, "lock type %d\n", - einfo->ei_type); - res_id.name[3] = LDLM_FLOCK; - } else if (it->it_op & IT_OPEN) { - req = mdc_intent_open_pack(exp, it, op_data); - } else if (it->it_op & IT_UNLINK) { - req = mdc_intent_unlink_pack(exp, it, op_data); - } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) { - req = mdc_intent_getattr_pack(exp, it, op_data); - } else if (it->it_op & IT_READDIR) { - req = mdc_enqueue_pack(exp, 0); - } else if (it->it_op & IT_LAYOUT) { - if (!imp_connect_lvb_type(class_exp2cliimp(exp))) - return -EOPNOTSUPP; - req = mdc_intent_layout_pack(exp, it, op_data); - lvb_type = LVB_T_LAYOUT; - } else if (it->it_op & IT_GETXATTR) { - req = mdc_intent_getxattr_pack(exp, it, op_data); - } else { - LBUG(); - return -EINVAL; - } - - if (IS_ERR(req)) - return PTR_ERR(req); - - if (resends) { - req->rq_generation_set = 1; - req->rq_import_generation = generation; - req->rq_sent = ktime_get_real_seconds() + resends; - } - - /* It is important to obtain modify RPC slot first (if applicable), so - * that threads that are waiting for a modify RPC slot are not polluting - * our rpcs in flight counter. - * We do not do flock request limiting, though - */ - if (it) { - mdc_get_mod_rpc_slot(req, it); - rc = obd_get_request_slot(&obddev->u.cli); - if (rc != 0) { - mdc_put_mod_rpc_slot(req, it); - mdc_clear_replay_flag(req, 0); - ptlrpc_req_finished(req); - return rc; - } - } - - rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, policy, &flags, NULL, - 0, lvb_type, lockh, 0); - if (!it) { - /* For flock requests we immediately return without further - * delay and let caller deal with the rest, since rest of - * this function metadata processing makes no sense for flock - * requests anyway. 
But in case of problem during comms with - * Server (ETIMEDOUT) or any signal/kill attempt (EINTR), we - * can not rely on caller and this mainly for F_UNLCKs - * (explicits or automatically generated by Kernel to clean - * current FLocks upon exit) that can't be trashed - */ - if (((rc == -EINTR) || (rc == -ETIMEDOUT)) && - (einfo->ei_type == LDLM_FLOCK) && - (einfo->ei_mode == LCK_NL)) - goto resend; - return rc; - } - - obd_put_request_slot(&obddev->u.cli); - mdc_put_mod_rpc_slot(req, it); - - if (rc < 0) { - CDEBUG(D_INFO, "%s: ldlm_cli_enqueue failed: rc = %d\n", - obddev->obd_name, rc); - - mdc_clear_replay_flag(req, rc); - ptlrpc_req_finished(req); - return rc; - } - - lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); - - lockrep->lock_policy_res2 = - ptlrpc_status_ntoh(lockrep->lock_policy_res2); - - /* - * Retry infinitely when the server returns -EINPROGRESS for the - * intent operation, when server returns -EINPROGRESS for acquiring - * intent lock, we'll retry in after_reply(). 
- */ - if (it->it_op && (int)lockrep->lock_policy_res2 == -EINPROGRESS) { - mdc_clear_replay_flag(req, rc); - ptlrpc_req_finished(req); - resends++; - - CDEBUG(D_HA, "%s: resend:%d op:%d " DFID "/" DFID "\n", - obddev->obd_name, resends, it->it_op, - PFID(&op_data->op_fid1), PFID(&op_data->op_fid2)); - - if (generation == obddev->u.cli.cl_import->imp_generation) { - goto resend; - } else { - CDEBUG(D_HA, "resend cross eviction\n"); - return -EIO; - } - } - - rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc); - if (rc < 0) { - if (lustre_handle_is_used(lockh)) { - ldlm_lock_decref(lockh, einfo->ei_mode); - memset(lockh, 0, sizeof(*lockh)); - } - ptlrpc_req_finished(req); - - it->it_lock_handle = 0; - it->it_lock_mode = 0; - it->it_request = NULL; - } - - return rc; -} - -int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, - const union ldlm_policy_data *policy, - struct md_op_data *op_data, - struct lustre_handle *lockh, u64 extra_lock_flags) -{ - return mdc_enqueue_base(exp, einfo, policy, NULL, - op_data, lockh, extra_lock_flags); -} - -static int mdc_finish_intent_lock(struct obd_export *exp, - struct ptlrpc_request *request, - struct md_op_data *op_data, - struct lookup_intent *it, - struct lustre_handle *lockh) -{ - struct lustre_handle old_lock; - struct ldlm_lock *lock; - int rc = 0; - - LASSERT(request != LP_POISON); - LASSERT(request->rq_repmsg != LP_POISON); - - if (it->it_op & IT_READDIR) - return 0; - - if (it->it_op & (IT_GETXATTR | IT_LAYOUT)) { - if (it->it_status != 0) { - rc = it->it_status; - goto out; - } - goto matching_lock; - } - - if (!it_disposition(it, DISP_IT_EXECD)) { - /* The server failed before it even started executing the - * intent, i.e. because it couldn't unpack the request. 
- */ - LASSERT(it->it_status != 0); - rc = it->it_status; - goto out; - } - - rc = it_open_error(DISP_IT_EXECD, it); - if (rc) - goto out; - - rc = it_open_error(DISP_LOOKUP_EXECD, it); - if (rc) - goto out; - - /* keep requests around for the multiple phases of the call - * this shows the DISP_XX must guarantee we make it into the call - */ - if (!it_disposition(it, DISP_ENQ_CREATE_REF) && - it_disposition(it, DISP_OPEN_CREATE) && - !it_open_error(DISP_OPEN_CREATE, it)) { - it_set_disposition(it, DISP_ENQ_CREATE_REF); - ptlrpc_request_addref(request); /* balanced in ll_create_node */ - } - if (!it_disposition(it, DISP_ENQ_OPEN_REF) && - it_disposition(it, DISP_OPEN_OPEN) && - !it_open_error(DISP_OPEN_OPEN, it)) { - it_set_disposition(it, DISP_ENQ_OPEN_REF); - ptlrpc_request_addref(request); /* balanced in ll_file_open */ - /* BUG 11546 - eviction in the middle of open rpc processing */ - OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_ENQUEUE_PAUSE, obd_timeout); - } - - if (it->it_op & IT_CREAT) - /* XXX this belongs in ll_create_it */ - ; - else if (it->it_op == IT_OPEN) - LASSERT(!it_disposition(it, DISP_OPEN_CREATE)); - else - LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP)); - -matching_lock: - /* If we already have a matching lock, then cancel the new - * one. We have to set the data here instead of in - * mdc_enqueue, because we need to use the child's inode as - * the l_ast_data to match, and that's not available until - * intent_finish has performed the iget().) 
- */ - lock = ldlm_handle2lock(lockh); - if (lock) { - union ldlm_policy_data policy = lock->l_policy_data; - - LDLM_DEBUG(lock, "matching against this"); - - if (it_has_reply_body(it)) { - struct mdt_body *body; - - body = req_capsule_server_get(&request->rq_pill, - &RMF_MDT_BODY); - - /* mdc_enqueue checked */ - LASSERT(body); - LASSERTF(fid_res_name_eq(&body->mbo_fid1, - &lock->l_resource->lr_name), - "Lock res_id: " DLDLMRES ", fid: " DFID "\n", - PLDLMRES(lock->l_resource), - PFID(&body->mbo_fid1)); - } - LDLM_LOCK_PUT(lock); - - memcpy(&old_lock, lockh, sizeof(*lockh)); - if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL, - LDLM_IBITS, &policy, LCK_NL, - &old_lock, 0)) { - ldlm_lock_decref_and_cancel(lockh, - it->it_lock_mode); - memcpy(lockh, &old_lock, sizeof(old_lock)); - it->it_lock_handle = lockh->cookie; - } - } -out: - CDEBUG(D_DENTRY, - "D_IT dentry %.*s intent: %s status %d disp %x rc %d\n", - (int)op_data->op_namelen, op_data->op_name, - ldlm_it2str(it->it_op), it->it_status, it->it_disposition, rc); - return rc; -} - -int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, - struct lu_fid *fid, __u64 *bits) -{ - /* We could just return 1 immediately, but since we should only - * be called in revalidate_it if we already have a lock, let's - * verify that. - */ - struct ldlm_res_id res_id; - struct lustre_handle lockh; - union ldlm_policy_data policy; - enum ldlm_mode mode; - - if (it->it_lock_handle) { - lockh.cookie = it->it_lock_handle; - mode = ldlm_revalidate_lock_handle(&lockh, bits); - } else { - fid_build_reg_res_name(fid, &res_id); - switch (it->it_op) { - case IT_GETATTR: - /* File attributes are held under multiple bits: - * nlink is under lookup lock, size and times are - * under UPDATE lock and recently we've also got - * a separate permissions lock for owner/group/acl that - * were protected by lookup lock before. - * Getattr must provide all of that information, - * so we need to ensure we have all of those locks. 
- * Unfortunately, if the bits are split across multiple - * locks, there's no easy way to match all of them here, - * so an extra RPC would be performed to fetch all - * of those bits at once for now. - */ - /* For new MDTs(> 2.4), UPDATE|PERM should be enough, - * but for old MDTs (< 2.4), permission is covered - * by LOOKUP lock, so it needs to match all bits here. - */ - policy.l_inodebits.bits = MDS_INODELOCK_UPDATE | - MDS_INODELOCK_LOOKUP | - MDS_INODELOCK_PERM; - break; - case IT_READDIR: - policy.l_inodebits.bits = MDS_INODELOCK_UPDATE; - break; - case IT_LAYOUT: - policy.l_inodebits.bits = MDS_INODELOCK_LAYOUT; - break; - default: - policy.l_inodebits.bits = MDS_INODELOCK_LOOKUP; - break; - } - - mode = mdc_lock_match(exp, LDLM_FL_BLOCK_GRANTED, fid, - LDLM_IBITS, &policy, - LCK_CR | LCK_CW | LCK_PR | LCK_PW, - &lockh); - } - - if (mode) { - it->it_lock_handle = lockh.cookie; - it->it_lock_mode = mode; - } else { - it->it_lock_handle = 0; - it->it_lock_mode = 0; - } - - return !!mode; -} - -/* - * This long block is all about fixing up the lock and request state - * so that it is correct as of the moment _before_ the operation was - * applied; that way, the VFS will think that everything is normal and - * call Lustre's regular VFS methods. - * - * If we're performing a creation, that means that unless the creation - * failed with EEXIST, we should fake up a negative dentry. - * - * For everything else, we want the lookup to succeed. - * - * One additional note: if CREATE or OPEN succeeded, we add an extra - * reference to the request because we need to keep it around until - * ll_create/ll_open gets called. - * - * The server will return to us, in it_disposition, an indication of - * exactly what it_status refers to. - * - * If DISP_OPEN_OPEN is set, then it_status refers to the open() call, - * otherwise if DISP_OPEN_CREATE is set, then it_status is the - * creation failure mode. 
In either case, one of DISP_LOOKUP_NEG or - * DISP_LOOKUP_POS will be set, indicating whether the child lookup - * was successful. - * - * Else, if DISP_LOOKUP_EXECD then it_status is the rc of the - * child lookup. - */ -int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data, - struct lookup_intent *it, struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, __u64 extra_lock_flags) -{ - struct ldlm_enqueue_info einfo = { - .ei_type = LDLM_IBITS, - .ei_mode = it_to_lock_mode(it), - .ei_cb_bl = cb_blocking, - .ei_cb_cp = ldlm_completion_ast, - }; - struct lustre_handle lockh; - int rc = 0; - - LASSERT(it); - - CDEBUG(D_DLMTRACE, "(name: %.*s," DFID ") in obj " DFID - ", intent: %s flags %#Lo\n", (int)op_data->op_namelen, - op_data->op_name, PFID(&op_data->op_fid2), - PFID(&op_data->op_fid1), ldlm_it2str(it->it_op), - it->it_flags); - - lockh.cookie = 0; - if (fid_is_sane(&op_data->op_fid2) && - (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_READDIR))) { - /* We could just return 1 immediately, but since we should only - * be called in revalidate_it if we already have a lock, let's - * verify that. - */ - it->it_lock_handle = 0; - rc = mdc_revalidate_lock(exp, it, &op_data->op_fid2, NULL); - /* Only return failure if it was not GETATTR by cfid - * (from inode_revalidate) - */ - if (rc || op_data->op_namelen != 0) - return rc; - } - - /* For case if upper layer did not alloc fid, do it now. 
*/ - if (!fid_is_sane(&op_data->op_fid2) && it->it_op & IT_CREAT) { - rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); - if (rc < 0) { - CERROR("Can't alloc new fid, rc %d\n", rc); - return rc; - } - } - - rc = mdc_enqueue_base(exp, &einfo, NULL, it, op_data, &lockh, - extra_lock_flags); - if (rc < 0) - return rc; - - *reqp = it->it_request; - rc = mdc_finish_intent_lock(exp, *reqp, op_data, it, &lockh); - return rc; -} - -static int mdc_intent_getattr_async_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - void *args, int rc) -{ - struct mdc_getattr_args *ga = args; - struct obd_export *exp = ga->ga_exp; - struct md_enqueue_info *minfo = ga->ga_minfo; - struct ldlm_enqueue_info *einfo = &minfo->mi_einfo; - struct lookup_intent *it; - struct lustre_handle *lockh; - struct obd_device *obddev; - struct ldlm_reply *lockrep; - __u64 flags = LDLM_FL_HAS_INTENT; - - it = &minfo->mi_it; - lockh = &minfo->mi_lockh; - - obddev = class_exp2obd(exp); - - obd_put_request_slot(&obddev->u.cli); - if (OBD_FAIL_CHECK(OBD_FAIL_MDC_GETATTR_ENQUEUE)) - rc = -ETIMEDOUT; - - rc = ldlm_cli_enqueue_fini(exp, req, einfo->ei_type, 1, einfo->ei_mode, - &flags, NULL, 0, lockh, rc); - if (rc < 0) { - CERROR("ldlm_cli_enqueue_fini: %d\n", rc); - mdc_clear_replay_flag(req, rc); - goto out; - } - - lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); - - lockrep->lock_policy_res2 = - ptlrpc_status_ntoh(lockrep->lock_policy_res2); - - rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc); - if (rc) - goto out; - - rc = mdc_finish_intent_lock(exp, req, &minfo->mi_data, it, lockh); - -out: - minfo->mi_cb(req, minfo, rc); - return 0; -} - -int mdc_intent_getattr_async(struct obd_export *exp, - struct md_enqueue_info *minfo) -{ - struct md_op_data *op_data = &minfo->mi_data; - struct lookup_intent *it = &minfo->mi_it; - struct ptlrpc_request *req; - struct mdc_getattr_args *ga; - struct obd_device *obddev = class_exp2obd(exp); - struct ldlm_res_id res_id; - 
union ldlm_policy_data policy = { - .l_inodebits = { MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE } - }; - int rc = 0; - __u64 flags = LDLM_FL_HAS_INTENT; - - CDEBUG(D_DLMTRACE, - "name: %.*s in inode " DFID ", intent: %s flags %#Lo\n", - (int)op_data->op_namelen, op_data->op_name, - PFID(&op_data->op_fid1), ldlm_it2str(it->it_op), it->it_flags); - - fid_build_reg_res_name(&op_data->op_fid1, &res_id); - req = mdc_intent_getattr_pack(exp, it, op_data); - if (IS_ERR(req)) - return PTR_ERR(req); - - rc = obd_get_request_slot(&obddev->u.cli); - if (rc != 0) { - ptlrpc_req_finished(req); - return rc; - } - - rc = ldlm_cli_enqueue(exp, &req, &minfo->mi_einfo, &res_id, &policy, - &flags, NULL, 0, LVB_T_NONE, &minfo->mi_lockh, 1); - if (rc < 0) { - obd_put_request_slot(&obddev->u.cli); - ptlrpc_req_finished(req); - return rc; - } - - BUILD_BUG_ON(sizeof(*ga) > sizeof(req->rq_async_args)); - ga = ptlrpc_req_async_args(req); - ga->ga_exp = exp; - ga->ga_minfo = minfo; - - req->rq_interpret_reply = mdc_intent_getattr_async_interpret; - ptlrpcd_add_req(req); - - return 0; -} diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c deleted file mode 100644 index e77c00df0693..000000000000 --- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c +++ /dev/null @@ -1,421 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_MDC - -# include -# include - -#include -#include "mdc_internal.h" -#include - -/* mdc_setattr does its own semaphore handling */ -static int mdc_reint(struct ptlrpc_request *request, int level) -{ - int rc; - - request->rq_send_state = level; - - mdc_get_mod_rpc_slot(request, NULL); - rc = ptlrpc_queue_wait(request); - mdc_put_mod_rpc_slot(request, NULL); - if (rc) - CDEBUG(D_INFO, "error in handling %d\n", rc); - else if (!req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY)) - rc = -EPROTO; - - return rc; -} - -/* Find and cancel locally locks matched by inode @bits & @mode in the resource - * found by @fid. Found locks are added into @cancel list. Returns the amount of - * locks added to @cancels list. - */ -int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid, - struct list_head *cancels, enum ldlm_mode mode, - __u64 bits) -{ - struct ldlm_namespace *ns = exp->exp_obd->obd_namespace; - union ldlm_policy_data policy = {}; - struct ldlm_res_id res_id; - struct ldlm_resource *res; - int count; - - /* Return, i.e. cancel nothing, only if ELC is supported (flag in - * export) but disabled through procfs (flag in NS). - * - * This distinguishes from a case when ELC is not supported originally, - * when we still want to cancel locks in advance and just cancel them - * locally, without sending any RPC. 
- */ - if (exp_connect_cancelset(exp) && !ns_connect_cancelset(ns)) - return 0; - - fid_build_reg_res_name(fid, &res_id); - res = ldlm_resource_get(exp->exp_obd->obd_namespace, - NULL, &res_id, 0, 0); - if (IS_ERR(res)) - return 0; - LDLM_RESOURCE_ADDREF(res); - /* Initialize ibits lock policy. */ - policy.l_inodebits.bits = bits; - count = ldlm_cancel_resource_local(res, cancels, &policy, - mode, 0, 0, NULL); - LDLM_RESOURCE_DELREF(res); - ldlm_resource_putref(res); - return count; -} - -int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, - void *ea, size_t ealen, struct ptlrpc_request **request) -{ - LIST_HEAD(cancels); - struct ptlrpc_request *req; - int count = 0, rc; - __u64 bits; - - bits = MDS_INODELOCK_UPDATE; - if (op_data->op_attr.ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) - bits |= MDS_INODELOCK_LOOKUP; - if ((op_data->op_flags & MF_MDC_CANCEL_FID1) && - (fid_is_sane(&op_data->op_fid1))) - count = mdc_resource_get_unused(exp, &op_data->op_fid1, - &cancels, LCK_EX, bits); - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_REINT_SETATTR); - if (!req) { - ldlm_lock_list_put(&cancels, l_bl_ast, count); - return -ENOMEM; - } - req_capsule_set_size(&req->rq_pill, &RMF_MDT_EPOCH, RCL_CLIENT, 0); - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, ealen); - req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT, 0); - - rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - if (op_data->op_attr.ia_valid & (ATTR_MTIME | ATTR_CTIME)) - CDEBUG(D_INODE, "setting mtime %ld, ctime %ld\n", - LTIME_S(op_data->op_attr.ia_mtime), - LTIME_S(op_data->op_attr.ia_ctime)); - mdc_setattr_pack(req, op_data, ea, ealen); - - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - req->rq_import->imp_connect_data.ocd_max_easize); - ptlrpc_request_set_replen(req); - - rc = mdc_reint(req, LUSTRE_IMP_FULL); - - if (rc == -ERESTARTSYS) - rc = 0; - - *request = req; 
- - return rc; -} - -int mdc_create(struct obd_export *exp, struct md_op_data *op_data, - const void *data, size_t datalen, umode_t mode, - uid_t uid, gid_t gid, kernel_cap_t cap_effective, - __u64 rdev, struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - int level, rc; - int count, resends = 0; - struct obd_import *import = exp->exp_obd->u.cli.cl_import; - int generation = import->imp_generation; - LIST_HEAD(cancels); - - /* For case if upper layer did not alloc fid, do it now. */ - if (!fid_is_sane(&op_data->op_fid2)) { - /* - * mdc_fid_alloc() may return errno 1 in case of switch to new - * sequence, handle this. - */ - rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); - if (rc < 0) - return rc; - } - -rebuild: - count = 0; - if ((op_data->op_flags & MF_MDC_CANCEL_FID1) && - (fid_is_sane(&op_data->op_fid1))) - count = mdc_resource_get_unused(exp, &op_data->op_fid1, - &cancels, LCK_EX, - MDS_INODELOCK_UPDATE); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_REINT_CREATE_ACL); - if (!req) { - ldlm_lock_list_put(&cancels, l_bl_ast, count); - return -ENOMEM; - } - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - op_data->op_namelen + 1); - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, - data && datalen ? datalen : 0); - - rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - /* - * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2] with - * tgt, for symlinks or lov MD data. 
- */ - mdc_create_pack(req, op_data, data, datalen, mode, uid, - gid, cap_effective, rdev); - - ptlrpc_request_set_replen(req); - - /* ask ptlrpc not to resend on EINPROGRESS since we have our own retry - * logic here - */ - req->rq_no_retry_einprogress = 1; - - if (resends) { - req->rq_generation_set = 1; - req->rq_import_generation = generation; - req->rq_sent = ktime_get_real_seconds() + resends; - } - level = LUSTRE_IMP_FULL; - resend: - rc = mdc_reint(req, level); - - /* Resend if we were told to. */ - if (rc == -ERESTARTSYS) { - level = LUSTRE_IMP_RECOVER; - goto resend; - } else if (rc == -EINPROGRESS) { - /* Retry create infinitely until succeed or get other - * error code. - */ - ptlrpc_req_finished(req); - resends++; - - CDEBUG(D_HA, "%s: resend:%d create on " DFID "/" DFID "\n", - exp->exp_obd->obd_name, resends, - PFID(&op_data->op_fid1), PFID(&op_data->op_fid2)); - - if (generation == import->imp_generation) { - goto rebuild; - } else { - CDEBUG(D_HA, "resend cross eviction\n"); - return -EIO; - } - } - - *request = req; - return rc; -} - -int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - LIST_HEAD(cancels); - struct obd_device *obd = class_exp2obd(exp); - struct ptlrpc_request *req = *request; - int count = 0, rc; - - LASSERT(!req); - - if ((op_data->op_flags & MF_MDC_CANCEL_FID1) && - (fid_is_sane(&op_data->op_fid1))) - count = mdc_resource_get_unused(exp, &op_data->op_fid1, - &cancels, LCK_EX, - MDS_INODELOCK_UPDATE); - if ((op_data->op_flags & MF_MDC_CANCEL_FID3) && - (fid_is_sane(&op_data->op_fid3))) - count += mdc_resource_get_unused(exp, &op_data->op_fid3, - &cancels, LCK_EX, - MDS_INODELOCK_FULL); - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_REINT_UNLINK); - if (!req) { - ldlm_lock_list_put(&cancels, l_bl_ast, count); - return -ENOMEM; - } - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - op_data->op_namelen + 1); - - rc = mdc_prep_elc_req(exp, req, 
MDS_REINT, &cancels, count); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_unlink_pack(req, op_data); - - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obd->u.cli.cl_default_mds_easize); - ptlrpc_request_set_replen(req); - - *request = req; - - rc = mdc_reint(req, LUSTRE_IMP_FULL); - if (rc == -ERESTARTSYS) - rc = 0; - return rc; -} - -int mdc_link(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - LIST_HEAD(cancels); - struct ptlrpc_request *req; - int count = 0, rc; - - if ((op_data->op_flags & MF_MDC_CANCEL_FID2) && - (fid_is_sane(&op_data->op_fid2))) - count = mdc_resource_get_unused(exp, &op_data->op_fid2, - &cancels, LCK_EX, - MDS_INODELOCK_UPDATE); - if ((op_data->op_flags & MF_MDC_CANCEL_FID1) && - (fid_is_sane(&op_data->op_fid1))) - count += mdc_resource_get_unused(exp, &op_data->op_fid1, - &cancels, LCK_EX, - MDS_INODELOCK_UPDATE); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_REINT_LINK); - if (!req) { - ldlm_lock_list_put(&cancels, l_bl_ast, count); - return -ENOMEM; - } - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - op_data->op_namelen + 1); - - rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_link_pack(req, op_data); - ptlrpc_request_set_replen(req); - - rc = mdc_reint(req, LUSTRE_IMP_FULL); - *request = req; - if (rc == -ERESTARTSYS) - rc = 0; - - return rc; -} - -int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, - const char *old, size_t oldlen, const char *new, size_t newlen, - struct ptlrpc_request **request) -{ - LIST_HEAD(cancels); - struct obd_device *obd = exp->exp_obd; - struct ptlrpc_request *req; - int count = 0, rc; - - if ((op_data->op_flags & MF_MDC_CANCEL_FID1) && - (fid_is_sane(&op_data->op_fid1))) - count = mdc_resource_get_unused(exp, &op_data->op_fid1, - &cancels, LCK_EX, - MDS_INODELOCK_UPDATE); - if ((op_data->op_flags & 
MF_MDC_CANCEL_FID2) && - (fid_is_sane(&op_data->op_fid2))) - count += mdc_resource_get_unused(exp, &op_data->op_fid2, - &cancels, LCK_EX, - MDS_INODELOCK_UPDATE); - if ((op_data->op_flags & MF_MDC_CANCEL_FID3) && - (fid_is_sane(&op_data->op_fid3))) - count += mdc_resource_get_unused(exp, &op_data->op_fid3, - &cancels, LCK_EX, - MDS_INODELOCK_LOOKUP); - if ((op_data->op_flags & MF_MDC_CANCEL_FID4) && - (fid_is_sane(&op_data->op_fid4))) - count += mdc_resource_get_unused(exp, &op_data->op_fid4, - &cancels, LCK_EX, - MDS_INODELOCK_FULL); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - op_data->op_cli_flags & CLI_MIGRATE ? - &RQF_MDS_REINT_MIGRATE : &RQF_MDS_REINT_RENAME); - if (!req) { - ldlm_lock_list_put(&cancels, l_bl_ast, count); - return -ENOMEM; - } - - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, oldlen + 1); - req_capsule_set_size(&req->rq_pill, &RMF_SYMTGT, RCL_CLIENT, - newlen + 1); - - rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - if (op_data->op_cli_flags & CLI_MIGRATE && op_data->op_data) { - struct md_open_data *mod = op_data->op_data; - - LASSERTF(mod->mod_open_req && - mod->mod_open_req->rq_type != LI_POISON, - "POISONED open %p!\n", mod->mod_open_req); - - DEBUG_REQ(D_HA, mod->mod_open_req, "matched open"); - /* - * We no longer want to preserve this open for replay even - * though the open was committed. 
b=3632, b=3633 - */ - spin_lock(&mod->mod_open_req->rq_lock); - mod->mod_open_req->rq_replay = 0; - spin_unlock(&mod->mod_open_req->rq_lock); - } - - if (exp_connect_cancelset(exp) && req) - ldlm_cli_cancel_list(&cancels, count, req, 0); - - mdc_rename_pack(req, op_data, old, oldlen, new, newlen); - - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obd->u.cli.cl_default_mds_easize); - ptlrpc_request_set_replen(req); - - rc = mdc_reint(req, LUSTRE_IMP_FULL); - *request = req; - if (rc == -ERESTARTSYS) - rc = 0; - - return rc; -} diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c deleted file mode 100644 index cff31cb0a9ac..000000000000 --- a/drivers/staging/lustre/lustre/mdc/mdc_request.c +++ /dev/null @@ -1,2770 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_MDC - -# include -# include -# include -# include -# include -# include -# include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mdc_internal.h" - -#define REQUEST_MINOR 244 - -static int mdc_cleanup(struct obd_device *obd); - -static inline int mdc_queue_wait(struct ptlrpc_request *req) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - int rc; - - /* obd_get_request_slot() ensures that this client has no more - * than cl_max_rpcs_in_flight RPCs simultaneously inf light - * against an MDT. - */ - rc = obd_get_request_slot(cli); - if (rc != 0) - return rc; - - rc = ptlrpc_queue_wait(req); - obd_put_request_slot(cli); - - return rc; -} - -static int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid) -{ - struct ptlrpc_request *req; - struct mdt_body *body; - int rc; - - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_MDS_GETSTATUS, - LUSTRE_MDS_VERSION, MDS_GETSTATUS); - if (!req) - return -ENOMEM; - - mdc_pack_body(req, NULL, 0, 0, -1, 0); - req->rq_send_state = LUSTRE_IMP_FULL; - - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - if (rc) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (!body) { - rc = -EPROTO; - goto out; - } - - *rootfid = body->mbo_fid1; - CDEBUG(D_NET, - "root fid=" DFID ", last_committed=%llu\n", - PFID(rootfid), - lustre_msg_get_last_committed(req->rq_repmsg)); -out: - ptlrpc_req_finished(req); - return rc; -} - -/* - * This function now is known to always saying that it will receive 4 buffers - * from server. Even for cases when acl_size and md_size is zero, RPC header - * will contain 4 fields and RPC itself will contain zero size fields. This is - * because mdt_getattr*() _always_ returns 4 fields, but if acl is not needed - * and thus zero, it shrinks it, making zero size. The same story about - * md_size. 
And this is course of problem when client waits for smaller number - * of fields. This issue will be fixed later when client gets aware of RPC - * layouts. --umka - */ -static int mdc_getattr_common(struct obd_export *exp, - struct ptlrpc_request *req) -{ - struct req_capsule *pill = &req->rq_pill; - struct mdt_body *body; - void *eadata; - int rc; - - /* Request message already built. */ - rc = ptlrpc_queue_wait(req); - if (rc != 0) - return rc; - - /* sanity check for the reply */ - body = req_capsule_server_get(pill, &RMF_MDT_BODY); - if (!body) - return -EPROTO; - - CDEBUG(D_NET, "mode: %o\n", body->mbo_mode); - - mdc_update_max_ea_from_body(exp, body); - if (body->mbo_eadatasize != 0) { - eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD, - body->mbo_eadatasize); - if (!eadata) - return -EPROTO; - } - - return 0; -} - -static int mdc_getattr(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - int rc; - - /* Single MDS without an LMV case */ - if (op_data->op_flags & MF_GET_MDT_IDX) { - op_data->op_mds = 0; - return 0; - } - *request = NULL; - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_GETATTR); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, - op_data->op_mode, -1, 0); - - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - req->rq_import->imp_connect_data.ocd_max_easize); - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - op_data->op_mode); - ptlrpc_request_set_replen(req); - - rc = mdc_getattr_common(exp, req); - if (rc) - ptlrpc_req_finished(req); - else - *request = req; - return rc; -} - -static int mdc_getattr_name(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - int rc; - - *request = NULL; - req = 
ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_GETATTR_NAME); - if (!req) - return -ENOMEM; - - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - op_data->op_namelen + 1); - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GETATTR_NAME); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_pack_body(req, &op_data->op_fid1, op_data->op_valid, - op_data->op_mode, op_data->op_suppgids[0], 0); - - if (op_data->op_name) { - char *name = req_capsule_client_get(&req->rq_pill, &RMF_NAME); - - LASSERT(strnlen(op_data->op_name, op_data->op_namelen) == - op_data->op_namelen); - memcpy(name, op_data->op_name, op_data->op_namelen); - } - - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - op_data->op_mode); - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - req->rq_import->imp_connect_data.ocd_max_easize); - ptlrpc_request_set_replen(req); - - rc = mdc_getattr_common(exp, req); - if (rc) - ptlrpc_req_finished(req); - else - *request = req; - return rc; -} - -static int mdc_xattr_common(struct obd_export *exp, - const struct req_format *fmt, - const struct lu_fid *fid, - int opcode, u64 valid, - const char *xattr_name, const char *input, - int input_size, int output_size, int flags, - __u32 suppgid, struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - int xattr_namelen = 0; - char *tmp; - int rc; - - *request = NULL; - req = ptlrpc_request_alloc(class_exp2cliimp(exp), fmt); - if (!req) - return -ENOMEM; - - if (xattr_name) { - xattr_namelen = strlen(xattr_name) + 1; - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - xattr_namelen); - } - if (input_size) { - LASSERT(input); - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT, - input_size); - } - - /* Flush local XATTR locks to get rid of a possible cancel RPC */ - if (opcode == MDS_REINT && fid_is_sane(fid) && - exp->exp_connect_data.ocd_ibits_known & MDS_INODELOCK_XATTR) { - LIST_HEAD(cancels); - int count; - - /* Without that 
packing would fail */ - if (input_size == 0) - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, - RCL_CLIENT, 0); - - count = mdc_resource_get_unused(exp, fid, - &cancels, LCK_EX, - MDS_INODELOCK_XATTR); - - rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - } else { - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, opcode); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - } - - if (opcode == MDS_REINT) { - struct mdt_rec_setxattr *rec; - - BUILD_BUG_ON(sizeof(struct mdt_rec_setxattr) != - sizeof(struct mdt_rec_reint)); - rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT); - rec->sx_opcode = REINT_SETXATTR; - rec->sx_fsuid = from_kuid(&init_user_ns, current_fsuid()); - rec->sx_fsgid = from_kgid(&init_user_ns, current_fsgid()); - rec->sx_cap = current_cap().cap[0]; - rec->sx_suppgid1 = suppgid; - rec->sx_suppgid2 = -1; - rec->sx_fid = *fid; - rec->sx_valid = valid | OBD_MD_FLCTIME; - rec->sx_time = ktime_get_real_seconds(); - rec->sx_size = output_size; - rec->sx_flags = flags; - - } else { - mdc_pack_body(req, fid, valid, output_size, suppgid, flags); - } - - if (xattr_name) { - tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME); - memcpy(tmp, xattr_name, xattr_namelen); - } - if (input_size) { - tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA); - memcpy(tmp, input, input_size); - } - - if (req_capsule_has_field(&req->rq_pill, &RMF_EADATA, RCL_SERVER)) - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, - RCL_SERVER, output_size); - ptlrpc_request_set_replen(req); - - /* make rpc */ - if (opcode == MDS_REINT) - mdc_get_mod_rpc_slot(req, NULL); - - rc = ptlrpc_queue_wait(req); - - if (opcode == MDS_REINT) - mdc_put_mod_rpc_slot(req, NULL); - - if (rc) - ptlrpc_req_finished(req); - else - *request = req; - return rc; -} - -static int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 obd_md_valid, const char *name, - const void *value, size_t 
value_size, - unsigned int xattr_flags, u32 suppgid, - struct ptlrpc_request **req) -{ - LASSERT(obd_md_valid == OBD_MD_FLXATTR || - obd_md_valid == OBD_MD_FLXATTRRM); - - return mdc_xattr_common(exp, &RQF_MDS_REINT_SETXATTR, - fid, MDS_REINT, obd_md_valid, name, - value, value_size, 0, xattr_flags, suppgid, - req); -} - -static int mdc_getxattr(struct obd_export *exp, const struct lu_fid *fid, - u64 obd_md_valid, const char *name, size_t buf_size, - struct ptlrpc_request **req) -{ - LASSERT(obd_md_valid == OBD_MD_FLXATTR || - obd_md_valid == OBD_MD_FLXATTRLS); - - return mdc_xattr_common(exp, &RQF_MDS_GETXATTR, fid, MDS_GETXATTR, - obd_md_valid, name, NULL, 0, buf_size, 0, -1, - req); -} - -#ifdef CONFIG_FS_POSIX_ACL -static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md) -{ - struct req_capsule *pill = &req->rq_pill; - struct mdt_body *body = md->body; - struct posix_acl *acl; - void *buf; - int rc; - - if (!body->mbo_aclsize) - return 0; - - buf = req_capsule_server_sized_get(pill, &RMF_ACL, body->mbo_aclsize); - - if (!buf) - return -EPROTO; - - acl = posix_acl_from_xattr(&init_user_ns, buf, body->mbo_aclsize); - if (!acl) - return 0; - - if (IS_ERR(acl)) { - rc = PTR_ERR(acl); - CERROR("convert xattr to acl: %d\n", rc); - return rc; - } - - rc = posix_acl_valid(&init_user_ns, acl); - if (rc) { - CERROR("validate acl: %d\n", rc); - posix_acl_release(acl); - return rc; - } - - md->posix_acl = acl; - return 0; -} -#else -#define mdc_unpack_acl(req, md) 0 -#endif - -static int mdc_get_lustre_md(struct obd_export *exp, - struct ptlrpc_request *req, - struct obd_export *dt_exp, - struct obd_export *md_exp, - struct lustre_md *md) -{ - struct req_capsule *pill = &req->rq_pill; - int rc; - - LASSERT(md); - memset(md, 0, sizeof(*md)); - - md->body = req_capsule_server_get(pill, &RMF_MDT_BODY); - - if (md->body->mbo_valid & OBD_MD_FLEASIZE) { - if (!S_ISREG(md->body->mbo_mode)) { - CDEBUG(D_INFO, - "OBD_MD_FLEASIZE set, should be a regular file, but 
is not\n"); - rc = -EPROTO; - goto out; - } - - if (md->body->mbo_eadatasize == 0) { - CDEBUG(D_INFO, - "OBD_MD_FLEASIZE set, but eadatasize 0\n"); - rc = -EPROTO; - goto out; - } - - md->layout.lb_len = md->body->mbo_eadatasize; - md->layout.lb_buf = req_capsule_server_sized_get(pill, - &RMF_MDT_MD, - md->layout.lb_len); - if (!md->layout.lb_buf) { - rc = -EPROTO; - goto out; - } - } else if (md->body->mbo_valid & OBD_MD_FLDIREA) { - const union lmv_mds_md *lmv; - size_t lmv_size; - - if (!S_ISDIR(md->body->mbo_mode)) { - CDEBUG(D_INFO, - "OBD_MD_FLDIREA set, should be a directory, but is not\n"); - rc = -EPROTO; - goto out; - } - - lmv_size = md->body->mbo_eadatasize; - if (!lmv_size) { - CDEBUG(D_INFO, - "OBD_MD_FLDIREA is set, but eadatasize 0\n"); - return -EPROTO; - } - if (md->body->mbo_valid & OBD_MD_MEA) { - lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD, - lmv_size); - if (!lmv) { - rc = -EPROTO; - goto out; - } - - rc = md_unpackmd(md_exp, &md->lmv, lmv, lmv_size); - if (rc < 0) - goto out; - - if (rc < (typeof(rc))sizeof(*md->lmv)) { - CDEBUG(D_INFO, - "size too small: rc < sizeof(*md->lmv) (%d < %d)\n", - rc, (int)sizeof(*md->lmv)); - rc = -EPROTO; - goto out; - } - } - } - rc = 0; - - if (md->body->mbo_valid & OBD_MD_FLACL) { - /* for ACL, it's possible that FLACL is set but aclsize is zero. - * only when aclsize != 0 there's an actual segment for ACL - * in reply buffer. 
- */ - if (md->body->mbo_aclsize) { - rc = mdc_unpack_acl(req, md); - if (rc) - goto out; -#ifdef CONFIG_FS_POSIX_ACL - } else { - md->posix_acl = NULL; -#endif - } - } - -out: - if (rc) { -#ifdef CONFIG_FS_POSIX_ACL - posix_acl_release(md->posix_acl); -#endif - } - return rc; -} - -static int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md) -{ - return 0; -} - -void mdc_replay_open(struct ptlrpc_request *req) -{ - struct md_open_data *mod = req->rq_cb_data; - struct ptlrpc_request *close_req; - struct obd_client_handle *och; - struct lustre_handle old; - struct mdt_body *body; - - if (!mod) { - DEBUG_REQ(D_ERROR, req, - "Can't properly replay without open data."); - return; - } - - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - - och = mod->mod_och; - if (och) { - struct lustre_handle *file_fh; - - LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC); - - file_fh = &och->och_fh; - CDEBUG(D_HA, "updating handle from %#llx to %#llx\n", - file_fh->cookie, body->mbo_handle.cookie); - old = *file_fh; - *file_fh = body->mbo_handle; - } - close_req = mod->mod_close_req; - if (close_req) { - __u32 opc = lustre_msg_get_opc(close_req->rq_reqmsg); - struct mdt_ioepoch *epoch; - - LASSERT(opc == MDS_CLOSE); - epoch = req_capsule_client_get(&close_req->rq_pill, - &RMF_MDT_EPOCH); - LASSERT(epoch); - - if (och) - LASSERT(!memcmp(&old, &epoch->mio_handle, sizeof(old))); - DEBUG_REQ(D_HA, close_req, "updating close body with new fh"); - epoch->mio_handle = body->mbo_handle; - } -} - -void mdc_commit_open(struct ptlrpc_request *req) -{ - struct md_open_data *mod = req->rq_cb_data; - - if (!mod) - return; - - /** - * No need to touch md_open_data::mod_och, it holds a reference on - * \var mod and will zero references to each other, \var mod will be - * freed after that when md_open_data::mod_och will put the reference. 
- */ - - /** - * Do not let open request to disappear as it still may be needed - * for close rpc to happen (it may happen on evict only, otherwise - * ptlrpc_request::rq_replay does not let mdc_commit_open() to be - * called), just mark this rpc as committed to distinguish these 2 - * cases, see mdc_close() for details. The open request reference will - * be put along with freeing \var mod. - */ - ptlrpc_request_addref(req); - spin_lock(&req->rq_lock); - req->rq_committed = 1; - spin_unlock(&req->rq_lock); - req->rq_cb_data = NULL; - obd_mod_put(mod); -} - -int mdc_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct lookup_intent *it) -{ - struct md_open_data *mod; - struct mdt_rec_create *rec; - struct mdt_body *body; - struct ptlrpc_request *open_req = it->it_request; - struct obd_import *imp = open_req->rq_import; - - if (!open_req->rq_replay) - return 0; - - rec = req_capsule_client_get(&open_req->rq_pill, &RMF_REC_REINT); - body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY); - LASSERT(rec); - /* Incoming message in my byte order (it's been swabbed). */ - /* Outgoing messages always in my byte order. */ - LASSERT(body); - - /* Only if the import is replayable, we set replay_open data */ - if (och && imp->imp_replayable) { - mod = obd_mod_alloc(); - if (!mod) { - DEBUG_REQ(D_ERROR, open_req, - "Can't allocate md_open_data"); - return 0; - } - - /** - * Take a reference on \var mod, to be freed on mdc_close(). - * It protects \var mod from being freed on eviction (commit - * callback is called despite rq_replay flag). - * Another reference for \var och. 
- */ - obd_mod_get(mod); - obd_mod_get(mod); - - spin_lock(&open_req->rq_lock); - och->och_mod = mod; - mod->mod_och = och; - mod->mod_is_create = it_disposition(it, DISP_OPEN_CREATE) || - it_disposition(it, DISP_OPEN_STRIPE); - mod->mod_open_req = open_req; - open_req->rq_cb_data = mod; - open_req->rq_commit_cb = mdc_commit_open; - spin_unlock(&open_req->rq_lock); - } - - rec->cr_fid2 = body->mbo_fid1; - rec->cr_ioepoch = body->mbo_ioepoch; - rec->cr_old_handle.cookie = body->mbo_handle.cookie; - open_req->rq_replay_cb = mdc_replay_open; - if (!fid_is_sane(&body->mbo_fid1)) { - DEBUG_REQ(D_ERROR, open_req, - "Saving replay request with insane fid"); - LBUG(); - } - - DEBUG_REQ(D_RPCTRACE, open_req, "Set up open replay data"); - return 0; -} - -static void mdc_free_open(struct md_open_data *mod) -{ - int committed = 0; - - if (mod->mod_is_create == 0 && - imp_connect_disp_stripe(mod->mod_open_req->rq_import)) - committed = 1; - - /* - * No reason to asssert here if the open request has - * rq_replay == 1. It means that mdc_close failed, and - * close request wasn`t sent. It is not fatal to client. - * The worst thing is eviction if the client gets open lock - */ - DEBUG_REQ(D_RPCTRACE, mod->mod_open_req, - "free open request rq_replay = %d\n", - mod->mod_open_req->rq_replay); - - ptlrpc_request_committed(mod->mod_open_req, committed); - if (mod->mod_close_req) - ptlrpc_request_committed(mod->mod_close_req, committed); -} - -static int mdc_clear_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och) -{ - struct md_open_data *mod = och->och_mod; - - /** - * It is possible to not have \var mod in a case of eviction between - * lookup and ll_file_open(). 
- **/ - if (!mod) - return 0; - - LASSERT(mod != LP_POISON); - LASSERT(mod->mod_open_req); - mdc_free_open(mod); - - mod->mod_och = NULL; - och->och_mod = NULL; - obd_mod_put(mod); - - return 0; -} - -static int mdc_close(struct obd_export *exp, struct md_op_data *op_data, - struct md_open_data *mod, struct ptlrpc_request **request) -{ - struct obd_device *obd = class_exp2obd(exp); - struct ptlrpc_request *req; - struct req_format *req_fmt; - int rc; - int saved_rc = 0; - - if (op_data->op_bias & MDS_HSM_RELEASE) { - req_fmt = &RQF_MDS_INTENT_CLOSE; - - /* allocate a FID for volatile file */ - rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); - if (rc < 0) { - CERROR("%s: " DFID " failed to allocate FID: %d\n", - obd->obd_name, PFID(&op_data->op_fid1), rc); - /* save the errcode and proceed to close */ - saved_rc = rc; - } - } else if (op_data->op_bias & MDS_CLOSE_LAYOUT_SWAP) { - req_fmt = &RQF_MDS_INTENT_CLOSE; - } else { - req_fmt = &RQF_MDS_CLOSE; - } - - *request = NULL; - if (OBD_FAIL_CHECK(OBD_FAIL_MDC_CLOSE)) - req = NULL; - else - req = ptlrpc_request_alloc(class_exp2cliimp(exp), req_fmt); - - /* Ensure that this close's handle is fixed up during replay. */ - if (likely(mod)) { - LASSERTF(mod->mod_open_req && - mod->mod_open_req->rq_type != LI_POISON, - "POISONED open %p!\n", mod->mod_open_req); - - mod->mod_close_req = req; - - DEBUG_REQ(D_HA, mod->mod_open_req, "matched open"); - /* We no longer want to preserve this open for replay even - * though the open was committed. 
b=3632, b=3633 - */ - spin_lock(&mod->mod_open_req->rq_lock); - mod->mod_open_req->rq_replay = 0; - spin_unlock(&mod->mod_open_req->rq_lock); - } else { - CDEBUG(D_HA, - "couldn't find open req; expecting close error\n"); - } - if (!req) { - /* - * TODO: repeat close after errors - */ - CWARN("%s: close of FID " DFID " failed, file reference will be dropped when this client unmounts or is evicted\n", - obd->obd_name, PFID(&op_data->op_fid1)); - rc = -ENOMEM; - goto out; - } - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_CLOSE); - if (rc) { - ptlrpc_request_free(req); - req = NULL; - goto out; - } - - /* - * To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a - * portal whose threads are not taking any DLM locks and are therefore - * always progressing - */ - req->rq_request_portal = MDS_READPAGE_PORTAL; - ptlrpc_at_set_req_timeout(req); - - mdc_close_pack(req, op_data); - - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obd->u.cli.cl_default_mds_easize); - - ptlrpc_request_set_replen(req); - - mdc_get_mod_rpc_slot(req, NULL); - rc = ptlrpc_queue_wait(req); - mdc_put_mod_rpc_slot(req, NULL); - - if (!req->rq_repmsg) { - CDEBUG(D_RPCTRACE, "request failed to send: %p, %d\n", req, - req->rq_status); - if (rc == 0) - rc = req->rq_status ?: -EIO; - } else if (rc == 0 || rc == -EAGAIN) { - struct mdt_body *body; - - rc = lustre_msg_get_status(req->rq_repmsg); - if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) { - DEBUG_REQ(D_ERROR, req, - "type == PTL_RPC_MSG_ERR, err = %d", rc); - if (rc > 0) - rc = -rc; - } - body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (!body) - rc = -EPROTO; - } else if (rc == -ESTALE) { - /** - * it can be allowed error after 3633 if open was committed and - * server failed before close was sent. 
Let's check if mod - * exists and return no error in that case - */ - if (mod) { - DEBUG_REQ(D_HA, req, "Reset ESTALE = %d", rc); - if (mod->mod_open_req->rq_committed) - rc = 0; - } - } - -out: - if (mod) { - if (rc != 0) - mod->mod_close_req = NULL; - /* Since now, mod is accessed through open_req only, - * thus close req does not keep a reference on mod anymore. - */ - obd_mod_put(mod); - } - *request = req; - return rc < 0 ? rc : saved_rc; -} - -static int mdc_getpage(struct obd_export *exp, const struct lu_fid *fid, - u64 offset, struct page **pages, int npages, - struct ptlrpc_request **request) -{ - struct ptlrpc_bulk_desc *desc; - struct ptlrpc_request *req; - wait_queue_head_t waitq; - int resends = 0; - int rc; - int i; - - *request = NULL; - init_waitqueue_head(&waitq); - -restart_bulk: - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_READPAGE); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - req->rq_request_portal = MDS_READPAGE_PORTAL; - ptlrpc_at_set_req_timeout(req); - - desc = ptlrpc_prep_bulk_imp(req, npages, 1, - PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV, - MDS_BULK_PORTAL, - &ptlrpc_bulk_kiov_pin_ops); - if (!desc) { - ptlrpc_request_free(req); - return -ENOMEM; - } - - /* NB req now owns desc and will free it when it gets freed */ - for (i = 0; i < npages; i++) - desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE); - - mdc_readdir_pack(req, offset, PAGE_SIZE * npages, fid); - - ptlrpc_request_set_replen(req); - rc = ptlrpc_queue_wait(req); - if (rc) { - ptlrpc_req_finished(req); - if (rc != -ETIMEDOUT) - return rc; - - resends++; - if (!client_should_resend(resends, &exp->exp_obd->u.cli)) { - CERROR("%s: too many resend retries: rc = %d\n", - exp->exp_obd->obd_name, -EIO); - return -EIO; - } - wait_event_idle_timeout(waitq, 0, resends * HZ); - - goto restart_bulk; - } - - rc = sptlrpc_cli_unwrap_bulk_read(req, 
req->rq_bulk, - req->rq_bulk->bd_nob_transferred); - if (rc < 0) { - ptlrpc_req_finished(req); - return rc; - } - - if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) { - CERROR("%s: unexpected bytes transferred: %d (%ld expected)\n", - exp->exp_obd->obd_name, req->rq_bulk->bd_nob_transferred, - PAGE_SIZE * npages); - ptlrpc_req_finished(req); - return -EPROTO; - } - - *request = req; - return 0; -} - -static void mdc_release_page(struct page *page, int remove) -{ - if (remove) { - lock_page(page); - if (likely(page->mapping)) - truncate_complete_page(page->mapping, page); - unlock_page(page); - } - put_page(page); -} - -static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash, - __u64 *start, __u64 *end, int hash64) -{ - /* - * Complement of hash is used as an index so that - * radix_tree_gang_lookup() can be used to find a page with starting - * hash _smaller_ than one we are looking for. - */ - unsigned long offset = hash_x_index(*hash, hash64); - struct page *page; - int found; - - xa_lock_irq(&mapping->i_pages); - found = radix_tree_gang_lookup(&mapping->i_pages, - (void **)&page, offset, 1); - if (found > 0 && !radix_tree_exceptional_entry(page)) { - struct lu_dirpage *dp; - - get_page(page); - xa_unlock_irq(&mapping->i_pages); - /* - * In contrast to find_lock_page() we are sure that directory - * page cannot be truncated (while DLM lock is held) and, - * hence, can avoid restart. - * - * In fact, page cannot be locked here at all, because - * mdc_read_page_remote does synchronous io. 
- */ - wait_on_page_locked(page); - if (PageUptodate(page)) { - dp = kmap(page); - if (BITS_PER_LONG == 32 && hash64) { - *start = le64_to_cpu(dp->ldp_hash_start) >> 32; - *end = le64_to_cpu(dp->ldp_hash_end) >> 32; - *hash = *hash >> 32; - } else { - *start = le64_to_cpu(dp->ldp_hash_start); - *end = le64_to_cpu(dp->ldp_hash_end); - } - if (unlikely(*start == 1 && *hash == 0)) - *hash = *start; - else - LASSERTF(*start <= *hash, "start = %#llx,end = %#llx,hash = %#llx\n", - *start, *end, *hash); - CDEBUG(D_VFSTRACE, "offset %lx [%#llx %#llx], hash %#llx\n", - offset, *start, *end, *hash); - if (*hash > *end) { - kunmap(page); - mdc_release_page(page, 0); - page = NULL; - } else if (*end != *start && *hash == *end) { - /* - * upon hash collision, remove this page, - * otherwise put page reference, and - * mdc_read_page_remote() will issue RPC to - * fetch the page we want. - */ - kunmap(page); - mdc_release_page(page, - le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE); - page = NULL; - } - } else { - put_page(page); - page = ERR_PTR(-EIO); - } - } else { - xa_unlock_irq(&mapping->i_pages); - page = NULL; - } - return page; -} - -/* - * Adjust a set of pages, each page containing an array of lu_dirpages, - * so that each page can be used as a single logical lu_dirpage. - * - * A lu_dirpage is laid out as follows, where s = ldp_hash_start, - * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a - * struct lu_dirent. It has size up to LU_PAGE_SIZE. The ldp_hash_end - * value is used as a cookie to request the next lu_dirpage in a - * directory listing that spans multiple pages (two in this example): - * ________ - * | | - * .|--------v------- -----. - * |s|e|f|p|ent|ent| ... |ent| - * '--|-------------- -----' Each PAGE contains a single - * '------. lu_dirpage. - * .---------v------- -----. - * |s|e|f|p|ent| 0 | ... 
| 0 | - * '----------------- -----' - * - * However, on hosts where the native VM page size (PAGE_SIZE) is - * larger than LU_PAGE_SIZE, a single host page may contain multiple - * lu_dirpages. After reading the lu_dirpages from the MDS, the - * ldp_hash_end of the first lu_dirpage refers to the one immediately - * after it in the same PAGE (arrows simplified for brevity, but - * in general e0==s1, e1==s2, etc.): - * - * .-------------------- -----. - * |s0|e0|f0|p|ent|ent| ... |ent| - * |---v---------------- -----| - * |s1|e1|f1|p|ent|ent| ... |ent| - * |---v---------------- -----| Here, each PAGE contains - * ... multiple lu_dirpages. - * |---v---------------- -----| - * |s'|e'|f'|p|ent|ent| ... |ent| - * '---|---------------- -----' - * v - * .----------------------------. - * | next PAGE | - * - * This structure is transformed into a single logical lu_dirpage as follows: - * - * - Replace e0 with e' so the request for the next lu_dirpage gets the page - * labeled 'next PAGE'. - * - * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether - * a hash collision with the next page exists. - * - * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span - * to the first entry of the next lu_dirpage. - */ -#if PAGE_SIZE > LU_PAGE_SIZE -static void mdc_adjust_dirpages(struct page **pages, int cfs_pgs, int lu_pgs) -{ - int i; - - for (i = 0; i < cfs_pgs; i++) { - struct lu_dirpage *dp = kmap(pages[i]); - __u64 hash_end = le64_to_cpu(dp->ldp_hash_end); - __u32 flags = le32_to_cpu(dp->ldp_flags); - struct lu_dirpage *first = dp; - - while (--lu_pgs > 0) { - struct lu_dirent *end_dirent = NULL; - struct lu_dirent *ent; - - for (ent = lu_dirent_start(dp); ent; - ent = lu_dirent_next(ent)) - end_dirent = ent; - - /* Advance dp to next lu_dirpage. */ - dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE); - - /* Check if we've reached the end of the CFS_PAGE. 
*/ - if (!((unsigned long)dp & ~PAGE_MASK)) - break; - - /* Save the hash and flags of this lu_dirpage. */ - hash_end = le64_to_cpu(dp->ldp_hash_end); - flags = le32_to_cpu(dp->ldp_flags); - - /* Check if lu_dirpage contains no entries. */ - if (!end_dirent) - break; - - /* - * Enlarge the end entry lde_reclen from 0 to - * first entry of next lu_dirpage. - */ - LASSERT(!le16_to_cpu(end_dirent->lde_reclen)); - end_dirent->lde_reclen = - cpu_to_le16((char *)(dp->ldp_entries) - - (char *)end_dirent); - } - - first->ldp_hash_end = hash_end; - first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE); - first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE); - - kunmap(pages[i]); - } - LASSERTF(lu_pgs == 0, "left = %d", lu_pgs); -} -#else -#define mdc_adjust_dirpages(pages, cfs_pgs, lu_pgs) do {} while (0) -#endif /* PAGE_SIZE > LU_PAGE_SIZE */ - -/* parameters for readdir page */ -struct readpage_param { - struct md_op_data *rp_mod; - __u64 rp_off; - int rp_hash64; - struct obd_export *rp_exp; - struct md_callback *rp_cb; -}; - -/** - * Read pages from server. - * - * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains - * a header lu_dirpage which describes the start/end hash, and whether this - * page is empty (contains no dir entry) or hash collide with next page. - * After client receives reply, several pages will be integrated into dir page - * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the - * lu_dirpage for this integrated page will be adjusted. 
- **/ -static int mdc_read_page_remote(void *data, struct page *page0) -{ - struct readpage_param *rp = data; - struct page **page_pool; - struct page *page; - struct lu_dirpage *dp; - int rd_pgs = 0; /* number of pages read actually */ - int npages; - struct md_op_data *op_data = rp->rp_mod; - struct ptlrpc_request *req; - int max_pages = op_data->op_max_pages; - struct inode *inode; - struct lu_fid *fid; - int i; - int rc; - - LASSERT(max_pages > 0 && max_pages <= PTLRPC_MAX_BRW_PAGES); - inode = op_data->op_data; - fid = &op_data->op_fid1; - LASSERT(inode); - - page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS); - if (page_pool) { - page_pool[0] = page0; - } else { - page_pool = &page0; - max_pages = 1; - } - - for (npages = 1; npages < max_pages; npages++) { - page = page_cache_alloc(inode->i_mapping); - if (!page) - break; - page_pool[npages] = page; - } - - rc = mdc_getpage(rp->rp_exp, fid, rp->rp_off, page_pool, npages, &req); - if (!rc) { - int lu_pgs = req->rq_bulk->bd_nob_transferred; - - rd_pgs = (req->rq_bulk->bd_nob_transferred + - PAGE_SIZE - 1) >> PAGE_SHIFT; - lu_pgs >>= LU_PAGE_SHIFT; - LASSERT(!(req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK)); - - CDEBUG(D_INODE, "read %d(%d) pages\n", rd_pgs, lu_pgs); - - mdc_adjust_dirpages(page_pool, rd_pgs, lu_pgs); - - SetPageUptodate(page0); - } - - unlock_page(page0); - ptlrpc_req_finished(req); - CDEBUG(D_CACHE, "read %d/%d pages\n", rd_pgs, npages); - for (i = 1; i < npages; i++) { - unsigned long offset; - __u64 hash; - int ret; - - page = page_pool[i]; - - if (rc < 0 || i >= rd_pgs) { - put_page(page); - continue; - } - - SetPageUptodate(page); - - dp = kmap(page); - hash = le64_to_cpu(dp->ldp_hash_start); - kunmap(page); - - offset = hash_x_index(hash, rp->rp_hash64); - - prefetchw(&page->flags); - ret = add_to_page_cache_lru(page, inode->i_mapping, offset, - GFP_KERNEL); - if (!ret) - unlock_page(page); - else - CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: rc = %d\n", - offset, ret); 
- put_page(page); - } - - if (page_pool != &page0) - kfree(page_pool); - - return rc; -} - -/** - * Read dir page from cache first, if it can not find it, read it from - * server and add into the cache. - * - * \param[in] exp MDC export - * \param[in] op_data client MD stack parameters, transferring parameters - * between different layers on client MD stack. - * \param[in] cb_op callback required for ldlm lock enqueue during - * read page - * \param[in] hash_offset the hash offset of the page to be read - * \param[in] ppage the page to be read - * - * retval = 0 get the page successfully - * errno(<0) get the page failed - */ -static int mdc_read_page(struct obd_export *exp, struct md_op_data *op_data, - struct md_callback *cb_op, __u64 hash_offset, - struct page **ppage) -{ - struct lookup_intent it = { .it_op = IT_READDIR }; - struct page *page; - struct inode *dir = op_data->op_data; - struct address_space *mapping; - struct lu_dirpage *dp; - __u64 start = 0; - __u64 end = 0; - struct lustre_handle lockh; - struct ptlrpc_request *enq_req = NULL; - struct readpage_param rp_param; - int rc; - - *ppage = NULL; - - LASSERT(dir); - mapping = dir->i_mapping; - - rc = mdc_intent_lock(exp, op_data, &it, &enq_req, - cb_op->md_blocking_ast, 0); - if (enq_req) - ptlrpc_req_finished(enq_req); - - if (rc < 0) { - CERROR("%s: " DFID " lock enqueue fails: rc = %d\n", - exp->exp_obd->obd_name, PFID(&op_data->op_fid1), rc); - return rc; - } - - rc = 0; - lockh.cookie = it.it_lock_handle; - mdc_set_lock_data(exp, &lockh, dir, NULL); - - rp_param.rp_off = hash_offset; - rp_param.rp_hash64 = op_data->op_cli_flags & CLI_HASH64; - page = mdc_page_locate(mapping, &rp_param.rp_off, &start, &end, - rp_param.rp_hash64); - if (IS_ERR(page)) { - CDEBUG(D_INFO, "%s: dir page locate: " DFID " at %llu: rc %ld\n", - exp->exp_obd->obd_name, PFID(&op_data->op_fid1), - rp_param.rp_off, PTR_ERR(page)); - rc = PTR_ERR(page); - goto out_unlock; - } else if (page) { - /* - * XXX nikita: not entirely 
correct handling of a corner case: - * suppose hash chain of entries with hash value HASH crosses - * border between pages P0 and P1. First both P0 and P1 are - * cached, seekdir() is called for some entry from the P0 part - * of the chain. Later P0 goes out of cache. telldir(HASH) - * happens and finds P1, as it starts with matching hash - * value. Remaining entries from P0 part of the chain are - * skipped. (Is that really a bug?) - * - * Possible solutions: 0. don't cache P1 is such case, handle - * it as an "overflow" page. 1. invalidate all pages at - * once. 2. use HASH|1 as an index for P1. - */ - goto hash_collision; - } - - rp_param.rp_exp = exp; - rp_param.rp_mod = op_data; - page = read_cache_page(mapping, - hash_x_index(rp_param.rp_off, - rp_param.rp_hash64), - mdc_read_page_remote, &rp_param); - if (IS_ERR(page)) { - CERROR("%s: read cache page: " DFID " at %llu: rc %ld\n", - exp->exp_obd->obd_name, PFID(&op_data->op_fid1), - rp_param.rp_off, PTR_ERR(page)); - rc = PTR_ERR(page); - goto out_unlock; - } - - wait_on_page_locked(page); - (void)kmap(page); - if (!PageUptodate(page)) { - CERROR("%s: page not updated: " DFID " at %llu: rc %d\n", - exp->exp_obd->obd_name, PFID(&op_data->op_fid1), - rp_param.rp_off, -5); - goto fail; - } - if (!PageChecked(page)) - SetPageChecked(page); - if (PageError(page)) { - CERROR("%s: page error: " DFID " at %llu: rc %d\n", - exp->exp_obd->obd_name, PFID(&op_data->op_fid1), - rp_param.rp_off, -5); - goto fail; - } - -hash_collision: - dp = page_address(page); - if (BITS_PER_LONG == 32 && rp_param.rp_hash64) { - start = le64_to_cpu(dp->ldp_hash_start) >> 32; - end = le64_to_cpu(dp->ldp_hash_end) >> 32; - rp_param.rp_off = hash_offset >> 32; - } else { - start = le64_to_cpu(dp->ldp_hash_start); - end = le64_to_cpu(dp->ldp_hash_end); - rp_param.rp_off = hash_offset; - } - if (end == start) { - LASSERT(start == rp_param.rp_off); - CWARN("Page-wide hash collision: %#lx\n", (unsigned long)end); -#if BITS_PER_LONG == 32 - 
CWARN("Real page-wide hash collision at [%llu %llu] with hash %llu\n", - le64_to_cpu(dp->ldp_hash_start), - le64_to_cpu(dp->ldp_hash_end), hash_offset); -#endif - /* - * Fetch whole overflow chain... - * - * XXX not yet. - */ - goto fail; - } - *ppage = page; -out_unlock: - ldlm_lock_decref(&lockh, it.it_lock_mode); - return rc; -fail: - kunmap(page); - mdc_release_page(page, 1); - rc = -EIO; - goto out_unlock; -} - -static int mdc_statfs(const struct lu_env *env, - struct obd_export *exp, struct obd_statfs *osfs, - __u64 max_age, __u32 flags) -{ - struct obd_device *obd = class_exp2obd(exp); - struct ptlrpc_request *req; - struct obd_statfs *msfs; - struct obd_import *imp = NULL; - int rc; - - /* - * Since the request might also come from lprocfs, so we need - * sync this with client_disconnect_export Bug15684 - */ - down_read(&obd->u.cli.cl_sem); - if (obd->u.cli.cl_import) - imp = class_import_get(obd->u.cli.cl_import); - up_read(&obd->u.cli.cl_sem); - if (!imp) - return -ENODEV; - - req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS, - LUSTRE_MDS_VERSION, MDS_STATFS); - if (!req) { - rc = -ENOMEM; - goto output; - } - - ptlrpc_request_set_replen(req); - - if (flags & OBD_STATFS_NODELAY) { - /* procfs requests not want stay in wait for avoid deadlock */ - req->rq_no_resend = 1; - req->rq_no_delay = 1; - } - - rc = ptlrpc_queue_wait(req); - if (rc) { - /* check connection error first */ - if (imp->imp_connect_error) - rc = imp->imp_connect_error; - goto out; - } - - msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS); - if (!msfs) { - rc = -EPROTO; - goto out; - } - - *osfs = *msfs; -out: - ptlrpc_req_finished(req); -output: - class_import_put(imp); - return rc; -} - -static int mdc_ioc_fid2path(struct obd_export *exp, struct getinfo_fid2path *gf) -{ - __u32 keylen, vallen; - void *key; - int rc; - - if (gf->gf_pathlen > PATH_MAX) - return -ENAMETOOLONG; - if (gf->gf_pathlen < 2) - return -EOVERFLOW; - - /* Key is KEY_FID2PATH + getinfo_fid2path 
description */ - keylen = cfs_size_round(sizeof(KEY_FID2PATH)) + sizeof(*gf); - key = kzalloc(keylen, GFP_NOFS); - if (!key) - return -ENOMEM; - memcpy(key, KEY_FID2PATH, sizeof(KEY_FID2PATH)); - memcpy(key + cfs_size_round(sizeof(KEY_FID2PATH)), gf, sizeof(*gf)); - - CDEBUG(D_IOCTL, "path get " DFID " from %llu #%d\n", - PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno); - - if (!fid_is_sane(&gf->gf_fid)) { - rc = -EINVAL; - goto out; - } - - /* Val is struct getinfo_fid2path result plus path */ - vallen = sizeof(*gf) + gf->gf_pathlen; - - rc = obd_get_info(NULL, exp, keylen, key, &vallen, gf); - if (rc != 0 && rc != -EREMOTE) - goto out; - - if (vallen <= sizeof(*gf)) { - rc = -EPROTO; - goto out; - } else if (vallen > sizeof(*gf) + gf->gf_pathlen) { - rc = -EOVERFLOW; - goto out; - } - - CDEBUG(D_IOCTL, "path got " DFID " from %llu #%d: %s\n", - PFID(&gf->gf_fid), gf->gf_recno, gf->gf_linkno, - gf->gf_pathlen < 512 ? gf->gf_path : - /* only log the last 512 characters of the path */ - gf->gf_path + gf->gf_pathlen - 512); - -out: - kfree(key); - return rc; -} - -static int mdc_ioc_hsm_progress(struct obd_export *exp, - struct hsm_progress_kernel *hpk) -{ - struct obd_import *imp = class_exp2cliimp(exp); - struct hsm_progress_kernel *req_hpk; - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_PROGRESS, - LUSTRE_MDS_VERSION, MDS_HSM_PROGRESS); - if (!req) { - rc = -ENOMEM; - goto out; - } - - mdc_pack_body(req, NULL, 0, 0, -1, 0); - - /* Copy hsm_progress struct */ - req_hpk = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_PROGRESS); - if (!req_hpk) { - rc = -EPROTO; - goto out; - } - - *req_hpk = *hpk; - req_hpk->hpk_errval = lustre_errno_hton(hpk->hpk_errval); - - ptlrpc_request_set_replen(req); - - mdc_get_mod_rpc_slot(req, NULL); - rc = ptlrpc_queue_wait(req); - mdc_put_mod_rpc_slot(req, NULL); -out: - ptlrpc_req_finished(req); - return rc; -} - -static int mdc_ioc_hsm_ct_register(struct obd_import *imp, __u32 
archives) -{ - __u32 *archive_mask; - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_CT_REGISTER, - LUSTRE_MDS_VERSION, - MDS_HSM_CT_REGISTER); - if (!req) { - rc = -ENOMEM; - goto out; - } - - mdc_pack_body(req, NULL, 0, 0, -1, 0); - - /* Copy hsm_progress struct */ - archive_mask = req_capsule_client_get(&req->rq_pill, - &RMF_MDS_HSM_ARCHIVE); - if (!archive_mask) { - rc = -EPROTO; - goto out; - } - - *archive_mask = archives; - - ptlrpc_request_set_replen(req); - - rc = mdc_queue_wait(req); -out: - ptlrpc_req_finished(req); - return rc; -} - -static int mdc_ioc_hsm_current_action(struct obd_export *exp, - struct md_op_data *op_data) -{ - struct hsm_current_action *hca = op_data->op_data; - struct hsm_current_action *req_hca; - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_HSM_ACTION); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_ACTION); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_pack_body(req, &op_data->op_fid1, 0, 0, - op_data->op_suppgids[0], 0); - - ptlrpc_request_set_replen(req); - - rc = mdc_queue_wait(req); - if (rc) - goto out; - - req_hca = req_capsule_server_get(&req->rq_pill, - &RMF_MDS_HSM_CURRENT_ACTION); - if (!req_hca) { - rc = -EPROTO; - goto out; - } - - *hca = *req_hca; - -out: - ptlrpc_req_finished(req); - return rc; -} - -static int mdc_ioc_hsm_ct_unregister(struct obd_import *imp) -{ - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_HSM_CT_UNREGISTER, - LUSTRE_MDS_VERSION, - MDS_HSM_CT_UNREGISTER); - if (!req) { - rc = -ENOMEM; - goto out; - } - - mdc_pack_body(req, NULL, 0, 0, -1, 0); - - ptlrpc_request_set_replen(req); - - rc = mdc_queue_wait(req); -out: - ptlrpc_req_finished(req); - return rc; -} - -static int mdc_ioc_hsm_state_get(struct obd_export *exp, - struct md_op_data *op_data) -{ - struct hsm_user_state *hus = 
op_data->op_data; - struct hsm_user_state *req_hus; - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_HSM_STATE_GET); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_STATE_GET); - if (rc != 0) { - ptlrpc_request_free(req); - return rc; - } - - mdc_pack_body(req, &op_data->op_fid1, 0, 0, - op_data->op_suppgids[0], 0); - - ptlrpc_request_set_replen(req); - - rc = mdc_queue_wait(req); - if (rc) - goto out; - - req_hus = req_capsule_server_get(&req->rq_pill, &RMF_HSM_USER_STATE); - if (!req_hus) { - rc = -EPROTO; - goto out; - } - - *hus = *req_hus; - -out: - ptlrpc_req_finished(req); - return rc; -} - -static int mdc_ioc_hsm_state_set(struct obd_export *exp, - struct md_op_data *op_data) -{ - struct hsm_state_set *hss = op_data->op_data; - struct hsm_state_set *req_hss; - struct ptlrpc_request *req; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_HSM_STATE_SET); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_STATE_SET); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_pack_body(req, &op_data->op_fid1, 0, 0, - op_data->op_suppgids[0], 0); - - /* Copy states */ - req_hss = req_capsule_client_get(&req->rq_pill, &RMF_HSM_STATE_SET); - if (!req_hss) { - rc = -EPROTO; - goto out; - } - *req_hss = *hss; - - ptlrpc_request_set_replen(req); - - mdc_get_mod_rpc_slot(req, NULL); - rc = ptlrpc_queue_wait(req); - mdc_put_mod_rpc_slot(req, NULL); -out: - ptlrpc_req_finished(req); - return rc; -} - -static int mdc_ioc_hsm_request(struct obd_export *exp, - struct hsm_user_request *hur) -{ - struct obd_import *imp = class_exp2cliimp(exp); - struct ptlrpc_request *req; - struct hsm_request *req_hr; - struct hsm_user_item *req_hui; - char *req_opaque; - int rc; - - req = ptlrpc_request_alloc(imp, &RQF_MDS_HSM_REQUEST); - if (!req) { - rc = -ENOMEM; - goto out; - } - - req_capsule_set_size(&req->rq_pill, 
&RMF_MDS_HSM_USER_ITEM, RCL_CLIENT, - hur->hur_request.hr_itemcount - * sizeof(struct hsm_user_item)); - req_capsule_set_size(&req->rq_pill, &RMF_GENERIC_DATA, RCL_CLIENT, - hur->hur_request.hr_data_len); - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_HSM_REQUEST); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_pack_body(req, NULL, 0, 0, -1, 0); - - /* Copy hsm_request struct */ - req_hr = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_REQUEST); - if (!req_hr) { - rc = -EPROTO; - goto out; - } - *req_hr = hur->hur_request; - - /* Copy hsm_user_item structs */ - req_hui = req_capsule_client_get(&req->rq_pill, &RMF_MDS_HSM_USER_ITEM); - if (!req_hui) { - rc = -EPROTO; - goto out; - } - memcpy(req_hui, hur->hur_user_item, - hur->hur_request.hr_itemcount * sizeof(struct hsm_user_item)); - - /* Copy opaque field */ - req_opaque = req_capsule_client_get(&req->rq_pill, &RMF_GENERIC_DATA); - if (!req_opaque) { - rc = -EPROTO; - goto out; - } - memcpy(req_opaque, hur_data(hur), hur->hur_request.hr_data_len); - - ptlrpc_request_set_replen(req); - - mdc_get_mod_rpc_slot(req, NULL); - rc = ptlrpc_queue_wait(req); - mdc_put_mod_rpc_slot(req, NULL); -out: - ptlrpc_req_finished(req); - return rc; -} - -static struct kuc_hdr *changelog_kuc_hdr(char *buf, size_t len, u32 flags) -{ - struct kuc_hdr *lh = (struct kuc_hdr *)buf; - - LASSERT(len <= KUC_CHANGELOG_MSG_MAXSIZE); - - lh->kuc_magic = KUC_MAGIC; - lh->kuc_transport = KUC_TRANSPORT_CHANGELOG; - lh->kuc_flags = flags; - lh->kuc_msgtype = CL_RECORD; - lh->kuc_msglen = len; - return lh; -} - -struct changelog_show { - __u64 cs_startrec; - enum changelog_send_flag cs_flags; - struct file *cs_fp; - char *cs_buf; - struct obd_device *cs_obd; -}; - -static inline char *cs_obd_name(struct changelog_show *cs) -{ - return cs->cs_obd->obd_name; -} - -static int changelog_kkuc_cb(const struct lu_env *env, struct llog_handle *llh, - struct llog_rec_hdr *hdr, void *data) -{ - struct changelog_show *cs = 
data; - struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr; - struct kuc_hdr *lh; - size_t len; - int rc; - - if (rec->cr_hdr.lrh_type != CHANGELOG_REC) { - rc = -EINVAL; - CERROR("%s: not a changelog rec %x/%d: rc = %d\n", - cs_obd_name(cs), rec->cr_hdr.lrh_type, - rec->cr.cr_type, rc); - return rc; - } - - if (rec->cr.cr_index < cs->cs_startrec) { - /* Skip entries earlier than what we are interested in */ - CDEBUG(D_HSM, "rec=%llu start=%llu\n", - rec->cr.cr_index, cs->cs_startrec); - return 0; - } - - CDEBUG(D_HSM, "%llu %02d%-5s %llu 0x%x t=" DFID " p=" DFID - " %.*s\n", rec->cr.cr_index, rec->cr.cr_type, - changelog_type2str(rec->cr.cr_type), rec->cr.cr_time, - rec->cr.cr_flags & CLF_FLAGMASK, - PFID(&rec->cr.cr_tfid), PFID(&rec->cr.cr_pfid), - rec->cr.cr_namelen, changelog_rec_name(&rec->cr)); - - len = sizeof(*lh) + changelog_rec_size(&rec->cr) + rec->cr.cr_namelen; - - /* Set up the message */ - lh = changelog_kuc_hdr(cs->cs_buf, len, cs->cs_flags); - memcpy(lh + 1, &rec->cr, len - sizeof(*lh)); - - rc = libcfs_kkuc_msg_put(cs->cs_fp, lh); - CDEBUG(D_HSM, "kucmsg fp %p len %zu rc %d\n", cs->cs_fp, len, rc); - - return rc; -} - -static int mdc_changelog_send_thread(void *csdata) -{ - enum llog_flag flags = LLOG_F_IS_CAT; - struct changelog_show *cs = csdata; - struct llog_ctxt *ctxt = NULL; - struct llog_handle *llh = NULL; - struct kuc_hdr *kuch; - int rc; - - CDEBUG(D_HSM, "changelog to fp=%p start %llu\n", - cs->cs_fp, cs->cs_startrec); - - cs->cs_buf = kzalloc(KUC_CHANGELOG_MSG_MAXSIZE, GFP_NOFS); - if (!cs->cs_buf) { - rc = -ENOMEM; - goto out; - } - - /* Set up the remote catalog handle */ - ctxt = llog_get_context(cs->cs_obd, LLOG_CHANGELOG_REPL_CTXT); - if (!ctxt) { - rc = -ENOENT; - goto out; - } - rc = llog_open(NULL, ctxt, &llh, NULL, CHANGELOG_CATALOG, - LLOG_OPEN_EXISTS); - if (rc) { - CERROR("%s: fail to open changelog catalog: rc = %d\n", - cs_obd_name(cs), rc); - goto out; - } - - if (cs->cs_flags & CHANGELOG_FLAG_JOBID) - 
flags |= LLOG_F_EXT_JOBID; - - rc = llog_init_handle(NULL, llh, flags, NULL); - if (rc) { - CERROR("llog_init_handle failed %d\n", rc); - goto out; - } - - rc = llog_cat_process(NULL, llh, changelog_kkuc_cb, cs, 0, 0); - - /* Send EOF no matter what our result */ - kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch), cs->cs_flags); - kuch->kuc_msgtype = CL_EOF; - libcfs_kkuc_msg_put(cs->cs_fp, kuch); - -out: - fput(cs->cs_fp); - if (llh) - llog_cat_close(NULL, llh); - if (ctxt) - llog_ctxt_put(ctxt); - kfree(cs->cs_buf); - kfree(cs); - return rc; -} - -static int mdc_ioc_changelog_send(struct obd_device *obd, - struct ioc_changelog *icc) -{ - struct changelog_show *cs; - struct task_struct *task; - int rc; - - /* Freed in mdc_changelog_send_thread */ - cs = kzalloc(sizeof(*cs), GFP_NOFS); - if (!cs) - return -ENOMEM; - - cs->cs_obd = obd; - cs->cs_startrec = icc->icc_recno; - /* matching fput in mdc_changelog_send_thread */ - cs->cs_fp = fget(icc->icc_id); - cs->cs_flags = icc->icc_flags; - - /* - * New thread because we should return to user app before - * writing into our pipe - */ - task = kthread_run(mdc_changelog_send_thread, cs, - "mdc_clg_send_thread"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - CERROR("%s: can't start changelog thread: rc = %d\n", - cs_obd_name(cs), rc); - kfree(cs); - } else { - rc = 0; - CDEBUG(D_HSM, "%s: started changelog thread\n", - cs_obd_name(cs)); - } - - CERROR("Failed to start changelog thread: %d\n", rc); - return rc; -} - -static int mdc_ioc_hsm_ct_start(struct obd_export *exp, - struct lustre_kernelcomm *lk); - -static int mdc_quotactl(struct obd_device *unused, struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - struct ptlrpc_request *req; - struct obd_quotactl *oqc; - int rc; - - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_MDS_QUOTACTL, LUSTRE_MDS_VERSION, - MDS_QUOTACTL); - if (!req) - return -ENOMEM; - - oqc = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - *oqc = *oqctl; - - 
ptlrpc_request_set_replen(req); - ptlrpc_at_set_req_timeout(req); - req->rq_no_resend = 1; - - rc = ptlrpc_queue_wait(req); - if (rc) - CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc); - - if (req->rq_repmsg) { - oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - if (oqc) { - *oqctl = *oqc; - } else if (!rc) { - CERROR("Can't unpack obd_quotactl\n"); - rc = -EPROTO; - } - } else if (!rc) { - CERROR("Can't unpack obd_quotactl\n"); - rc = -EPROTO; - } - ptlrpc_req_finished(req); - - return rc; -} - -static int mdc_ioc_swap_layouts(struct obd_export *exp, - struct md_op_data *op_data) -{ - LIST_HEAD(cancels); - struct ptlrpc_request *req; - int rc, count; - struct mdc_swap_layouts *msl, *payload; - - msl = op_data->op_data; - - /* When the MDT will get the MDS_SWAP_LAYOUTS RPC the - * first thing it will do is to cancel the 2 layout - * locks hold by this client. - * So the client must cancel its layout locks on the 2 fids - * with the request RPC to avoid extra RPC round trips - */ - count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels, - LCK_CR, MDS_INODELOCK_LAYOUT | - MDS_INODELOCK_XATTR); - count += mdc_resource_get_unused(exp, &op_data->op_fid2, &cancels, - LCK_CR, MDS_INODELOCK_LAYOUT | - MDS_INODELOCK_XATTR); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_MDS_SWAP_LAYOUTS); - if (!req) { - ldlm_lock_list_put(&cancels, l_bl_ast, count); - return -ENOMEM; - } - - rc = mdc_prep_elc_req(exp, req, MDS_SWAP_LAYOUTS, &cancels, count); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_swap_layouts_pack(req, op_data); - - payload = req_capsule_client_get(&req->rq_pill, &RMF_SWAP_LAYOUTS); - LASSERT(payload); - - *payload = *msl; - - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - - ptlrpc_req_finished(req); - return rc; -} - -static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void __user *uarg) -{ - struct obd_device *obd = exp->exp_obd; - struct 
obd_ioctl_data *data = karg; - struct obd_import *imp = obd->u.cli.cl_import; - int rc; - - if (!try_module_get(THIS_MODULE)) { - CERROR("%s: cannot get module '%s'\n", obd->obd_name, - module_name(THIS_MODULE)); - return -EINVAL; - } - switch (cmd) { - case OBD_IOC_CHANGELOG_SEND: - rc = mdc_ioc_changelog_send(obd, karg); - goto out; - case OBD_IOC_CHANGELOG_CLEAR: { - struct ioc_changelog *icc = karg; - struct changelog_setinfo cs = { - .cs_recno = icc->icc_recno, - .cs_id = icc->icc_id - }; - - rc = obd_set_info_async(NULL, exp, strlen(KEY_CHANGELOG_CLEAR), - KEY_CHANGELOG_CLEAR, sizeof(cs), &cs, - NULL); - goto out; - } - case OBD_IOC_FID2PATH: - rc = mdc_ioc_fid2path(exp, karg); - goto out; - case LL_IOC_HSM_CT_START: - rc = mdc_ioc_hsm_ct_start(exp, karg); - /* ignore if it was already registered on this MDS. */ - if (rc == -EEXIST) - rc = 0; - goto out; - case LL_IOC_HSM_PROGRESS: - rc = mdc_ioc_hsm_progress(exp, karg); - goto out; - case LL_IOC_HSM_STATE_GET: - rc = mdc_ioc_hsm_state_get(exp, karg); - goto out; - case LL_IOC_HSM_STATE_SET: - rc = mdc_ioc_hsm_state_set(exp, karg); - goto out; - case LL_IOC_HSM_ACTION: - rc = mdc_ioc_hsm_current_action(exp, karg); - goto out; - case LL_IOC_HSM_REQUEST: - rc = mdc_ioc_hsm_request(exp, karg); - goto out; - case OBD_IOC_CLIENT_RECOVER: - rc = ptlrpc_recover_import(imp, data->ioc_inlbuf1, 0); - if (rc < 0) - goto out; - rc = 0; - goto out; - case IOC_OSC_SET_ACTIVE: - rc = ptlrpc_set_import_active(imp, data->ioc_offset); - goto out; - case OBD_IOC_PING_TARGET: - rc = ptlrpc_obd_ping(obd); - goto out; - /* - * Normally IOC_OBD_STATFS, OBD_IOC_QUOTACTL iocontrol are handled by - * LMV instead of MDC. But when the cluster is upgraded from 1.8, - * there'd be no LMV layer thus we might be called here. Eventually - * this code should be removed. - * bz20731, LU-592. 
- */ - case IOC_OBD_STATFS: { - struct obd_statfs stat_buf = {0}; - - if (*((__u32 *)data->ioc_inlbuf2) != 0) { - rc = -ENODEV; - goto out; - } - - /* copy UUID */ - if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(obd), - min_t(size_t, data->ioc_plen2, - sizeof(struct obd_uuid)))) { - rc = -EFAULT; - goto out; - } - - rc = mdc_statfs(NULL, obd->obd_self_export, &stat_buf, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - 0); - if (rc != 0) - goto out; - - if (copy_to_user(data->ioc_pbuf1, &stat_buf, - min_t(size_t, data->ioc_plen1, - sizeof(stat_buf)))) { - rc = -EFAULT; - goto out; - } - - rc = 0; - goto out; - } - case OBD_IOC_QUOTACTL: { - struct if_quotactl *qctl = karg; - struct obd_quotactl *oqctl; - - oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS); - if (!oqctl) { - rc = -ENOMEM; - goto out; - } - - QCTL_COPY(oqctl, qctl); - rc = obd_quotactl(exp, oqctl); - if (rc == 0) { - QCTL_COPY(qctl, oqctl); - qctl->qc_valid = QC_MDTIDX; - qctl->obd_uuid = obd->u.cli.cl_target_uuid; - } - - kfree(oqctl); - goto out; - } - case LL_IOC_GET_CONNECT_FLAGS: - if (copy_to_user(uarg, exp_connect_flags_ptr(exp), - sizeof(*exp_connect_flags_ptr(exp)))) { - rc = -EFAULT; - goto out; - } - - rc = 0; - goto out; - case LL_IOC_LOV_SWAP_LAYOUTS: - rc = mdc_ioc_swap_layouts(exp, karg); - goto out; - default: - CERROR("unrecognised ioctl: cmd = %#x\n", cmd); - rc = -ENOTTY; - goto out; - } -out: - module_put(THIS_MODULE); - - return rc; -} - -static int mdc_get_info_rpc(struct obd_export *exp, - u32 keylen, void *key, - int vallen, void *val) -{ - struct obd_import *imp = class_exp2cliimp(exp); - struct ptlrpc_request *req; - char *tmp; - int rc = -EINVAL; - - req = ptlrpc_request_alloc(imp, &RQF_MDS_GET_INFO); - if (!req) - return -ENOMEM; - - req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_KEY, - RCL_CLIENT, keylen); - req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VALLEN, - RCL_CLIENT, sizeof(__u32)); - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GET_INFO); - if (rc) { 
- ptlrpc_request_free(req); - return rc; - } - - tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_KEY); - memcpy(tmp, key, keylen); - tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_VALLEN); - memcpy(tmp, &vallen, sizeof(__u32)); - - req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VAL, - RCL_SERVER, vallen); - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - /* -EREMOTE means the get_info result is partial, and it needs to - * continue on another MDT, see fid2path part in lmv_iocontrol - */ - if (rc == 0 || rc == -EREMOTE) { - tmp = req_capsule_server_get(&req->rq_pill, &RMF_GETINFO_VAL); - memcpy(val, tmp, vallen); - if (ptlrpc_rep_need_swab(req)) { - if (KEY_IS(KEY_FID2PATH)) - lustre_swab_fid2path(val); - } - } - ptlrpc_req_finished(req); - - return rc; -} - -static void lustre_swab_hai(struct hsm_action_item *h) -{ - __swab32s(&h->hai_len); - __swab32s(&h->hai_action); - lustre_swab_lu_fid(&h->hai_fid); - lustre_swab_lu_fid(&h->hai_dfid); - __swab64s(&h->hai_cookie); - __swab64s(&h->hai_extent.offset); - __swab64s(&h->hai_extent.length); - __swab64s(&h->hai_gid); -} - -static void lustre_swab_hal(struct hsm_action_list *h) -{ - struct hsm_action_item *hai; - u32 i; - - __swab32s(&h->hal_version); - __swab32s(&h->hal_count); - __swab32s(&h->hal_archive_id); - __swab64s(&h->hal_flags); - hai = hai_first(h); - for (i = 0; i < h->hal_count; i++, hai = hai_next(hai)) - lustre_swab_hai(hai); -} - -static void lustre_swab_kuch(struct kuc_hdr *l) -{ - __swab16s(&l->kuc_magic); - /* __u8 l->kuc_transport */ - __swab16s(&l->kuc_msgtype); - __swab16s(&l->kuc_msglen); -} - -static int mdc_ioc_hsm_ct_start(struct obd_export *exp, - struct lustre_kernelcomm *lk) -{ - struct obd_import *imp = class_exp2cliimp(exp); - __u32 archive = lk->lk_data; - int rc = 0; - - if (lk->lk_group != KUC_GRP_HSM) { - CERROR("Bad copytool group %d\n", lk->lk_group); - return -EINVAL; - } - - CDEBUG(D_HSM, "CT start r%d w%d u%d g%d f%#x\n", lk->lk_rfd, 
lk->lk_wfd, - lk->lk_uid, lk->lk_group, lk->lk_flags); - - if (lk->lk_flags & LK_FLG_STOP) { - /* Unregister with the coordinator */ - rc = mdc_ioc_hsm_ct_unregister(imp); - } else { - rc = mdc_ioc_hsm_ct_register(imp, archive); - } - - return rc; -} - -/** - * Send a message to any listening copytools - * @param val KUC message (kuc_hdr + hsm_action_list) - * @param len total length of message - */ -static int mdc_hsm_copytool_send(size_t len, void *val) -{ - struct kuc_hdr *lh = (struct kuc_hdr *)val; - struct hsm_action_list *hal = (struct hsm_action_list *)(lh + 1); - - if (len < sizeof(*lh) + sizeof(*hal)) { - CERROR("Short HSM message %zu < %zu\n", len, - sizeof(*lh) + sizeof(*hal)); - return -EPROTO; - } - if (lh->kuc_magic == __swab16(KUC_MAGIC)) { - lustre_swab_kuch(lh); - lustre_swab_hal(hal); - } else if (lh->kuc_magic != KUC_MAGIC) { - CERROR("Bad magic %x!=%x\n", lh->kuc_magic, KUC_MAGIC); - return -EPROTO; - } - - CDEBUG(D_HSM, - "Received message mg=%x t=%d m=%d l=%d actions=%d on %s\n", - lh->kuc_magic, lh->kuc_transport, lh->kuc_msgtype, - lh->kuc_msglen, hal->hal_count, hal->hal_fsname); - - /* Broadcast to HSM listeners */ - return libcfs_kkuc_group_put(KUC_GRP_HSM, lh); -} - -/** - * callback function passed to kuc for re-registering each HSM copytool - * running on MDC, after MDT shutdown/recovery. 
- * @param data copytool registration data - * @param cb_arg callback argument (obd_import) - */ -static int mdc_hsm_ct_reregister(void *data, void *cb_arg) -{ - struct kkuc_ct_data *kcd = data; - struct obd_import *imp = (struct obd_import *)cb_arg; - int rc; - - if (!kcd || kcd->kcd_magic != KKUC_CT_DATA_MAGIC) - return -EPROTO; - - if (!obd_uuid_equals(&kcd->kcd_uuid, &imp->imp_obd->obd_uuid)) - return 0; - - CDEBUG(D_HA, "%s: recover copytool registration to MDT (archive=%#x)\n", - imp->imp_obd->obd_name, kcd->kcd_archive); - rc = mdc_ioc_hsm_ct_register(imp, kcd->kcd_archive); - - /* ignore error if the copytool is already registered */ - return (rc == -EEXIST) ? 0 : rc; -} - -static int mdc_set_info_async(const struct lu_env *env, - struct obd_export *exp, - u32 keylen, void *key, - u32 vallen, void *val, - struct ptlrpc_request_set *set) -{ - struct obd_import *imp = class_exp2cliimp(exp); - int rc; - - if (KEY_IS(KEY_READ_ONLY)) { - if (vallen != sizeof(int)) - return -EINVAL; - - spin_lock(&imp->imp_lock); - if (*((int *)val)) { - imp->imp_connect_flags_orig |= OBD_CONNECT_RDONLY; - imp->imp_connect_data.ocd_connect_flags |= - OBD_CONNECT_RDONLY; - } else { - imp->imp_connect_flags_orig &= ~OBD_CONNECT_RDONLY; - imp->imp_connect_data.ocd_connect_flags &= - ~OBD_CONNECT_RDONLY; - } - spin_unlock(&imp->imp_lock); - - return do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION, - keylen, key, vallen, val, set); - } - if (KEY_IS(KEY_SPTLRPC_CONF)) { - sptlrpc_conf_client_adapt(exp->exp_obd); - return 0; - } - if (KEY_IS(KEY_FLUSH_CTX)) { - sptlrpc_import_flush_my_ctx(imp); - return 0; - } - if (KEY_IS(KEY_CHANGELOG_CLEAR)) { - rc = do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION, - keylen, key, vallen, val, set); - return rc; - } - if (KEY_IS(KEY_HSM_COPYTOOL_SEND)) { - rc = mdc_hsm_copytool_send(vallen, val); - return rc; - } - if (KEY_IS(KEY_DEFAULT_EASIZE)) { - u32 *default_easize = val; - - exp->exp_obd->u.cli.cl_default_mds_easize = 
*default_easize; - return 0; - } - - CERROR("Unknown key %s\n", (char *)key); - return -EINVAL; -} - -static int mdc_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val) -{ - int rc = -EINVAL; - - if (KEY_IS(KEY_MAX_EASIZE)) { - u32 mdsize, *max_easize; - - if (*vallen != sizeof(int)) - return -EINVAL; - mdsize = *(u32 *)val; - if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize) - exp->exp_obd->u.cli.cl_max_mds_easize = mdsize; - max_easize = val; - *max_easize = exp->exp_obd->u.cli.cl_max_mds_easize; - return 0; - } else if (KEY_IS(KEY_DEFAULT_EASIZE)) { - u32 *default_easize; - - if (*vallen != sizeof(int)) - return -EINVAL; - default_easize = val; - *default_easize = exp->exp_obd->u.cli.cl_default_mds_easize; - return 0; - } else if (KEY_IS(KEY_CONN_DATA)) { - struct obd_import *imp = class_exp2cliimp(exp); - struct obd_connect_data *data = val; - - if (*vallen != sizeof(*data)) - return -EINVAL; - - *data = imp->imp_connect_data; - return 0; - } else if (KEY_IS(KEY_TGT_COUNT)) { - *((u32 *)val) = 1; - return 0; - } - - rc = mdc_get_info_rpc(exp, keylen, key, *vallen, val); - - return rc; -} - -static int mdc_sync(struct obd_export *exp, const struct lu_fid *fid, - struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - int rc; - - *request = NULL; - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_SYNC); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_SYNC); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - mdc_pack_body(req, fid, 0, 0, -1, 0); - - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - if (rc) - ptlrpc_req_finished(req); - else - *request = req; - return rc; -} - -static int mdc_import_event(struct obd_device *obd, struct obd_import *imp, - enum obd_import_event event) -{ - int rc = 0; - - LASSERT(imp->imp_obd == obd); - - switch (event) { - case IMP_EVENT_INACTIVE: { - struct client_obd *cli = 
&obd->u.cli; - /* - * Flush current sequence to make client obtain new one - * from server in case of disconnect/reconnect. - */ - if (cli->cl_seq) - seq_client_flush(cli->cl_seq); - - rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL); - break; - } - case IMP_EVENT_INVALIDATE: { - struct ldlm_namespace *ns = obd->obd_namespace; - - ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); - - break; - } - case IMP_EVENT_ACTIVE: - rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL); - /* redo the kuc registration after reconnecting */ - if (rc == 0) - /* re-register HSM agents */ - rc = libcfs_kkuc_group_foreach(KUC_GRP_HSM, - mdc_hsm_ct_reregister, - (void *)imp); - break; - case IMP_EVENT_OCD: - rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL); - break; - case IMP_EVENT_DISCON: - case IMP_EVENT_DEACTIVATE: - case IMP_EVENT_ACTIVATE: - break; - default: - CERROR("Unknown import event %x\n", event); - LBUG(); - } - return rc; -} - -int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp, - struct lu_fid *fid, struct md_op_data *op_data) -{ - struct client_obd *cli = &exp->exp_obd->u.cli; - struct lu_client_seq *seq = cli->cl_seq; - - return seq_client_alloc_fid(env, seq, fid); -} - -static struct obd_uuid *mdc_get_uuid(struct obd_export *exp) -{ - struct client_obd *cli = &exp->exp_obd->u.cli; - - return &cli->cl_target_uuid; -} - -/** - * Determine whether the lock can be canceled before replaying it during - * recovery, non zero value will be return if the lock can be canceled, - * or zero returned for not - */ -static int mdc_cancel_weight(struct ldlm_lock *lock) -{ - if (lock->l_resource->lr_type != LDLM_IBITS) - return 0; - - /* FIXME: if we ever get into a situation where there are too many - * opened files with open locks on a single node, then we really - * should replay these open locks to reget it - */ - if (lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_OPEN) - return 0; - - return 1; -} - -static int 
mdc_resource_inode_free(struct ldlm_resource *res) -{ - if (res->lr_lvb_inode) - res->lr_lvb_inode = NULL; - - return 0; -} - -static struct ldlm_valblock_ops inode_lvbo = { - .lvbo_free = mdc_resource_inode_free, -}; - -static int mdc_llog_init(struct obd_device *obd) -{ - struct obd_llog_group *olg = &obd->obd_olg; - struct llog_ctxt *ctxt; - int rc; - - rc = llog_setup(NULL, obd, olg, LLOG_CHANGELOG_REPL_CTXT, obd, - &llog_client_ops); - if (rc) - return rc; - - ctxt = llog_group_get_ctxt(olg, LLOG_CHANGELOG_REPL_CTXT); - llog_initiator_connect(ctxt); - llog_ctxt_put(ctxt); - - return 0; -} - -static void mdc_llog_finish(struct obd_device *obd) -{ - struct llog_ctxt *ctxt; - - ctxt = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT); - if (ctxt) - llog_cleanup(NULL, ctxt); -} - -static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg) -{ - struct lprocfs_static_vars lvars = { NULL }; - int rc; - - rc = ptlrpcd_addref(); - if (rc < 0) - return rc; - - rc = client_obd_setup(obd, cfg); - if (rc) - goto err_ptlrpcd_decref; - - lprocfs_mdc_init_vars(&lvars); - lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars); - sptlrpc_lprocfs_cliobd_attach(obd); - ptlrpc_lprocfs_register_obd(obd); - - ns_register_cancel(obd->obd_namespace, mdc_cancel_weight); - - obd->obd_namespace->ns_lvbo = &inode_lvbo; - - rc = mdc_llog_init(obd); - if (rc) { - mdc_cleanup(obd); - CERROR("failed to setup llogging subsystems\n"); - return rc; - } - - return rc; - -err_ptlrpcd_decref: - ptlrpcd_decref(); - return rc; -} - -/* Initialize the default and maximum LOV EA sizes. This allows - * us to make MDS RPCs with large enough reply buffers to hold a default - * sized EA without having to calculate this (via a call into the - * LOV + OSCs) each time we make an RPC. The maximum size is also tracked - * but not used to avoid wastefully vmalloc()'ing large reply buffers when - * a large number of stripes is possible. 
If a larger reply buffer is - * required it will be reallocated in the ptlrpc layer due to overflow. - */ -static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize) -{ - struct obd_device *obd = exp->exp_obd; - struct client_obd *cli = &obd->u.cli; - - if (cli->cl_max_mds_easize < easize) - cli->cl_max_mds_easize = easize; - - if (cli->cl_default_mds_easize < def_easize) - cli->cl_default_mds_easize = def_easize; - - return 0; -} - -static int mdc_precleanup(struct obd_device *obd) -{ - /* Failsafe, ok if racy */ - if (obd->obd_type->typ_refcnt <= 1) - libcfs_kkuc_group_rem(0, KUC_GRP_HSM); - - obd_cleanup_client_import(obd); - ptlrpc_lprocfs_unregister_obd(obd); - lprocfs_obd_cleanup(obd); - mdc_llog_finish(obd); - return 0; -} - -static int mdc_cleanup(struct obd_device *obd) -{ - ptlrpcd_decref(); - - return client_obd_cleanup(obd); -} - -static int mdc_process_config(struct obd_device *obd, u32 len, void *buf) -{ - struct lustre_cfg *lcfg = buf; - struct lprocfs_static_vars lvars = { NULL }; - int rc = 0; - - lprocfs_mdc_init_vars(&lvars); - switch (lcfg->lcfg_command) { - default: - rc = class_process_proc_param(PARAM_MDC, lvars.obd_vars, - lcfg, obd); - if (rc > 0) - rc = 0; - break; - } - return rc; -} - -static struct obd_ops mdc_obd_ops = { - .owner = THIS_MODULE, - .setup = mdc_setup, - .precleanup = mdc_precleanup, - .cleanup = mdc_cleanup, - .add_conn = client_import_add_conn, - .del_conn = client_import_del_conn, - .connect = client_connect_import, - .disconnect = client_disconnect_export, - .iocontrol = mdc_iocontrol, - .set_info_async = mdc_set_info_async, - .statfs = mdc_statfs, - .fid_init = client_fid_init, - .fid_fini = client_fid_fini, - .fid_alloc = mdc_fid_alloc, - .import_event = mdc_import_event, - .get_info = mdc_get_info, - .process_config = mdc_process_config, - .get_uuid = mdc_get_uuid, - .quotactl = mdc_quotactl, -}; - -static struct md_ops mdc_md_ops = { - .getstatus = mdc_getstatus, - .null_inode = 
mdc_null_inode, - .close = mdc_close, - .create = mdc_create, - .enqueue = mdc_enqueue, - .getattr = mdc_getattr, - .getattr_name = mdc_getattr_name, - .intent_lock = mdc_intent_lock, - .link = mdc_link, - .rename = mdc_rename, - .setattr = mdc_setattr, - .setxattr = mdc_setxattr, - .getxattr = mdc_getxattr, - .sync = mdc_sync, - .read_page = mdc_read_page, - .unlink = mdc_unlink, - .cancel_unused = mdc_cancel_unused, - .init_ea_size = mdc_init_ea_size, - .set_lock_data = mdc_set_lock_data, - .lock_match = mdc_lock_match, - .get_lustre_md = mdc_get_lustre_md, - .free_lustre_md = mdc_free_lustre_md, - .set_open_replay_data = mdc_set_open_replay_data, - .clear_open_replay_data = mdc_clear_open_replay_data, - .intent_getattr_async = mdc_intent_getattr_async, - .revalidate_lock = mdc_revalidate_lock -}; - -static int __init mdc_init(void) -{ - struct lprocfs_static_vars lvars = { NULL }; - int rc; - - rc = libcfs_setup(); - if (rc) - return rc; - - lprocfs_mdc_init_vars(&lvars); - - return class_register_type(&mdc_obd_ops, &mdc_md_ops, - LUSTRE_MDC_NAME, NULL); -} - -static void /*__exit*/ mdc_exit(void) -{ - class_unregister_type(LUSTRE_MDC_NAME); -} - -MODULE_AUTHOR("OpenSFS, Inc. 
"); -MODULE_DESCRIPTION("Lustre Metadata Client"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(mdc_init); -module_exit(mdc_exit); diff --git a/drivers/staging/lustre/lustre/mgc/Makefile b/drivers/staging/lustre/lustre/mgc/Makefile deleted file mode 100644 index 8abf108dbcf7..000000000000 --- a/drivers/staging/lustre/lustre/mgc/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LUSTRE_FS) += mgc.o -mgc-y := mgc_request.o lproc_mgc.o diff --git a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c b/drivers/staging/lustre/lustre/mgc/lproc_mgc.c deleted file mode 100644 index 636770624e8f..000000000000 --- a/drivers/staging/lustre/lustre/mgc/lproc_mgc.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include "mgc_internal.h" - -LPROC_SEQ_FOPS_RO_TYPE(mgc, connect_flags); -LPROC_SEQ_FOPS_RO_TYPE(mgc, server_uuid); -LPROC_SEQ_FOPS_RO_TYPE(mgc, conn_uuid); -LPROC_SEQ_FOPS_RO_TYPE(mgc, import); -LPROC_SEQ_FOPS_RO_TYPE(mgc, state); - -LPROC_SEQ_FOPS_WR_ONLY(mgc, ping); - -static int mgc_ir_state_seq_show(struct seq_file *m, void *v) -{ - return lprocfs_mgc_rd_ir_state(m, m->private); -} - -LPROC_SEQ_FOPS_RO(mgc_ir_state); - -static struct lprocfs_vars lprocfs_mgc_obd_vars[] = { - { "ping", &mgc_ping_fops, NULL, 0222 }, - { "connect_flags", &mgc_connect_flags_fops, NULL, 0 }, - { "mgs_server_uuid", &mgc_server_uuid_fops, NULL, 0 }, - { "mgs_conn_uuid", &mgc_conn_uuid_fops, NULL, 0 }, - { "import", &mgc_import_fops, NULL, 0 }, - { "state", &mgc_state_fops, NULL, 0 }, - { "ir_state", &mgc_ir_state_fops, NULL, 0 }, - { NULL } -}; - -void lprocfs_mgc_init_vars(struct lprocfs_static_vars *lvars) -{ - lvars->obd_vars = lprocfs_mgc_obd_vars; -} diff --git a/drivers/staging/lustre/lustre/mgc/mgc_internal.h b/drivers/staging/lustre/lustre/mgc/mgc_internal.h deleted file mode 100644 index 9541892b67c7..000000000000 --- a/drivers/staging/lustre/lustre/mgc/mgc_internal.h +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#ifndef _MGC_INTERNAL_H -#define _MGC_INTERNAL_H - -#include -#include -#include -#include - -void lprocfs_mgc_init_vars(struct lprocfs_static_vars *lvars); -int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data); - -int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld); - -static inline int cld_is_sptlrpc(struct config_llog_data *cld) -{ - return cld->cld_type == CONFIG_T_SPTLRPC; -} - -static inline int cld_is_recover(struct config_llog_data *cld) -{ - return cld->cld_type == CONFIG_T_RECOVER; -} - -#endif /* _MGC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c deleted file mode 100644 index 32df804614d3..000000000000 --- a/drivers/staging/lustre/lustre/mgc/mgc_request.c +++ /dev/null @@ -1,1851 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/mgc/mgc_request.c - * - * Author: Nathan Rutman - */ - -#define DEBUG_SUBSYSTEM S_MGC -#define D_MGC D_CONFIG /*|D_WARNING*/ - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "mgc_internal.h" - -static int mgc_name2resid(char *name, int len, struct ldlm_res_id *res_id, - int type) -{ - __u64 resname = 0; - - if (len > sizeof(resname)) { - CERROR("name too long: %s\n", name); - return -EINVAL; - } - if (len <= 0) { - CERROR("missing name: %s\n", name); - return -EINVAL; - } - memcpy(&resname, name, len); - - /* Always use the same endianness for the resid */ - memset(res_id, 0, sizeof(*res_id)); - res_id->name[0] = cpu_to_le64(resname); - /* XXX: unfortunately, sptlprc and config llog share one lock */ - switch (type) { - case CONFIG_T_CONFIG: - case CONFIG_T_SPTLRPC: - resname = 0; - break; - case CONFIG_T_RECOVER: - case CONFIG_T_PARAMS: - resname = type; - break; - default: - LBUG(); - } - res_id->name[1] = cpu_to_le64(resname); - CDEBUG(D_MGC, "log %s to resid %#llx/%#llx (%.8s)\n", name, - res_id->name[0], res_id->name[1], (char *)&res_id->name[0]); - return 0; -} - -int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type) -{ - /* fsname is at most 8 chars long, maybe contain "-". - * e.g. 
"lustre", "SUN-000" - */ - return mgc_name2resid(fsname, strlen(fsname), res_id, type); -} -EXPORT_SYMBOL(mgc_fsname2resid); - -static int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id, int type) -{ - char *name_end; - int len; - - /* logname consists of "fsname-nodetype". - * e.g. "lustre-MDT0001", "SUN-000-client" - * there is an exception: llog "params" - */ - name_end = strrchr(logname, '-'); - if (!name_end) - len = strlen(logname); - else - len = name_end - logname; - return mgc_name2resid(logname, len, res_id, type); -} - -/********************** config llog list **********************/ -static LIST_HEAD(config_llog_list); -static DEFINE_SPINLOCK(config_list_lock); - -/* Take a reference to a config log */ -static int config_log_get(struct config_llog_data *cld) -{ - atomic_inc(&cld->cld_refcount); - CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname, - atomic_read(&cld->cld_refcount)); - return 0; -} - -/* Drop a reference to a config log. When no longer referenced, - * we can free the config log data - */ -static void config_log_put(struct config_llog_data *cld) -{ - CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname, - atomic_read(&cld->cld_refcount)); - LASSERT(atomic_read(&cld->cld_refcount) > 0); - - /* spinlock to make sure no item with 0 refcount in the list */ - if (atomic_dec_and_lock(&cld->cld_refcount, &config_list_lock)) { - list_del(&cld->cld_list_chain); - spin_unlock(&config_list_lock); - - CDEBUG(D_MGC, "dropping config log %s\n", cld->cld_logname); - - if (cld->cld_recover) - config_log_put(cld->cld_recover); - if (cld->cld_params) - config_log_put(cld->cld_params); - if (cld->cld_sptlrpc) - config_log_put(cld->cld_sptlrpc); - if (cld_is_sptlrpc(cld)) - sptlrpc_conf_log_stop(cld->cld_logname); - - class_export_put(cld->cld_mgcexp); - kfree(cld); - } -} - -/* Find a config log by name */ -static -struct config_llog_data *config_log_find(char *logname, - struct config_llog_instance *cfg) -{ - struct config_llog_data *cld; - 
struct config_llog_data *found = NULL; - void *instance; - - LASSERT(logname); - - instance = cfg ? cfg->cfg_instance : NULL; - spin_lock(&config_list_lock); - list_for_each_entry(cld, &config_llog_list, cld_list_chain) { - /* check if instance equals */ - if (instance != cld->cld_cfg.cfg_instance) - continue; - - /* instance may be NULL, should check name */ - if (strcmp(logname, cld->cld_logname) == 0) { - found = cld; - config_log_get(found); - break; - } - } - spin_unlock(&config_list_lock); - return found; -} - -static -struct config_llog_data *do_config_log_add(struct obd_device *obd, - char *logname, - int type, - struct config_llog_instance *cfg, - struct super_block *sb) -{ - struct config_llog_data *cld; - int rc; - - CDEBUG(D_MGC, "do adding config log %s:%p\n", logname, - cfg ? cfg->cfg_instance : NULL); - - cld = kzalloc(sizeof(*cld) + strlen(logname) + 1, GFP_NOFS); - if (!cld) - return ERR_PTR(-ENOMEM); - - rc = mgc_logname2resid(logname, &cld->cld_resid, type); - if (rc) { - kfree(cld); - return ERR_PTR(rc); - } - - strcpy(cld->cld_logname, logname); - if (cfg) - cld->cld_cfg = *cfg; - else - cld->cld_cfg.cfg_callback = class_config_llog_handler; - mutex_init(&cld->cld_lock); - cld->cld_cfg.cfg_last_idx = 0; - cld->cld_cfg.cfg_flags = 0; - cld->cld_cfg.cfg_sb = sb; - cld->cld_type = type; - atomic_set(&cld->cld_refcount, 1); - - /* Keep the mgc around until we are done */ - cld->cld_mgcexp = class_export_get(obd->obd_self_export); - - if (cld_is_sptlrpc(cld)) { - sptlrpc_conf_log_start(logname); - cld->cld_cfg.cfg_obdname = obd->obd_name; - } - - spin_lock(&config_list_lock); - list_add(&cld->cld_list_chain, &config_llog_list); - spin_unlock(&config_list_lock); - - if (cld_is_sptlrpc(cld)) { - rc = mgc_process_log(obd, cld); - if (rc && rc != -ENOENT) - CERROR("failed processing sptlrpc log: %d\n", rc); - } - - return cld; -} - -static struct config_llog_data * -config_recover_log_add(struct obd_device *obd, char *fsname, - struct 
config_llog_instance *cfg, - struct super_block *sb) -{ - struct config_llog_instance lcfg = *cfg; - struct config_llog_data *cld; - char logname[32]; - - /* we have to use different llog for clients and mdts for cmd - * where only clients are notified if one of cmd server restarts - */ - LASSERT(strlen(fsname) < sizeof(logname) / 2); - strcpy(logname, fsname); - LASSERT(lcfg.cfg_instance); - strcat(logname, "-cliir"); - - cld = do_config_log_add(obd, logname, CONFIG_T_RECOVER, &lcfg, sb); - return cld; -} - -static struct config_llog_data * -config_params_log_add(struct obd_device *obd, - struct config_llog_instance *cfg, struct super_block *sb) -{ - struct config_llog_instance lcfg = *cfg; - struct config_llog_data *cld; - - lcfg.cfg_instance = sb; - - cld = do_config_log_add(obd, PARAMS_FILENAME, CONFIG_T_PARAMS, - &lcfg, sb); - - return cld; -} - -/** Add this log to the list of active logs watched by an MGC. - * Active means we're watching for updates. - * We have one active log per "mount" - client instance or servername. - * Each instance may be at a different point in the log. - */ -static struct config_llog_data * -config_log_add(struct obd_device *obd, char *logname, - struct config_llog_instance *cfg, struct super_block *sb) -{ - struct lustre_sb_info *lsi = s2lsi(sb); - struct config_llog_data *cld; - struct config_llog_data *sptlrpc_cld; - struct config_llog_data *params_cld; - struct config_llog_data *recover_cld = NULL; - char seclogname[32]; - char *ptr; - int rc; - - CDEBUG(D_MGC, "adding config log %s:%p\n", logname, cfg->cfg_instance); - - /* - * for each regular log, the depended sptlrpc log name is - * -sptlrpc. multiple regular logs may share one sptlrpc log. 
- */ - ptr = strrchr(logname, '-'); - if (!ptr || ptr - logname > 8) { - CERROR("logname %s is too long\n", logname); - return ERR_PTR(-EINVAL); - } - - memcpy(seclogname, logname, ptr - logname); - strcpy(seclogname + (ptr - logname), "-sptlrpc"); - - sptlrpc_cld = config_log_find(seclogname, NULL); - if (!sptlrpc_cld) { - sptlrpc_cld = do_config_log_add(obd, seclogname, - CONFIG_T_SPTLRPC, NULL, NULL); - if (IS_ERR(sptlrpc_cld)) { - CERROR("can't create sptlrpc log: %s\n", seclogname); - rc = PTR_ERR(sptlrpc_cld); - goto out_err; - } - } - params_cld = config_params_log_add(obd, cfg, sb); - if (IS_ERR(params_cld)) { - rc = PTR_ERR(params_cld); - CERROR("%s: can't create params log: rc = %d\n", - obd->obd_name, rc); - goto out_sptlrpc; - } - - cld = do_config_log_add(obd, logname, CONFIG_T_CONFIG, cfg, sb); - if (IS_ERR(cld)) { - CERROR("can't create log: %s\n", logname); - rc = PTR_ERR(cld); - goto out_params; - } - - LASSERT(lsi->lsi_lmd); - if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)) { - ptr = strrchr(seclogname, '-'); - if (ptr) { - *ptr = 0; - } else { - CERROR("%s: sptlrpc log name not correct, %s: rc = %d\n", - obd->obd_name, seclogname, -EINVAL); - rc = -EINVAL; - goto out_cld; - } - recover_cld = config_recover_log_add(obd, seclogname, cfg, sb); - if (IS_ERR(recover_cld)) { - rc = PTR_ERR(recover_cld); - goto out_cld; - } - } - - mutex_lock(&cld->cld_lock); - cld->cld_recover = recover_cld; - cld->cld_params = params_cld; - cld->cld_sptlrpc = sptlrpc_cld; - mutex_unlock(&cld->cld_lock); - - return cld; - -out_cld: - config_log_put(cld); - -out_params: - config_log_put(params_cld); - -out_sptlrpc: - config_log_put(sptlrpc_cld); - -out_err: - return ERR_PTR(rc); -} - -static DEFINE_MUTEX(llog_process_lock); - -static inline void config_mark_cld_stop(struct config_llog_data *cld) -{ - mutex_lock(&cld->cld_lock); - spin_lock(&config_list_lock); - cld->cld_stopping = 1; - spin_unlock(&config_list_lock); - mutex_unlock(&cld->cld_lock); -} - -/** Stop 
watching for updates on this log. - */ -static int config_log_end(char *logname, struct config_llog_instance *cfg) -{ - struct config_llog_data *cld; - struct config_llog_data *cld_sptlrpc = NULL; - struct config_llog_data *cld_params = NULL; - struct config_llog_data *cld_recover = NULL; - int rc = 0; - - cld = config_log_find(logname, cfg); - if (!cld) - return -ENOENT; - - mutex_lock(&cld->cld_lock); - /* - * if cld_stopping is set, it means we didn't start the log thus - * not owning the start ref. this can happen after previous umount: - * the cld still hanging there waiting for lock cancel, and we - * remount again but failed in the middle and call log_end without - * calling start_log. - */ - if (unlikely(cld->cld_stopping)) { - mutex_unlock(&cld->cld_lock); - /* drop the ref from the find */ - config_log_put(cld); - return rc; - } - - spin_lock(&config_list_lock); - cld->cld_stopping = 1; - spin_unlock(&config_list_lock); - - cld_recover = cld->cld_recover; - cld->cld_recover = NULL; - - cld_params = cld->cld_params; - cld->cld_params = NULL; - cld_sptlrpc = cld->cld_sptlrpc; - cld->cld_sptlrpc = NULL; - mutex_unlock(&cld->cld_lock); - - if (cld_recover) { - config_mark_cld_stop(cld_recover); - config_log_put(cld_recover); - } - - if (cld_params) { - config_mark_cld_stop(cld_params); - config_log_put(cld_params); - } - - if (cld_sptlrpc) - config_log_put(cld_sptlrpc); - - /* drop the ref from the find */ - config_log_put(cld); - /* drop the start ref */ - config_log_put(cld); - - CDEBUG(D_MGC, "end config log %s (%d)\n", logname ? 
logname : "client", - rc); - return rc; -} - -int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data) -{ - struct obd_device *obd = data; - struct obd_import *imp; - struct obd_connect_data *ocd; - struct config_llog_data *cld; - int rc; - - rc = lprocfs_climp_check(obd); - if (rc) - return rc; - - imp = obd->u.cli.cl_import; - ocd = &imp->imp_connect_data; - - seq_printf(m, "imperative_recovery: %s\n", - OCD_HAS_FLAG(ocd, IMP_RECOV) ? "ENABLED" : "DISABLED"); - seq_printf(m, "client_state:\n"); - - spin_lock(&config_list_lock); - list_for_each_entry(cld, &config_llog_list, cld_list_chain) { - if (!cld->cld_recover) - continue; - seq_printf(m, " - { client: %s, nidtbl_version: %u }\n", - cld->cld_logname, - cld->cld_recover->cld_cfg.cfg_last_idx); - } - spin_unlock(&config_list_lock); - - up_read(&obd->u.cli.cl_sem); - return 0; -} - -/* reenqueue any lost locks */ -#define RQ_RUNNING 0x1 -#define RQ_NOW 0x2 -#define RQ_LATER 0x4 -#define RQ_STOP 0x8 -#define RQ_PRECLEANUP 0x10 -static int rq_state; -static wait_queue_head_t rq_waitq; -static DECLARE_COMPLETION(rq_exit); -static DECLARE_COMPLETION(rq_start); - -static void do_requeue(struct config_llog_data *cld) -{ - LASSERT(atomic_read(&cld->cld_refcount) > 0); - - /* Do not run mgc_process_log on a disconnected export or an - * export which is being disconnected. Take the client - * semaphore to make the check non-racy. 
- */ - down_read_nested(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem, - OBD_CLI_SEM_MGC); - - if (cld->cld_mgcexp->exp_obd->u.cli.cl_conn_count != 0) { - int rc; - - CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname); - rc = mgc_process_log(cld->cld_mgcexp->exp_obd, cld); - if (rc && rc != -ENOENT) - CERROR("failed processing log: %d\n", rc); - } else { - CDEBUG(D_MGC, "disconnecting, won't update log %s\n", - cld->cld_logname); - } - up_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem); -} - -/* this timeout represents how many seconds MGC should wait before - * requeue config and recover lock to the MGS. We need to randomize this - * in order to not flood the MGS. - */ -#define MGC_TIMEOUT_MIN_SECONDS 5 -#define MGC_TIMEOUT_RAND_CENTISEC 500 - -static int mgc_requeue_thread(void *data) -{ - bool first = true; - - CDEBUG(D_MGC, "Starting requeue thread\n"); - - /* Keep trying failed locks periodically */ - spin_lock(&config_list_lock); - rq_state |= RQ_RUNNING; - while (!(rq_state & RQ_STOP)) { - struct config_llog_data *cld, *cld_prev; - int rand = prandom_u32_max(MGC_TIMEOUT_RAND_CENTISEC); - int to; - - /* Any new or requeued lostlocks will change the state */ - rq_state &= ~(RQ_NOW | RQ_LATER); - spin_unlock(&config_list_lock); - - if (first) { - first = false; - complete(&rq_start); - } - - /* Always wait a few seconds to allow the server who - * caused the lock revocation to finish its setup, plus some - * random so everyone doesn't try to reconnect at once. - */ - to = msecs_to_jiffies(MGC_TIMEOUT_MIN_SECONDS * MSEC_PER_SEC); - /* rand is centi-seconds */ - to += msecs_to_jiffies(rand * MSEC_PER_SEC / 100); - wait_event_idle_timeout(rq_waitq, - rq_state & (RQ_STOP | RQ_PRECLEANUP), - to); - - /* - * iterate & processing through the list. for each cld, process - * its depending sptlrpc cld firstly (if any) and then itself. 
- * - * it's guaranteed any item in the list must have - * reference > 0; and if cld_lostlock is set, at - * least one reference is taken by the previous enqueue. - */ - cld_prev = NULL; - - spin_lock(&config_list_lock); - rq_state &= ~RQ_PRECLEANUP; - list_for_each_entry(cld, &config_llog_list, cld_list_chain) { - if (!cld->cld_lostlock || cld->cld_stopping) - continue; - - /* - * hold reference to avoid being freed during - * subsequent processing. - */ - config_log_get(cld); - cld->cld_lostlock = 0; - spin_unlock(&config_list_lock); - - if (cld_prev) - config_log_put(cld_prev); - cld_prev = cld; - - if (likely(!(rq_state & RQ_STOP))) { - do_requeue(cld); - spin_lock(&config_list_lock); - } else { - spin_lock(&config_list_lock); - break; - } - } - spin_unlock(&config_list_lock); - if (cld_prev) - config_log_put(cld_prev); - - /* Wait a bit to see if anyone else needs a requeue */ - wait_event_idle(rq_waitq, rq_state & (RQ_NOW | RQ_STOP)); - spin_lock(&config_list_lock); - } - - /* spinlock and while guarantee RQ_NOW and RQ_LATER are not set */ - rq_state &= ~RQ_RUNNING; - spin_unlock(&config_list_lock); - - complete(&rq_exit); - - CDEBUG(D_MGC, "Ending requeue thread\n"); - return 0; -} - -/* Add a cld to the list to requeue. Start the requeue thread if needed. - * We are responsible for dropping the config log reference from here on out. 
- */ -static void mgc_requeue_add(struct config_llog_data *cld) -{ - bool wakeup = false; - - CDEBUG(D_INFO, "log %s: requeue (r=%d sp=%d st=%x)\n", - cld->cld_logname, atomic_read(&cld->cld_refcount), - cld->cld_stopping, rq_state); - LASSERT(atomic_read(&cld->cld_refcount) > 0); - - mutex_lock(&cld->cld_lock); - spin_lock(&config_list_lock); - if (!(rq_state & RQ_STOP) && !cld->cld_stopping && !cld->cld_lostlock) { - cld->cld_lostlock = 1; - rq_state |= RQ_NOW; - wakeup = true; - } - spin_unlock(&config_list_lock); - mutex_unlock(&cld->cld_lock); - if (wakeup) - wake_up(&rq_waitq); -} - -static int mgc_llog_init(const struct lu_env *env, struct obd_device *obd) -{ - struct llog_ctxt *ctxt; - int rc; - - /* setup only remote ctxt, the local disk context is switched per each - * filesystem during mgc_fs_setup() - */ - rc = llog_setup(env, obd, &obd->obd_olg, LLOG_CONFIG_REPL_CTXT, obd, - &llog_client_ops); - if (rc) - return rc; - - ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); - LASSERT(ctxt); - - llog_initiator_connect(ctxt); - llog_ctxt_put(ctxt); - - return 0; -} - -static int mgc_llog_fini(const struct lu_env *env, struct obd_device *obd) -{ - struct llog_ctxt *ctxt; - - ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); - if (ctxt) - llog_cleanup(env, ctxt); - - return 0; -} - -static atomic_t mgc_count = ATOMIC_INIT(0); -static int mgc_precleanup(struct obd_device *obd) -{ - int rc = 0; - int temp; - - if (atomic_dec_and_test(&mgc_count)) { - LASSERT(rq_state & RQ_RUNNING); - /* stop requeue thread */ - temp = RQ_STOP; - } else { - /* wakeup requeue thread to clean our cld */ - temp = RQ_NOW | RQ_PRECLEANUP; - } - - spin_lock(&config_list_lock); - rq_state |= temp; - spin_unlock(&config_list_lock); - wake_up(&rq_waitq); - - if (temp & RQ_STOP) - wait_for_completion(&rq_exit); - obd_cleanup_client_import(obd); - - rc = mgc_llog_fini(NULL, obd); - if (rc) - CERROR("failed to cleanup llogging subsystems\n"); - - return rc; -} - -static int 
mgc_cleanup(struct obd_device *obd) -{ - /* COMPAT_146 - old config logs may have added profiles we don't - * know about - */ - if (obd->obd_type->typ_refcnt <= 1) - /* Only for the last mgc */ - class_del_profiles(); - - lprocfs_obd_cleanup(obd); - ptlrpcd_decref(); - - return client_obd_cleanup(obd); -} - -static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - struct lprocfs_static_vars lvars = { NULL }; - struct task_struct *task; - int rc; - - rc = ptlrpcd_addref(); - if (rc < 0) - goto err_noref; - - rc = client_obd_setup(obd, lcfg); - if (rc) - goto err_decref; - - rc = mgc_llog_init(NULL, obd); - if (rc) { - CERROR("failed to setup llogging subsystems\n"); - goto err_cleanup; - } - - lprocfs_mgc_init_vars(&lvars); - lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars); - sptlrpc_lprocfs_cliobd_attach(obd); - - if (atomic_inc_return(&mgc_count) == 1) { - rq_state = 0; - init_waitqueue_head(&rq_waitq); - - /* start requeue thread */ - task = kthread_run(mgc_requeue_thread, NULL, "ll_cfg_requeue"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - CERROR("%s: cannot start requeue thread: rc = %d; no more log updates\n", - obd->obd_name, rc); - goto err_cleanup; - } - /* rc is the task_struct pointer of mgc_requeue_thread. */ - rc = 0; - wait_for_completion(&rq_start); - } - - return rc; - -err_cleanup: - client_obd_cleanup(obd); -err_decref: - ptlrpcd_decref(); -err_noref: - return rc; -} - -/* based on ll_mdc_blocking_ast */ -static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag) -{ - struct lustre_handle lockh; - struct config_llog_data *cld = data; - int rc = 0; - - switch (flag) { - case LDLM_CB_BLOCKING: - /* mgs wants the lock, give it up... */ - LDLM_DEBUG(lock, "MGC blocking CB"); - ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh, LCF_ASYNC); - break; - case LDLM_CB_CANCELING: - /* We've given up the lock, prepare ourselves to update. 
*/ - LDLM_DEBUG(lock, "MGC cancel CB"); - - CDEBUG(D_MGC, "Lock res " DLDLMRES " (%.8s)\n", - PLDLMRES(lock->l_resource), - (char *)&lock->l_resource->lr_name.name[0]); - - if (!cld) { - CDEBUG(D_INFO, "missing data, won't requeue\n"); - break; - } - - /* held at mgc_process_log(). */ - LASSERT(atomic_read(&cld->cld_refcount) > 0); - - lock->l_ast_data = NULL; - /* Are we done with this log? */ - if (cld->cld_stopping) { - CDEBUG(D_MGC, "log %s: stopping, won't requeue\n", - cld->cld_logname); - config_log_put(cld); - break; - } - /* Make sure not to re-enqueue when the mgc is stopping - * (we get called from client_disconnect_export) - */ - if (!lock->l_conn_export || - !lock->l_conn_export->exp_obd->u.cli.cl_conn_count) { - CDEBUG(D_MGC, "log %.8s: disconnecting, won't requeue\n", - cld->cld_logname); - config_log_put(cld); - break; - } - - /* Re-enqueue now */ - mgc_requeue_add(cld); - config_log_put(cld); - break; - default: - LBUG(); - } - - return rc; -} - -/* Not sure where this should go... */ -/* This is the timeout value for MGS_CONNECT request plus a ping interval, such - * that we can have a chance to try the secondary MGS if any. - */ -#define MGC_ENQUEUE_LIMIT (INITIAL_CONNECT_TIMEOUT + (AT_OFF ? 
0 : at_min) \ - + PING_INTERVAL) -#define MGC_TARGET_REG_LIMIT 10 -#define MGC_SEND_PARAM_LIMIT 10 - -/* Send parameter to MGS*/ -static int mgc_set_mgs_param(struct obd_export *exp, - struct mgs_send_param *msp) -{ - struct ptlrpc_request *req; - struct mgs_send_param *req_msp, *rep_msp; - int rc; - - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_MGS_SET_INFO, LUSTRE_MGS_VERSION, - MGS_SET_INFO); - if (!req) - return -ENOMEM; - - req_msp = req_capsule_client_get(&req->rq_pill, &RMF_MGS_SEND_PARAM); - if (!req_msp) { - ptlrpc_req_finished(req); - return -ENOMEM; - } - - memcpy(req_msp, msp, sizeof(*req_msp)); - ptlrpc_request_set_replen(req); - - /* Limit how long we will wait for the enqueue to complete */ - req->rq_delay_limit = MGC_SEND_PARAM_LIMIT; - rc = ptlrpc_queue_wait(req); - if (!rc) { - rep_msp = req_capsule_server_get(&req->rq_pill, &RMF_MGS_SEND_PARAM); - memcpy(msp, rep_msp, sizeof(*rep_msp)); - } - - ptlrpc_req_finished(req); - - return rc; -} - -/* Take a config lock so we can get cancel notifications */ -static int mgc_enqueue(struct obd_export *exp, __u32 type, - union ldlm_policy_data *policy, __u32 mode, - __u64 *flags, void *bl_cb, void *cp_cb, void *gl_cb, - void *data, __u32 lvb_len, void *lvb_swabber, - struct lustre_handle *lockh) -{ - struct config_llog_data *cld = data; - struct ldlm_enqueue_info einfo = { - .ei_type = type, - .ei_mode = mode, - .ei_cb_bl = mgc_blocking_ast, - .ei_cb_cp = ldlm_completion_ast, - }; - struct ptlrpc_request *req; - int short_limit = cld_is_sptlrpc(cld); - int rc; - - CDEBUG(D_MGC, "Enqueue for %s (res %#llx)\n", cld->cld_logname, - cld->cld_resid.name[0]); - - /* We need a callback for every lockholder, so don't try to - * ldlm_lock_match (see rev 1.1.2.11.2.47) - */ - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_LDLM_ENQUEUE, LUSTRE_DLM_VERSION, - LDLM_ENQUEUE); - if (!req) - return -ENOMEM; - - req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, 0); - 
ptlrpc_request_set_replen(req); - - /* Limit how long we will wait for the enqueue to complete */ - req->rq_delay_limit = short_limit ? 5 : MGC_ENQUEUE_LIMIT; - rc = ldlm_cli_enqueue(exp, &req, &einfo, &cld->cld_resid, NULL, flags, - NULL, 0, LVB_T_NONE, lockh, 0); - /* A failed enqueue should still call the mgc_blocking_ast, - * where it will be requeued if needed ("grant failed"). - */ - ptlrpc_req_finished(req); - return rc; -} - -static void mgc_notify_active(struct obd_device *unused) -{ - /* wakeup mgc_requeue_thread to requeue mgc lock */ - spin_lock(&config_list_lock); - rq_state |= RQ_NOW; - spin_unlock(&config_list_lock); - wake_up(&rq_waitq); - - /* TODO: Help the MGS rebuild nidtbl. -jay */ -} - -/* Send target_reg message to MGS */ -static int mgc_target_register(struct obd_export *exp, - struct mgs_target_info *mti) -{ - struct ptlrpc_request *req; - struct mgs_target_info *req_mti, *rep_mti; - int rc; - - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_MGS_TARGET_REG, LUSTRE_MGS_VERSION, - MGS_TARGET_REG); - if (!req) - return -ENOMEM; - - req_mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO); - if (!req_mti) { - ptlrpc_req_finished(req); - return -ENOMEM; - } - - memcpy(req_mti, mti, sizeof(*req_mti)); - ptlrpc_request_set_replen(req); - CDEBUG(D_MGC, "register %s\n", mti->mti_svname); - /* Limit how long we will wait for the enqueue to complete */ - req->rq_delay_limit = MGC_TARGET_REG_LIMIT; - - rc = ptlrpc_queue_wait(req); - if (!rc) { - rep_mti = req_capsule_server_get(&req->rq_pill, - &RMF_MGS_TARGET_INFO); - memcpy(mti, rep_mti, sizeof(*rep_mti)); - CDEBUG(D_MGC, "register %s got index = %d\n", - mti->mti_svname, mti->mti_stripe_index); - } - ptlrpc_req_finished(req); - - return rc; -} - -static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp, - u32 keylen, void *key, u32 vallen, - void *val, struct ptlrpc_request_set *set) -{ - int rc = -EINVAL; - - /* Turn off initial_recov after we try 
all backup servers once */ - if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { - struct obd_import *imp = class_exp2cliimp(exp); - int value; - - if (vallen != sizeof(int)) - return -EINVAL; - value = *(int *)val; - CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n", - imp->imp_obd->obd_name, value, - imp->imp_deactive, imp->imp_invalid, - imp->imp_replayable, imp->imp_obd->obd_replayable, - ptlrpc_import_state_name(imp->imp_state)); - /* Resurrect if we previously died */ - if ((imp->imp_state != LUSTRE_IMP_FULL && - imp->imp_state != LUSTRE_IMP_NEW) || value > 1) - ptlrpc_reconnect_import(imp); - return 0; - } - if (KEY_IS(KEY_SET_INFO)) { - struct mgs_send_param *msp; - - msp = val; - rc = mgc_set_mgs_param(exp, msp); - return rc; - } - if (KEY_IS(KEY_MGSSEC)) { - struct client_obd *cli = &exp->exp_obd->u.cli; - struct sptlrpc_flavor flvr; - - /* - * empty string means using current flavor, if which haven't - * been set yet, set it as null. - * - * if flavor has been set previously, check the asking flavor - * must match the existing one. 
- */ - if (vallen == 0) { - if (cli->cl_flvr_mgc.sf_rpc != SPTLRPC_FLVR_INVALID) - return 0; - val = "null"; - vallen = 4; - } - - rc = sptlrpc_parse_flavor(val, &flvr); - if (rc) { - CERROR("invalid sptlrpc flavor %s to MGS\n", - (char *)val); - return rc; - } - - /* - * caller already hold a mutex - */ - if (cli->cl_flvr_mgc.sf_rpc == SPTLRPC_FLVR_INVALID) { - cli->cl_flvr_mgc = flvr; - } else if (memcmp(&cli->cl_flvr_mgc, &flvr, - sizeof(flvr)) != 0) { - char str[20]; - - sptlrpc_flavor2name(&cli->cl_flvr_mgc, - str, sizeof(str)); - LCONSOLE_ERROR("asking sptlrpc flavor %s to MGS but currently %s is in use\n", - (char *)val, str); - rc = -EPERM; - } - return rc; - } - - return rc; -} - -static int mgc_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val) -{ - int rc = -EINVAL; - - if (KEY_IS(KEY_CONN_DATA)) { - struct obd_import *imp = class_exp2cliimp(exp); - struct obd_connect_data *data = val; - - if (*vallen == sizeof(*data)) { - *data = imp->imp_connect_data; - rc = 0; - } - } - - return rc; -} - -static int mgc_import_event(struct obd_device *obd, - struct obd_import *imp, - enum obd_import_event event) -{ - LASSERT(imp->imp_obd == obd); - CDEBUG(D_MGC, "import event %#x\n", event); - - switch (event) { - case IMP_EVENT_DISCON: - /* MGC imports should not wait for recovery */ - if (OCD_HAS_FLAG(&imp->imp_connect_data, IMP_RECOV)) - ptlrpc_pinger_ir_down(); - break; - case IMP_EVENT_INACTIVE: - break; - case IMP_EVENT_INVALIDATE: { - struct ldlm_namespace *ns = obd->obd_namespace; - - ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); - break; - } - case IMP_EVENT_ACTIVE: - CDEBUG(D_INFO, "%s: Reactivating import\n", obd->obd_name); - /* Clearing obd_no_recov allows us to continue pinging */ - obd->obd_no_recov = 0; - mgc_notify_active(obd); - if (OCD_HAS_FLAG(&imp->imp_connect_data, IMP_RECOV)) - ptlrpc_pinger_ir_up(); - break; - case IMP_EVENT_OCD: - break; - case IMP_EVENT_DEACTIVATE: - case 
IMP_EVENT_ACTIVATE: - break; - default: - CERROR("Unknown import event %#x\n", event); - LBUG(); - } - return 0; -} - -enum { - CONFIG_READ_NRPAGES_INIT = 1 << (20 - PAGE_SHIFT), - CONFIG_READ_NRPAGES = 4 -}; - -static int mgc_apply_recover_logs(struct obd_device *mgc, - struct config_llog_data *cld, - __u64 max_version, - void *data, int datalen, bool mne_swab) -{ - struct config_llog_instance *cfg = &cld->cld_cfg; - struct mgs_nidtbl_entry *entry; - struct lustre_cfg *lcfg; - struct lustre_cfg_bufs bufs; - u64 prev_version = 0; - char *inst; - char *buf; - int bufsz; - int pos; - int rc = 0; - int off = 0; - - LASSERT(cfg->cfg_instance); - LASSERT(cfg->cfg_sb == cfg->cfg_instance); - - inst = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!inst) - return -ENOMEM; - - pos = snprintf(inst, PAGE_SIZE, "%p", cfg->cfg_instance); - if (pos >= PAGE_SIZE) { - kfree(inst); - return -E2BIG; - } - - ++pos; - buf = inst + pos; - bufsz = PAGE_SIZE - pos; - - while (datalen > 0) { - int entry_len = sizeof(*entry); - int is_ost, i; - struct obd_device *obd; - char *obdname; - char *cname; - char *params; - char *uuid; - size_t len; - - rc = -EINVAL; - if (datalen < sizeof(*entry)) - break; - - entry = (typeof(entry))(data + off); - - /* sanity check */ - if (entry->mne_nid_type != 0) /* only support type 0 for ipv4 */ - break; - if (entry->mne_nid_count == 0) /* at least one nid entry */ - break; - if (entry->mne_nid_size != sizeof(lnet_nid_t)) - break; - - entry_len += entry->mne_nid_count * entry->mne_nid_size; - if (datalen < entry_len) /* must have entry_len at least */ - break; - - /* Keep this swab for normal mixed endian handling. 
LU-1644 */ - if (mne_swab) - lustre_swab_mgs_nidtbl_entry(entry); - if (entry->mne_length > PAGE_SIZE) { - CERROR("MNE too large (%u)\n", entry->mne_length); - break; - } - - if (entry->mne_length < entry_len) - break; - - off += entry->mne_length; - datalen -= entry->mne_length; - if (datalen < 0) - break; - - if (entry->mne_version > max_version) { - CERROR("entry index(%lld) is over max_index(%lld)\n", - entry->mne_version, max_version); - break; - } - - if (prev_version >= entry->mne_version) { - CERROR("index unsorted, prev %lld, now %lld\n", - prev_version, entry->mne_version); - break; - } - prev_version = entry->mne_version; - - /* - * Write a string with format "nid::instance" to - * lustre//--/import. - */ - - is_ost = entry->mne_type == LDD_F_SV_TYPE_OST; - memset(buf, 0, bufsz); - obdname = buf; - pos = 0; - - /* lustre-OST0001-osc- */ - strcpy(obdname, cld->cld_logname); - cname = strrchr(obdname, '-'); - if (!cname) { - CERROR("mgc %s: invalid logname %s\n", - mgc->obd_name, obdname); - break; - } - - pos = cname - obdname; - obdname[pos] = 0; - pos += sprintf(obdname + pos, "-%s%04x", - is_ost ? "OST" : "MDT", entry->mne_index); - - cname = is_ost ? 
"osc" : "mdc"; - pos += sprintf(obdname + pos, "-%s-%s", cname, inst); - lustre_cfg_bufs_reset(&bufs, obdname); - - /* find the obd by obdname */ - obd = class_name2obd(obdname); - if (!obd) { - CDEBUG(D_INFO, "mgc %s: cannot find obdname %s\n", - mgc->obd_name, obdname); - rc = 0; - /* this is a safe race, when the ost is starting up...*/ - continue; - } - - /* osc.import = "connection=::" */ - ++pos; - params = buf + pos; - pos += sprintf(params, "%s.import=%s", cname, "connection="); - uuid = buf + pos; - - down_read(&obd->u.cli.cl_sem); - if (!obd->u.cli.cl_import) { - /* client does not connect to the OST yet */ - up_read(&obd->u.cli.cl_sem); - rc = 0; - continue; - } - - /* iterate all nids to find one */ - /* find uuid by nid */ - rc = -ENOENT; - for (i = 0; i < entry->mne_nid_count; i++) { - rc = client_import_find_conn(obd->u.cli.cl_import, - entry->u.nids[0], - (struct obd_uuid *)uuid); - if (!rc) - break; - } - - up_read(&obd->u.cli.cl_sem); - if (rc < 0) { - CERROR("mgc: cannot find uuid by nid %s\n", - libcfs_nid2str(entry->u.nids[0])); - break; - } - - CDEBUG(D_INFO, "Find uuid %s by nid %s\n", - uuid, libcfs_nid2str(entry->u.nids[0])); - - pos += strlen(uuid); - pos += sprintf(buf + pos, "::%u", entry->mne_instance); - LASSERT(pos < bufsz); - - lustre_cfg_bufs_set_string(&bufs, 1, params); - - rc = -ENOMEM; - len = lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen); - lcfg = kzalloc(len, GFP_NOFS); - if (!lcfg) { - rc = -ENOMEM; - break; - } - lustre_cfg_init(lcfg, LCFG_PARAM, &bufs); - - CDEBUG(D_INFO, "ir apply logs %lld/%lld for %s -> %s\n", - prev_version, max_version, obdname, params); - - rc = class_process_config(lcfg); - kfree(lcfg); - if (rc) - CDEBUG(D_INFO, "process config for %s error %d\n", - obdname, rc); - - /* continue, even one with error */ - } - - kfree(inst); - return rc; -} - -/** - * This function is called if this client was notified for target restarting - * by the MGS. 
A CONFIG_READ RPC is going to send to fetch recovery logs. - */ -static int mgc_process_recover_log(struct obd_device *obd, - struct config_llog_data *cld) -{ - struct ptlrpc_request *req = NULL; - struct config_llog_instance *cfg = &cld->cld_cfg; - struct mgs_config_body *body; - struct mgs_config_res *res; - struct ptlrpc_bulk_desc *desc; - struct page **pages; - int nrpages; - bool eof = true; - bool mne_swab; - int i; - int ealen; - int rc; - - /* allocate buffer for bulk transfer. - * if this is the first time for this mgs to read logs, - * CONFIG_READ_NRPAGES_INIT will be used since it will read all logs - * once; otherwise, it only reads increment of logs, this should be - * small and CONFIG_READ_NRPAGES will be used. - */ - nrpages = CONFIG_READ_NRPAGES; - if (cfg->cfg_last_idx == 0) /* the first time */ - nrpages = CONFIG_READ_NRPAGES_INIT; - - pages = kcalloc(nrpages, sizeof(*pages), GFP_KERNEL); - if (!pages) { - rc = -ENOMEM; - goto out; - } - - for (i = 0; i < nrpages; i++) { - pages[i] = alloc_page(GFP_KERNEL); - if (!pages[i]) { - rc = -ENOMEM; - goto out; - } - } - -again: - LASSERT(cld_is_recover(cld)); - LASSERT(mutex_is_locked(&cld->cld_lock)); - req = ptlrpc_request_alloc(class_exp2cliimp(cld->cld_mgcexp), - &RQF_MGS_CONFIG_READ); - if (!req) { - rc = -ENOMEM; - goto out; - } - - rc = ptlrpc_request_pack(req, LUSTRE_MGS_VERSION, MGS_CONFIG_READ); - if (rc) - goto out; - - /* pack request */ - body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY); - LASSERT(sizeof(body->mcb_name) > strlen(cld->cld_logname)); - if (strlcpy(body->mcb_name, cld->cld_logname, sizeof(body->mcb_name)) - >= sizeof(body->mcb_name)) { - rc = -E2BIG; - goto out; - } - body->mcb_offset = cfg->cfg_last_idx + 1; - body->mcb_type = cld->cld_type; - body->mcb_bits = PAGE_SHIFT; - body->mcb_units = nrpages; - - /* allocate bulk transfer descriptor */ - desc = ptlrpc_prep_bulk_imp(req, nrpages, 1, - PTLRPC_BULK_PUT_SINK | PTLRPC_BULK_BUF_KIOV, - MGS_BULK_PORTAL, - 
&ptlrpc_bulk_kiov_pin_ops); - if (!desc) { - rc = -ENOMEM; - goto out; - } - - for (i = 0; i < nrpages; i++) - desc->bd_frag_ops->add_kiov_frag(desc, pages[i], 0, PAGE_SIZE); - - ptlrpc_request_set_replen(req); - rc = ptlrpc_queue_wait(req); - if (rc) - goto out; - - res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES); - if (res->mcr_size < res->mcr_offset) { - rc = -EINVAL; - goto out; - } - - /* always update the index even though it might have errors with - * handling the recover logs - */ - cfg->cfg_last_idx = res->mcr_offset; - eof = res->mcr_offset == res->mcr_size; - - CDEBUG(D_INFO, "Latest version %lld, more %d.\n", - res->mcr_offset, eof == false); - - ealen = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, 0); - if (ealen < 0) { - rc = ealen; - goto out; - } - - if (ealen > nrpages << PAGE_SHIFT) { - rc = -EINVAL; - goto out; - } - - if (ealen == 0) { /* no logs transferred */ - if (!eof) - rc = -EINVAL; - goto out; - } - - mne_swab = !!ptlrpc_rep_need_swab(req); -#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE - /* This import flag means the server did an extra swab of IR MNE - * records (fixed in LU-1252), reverse it here if needed. 
LU-1644 - */ - if (unlikely(req->rq_import->imp_need_mne_swab)) - mne_swab = !mne_swab; -#endif - - for (i = 0; i < nrpages && ealen > 0; i++) { - int rc2; - void *ptr; - - ptr = kmap(pages[i]); - rc2 = mgc_apply_recover_logs(obd, cld, res->mcr_offset, ptr, - min_t(int, ealen, PAGE_SIZE), - mne_swab); - kunmap(pages[i]); - if (rc2 < 0) { - CWARN("Process recover log %s error %d\n", - cld->cld_logname, rc2); - break; - } - - ealen -= PAGE_SIZE; - } - -out: - if (req) - ptlrpc_req_finished(req); - - if (rc == 0 && !eof) - goto again; - - if (pages) { - for (i = 0; i < nrpages; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); - } - return rc; -} - -/* local_only means it cannot get remote llogs */ -static int mgc_process_cfg_log(struct obd_device *mgc, - struct config_llog_data *cld, int local_only) -{ - struct llog_ctxt *ctxt; - struct lustre_sb_info *lsi = NULL; - int rc = 0; - bool sptlrpc_started = false; - struct lu_env *env; - - LASSERT(cld); - LASSERT(mutex_is_locked(&cld->cld_lock)); - - /* - * local copy of sptlrpc log is controlled elsewhere, don't try to - * read it up here. - */ - if (cld_is_sptlrpc(cld) && local_only) - return 0; - - if (cld->cld_cfg.cfg_sb) - lsi = s2lsi(cld->cld_cfg.cfg_sb); - - env = kzalloc(sizeof(*env), GFP_KERNEL); - if (!env) - return -ENOMEM; - - rc = lu_env_init(env, LCT_MG_THREAD); - if (rc) - goto out_free; - - ctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT); - LASSERT(ctxt); - - if (local_only) /* no local log at client side */ { - rc = -EIO; - goto out_pop; - } - - if (cld_is_sptlrpc(cld)) { - sptlrpc_conf_log_update_begin(cld->cld_logname); - sptlrpc_started = true; - } - - /* logname and instance info should be the same, so use our - * copy of the instance for the update. The cfg_last_idx will - * be updated here. - */ - rc = class_config_parse_llog(env, ctxt, cld->cld_logname, - &cld->cld_cfg); - -out_pop: - __llog_ctxt_put(env, ctxt); - - /* - * update settings on existing OBDs. 
doing it inside - * of llog_process_lock so no device is attaching/detaching - * in parallel. - * the logname must be -sptlrpc - */ - if (sptlrpc_started) { - LASSERT(cld_is_sptlrpc(cld)); - sptlrpc_conf_log_update_end(cld->cld_logname); - class_notify_sptlrpc_conf(cld->cld_logname, - strlen(cld->cld_logname) - - strlen("-sptlrpc")); - } - - lu_env_fini(env); -out_free: - kfree(env); - return rc; -} - -static bool mgc_import_in_recovery(struct obd_import *imp) -{ - bool in_recovery = true; - - spin_lock(&imp->imp_lock); - if (imp->imp_state == LUSTRE_IMP_FULL || - imp->imp_state == LUSTRE_IMP_CLOSED) - in_recovery = false; - spin_unlock(&imp->imp_lock); - - return in_recovery; -} - -/** - * Get a configuration log from the MGS and process it. - * - * This function is called for both clients and servers to process the - * configuration log from the MGS. The MGC enqueues a DLM lock on the - * log from the MGS, and if the lock gets revoked the MGC will be notified - * by the lock cancellation callback that the config log has changed, - * and will enqueue another MGS lock on it, and then continue processing - * the new additions to the end of the log. - * - * Since the MGC import is not replayable, if the import is being evicted - * (rcl == -ESHUTDOWN, \see ptlrpc_import_delay_req()), retry to process - * the log until recovery is finished or the import is closed. - * - * Make a local copy of the log before parsing it if appropriate (non-MGS - * server) so that the server can start even when the MGS is down. - * - * There shouldn't be multiple processes running process_log at once -- - * sounds like badness. It actually might be fine, as long as they're not - * trying to update from the same log simultaneously, in which case we - * should use a per-log semaphore instead of cld_lock. 
- * - * \param[in] mgc MGC device by which to fetch the configuration log - * \param[in] cld log processing state (stored in lock callback data) - * - * \retval 0 on success - * \retval negative errno on failure - */ -int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld) -{ - struct lustre_handle lockh = { 0 }; - __u64 flags = LDLM_FL_NO_LRU; - bool retry = false; - int rc = 0, rcl; - - LASSERT(cld); - - /* I don't want multiple processes running process_log at once -- - * sounds like badness. It actually might be fine, as long as - * we're not trying to update from the same log - * simultaneously (in which case we should use a per-log sem.) - */ -restart: - mutex_lock(&cld->cld_lock); - if (cld->cld_stopping) { - mutex_unlock(&cld->cld_lock); - return 0; - } - - OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PAUSE_PROCESS_LOG, 20); - - CDEBUG(D_MGC, "Process log %s:%p from %d\n", cld->cld_logname, - cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1); - - /* Get the cfg lock on the llog */ - rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, LDLM_PLAIN, NULL, - LCK_CR, &flags, NULL, NULL, NULL, - cld, 0, NULL, &lockh); - if (rcl == 0) { - /* Get the cld, it will be released in mgc_blocking_ast. 
*/ - config_log_get(cld); - rc = ldlm_lock_set_data(&lockh, (void *)cld); - LASSERT(rc == 0); - } else { - CDEBUG(D_MGC, "Can't get cfg lock: %d\n", rcl); - - if (rcl == -ESHUTDOWN && - atomic_read(&mgc->u.cli.cl_mgc_refcount) > 0 && !retry) { - struct obd_import *imp; - - mutex_unlock(&cld->cld_lock); - imp = class_exp2cliimp(mgc->u.cli.cl_mgc_mgsexp); - - /* - * Let's force the pinger, and wait the import to be - * connected, note: since mgc import is non-replayable, - * and even the import state is disconnected, it does - * not mean the "recovery" is stopped, so we will keep - * waitting until timeout or the import state is - * FULL or closed - */ - ptlrpc_pinger_force(imp); - - wait_event_idle_timeout(imp->imp_recovery_waitq, - !mgc_import_in_recovery(imp), - obd_timeout * HZ); - - if (imp->imp_state == LUSTRE_IMP_FULL) { - retry = true; - goto restart; - } else { - mutex_lock(&cld->cld_lock); - spin_lock(&config_list_lock); - cld->cld_lostlock = 1; - spin_unlock(&config_list_lock); - } - } else { - /* mark cld_lostlock so that it will requeue - * after MGC becomes available. - */ - spin_lock(&config_list_lock); - cld->cld_lostlock = 1; - spin_unlock(&config_list_lock); - } - } - - if (cld_is_recover(cld)) { - rc = 0; /* this is not a fatal error for recover log */ - if (!rcl) { - rc = mgc_process_recover_log(mgc, cld); - if (rc) { - CERROR("%s: recover log %s failed: rc = %d not fatal.\n", - mgc->obd_name, cld->cld_logname, rc); - rc = 0; - spin_lock(&config_list_lock); - cld->cld_lostlock = 1; - spin_unlock(&config_list_lock); - } - } - } else { - rc = mgc_process_cfg_log(mgc, cld, rcl != 0); - } - - CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n", - mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc); - - mutex_unlock(&cld->cld_lock); - - /* Now drop the lock so MGS can revoke it */ - if (!rcl) - ldlm_lock_decref(&lockh, LCK_CR); - - return rc; -} - -/** Called from lustre_process_log. 
- * LCFG_LOG_START gets the config log from the MGS, processes it to start - * any services, and adds it to the list logs to watch (follow). - */ -static int mgc_process_config(struct obd_device *obd, u32 len, void *buf) -{ - struct lustre_cfg *lcfg = buf; - struct config_llog_instance *cfg = NULL; - char *logname; - int rc = 0; - - switch (lcfg->lcfg_command) { - case LCFG_LOV_ADD_OBD: { - /* Overloading this cfg command: register a new target */ - struct mgs_target_info *mti; - - if (LUSTRE_CFG_BUFLEN(lcfg, 1) != - sizeof(struct mgs_target_info)) { - rc = -EINVAL; - goto out; - } - - mti = (struct mgs_target_info *)lustre_cfg_buf(lcfg, 1); - CDEBUG(D_MGC, "add_target %s %#x\n", - mti->mti_svname, mti->mti_flags); - rc = mgc_target_register(obd->u.cli.cl_mgc_mgsexp, mti); - break; - } - case LCFG_LOV_DEL_OBD: - /* Unregister has no meaning at the moment. */ - CERROR("lov_del_obd unimplemented\n"); - rc = -ENOSYS; - break; - case LCFG_SPTLRPC_CONF: { - rc = sptlrpc_process_config(lcfg); - break; - } - case LCFG_LOG_START: { - struct config_llog_data *cld; - struct super_block *sb; - - logname = lustre_cfg_string(lcfg, 1); - cfg = (struct config_llog_instance *)lustre_cfg_buf(lcfg, 2); - sb = *(struct super_block **)lustre_cfg_buf(lcfg, 3); - - CDEBUG(D_MGC, "parse_log %s from %d\n", logname, - cfg->cfg_last_idx); - - /* We're only called through here on the initial mount */ - cld = config_log_add(obd, logname, cfg, sb); - if (IS_ERR(cld)) { - rc = PTR_ERR(cld); - break; - } - - /* COMPAT_146 */ - /* FIXME only set this for old logs! 
Right now this forces - * us to always skip the "inside markers" check - */ - cld->cld_cfg.cfg_flags |= CFG_F_COMPAT146; - - rc = mgc_process_log(obd, cld); - if (rc == 0 && cld->cld_recover) { - if (OCD_HAS_FLAG(&obd->u.cli.cl_import-> - imp_connect_data, IMP_RECOV)) { - rc = mgc_process_log(obd, cld->cld_recover); - } else { - struct config_llog_data *cir; - - mutex_lock(&cld->cld_lock); - cir = cld->cld_recover; - cld->cld_recover = NULL; - mutex_unlock(&cld->cld_lock); - config_log_put(cir); - } - - if (rc) - CERROR("Cannot process recover llog %d\n", rc); - } - - if (rc == 0 && cld->cld_params) { - rc = mgc_process_log(obd, cld->cld_params); - if (rc == -ENOENT) { - CDEBUG(D_MGC, - "There is no params config file yet\n"); - rc = 0; - } - /* params log is optional */ - if (rc) - CERROR( - "%s: can't process params llog: rc = %d\n", - obd->obd_name, rc); - } - - break; - } - case LCFG_LOG_END: { - logname = lustre_cfg_string(lcfg, 1); - - if (lcfg->lcfg_bufcount >= 2) - cfg = (struct config_llog_instance *)lustre_cfg_buf( - lcfg, 2); - rc = config_log_end(logname, cfg); - break; - } - default: { - CERROR("Unknown command: %d\n", lcfg->lcfg_command); - rc = -EINVAL; - goto out; - } - } -out: - return rc; -} - -static struct obd_ops mgc_obd_ops = { - .owner = THIS_MODULE, - .setup = mgc_setup, - .precleanup = mgc_precleanup, - .cleanup = mgc_cleanup, - .add_conn = client_import_add_conn, - .del_conn = client_import_del_conn, - .connect = client_connect_import, - .disconnect = client_disconnect_export, - .set_info_async = mgc_set_info_async, - .get_info = mgc_get_info, - .import_event = mgc_import_event, - .process_config = mgc_process_config, -}; - -static int __init mgc_init(void) -{ - int rc; - - rc = libcfs_setup(); - if (rc) - return rc; - - return class_register_type(&mgc_obd_ops, NULL, - LUSTRE_MGC_NAME, NULL); -} - -static void /*__exit*/ mgc_exit(void) -{ - class_unregister_type(LUSTRE_MGC_NAME); -} - -MODULE_AUTHOR("OpenSFS, Inc. 
"); -MODULE_DESCRIPTION("Lustre Management Client"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(mgc_init); -module_exit(mgc_exit); diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile deleted file mode 100644 index e3fa9acff4c4..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LUSTRE_FS) += obdclass.o - -obdclass-y := linux/linux-module.o linux/linux-sysctl.o \ - llog.o llog_cat.o llog_obd.o llog_swab.o class_obd.o debug.o \ - genops.o uuid.o lprocfs_status.o lprocfs_counters.o \ - lustre_handles.o lustre_peer.o statfs_pack.o linkea.o \ - obdo.o obd_config.o obd_mount.o lu_object.o lu_ref.o \ - cl_object.o cl_page.o cl_lock.o cl_io.o kernelcomm.o diff --git a/drivers/staging/lustre/lustre/obdclass/cl_internal.h b/drivers/staging/lustre/lustre/obdclass/cl_internal.h deleted file mode 100644 index a0db830ca841..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/cl_internal.h +++ /dev/null @@ -1,95 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Internal cl interfaces. - * - * Author: Nikita Danilov - */ -#ifndef _CL_INTERNAL_H -#define _CL_INTERNAL_H - -#define CLT_PVEC_SIZE (14) - -/** - * Possible levels of the nesting. Currently this is 2: there are "top" - * entities (files, extent locks), and "sub" entities (stripes and stripe - * locks). This is used only for debugging counters right now. - */ -enum clt_nesting_level { - CNL_TOP, - CNL_SUB, - CNL_NR -}; - -/** - * Thread local state internal for generic cl-code. - */ -struct cl_thread_info { - /* - * Common fields. - */ - struct cl_io clt_io; - struct cl_2queue clt_queue; - - /* - * Fields used by cl_lock.c - */ - struct cl_lock_descr clt_descr; - struct cl_page_list clt_list; - /** @} debugging */ - - /* - * Fields used by cl_page.c - */ - struct cl_page *clt_pvec[CLT_PVEC_SIZE]; - - /* - * Fields used by cl_io.c - */ - /** - * Pointer to the topmost ongoing IO in this thread. - */ - struct cl_io *clt_current_io; - /** - * Used for submitting a sync io. - */ - struct cl_sync_io clt_anchor; - /** - * Fields used by cl_lock_discard_pages(). 
- */ - pgoff_t clt_next_index; - pgoff_t clt_fn_index; /* first non-overlapped index */ -}; - -struct cl_thread_info *cl_env_info(const struct lu_env *env); - -#endif /* _CL_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c deleted file mode 100644 index fcdae6029258..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/cl_io.c +++ /dev/null @@ -1,1151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Client IO. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include -#include -#include -#include "cl_internal.h" - -/***************************************************************************** - * - * cl_io interface. 
- * - */ - -#define cl_io_for_each(slice, io) \ - list_for_each_entry((slice), &io->ci_layers, cis_linkage) -#define cl_io_for_each_reverse(slice, io) \ - list_for_each_entry_reverse((slice), &io->ci_layers, cis_linkage) - -static inline int cl_io_type_is_valid(enum cl_io_type type) -{ - return CIT_READ <= type && type < CIT_OP_NR; -} - -static inline int cl_io_is_loopable(const struct cl_io *io) -{ - return cl_io_type_is_valid(io->ci_type) && io->ci_type != CIT_MISC; -} - -/** - * Returns true iff there is an IO ongoing in the given environment. - */ -int cl_io_is_going(const struct lu_env *env) -{ - return cl_env_info(env)->clt_current_io != NULL; -} - -/** - * cl_io invariant that holds at all times when exported cl_io_*() functions - * are entered and left. - */ -static int cl_io_invariant(const struct cl_io *io) -{ - struct cl_io *up; - - up = io->ci_parent; - return - /* - * io can own pages only when it is ongoing. Sub-io might - * still be in CIS_LOCKED state when top-io is in - * CIS_IO_GOING. - */ - ergo(io->ci_owned_nr > 0, io->ci_state == CIS_IO_GOING || - (io->ci_state == CIS_LOCKED && up)); -} - -/** - * Finalize \a io, by calling cl_io_operations::cio_fini() bottom-to-top. - */ -void cl_io_fini(const struct lu_env *env, struct cl_io *io) -{ - struct cl_io_slice *slice; - struct cl_thread_info *info; - - LINVRNT(cl_io_type_is_valid(io->ci_type)); - LINVRNT(cl_io_invariant(io)); - - while (!list_empty(&io->ci_layers)) { - slice = container_of(io->ci_layers.prev, struct cl_io_slice, - cis_linkage); - list_del_init(&slice->cis_linkage); - if (slice->cis_iop->op[io->ci_type].cio_fini) - slice->cis_iop->op[io->ci_type].cio_fini(env, slice); - /* - * Invalidate slice to catch use after free. This assumes that - * slices are allocated within session and can be touched - * after ->cio_fini() returns. 
- */ - slice->cis_io = NULL; - } - io->ci_state = CIS_FINI; - info = cl_env_info(env); - if (info->clt_current_io == io) - info->clt_current_io = NULL; - - /* sanity check for layout change */ - switch (io->ci_type) { - case CIT_READ: - case CIT_WRITE: - case CIT_DATA_VERSION: - break; - case CIT_FAULT: - break; - case CIT_FSYNC: - LASSERT(!io->ci_need_restart); - break; - case CIT_SETATTR: - case CIT_MISC: - /* Check ignore layout change conf */ - LASSERT(ergo(io->ci_ignore_layout || !io->ci_verify_layout, - !io->ci_need_restart)); - break; - default: - LBUG(); - } -} -EXPORT_SYMBOL(cl_io_fini); - -static int cl_io_init0(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, struct cl_object *obj) -{ - struct cl_object *scan; - int result; - - LINVRNT(io->ci_state == CIS_ZERO || io->ci_state == CIS_FINI); - LINVRNT(cl_io_type_is_valid(iot)); - LINVRNT(cl_io_invariant(io)); - - io->ci_type = iot; - INIT_LIST_HEAD(&io->ci_lockset.cls_todo); - INIT_LIST_HEAD(&io->ci_lockset.cls_done); - INIT_LIST_HEAD(&io->ci_layers); - - result = 0; - cl_object_for_each(scan, obj) { - if (scan->co_ops->coo_io_init) { - result = scan->co_ops->coo_io_init(env, scan, io); - if (result != 0) - break; - } - } - if (result == 0) - io->ci_state = CIS_INIT; - return result; -} - -/** - * Initialize sub-io, by calling cl_io_operations::cio_init() top-to-bottom. - * - * \pre obj != cl_object_top(obj) - */ -int cl_io_sub_init(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, struct cl_object *obj) -{ - struct cl_thread_info *info = cl_env_info(env); - - LASSERT(obj != cl_object_top(obj)); - if (!info->clt_current_io) - info->clt_current_io = io; - return cl_io_init0(env, io, iot, obj); -} -EXPORT_SYMBOL(cl_io_sub_init); - -/** - * Initialize \a io, by calling cl_io_operations::cio_init() top-to-bottom. - * - * Caller has to call cl_io_fini() after a call to cl_io_init(), no matter - * what the latter returned. 
- * - * \pre obj == cl_object_top(obj) - * \pre cl_io_type_is_valid(iot) - * \post cl_io_type_is_valid(io->ci_type) && io->ci_type == iot - */ -int cl_io_init(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, struct cl_object *obj) -{ - struct cl_thread_info *info = cl_env_info(env); - - LASSERT(obj == cl_object_top(obj)); - LASSERT(!info->clt_current_io); - - info->clt_current_io = io; - return cl_io_init0(env, io, iot, obj); -} -EXPORT_SYMBOL(cl_io_init); - -/** - * Initialize read or write io. - * - * \pre iot == CIT_READ || iot == CIT_WRITE - */ -int cl_io_rw_init(const struct lu_env *env, struct cl_io *io, - enum cl_io_type iot, loff_t pos, size_t count) -{ - LINVRNT(iot == CIT_READ || iot == CIT_WRITE); - LINVRNT(io->ci_obj); - - LU_OBJECT_HEADER(D_VFSTRACE, env, &io->ci_obj->co_lu, - "io range: %u [%llu, %llu) %u %u\n", - iot, (__u64)pos, (__u64)pos + count, - io->u.ci_rw.crw_nonblock, io->u.ci_wr.wr_append); - io->u.ci_rw.crw_pos = pos; - io->u.ci_rw.crw_count = count; - return cl_io_init(env, io, iot, io->ci_obj); -} -EXPORT_SYMBOL(cl_io_rw_init); - -static int cl_lock_descr_sort(const struct cl_lock_descr *d0, - const struct cl_lock_descr *d1) -{ - return lu_fid_cmp(lu_object_fid(&d0->cld_obj->co_lu), - lu_object_fid(&d1->cld_obj->co_lu)); -} - -/* - * Sort locks in lexicographical order of their (fid, start-offset) pairs. - */ -static void cl_io_locks_sort(struct cl_io *io) -{ - int done = 0; - - /* hidden treasure: bubble sort for now. */ - do { - struct cl_io_lock_link *curr; - struct cl_io_lock_link *prev; - struct cl_io_lock_link *temp; - - done = 1; - prev = NULL; - - list_for_each_entry_safe(curr, temp, - &io->ci_lockset.cls_todo, - cill_linkage) { - if (prev) { - switch (cl_lock_descr_sort(&prev->cill_descr, - &curr->cill_descr)) { - case 0: - /* - * IMPOSSIBLE: Identical locks are - * already removed at - * this point. 
- */ - default: - LBUG(); - case 1: - list_move_tail(&curr->cill_linkage, - &prev->cill_linkage); - done = 0; - continue; /* don't change prev: it's - * still "previous" - */ - case -1: /* already in order */ - break; - } - } - prev = curr; - } - } while (!done); -} - -static void cl_lock_descr_merge(struct cl_lock_descr *d0, - const struct cl_lock_descr *d1) -{ - d0->cld_start = min(d0->cld_start, d1->cld_start); - d0->cld_end = max(d0->cld_end, d1->cld_end); - - if (d1->cld_mode == CLM_WRITE && d0->cld_mode != CLM_WRITE) - d0->cld_mode = CLM_WRITE; - - if (d1->cld_mode == CLM_GROUP && d0->cld_mode != CLM_GROUP) - d0->cld_mode = CLM_GROUP; -} - -static int cl_lockset_merge(const struct cl_lockset *set, - const struct cl_lock_descr *need) -{ - struct cl_io_lock_link *scan; - - list_for_each_entry(scan, &set->cls_todo, cill_linkage) { - if (!cl_object_same(scan->cill_descr.cld_obj, need->cld_obj)) - continue; - - /* Merge locks for the same object because ldlm lock server - * may expand the lock extent, otherwise there is a deadlock - * case if two conflicted locks are queueud for the same object - * and lock server expands one lock to overlap the another. - * The side effect is that it can generate a multi-stripe lock - * that may cause casacading problem - */ - cl_lock_descr_merge(&scan->cill_descr, need); - CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n", - scan->cill_descr.cld_mode, scan->cill_descr.cld_start, - scan->cill_descr.cld_end); - return 1; - } - return 0; -} - -static int cl_lockset_lock(const struct lu_env *env, struct cl_io *io, - struct cl_lockset *set) -{ - struct cl_io_lock_link *link; - struct cl_io_lock_link *temp; - int result; - - result = 0; - list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) { - result = cl_lock_request(env, io, &link->cill_lock); - if (result < 0) - break; - - list_move(&link->cill_linkage, &set->cls_done); - } - return result; -} - -/** - * Takes locks necessary for the current iteration of io. 
- * - * Calls cl_io_operations::cio_lock() top-to-bottom to collect locks required - * by layers for the current iteration. Then sort locks (to avoid dead-locks), - * and acquire them. - */ -int cl_io_lock(const struct lu_env *env, struct cl_io *io) -{ - const struct cl_io_slice *scan; - int result = 0; - - LINVRNT(cl_io_is_loopable(io)); - LINVRNT(io->ci_state == CIS_IT_STARTED); - LINVRNT(cl_io_invariant(io)); - - cl_io_for_each(scan, io) { - if (!scan->cis_iop->op[io->ci_type].cio_lock) - continue; - result = scan->cis_iop->op[io->ci_type].cio_lock(env, scan); - if (result != 0) - break; - } - if (result == 0) { - cl_io_locks_sort(io); - result = cl_lockset_lock(env, io, &io->ci_lockset); - } - if (result != 0) - cl_io_unlock(env, io); - else - io->ci_state = CIS_LOCKED; - return result; -} -EXPORT_SYMBOL(cl_io_lock); - -/** - * Release locks takes by io. - */ -void cl_io_unlock(const struct lu_env *env, struct cl_io *io) -{ - struct cl_lockset *set; - struct cl_io_lock_link *link; - struct cl_io_lock_link *temp; - const struct cl_io_slice *scan; - - LASSERT(cl_io_is_loopable(io)); - LASSERT(CIS_IT_STARTED <= io->ci_state && io->ci_state < CIS_UNLOCKED); - LINVRNT(cl_io_invariant(io)); - - set = &io->ci_lockset; - - list_for_each_entry_safe(link, temp, &set->cls_todo, cill_linkage) { - list_del_init(&link->cill_linkage); - if (link->cill_fini) - link->cill_fini(env, link); - } - - list_for_each_entry_safe(link, temp, &set->cls_done, cill_linkage) { - list_del_init(&link->cill_linkage); - cl_lock_release(env, &link->cill_lock); - if (link->cill_fini) - link->cill_fini(env, link); - } - - cl_io_for_each_reverse(scan, io) { - if (scan->cis_iop->op[io->ci_type].cio_unlock) - scan->cis_iop->op[io->ci_type].cio_unlock(env, scan); - } - io->ci_state = CIS_UNLOCKED; -} -EXPORT_SYMBOL(cl_io_unlock); - -/** - * Prepares next iteration of io. - * - * Calls cl_io_operations::cio_iter_init() top-to-bottom. 
This exists to give - * layers a chance to modify io parameters, e.g., so that lov can restrict io - * to a single stripe. - */ -int cl_io_iter_init(const struct lu_env *env, struct cl_io *io) -{ - const struct cl_io_slice *scan; - int result; - - LINVRNT(cl_io_is_loopable(io)); - LINVRNT(io->ci_state == CIS_INIT || io->ci_state == CIS_IT_ENDED); - LINVRNT(cl_io_invariant(io)); - - result = 0; - cl_io_for_each(scan, io) { - if (!scan->cis_iop->op[io->ci_type].cio_iter_init) - continue; - result = scan->cis_iop->op[io->ci_type].cio_iter_init(env, - scan); - if (result != 0) - break; - } - if (result == 0) - io->ci_state = CIS_IT_STARTED; - return result; -} -EXPORT_SYMBOL(cl_io_iter_init); - -/** - * Finalizes io iteration. - * - * Calls cl_io_operations::cio_iter_fini() bottom-to-top. - */ -void cl_io_iter_fini(const struct lu_env *env, struct cl_io *io) -{ - const struct cl_io_slice *scan; - - LINVRNT(cl_io_is_loopable(io)); - LINVRNT(io->ci_state == CIS_UNLOCKED); - LINVRNT(cl_io_invariant(io)); - - cl_io_for_each_reverse(scan, io) { - if (scan->cis_iop->op[io->ci_type].cio_iter_fini) - scan->cis_iop->op[io->ci_type].cio_iter_fini(env, scan); - } - io->ci_state = CIS_IT_ENDED; -} -EXPORT_SYMBOL(cl_io_iter_fini); - -/** - * Records that read or write io progressed \a nob bytes forward. - */ -static void cl_io_rw_advance(const struct lu_env *env, struct cl_io *io, - size_t nob) -{ - const struct cl_io_slice *scan; - - LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE || - nob == 0); - LINVRNT(cl_io_is_loopable(io)); - LINVRNT(cl_io_invariant(io)); - - io->u.ci_rw.crw_pos += nob; - io->u.ci_rw.crw_count -= nob; - - /* layers have to be notified. */ - cl_io_for_each_reverse(scan, io) { - if (scan->cis_iop->op[io->ci_type].cio_advance) - scan->cis_iop->op[io->ci_type].cio_advance(env, scan, - nob); - } -} - -/** - * Adds a lock to a lockset. 
- */ -int cl_io_lock_add(const struct lu_env *env, struct cl_io *io, - struct cl_io_lock_link *link) -{ - int result; - - if (cl_lockset_merge(&io->ci_lockset, &link->cill_descr)) { - result = 1; - } else { - list_add(&link->cill_linkage, &io->ci_lockset.cls_todo); - result = 0; - } - return result; -} -EXPORT_SYMBOL(cl_io_lock_add); - -static void cl_free_io_lock_link(const struct lu_env *env, - struct cl_io_lock_link *link) -{ - kfree(link); -} - -/** - * Allocates new lock link, and uses it to add a lock to a lockset. - */ -int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io, - struct cl_lock_descr *descr) -{ - struct cl_io_lock_link *link; - int result; - - link = kzalloc(sizeof(*link), GFP_NOFS); - if (link) { - link->cill_descr = *descr; - link->cill_fini = cl_free_io_lock_link; - result = cl_io_lock_add(env, io, link); - if (result) /* lock match */ - link->cill_fini(env, link); - } else { - result = -ENOMEM; - } - - return result; -} -EXPORT_SYMBOL(cl_io_lock_alloc_add); - -/** - * Starts io by calling cl_io_operations::cio_start() top-to-bottom. - */ -int cl_io_start(const struct lu_env *env, struct cl_io *io) -{ - const struct cl_io_slice *scan; - int result = 0; - - LINVRNT(cl_io_is_loopable(io)); - LINVRNT(io->ci_state == CIS_LOCKED); - LINVRNT(cl_io_invariant(io)); - - io->ci_state = CIS_IO_GOING; - cl_io_for_each(scan, io) { - if (!scan->cis_iop->op[io->ci_type].cio_start) - continue; - result = scan->cis_iop->op[io->ci_type].cio_start(env, scan); - if (result != 0) - break; - } - if (result >= 0) - result = 0; - return result; -} -EXPORT_SYMBOL(cl_io_start); - -/** - * Wait until current io iteration is finished by calling - * cl_io_operations::cio_end() bottom-to-top. 
- */ -void cl_io_end(const struct lu_env *env, struct cl_io *io) -{ - const struct cl_io_slice *scan; - - LINVRNT(cl_io_is_loopable(io)); - LINVRNT(io->ci_state == CIS_IO_GOING); - LINVRNT(cl_io_invariant(io)); - - cl_io_for_each_reverse(scan, io) { - if (scan->cis_iop->op[io->ci_type].cio_end) - scan->cis_iop->op[io->ci_type].cio_end(env, scan); - /* TODO: error handling. */ - } - io->ci_state = CIS_IO_FINISHED; -} -EXPORT_SYMBOL(cl_io_end); - -/** - * Called by read io, to decide the readahead extent - * - * \see cl_io_operations::cio_read_ahead() - */ -int cl_io_read_ahead(const struct lu_env *env, struct cl_io *io, - pgoff_t start, struct cl_read_ahead *ra) -{ - const struct cl_io_slice *scan; - int result = 0; - - LINVRNT(io->ci_type == CIT_READ || io->ci_type == CIT_FAULT); - LINVRNT(cl_io_invariant(io)); - - cl_io_for_each(scan, io) { - if (!scan->cis_iop->cio_read_ahead) - continue; - - result = scan->cis_iop->cio_read_ahead(env, scan, start, ra); - if (result) - break; - } - return result > 0 ? 0 : result; -} -EXPORT_SYMBOL(cl_io_read_ahead); - -/** - * Commit a list of contiguous pages into writeback cache. - * - * \returns 0 if all pages committed, or errcode if error occurred. - * \see cl_io_operations::cio_commit_async() - */ -int cl_io_commit_async(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *queue, int from, int to, - cl_commit_cbt cb) -{ - const struct cl_io_slice *scan; - int result = 0; - - cl_io_for_each(scan, io) { - if (!scan->cis_iop->cio_commit_async) - continue; - result = scan->cis_iop->cio_commit_async(env, scan, queue, - from, to, cb); - if (result != 0) - break; - } - return result; -} -EXPORT_SYMBOL(cl_io_commit_async); - -/** - * Submits a list of pages for immediate io. - * - * After the function gets returned, The submitted pages are moved to - * queue->c2_qout queue, and queue->c2_qin contain both the pages don't need - * to be submitted, and the pages are errant to submit. 
- * - * \returns 0 if at least one page was submitted, error code otherwise. - * \see cl_io_operations::cio_submit() - */ -int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io, - enum cl_req_type crt, struct cl_2queue *queue) -{ - const struct cl_io_slice *scan; - int result = 0; - - cl_io_for_each(scan, io) { - if (!scan->cis_iop->cio_submit) - continue; - result = scan->cis_iop->cio_submit(env, scan, crt, queue); - if (result != 0) - break; - } - /* - * If ->cio_submit() failed, no pages were sent. - */ - LASSERT(ergo(result != 0, list_empty(&queue->c2_qout.pl_pages))); - return result; -} -EXPORT_SYMBOL(cl_io_submit_rw); - -static void cl_page_list_assume(const struct lu_env *env, - struct cl_io *io, struct cl_page_list *plist); - -/** - * Submit a sync_io and wait for the IO to be finished, or error happens. - * If \a timeout is zero, it means to wait for the IO unconditionally. - */ -int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io, - enum cl_req_type iot, struct cl_2queue *queue, - long timeout) -{ - struct cl_sync_io *anchor = &cl_env_info(env)->clt_anchor; - struct cl_page *pg; - int rc; - - cl_page_list_for_each(pg, &queue->c2_qin) { - LASSERT(!pg->cp_sync_io); - pg->cp_sync_io = anchor; - } - - cl_sync_io_init(anchor, queue->c2_qin.pl_nr, &cl_sync_io_end); - rc = cl_io_submit_rw(env, io, iot, queue); - if (rc == 0) { - /* - * If some pages weren't sent for any reason (e.g., - * read found up-to-date pages in the cache, or write found - * clean pages), count them as completed to avoid infinite - * wait. - */ - cl_page_list_for_each(pg, &queue->c2_qin) { - pg->cp_sync_io = NULL; - cl_sync_io_note(env, anchor, 1); - } - - /* wait for the IO to be finished. 
*/ - rc = cl_sync_io_wait(env, anchor, timeout); - cl_page_list_assume(env, io, &queue->c2_qout); - } else { - LASSERT(list_empty(&queue->c2_qout.pl_pages)); - cl_page_list_for_each(pg, &queue->c2_qin) - pg->cp_sync_io = NULL; - } - return rc; -} -EXPORT_SYMBOL(cl_io_submit_sync); - -/** - * Main io loop. - * - * Pumps io through iterations calling - * - * - cl_io_iter_init() - * - * - cl_io_lock() - * - * - cl_io_start() - * - * - cl_io_end() - * - * - cl_io_unlock() - * - * - cl_io_iter_fini() - * - * repeatedly until there is no more io to do. - */ -int cl_io_loop(const struct lu_env *env, struct cl_io *io) -{ - int result = 0; - - LINVRNT(cl_io_is_loopable(io)); - - do { - size_t nob; - - io->ci_continue = 0; - result = cl_io_iter_init(env, io); - if (result == 0) { - nob = io->ci_nob; - result = cl_io_lock(env, io); - if (result == 0) { - /* - * Notify layers that locks has been taken, - * and do actual i/o. - * - * - llite: kms, short read; - * - llite: generic_file_read(); - */ - result = cl_io_start(env, io); - /* - * Send any remaining pending - * io, etc. - * - * - llite: ll_rw_stats_tally. - */ - cl_io_end(env, io); - cl_io_unlock(env, io); - cl_io_rw_advance(env, io, io->ci_nob - nob); - } - } - cl_io_iter_fini(env, io); - } while (result == 0 && io->ci_continue); - if (result == 0) - result = io->ci_result; - return result < 0 ? result : 0; -} -EXPORT_SYMBOL(cl_io_loop); - -/** - * Adds io slice to the cl_io. - * - * This is called by cl_object_operations::coo_io_init() methods to add a - * per-layer state to the io. New state is added at the end of - * cl_io::ci_layers list, that is, it is at the bottom of the stack. 
- * - * \see cl_lock_slice_add(), cl_req_slice_add(), cl_page_slice_add() - */ -void cl_io_slice_add(struct cl_io *io, struct cl_io_slice *slice, - struct cl_object *obj, - const struct cl_io_operations *ops) -{ - struct list_head *linkage = &slice->cis_linkage; - - LASSERT((!linkage->prev && !linkage->next) || - list_empty(linkage)); - - list_add_tail(linkage, &io->ci_layers); - slice->cis_io = io; - slice->cis_obj = obj; - slice->cis_iop = ops; -} -EXPORT_SYMBOL(cl_io_slice_add); - -/** - * Initializes page list. - */ -void cl_page_list_init(struct cl_page_list *plist) -{ - plist->pl_nr = 0; - INIT_LIST_HEAD(&plist->pl_pages); - plist->pl_owner = current; -} -EXPORT_SYMBOL(cl_page_list_init); - -/** - * Adds a page to a page list. - */ -void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page) -{ - /* it would be better to check that page is owned by "current" io, but - * it is not passed here. - */ - LASSERT(page->cp_owner); - LINVRNT(plist->pl_owner == current); - - LASSERT(list_empty(&page->cp_batch)); - list_add_tail(&page->cp_batch, &plist->pl_pages); - ++plist->pl_nr; - lu_ref_add_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist); - cl_page_get(page); -} -EXPORT_SYMBOL(cl_page_list_add); - -/** - * Removes a page from a page list. - */ -void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist, - struct cl_page *page) -{ - LASSERT(plist->pl_nr > 0); - LASSERT(cl_page_is_vmlocked(env, page)); - LINVRNT(plist->pl_owner == current); - - list_del_init(&page->cp_batch); - --plist->pl_nr; - lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist); - cl_page_put(env, page); -} -EXPORT_SYMBOL(cl_page_list_del); - -/** - * Moves a page from one page list to another. 
- */ -void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src, - struct cl_page *page) -{ - LASSERT(src->pl_nr > 0); - LINVRNT(dst->pl_owner == current); - LINVRNT(src->pl_owner == current); - - list_move_tail(&page->cp_batch, &dst->pl_pages); - --src->pl_nr; - ++dst->pl_nr; - lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue", - src, dst); -} -EXPORT_SYMBOL(cl_page_list_move); - -/** - * Moves a page from one page list to the head of another list. - */ -void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src, - struct cl_page *page) -{ - LASSERT(src->pl_nr > 0); - LINVRNT(dst->pl_owner == current); - LINVRNT(src->pl_owner == current); - - list_move(&page->cp_batch, &dst->pl_pages); - --src->pl_nr; - ++dst->pl_nr; - lu_ref_set_at(&page->cp_reference, &page->cp_queue_ref, "queue", - src, dst); -} -EXPORT_SYMBOL(cl_page_list_move_head); - -/** - * splice the cl_page_list, just as list head does - */ -void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head) -{ - struct cl_page *page; - struct cl_page *tmp; - - LINVRNT(list->pl_owner == current); - LINVRNT(head->pl_owner == current); - - cl_page_list_for_each_safe(page, tmp, list) - cl_page_list_move(head, list, page); -} -EXPORT_SYMBOL(cl_page_list_splice); - - -/** - * Disowns pages in a queue. - */ -void cl_page_list_disown(const struct lu_env *env, - struct cl_io *io, struct cl_page_list *plist) -{ - struct cl_page *page; - struct cl_page *temp; - - LINVRNT(plist->pl_owner == current); - - cl_page_list_for_each_safe(page, temp, plist) { - LASSERT(plist->pl_nr > 0); - - list_del_init(&page->cp_batch); - --plist->pl_nr; - /* - * cl_page_disown0 rather than usual cl_page_disown() is used, - * because pages are possibly in CPS_FREEING state already due - * to the call to cl_page_list_discard(). - */ - /* - * XXX cl_page_disown0() will fail if page is not locked. 
- */ - cl_page_disown0(env, io, page); - lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", - plist); - cl_page_put(env, page); - } -} -EXPORT_SYMBOL(cl_page_list_disown); - -/** - * Releases pages from queue. - */ -void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist) -{ - struct cl_page *page; - struct cl_page *temp; - - LINVRNT(plist->pl_owner == current); - - cl_page_list_for_each_safe(page, temp, plist) - cl_page_list_del(env, plist, page); - LASSERT(plist->pl_nr == 0); -} -EXPORT_SYMBOL(cl_page_list_fini); - -/** - * Assumes all pages in a queue. - */ -static void cl_page_list_assume(const struct lu_env *env, - struct cl_io *io, struct cl_page_list *plist) -{ - struct cl_page *page; - - LINVRNT(plist->pl_owner == current); - - cl_page_list_for_each(page, plist) - cl_page_assume(env, io, page); -} - -/** - * Discards all pages in a queue. - */ -static void cl_page_list_discard(const struct lu_env *env, struct cl_io *io, - struct cl_page_list *plist) -{ - struct cl_page *page; - - LINVRNT(plist->pl_owner == current); - cl_page_list_for_each(page, plist) - cl_page_discard(env, io, page); -} - -/** - * Initialize dual page queue. - */ -void cl_2queue_init(struct cl_2queue *queue) -{ - cl_page_list_init(&queue->c2_qin); - cl_page_list_init(&queue->c2_qout); -} -EXPORT_SYMBOL(cl_2queue_init); - -/** - * Disown pages in both lists of a 2-queue. - */ -void cl_2queue_disown(const struct lu_env *env, - struct cl_io *io, struct cl_2queue *queue) -{ - cl_page_list_disown(env, io, &queue->c2_qin); - cl_page_list_disown(env, io, &queue->c2_qout); -} -EXPORT_SYMBOL(cl_2queue_disown); - -/** - * Discard (truncate) pages in both lists of a 2-queue. - */ -void cl_2queue_discard(const struct lu_env *env, - struct cl_io *io, struct cl_2queue *queue) -{ - cl_page_list_discard(env, io, &queue->c2_qin); - cl_page_list_discard(env, io, &queue->c2_qout); -} -EXPORT_SYMBOL(cl_2queue_discard); - -/** - * Finalize both page lists of a 2-queue. 
- */ -void cl_2queue_fini(const struct lu_env *env, struct cl_2queue *queue) -{ - cl_page_list_fini(env, &queue->c2_qout); - cl_page_list_fini(env, &queue->c2_qin); -} -EXPORT_SYMBOL(cl_2queue_fini); - -/** - * Initialize a 2-queue to contain \a page in its incoming page list. - */ -void cl_2queue_init_page(struct cl_2queue *queue, struct cl_page *page) -{ - cl_2queue_init(queue); - /* - * Add a page to the incoming page list of 2-queue. - */ - cl_page_list_add(&queue->c2_qin, page); -} -EXPORT_SYMBOL(cl_2queue_init_page); - -/** - * Returns top-level io. - * - * \see cl_object_top() - */ -struct cl_io *cl_io_top(struct cl_io *io) -{ - while (io->ci_parent) - io = io->ci_parent; - return io; -} -EXPORT_SYMBOL(cl_io_top); - -/** - * Fills in attributes that are passed to server together with transfer. Only - * attributes from \a flags may be touched. This can be called multiple times - * for the same request. - */ -void cl_req_attr_set(const struct lu_env *env, struct cl_object *obj, - struct cl_req_attr *attr) -{ - struct cl_object *scan; - - cl_object_for_each(scan, obj) { - if (scan->co_ops->coo_req_attr_set) - scan->co_ops->coo_req_attr_set(env, scan, attr); - } -} -EXPORT_SYMBOL(cl_req_attr_set); - -/* cl_sync_io_callback assumes the caller must call cl_sync_io_wait() to - * wait for the IO to finish. 
- */ -void cl_sync_io_end(const struct lu_env *env, struct cl_sync_io *anchor) -{ - wake_up_all(&anchor->csi_waitq); - - /* it's safe to nuke or reuse anchor now */ - atomic_set(&anchor->csi_barrier, 0); -} -EXPORT_SYMBOL(cl_sync_io_end); - -/** - * Initialize synchronous io wait anchor - */ -void cl_sync_io_init(struct cl_sync_io *anchor, int nr, - void (*end)(const struct lu_env *, struct cl_sync_io *)) -{ - memset(anchor, 0, sizeof(*anchor)); - init_waitqueue_head(&anchor->csi_waitq); - atomic_set(&anchor->csi_sync_nr, nr); - atomic_set(&anchor->csi_barrier, nr > 0); - anchor->csi_sync_rc = 0; - anchor->csi_end_io = end; - LASSERT(end); -} -EXPORT_SYMBOL(cl_sync_io_init); - -/** - * Wait until all IO completes. Transfer completion routine has to call - * cl_sync_io_note() for every entity. - */ -int cl_sync_io_wait(const struct lu_env *env, struct cl_sync_io *anchor, - long timeout) -{ - int rc = 1; - - LASSERT(timeout >= 0); - - if (timeout == 0) - wait_event_idle(anchor->csi_waitq, - atomic_read(&anchor->csi_sync_nr) == 0); - else - rc = wait_event_idle_timeout(anchor->csi_waitq, - atomic_read(&anchor->csi_sync_nr) == 0, - timeout * HZ); - if (rc == 0) { - rc = -ETIMEDOUT; - CERROR("IO failed: %d, still wait for %d remaining entries\n", - rc, atomic_read(&anchor->csi_sync_nr)); - - wait_event_idle(anchor->csi_waitq, - atomic_read(&anchor->csi_sync_nr) == 0); - } else { - rc = anchor->csi_sync_rc; - } - LASSERT(atomic_read(&anchor->csi_sync_nr) == 0); - - /* wait until cl_sync_io_note() has done wakeup */ - while (unlikely(atomic_read(&anchor->csi_barrier) != 0)) - cpu_relax(); - - - return rc; -} -EXPORT_SYMBOL(cl_sync_io_wait); - -/** - * Indicate that transfer of a single page completed. 
- */ -void cl_sync_io_note(const struct lu_env *env, struct cl_sync_io *anchor, - int ioret) -{ - if (anchor->csi_sync_rc == 0 && ioret < 0) - anchor->csi_sync_rc = ioret; - /* - * Synchronous IO done without releasing page lock (e.g., as a part of - * ->{prepare,commit}_write(). Completion is used to signal the end of - * IO. - */ - LASSERT(atomic_read(&anchor->csi_sync_nr) > 0); - if (atomic_dec_and_test(&anchor->csi_sync_nr)) { - LASSERT(anchor->csi_end_io); - anchor->csi_end_io(env, anchor); - /* Can't access anchor any more */ - } -} -EXPORT_SYMBOL(cl_sync_io_note); diff --git a/drivers/staging/lustre/lustre/obdclass/cl_lock.c b/drivers/staging/lustre/lustre/obdclass/cl_lock.c deleted file mode 100644 index 9ca29a26a38b..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/cl_lock.c +++ /dev/null @@ -1,275 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * Client Extent Lock. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include -#include -#include "cl_internal.h" - -static void cl_lock_trace0(int level, const struct lu_env *env, - const char *prefix, const struct cl_lock *lock, - const char *func, const int line) -{ - struct cl_object_header *h = cl_object_header(lock->cll_descr.cld_obj); - - CDEBUG(level, "%s: %p (%p/%d) at %s():%d\n", - prefix, lock, env, h->coh_nesting, func, line); -} -#define cl_lock_trace(level, env, prefix, lock) \ - cl_lock_trace0(level, env, prefix, lock, __func__, __LINE__) - -/** - * Adds lock slice to the compound lock. - * - * This is called by cl_object_operations::coo_lock_init() methods to add a - * per-layer state to the lock. New state is added at the end of - * cl_lock::cll_layers list, that is, it is at the bottom of the stack. - * - * \see cl_req_slice_add(), cl_page_slice_add(), cl_io_slice_add() - */ -void cl_lock_slice_add(struct cl_lock *lock, struct cl_lock_slice *slice, - struct cl_object *obj, - const struct cl_lock_operations *ops) -{ - slice->cls_lock = lock; - list_add_tail(&slice->cls_linkage, &lock->cll_layers); - slice->cls_obj = obj; - slice->cls_ops = ops; -} -EXPORT_SYMBOL(cl_lock_slice_add); - -void cl_lock_fini(const struct lu_env *env, struct cl_lock *lock) -{ - struct cl_lock_slice *slice; - cl_lock_trace(D_DLMTRACE, env, "destroy lock", lock); - - while ((slice = list_first_entry_or_null(&lock->cll_layers, - struct cl_lock_slice, - cls_linkage)) != NULL) { - list_del_init(lock->cll_layers.next); - slice->cls_ops->clo_fini(env, slice); - } - POISON(lock, 0x5a, sizeof(*lock)); -} -EXPORT_SYMBOL(cl_lock_fini); - -int cl_lock_init(const struct lu_env *env, struct cl_lock *lock, - const struct cl_io *io) -{ - struct cl_object *obj = lock->cll_descr.cld_obj; - struct cl_object *scan; - int result = 0; - - /* Make sure cl_lock::cll_descr is initialized. 
*/ - LASSERT(obj); - - INIT_LIST_HEAD(&lock->cll_layers); - list_for_each_entry(scan, &obj->co_lu.lo_header->loh_layers, - co_lu.lo_linkage) { - result = scan->co_ops->coo_lock_init(env, scan, lock, io); - if (result != 0) { - cl_lock_fini(env, lock); - break; - } - } - - return result; -} -EXPORT_SYMBOL(cl_lock_init); - -/** - * Returns a slice with a lock, corresponding to the given layer in the - * device stack. - * - * \see cl_page_at() - */ -const struct cl_lock_slice *cl_lock_at(const struct cl_lock *lock, - const struct lu_device_type *dtype) -{ - const struct cl_lock_slice *slice; - - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_obj->co_lu.lo_dev->ld_type == dtype) - return slice; - } - return NULL; -} -EXPORT_SYMBOL(cl_lock_at); - -void cl_lock_cancel(const struct lu_env *env, struct cl_lock *lock) -{ - const struct cl_lock_slice *slice; - - cl_lock_trace(D_DLMTRACE, env, "cancel lock", lock); - list_for_each_entry_reverse(slice, &lock->cll_layers, cls_linkage) { - if (slice->cls_ops->clo_cancel) - slice->cls_ops->clo_cancel(env, slice); - } -} -EXPORT_SYMBOL(cl_lock_cancel); - -/** - * Enqueue a lock. - * \param anchor: if we need to wait for resources before getting the lock, - * use @anchor for the purpose. - * \retval 0 enqueue successfully - * \retval <0 error code - */ -int cl_lock_enqueue(const struct lu_env *env, struct cl_io *io, - struct cl_lock *lock, struct cl_sync_io *anchor) -{ - const struct cl_lock_slice *slice; - int rc = -ENOSYS; - - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - if (!slice->cls_ops->clo_enqueue) - continue; - - rc = slice->cls_ops->clo_enqueue(env, slice, io, anchor); - if (rc != 0) - break; - } - return rc; -} -EXPORT_SYMBOL(cl_lock_enqueue); - -/** - * Main high-level entry point of cl_lock interface that finds existing or - * enqueues new lock matching given description. 
- */ -int cl_lock_request(const struct lu_env *env, struct cl_io *io, - struct cl_lock *lock) -{ - struct cl_sync_io *anchor = NULL; - __u32 enq_flags = lock->cll_descr.cld_enq_flags; - int rc; - - rc = cl_lock_init(env, lock, io); - if (rc < 0) - return rc; - - if ((enq_flags & CEF_ASYNC) && !(enq_flags & CEF_AGL)) { - anchor = &cl_env_info(env)->clt_anchor; - cl_sync_io_init(anchor, 1, cl_sync_io_end); - } - - rc = cl_lock_enqueue(env, io, lock, anchor); - - if (anchor) { - int rc2; - - /* drop the reference count held at initialization time */ - cl_sync_io_note(env, anchor, 0); - rc2 = cl_sync_io_wait(env, anchor, 0); - if (rc2 < 0 && rc == 0) - rc = rc2; - } - - if (rc < 0) - cl_lock_release(env, lock); - - return rc; -} -EXPORT_SYMBOL(cl_lock_request); - -/** - * Releases a hold and a reference on a lock, obtained by cl_lock_hold(). - */ -void cl_lock_release(const struct lu_env *env, struct cl_lock *lock) -{ - cl_lock_trace(D_DLMTRACE, env, "release lock", lock); - cl_lock_cancel(env, lock); - cl_lock_fini(env, lock); -} -EXPORT_SYMBOL(cl_lock_release); - -const char *cl_lock_mode_name(const enum cl_lock_mode mode) -{ - static const char * const names[] = { - [CLM_READ] = "R", - [CLM_WRITE] = "W", - [CLM_GROUP] = "G" - }; - if (0 <= mode && mode < ARRAY_SIZE(names)) - return names[mode]; - else - return "U"; -} -EXPORT_SYMBOL(cl_lock_mode_name); - -/** - * Prints human readable representation of a lock description. - */ -void cl_lock_descr_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, - const struct cl_lock_descr *descr) -{ - const struct lu_fid *fid; - - fid = lu_object_fid(&descr->cld_obj->co_lu); - (*printer)(env, cookie, DDESCR "@" DFID, PDESCR(descr), PFID(fid)); -} -EXPORT_SYMBOL(cl_lock_descr_print); - -/** - * Prints human readable representation of \a lock to the \a f. 
- */ -void cl_lock_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct cl_lock *lock) -{ - const struct cl_lock_slice *slice; - - (*printer)(env, cookie, "lock@%p", lock); - cl_lock_descr_print(env, cookie, printer, &lock->cll_descr); - (*printer)(env, cookie, " {\n"); - - list_for_each_entry(slice, &lock->cll_layers, cls_linkage) { - (*printer)(env, cookie, " %s@%p: ", - slice->cls_obj->co_lu.lo_dev->ld_type->ldt_name, - slice); - if (slice->cls_ops->clo_print) - slice->cls_ops->clo_print(env, cookie, printer, slice); - (*printer)(env, cookie, "\n"); - } - (*printer)(env, cookie, "} lock@%p\n", lock); -} -EXPORT_SYMBOL(cl_lock_print); diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c deleted file mode 100644 index 42cce2dc5a45..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/cl_object.c +++ /dev/null @@ -1,1059 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Client Lustre Object. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -/* - * Locking. - * - * i_mutex - * PG_locked - * ->coh_attr_guard - * ->ls_guard - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -/* class_put_type() */ -#include -#include -#include -#include -#include -#include -#include "cl_internal.h" - -static struct kmem_cache *cl_env_kmem; - -/** Lock class of cl_object_header::coh_attr_guard */ -static struct lock_class_key cl_attr_guard_class; - -/** - * Initialize cl_object_header. - */ -int cl_object_header_init(struct cl_object_header *h) -{ - int result; - - result = lu_object_header_init(&h->coh_lu); - if (result == 0) { - spin_lock_init(&h->coh_attr_guard); - lockdep_set_class(&h->coh_attr_guard, &cl_attr_guard_class); - h->coh_page_bufsize = 0; - } - return result; -} -EXPORT_SYMBOL(cl_object_header_init); - -/** - * Returns a cl_object with a given \a fid. - * - * Returns either cached or newly created object. Additional reference on the - * returned object is acquired. - * - * \see lu_object_find(), cl_page_find(), cl_lock_find() - */ -struct cl_object *cl_object_find(const struct lu_env *env, - struct cl_device *cd, const struct lu_fid *fid, - const struct cl_object_conf *c) -{ - might_sleep(); - return lu2cl(lu_object_find_slice(env, cl2lu_dev(cd), fid, &c->coc_lu)); -} -EXPORT_SYMBOL(cl_object_find); - -/** - * Releases a reference on \a o. - * - * When last reference is released object is returned to the cache, unless - * lu_object_header_flags::LU_OBJECT_HEARD_BANSHEE bit is set in its header. - * - * \see cl_page_put(), cl_lock_put(). - */ -void cl_object_put(const struct lu_env *env, struct cl_object *o) -{ - lu_object_put(env, &o->co_lu); -} -EXPORT_SYMBOL(cl_object_put); - -/** - * Acquire an additional reference to the object \a o. 
- * - * This can only be used to acquire _additional_ reference, i.e., caller - * already has to possess at least one reference to \a o before calling this. - * - * \see cl_page_get(), cl_lock_get(). - */ -void cl_object_get(struct cl_object *o) -{ - lu_object_get(&o->co_lu); -} -EXPORT_SYMBOL(cl_object_get); - -/** - * Returns the top-object for a given \a o. - * - * \see cl_io_top() - */ -struct cl_object *cl_object_top(struct cl_object *o) -{ - struct cl_object_header *hdr = cl_object_header(o); - struct cl_object *top; - - while (hdr->coh_parent) - hdr = hdr->coh_parent; - - top = lu2cl(lu_object_top(&hdr->coh_lu)); - CDEBUG(D_TRACE, "%p -> %p\n", o, top); - return top; -} -EXPORT_SYMBOL(cl_object_top); - -/** - * Returns pointer to the lock protecting data-attributes for the given object - * \a o. - * - * Data-attributes are protected by the cl_object_header::coh_attr_guard - * spin-lock in the top-object. - * - * \see cl_attr, cl_object_attr_lock(), cl_object_operations::coo_attr_get(). - */ -static spinlock_t *cl_object_attr_guard(struct cl_object *o) -{ - return &cl_object_header(cl_object_top(o))->coh_attr_guard; -} - -/** - * Locks data-attributes. - * - * Prevents data-attributes from changing, until lock is released by - * cl_object_attr_unlock(). This has to be called before calls to - * cl_object_attr_get(), cl_object_attr_update(). - */ -void cl_object_attr_lock(struct cl_object *o) - __acquires(cl_object_attr_guard(o)) -{ - spin_lock(cl_object_attr_guard(o)); -} -EXPORT_SYMBOL(cl_object_attr_lock); - -/** - * Releases data-attributes lock, acquired by cl_object_attr_lock(). - */ -void cl_object_attr_unlock(struct cl_object *o) - __releases(cl_object_attr_guard(o)) -{ - spin_unlock(cl_object_attr_guard(o)); -} -EXPORT_SYMBOL(cl_object_attr_unlock); - -/** - * Returns data-attributes of an object \a obj. 
- * - * Every layer is asked (by calling cl_object_operations::coo_attr_get()) - * top-to-bottom to fill in parts of \a attr that this layer is responsible - * for. - */ -int cl_object_attr_get(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr) -{ - struct lu_object_header *top; - int result; - - assert_spin_locked(cl_object_attr_guard(obj)); - - top = obj->co_lu.lo_header; - result = 0; - list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) { - if (obj->co_ops->coo_attr_get) { - result = obj->co_ops->coo_attr_get(env, obj, attr); - if (result != 0) { - if (result > 0) - result = 0; - break; - } - } - } - return result; -} -EXPORT_SYMBOL(cl_object_attr_get); - -/** - * Updates data-attributes of an object \a obj. - * - * Only attributes, mentioned in a validness bit-mask \a v are - * updated. Calls cl_object_operations::coo_attr_update() on every layer, - * bottom to top. - */ -int cl_object_attr_update(const struct lu_env *env, struct cl_object *obj, - const struct cl_attr *attr, unsigned int v) -{ - struct lu_object_header *top; - int result; - - assert_spin_locked(cl_object_attr_guard(obj)); - - top = obj->co_lu.lo_header; - result = 0; - list_for_each_entry_reverse(obj, &top->loh_layers, co_lu.lo_linkage) { - if (obj->co_ops->coo_attr_update) { - result = obj->co_ops->coo_attr_update(env, obj, attr, - v); - if (result != 0) { - if (result > 0) - result = 0; - break; - } - } - } - return result; -} -EXPORT_SYMBOL(cl_object_attr_update); - -/** - * Notifies layers (bottom-to-top) that glimpse AST was received. - * - * Layers have to fill \a lvb fields with information that will be shipped - * back to glimpse issuer. 
- * - * \see cl_lock_operations::clo_glimpse() - */ -int cl_object_glimpse(const struct lu_env *env, struct cl_object *obj, - struct ost_lvb *lvb) -{ - struct lu_object_header *top; - int result; - - top = obj->co_lu.lo_header; - result = 0; - list_for_each_entry_reverse(obj, &top->loh_layers, co_lu.lo_linkage) { - if (obj->co_ops->coo_glimpse) { - result = obj->co_ops->coo_glimpse(env, obj, lvb); - if (result != 0) - break; - } - } - LU_OBJECT_HEADER(D_DLMTRACE, env, lu_object_top(top), - "size: %llu mtime: %llu atime: %llu ctime: %llu blocks: %llu\n", - lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime, - lvb->lvb_ctime, lvb->lvb_blocks); - return result; -} -EXPORT_SYMBOL(cl_object_glimpse); - -/** - * Updates a configuration of an object \a obj. - */ -int cl_conf_set(const struct lu_env *env, struct cl_object *obj, - const struct cl_object_conf *conf) -{ - struct lu_object_header *top; - int result; - - top = obj->co_lu.lo_header; - result = 0; - list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) { - if (obj->co_ops->coo_conf_set) { - result = obj->co_ops->coo_conf_set(env, obj, conf); - if (result != 0) - break; - } - } - return result; -} -EXPORT_SYMBOL(cl_conf_set); - -/** - * Prunes caches of pages and locks for this object. - */ -int cl_object_prune(const struct lu_env *env, struct cl_object *obj) -{ - struct lu_object_header *top; - struct cl_object *o; - int result; - - top = obj->co_lu.lo_header; - result = 0; - list_for_each_entry(o, &top->loh_layers, co_lu.lo_linkage) { - if (o->co_ops->coo_prune) { - result = o->co_ops->coo_prune(env, o); - if (result != 0) - break; - } - } - - return result; -} -EXPORT_SYMBOL(cl_object_prune); - -/** - * Get stripe information of this object. 
- */ -int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj, - struct lov_user_md __user *uarg) -{ - struct lu_object_header *top; - int result = 0; - - top = obj->co_lu.lo_header; - list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) { - if (obj->co_ops->coo_getstripe) { - result = obj->co_ops->coo_getstripe(env, obj, uarg); - if (result) - break; - } - } - return result; -} -EXPORT_SYMBOL(cl_object_getstripe); - -/** - * Get fiemap extents from file object. - * - * \param env [in] lustre environment - * \param obj [in] file object - * \param key [in] fiemap request argument - * \param fiemap [out] fiemap extents mapping retrived - * \param buflen [in] max buffer length of @fiemap - * - * \retval 0 success - * \retval < 0 error - */ -int cl_object_fiemap(const struct lu_env *env, struct cl_object *obj, - struct ll_fiemap_info_key *key, - struct fiemap *fiemap, size_t *buflen) -{ - struct lu_object_header *top; - int result = 0; - - top = obj->co_lu.lo_header; - list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) { - if (obj->co_ops->coo_fiemap) { - result = obj->co_ops->coo_fiemap(env, obj, key, fiemap, - buflen); - if (result) - break; - } - } - return result; -} -EXPORT_SYMBOL(cl_object_fiemap); - -int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj, - struct cl_layout *cl) -{ - struct lu_object_header *top = obj->co_lu.lo_header; - - list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) { - if (obj->co_ops->coo_layout_get) - return obj->co_ops->coo_layout_get(env, obj, cl); - } - - return -EOPNOTSUPP; -} -EXPORT_SYMBOL(cl_object_layout_get); - -loff_t cl_object_maxbytes(struct cl_object *obj) -{ - struct lu_object_header *top = obj->co_lu.lo_header; - loff_t maxbytes = LLONG_MAX; - - list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) { - if (obj->co_ops->coo_maxbytes) - maxbytes = min_t(loff_t, obj->co_ops->coo_maxbytes(obj), - maxbytes); - } - - return maxbytes; -} 
-EXPORT_SYMBOL(cl_object_maxbytes); - -/** - * Helper function removing all object locks, and marking object for - * deletion. All object pages must have been deleted at this point. - * - * This is called by cl_inode_fini() and lov_object_delete() to destroy top- - * and sub- objects respectively. - */ -void cl_object_kill(const struct lu_env *env, struct cl_object *obj) -{ - struct cl_object_header *hdr = cl_object_header(obj); - - set_bit(LU_OBJECT_HEARD_BANSHEE, &hdr->coh_lu.loh_flags); -} -EXPORT_SYMBOL(cl_object_kill); - -void cache_stats_init(struct cache_stats *cs, const char *name) -{ - int i; - - cs->cs_name = name; - for (i = 0; i < CS_NR; i++) - atomic_set(&cs->cs_stats[i], 0); -} - -static int cache_stats_print(const struct cache_stats *cs, - struct seq_file *m, int h) -{ - int i; - /* - * lookup hit total cached create - * env: ...... ...... ...... ...... ...... - */ - if (h) { - const char *names[CS_NR] = CS_NAMES; - - seq_printf(m, "%6s", " "); - for (i = 0; i < CS_NR; i++) - seq_printf(m, "%8s", names[i]); - seq_printf(m, "\n"); - } - - seq_printf(m, "%5.5s:", cs->cs_name); - for (i = 0; i < CS_NR; i++) - seq_printf(m, "%8u", atomic_read(&cs->cs_stats[i])); - return 0; -} - -static void cl_env_percpu_refill(void); - -/** - * Initialize client site. - * - * Perform common initialization (lu_site_init()), and initialize statistical - * counters. Also perform global initializations on the first call. - */ -int cl_site_init(struct cl_site *s, struct cl_device *d) -{ - size_t i; - int result; - - result = lu_site_init(&s->cs_lu, &d->cd_lu_dev); - if (result == 0) { - cache_stats_init(&s->cs_pages, "pages"); - for (i = 0; i < ARRAY_SIZE(s->cs_pages_state); ++i) - atomic_set(&s->cs_pages_state[0], 0); - cl_env_percpu_refill(); - } - return result; -} -EXPORT_SYMBOL(cl_site_init); - -/** - * Finalize client site. Dual to cl_site_init(). 
- */ -void cl_site_fini(struct cl_site *s) -{ - lu_site_fini(&s->cs_lu); -} -EXPORT_SYMBOL(cl_site_fini); - -static struct cache_stats cl_env_stats = { - .cs_name = "envs", - .cs_stats = { ATOMIC_INIT(0), } -}; - -/** - * Outputs client site statistical counters into a buffer. Suitable for - * ll_rd_*()-style functions. - */ -int cl_site_stats_print(const struct cl_site *site, struct seq_file *m) -{ - size_t i; - static const char * const pstate[] = { - [CPS_CACHED] = "c", - [CPS_OWNED] = "o", - [CPS_PAGEOUT] = "w", - [CPS_PAGEIN] = "r", - [CPS_FREEING] = "f" - }; -/* - lookup hit total busy create -pages: ...... ...... ...... ...... ...... [...... ...... ...... ......] -locks: ...... ...... ...... ...... ...... [...... ...... ...... ...... ......] - env: ...... ...... ...... ...... ...... - */ - lu_site_stats_print(&site->cs_lu, m); - cache_stats_print(&site->cs_pages, m, 1); - seq_puts(m, " ["); - for (i = 0; i < ARRAY_SIZE(site->cs_pages_state); ++i) - seq_printf(m, "%s: %u ", pstate[i], - atomic_read(&site->cs_pages_state[i])); - seq_puts(m, "]\n"); - cache_stats_print(&cl_env_stats, m, 0); - seq_puts(m, "\n"); - return 0; -} -EXPORT_SYMBOL(cl_site_stats_print); - -/***************************************************************************** - * - * lu_env handling on client. - * - */ - -/** - * The most efficient way is to store cl_env pointer in task specific - * structures. On Linux, it wont' be easy to use task_struct->journal_info - * because Lustre code may call into other fs which has certain assumptions - * about journal_info. Currently following fields in task_struct are identified - * can be used for this purpose: - * - tux_info: only on RedHat kernel. - * - ... - * \note As long as we use task_struct to store cl_env, we assume that once - * called into Lustre, we'll never call into the other part of the kernel - * which will use those fields in task_struct without explicitly exiting - * Lustre. 
- * - * If there's no space in task_struct is available, hash will be used. - * bz20044, bz22683. - */ - -static unsigned int cl_envs_cached_max = 32; /* XXX: prototype: arbitrary limit - * for now. - */ -static struct cl_env_cache { - rwlock_t cec_guard; - unsigned int cec_count; - struct list_head cec_envs; -} *cl_envs = NULL; - -struct cl_env { - void *ce_magic; - struct lu_env ce_lu; - struct lu_context ce_ses; - - /* - * Linkage into global list of all client environments. Used for - * garbage collection. - */ - struct list_head ce_linkage; - /* - * - */ - int ce_ref; - /* - * Debugging field: address of the caller who made original - * allocation. - */ - void *ce_debug; -}; - -#define CL_ENV_INC(counter) -#define CL_ENV_DEC(counter) - -static void cl_env_init0(struct cl_env *cle, void *debug) -{ - LASSERT(cle->ce_ref == 0); - LASSERT(cle->ce_magic == &cl_env_init0); - LASSERT(!cle->ce_debug); - - cle->ce_ref = 1; - cle->ce_debug = debug; - CL_ENV_INC(busy); -} - -static struct lu_env *cl_env_new(__u32 ctx_tags, __u32 ses_tags, void *debug) -{ - struct lu_env *env; - struct cl_env *cle; - - cle = kmem_cache_zalloc(cl_env_kmem, GFP_NOFS); - if (cle) { - int rc; - - INIT_LIST_HEAD(&cle->ce_linkage); - cle->ce_magic = &cl_env_init0; - env = &cle->ce_lu; - rc = lu_env_init(env, ctx_tags | LCT_CL_THREAD); - if (rc == 0) { - rc = lu_context_init(&cle->ce_ses, - ses_tags | LCT_SESSION); - if (rc == 0) { - lu_context_enter(&cle->ce_ses); - env->le_ses = &cle->ce_ses; - cl_env_init0(cle, debug); - } else { - lu_env_fini(env); - } - } - if (rc != 0) { - kmem_cache_free(cl_env_kmem, cle); - env = ERR_PTR(rc); - } else { - CL_ENV_INC(create); - CL_ENV_INC(total); - } - } else { - env = ERR_PTR(-ENOMEM); - } - return env; -} - -static void cl_env_fini(struct cl_env *cle) -{ - CL_ENV_DEC(total); - lu_context_fini(&cle->ce_lu.le_ctx); - lu_context_fini(&cle->ce_ses); - kmem_cache_free(cl_env_kmem, cle); -} - -static struct lu_env *cl_env_obtain(void *debug) -{ - struct 
cl_env *cle; - struct lu_env *env; - int cpu = get_cpu(); - - read_lock(&cl_envs[cpu].cec_guard); - LASSERT(equi(cl_envs[cpu].cec_count == 0, - list_empty(&cl_envs[cpu].cec_envs))); - if (cl_envs[cpu].cec_count > 0) { - int rc; - - cle = container_of(cl_envs[cpu].cec_envs.next, struct cl_env, - ce_linkage); - list_del_init(&cle->ce_linkage); - cl_envs[cpu].cec_count--; - read_unlock(&cl_envs[cpu].cec_guard); - put_cpu(); - - env = &cle->ce_lu; - rc = lu_env_refill(env); - if (rc == 0) { - cl_env_init0(cle, debug); - lu_context_enter(&env->le_ctx); - lu_context_enter(&cle->ce_ses); - } else { - cl_env_fini(cle); - env = ERR_PTR(rc); - } - } else { - read_unlock(&cl_envs[cpu].cec_guard); - put_cpu(); - env = cl_env_new(lu_context_tags_default, - lu_session_tags_default, debug); - } - return env; -} - -static inline struct cl_env *cl_env_container(struct lu_env *env) -{ - return container_of(env, struct cl_env, ce_lu); -} - -/** - * Returns lu_env: if there already is an environment associated with the - * current thread, it is returned, otherwise, new environment is allocated. - * - * Allocations are amortized through the global cache of environments. - * - * \param refcheck pointer to a counter used to detect environment leaks. In - * the usual case cl_env_get() and cl_env_put() are called in the same lexical - * scope and pointer to the same integer is passed as \a refcheck. This is - * used to detect missed cl_env_put(). - * - * \see cl_env_put() - */ -struct lu_env *cl_env_get(u16 *refcheck) -{ - struct lu_env *env; - - env = cl_env_obtain(__builtin_return_address(0)); - if (!IS_ERR(env)) { - struct cl_env *cle; - - cle = cl_env_container(env); - *refcheck = cle->ce_ref; - CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle); - } - return env; -} -EXPORT_SYMBOL(cl_env_get); - -/** - * Forces an allocation of a fresh environment with given tags. 
- * - * \see cl_env_get() - */ -struct lu_env *cl_env_alloc(u16 *refcheck, u32 tags) -{ - struct lu_env *env; - - env = cl_env_new(tags, tags, __builtin_return_address(0)); - if (!IS_ERR(env)) { - struct cl_env *cle; - - cle = cl_env_container(env); - *refcheck = cle->ce_ref; - CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle); - } - return env; -} -EXPORT_SYMBOL(cl_env_alloc); - -static void cl_env_exit(struct cl_env *cle) -{ - lu_context_exit(&cle->ce_lu.le_ctx); - lu_context_exit(&cle->ce_ses); -} - -/** - * Finalizes and frees a given number of cached environments. This is done to - * (1) free some memory (not currently hooked into VM), or (2) release - * references to modules. - */ -unsigned int cl_env_cache_purge(unsigned int nr) -{ - struct cl_env *cle; - unsigned int i; - - for_each_possible_cpu(i) { - write_lock(&cl_envs[i].cec_guard); - for (; !list_empty(&cl_envs[i].cec_envs) && nr > 0; --nr) { - cle = container_of(cl_envs[i].cec_envs.next, - struct cl_env, ce_linkage); - list_del_init(&cle->ce_linkage); - LASSERT(cl_envs[i].cec_count > 0); - cl_envs[i].cec_count--; - write_unlock(&cl_envs[i].cec_guard); - - cl_env_fini(cle); - write_lock(&cl_envs[i].cec_guard); - } - LASSERT(equi(cl_envs[i].cec_count == 0, - list_empty(&cl_envs[i].cec_envs))); - write_unlock(&cl_envs[i].cec_guard); - } - return nr; -} -EXPORT_SYMBOL(cl_env_cache_purge); - -/** - * Release an environment. - * - * Decrement \a env reference counter. When counter drops to 0, nothing in - * this thread is using environment and it is returned to the allocation - * cache, or freed straight away, if cache is large enough. 
- */ -void cl_env_put(struct lu_env *env, u16 *refcheck) -{ - struct cl_env *cle; - - cle = cl_env_container(env); - - LASSERT(cle->ce_ref > 0); - LASSERT(ergo(refcheck, cle->ce_ref == *refcheck)); - - CDEBUG(D_OTHER, "%d@%p\n", cle->ce_ref, cle); - if (--cle->ce_ref == 0) { - int cpu = get_cpu(); - - CL_ENV_DEC(busy); - cle->ce_debug = NULL; - cl_env_exit(cle); - /* - * Don't bother to take a lock here. - * - * Return environment to the cache only when it was allocated - * with the standard tags. - */ - if (cl_envs[cpu].cec_count < cl_envs_cached_max && - (env->le_ctx.lc_tags & ~LCT_HAS_EXIT) == LCT_CL_THREAD && - (env->le_ses->lc_tags & ~LCT_HAS_EXIT) == LCT_SESSION) { - read_lock(&cl_envs[cpu].cec_guard); - list_add(&cle->ce_linkage, &cl_envs[cpu].cec_envs); - cl_envs[cpu].cec_count++; - read_unlock(&cl_envs[cpu].cec_guard); - } else { - cl_env_fini(cle); - } - put_cpu(); - } -} -EXPORT_SYMBOL(cl_env_put); - -/** - * Converts struct ost_lvb to struct cl_attr. - * - * \see cl_attr2lvb - */ -void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb) -{ - attr->cat_size = lvb->lvb_size; - attr->cat_mtime = lvb->lvb_mtime; - attr->cat_atime = lvb->lvb_atime; - attr->cat_ctime = lvb->lvb_ctime; - attr->cat_blocks = lvb->lvb_blocks; -} -EXPORT_SYMBOL(cl_lvb2attr); - -static struct cl_env cl_env_percpu[NR_CPUS]; - -static int cl_env_percpu_init(void) -{ - struct cl_env *cle; - int tags = LCT_REMEMBER | LCT_NOREF; - int i, j; - int rc = 0; - - for_each_possible_cpu(i) { - struct lu_env *env; - - rwlock_init(&cl_envs[i].cec_guard); - INIT_LIST_HEAD(&cl_envs[i].cec_envs); - cl_envs[i].cec_count = 0; - - cle = &cl_env_percpu[i]; - env = &cle->ce_lu; - - INIT_LIST_HEAD(&cle->ce_linkage); - cle->ce_magic = &cl_env_init0; - rc = lu_env_init(env, LCT_CL_THREAD | tags); - if (rc == 0) { - rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags); - if (rc == 0) { - lu_context_enter(&cle->ce_ses); - env->le_ses = &cle->ce_ses; - } else { - lu_env_fini(env); - } - } - if (rc 
!= 0) - break; - } - if (rc != 0) { - /* Indices 0 to i (excluding i) were correctly initialized, - * thus we must uninitialize up to i, the rest are undefined. - */ - for (j = 0; j < i; j++) { - cle = &cl_env_percpu[j]; - lu_context_exit(&cle->ce_ses); - lu_context_fini(&cle->ce_ses); - lu_env_fini(&cle->ce_lu); - } - } - - return rc; -} - -static void cl_env_percpu_fini(void) -{ - int i; - - for_each_possible_cpu(i) { - struct cl_env *cle = &cl_env_percpu[i]; - - lu_context_exit(&cle->ce_ses); - lu_context_fini(&cle->ce_ses); - lu_env_fini(&cle->ce_lu); - } -} - -static void cl_env_percpu_refill(void) -{ - int i; - - for_each_possible_cpu(i) - lu_env_refill(&cl_env_percpu[i].ce_lu); -} - -void cl_env_percpu_put(struct lu_env *env) -{ - struct cl_env *cle; - int cpu; - - cpu = smp_processor_id(); - cle = cl_env_container(env); - LASSERT(cle == &cl_env_percpu[cpu]); - - cle->ce_ref--; - LASSERT(cle->ce_ref == 0); - - CL_ENV_DEC(busy); - cle->ce_debug = NULL; - - put_cpu(); -} -EXPORT_SYMBOL(cl_env_percpu_put); - -struct lu_env *cl_env_percpu_get(void) -{ - struct cl_env *cle; - - cle = &cl_env_percpu[get_cpu()]; - cl_env_init0(cle, __builtin_return_address(0)); - - return &cle->ce_lu; -} -EXPORT_SYMBOL(cl_env_percpu_get); - -/***************************************************************************** - * - * Temporary prototype thing: mirror obd-devices into cl devices. 
- * - */ - -struct cl_device *cl_type_setup(const struct lu_env *env, struct lu_site *site, - struct lu_device_type *ldt, - struct lu_device *next) -{ - const char *typename; - struct lu_device *d; - - typename = ldt->ldt_name; - d = ldt->ldt_ops->ldto_device_alloc(env, ldt, NULL); - if (!IS_ERR(d)) { - int rc; - - if (site) - d->ld_site = site; - rc = ldt->ldt_ops->ldto_device_init(env, d, typename, next); - if (rc == 0) { - lu_device_get(d); - lu_ref_add(&d->ld_reference, - "lu-stack", &lu_site_init); - } else { - ldt->ldt_ops->ldto_device_free(env, d); - CERROR("can't init device '%s', %d\n", typename, rc); - d = ERR_PTR(rc); - } - } else { - CERROR("Cannot allocate device: '%s'\n", typename); - } - return lu2cl_dev(d); -} -EXPORT_SYMBOL(cl_type_setup); - -/** - * Finalize device stack by calling lu_stack_fini(). - */ -void cl_stack_fini(const struct lu_env *env, struct cl_device *cl) -{ - lu_stack_fini(env, cl2lu_dev(cl)); -} -EXPORT_SYMBOL(cl_stack_fini); - -static struct lu_context_key cl_key; - -struct cl_thread_info *cl_env_info(const struct lu_env *env) -{ - return lu_context_key_get(&env->le_ctx, &cl_key); -} - -/* defines cl0_key_{init,fini}() */ -LU_KEY_INIT_FINI(cl0, struct cl_thread_info); - -static void *cl_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - return cl0_key_init(ctx, key); -} - -static void cl_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - cl0_key_fini(ctx, key, data); -} - -static struct lu_context_key cl_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = cl_key_init, - .lct_fini = cl_key_fini, -}; - -static struct lu_kmem_descr cl_object_caches[] = { - { - .ckd_cache = &cl_env_kmem, - .ckd_name = "cl_env_kmem", - .ckd_size = sizeof(struct cl_env) - }, - { - .ckd_cache = NULL - } -}; - -/** - * Global initialization of cl-data. Create kmem caches, register - * lu_context_key's, etc. 
- * - * \see cl_global_fini() - */ -int cl_global_init(void) -{ - int result; - - cl_envs = kcalloc(num_possible_cpus(), sizeof(*cl_envs), GFP_KERNEL); - if (!cl_envs) { - result = -ENOMEM; - goto out; - } - - result = lu_kmem_init(cl_object_caches); - if (result) - goto out_envs; - - LU_CONTEXT_KEY_INIT(&cl_key); - result = lu_context_key_register(&cl_key); - if (result) - goto out_kmem; - - result = cl_env_percpu_init(); - if (result) - /* no cl_env_percpu_fini on error */ - goto out_keys; - - return 0; - -out_keys: - lu_context_key_degister(&cl_key); -out_kmem: - lu_kmem_fini(cl_object_caches); -out_envs: - kfree(cl_envs); -out: - return result; -} - -/** - * Finalization of global cl-data. Dual to cl_global_init(). - */ -void cl_global_fini(void) -{ - cl_env_percpu_fini(); - lu_context_key_degister(&cl_key); - lu_kmem_fini(cl_object_caches); - kfree(cl_envs); -} diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c deleted file mode 100644 index 916cf81c5997..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/cl_page.c +++ /dev/null @@ -1,1045 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Client Lustre Page. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include - -#include -#include "cl_internal.h" - -static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg); - -# define PASSERT(env, page, expr) \ - do { \ - if (unlikely(!(expr))) { \ - CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \ - LASSERT(0); \ - } \ - } while (0) - -# define PINVRNT(env, page, exp) \ - ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp)) - -/** - * Internal version of cl_page_get(). - * - * This function can be used to obtain initial reference to previously - * unreferenced cached object. It can be called only if concurrent page - * reclamation is somehow prevented, e.g., by keeping a lock on a VM page, - * associated with \a page. - * - * Use with care! Not exported. - */ -static void cl_page_get_trust(struct cl_page *page) -{ - LASSERT(atomic_read(&page->cp_ref) > 0); - atomic_inc(&page->cp_ref); -} - -/** - * Returns a slice within a page, corresponding to the given layer in the - * device stack. 
- * - * \see cl_lock_at() - */ -static const struct cl_page_slice * -cl_page_at_trusted(const struct cl_page *page, - const struct lu_device_type *dtype) -{ - const struct cl_page_slice *slice; - - list_for_each_entry(slice, &page->cp_layers, cpl_linkage) { - if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype) - return slice; - } - return NULL; -} - -static void cl_page_free(const struct lu_env *env, struct cl_page *page) -{ - struct cl_object *obj = page->cp_obj; - - PASSERT(env, page, list_empty(&page->cp_batch)); - PASSERT(env, page, !page->cp_owner); - PASSERT(env, page, page->cp_state == CPS_FREEING); - - while (!list_empty(&page->cp_layers)) { - struct cl_page_slice *slice; - - slice = list_entry(page->cp_layers.next, - struct cl_page_slice, cpl_linkage); - list_del_init(page->cp_layers.next); - if (unlikely(slice->cpl_ops->cpo_fini)) - slice->cpl_ops->cpo_fini(env, slice); - } - lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page); - cl_object_put(env, obj); - lu_ref_fini(&page->cp_reference); - kfree(page); -} - -/** - * Helper function updating page state. This is the only place in the code - * where cl_page::cp_state field is mutated. - */ -static inline void cl_page_state_set_trust(struct cl_page *page, - enum cl_page_state state) -{ - /* bypass const. 
*/ - *(enum cl_page_state *)&page->cp_state = state; -} - -struct cl_page *cl_page_alloc(const struct lu_env *env, - struct cl_object *o, pgoff_t ind, - struct page *vmpage, - enum cl_page_type type) -{ - struct cl_page *page; - struct lu_object_header *head; - - page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS); - if (page) { - int result = 0; - - atomic_set(&page->cp_ref, 1); - page->cp_obj = o; - cl_object_get(o); - lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page", - page); - page->cp_vmpage = vmpage; - cl_page_state_set_trust(page, CPS_CACHED); - page->cp_type = type; - INIT_LIST_HEAD(&page->cp_layers); - INIT_LIST_HEAD(&page->cp_batch); - lu_ref_init(&page->cp_reference); - head = o->co_lu.lo_header; - list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) { - if (o->co_ops->coo_page_init) { - result = o->co_ops->coo_page_init(env, o, page, - ind); - if (result != 0) { - cl_page_delete0(env, page); - cl_page_free(env, page); - page = ERR_PTR(result); - break; - } - } - } - } else { - page = ERR_PTR(-ENOMEM); - } - return page; -} - -/** - * Returns a cl_page with index \a idx at the object \a o, and associated with - * the VM page \a vmpage. - * - * This is the main entry point into the cl_page caching interface. First, a - * cache (implemented as a per-object radix tree) is consulted. If page is - * found there, it is returned immediately. Otherwise new page is allocated - * and returned. In any case, additional reference to page is acquired. 
- * - * \see cl_object_find(), cl_lock_find() - */ -struct cl_page *cl_page_find(const struct lu_env *env, - struct cl_object *o, - pgoff_t idx, struct page *vmpage, - enum cl_page_type type) -{ - struct cl_page *page = NULL; - struct cl_object_header *hdr; - - LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT); - might_sleep(); - - hdr = cl_object_header(o); - - CDEBUG(D_PAGE, "%lu@" DFID " %p %lx %d\n", - idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type); - /* fast path. */ - if (type == CPT_CACHEABLE) { - /* - * vmpage lock is used to protect the child/parent - * relationship - */ - LASSERT(PageLocked(vmpage)); - /* - * cl_vmpage_page() can be called here without any locks as - * - * - "vmpage" is locked (which prevents ->private from - * concurrent updates), and - * - * - "o" cannot be destroyed while current thread holds a - * reference on it. - */ - page = cl_vmpage_page(vmpage, o); - - if (page) - return page; - } - - /* allocate and initialize cl_page */ - page = cl_page_alloc(env, o, idx, vmpage, type); - return page; -} -EXPORT_SYMBOL(cl_page_find); - -static inline int cl_page_invariant(const struct cl_page *pg) -{ - return cl_page_in_use_noref(pg); -} - -static void cl_page_state_set0(const struct lu_env *env, - struct cl_page *page, enum cl_page_state state) -{ - enum cl_page_state old; - - /* - * Matrix of allowed state transitions [old][new], for sanity - * checking. 
- */ - static const int allowed_transitions[CPS_NR][CPS_NR] = { - [CPS_CACHED] = { - [CPS_CACHED] = 0, - [CPS_OWNED] = 1, /* io finds existing cached page */ - [CPS_PAGEIN] = 0, - [CPS_PAGEOUT] = 1, /* write-out from the cache */ - [CPS_FREEING] = 1, /* eviction on the memory pressure */ - }, - [CPS_OWNED] = { - [CPS_CACHED] = 1, /* release to the cache */ - [CPS_OWNED] = 0, - [CPS_PAGEIN] = 1, /* start read immediately */ - [CPS_PAGEOUT] = 1, /* start write immediately */ - [CPS_FREEING] = 1, /* lock invalidation or truncate */ - }, - [CPS_PAGEIN] = { - [CPS_CACHED] = 1, /* io completion */ - [CPS_OWNED] = 0, - [CPS_PAGEIN] = 0, - [CPS_PAGEOUT] = 0, - [CPS_FREEING] = 0, - }, - [CPS_PAGEOUT] = { - [CPS_CACHED] = 1, /* io completion */ - [CPS_OWNED] = 0, - [CPS_PAGEIN] = 0, - [CPS_PAGEOUT] = 0, - [CPS_FREEING] = 0, - }, - [CPS_FREEING] = { - [CPS_CACHED] = 0, - [CPS_OWNED] = 0, - [CPS_PAGEIN] = 0, - [CPS_PAGEOUT] = 0, - [CPS_FREEING] = 0, - } - }; - - old = page->cp_state; - PASSERT(env, page, allowed_transitions[old][state]); - CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state); - PASSERT(env, page, page->cp_state == old); - PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner)); - cl_page_state_set_trust(page, state); -} - -static void cl_page_state_set(const struct lu_env *env, - struct cl_page *page, enum cl_page_state state) -{ - cl_page_state_set0(env, page, state); -} - -/** - * Acquires an additional reference to a page. - * - * This can be called only by caller already possessing a reference to \a - * page. - * - * \see cl_object_get(), cl_lock_get(). - */ -void cl_page_get(struct cl_page *page) -{ - cl_page_get_trust(page); -} -EXPORT_SYMBOL(cl_page_get); - -/** - * Releases a reference to a page. - * - * When last reference is released, page is returned to the cache, unless it - * is in cl_page_state::CPS_FREEING state, in which case it is immediately - * destroyed. - * - * \see cl_object_put(), cl_lock_put(). 
- */ -void cl_page_put(const struct lu_env *env, struct cl_page *page) -{ - CL_PAGE_HEADER(D_TRACE, env, page, "%d\n", - atomic_read(&page->cp_ref)); - - if (atomic_dec_and_test(&page->cp_ref)) { - LASSERT(page->cp_state == CPS_FREEING); - - LASSERT(atomic_read(&page->cp_ref) == 0); - PASSERT(env, page, !page->cp_owner); - PASSERT(env, page, list_empty(&page->cp_batch)); - /* - * Page is no longer reachable by other threads. Tear - * it down. - */ - cl_page_free(env, page); - } -} -EXPORT_SYMBOL(cl_page_put); - -/** - * Returns a cl_page associated with a VM page, and given cl_object. - */ -struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj) -{ - struct cl_page *page; - - LASSERT(PageLocked(vmpage)); - - /* - * NOTE: absence of races and liveness of data are guaranteed by page - * lock on a "vmpage". That works because object destruction has - * bottom-to-top pass. - */ - - page = (struct cl_page *)vmpage->private; - if (page) { - cl_page_get_trust(page); - LASSERT(page->cp_type == CPT_CACHEABLE); - } - return page; -} -EXPORT_SYMBOL(cl_vmpage_page); - -const struct cl_page_slice *cl_page_at(const struct cl_page *page, - const struct lu_device_type *dtype) -{ - return cl_page_at_trusted(page, dtype); -} -EXPORT_SYMBOL(cl_page_at); - -#define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname) - -#define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...) \ -({ \ - const struct lu_env *__env = (_env); \ - struct cl_page *__page = (_page); \ - const struct cl_page_slice *__scan; \ - int __result; \ - ptrdiff_t __op = (_op); \ - int (*__method)_proto; \ - \ - __result = 0; \ - list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + __op); \ - if (__method) { \ - __result = (*__method)(__env, __scan, ## __VA_ARGS__); \ - if (__result != 0) \ - break; \ - } \ - } \ - if (__result > 0) \ - __result = 0; \ - __result; \ -}) - -#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...) 
\ -do { \ - const struct lu_env *__env = (_env); \ - struct cl_page *__page = (_page); \ - const struct cl_page_slice *__scan; \ - ptrdiff_t __op = (_op); \ - void (*__method)_proto; \ - \ - list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + __op); \ - if (__method) \ - (*__method)(__env, __scan, ## __VA_ARGS__); \ - } \ -} while (0) - -#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...) \ -do { \ - const struct lu_env *__env = (_env); \ - struct cl_page *__page = (_page); \ - const struct cl_page_slice *__scan; \ - ptrdiff_t __op = (_op); \ - void (*__method)_proto; \ - \ - list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \ - __method = *(void **)((char *)__scan->cpl_ops + __op); \ - if (__method) \ - (*__method)(__env, __scan, ## __VA_ARGS__); \ - } \ -} while (0) - -static int cl_page_invoke(const struct lu_env *env, - struct cl_io *io, struct cl_page *page, ptrdiff_t op) - -{ - PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj)); - return CL_PAGE_INVOKE(env, page, op, - (const struct lu_env *, - const struct cl_page_slice *, struct cl_io *), - io); -} - -static void cl_page_invoid(const struct lu_env *env, - struct cl_io *io, struct cl_page *page, ptrdiff_t op) - -{ - PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj)); - CL_PAGE_INVOID(env, page, op, - (const struct lu_env *, - const struct cl_page_slice *, struct cl_io *), io); -} - -static void cl_page_owner_clear(struct cl_page *page) -{ - if (page->cp_owner) { - LASSERT(page->cp_owner->ci_owned_nr > 0); - page->cp_owner->ci_owned_nr--; - page->cp_owner = NULL; - } -} - -static void cl_page_owner_set(struct cl_page *page) -{ - page->cp_owner->ci_owned_nr++; -} - -void cl_page_disown0(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg) -{ - enum cl_page_state state; - - state = pg->cp_state; - PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING); - PINVRNT(env, pg, 
cl_page_invariant(pg) || state == CPS_FREEING); - cl_page_owner_clear(pg); - - if (state == CPS_OWNED) - cl_page_state_set(env, pg, CPS_CACHED); - /* - * Completion call-backs are executed in the bottom-up order, so that - * uppermost layer (llite), responsible for VFS/VM interaction runs - * last and can release locks safely. - */ - CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown), - (const struct lu_env *, - const struct cl_page_slice *, struct cl_io *), - io); -} - -/** - * returns true, iff page is owned by the given io. - */ -int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io) -{ - struct cl_io *top = cl_io_top((struct cl_io *)io); - - LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj)); - return pg->cp_state == CPS_OWNED && pg->cp_owner == top; -} -EXPORT_SYMBOL(cl_page_is_owned); - -/** - * Try to own a page by IO. - * - * Waits until page is in cl_page_state::CPS_CACHED state, and then switch it - * into cl_page_state::CPS_OWNED state. - * - * \pre !cl_page_is_owned(pg, io) - * \post result == 0 iff cl_page_is_owned(pg, io) - * - * \retval 0 success - * - * \retval -ve failure, e.g., page was destroyed (and landed in - * cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED). - * or, page was owned by another thread, or in IO. 
- * - * \see cl_page_disown() - * \see cl_page_operations::cpo_own() - * \see cl_page_own_try() - * \see cl_page_own - */ -static int cl_page_own0(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg, int nonblock) -{ - int result; - - PINVRNT(env, pg, !cl_page_is_owned(pg, io)); - - io = cl_io_top(io); - - if (pg->cp_state == CPS_FREEING) { - result = -ENOENT; - } else { - result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own), - (const struct lu_env *, - const struct cl_page_slice *, - struct cl_io *, int), - io, nonblock); - if (result == 0) { - PASSERT(env, pg, !pg->cp_owner); - pg->cp_owner = cl_io_top(io); - cl_page_owner_set(pg); - if (pg->cp_state != CPS_FREEING) { - cl_page_state_set(env, pg, CPS_OWNED); - } else { - cl_page_disown0(env, io, pg); - result = -ENOENT; - } - } - } - PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg))); - return result; -} - -/** - * Own a page, might be blocked. - * - * \see cl_page_own0() - */ -int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg) -{ - return cl_page_own0(env, io, pg, 0); -} -EXPORT_SYMBOL(cl_page_own); - -/** - * Nonblock version of cl_page_own(). - * - * \see cl_page_own0() - */ -int cl_page_own_try(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg) -{ - return cl_page_own0(env, io, pg, 1); -} -EXPORT_SYMBOL(cl_page_own_try); - -/** - * Assume page ownership. - * - * Called when page is already locked by the hosting VM. 
- * - * \pre !cl_page_is_owned(pg, io) - * \post cl_page_is_owned(pg, io) - * - * \see cl_page_operations::cpo_assume() - */ -void cl_page_assume(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg) -{ - PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj)); - - io = cl_io_top(io); - - cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume)); - PASSERT(env, pg, !pg->cp_owner); - pg->cp_owner = cl_io_top(io); - cl_page_owner_set(pg); - cl_page_state_set(env, pg, CPS_OWNED); -} -EXPORT_SYMBOL(cl_page_assume); - -/** - * Releases page ownership without unlocking the page. - * - * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the - * underlying VM page (as VM is supposed to do this itself). - * - * \pre cl_page_is_owned(pg, io) - * \post !cl_page_is_owned(pg, io) - * - * \see cl_page_assume() - */ -void cl_page_unassume(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg) -{ - PINVRNT(env, pg, cl_page_is_owned(pg, io)); - PINVRNT(env, pg, cl_page_invariant(pg)); - - io = cl_io_top(io); - cl_page_owner_clear(pg); - cl_page_state_set(env, pg, CPS_CACHED); - CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume), - (const struct lu_env *, - const struct cl_page_slice *, struct cl_io *), - io); -} -EXPORT_SYMBOL(cl_page_unassume); - -/** - * Releases page ownership. - * - * Moves page into cl_page_state::CPS_CACHED. - * - * \pre cl_page_is_owned(pg, io) - * \post !cl_page_is_owned(pg, io) - * - * \see cl_page_own() - * \see cl_page_operations::cpo_disown() - */ -void cl_page_disown(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg) -{ - PINVRNT(env, pg, cl_page_is_owned(pg, io) || - pg->cp_state == CPS_FREEING); - - io = cl_io_top(io); - cl_page_disown0(env, io, pg); -} -EXPORT_SYMBOL(cl_page_disown); - -/** - * Called when page is to be removed from the object, e.g., as a result of - * truncate. - * - * Calls cl_page_operations::cpo_discard() top-to-bottom. 
- * - * \pre cl_page_is_owned(pg, io) - * - * \see cl_page_operations::cpo_discard() - */ -void cl_page_discard(const struct lu_env *env, - struct cl_io *io, struct cl_page *pg) -{ - PINVRNT(env, pg, cl_page_is_owned(pg, io)); - PINVRNT(env, pg, cl_page_invariant(pg)); - - cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard)); -} -EXPORT_SYMBOL(cl_page_discard); - -/** - * Version of cl_page_delete() that can be called for not fully constructed - * pages, e.g,. in a error handling cl_page_find()->cl_page_delete0() - * path. Doesn't check page invariant. - */ -static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg) -{ - PASSERT(env, pg, pg->cp_state != CPS_FREEING); - - /* - * Sever all ways to obtain new pointers to @pg. - */ - cl_page_owner_clear(pg); - - cl_page_state_set0(env, pg, CPS_FREEING); - - CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete), - (const struct lu_env *, - const struct cl_page_slice *)); -} - -/** - * Called when a decision is made to throw page out of memory. - * - * Notifies all layers about page destruction by calling - * cl_page_operations::cpo_delete() method top-to-bottom. - * - * Moves page into cl_page_state::CPS_FREEING state (this is the only place - * where transition to this state happens). - * - * Eliminates all venues through which new references to the page can be - * obtained: - * - * - removes page from the radix trees, - * - * - breaks linkage from VM page to cl_page. - * - * Once page reaches cl_page_state::CPS_FREEING, all remaining references will - * drain after some time, at which point page will be recycled. - * - * \pre VM page is locked - * \post pg->cp_state == CPS_FREEING - * - * \see cl_page_operations::cpo_delete() - */ -void cl_page_delete(const struct lu_env *env, struct cl_page *pg) -{ - PINVRNT(env, pg, cl_page_invariant(pg)); - cl_page_delete0(env, pg); -} -EXPORT_SYMBOL(cl_page_delete); - -/** - * Marks page up-to-date. 
- * - * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The - * layer responsible for VM interaction has to mark/clear page as up-to-date - * by the \a uptodate argument. - * - * \see cl_page_operations::cpo_export() - */ -void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate) -{ - PINVRNT(env, pg, cl_page_invariant(pg)); - CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export), - (const struct lu_env *, - const struct cl_page_slice *, int), uptodate); -} -EXPORT_SYMBOL(cl_page_export); - -/** - * Returns true, iff \a pg is VM locked in a suitable sense by the calling - * thread. - */ -int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg) -{ - int result; - const struct cl_page_slice *slice; - - slice = container_of(pg->cp_layers.next, - const struct cl_page_slice, cpl_linkage); - PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked); - /* - * Call ->cpo_is_vmlocked() directly instead of going through - * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by - * cl_page_invariant(). - */ - result = slice->cpl_ops->cpo_is_vmlocked(env, slice); - PASSERT(env, pg, result == -EBUSY || result == -ENODATA); - return result == -EBUSY; -} -EXPORT_SYMBOL(cl_page_is_vmlocked); - -static enum cl_page_state cl_req_type_state(enum cl_req_type crt) -{ - return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN; -} - -static void cl_page_io_start(const struct lu_env *env, - struct cl_page *pg, enum cl_req_type crt) -{ - /* - * Page is queued for IO, change its state. - */ - cl_page_owner_clear(pg); - cl_page_state_set(env, pg, cl_req_type_state(crt)); -} - -/** - * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is - * called top-to-bottom. Every layer either agrees to submit this page (by - * returning 0), or requests to omit this page (by returning -EALREADY). Layer - * handling interactions with the VM also has to inform VM that page is under - * transfer now. 
- */ -int cl_page_prep(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg, enum cl_req_type crt) -{ - int result; - - PINVRNT(env, pg, cl_page_is_owned(pg, io)); - PINVRNT(env, pg, cl_page_invariant(pg)); - PINVRNT(env, pg, crt < CRT_NR); - - /* - * XXX this has to be called bottom-to-top, so that llite can set up - * PG_writeback without risking other layers deciding to skip this - * page. - */ - if (crt >= CRT_NR) - return -EINVAL; - result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep)); - if (result == 0) - cl_page_io_start(env, pg, crt); - - CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result); - return result; -} -EXPORT_SYMBOL(cl_page_prep); - -/** - * Notify layers about transfer completion. - * - * Invoked by transfer sub-system (which is a part of osc) to notify layers - * that a transfer, of which this page is a part of has completed. - * - * Completion call-backs are executed in the bottom-up order, so that - * uppermost layer (llite), responsible for the VFS/VM interaction runs last - * and can release locks safely. 
- * - * \pre pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT - * \post pg->cp_state == CPS_CACHED - * - * \see cl_page_operations::cpo_completion() - */ -void cl_page_completion(const struct lu_env *env, - struct cl_page *pg, enum cl_req_type crt, int ioret) -{ - struct cl_sync_io *anchor = pg->cp_sync_io; - - PASSERT(env, pg, crt < CRT_NR); - PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt)); - - CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret); - - cl_page_state_set(env, pg, CPS_CACHED); - if (crt >= CRT_NR) - return; - CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion), - (const struct lu_env *, - const struct cl_page_slice *, int), ioret); - if (anchor) { - LASSERT(pg->cp_sync_io == anchor); - pg->cp_sync_io = NULL; - cl_sync_io_note(env, anchor, ioret); - } -} -EXPORT_SYMBOL(cl_page_completion); - -/** - * Notify layers that transfer formation engine decided to yank this page from - * the cache and to make it a part of a transfer. - * - * \pre pg->cp_state == CPS_CACHED - * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT - * - * \see cl_page_operations::cpo_make_ready() - */ -int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg, - enum cl_req_type crt) -{ - int result; - - PINVRNT(env, pg, crt < CRT_NR); - - if (crt >= CRT_NR) - return -EINVAL; - result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready), - (const struct lu_env *, - const struct cl_page_slice *)); - if (result == 0) { - PASSERT(env, pg, pg->cp_state == CPS_CACHED); - cl_page_io_start(env, pg, crt); - } - CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result); - return result; -} -EXPORT_SYMBOL(cl_page_make_ready); - -/** - * Called if a pge is being written back by kernel's intention. 
- * - * \pre cl_page_is_owned(pg, io) - * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT) - * - * \see cl_page_operations::cpo_flush() - */ -int cl_page_flush(const struct lu_env *env, struct cl_io *io, - struct cl_page *pg) -{ - int result; - - PINVRNT(env, pg, cl_page_is_owned(pg, io)); - PINVRNT(env, pg, cl_page_invariant(pg)); - - result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush)); - - CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result); - return result; -} -EXPORT_SYMBOL(cl_page_flush); - -/** - * Tells transfer engine that only part of a page is to be transmitted. - * - * \see cl_page_operations::cpo_clip() - */ -void cl_page_clip(const struct lu_env *env, struct cl_page *pg, - int from, int to) -{ - PINVRNT(env, pg, cl_page_invariant(pg)); - - CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to); - CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip), - (const struct lu_env *, - const struct cl_page_slice *, int, int), - from, to); -} -EXPORT_SYMBOL(cl_page_clip); - -/** - * Prints human readable representation of \a pg to the \a f. - */ -void cl_page_header_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct cl_page *pg) -{ - (*printer)(env, cookie, - "page@%p[%d %p %d %d %p]\n", - pg, atomic_read(&pg->cp_ref), pg->cp_obj, - pg->cp_state, pg->cp_type, - pg->cp_owner); -} -EXPORT_SYMBOL(cl_page_header_print); - -/** - * Prints human readable representation of \a pg to the \a f. - */ -void cl_page_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct cl_page *pg) -{ - cl_page_header_print(env, cookie, printer, pg); - CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print), - (const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t p), cookie, printer); - (*printer)(env, cookie, "end page@%p\n", pg); -} -EXPORT_SYMBOL(cl_page_print); - -/** - * Cancel a page which is still in a transfer. 
- */ -int cl_page_cancel(const struct lu_env *env, struct cl_page *page) -{ - return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel), - (const struct lu_env *, - const struct cl_page_slice *)); -} - -/** - * Converts a byte offset within object \a obj into a page index. - */ -loff_t cl_offset(const struct cl_object *obj, pgoff_t idx) -{ - /* - * XXX for now. - */ - return (loff_t)idx << PAGE_SHIFT; -} -EXPORT_SYMBOL(cl_offset); - -/** - * Converts a page index into a byte offset within object \a obj. - */ -pgoff_t cl_index(const struct cl_object *obj, loff_t offset) -{ - /* - * XXX for now. - */ - return offset >> PAGE_SHIFT; -} -EXPORT_SYMBOL(cl_index); - -size_t cl_page_size(const struct cl_object *obj) -{ - return 1UL << PAGE_SHIFT; -} -EXPORT_SYMBOL(cl_page_size); - -/** - * Adds page slice to the compound page. - * - * This is called by cl_object_operations::coo_page_init() methods to add a - * per-layer state to the page. New state is added at the end of - * cl_page::cp_layers list, that is, it is at the bottom of the stack. - * - * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add() - */ -void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice, - struct cl_object *obj, pgoff_t index, - const struct cl_page_operations *ops) -{ - list_add_tail(&slice->cpl_linkage, &page->cp_layers); - slice->cpl_obj = obj; - slice->cpl_index = index; - slice->cpl_ops = ops; - slice->cpl_page = page; -} -EXPORT_SYMBOL(cl_page_slice_add); - -/** - * Allocate and initialize cl_cache, called by ll_init_sbi(). 
- */ -struct cl_client_cache *cl_cache_init(unsigned long lru_page_max) -{ - struct cl_client_cache *cache = NULL; - - cache = kzalloc(sizeof(*cache), GFP_KERNEL); - if (!cache) - return NULL; - - /* Initialize cache data */ - atomic_set(&cache->ccc_users, 1); - cache->ccc_lru_max = lru_page_max; - atomic_long_set(&cache->ccc_lru_left, lru_page_max); - spin_lock_init(&cache->ccc_lru_lock); - INIT_LIST_HEAD(&cache->ccc_lru); - - atomic_long_set(&cache->ccc_unstable_nr, 0); - init_waitqueue_head(&cache->ccc_unstable_waitq); - - return cache; -} -EXPORT_SYMBOL(cl_cache_init); - -/** - * Increase cl_cache refcount - */ -void cl_cache_incref(struct cl_client_cache *cache) -{ - atomic_inc(&cache->ccc_users); -} -EXPORT_SYMBOL(cl_cache_incref); - -/** - * Decrease cl_cache refcount and free the cache if refcount=0. - * Since llite, lov and osc all hold cl_cache refcount, - * the free will not cause race. (LU-6173) - */ -void cl_cache_decref(struct cl_client_cache *cache) -{ - if (atomic_dec_and_test(&cache->ccc_users)) - kfree(cache); -} -EXPORT_SYMBOL(cl_cache_decref); diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c deleted file mode 100644 index d6c46858941b..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/class_obd.c +++ /dev/null @@ -1,544 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "llog_internal.h" - -struct obd_device *obd_devs[MAX_OBD_DEVICES]; -struct list_head obd_types; -DEFINE_RWLOCK(obd_dev_lock); - -/* The following are visible and mutable through /sys/fs/lustre. */ -unsigned int obd_debug_peer_on_timeout; -EXPORT_SYMBOL(obd_debug_peer_on_timeout); -unsigned int obd_dump_on_timeout; -EXPORT_SYMBOL(obd_dump_on_timeout); -unsigned int obd_dump_on_eviction; -EXPORT_SYMBOL(obd_dump_on_eviction); -unsigned long obd_max_dirty_pages; -EXPORT_SYMBOL(obd_max_dirty_pages); -atomic_long_t obd_dirty_pages; -EXPORT_SYMBOL(obd_dirty_pages); -unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT; /* seconds */ -EXPORT_SYMBOL(obd_timeout); -unsigned int obd_timeout_set; -EXPORT_SYMBOL(obd_timeout_set); -/* Adaptive timeout defs here instead of ptlrpc module for /sys/fs/ access */ -unsigned int at_min; -EXPORT_SYMBOL(at_min); -unsigned int at_max = 600; -EXPORT_SYMBOL(at_max); -unsigned int at_history = 600; -EXPORT_SYMBOL(at_history); -int at_early_margin = 5; -EXPORT_SYMBOL(at_early_margin); -int at_extra = 30; -EXPORT_SYMBOL(at_extra); - -atomic_long_t obd_dirty_transit_pages; -EXPORT_SYMBOL(obd_dirty_transit_pages); - -char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 
1] = JOBSTATS_DISABLE; -char obd_jobid_node[LUSTRE_JOBID_SIZE + 1]; - -/* Get jobid of current process from stored variable or calculate - * it from pid and user_id. - * - * Historically this was also done by reading the environment variable - * stored in between the "env_start" & "env_end" of task struct. - * This is now deprecated. - */ -int lustre_get_jobid(char *jobid) -{ - memset(jobid, 0, LUSTRE_JOBID_SIZE); - /* Jobstats isn't enabled */ - if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) - return 0; - - /* Use process name + fsuid as jobid */ - if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) { - snprintf(jobid, LUSTRE_JOBID_SIZE, "%s.%u", - current->comm, - from_kuid(&init_user_ns, current_fsuid())); - return 0; - } - - /* Whole node dedicated to single job */ - if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) { - strcpy(jobid, obd_jobid_node); - return 0; - } - - return -ENOENT; -} -EXPORT_SYMBOL(lustre_get_jobid); - -static int class_resolve_dev_name(__u32 len, const char *name) -{ - int rc; - int dev; - - if (!len || !name) { - CERROR("No name passed,!\n"); - rc = -EINVAL; - goto out; - } - if (name[len - 1] != 0) { - CERROR("Name not nul terminated!\n"); - rc = -EINVAL; - goto out; - } - - CDEBUG(D_IOCTL, "device name %s\n", name); - dev = class_name2dev(name); - if (dev == -1) { - CDEBUG(D_IOCTL, "No device for name %s!\n", name); - rc = -EINVAL; - goto out; - } - - CDEBUG(D_IOCTL, "device name %s, dev %d\n", name, dev); - rc = dev; - -out: - return rc; -} - -int class_handle_ioctl(unsigned int cmd, unsigned long arg) -{ - char *buf = NULL; - struct obd_ioctl_data *data; - struct libcfs_debug_ioctl_data *debug_data; - struct obd_device *obd = NULL; - int err = 0, len = 0; - - /* only for debugging */ - if (cmd == LIBCFS_IOC_DEBUG_MASK) { - debug_data = (struct libcfs_debug_ioctl_data *)arg; - libcfs_subsystem_debug = debug_data->subs; - libcfs_debug = debug_data->debug; - return 0; - } - - CDEBUG(D_IOCTL, "cmd = %x\n", cmd); - if 
(obd_ioctl_getdata(&buf, &len, (void __user *)arg)) { - CERROR("OBD ioctl: data error\n"); - return -EINVAL; - } - data = (struct obd_ioctl_data *)buf; - - switch (cmd) { - case OBD_IOC_PROCESS_CFG: { - struct lustre_cfg *lcfg; - - if (!data->ioc_plen1 || !data->ioc_pbuf1) { - CERROR("No config buffer passed!\n"); - err = -EINVAL; - goto out; - } - lcfg = kzalloc(data->ioc_plen1, GFP_NOFS); - if (!lcfg) { - err = -ENOMEM; - goto out; - } - if (copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1)) - err = -EFAULT; - if (!err) - err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1); - if (!err) - err = class_process_config(lcfg); - - kfree(lcfg); - goto out; - } - - case OBD_GET_VERSION: - if (!data->ioc_inlbuf1) { - CERROR("No buffer passed in ioctl\n"); - err = -EINVAL; - goto out; - } - - if (strlen(LUSTRE_VERSION_STRING) + 1 > data->ioc_inllen1) { - CERROR("ioctl buffer too small to hold version\n"); - err = -EINVAL; - goto out; - } - - memcpy(data->ioc_bulk, LUSTRE_VERSION_STRING, - strlen(LUSTRE_VERSION_STRING) + 1); - - if (copy_to_user((void __user *)arg, data, len)) - err = -EFAULT; - goto out; - - case OBD_IOC_NAME2DEV: { - /* Resolve a device name. This does not change the - * currently selected device. - */ - int dev; - - dev = class_resolve_dev_name(data->ioc_inllen1, - data->ioc_inlbuf1); - data->ioc_dev = dev; - if (dev < 0) { - err = -EINVAL; - goto out; - } - - if (copy_to_user((void __user *)arg, data, sizeof(*data))) - err = -EFAULT; - goto out; - } - - case OBD_IOC_UUID2DEV: { - /* Resolve a device uuid. This does not change the - * currently selected device. 
- */ - int dev; - struct obd_uuid uuid; - - if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { - CERROR("No UUID passed!\n"); - err = -EINVAL; - goto out; - } - if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { - CERROR("UUID not NUL terminated!\n"); - err = -EINVAL; - goto out; - } - - CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1); - obd_str2uuid(&uuid, data->ioc_inlbuf1); - dev = class_uuid2dev(&uuid); - data->ioc_dev = dev; - if (dev == -1) { - CDEBUG(D_IOCTL, "No device for UUID %s!\n", - data->ioc_inlbuf1); - err = -EINVAL; - goto out; - } - - CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1, - dev); - - if (copy_to_user((void __user *)arg, data, sizeof(*data))) - err = -EFAULT; - goto out; - } - - case OBD_IOC_GETDEVICE: { - int index = data->ioc_count; - char *status, *str; - - if (!data->ioc_inlbuf1) { - CERROR("No buffer passed in ioctl\n"); - err = -EINVAL; - goto out; - } - if (data->ioc_inllen1 < 128) { - CERROR("ioctl buffer too small to hold version\n"); - err = -EINVAL; - goto out; - } - - obd = class_num2obd(index); - if (!obd) { - err = -ENOENT; - goto out; - } - - if (obd->obd_stopping) - status = "ST"; - else if (obd->obd_set_up) - status = "UP"; - else if (obd->obd_attached) - status = "AT"; - else - status = "--"; - str = (char *)data->ioc_bulk; - snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d", - (int)index, status, obd->obd_type->typ_name, - obd->obd_name, obd->obd_uuid.uuid, - atomic_read(&obd->obd_refcount)); - - if (copy_to_user((void __user *)arg, data, len)) - err = -EFAULT; - goto out; - } - } - - if (data->ioc_dev == OBD_DEV_BY_DEVNAME) { - if (data->ioc_inllen4 <= 0 || !data->ioc_inlbuf4) { - err = -EINVAL; - goto out; - } - if (strnlen(data->ioc_inlbuf4, MAX_OBD_NAME) >= MAX_OBD_NAME) { - err = -EINVAL; - goto out; - } - obd = class_name2obd(data->ioc_inlbuf4); - } else if (data->ioc_dev < class_devno_max()) { - obd = class_num2obd(data->ioc_dev); - } else { - CERROR("OBD ioctl: No device\n"); - err = 
-EINVAL; - goto out; - } - - if (!obd) { - CERROR("OBD ioctl : No Device %d\n", data->ioc_dev); - err = -EINVAL; - goto out; - } - LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); - - if (!obd->obd_set_up || obd->obd_stopping) { - CERROR("OBD ioctl: device not setup %d\n", data->ioc_dev); - err = -EINVAL; - goto out; - } - - switch (cmd) { - case OBD_IOC_NO_TRANSNO: { - if (!obd->obd_attached) { - CERROR("Device %d not attached\n", obd->obd_minor); - err = -ENODEV; - goto out; - } - CDEBUG(D_HA, "%s: disabling committed-transno notification\n", - obd->obd_name); - obd->obd_no_transno = 1; - err = 0; - goto out; - } - - default: { - err = obd_iocontrol(cmd, obd->obd_self_export, len, data, NULL); - if (err) - goto out; - - if (copy_to_user((void __user *)arg, data, len)) - err = -EFAULT; - goto out; - } - } - - out: - kvfree(buf); - return err; -} /* class_handle_ioctl */ - -#define OBD_INIT_CHECK -static int obd_init_checks(void) -{ - __u64 u64val, div64val; - char buf[64]; - int len, ret = 0; - - CDEBUG(D_INFO, "LPU64=%s, LPD64=%s, LPX64=%s\n", "%llu", "%lld", - "%#llx"); - - CDEBUG(D_INFO, "OBD_OBJECT_EOF = %#llx\n", (__u64)OBD_OBJECT_EOF); - - u64val = OBD_OBJECT_EOF; - CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val); - if (u64val != OBD_OBJECT_EOF) { - CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n", - u64val, (int)sizeof(u64val)); - ret = -EINVAL; - } - len = snprintf(buf, sizeof(buf), "%#llx", u64val); - if (len != 18) { - CWARN("LPX64 wrong length! 
strlen(%s)=%d != 18\n", buf, len); - ret = -EINVAL; - } - - div64val = OBD_OBJECT_EOF; - CDEBUG(D_INFO, "u64val OBD_OBJECT_EOF = %#llx\n", u64val); - if (u64val != OBD_OBJECT_EOF) { - CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n", - u64val, (int)sizeof(u64val)); - ret = -EOVERFLOW; - } - if (u64val >> 8 != OBD_OBJECT_EOF >> 8) { - CERROR("__u64 %#llx(%d) != 0xffffffffffffffff\n", - u64val, (int)sizeof(u64val)); - return -EOVERFLOW; - } - if (do_div(div64val, 256) != (u64val & 255)) { - CERROR("do_div(%#llx,256) != %llu\n", u64val, u64val & 255); - return -EOVERFLOW; - } - if (u64val >> 8 != div64val) { - CERROR("do_div(%#llx,256) %llu != %llu\n", - u64val, div64val, u64val >> 8); - return -EOVERFLOW; - } - len = snprintf(buf, sizeof(buf), "%#llx", u64val); - if (len != 18) { - CWARN("LPX64 wrong length! strlen(%s)=%d != 18\n", buf, len); - ret = -EINVAL; - } - len = snprintf(buf, sizeof(buf), "%llu", u64val); - if (len != 20) { - CWARN("LPU64 wrong length! strlen(%s)=%d != 20\n", buf, len); - ret = -EINVAL; - } - len = snprintf(buf, sizeof(buf), "%lld", u64val); - if (len != 2) { - CWARN("LPD64 wrong length! 
strlen(%s)=%d != 2\n", buf, len); - ret = -EINVAL; - } - if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) { - CWARN("mask failed: u64val %llu >= %llu\n", u64val, - (__u64)PAGE_SIZE); - ret = -EINVAL; - } - - return ret; -} - -static int __init obdclass_init(void) -{ - int i, err; - - LCONSOLE_INFO("Lustre: Build Version: " LUSTRE_VERSION_STRING "\n"); - - spin_lock_init(&obd_types_lock); - - err = libcfs_setup(); - if (err) - return err; - - obd_zombie_impexp_init(); - - err = obd_init_checks(); - if (err) - return err; - - class_init_uuidlist(); - err = class_handle_init(); - if (err) - return err; - - INIT_LIST_HEAD(&obd_types); - - err = misc_register(&obd_psdev); - if (err) { - CERROR("cannot register OBD miscdevices: err %d\n", err); - return err; - } - - /* This struct is already zeroed for us (static global) */ - for (i = 0; i < class_devno_max(); i++) - obd_devs[i] = NULL; - - /* Default the dirty page cache cap to 1/2 of system memory. - * For clients with less memory, a larger fraction is needed - * for other purposes (mostly for BGL). - */ - if (totalram_pages <= 512 << (20 - PAGE_SHIFT)) - obd_max_dirty_pages = totalram_pages / 4; - else - obd_max_dirty_pages = totalram_pages / 2; - - err = obd_init_caches(); - if (err) - return err; - - err = class_procfs_init(); - if (err) - return err; - - err = obd_sysctl_init(); - if (err) - return err; - - err = lu_global_init(); - if (err) - return err; - - err = cl_global_init(); - if (err != 0) - return err; - - err = llog_info_init(); - if (err) - return err; - - err = lustre_register_fs(); - - return err; -} - -static void obdclass_exit(void) -{ - lustre_unregister_fs(); - - misc_deregister(&obd_psdev); - llog_info_fini(); - cl_global_fini(); - lu_global_fini(); - - obd_cleanup_caches(); - - class_procfs_clean(); - - class_handle_cleanup(); - class_exit_uuidlist(); - obd_zombie_impexp_stop(); -} - -MODULE_AUTHOR("OpenSFS, Inc. 
"); -MODULE_DESCRIPTION("Lustre Class Driver"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(obdclass_init); -module_exit(obdclass_exit); diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c deleted file mode 100644 index 2156a82a613a..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/debug.c +++ /dev/null @@ -1,96 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/debug.c - * - * Helper routines for dumping data structs for debugging. 
- */ - -#define DEBUG_SUBSYSTEM D_OTHER - -#include - -#include -#include -#include - -#define LPDS sizeof(__u64) -int block_debug_setup(void *addr, int len, __u64 off, __u64 id) -{ - LASSERT(addr); - - put_unaligned_le64(off, addr); - put_unaligned_le64(id, addr + LPDS); - addr += len - LPDS - LPDS; - put_unaligned_le64(off, addr); - put_unaligned_le64(id, addr + LPDS); - - return 0; -} -EXPORT_SYMBOL(block_debug_setup); - -int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id) -{ - __u64 ne_off; - int err = 0; - - LASSERT(addr); - - ne_off = le64_to_cpu(off); - id = le64_to_cpu(id); - if (memcmp(addr, (char *)&ne_off, LPDS)) { - CDEBUG(D_ERROR, "%s: id %#llx offset %llu off: %#llx != %#llx\n", - who, id, off, *(__u64 *)addr, ne_off); - err = -EINVAL; - } - if (memcmp(addr + LPDS, (char *)&id, LPDS)) { - CDEBUG(D_ERROR, "%s: id %#llx offset %llu id: %#llx != %#llx\n", - who, id, off, *(__u64 *)(addr + LPDS), id); - err = -EINVAL; - } - - addr += end - LPDS - LPDS; - if (memcmp(addr, (char *)&ne_off, LPDS)) { - CDEBUG(D_ERROR, "%s: id %#llx offset %llu end off: %#llx != %#llx\n", - who, id, off, *(__u64 *)addr, ne_off); - err = -EINVAL; - } - if (memcmp(addr + LPDS, (char *)&id, LPDS)) { - CDEBUG(D_ERROR, "%s: id %#llx offset %llu end id: %#llx != %#llx\n", - who, id, off, *(__u64 *)(addr + LPDS), id); - err = -EINVAL; - } - - return err; -} -EXPORT_SYMBOL(block_debug_check); -#undef LPDS diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c deleted file mode 100644 index 234f383ce6d9..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/genops.c +++ /dev/null @@ -1,1480 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/obdclass/genops.c - * - * These are the only exported functions, they provide some generic - * infrastructure for managing object devices - */ - -#define DEBUG_SUBSYSTEM S_CLASS -#include -#include -#include - -spinlock_t obd_types_lock; - -static struct kmem_cache *obd_device_cachep; -struct kmem_cache *obdo_cachep; -EXPORT_SYMBOL(obdo_cachep); -static struct kmem_cache *import_cachep; - -static struct workqueue_struct *zombie_wq; -static void obd_zombie_export_add(struct obd_export *exp); -static void obd_zombie_import_add(struct obd_import *imp); - -int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); -EXPORT_SYMBOL(ptlrpc_put_connection_superhack); - -/* - * support functions: we could use inter-module communication, but this - * is more portable to other OS's - */ -static struct obd_device *obd_device_alloc(void) -{ - struct obd_device *obd; - - obd = kmem_cache_zalloc(obd_device_cachep, GFP_NOFS); - if (obd) - obd->obd_magic = OBD_DEVICE_MAGIC; - return obd; -} - -static void obd_device_free(struct obd_device *obd) -{ - LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "obd %p obd_magic %08x != %08x\n", - obd, obd->obd_magic, OBD_DEVICE_MAGIC); - if (obd->obd_namespace) { - CERROR("obd %p: namespace %p was not properly cleaned up (obd_force=%d)!\n", - obd, obd->obd_namespace, obd->obd_force); - LBUG(); - } - lu_ref_fini(&obd->obd_reference); - kmem_cache_free(obd_device_cachep, obd); -} - -static struct obd_type *class_search_type(const char *name) -{ - struct list_head *tmp; - struct obd_type *type; - - spin_lock(&obd_types_lock); - list_for_each(tmp, &obd_types) { - type = list_entry(tmp, struct obd_type, typ_chain); - if (strcmp(type->typ_name, name) == 0) { - spin_unlock(&obd_types_lock); - return type; - } - } - spin_unlock(&obd_types_lock); - return NULL; -} - -static struct obd_type *class_get_type(const char *name) -{ - struct obd_type *type = class_search_type(name); - - if (!type) { - const char *modname = name; - - if 
(!request_module("%s", modname)) { - CDEBUG(D_INFO, "Loaded module '%s'\n", modname); - type = class_search_type(name); - } else { - LCONSOLE_ERROR_MSG(0x158, "Can't load module '%s'\n", - modname); - } - } - if (type) { - spin_lock(&type->obd_type_lock); - type->typ_refcnt++; - try_module_get(type->typ_dt_ops->owner); - spin_unlock(&type->obd_type_lock); - } - return type; -} - -void class_put_type(struct obd_type *type) -{ - LASSERT(type); - spin_lock(&type->obd_type_lock); - type->typ_refcnt--; - module_put(type->typ_dt_ops->owner); - spin_unlock(&type->obd_type_lock); -} - -#define CLASS_MAX_NAME 1024 - -int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops, - const char *name, - struct lu_device_type *ldt) -{ - struct obd_type *type; - int rc; - - /* sanity check */ - LASSERT(strnlen(name, CLASS_MAX_NAME) < CLASS_MAX_NAME); - - if (class_search_type(name)) { - CDEBUG(D_IOCTL, "Type %s already registered\n", name); - return -EEXIST; - } - - rc = -ENOMEM; - type = kzalloc(sizeof(*type), GFP_NOFS); - if (!type) - return rc; - - type->typ_dt_ops = kzalloc(sizeof(*type->typ_dt_ops), GFP_NOFS); - type->typ_md_ops = kzalloc(sizeof(*type->typ_md_ops), GFP_NOFS); - type->typ_name = kzalloc(strlen(name) + 1, GFP_NOFS); - - if (!type->typ_dt_ops || - !type->typ_md_ops || - !type->typ_name) - goto failed; - - *type->typ_dt_ops = *dt_ops; - /* md_ops is optional */ - if (md_ops) - *type->typ_md_ops = *md_ops; - strcpy(type->typ_name, name); - spin_lock_init(&type->obd_type_lock); - - type->typ_debugfs_entry = debugfs_create_dir(type->typ_name, - debugfs_lustre_root); - - type->typ_kobj = kobject_create_and_add(type->typ_name, lustre_kobj); - if (!type->typ_kobj) { - rc = -ENOMEM; - goto failed; - } - - if (ldt) { - type->typ_lu = ldt; - rc = lu_device_type_init(ldt); - if (rc != 0) - goto failed; - } - - spin_lock(&obd_types_lock); - list_add(&type->typ_chain, &obd_types); - spin_unlock(&obd_types_lock); - - return 0; - - failed: - if (type->typ_kobj) - 
kobject_put(type->typ_kobj); - kfree(type->typ_name); - kfree(type->typ_md_ops); - kfree(type->typ_dt_ops); - kfree(type); - return rc; -} -EXPORT_SYMBOL(class_register_type); - -int class_unregister_type(const char *name) -{ - struct obd_type *type = class_search_type(name); - - if (!type) { - CERROR("unknown obd type\n"); - return -EINVAL; - } - - if (type->typ_refcnt) { - CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt); - /* This is a bad situation, let's make the best of it */ - /* Remove ops, but leave the name for debugging */ - kfree(type->typ_dt_ops); - kfree(type->typ_md_ops); - return -EBUSY; - } - - if (type->typ_kobj) - kobject_put(type->typ_kobj); - - debugfs_remove_recursive(type->typ_debugfs_entry); - - if (type->typ_lu) - lu_device_type_fini(type->typ_lu); - - spin_lock(&obd_types_lock); - list_del(&type->typ_chain); - spin_unlock(&obd_types_lock); - kfree(type->typ_name); - kfree(type->typ_dt_ops); - kfree(type->typ_md_ops); - kfree(type); - return 0; -} /* class_unregister_type */ -EXPORT_SYMBOL(class_unregister_type); - -/** - * Create a new obd device. - * - * Find an empty slot in ::obd_devs[], create a new obd device in it. - * - * \param[in] type_name obd device type string. - * \param[in] name obd device name. - * - * \retval NULL if create fails, otherwise return the obd device - * pointer created. 
- */ -struct obd_device *class_newdev(const char *type_name, const char *name) -{ - struct obd_device *result = NULL; - struct obd_device *newdev; - struct obd_type *type = NULL; - int i; - int new_obd_minor = 0; - - if (strlen(name) >= MAX_OBD_NAME) { - CERROR("name/uuid must be < %u bytes long\n", MAX_OBD_NAME); - return ERR_PTR(-EINVAL); - } - - type = class_get_type(type_name); - if (!type) { - CERROR("OBD: unknown type: %s\n", type_name); - return ERR_PTR(-ENODEV); - } - - newdev = obd_device_alloc(); - if (!newdev) { - result = ERR_PTR(-ENOMEM); - goto out_type; - } - - LASSERT(newdev->obd_magic == OBD_DEVICE_MAGIC); - - write_lock(&obd_dev_lock); - for (i = 0; i < class_devno_max(); i++) { - struct obd_device *obd = class_num2obd(i); - - if (obd && (strcmp(name, obd->obd_name) == 0)) { - CERROR("Device %s already exists at %d, won't add\n", - name, i); - if (result) { - LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC, - "%p obd_magic %08x != %08x\n", result, - result->obd_magic, OBD_DEVICE_MAGIC); - LASSERTF(result->obd_minor == new_obd_minor, - "%p obd_minor %d != %d\n", result, - result->obd_minor, new_obd_minor); - - obd_devs[result->obd_minor] = NULL; - result->obd_name[0] = '\0'; - } - result = ERR_PTR(-EEXIST); - break; - } - if (!result && !obd) { - result = newdev; - result->obd_minor = i; - new_obd_minor = i; - result->obd_type = type; - strncpy(result->obd_name, name, - sizeof(result->obd_name) - 1); - obd_devs[i] = result; - } - } - write_unlock(&obd_dev_lock); - - if (!result && i >= class_devno_max()) { - CERROR("all %u OBD devices used, increase MAX_OBD_DEVICES\n", - class_devno_max()); - result = ERR_PTR(-EOVERFLOW); - goto out; - } - - if (IS_ERR(result)) - goto out; - - CDEBUG(D_IOCTL, "Adding new device %s (%p)\n", - result->obd_name, result); - - return result; -out: - obd_device_free(newdev); -out_type: - class_put_type(type); - return result; -} - -void class_release_dev(struct obd_device *obd) -{ - struct obd_type *obd_type = 
obd->obd_type; - - LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n", - obd, obd->obd_magic, OBD_DEVICE_MAGIC); - LASSERTF(obd == obd_devs[obd->obd_minor], "obd %p != obd_devs[%d] %p\n", - obd, obd->obd_minor, obd_devs[obd->obd_minor]); - LASSERT(obd_type); - - CDEBUG(D_INFO, "Release obd device %s at %d obd_type name =%s\n", - obd->obd_name, obd->obd_minor, obd->obd_type->typ_name); - - write_lock(&obd_dev_lock); - obd_devs[obd->obd_minor] = NULL; - write_unlock(&obd_dev_lock); - obd_device_free(obd); - - class_put_type(obd_type); -} - -int class_name2dev(const char *name) -{ - int i; - - if (!name) - return -1; - - read_lock(&obd_dev_lock); - for (i = 0; i < class_devno_max(); i++) { - struct obd_device *obd = class_num2obd(i); - - if (obd && strcmp(name, obd->obd_name) == 0) { - /* Make sure we finished attaching before we give - * out any references - */ - LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); - if (obd->obd_attached) { - read_unlock(&obd_dev_lock); - return i; - } - break; - } - } - read_unlock(&obd_dev_lock); - - return -1; -} - -struct obd_device *class_name2obd(const char *name) -{ - int dev = class_name2dev(name); - - if (dev < 0 || dev > class_devno_max()) - return NULL; - return class_num2obd(dev); -} -EXPORT_SYMBOL(class_name2obd); - -int class_uuid2dev(struct obd_uuid *uuid) -{ - int i; - - read_lock(&obd_dev_lock); - for (i = 0; i < class_devno_max(); i++) { - struct obd_device *obd = class_num2obd(i); - - if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) { - LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); - read_unlock(&obd_dev_lock); - return i; - } - } - read_unlock(&obd_dev_lock); - - return -1; -} - -/** - * Get obd device from ::obd_devs[] - * - * \param num [in] array index - * - * \retval NULL if ::obd_devs[\a num] does not contains an obd device - * otherwise return the obd device there. 
- */ -struct obd_device *class_num2obd(int num) -{ - struct obd_device *obd = NULL; - - if (num < class_devno_max()) { - obd = obd_devs[num]; - if (!obd) - return NULL; - - LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, - "%p obd_magic %08x != %08x\n", - obd, obd->obd_magic, OBD_DEVICE_MAGIC); - LASSERTF(obd->obd_minor == num, - "%p obd_minor %0d != %0d\n", - obd, obd->obd_minor, num); - } - - return obd; -} - -/* Search for a client OBD connected to tgt_uuid. If grp_uuid is - * specified, then only the client with that uuid is returned, - * otherwise any client connected to the tgt is returned. - */ -struct obd_device *class_find_client_obd(struct obd_uuid *tgt_uuid, - const char *typ_name, - struct obd_uuid *grp_uuid) -{ - int i; - - read_lock(&obd_dev_lock); - for (i = 0; i < class_devno_max(); i++) { - struct obd_device *obd = class_num2obd(i); - - if (!obd) - continue; - if ((strncmp(obd->obd_type->typ_name, typ_name, - strlen(typ_name)) == 0)) { - if (obd_uuid_equals(tgt_uuid, - &obd->u.cli.cl_target_uuid) && - ((grp_uuid) ? obd_uuid_equals(grp_uuid, - &obd->obd_uuid) : 1)) { - read_unlock(&obd_dev_lock); - return obd; - } - } - } - read_unlock(&obd_dev_lock); - - return NULL; -} -EXPORT_SYMBOL(class_find_client_obd); - -/* Iterate the obd_device list looking devices have grp_uuid. Start - * searching at *next, and if a device is found, the next index to look - * at is saved in *next. If next is NULL, then the first matching device - * will always be returned. 
- */ -struct obd_device *class_devices_in_group(struct obd_uuid *grp_uuid, int *next) -{ - int i; - - if (!next) - i = 0; - else if (*next >= 0 && *next < class_devno_max()) - i = *next; - else - return NULL; - - read_lock(&obd_dev_lock); - for (; i < class_devno_max(); i++) { - struct obd_device *obd = class_num2obd(i); - - if (!obd) - continue; - if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) { - if (next) - *next = i + 1; - read_unlock(&obd_dev_lock); - return obd; - } - } - read_unlock(&obd_dev_lock); - - return NULL; -} -EXPORT_SYMBOL(class_devices_in_group); - -/** - * to notify sptlrpc log for \a fsname has changed, let every relevant OBD - * adjust sptlrpc settings accordingly. - */ -int class_notify_sptlrpc_conf(const char *fsname, int namelen) -{ - struct obd_device *obd; - const char *type; - int i, rc = 0, rc2; - - LASSERT(namelen > 0); - - read_lock(&obd_dev_lock); - for (i = 0; i < class_devno_max(); i++) { - obd = class_num2obd(i); - - if (!obd || obd->obd_set_up == 0 || obd->obd_stopping) - continue; - - /* only notify mdc, osc, mdt, ost */ - type = obd->obd_type->typ_name; - if (strcmp(type, LUSTRE_MDC_NAME) != 0 && - strcmp(type, LUSTRE_OSC_NAME) != 0 && - strcmp(type, LUSTRE_MDT_NAME) != 0 && - strcmp(type, LUSTRE_OST_NAME) != 0) - continue; - - if (strncmp(obd->obd_name, fsname, namelen)) - continue; - - class_incref(obd, __func__, obd); - read_unlock(&obd_dev_lock); - rc2 = obd_set_info_async(NULL, obd->obd_self_export, - sizeof(KEY_SPTLRPC_CONF), - KEY_SPTLRPC_CONF, 0, NULL, NULL); - rc = rc ? 
rc : rc2; - class_decref(obd, __func__, obd); - read_lock(&obd_dev_lock); - } - read_unlock(&obd_dev_lock); - return rc; -} -EXPORT_SYMBOL(class_notify_sptlrpc_conf); - -void obd_cleanup_caches(void) -{ - kmem_cache_destroy(obd_device_cachep); - obd_device_cachep = NULL; - kmem_cache_destroy(obdo_cachep); - obdo_cachep = NULL; - kmem_cache_destroy(import_cachep); - import_cachep = NULL; -} - -int obd_init_caches(void) -{ - LASSERT(!obd_device_cachep); - obd_device_cachep = kmem_cache_create("ll_obd_dev_cache", - sizeof(struct obd_device), - 0, 0, NULL); - if (!obd_device_cachep) - goto out; - - LASSERT(!obdo_cachep); - obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo), - 0, 0, NULL); - if (!obdo_cachep) - goto out; - - LASSERT(!import_cachep); - import_cachep = kmem_cache_create("ll_import_cache", - sizeof(struct obd_import), - 0, 0, NULL); - if (!import_cachep) - goto out; - - return 0; - out: - obd_cleanup_caches(); - return -ENOMEM; -} - -/* map connection to client */ -struct obd_export *class_conn2export(struct lustre_handle *conn) -{ - struct obd_export *export; - - if (!conn) { - CDEBUG(D_CACHE, "looking for null handle\n"); - return NULL; - } - - if (conn->cookie == -1) { /* this means assign a new connection */ - CDEBUG(D_CACHE, "want a new connection\n"); - return NULL; - } - - CDEBUG(D_INFO, "looking for export cookie %#llx\n", conn->cookie); - export = class_handle2object(conn->cookie, NULL); - return export; -} -EXPORT_SYMBOL(class_conn2export); - -struct obd_device *class_exp2obd(struct obd_export *exp) -{ - if (exp) - return exp->exp_obd; - return NULL; -} -EXPORT_SYMBOL(class_exp2obd); - -struct obd_import *class_exp2cliimp(struct obd_export *exp) -{ - struct obd_device *obd = exp->exp_obd; - - if (!obd) - return NULL; - return obd->u.cli.cl_import; -} -EXPORT_SYMBOL(class_exp2cliimp); - -/* Export management functions */ -static void class_export_destroy(struct obd_export *exp) -{ - struct obd_device *obd = exp->exp_obd; - - 
LASSERT_ATOMIC_ZERO(&exp->exp_refcount); - LASSERT(obd); - - CDEBUG(D_IOCTL, "destroying export %p/%s for %s\n", exp, - exp->exp_client_uuid.uuid, obd->obd_name); - - /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */ - if (exp->exp_connection) - ptlrpc_put_connection_superhack(exp->exp_connection); - - LASSERT(list_empty(&exp->exp_outstanding_replies)); - LASSERT(list_empty(&exp->exp_uncommitted_replies)); - LASSERT(list_empty(&exp->exp_req_replay_queue)); - LASSERT(list_empty(&exp->exp_hp_rpcs)); - obd_destroy_export(exp); - class_decref(obd, "export", exp); - - OBD_FREE_RCU(exp, sizeof(*exp), &exp->exp_handle); -} - -static void export_handle_addref(void *export) -{ - class_export_get(export); -} - -static struct portals_handle_ops export_handle_ops = { - .hop_addref = export_handle_addref, - .hop_free = NULL, -}; - -struct obd_export *class_export_get(struct obd_export *exp) -{ - atomic_inc(&exp->exp_refcount); - CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp, - atomic_read(&exp->exp_refcount)); - return exp; -} -EXPORT_SYMBOL(class_export_get); - -void class_export_put(struct obd_export *exp) -{ - LASSERT_ATOMIC_GT_LT(&exp->exp_refcount, 0, LI_POISON); - CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp, - atomic_read(&exp->exp_refcount) - 1); - - if (atomic_dec_and_test(&exp->exp_refcount)) { - LASSERT(!list_empty(&exp->exp_obd_chain)); - CDEBUG(D_IOCTL, "final put %p/%s\n", - exp, exp->exp_client_uuid.uuid); - - /* release nid stat refererence */ - lprocfs_exp_cleanup(exp); - - obd_zombie_export_add(exp); - } -} -EXPORT_SYMBOL(class_export_put); - -static void obd_zombie_exp_cull(struct work_struct *ws) -{ - struct obd_export *export = container_of(ws, struct obd_export, exp_zombie_work); - - class_export_destroy(export); -} - -/* Creates a new export, adds it to the hash table, and returns a - * pointer to it. The refcount is 2: one for the hash reference, and - * one for the pointer returned by this function. 
- */ -struct obd_export *class_new_export(struct obd_device *obd, - struct obd_uuid *cluuid) -{ - struct obd_export *export; - int rc = 0; - - export = kzalloc(sizeof(*export), GFP_NOFS); - if (!export) - return ERR_PTR(-ENOMEM); - - export->exp_conn_cnt = 0; - atomic_set(&export->exp_refcount, 2); - atomic_set(&export->exp_rpc_count, 0); - atomic_set(&export->exp_cb_count, 0); - atomic_set(&export->exp_locks_count, 0); -#if LUSTRE_TRACKS_LOCK_EXP_REFS - INIT_LIST_HEAD(&export->exp_locks_list); - spin_lock_init(&export->exp_locks_list_guard); -#endif - atomic_set(&export->exp_replay_count, 0); - export->exp_obd = obd; - INIT_LIST_HEAD(&export->exp_outstanding_replies); - spin_lock_init(&export->exp_uncommitted_replies_lock); - INIT_LIST_HEAD(&export->exp_uncommitted_replies); - INIT_LIST_HEAD(&export->exp_req_replay_queue); - INIT_LIST_HEAD(&export->exp_handle.h_link); - INIT_LIST_HEAD(&export->exp_hp_rpcs); - class_handle_hash(&export->exp_handle, &export_handle_ops); - spin_lock_init(&export->exp_lock); - spin_lock_init(&export->exp_rpc_lock); - spin_lock_init(&export->exp_bl_list_lock); - INIT_LIST_HEAD(&export->exp_bl_list); - INIT_WORK(&export->exp_zombie_work, obd_zombie_exp_cull); - - export->exp_sp_peer = LUSTRE_SP_ANY; - export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID; - export->exp_client_uuid = *cluuid; - obd_init_export(export); - - spin_lock(&obd->obd_dev_lock); - /* shouldn't happen, but might race */ - if (obd->obd_stopping) { - rc = -ENODEV; - goto exit_unlock; - } - - if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) { - rc = obd_uuid_add(obd, export); - if (rc) { - LCONSOLE_WARN("%s: denying duplicate export for %s, %d\n", - obd->obd_name, cluuid->uuid, rc); - goto exit_unlock; - } - } - - class_incref(obd, "export", export); - list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports); - export->exp_obd->obd_num_exports++; - spin_unlock(&obd->obd_dev_lock); - return export; - -exit_unlock: - spin_unlock(&obd->obd_dev_lock); - 
class_handle_unhash(&export->exp_handle); - obd_destroy_export(export); - kfree(export); - return ERR_PTR(rc); -} -EXPORT_SYMBOL(class_new_export); - -void class_unlink_export(struct obd_export *exp) -{ - class_handle_unhash(&exp->exp_handle); - - spin_lock(&exp->exp_obd->obd_dev_lock); - /* delete an uuid-export hashitem from hashtables */ - if (exp != exp->exp_obd->obd_self_export) - obd_uuid_del(exp->exp_obd, exp); - - list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports); - exp->exp_obd->obd_num_exports--; - spin_unlock(&exp->exp_obd->obd_dev_lock); - class_export_put(exp); -} - -/* Import management functions */ -static void class_import_destroy(struct obd_import *imp) -{ - CDEBUG(D_IOCTL, "destroying import %p for %s\n", imp, - imp->imp_obd->obd_name); - - LASSERT_ATOMIC_ZERO(&imp->imp_refcount); - - ptlrpc_put_connection_superhack(imp->imp_connection); - - while (!list_empty(&imp->imp_conn_list)) { - struct obd_import_conn *imp_conn; - - imp_conn = list_entry(imp->imp_conn_list.next, - struct obd_import_conn, oic_item); - list_del_init(&imp_conn->oic_item); - ptlrpc_put_connection_superhack(imp_conn->oic_conn); - kfree(imp_conn); - } - - LASSERT(!imp->imp_sec); - class_decref(imp->imp_obd, "import", imp); - OBD_FREE_RCU(imp, sizeof(*imp), &imp->imp_handle); -} - -static void import_handle_addref(void *import) -{ - class_import_get(import); -} - -static struct portals_handle_ops import_handle_ops = { - .hop_addref = import_handle_addref, - .hop_free = NULL, -}; - -struct obd_import *class_import_get(struct obd_import *import) -{ - atomic_inc(&import->imp_refcount); - CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", import, - atomic_read(&import->imp_refcount), - import->imp_obd->obd_name); - return import; -} -EXPORT_SYMBOL(class_import_get); - -void class_import_put(struct obd_import *imp) -{ - LASSERT_ATOMIC_GT_LT(&imp->imp_refcount, 0, LI_POISON); - - CDEBUG(D_INFO, "import %p refcount=%d obd=%s\n", imp, - atomic_read(&imp->imp_refcount) - 1, 
- imp->imp_obd->obd_name); - - if (atomic_dec_and_test(&imp->imp_refcount)) { - CDEBUG(D_INFO, "final put import %p\n", imp); - obd_zombie_import_add(imp); - } - - /* catch possible import put race */ - LASSERT_ATOMIC_GE_LT(&imp->imp_refcount, 0, LI_POISON); -} -EXPORT_SYMBOL(class_import_put); - -static void init_imp_at(struct imp_at *at) -{ - int i; - - at_init(&at->iat_net_latency, 0, 0); - for (i = 0; i < IMP_AT_MAX_PORTALS; i++) { - /* max service estimates are tracked on the server side, so - * don't use the AT history here, just use the last reported - * val. (But keep hist for proc histogram, worst_ever) - */ - at_init(&at->iat_service_estimate[i], INITIAL_CONNECT_TIMEOUT, - AT_FLG_NOHIST); - } -} - -static void obd_zombie_imp_cull(struct work_struct *ws) -{ - struct obd_import *import = container_of(ws, struct obd_import, imp_zombie_work); - - class_import_destroy(import); -} - -struct obd_import *class_new_import(struct obd_device *obd) -{ - struct obd_import *imp; - - imp = kzalloc(sizeof(*imp), GFP_NOFS); - if (!imp) - return NULL; - - INIT_LIST_HEAD(&imp->imp_pinger_chain); - INIT_LIST_HEAD(&imp->imp_replay_list); - INIT_LIST_HEAD(&imp->imp_sending_list); - INIT_LIST_HEAD(&imp->imp_delayed_list); - INIT_LIST_HEAD(&imp->imp_committed_list); - INIT_LIST_HEAD(&imp->imp_unreplied_list); - imp->imp_known_replied_xid = 0; - imp->imp_replay_cursor = &imp->imp_committed_list; - spin_lock_init(&imp->imp_lock); - imp->imp_last_success_conn = 0; - imp->imp_state = LUSTRE_IMP_NEW; - imp->imp_obd = class_incref(obd, "import", imp); - mutex_init(&imp->imp_sec_mutex); - init_waitqueue_head(&imp->imp_recovery_waitq); - INIT_WORK(&imp->imp_zombie_work, obd_zombie_imp_cull); - - atomic_set(&imp->imp_refcount, 2); - atomic_set(&imp->imp_unregistering, 0); - atomic_set(&imp->imp_inflight, 0); - atomic_set(&imp->imp_replay_inflight, 0); - atomic_set(&imp->imp_inval_count, 0); - INIT_LIST_HEAD(&imp->imp_conn_list); - INIT_LIST_HEAD(&imp->imp_handle.h_link); - 
class_handle_hash(&imp->imp_handle, &import_handle_ops); - init_imp_at(&imp->imp_at); - - /* the default magic is V2, will be used in connect RPC, and - * then adjusted according to the flags in request/reply. - */ - imp->imp_msg_magic = LUSTRE_MSG_MAGIC_V2; - - return imp; -} -EXPORT_SYMBOL(class_new_import); - -void class_destroy_import(struct obd_import *import) -{ - LASSERT(import); - LASSERT(import != LP_POISON); - - class_handle_unhash(&import->imp_handle); - - spin_lock(&import->imp_lock); - import->imp_generation++; - spin_unlock(&import->imp_lock); - class_import_put(import); -} -EXPORT_SYMBOL(class_destroy_import); - -#if LUSTRE_TRACKS_LOCK_EXP_REFS - -void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) -{ - spin_lock(&exp->exp_locks_list_guard); - - LASSERT(lock->l_exp_refs_nr >= 0); - - if (lock->l_exp_refs_target && lock->l_exp_refs_target != exp) { - LCONSOLE_WARN("setting export %p for lock %p which already has export %p\n", - exp, lock, lock->l_exp_refs_target); - } - if ((lock->l_exp_refs_nr++) == 0) { - list_add(&lock->l_exp_refs_link, &exp->exp_locks_list); - lock->l_exp_refs_target = exp; - } - CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", - lock, exp, lock->l_exp_refs_nr); - spin_unlock(&exp->exp_locks_list_guard); -} - -void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) -{ - spin_lock(&exp->exp_locks_list_guard); - LASSERT(lock->l_exp_refs_nr > 0); - if (lock->l_exp_refs_target != exp) { - LCONSOLE_WARN("lock %p, mismatching export pointers: %p, %p\n", - lock, lock->l_exp_refs_target, exp); - } - if (-- lock->l_exp_refs_nr == 0) { - list_del_init(&lock->l_exp_refs_link); - lock->l_exp_refs_target = NULL; - } - CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", - lock, exp, lock->l_exp_refs_nr); - spin_unlock(&exp->exp_locks_list_guard); -} -#endif - -/* A connection defines an export context in which preallocation can - * be managed. 
This releases the export pointer reference, and returns - * the export handle, so the export refcount is 1 when this function - * returns. - */ -int class_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid) -{ - struct obd_export *export; - - LASSERT(conn); - LASSERT(obd); - LASSERT(cluuid); - - export = class_new_export(obd, cluuid); - if (IS_ERR(export)) - return PTR_ERR(export); - - conn->cookie = export->exp_handle.h_cookie; - class_export_put(export); - - CDEBUG(D_IOCTL, "connect: client %s, cookie %#llx\n", - cluuid->uuid, conn->cookie); - return 0; -} -EXPORT_SYMBOL(class_connect); - -/* This function removes 1-3 references from the export: - * 1 - for export pointer passed - * and if disconnect really need - * 2 - removing from hash - * 3 - in client_unlink_export - * The export pointer passed to this function can destroyed - */ -int class_disconnect(struct obd_export *export) -{ - int already_disconnected; - - if (!export) { - CWARN("attempting to free NULL export %p\n", export); - return -EINVAL; - } - - spin_lock(&export->exp_lock); - already_disconnected = export->exp_disconnected; - export->exp_disconnected = 1; - spin_unlock(&export->exp_lock); - - /* class_cleanup(), abort_recovery(), and class_fail_export() - * all end up in here, and if any of them race we shouldn't - * call extra class_export_puts(). 
- */ - if (already_disconnected) - goto no_disconn; - - CDEBUG(D_IOCTL, "disconnect: cookie %#llx\n", - export->exp_handle.h_cookie); - - class_unlink_export(export); -no_disconn: - class_export_put(export); - return 0; -} -EXPORT_SYMBOL(class_disconnect); - -void class_fail_export(struct obd_export *exp) -{ - int rc, already_failed; - - spin_lock(&exp->exp_lock); - already_failed = exp->exp_failed; - exp->exp_failed = 1; - spin_unlock(&exp->exp_lock); - - if (already_failed) { - CDEBUG(D_HA, "disconnecting dead export %p/%s; skipping\n", - exp, exp->exp_client_uuid.uuid); - return; - } - - CDEBUG(D_HA, "disconnecting export %p/%s\n", - exp, exp->exp_client_uuid.uuid); - - if (obd_dump_on_timeout) - libcfs_debug_dumplog(); - - /* need for safe call CDEBUG after obd_disconnect */ - class_export_get(exp); - - /* Most callers into obd_disconnect are removing their own reference - * (request, for example) in addition to the one from the hash table. - * We don't have such a reference here, so make one. - */ - class_export_get(exp); - rc = obd_disconnect(exp); - if (rc) - CERROR("disconnecting export %p failed: %d\n", exp, rc); - else - CDEBUG(D_HA, "disconnected export %p/%s\n", - exp, exp->exp_client_uuid.uuid); - class_export_put(exp); -} -EXPORT_SYMBOL(class_fail_export); - -#if LUSTRE_TRACKS_LOCK_EXP_REFS -void (*class_export_dump_hook)(struct obd_export *) = NULL; -#endif - -/** - * Add export to the obd_zombie thread and notify it. - */ -static void obd_zombie_export_add(struct obd_export *exp) -{ - spin_lock(&exp->exp_obd->obd_dev_lock); - LASSERT(!list_empty(&exp->exp_obd_chain)); - list_del_init(&exp->exp_obd_chain); - spin_unlock(&exp->exp_obd->obd_dev_lock); - queue_work(zombie_wq, &exp->exp_zombie_work); -} - -/** - * Add import to the obd_zombie thread and notify it. 
- */ -static void obd_zombie_import_add(struct obd_import *imp) -{ - LASSERT(!imp->imp_sec); - queue_work(zombie_wq, &imp->imp_zombie_work); -} - -/** - * wait when obd_zombie import/export queues become empty - */ -void obd_zombie_barrier(void) -{ - flush_workqueue(zombie_wq); -} -EXPORT_SYMBOL(obd_zombie_barrier); - -/** - * start destroy zombie import/export thread - */ -int obd_zombie_impexp_init(void) -{ - zombie_wq = alloc_workqueue("obd_zombid", 0, 0); - if (!zombie_wq) - return -ENOMEM; - - return 0; -} - -/** - * stop destroy zombie import/export thread - */ -void obd_zombie_impexp_stop(void) -{ - destroy_workqueue(zombie_wq); -} - -struct obd_request_slot_waiter { - struct list_head orsw_entry; - wait_queue_head_t orsw_waitq; - bool orsw_signaled; -}; - -static bool obd_request_slot_avail(struct client_obd *cli, - struct obd_request_slot_waiter *orsw) -{ - bool avail; - - spin_lock(&cli->cl_loi_list_lock); - avail = !!list_empty(&orsw->orsw_entry); - spin_unlock(&cli->cl_loi_list_lock); - - return avail; -}; - -/* - * For network flow control, the RPC sponsor needs to acquire a credit - * before sending the RPC. The credits count for a connection is defined - * by the "cl_max_rpcs_in_flight". If all the credits are occpuied, then - * the subsequent RPC sponsors need to wait until others released their - * credits, or the administrator increased the "cl_max_rpcs_in_flight". 
- */ -int obd_get_request_slot(struct client_obd *cli) -{ - struct obd_request_slot_waiter orsw; - int rc; - - spin_lock(&cli->cl_loi_list_lock); - if (cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight) { - cli->cl_r_in_flight++; - spin_unlock(&cli->cl_loi_list_lock); - return 0; - } - - init_waitqueue_head(&orsw.orsw_waitq); - list_add_tail(&orsw.orsw_entry, &cli->cl_loi_read_list); - orsw.orsw_signaled = false; - spin_unlock(&cli->cl_loi_list_lock); - - rc = l_wait_event_abortable(orsw.orsw_waitq, - obd_request_slot_avail(cli, &orsw) || - orsw.orsw_signaled); - - /* - * Here, we must take the lock to avoid the on-stack 'orsw' to be - * freed but other (such as obd_put_request_slot) is using it. - */ - spin_lock(&cli->cl_loi_list_lock); - if (rc) { - if (!orsw.orsw_signaled) { - if (list_empty(&orsw.orsw_entry)) - cli->cl_r_in_flight--; - else - list_del(&orsw.orsw_entry); - } - } - - if (orsw.orsw_signaled) { - LASSERT(list_empty(&orsw.orsw_entry)); - - rc = -EINTR; - } - spin_unlock(&cli->cl_loi_list_lock); - - return rc; -} -EXPORT_SYMBOL(obd_get_request_slot); - -void obd_put_request_slot(struct client_obd *cli) -{ - struct obd_request_slot_waiter *orsw; - - spin_lock(&cli->cl_loi_list_lock); - cli->cl_r_in_flight--; - - /* If there is free slot, wakeup the first waiter. 
*/ - if (!list_empty(&cli->cl_loi_read_list) && - likely(cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight)) { - orsw = list_entry(cli->cl_loi_read_list.next, - struct obd_request_slot_waiter, orsw_entry); - list_del_init(&orsw->orsw_entry); - cli->cl_r_in_flight++; - wake_up(&orsw->orsw_waitq); - } - spin_unlock(&cli->cl_loi_list_lock); -} -EXPORT_SYMBOL(obd_put_request_slot); - -__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli) -{ - return cli->cl_max_rpcs_in_flight; -} -EXPORT_SYMBOL(obd_get_max_rpcs_in_flight); - -int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max) -{ - struct obd_request_slot_waiter *orsw; - const char *typ_name; - __u32 old; - int diff; - int rc; - int i; - - if (max > OBD_MAX_RIF_MAX || max < 1) - return -ERANGE; - - typ_name = cli->cl_import->imp_obd->obd_type->typ_name; - if (!strcmp(typ_name, LUSTRE_MDC_NAME)) { - /* - * adjust max_mod_rpcs_in_flight to ensure it is always - * strictly lower that max_rpcs_in_flight - */ - if (max < 2) { - CERROR("%s: cannot set max_rpcs_in_flight to 1 because it must be higher than max_mod_rpcs_in_flight value\n", - cli->cl_import->imp_obd->obd_name); - return -ERANGE; - } - if (max <= cli->cl_max_mod_rpcs_in_flight) { - rc = obd_set_max_mod_rpcs_in_flight(cli, max - 1); - if (rc) - return rc; - } - } - - spin_lock(&cli->cl_loi_list_lock); - old = cli->cl_max_rpcs_in_flight; - cli->cl_max_rpcs_in_flight = max; - diff = max - old; - - /* We increase the max_rpcs_in_flight, then wakeup some waiters. 
*/ - for (i = 0; i < diff; i++) { - if (list_empty(&cli->cl_loi_read_list)) - break; - - orsw = list_entry(cli->cl_loi_read_list.next, - struct obd_request_slot_waiter, orsw_entry); - list_del_init(&orsw->orsw_entry); - cli->cl_r_in_flight++; - wake_up(&orsw->orsw_waitq); - } - spin_unlock(&cli->cl_loi_list_lock); - - return 0; -} -EXPORT_SYMBOL(obd_set_max_rpcs_in_flight); - -int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, __u16 max) -{ - struct obd_connect_data *ocd; - u16 maxmodrpcs; - u16 prev; - - if (max > OBD_MAX_RIF_MAX || max < 1) - return -ERANGE; - - /* cannot exceed or equal max_rpcs_in_flight */ - if (max >= cli->cl_max_rpcs_in_flight) { - CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) higher or equal to max_rpcs_in_flight value (%u)\n", - cli->cl_import->imp_obd->obd_name, - max, cli->cl_max_rpcs_in_flight); - return -ERANGE; - } - - /* cannot exceed max modify RPCs in flight supported by the server */ - ocd = &cli->cl_import->imp_connect_data; - if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) - maxmodrpcs = ocd->ocd_maxmodrpcs; - else - maxmodrpcs = 1; - if (max > maxmodrpcs) { - CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) higher than max_mod_rpcs_per_client value (%hu) returned by the server at connection\n", - cli->cl_import->imp_obd->obd_name, - max, maxmodrpcs); - return -ERANGE; - } - - spin_lock(&cli->cl_mod_rpcs_lock); - - prev = cli->cl_max_mod_rpcs_in_flight; - cli->cl_max_mod_rpcs_in_flight = max; - - /* wakeup waiters if limit has been increased */ - if (cli->cl_max_mod_rpcs_in_flight > prev) - wake_up(&cli->cl_mod_rpcs_waitq); - - spin_unlock(&cli->cl_mod_rpcs_lock); - - return 0; -} -EXPORT_SYMBOL(obd_set_max_mod_rpcs_in_flight); - -#define pct(a, b) (b ? 
(a * 100) / b : 0) - -int obd_mod_rpc_stats_seq_show(struct client_obd *cli, struct seq_file *seq) -{ - unsigned long mod_tot = 0, mod_cum; - struct timespec64 now; - int i; - - ktime_get_real_ts64(&now); - - spin_lock(&cli->cl_mod_rpcs_lock); - - seq_printf(seq, "snapshot_time: %llu.%9lu (secs.nsecs)\n", - (s64)now.tv_sec, (unsigned long)now.tv_nsec); - seq_printf(seq, "modify_RPCs_in_flight: %hu\n", - cli->cl_mod_rpcs_in_flight); - - seq_puts(seq, "\n\t\t\tmodify\n"); - seq_puts(seq, "rpcs in flight rpcs %% cum %%\n"); - - mod_tot = lprocfs_oh_sum(&cli->cl_mod_rpcs_hist); - - mod_cum = 0; - for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long mod = cli->cl_mod_rpcs_hist.oh_buckets[i]; - - mod_cum += mod; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu\n", - i, mod, pct(mod, mod_tot), - pct(mod_cum, mod_tot)); - if (mod_cum == mod_tot) - break; - } - - spin_unlock(&cli->cl_mod_rpcs_lock); - - return 0; -} -EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show); -#undef pct - -/* - * The number of modify RPCs sent in parallel is limited - * because the server has a finite number of slots per client to - * store request result and ensure reply reconstruction when needed. - * On the client, this limit is stored in cl_max_mod_rpcs_in_flight - * that takes into account server limit and cl_max_rpcs_in_flight - * value. - * On the MDC client, to avoid a potential deadlock (see Bugzilla 3462), - * one close request is allowed above the maximum. 
- */ -static inline bool obd_mod_rpc_slot_avail_locked(struct client_obd *cli, - bool close_req) -{ - bool avail; - - /* A slot is available if - * - number of modify RPCs in flight is less than the max - * - it's a close RPC and no other close request is in flight - */ - avail = cli->cl_mod_rpcs_in_flight < cli->cl_max_mod_rpcs_in_flight || - (close_req && !cli->cl_close_rpcs_in_flight); - - return avail; -} - -static inline bool obd_mod_rpc_slot_avail(struct client_obd *cli, - bool close_req) -{ - bool avail; - - spin_lock(&cli->cl_mod_rpcs_lock); - avail = obd_mod_rpc_slot_avail_locked(cli, close_req); - spin_unlock(&cli->cl_mod_rpcs_lock); - return avail; -} - -/* Get a modify RPC slot from the obd client @cli according - * to the kind of operation @opc that is going to be sent - * and the intent @it of the operation if it applies. - * If the maximum number of modify RPCs in flight is reached - * the thread is put to sleep. - * Returns the tag to be set in the request message. Tag 0 - * is reserved for non-modifying requests. 
- */ -u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc, - struct lookup_intent *it) -{ - bool close_req = false; - u16 i, max; - - /* read-only metadata RPCs don't consume a slot on MDT - * for reply reconstruction - */ - if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || - it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) - return 0; - - if (opc == MDS_CLOSE) - close_req = true; - - do { - spin_lock(&cli->cl_mod_rpcs_lock); - max = cli->cl_max_mod_rpcs_in_flight; - if (obd_mod_rpc_slot_avail_locked(cli, close_req)) { - /* there is a slot available */ - cli->cl_mod_rpcs_in_flight++; - if (close_req) - cli->cl_close_rpcs_in_flight++; - lprocfs_oh_tally(&cli->cl_mod_rpcs_hist, - cli->cl_mod_rpcs_in_flight); - /* find a free tag */ - i = find_first_zero_bit(cli->cl_mod_tag_bitmap, - max + 1); - LASSERT(i < OBD_MAX_RIF_MAX); - LASSERT(!test_and_set_bit(i, cli->cl_mod_tag_bitmap)); - spin_unlock(&cli->cl_mod_rpcs_lock); - /* tag 0 is reserved for non-modify RPCs */ - return i + 1; - } - spin_unlock(&cli->cl_mod_rpcs_lock); - - CDEBUG(D_RPCTRACE, "%s: sleeping for a modify RPC slot opc %u, max %hu\n", - cli->cl_import->imp_obd->obd_name, opc, max); - - wait_event_idle(cli->cl_mod_rpcs_waitq, - obd_mod_rpc_slot_avail(cli, close_req)); - } while (true); -} -EXPORT_SYMBOL(obd_get_mod_rpc_slot); - -/* - * Put a modify RPC slot from the obd client @cli according - * to the kind of operation @opc that has been sent and the - * intent @it of the operation if it applies. 
- */ -void obd_put_mod_rpc_slot(struct client_obd *cli, u32 opc, - struct lookup_intent *it, u16 tag) -{ - bool close_req = false; - - if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP || - it->it_op == IT_LAYOUT || it->it_op == IT_READDIR)) - return; - - if (opc == MDS_CLOSE) - close_req = true; - - spin_lock(&cli->cl_mod_rpcs_lock); - cli->cl_mod_rpcs_in_flight--; - if (close_req) - cli->cl_close_rpcs_in_flight--; - /* release the tag in the bitmap */ - LASSERT(tag - 1 < OBD_MAX_RIF_MAX); - LASSERT(test_and_clear_bit(tag - 1, cli->cl_mod_tag_bitmap) != 0); - spin_unlock(&cli->cl_mod_rpcs_lock); - wake_up(&cli->cl_mod_rpcs_waitq); -} -EXPORT_SYMBOL(obd_put_mod_rpc_slot); diff --git a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c b/drivers/staging/lustre/lustre/obdclass/kernelcomm.c deleted file mode 100644 index 63067a7f1e19..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/kernelcomm.c +++ /dev/null @@ -1,240 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Author: Nathan Rutman - * - * Kernel <-> userspace communication routines. - * Using pipes for all arches. - */ - -#define DEBUG_SUBSYSTEM S_CLASS -#define D_KUC D_OTHER - -#include -#include -#include -#include - -/** - * libcfs_kkuc_msg_put - send an message from kernel to userspace - * @param fp to send the message to - * @param payload Payload data. First field of payload is always - * struct kuc_hdr - */ -int libcfs_kkuc_msg_put(struct file *filp, void *payload) -{ - struct kuc_hdr *kuch = (struct kuc_hdr *)payload; - ssize_t count = kuch->kuc_msglen; - loff_t offset = 0; - int rc = -ENXIO; - - if (IS_ERR_OR_NULL(filp)) - return -EBADF; - - if (kuch->kuc_magic != KUC_MAGIC) { - CERROR("KernelComm: bad magic %x\n", kuch->kuc_magic); - return rc; - } - - while (count > 0) { - rc = kernel_write(filp, payload, count, &offset); - if (rc < 0) - break; - count -= rc; - payload += rc; - rc = 0; - } - - if (rc < 0) - CWARN("message send failed (%d)\n", rc); - else - CDEBUG(D_KUC, "Sent message rc=%d, fp=%p\n", rc, filp); - - return rc; -} -EXPORT_SYMBOL(libcfs_kkuc_msg_put); - -/* - * Broadcast groups are global across all mounted filesystems; - * i.e. 
registering for a group on 1 fs will get messages for that - * group from any fs - */ -/** A single group registration has a uid and a file pointer */ -struct kkuc_reg { - struct list_head kr_chain; - int kr_uid; - struct file *kr_fp; - char kr_data[0]; -}; - -static struct list_head kkuc_groups[KUC_GRP_MAX + 1] = {}; -/* Protect message sending against remove and adds */ -static DECLARE_RWSEM(kg_sem); - -/** Add a receiver to a broadcast group - * @param filp pipe to write into - * @param uid identifier for this receiver - * @param group group number - * @param data user data - */ -int libcfs_kkuc_group_add(struct file *filp, int uid, unsigned int group, - void *data, size_t data_len) -{ - struct kkuc_reg *reg; - - if (group > KUC_GRP_MAX) { - CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group); - return -EINVAL; - } - - /* fput in group_rem */ - if (!filp) - return -EBADF; - - /* freed in group_rem */ - reg = kmalloc(sizeof(*reg) + data_len, 0); - if (!reg) - return -ENOMEM; - - reg->kr_fp = filp; - reg->kr_uid = uid; - memcpy(reg->kr_data, data, data_len); - - down_write(&kg_sem); - if (!kkuc_groups[group].next) - INIT_LIST_HEAD(&kkuc_groups[group]); - list_add(®->kr_chain, &kkuc_groups[group]); - up_write(&kg_sem); - - CDEBUG(D_KUC, "Added uid=%d fp=%p to group %d\n", uid, filp, group); - - return 0; -} -EXPORT_SYMBOL(libcfs_kkuc_group_add); - -int libcfs_kkuc_group_rem(int uid, unsigned int group) -{ - struct kkuc_reg *reg, *next; - - if (!kkuc_groups[group].next) - return 0; - - if (!uid) { - /* Broadcast a shutdown message */ - struct kuc_hdr lh; - - lh.kuc_magic = KUC_MAGIC; - lh.kuc_transport = KUC_TRANSPORT_GENERIC; - lh.kuc_msgtype = KUC_MSG_SHUTDOWN; - lh.kuc_msglen = sizeof(lh); - libcfs_kkuc_group_put(group, &lh); - } - - down_write(&kg_sem); - list_for_each_entry_safe(reg, next, &kkuc_groups[group], kr_chain) { - if (!uid || (uid == reg->kr_uid)) { - list_del(®->kr_chain); - CDEBUG(D_KUC, "Removed uid=%d fp=%p from group %d\n", - reg->kr_uid, 
reg->kr_fp, group); - if (reg->kr_fp) - fput(reg->kr_fp); - kfree(reg); - } - } - up_write(&kg_sem); - - return 0; -} -EXPORT_SYMBOL(libcfs_kkuc_group_rem); - -int libcfs_kkuc_group_put(unsigned int group, void *payload) -{ - struct kkuc_reg *reg; - int rc = 0; - int one_success = 0; - - down_write(&kg_sem); - list_for_each_entry(reg, &kkuc_groups[group], kr_chain) { - if (reg->kr_fp) { - rc = libcfs_kkuc_msg_put(reg->kr_fp, payload); - if (!rc) { - one_success = 1; - } else if (rc == -EPIPE) { - fput(reg->kr_fp); - reg->kr_fp = NULL; - } - } - } - up_write(&kg_sem); - - /* - * don't return an error if the message has been delivered - * at least to one agent - */ - if (one_success) - rc = 0; - - return rc; -} -EXPORT_SYMBOL(libcfs_kkuc_group_put); - -/** - * Calls a callback function for each link of the given kuc group. - * @param group the group to call the function on. - * @param cb_func the function to be called. - * @param cb_arg extra argument to be passed to the callback function. - */ -int libcfs_kkuc_group_foreach(unsigned int group, libcfs_kkuc_cb_t cb_func, - void *cb_arg) -{ - struct kkuc_reg *reg; - int rc = 0; - - if (group > KUC_GRP_MAX) { - CDEBUG(D_WARNING, "Kernelcomm: bad group %d\n", group); - return -EINVAL; - } - - /* no link for this group */ - if (!kkuc_groups[group].next) - return 0; - - down_read(&kg_sem); - list_for_each_entry(reg, &kkuc_groups[group], kr_chain) { - if (reg->kr_fp) - rc = cb_func(reg->kr_data, cb_arg); - } - up_read(&kg_sem); - - return rc; -} -EXPORT_SYMBOL(libcfs_kkuc_group_foreach); diff --git a/drivers/staging/lustre/lustre/obdclass/linkea.c b/drivers/staging/lustre/lustre/obdclass/linkea.c deleted file mode 100644 index 74c99ee216bb..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/linkea.c +++ /dev/null @@ -1,249 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2013, 2014, Intel Corporation. - * Use is subject to license terms. - * - * Author: Di Wang - */ - -#include -#include -#include - -int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf) -{ - buf->lb_buf = kzalloc(PAGE_SIZE, GFP_NOFS); - if (!buf->lb_buf) - return -ENOMEM; - buf->lb_len = PAGE_SIZE; - ldata->ld_buf = buf; - ldata->ld_leh = ldata->ld_buf->lb_buf; - ldata->ld_leh->leh_magic = LINK_EA_MAGIC; - ldata->ld_leh->leh_len = sizeof(struct link_ea_header); - ldata->ld_leh->leh_reccount = 0; - ldata->ld_leh->leh_overflow_time = 0; - ldata->ld_leh->leh_padding = 0; - return 0; -} -EXPORT_SYMBOL(linkea_data_new); - -int linkea_init(struct linkea_data *ldata) -{ - struct link_ea_header *leh; - - LASSERT(ldata->ld_buf); - leh = ldata->ld_buf->lb_buf; - if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) { - leh->leh_magic = LINK_EA_MAGIC; - leh->leh_reccount = __swab32(leh->leh_reccount); - leh->leh_len = __swab64(leh->leh_len); - leh->leh_overflow_time = __swab32(leh->leh_overflow_time); - leh->leh_padding = __swab32(leh->leh_padding); - /* individual entries are swabbed by linkea_entry_unpack() */ - } - - if (leh->leh_magic != LINK_EA_MAGIC) - return -EINVAL; - - if (leh->leh_reccount == 0 && leh->leh_overflow_time 
== 0) - return -ENODATA; - - ldata->ld_leh = leh; - return 0; -} -EXPORT_SYMBOL(linkea_init); - -int linkea_init_with_rec(struct linkea_data *ldata) -{ - int rc; - - rc = linkea_init(ldata); - if (!rc && ldata->ld_leh->leh_reccount == 0) - rc = -ENODATA; - - return rc; -} -EXPORT_SYMBOL(linkea_init_with_rec); - -/** - * Pack a link_ea_entry. - * All elements are stored as chars to avoid alignment issues. - * Numbers are always big-endian - * \retval record length - */ -int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname, - const struct lu_fid *pfid) -{ - struct lu_fid tmpfid; - int reclen; - - tmpfid = *pfid; - if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_CRASH)) - tmpfid.f_ver = ~0; - fid_cpu_to_be(&tmpfid, &tmpfid); - memcpy(&lee->lee_parent_fid, &tmpfid, sizeof(tmpfid)); - memcpy(lee->lee_name, lname->ln_name, lname->ln_namelen); - reclen = sizeof(struct link_ea_entry) + lname->ln_namelen; - - lee->lee_reclen[0] = (reclen >> 8) & 0xff; - lee->lee_reclen[1] = reclen & 0xff; - return reclen; -} -EXPORT_SYMBOL(linkea_entry_pack); - -void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen, - struct lu_name *lname, struct lu_fid *pfid) -{ - LASSERT(lee); - - *reclen = (lee->lee_reclen[0] << 8) | lee->lee_reclen[1]; - memcpy(pfid, &lee->lee_parent_fid, sizeof(*pfid)); - fid_be_to_cpu(pfid, pfid); - if (lname) { - lname->ln_name = lee->lee_name; - lname->ln_namelen = *reclen - sizeof(struct link_ea_entry); - } -} -EXPORT_SYMBOL(linkea_entry_unpack); - -/** - * Add a record to the end of link ea buf - **/ -int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname, - const struct lu_fid *pfid) -{ - struct link_ea_header *leh = ldata->ld_leh; - int reclen; - - LASSERT(leh); - - if (!lname || !pfid) - return -EINVAL; - - reclen = lname->ln_namelen + sizeof(struct link_ea_entry); - if (unlikely(leh->leh_len + reclen > MAX_LINKEA_SIZE)) { - /* - * Use 32-bits to save the overflow time, although it will - * shrink the 
ktime_get_real_seconds() returned 64-bits value - * to 32-bits value, it is still quite large and can be used - * for about 140 years. That is enough. - */ - leh->leh_overflow_time = ktime_get_real_seconds(); - if (unlikely(leh->leh_overflow_time == 0)) - leh->leh_overflow_time++; - - CDEBUG(D_INODE, "No enough space to hold linkea entry '" DFID ": %.*s' at %u\n", - PFID(pfid), lname->ln_namelen, - lname->ln_name, leh->leh_overflow_time); - return 0; - } - - if (leh->leh_len + reclen > ldata->ld_buf->lb_len) { - /* Note: this never happens as MAX_LINKEA_SIZE is 4096, while - * the initial allocation is PAGE_SIZE. - */ - void *b = krealloc(ldata->ld_buf->lb_buf, leh->leh_len + reclen, GFP_NOFS); - if (!b) - return -ENOMEM; - - ldata->ld_buf->lb_len = leh->leh_len + reclen; - leh = ldata->ld_leh = ldata->ld_buf->lb_buf = b; - } - - ldata->ld_lee = ldata->ld_buf->lb_buf + leh->leh_len; - ldata->ld_reclen = linkea_entry_pack(ldata->ld_lee, lname, pfid); - leh->leh_len += ldata->ld_reclen; - leh->leh_reccount++; - CDEBUG(D_INODE, "New link_ea name '" DFID ":%.*s' is added\n", - PFID(pfid), lname->ln_namelen, lname->ln_name); - return 0; -} -EXPORT_SYMBOL(linkea_add_buf); - -/** Del the current record from the link ea buf */ -void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname) -{ - LASSERT(ldata->ld_leh && ldata->ld_lee); - LASSERT(ldata->ld_leh->leh_reccount > 0); - - ldata->ld_leh->leh_reccount--; - ldata->ld_leh->leh_len -= ldata->ld_reclen; - memmove(ldata->ld_lee, (char *)ldata->ld_lee + ldata->ld_reclen, - (char *)ldata->ld_leh + ldata->ld_leh->leh_len - - (char *)ldata->ld_lee); - CDEBUG(D_INODE, "Old link_ea name '%.*s' is removed\n", - lname->ln_namelen, lname->ln_name); - - if ((char *)ldata->ld_lee >= ((char *)ldata->ld_leh + - ldata->ld_leh->leh_len)) - ldata->ld_lee = NULL; -} -EXPORT_SYMBOL(linkea_del_buf); - -/** - * Check if such a link exists in linkEA. 
- * - * \param ldata link data the search to be done on - * \param lname name in the parent's directory entry pointing to this object - * \param pfid parent fid the link to be found for - * - * \retval 0 success - * \retval -ENOENT link does not exist - * \retval -ve on error - */ -int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname, - const struct lu_fid *pfid) -{ - struct lu_name tmpname; - struct lu_fid tmpfid; - int count; - - LASSERT(ldata->ld_leh); - - /* link #0, if leh_reccount == 0 we skip the loop and return -ENOENT */ - if (likely(ldata->ld_leh->leh_reccount > 0)) - ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1); - - for (count = 0; count < ldata->ld_leh->leh_reccount; count++) { - linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, - &tmpname, &tmpfid); - if (tmpname.ln_namelen == lname->ln_namelen && - lu_fid_eq(&tmpfid, pfid) && - (strncmp(tmpname.ln_name, lname->ln_name, - tmpname.ln_namelen) == 0)) - break; - ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee + - ldata->ld_reclen); - } - - if (count == ldata->ld_leh->leh_reccount) { - CDEBUG(D_INODE, "Old link_ea name '%.*s' not found\n", - lname->ln_namelen, lname->ln_name); - ldata->ld_lee = NULL; - ldata->ld_reclen = 0; - return -ENOENT; - } - return 0; -} -EXPORT_SYMBOL(linkea_links_find); diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c deleted file mode 100644 index 9c800580053b..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c +++ /dev/null @@ -1,514 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/linux/linux-module.c - * - * Object Devices Class Driver - * These are the only exported functions, they provide some generic - * infrastructure for managing object devices - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#define OBD_MAX_IOCTL_BUFFER 8192 - -static int obd_ioctl_is_invalid(struct obd_ioctl_data *data) -{ - if (data->ioc_len > BIT(30)) { - CERROR("OBD ioctl: ioc_len larger than 1<<30\n"); - return 1; - } - - if (data->ioc_inllen1 > BIT(30)) { - CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n"); - return 1; - } - - if (data->ioc_inllen2 > BIT(30)) { - CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n"); - return 1; - } - - if (data->ioc_inllen3 > BIT(30)) { - CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n"); - return 1; - } - - if (data->ioc_inllen4 > BIT(30)) { - CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n"); - return 1; - } - - if 
(data->ioc_inlbuf1 && data->ioc_inllen1 == 0) { - CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n"); - return 1; - } - - if (data->ioc_inlbuf2 && data->ioc_inllen2 == 0) { - CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n"); - return 1; - } - - if (data->ioc_inlbuf3 && data->ioc_inllen3 == 0) { - CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n"); - return 1; - } - - if (data->ioc_inlbuf4 && data->ioc_inllen4 == 0) { - CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n"); - return 1; - } - - if (data->ioc_pbuf1 && data->ioc_plen1 == 0) { - CERROR("OBD ioctl: pbuf1 pointer but 0 length\n"); - return 1; - } - - if (data->ioc_pbuf2 && data->ioc_plen2 == 0) { - CERROR("OBD ioctl: pbuf2 pointer but 0 length\n"); - return 1; - } - - if (!data->ioc_pbuf1 && data->ioc_plen1 != 0) { - CERROR("OBD ioctl: plen1 set but NULL pointer\n"); - return 1; - } - - if (!data->ioc_pbuf2 && data->ioc_plen2 != 0) { - CERROR("OBD ioctl: plen2 set but NULL pointer\n"); - return 1; - } - - if (obd_ioctl_packlen(data) > data->ioc_len) { - CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n", - obd_ioctl_packlen(data), data->ioc_len); - return 1; - } - - return 0; -} - -/* buffer MUST be at least the size of obd_ioctl_hdr */ -int obd_ioctl_getdata(char **buf, int *len, void __user *arg) -{ - struct obd_ioctl_hdr hdr; - struct obd_ioctl_data *data; - int err; - int offset = 0; - - if (copy_from_user(&hdr, arg, sizeof(hdr))) - return -EFAULT; - - if (hdr.ioc_version != OBD_IOCTL_VERSION) { - CERROR("Version mismatch kernel (%x) vs application (%x)\n", - OBD_IOCTL_VERSION, hdr.ioc_version); - return -EINVAL; - } - - if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) { - CERROR("User buffer len %d exceeds %d max buffer\n", - hdr.ioc_len, OBD_MAX_IOCTL_BUFFER); - return -EINVAL; - } - - if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) { - CERROR("User buffer too small for ioctl (%d)\n", hdr.ioc_len); - return -EINVAL; - } - - /* When there are lots of processes calling vmalloc on multi-core - * 
system, the high lock contention will hurt performance badly, - * obdfilter-survey is an example, which relies on ioctl. So we'd - * better avoid vmalloc on ioctl path. LU-66 - */ - *buf = kvzalloc(hdr.ioc_len, GFP_KERNEL); - if (!*buf) { - CERROR("Cannot allocate control buffer of len %d\n", - hdr.ioc_len); - return -EINVAL; - } - *len = hdr.ioc_len; - data = (struct obd_ioctl_data *)*buf; - - if (copy_from_user(*buf, arg, hdr.ioc_len)) { - err = -EFAULT; - goto free_buf; - } - if (hdr.ioc_len != data->ioc_len) { - err = -EINVAL; - goto free_buf; - } - - if (obd_ioctl_is_invalid(data)) { - CERROR("ioctl not correctly formatted\n"); - err = -EINVAL; - goto free_buf; - } - - if (data->ioc_inllen1) { - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - offset += cfs_size_round(data->ioc_inllen1); - } - - if (data->ioc_inllen2) { - data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset; - offset += cfs_size_round(data->ioc_inllen2); - } - - if (data->ioc_inllen3) { - data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset; - offset += cfs_size_round(data->ioc_inllen3); - } - - if (data->ioc_inllen4) - data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset; - - return 0; - -free_buf: - kvfree(*buf); - return err; -} -EXPORT_SYMBOL(obd_ioctl_getdata); - -/* opening /dev/obd */ -static int obd_class_open(struct inode *inode, struct file *file) -{ - try_module_get(THIS_MODULE); - return 0; -} - -/* closing /dev/obd */ -static int obd_class_release(struct inode *inode, struct file *file) -{ - module_put(THIS_MODULE); - return 0; -} - -/* to control /dev/obd */ -static long obd_class_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - int err = 0; - - /* Allow non-root access for OBD_IOC_PING_TARGET - used by lfs check */ - if (!capable(CAP_SYS_ADMIN) && (cmd != OBD_IOC_PING_TARGET)) - return err = -EACCES; - if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */ - return err = -ENOTTY; - - err = class_handle_ioctl(cmd, (unsigned long)arg); - - return err; -} - -/* 
declare character device */ -static const struct file_operations obd_psdev_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = obd_class_ioctl, /* unlocked_ioctl */ - .open = obd_class_open, /* open */ - .release = obd_class_release, /* release */ -}; - -/* modules setup */ -struct miscdevice obd_psdev = { - .minor = MISC_DYNAMIC_MINOR, - .name = OBD_DEV_NAME, - .fops = &obd_psdev_fops, -}; - -static ssize_t version_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return sprintf(buf, "%s\n", LUSTRE_VERSION_STRING); -} - -static ssize_t pinger_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return sprintf(buf, "%s\n", "on"); -} - -static ssize_t -health_check_show(struct kobject *kobj, struct attribute *attr, char *buf) -{ - bool healthy = true; - int i; - size_t len = 0; - - if (libcfs_catastrophe) - return sprintf(buf, "LBUG\n"); - - read_lock(&obd_dev_lock); - for (i = 0; i < class_devno_max(); i++) { - struct obd_device *obd; - - obd = class_num2obd(i); - if (!obd || !obd->obd_attached || !obd->obd_set_up) - continue; - - LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); - if (obd->obd_stopping) - continue; - - class_incref(obd, __func__, current); - read_unlock(&obd_dev_lock); - - if (obd_health_check(NULL, obd)) - healthy = false; - class_decref(obd, __func__, current); - read_lock(&obd_dev_lock); - } - read_unlock(&obd_dev_lock); - - if (healthy) - len = sprintf(buf, "healthy\n"); - else - len = sprintf(buf, "NOT HEALTHY\n"); - - return len; -} - -static ssize_t jobid_var_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%s\n", obd_jobid_var); -} - -static ssize_t jobid_var_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, - size_t count) -{ - if (!count || count > JOBSTATS_JOBID_VAR_MAX_LEN) - return -EINVAL; - - memset(obd_jobid_var, 0, JOBSTATS_JOBID_VAR_MAX_LEN + 1); - - memcpy(obd_jobid_var, buffer, count); - - /* Trim the trailing '\n' if any 
*/ - if (obd_jobid_var[count - 1] == '\n') - obd_jobid_var[count - 1] = 0; - - return count; -} - -static ssize_t jobid_name_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%s\n", obd_jobid_node); -} - -static ssize_t jobid_name_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, - size_t count) -{ - if (!count || count > LUSTRE_JOBID_SIZE) - return -EINVAL; - - memcpy(obd_jobid_node, buffer, count); - - obd_jobid_node[count] = 0; - - /* Trim the trailing '\n' if any */ - if (obd_jobid_node[count - 1] == '\n') - obd_jobid_node[count - 1] = 0; - - return count; -} - -/* Root for /sys/kernel/debug/lustre */ -struct dentry *debugfs_lustre_root; -EXPORT_SYMBOL_GPL(debugfs_lustre_root); - -LUSTRE_RO_ATTR(version); -LUSTRE_RO_ATTR(pinger); -LUSTRE_RO_ATTR(health_check); -LUSTRE_RW_ATTR(jobid_var); -LUSTRE_RW_ATTR(jobid_name); - -static struct attribute *lustre_attrs[] = { - &lustre_attr_version.attr, - &lustre_attr_pinger.attr, - &lustre_attr_health_check.attr, - &lustre_attr_jobid_name.attr, - &lustre_attr_jobid_var.attr, - NULL, -}; - -static void *obd_device_list_seq_start(struct seq_file *p, loff_t *pos) -{ - if (*pos >= class_devno_max()) - return NULL; - - return pos; -} - -static void obd_device_list_seq_stop(struct seq_file *p, void *v) -{ -} - -static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos) -{ - ++*pos; - if (*pos >= class_devno_max()) - return NULL; - - return pos; -} - -static int obd_device_list_seq_show(struct seq_file *p, void *v) -{ - loff_t index = *(loff_t *)v; - struct obd_device *obd = class_num2obd((int)index); - char *status; - - if (!obd) - return 0; - - LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC); - if (obd->obd_stopping) - status = "ST"; - else if (obd->obd_inactive) - status = "IN"; - else if (obd->obd_set_up) - status = "UP"; - else if (obd->obd_attached) - status = "AT"; - else - status = "--"; - - seq_printf(p, "%3d %s %s %s %s %d\n", - 
(int)index, status, obd->obd_type->typ_name, - obd->obd_name, obd->obd_uuid.uuid, - atomic_read(&obd->obd_refcount)); - return 0; -} - -static const struct seq_operations obd_device_list_sops = { - .start = obd_device_list_seq_start, - .stop = obd_device_list_seq_stop, - .next = obd_device_list_seq_next, - .show = obd_device_list_seq_show, -}; - -static int obd_device_list_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int rc = seq_open(file, &obd_device_list_sops); - - if (rc) - return rc; - - seq = file->private_data; - seq->private = inode->i_private; - - return 0; -} - -static const struct file_operations obd_device_list_fops = { - .owner = THIS_MODULE, - .open = obd_device_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -struct kobject *lustre_kobj; -EXPORT_SYMBOL_GPL(lustre_kobj); - -static const struct attribute_group lustre_attr_group = { - .attrs = lustre_attrs, -}; - -int class_procfs_init(void) -{ - int rc = -ENOMEM; - - lustre_kobj = kobject_create_and_add("lustre", fs_kobj); - if (!lustre_kobj) - goto out; - - /* Create the files associated with this kobject */ - rc = sysfs_create_group(lustre_kobj, &lustre_attr_group); - if (rc) { - kobject_put(lustre_kobj); - goto out; - } - - debugfs_lustre_root = debugfs_create_dir("lustre", NULL); - - debugfs_create_file("devices", 0444, debugfs_lustre_root, NULL, - &obd_device_list_fops); -out: - return rc; -} - -int class_procfs_clean(void) -{ - debugfs_remove_recursive(debugfs_lustre_root); - - debugfs_lustre_root = NULL; - - sysfs_remove_group(lustre_kobj, &lustre_attr_group); - kobject_put(lustre_kobj); - - return 0; -} diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c deleted file mode 100644 index e5e8687784ee..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c +++ /dev/null @@ -1,162 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* 
- * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include - -struct static_lustre_uintvalue_attr { - struct { - struct attribute attr; - ssize_t (*show)(struct kobject *kobj, struct attribute *attr, - char *buf); - ssize_t (*store)(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len); - } u; - int *value; -}; - -static ssize_t static_uintvalue_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct static_lustre_uintvalue_attr *lattr = (void *)attr; - - return sprintf(buf, "%d\n", *lattr->value); -} - -static ssize_t static_uintvalue_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, size_t count) -{ - struct static_lustre_uintvalue_attr *lattr = (void *)attr; - int rc; - unsigned int val; - - rc = kstrtouint(buffer, 10, &val); - if (rc) - return rc; - - *lattr->value = val; - - return count; -} - -#define LUSTRE_STATIC_UINT_ATTR(name, value) \ -static struct static_lustre_uintvalue_attr lustre_sattr_##name = \ - {__ATTR(name, 0644, \ - static_uintvalue_show, \ - static_uintvalue_store),\ - value } - -LUSTRE_STATIC_UINT_ATTR(timeout, &obd_timeout); - -static ssize_t max_dirty_mb_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - return sprintf(buf, "%lu\n", - obd_max_dirty_pages / (1 << (20 - PAGE_SHIFT))); -} - -static ssize_t max_dirty_mb_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - val *= 1 << (20 - PAGE_SHIFT); /* convert to pages */ - - if (val > ((totalram_pages / 10) * 9)) { - /* Somebody wants to assign too much memory to dirty pages */ - return -EINVAL; - } - - if (val < 4 << (20 - PAGE_SHIFT)) { - /* Less than 4 Mb for dirty cache is also bad */ - return -EINVAL; - } - - obd_max_dirty_pages = val; - - return count; 
-} -LUSTRE_RW_ATTR(max_dirty_mb); - -LUSTRE_STATIC_UINT_ATTR(debug_peer_on_timeout, &obd_debug_peer_on_timeout); -LUSTRE_STATIC_UINT_ATTR(dump_on_timeout, &obd_dump_on_timeout); -LUSTRE_STATIC_UINT_ATTR(dump_on_eviction, &obd_dump_on_eviction); -LUSTRE_STATIC_UINT_ATTR(at_min, &at_min); -LUSTRE_STATIC_UINT_ATTR(at_max, &at_max); -LUSTRE_STATIC_UINT_ATTR(at_extra, &at_extra); -LUSTRE_STATIC_UINT_ATTR(at_early_margin, &at_early_margin); -LUSTRE_STATIC_UINT_ATTR(at_history, &at_history); - -static struct attribute *lustre_attrs[] = { - &lustre_sattr_timeout.u.attr, - &lustre_attr_max_dirty_mb.attr, - &lustre_sattr_debug_peer_on_timeout.u.attr, - &lustre_sattr_dump_on_timeout.u.attr, - &lustre_sattr_dump_on_eviction.u.attr, - &lustre_sattr_at_min.u.attr, - &lustre_sattr_at_max.u.attr, - &lustre_sattr_at_extra.u.attr, - &lustre_sattr_at_early_margin.u.attr, - &lustre_sattr_at_history.u.attr, - NULL, -}; - -static const struct attribute_group lustre_attr_group = { - .attrs = lustre_attrs, -}; - -int obd_sysctl_init(void) -{ - return sysfs_create_group(lustre_kobj, &lustre_attr_group); -} diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c deleted file mode 100644 index bba84eae1e19..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/llog.c +++ /dev/null @@ -1,524 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/llog.c - * - * OST<->MDS recovery logging infrastructure. - * Invariants in implementation: - * - we do not share logs among different OST<->MDS connections, so that - * if an OST or MDS fails it need only look at log(s) relevant to itself - * - * Author: Andreas Dilger - * Author: Alex Zhuravlev - * Author: Mikhail Pershin - */ - -#define DEBUG_SUBSYSTEM S_LOG - -#include -#include -#include -#include -#include "llog_internal.h" - -/* - * Allocate a new log or catalog handle - * Used inside llog_open(). - */ -static struct llog_handle *llog_alloc_handle(void) -{ - struct llog_handle *loghandle; - - loghandle = kzalloc(sizeof(*loghandle), GFP_NOFS); - if (!loghandle) - return NULL; - - init_rwsem(&loghandle->lgh_lock); - spin_lock_init(&loghandle->lgh_hdr_lock); - INIT_LIST_HEAD(&loghandle->u.phd.phd_entry); - atomic_set(&loghandle->lgh_refcount, 1); - - return loghandle; -} - -/* - * Free llog handle and header data if exists. 
Used in llog_close() only - */ -static void llog_free_handle(struct llog_handle *loghandle) -{ - /* failed llog_init_handle */ - if (!loghandle->lgh_hdr) - goto out; - - if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN) - LASSERT(list_empty(&loghandle->u.phd.phd_entry)); - else if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT) - LASSERT(list_empty(&loghandle->u.chd.chd_head)); - kvfree(loghandle->lgh_hdr); -out: - kfree(loghandle); -} - -void llog_handle_get(struct llog_handle *loghandle) -{ - atomic_inc(&loghandle->lgh_refcount); -} - -void llog_handle_put(struct llog_handle *loghandle) -{ - LASSERT(atomic_read(&loghandle->lgh_refcount) > 0); - if (atomic_dec_and_test(&loghandle->lgh_refcount)) - llog_free_handle(loghandle); -} - -static int llog_read_header(const struct lu_env *env, - struct llog_handle *handle, - struct obd_uuid *uuid) -{ - struct llog_operations *lop; - int rc; - - rc = llog_handle2ops(handle, &lop); - if (rc) - return rc; - - if (!lop->lop_read_header) - return -EOPNOTSUPP; - - rc = lop->lop_read_header(env, handle); - if (rc == LLOG_EEMPTY) { - struct llog_log_hdr *llh = handle->lgh_hdr; - size_t len; - - /* lrh_len should be initialized in llog_init_handle */ - handle->lgh_last_idx = 0; /* header is record with index 0 */ - llh->llh_count = 1; /* for the header record */ - llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC; - LASSERT(handle->lgh_ctxt->loc_chunk_size >= LLOG_MIN_CHUNK_SIZE); - llh->llh_hdr.lrh_len = handle->lgh_ctxt->loc_chunk_size; - llh->llh_hdr.lrh_index = 0; - llh->llh_timestamp = ktime_get_real_seconds(); - if (uuid) - memcpy(&llh->llh_tgtuuid, uuid, - sizeof(llh->llh_tgtuuid)); - llh->llh_bitmap_offset = offsetof(typeof(*llh), llh_bitmap); - /* - * Since update llog header might also call this function, - * let's reset the bitmap to 0 here - */ - len = llh->llh_hdr.lrh_len - llh->llh_bitmap_offset; - memset(LLOG_HDR_BITMAP(llh), 0, len - sizeof(llh->llh_tail)); - ext2_set_bit(0, LLOG_HDR_BITMAP(llh)); - 
LLOG_HDR_TAIL(llh)->lrt_len = llh->llh_hdr.lrh_len; - LLOG_HDR_TAIL(llh)->lrt_index = llh->llh_hdr.lrh_index; - rc = 0; - } - return rc; -} - -int llog_init_handle(const struct lu_env *env, struct llog_handle *handle, - int flags, struct obd_uuid *uuid) -{ - int chunk_size = handle->lgh_ctxt->loc_chunk_size; - enum llog_flag fmt = flags & LLOG_F_EXT_MASK; - struct llog_log_hdr *llh; - int rc; - - LASSERT(!handle->lgh_hdr); - - LASSERT(chunk_size >= LLOG_MIN_CHUNK_SIZE); - llh = kvzalloc(sizeof(*llh), GFP_KERNEL); - if (!llh) - return -ENOMEM; - handle->lgh_hdr = llh; - handle->lgh_hdr_size = chunk_size; - /* first assign flags to use llog_client_ops */ - llh->llh_flags = flags; - rc = llog_read_header(env, handle, uuid); - if (rc == 0) { - if (unlikely((llh->llh_flags & LLOG_F_IS_PLAIN && - flags & LLOG_F_IS_CAT) || - (llh->llh_flags & LLOG_F_IS_CAT && - flags & LLOG_F_IS_PLAIN))) { - CERROR("%s: llog type is %s but initializing %s\n", - handle->lgh_ctxt->loc_obd->obd_name, - llh->llh_flags & LLOG_F_IS_CAT ? - "catalog" : "plain", - flags & LLOG_F_IS_CAT ? 
"catalog" : "plain"); - rc = -EINVAL; - goto out; - } else if (llh->llh_flags & - (LLOG_F_IS_PLAIN | LLOG_F_IS_CAT)) { - /* - * it is possible to open llog without specifying llog - * type so it is taken from llh_flags - */ - flags = llh->llh_flags; - } else { - /* for some reason the llh_flags has no type set */ - CERROR("llog type is not specified!\n"); - rc = -EINVAL; - goto out; - } - if (unlikely(uuid && - !obd_uuid_equals(uuid, &llh->llh_tgtuuid))) { - CERROR("%s: llog uuid mismatch: %s/%s\n", - handle->lgh_ctxt->loc_obd->obd_name, - (char *)uuid->uuid, - (char *)llh->llh_tgtuuid.uuid); - rc = -EEXIST; - goto out; - } - } - if (flags & LLOG_F_IS_CAT) { - LASSERT(list_empty(&handle->u.chd.chd_head)); - INIT_LIST_HEAD(&handle->u.chd.chd_head); - llh->llh_size = sizeof(struct llog_logid_rec); - llh->llh_flags |= LLOG_F_IS_FIXSIZE; - } else if (!(flags & LLOG_F_IS_PLAIN)) { - CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n", - handle->lgh_ctxt->loc_obd->obd_name, - flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN); - rc = -EINVAL; - } - llh->llh_flags |= fmt; -out: - if (rc) { - kvfree(llh); - handle->lgh_hdr = NULL; - } - return rc; -} -EXPORT_SYMBOL(llog_init_handle); - -static int llog_process_thread(void *arg) -{ - struct llog_process_info *lpi = arg; - struct llog_handle *loghandle = lpi->lpi_loghandle; - struct llog_log_hdr *llh = loghandle->lgh_hdr; - struct llog_process_cat_data *cd = lpi->lpi_catdata; - char *buf; - u64 cur_offset, tmp_offset; - int chunk_size; - int rc = 0, index = 1, last_index; - int saved_index = 0; - int last_called_index = 0; - - if (!llh) - return -EINVAL; - - cur_offset = llh->llh_hdr.lrh_len; - chunk_size = llh->llh_hdr.lrh_len; - /* expect chunk_size to be power of two */ - LASSERT(is_power_of_2(chunk_size)); - - buf = kvzalloc(chunk_size, GFP_NOFS); - if (!buf) { - lpi->lpi_rc = -ENOMEM; - return 0; - } - - if (cd) { - last_called_index = cd->lpcd_first_idx; - index = cd->lpcd_first_idx + 1; - } - if (cd && cd->lpcd_last_idx) - 
last_index = cd->lpcd_last_idx; - else - last_index = LLOG_HDR_BITMAP_SIZE(llh) - 1; - - while (rc == 0) { - unsigned int buf_offset = 0; - struct llog_rec_hdr *rec; - bool partial_chunk; - off_t chunk_offset; - - /* skip records not set in bitmap */ - while (index <= last_index && - !ext2_test_bit(index, LLOG_HDR_BITMAP(llh))) - ++index; - - if (index > last_index) - break; - - CDEBUG(D_OTHER, "index: %d last_index %d\n", - index, last_index); -repeat: - /* get the buf with our target record; avoid old garbage */ - memset(buf, 0, chunk_size); - rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index, - index, &cur_offset, buf, chunk_size); - if (rc) - goto out; - - /* - * NB: after llog_next_block() call the cur_offset is the - * offset of the next block after read one. - * The absolute offset of the current chunk is calculated - * from cur_offset value and stored in chunk_offset variable. - */ - tmp_offset = cur_offset; - if (do_div(tmp_offset, chunk_size)) { - partial_chunk = true; - chunk_offset = cur_offset & ~(chunk_size - 1); - } else { - partial_chunk = false; - chunk_offset = cur_offset - chunk_size; - } - - /* NB: when rec->lrh_len is accessed it is already swabbed - * since it is used at the "end" of the loop and the rec - * swabbing is done at the beginning of the loop. - */ - for (rec = (struct llog_rec_hdr *)(buf + buf_offset); - (char *)rec < buf + chunk_size; - rec = llog_rec_hdr_next(rec)) { - CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n", - rec, rec->lrh_type); - - if (LLOG_REC_HDR_NEEDS_SWABBING(rec)) - lustre_swab_llog_rec(rec); - - CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n", - rec->lrh_type, rec->lrh_index); - - /* - * for partial chunk the end of it is zeroed, check - * for index 0 to distinguish it. - */ - if (partial_chunk && !rec->lrh_index) { - /* concurrent llog_add() might add new records - * while llog_processing, check this is not - * the case and re-read the current chunk - * otherwise. 
- */ - if (index > loghandle->lgh_last_idx) { - rc = 0; - goto out; - } - CDEBUG(D_OTHER, "Re-read last llog buffer for new records, index %u, last %u\n", - index, loghandle->lgh_last_idx); - /* save offset inside buffer for the re-read */ - buf_offset = (char *)rec - (char *)buf; - cur_offset = chunk_offset; - goto repeat; - } - - if (!rec->lrh_len || rec->lrh_len > chunk_size) { - CWARN("invalid length %d in llog record for index %d/%d\n", - rec->lrh_len, - rec->lrh_index, index); - rc = -EINVAL; - goto out; - } - - if (rec->lrh_index < index) { - CDEBUG(D_OTHER, "skipping lrh_index %d\n", - rec->lrh_index); - continue; - } - - if (rec->lrh_index != index) { - CERROR("%s: Invalid record: index %u but expected %u\n", - loghandle->lgh_ctxt->loc_obd->obd_name, - rec->lrh_index, index); - rc = -ERANGE; - goto out; - } - - CDEBUG(D_OTHER, - "lrh_index: %d lrh_len: %d (%d remains)\n", - rec->lrh_index, rec->lrh_len, - (int)(buf + chunk_size - (char *)rec)); - - loghandle->lgh_cur_idx = rec->lrh_index; - loghandle->lgh_cur_offset = (char *)rec - (char *)buf + - chunk_offset; - - /* if set, process the callback on this record */ - if (ext2_test_bit(index, LLOG_HDR_BITMAP(llh))) { - rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec, - lpi->lpi_cbdata); - last_called_index = index; - if (rc) - goto out; - } - - /* exit if the last index is reached */ - if (index >= last_index) { - rc = 0; - goto out; - } - index++; - } - } - -out: - if (cd) - cd->lpcd_last_idx = last_called_index; - - kvfree(buf); - lpi->lpi_rc = rc; - return 0; -} - -static int llog_process_thread_daemonize(void *arg) -{ - struct llog_process_info *lpi = arg; - struct lu_env env; - int rc; - - unshare_fs_struct(); - - /* client env has no keys, tags is just 0 */ - rc = lu_env_init(&env, LCT_LOCAL | LCT_MG_THREAD); - if (rc) - goto out; - lpi->lpi_env = &env; - - rc = llog_process_thread(arg); - - lu_env_fini(&env); -out: - complete(&lpi->lpi_completion); - return rc; -} - -int llog_process_or_fork(const 
struct lu_env *env, - struct llog_handle *loghandle, - llog_cb_t cb, void *data, void *catdata, bool fork) -{ - struct llog_process_info *lpi; - int rc; - - lpi = kzalloc(sizeof(*lpi), GFP_NOFS); - if (!lpi) - return -ENOMEM; - lpi->lpi_loghandle = loghandle; - lpi->lpi_cb = cb; - lpi->lpi_cbdata = data; - lpi->lpi_catdata = catdata; - - if (fork) { - struct task_struct *task; - - /* The new thread can't use parent env, - * init the new one in llog_process_thread_daemonize. - */ - lpi->lpi_env = NULL; - init_completion(&lpi->lpi_completion); - task = kthread_run(llog_process_thread_daemonize, lpi, - "llog_process_thread"); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - CERROR("%s: cannot start thread: rc = %d\n", - loghandle->lgh_ctxt->loc_obd->obd_name, rc); - goto out_lpi; - } - wait_for_completion(&lpi->lpi_completion); - } else { - lpi->lpi_env = env; - llog_process_thread(lpi); - } - rc = lpi->lpi_rc; -out_lpi: - kfree(lpi); - return rc; -} -EXPORT_SYMBOL(llog_process_or_fork); - -int llog_process(const struct lu_env *env, struct llog_handle *loghandle, - llog_cb_t cb, void *data, void *catdata) -{ - return llog_process_or_fork(env, loghandle, cb, data, catdata, true); -} -EXPORT_SYMBOL(llog_process); - -int llog_open(const struct lu_env *env, struct llog_ctxt *ctxt, - struct llog_handle **lgh, struct llog_logid *logid, - char *name, enum llog_open_param open_param) -{ - const struct cred *old_cred = NULL; - int rc; - - LASSERT(ctxt); - LASSERT(ctxt->loc_logops); - - if (!ctxt->loc_logops->lop_open) { - *lgh = NULL; - return -EOPNOTSUPP; - } - - *lgh = llog_alloc_handle(); - if (!*lgh) - return -ENOMEM; - (*lgh)->lgh_ctxt = ctxt; - (*lgh)->lgh_logops = ctxt->loc_logops; - - if (cap_raised(current_cap(), CAP_SYS_RESOURCE)) { - struct cred *cred = prepare_creds(); - - if (cred) { - cap_raise(cred->cap_effective, CAP_SYS_RESOURCE); - old_cred = override_creds(cred); - } - } - rc = ctxt->loc_logops->lop_open(env, *lgh, logid, name, open_param); - if (old_cred) - 
revert_creds(old_cred); - - if (rc) { - llog_free_handle(*lgh); - *lgh = NULL; - } - return rc; -} -EXPORT_SYMBOL(llog_open); - -int llog_close(const struct lu_env *env, struct llog_handle *loghandle) -{ - struct llog_operations *lop; - int rc; - - rc = llog_handle2ops(loghandle, &lop); - if (rc) - goto out; - if (!lop->lop_close) { - rc = -EOPNOTSUPP; - goto out; - } - rc = lop->lop_close(env, loghandle); -out: - llog_handle_put(loghandle); - return rc; -} -EXPORT_SYMBOL(llog_close); diff --git a/drivers/staging/lustre/lustre/obdclass/llog_cat.c b/drivers/staging/lustre/lustre/obdclass/llog_cat.c deleted file mode 100644 index d9c63adff206..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/llog_cat.c +++ /dev/null @@ -1,236 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/llog_cat.c - * - * OST<->MDS recovery logging infrastructure. 
- * - * Invariants in implementation: - * - we do not share logs among different OST<->MDS connections, so that - * if an OST or MDS fails it need only look at log(s) relevant to itself - * - * Author: Andreas Dilger - * Author: Alexey Zhuravlev - * Author: Mikhail Pershin - */ - -#define DEBUG_SUBSYSTEM S_LOG - -#include - -#include "llog_internal.h" - -/* Open an existent log handle and add it to the open list. - * This log handle will be closed when all of the records in it are removed. - * - * Assumes caller has already pushed us into the kernel context and is locking. - * We return a lock on the handle to ensure nobody yanks it from us. - * - * This takes extra reference on llog_handle via llog_handle_get() and require - * this reference to be put by caller using llog_handle_put() - */ -static int llog_cat_id2handle(const struct lu_env *env, - struct llog_handle *cathandle, - struct llog_handle **res, - struct llog_logid *logid) -{ - struct llog_handle *loghandle; - enum llog_flag fmt; - int rc = 0; - - if (!cathandle) - return -EBADF; - - fmt = cathandle->lgh_hdr->llh_flags & LLOG_F_EXT_MASK; - down_write(&cathandle->lgh_lock); - list_for_each_entry(loghandle, &cathandle->u.chd.chd_head, - u.phd.phd_entry) { - struct llog_logid *cgl = &loghandle->lgh_id; - - if (ostid_id(&cgl->lgl_oi) == ostid_id(&logid->lgl_oi) && - ostid_seq(&cgl->lgl_oi) == ostid_seq(&logid->lgl_oi)) { - if (cgl->lgl_ogen != logid->lgl_ogen) { - CERROR("%s: log " DOSTID " generation %x != %x\n", - loghandle->lgh_ctxt->loc_obd->obd_name, - POSTID(&logid->lgl_oi), cgl->lgl_ogen, - logid->lgl_ogen); - continue; - } - loghandle->u.phd.phd_cat_handle = cathandle; - up_write(&cathandle->lgh_lock); - rc = 0; - goto out; - } - } - up_write(&cathandle->lgh_lock); - - rc = llog_open(env, cathandle->lgh_ctxt, &loghandle, logid, NULL, - LLOG_OPEN_EXISTS); - if (rc < 0) { - CERROR("%s: error opening log id " DOSTID ":%x: rc = %d\n", - cathandle->lgh_ctxt->loc_obd->obd_name, - POSTID(&logid->lgl_oi), 
logid->lgl_ogen, rc); - return rc; - } - - rc = llog_init_handle(env, loghandle, fmt | LLOG_F_IS_PLAIN, NULL); - if (rc < 0) { - llog_close(env, loghandle); - loghandle = NULL; - return rc; - } - - down_write(&cathandle->lgh_lock); - list_add_tail(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head); - up_write(&cathandle->lgh_lock); - - loghandle->u.phd.phd_cat_handle = cathandle; - loghandle->u.phd.phd_cookie.lgc_lgl = cathandle->lgh_id; - loghandle->u.phd.phd_cookie.lgc_index = - loghandle->lgh_hdr->llh_cat_idx; -out: - llog_handle_get(loghandle); - *res = loghandle; - return 0; -} - -int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle) -{ - struct llog_handle *loghandle, *n; - - list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head, - u.phd.phd_entry) { - /* unlink open-not-created llogs */ - list_del_init(&loghandle->u.phd.phd_entry); - llog_close(env, loghandle); - } - /* if handle was stored in ctxt, remove it too */ - if (cathandle->lgh_ctxt->loc_handle == cathandle) - cathandle->lgh_ctxt->loc_handle = NULL; - return llog_close(env, cathandle); -} -EXPORT_SYMBOL(llog_cat_close); - -static int llog_cat_process_cb(const struct lu_env *env, - struct llog_handle *cat_llh, - struct llog_rec_hdr *rec, void *data) -{ - struct llog_process_data *d = data; - struct llog_logid_rec *lir = (struct llog_logid_rec *)rec; - struct llog_handle *llh; - int rc; - - if (rec->lrh_type != LLOG_LOGID_MAGIC) { - CERROR("invalid record in catalog\n"); - return -EINVAL; - } - CDEBUG(D_HA, "processing log " DOSTID ":%x at index %u of catalog " - DOSTID "\n", POSTID(&lir->lid_id.lgl_oi), lir->lid_id.lgl_ogen, - rec->lrh_index, POSTID(&cat_llh->lgh_id.lgl_oi)); - - rc = llog_cat_id2handle(env, cat_llh, &llh, &lir->lid_id); - if (rc) { - CERROR("%s: cannot find handle for llog " DOSTID ": %d\n", - cat_llh->lgh_ctxt->loc_obd->obd_name, - POSTID(&lir->lid_id.lgl_oi), rc); - return rc; - } - - if (rec->lrh_index < d->lpd_startcat) - /* Skip 
processing of the logs until startcat */ - rc = 0; - else if (d->lpd_startidx > 0) { - struct llog_process_cat_data cd; - - cd.lpcd_first_idx = d->lpd_startidx; - cd.lpcd_last_idx = 0; - rc = llog_process_or_fork(env, llh, d->lpd_cb, d->lpd_data, - &cd, false); - /* Continue processing the next log from idx 0 */ - d->lpd_startidx = 0; - } else { - rc = llog_process_or_fork(env, llh, d->lpd_cb, d->lpd_data, - NULL, false); - } - - llog_handle_put(llh); - - return rc; -} - -static int llog_cat_process_or_fork(const struct lu_env *env, - struct llog_handle *cat_llh, - llog_cb_t cb, void *data, int startcat, - int startidx, bool fork) -{ - struct llog_process_data d; - struct llog_log_hdr *llh = cat_llh->lgh_hdr; - int rc; - - LASSERT(llh->llh_flags & LLOG_F_IS_CAT); - d.lpd_data = data; - d.lpd_cb = cb; - d.lpd_startcat = startcat; - d.lpd_startidx = startidx; - - if (llh->llh_cat_idx > cat_llh->lgh_last_idx) { - struct llog_process_cat_data cd; - - CWARN("catlog " DOSTID " crosses index zero\n", - POSTID(&cat_llh->lgh_id.lgl_oi)); - - cd.lpcd_first_idx = llh->llh_cat_idx; - cd.lpcd_last_idx = 0; - rc = llog_process_or_fork(env, cat_llh, llog_cat_process_cb, - &d, &cd, fork); - if (rc != 0) - return rc; - - cd.lpcd_first_idx = 0; - cd.lpcd_last_idx = cat_llh->lgh_last_idx; - rc = llog_process_or_fork(env, cat_llh, llog_cat_process_cb, - &d, &cd, fork); - } else { - rc = llog_process_or_fork(env, cat_llh, llog_cat_process_cb, - &d, NULL, fork); - } - - return rc; -} - -int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh, - llog_cb_t cb, void *data, int startcat, int startidx) -{ - return llog_cat_process_or_fork(env, cat_llh, cb, data, startcat, - startidx, false); -} -EXPORT_SYMBOL(llog_cat_process); diff --git a/drivers/staging/lustre/lustre/obdclass/llog_internal.h b/drivers/staging/lustre/lustre/obdclass/llog_internal.h deleted file mode 100644 index 4991d4e589dc..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/llog_internal.h +++ 
/dev/null @@ -1,79 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef __LLOG_INTERNAL_H__ -#define __LLOG_INTERNAL_H__ - -#include - -struct llog_process_info { - struct llog_handle *lpi_loghandle; - llog_cb_t lpi_cb; - void *lpi_cbdata; - void *lpi_catdata; - int lpi_rc; - struct completion lpi_completion; - const struct lu_env *lpi_env; - -}; - -struct llog_thread_info { - struct lu_attr lgi_attr; - struct lu_fid lgi_fid; - struct lu_buf lgi_buf; - loff_t lgi_off; - struct llog_rec_hdr lgi_lrh; - struct llog_rec_tail lgi_tail; -}; - -extern struct lu_context_key llog_thread_key; - -int llog_info_init(void); -void llog_info_fini(void); - -void llog_handle_get(struct llog_handle *loghandle); -void llog_handle_put(struct llog_handle *loghandle); -int class_config_dump_handler(const struct lu_env *env, - struct llog_handle *handle, - struct llog_rec_hdr *rec, void *data); -int llog_process_or_fork(const struct lu_env *env, - struct llog_handle *loghandle, - llog_cb_t cb, void *data, void *catdata, bool fork); -int llog_cat_cleanup(const struct lu_env *env, struct llog_handle *cathandle, - struct llog_handle *loghandle, int index); - -static inline struct llog_rec_hdr *llog_rec_hdr_next(struct llog_rec_hdr *rec) -{ - return (struct llog_rec_hdr *)((char *)rec + rec->lrh_len); -} -#endif diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c deleted file mode 100644 index 26aea114a29b..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_LOG - -#include -#include -#include "llog_internal.h" - -/* helper functions for calling the llog obd methods */ -static struct llog_ctxt *llog_new_ctxt(struct obd_device *obd) -{ - struct llog_ctxt *ctxt; - - ctxt = kzalloc(sizeof(*ctxt), GFP_NOFS); - if (!ctxt) - return NULL; - - ctxt->loc_obd = obd; - atomic_set(&ctxt->loc_refcount, 1); - - return ctxt; -} - -static void llog_ctxt_destroy(struct llog_ctxt *ctxt) -{ - if (ctxt->loc_exp) { - class_export_put(ctxt->loc_exp); - ctxt->loc_exp = NULL; - } - if (ctxt->loc_imp) { - class_import_put(ctxt->loc_imp); - ctxt->loc_imp = NULL; - } - kfree(ctxt); -} - -int __llog_ctxt_put(const struct lu_env *env, struct llog_ctxt *ctxt) -{ - struct obd_llog_group *olg = ctxt->loc_olg; - struct obd_device *obd; - int rc = 0; - - spin_lock(&olg->olg_lock); - if (!atomic_dec_and_test(&ctxt->loc_refcount)) { - spin_unlock(&olg->olg_lock); - return rc; - } - olg->olg_ctxts[ctxt->loc_idx] = NULL; - spin_unlock(&olg->olg_lock); - - obd = ctxt->loc_obd; - spin_lock(&obd->obd_dev_lock); - /* sync with llog ctxt user thread */ - spin_unlock(&obd->obd_dev_lock); - - /* obd->obd_starting is 
needed for the case of cleanup - * in error case while obd is starting up. - */ - LASSERTF(obd->obd_starting == 1 || - obd->obd_stopping == 1 || obd->obd_set_up == 0, - "wrong obd state: %d/%d/%d\n", !!obd->obd_starting, - !!obd->obd_stopping, !!obd->obd_set_up); - - /* cleanup the llog ctxt here */ - if (CTXTP(ctxt, cleanup)) - rc = CTXTP(ctxt, cleanup)(env, ctxt); - - llog_ctxt_destroy(ctxt); - wake_up(&olg->olg_waitq); - return rc; -} -EXPORT_SYMBOL(__llog_ctxt_put); - -int llog_cleanup(const struct lu_env *env, struct llog_ctxt *ctxt) -{ - struct obd_llog_group *olg; - int rc, idx; - - olg = ctxt->loc_olg; - LASSERT(olg); - LASSERT(olg != LP_POISON); - - idx = ctxt->loc_idx; - - /* - * Banlance the ctxt get when calling llog_cleanup() - */ - LASSERT(atomic_read(&ctxt->loc_refcount) < LI_POISON); - LASSERT(atomic_read(&ctxt->loc_refcount) > 1); - llog_ctxt_put(ctxt); - - /* - * Try to free the ctxt. - */ - rc = __llog_ctxt_put(env, ctxt); - if (rc) - CERROR("Error %d while cleaning up ctxt %p\n", - rc, ctxt); - - l_wait_event_abortable(olg->olg_waitq, - llog_group_ctxt_null(olg, idx)); - - return rc; -} -EXPORT_SYMBOL(llog_cleanup); - -int llog_setup(const struct lu_env *env, struct obd_device *obd, - struct obd_llog_group *olg, int index, - struct obd_device *disk_obd, struct llog_operations *op) -{ - struct llog_ctxt *ctxt; - int rc = 0; - - if (index < 0 || index >= LLOG_MAX_CTXTS) - return -EINVAL; - - LASSERT(olg); - - ctxt = llog_new_ctxt(obd); - if (!ctxt) - return -ENOMEM; - - ctxt->loc_obd = obd; - ctxt->loc_olg = olg; - ctxt->loc_idx = index; - ctxt->loc_logops = op; - mutex_init(&ctxt->loc_mutex); - ctxt->loc_exp = class_export_get(disk_obd->obd_self_export); - ctxt->loc_flags = LLOG_CTXT_FLAG_UNINITIALIZED; - ctxt->loc_chunk_size = LLOG_MIN_CHUNK_SIZE; - - rc = llog_group_set_ctxt(olg, ctxt, index); - if (rc) { - llog_ctxt_destroy(ctxt); - if (rc == -EEXIST) { - ctxt = llog_group_get_ctxt(olg, index); - if (ctxt) { - /* - * mds_lov_update_desc() 
might call here multiple - * times. So if the llog is already set up then - * don't to do it again. - */ - CDEBUG(D_CONFIG, "obd %s ctxt %d already set up\n", - obd->obd_name, index); - LASSERT(ctxt->loc_olg == olg); - LASSERT(ctxt->loc_obd == obd); - LASSERT(ctxt->loc_exp == disk_obd->obd_self_export); - LASSERT(ctxt->loc_logops == op); - llog_ctxt_put(ctxt); - } - rc = 0; - } - return rc; - } - - if (op->lop_setup) { - if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LLOG_SETUP)) - rc = -EOPNOTSUPP; - else - rc = op->lop_setup(env, obd, olg, index, disk_obd); - } - - if (rc) { - CERROR("%s: ctxt %d lop_setup=%p failed: rc = %d\n", - obd->obd_name, index, op->lop_setup, rc); - llog_group_clear_ctxt(olg, index); - llog_ctxt_destroy(ctxt); - } else { - CDEBUG(D_CONFIG, "obd %s ctxt %d is initialized\n", - obd->obd_name, index); - ctxt->loc_flags &= ~LLOG_CTXT_FLAG_UNINITIALIZED; - } - - return rc; -} -EXPORT_SYMBOL(llog_setup); - -/* context key constructor/destructor: llog_key_init, llog_key_fini */ -LU_KEY_INIT_FINI(llog, struct llog_thread_info); -/* context key: llog_thread_key */ -LU_CONTEXT_KEY_DEFINE(llog, LCT_MD_THREAD | LCT_MG_THREAD | LCT_LOCAL); -LU_KEY_INIT_GENERIC(llog); - -int llog_info_init(void) -{ - llog_key_init_generic(&llog_thread_key, NULL); - lu_context_key_register(&llog_thread_key); - return 0; -} - -void llog_info_fini(void) -{ - lu_context_key_degister(&llog_thread_key); -} diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c deleted file mode 100644 index b431c3408fe4..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/llog_swab.c +++ /dev/null @@ -1,412 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/llog_swab.c - * - * Swabbing of llog datatypes (from disk or over the wire). 
- * - * Author: jacob berkman - */ - -#define DEBUG_SUBSYSTEM S_LOG - -#include -#include - -static void print_llogd_body(struct llogd_body *d) -{ - CDEBUG(D_OTHER, "llogd body: %p\n", d); - CDEBUG(D_OTHER, "\tlgd_logid.lgl_oi: " DOSTID "\n", - POSTID(&d->lgd_logid.lgl_oi)); - CDEBUG(D_OTHER, "\tlgd_logid.lgl_ogen: %#x\n", d->lgd_logid.lgl_ogen); - CDEBUG(D_OTHER, "\tlgd_ctxt_idx: %#x\n", d->lgd_ctxt_idx); - CDEBUG(D_OTHER, "\tlgd_llh_flags: %#x\n", d->lgd_llh_flags); - CDEBUG(D_OTHER, "\tlgd_index: %#x\n", d->lgd_index); - CDEBUG(D_OTHER, "\tlgd_saved_index: %#x\n", d->lgd_saved_index); - CDEBUG(D_OTHER, "\tlgd_len: %#x\n", d->lgd_len); - CDEBUG(D_OTHER, "\tlgd_cur_offset: %#llx\n", d->lgd_cur_offset); -} - -void lustre_swab_lu_fid(struct lu_fid *fid) -{ - __swab64s(&fid->f_seq); - __swab32s(&fid->f_oid); - __swab32s(&fid->f_ver); -} -EXPORT_SYMBOL(lustre_swab_lu_fid); - -void lustre_swab_ost_id(struct ost_id *oid) -{ - if (fid_seq_is_mdt0(oid->oi.oi_seq)) { - __swab64s(&oid->oi.oi_id); - __swab64s(&oid->oi.oi_seq); - } else { - lustre_swab_lu_fid(&oid->oi_fid); - } -} -EXPORT_SYMBOL(lustre_swab_ost_id); - -static void lustre_swab_llog_id(struct llog_logid *log_id) -{ - __swab64s(&log_id->lgl_oi.oi.oi_id); - __swab64s(&log_id->lgl_oi.oi.oi_seq); - __swab32s(&log_id->lgl_ogen); -} - -void lustre_swab_llogd_body(struct llogd_body *d) -{ - print_llogd_body(d); - lustre_swab_llog_id(&d->lgd_logid); - __swab32s(&d->lgd_ctxt_idx); - __swab32s(&d->lgd_llh_flags); - __swab32s(&d->lgd_index); - __swab32s(&d->lgd_saved_index); - __swab32s(&d->lgd_len); - __swab64s(&d->lgd_cur_offset); - print_llogd_body(d); -} -EXPORT_SYMBOL(lustre_swab_llogd_body); - -void lustre_swab_llogd_conn_body(struct llogd_conn_body *d) -{ - __swab64s(&d->lgdc_gen.mnt_cnt); - __swab64s(&d->lgdc_gen.conn_cnt); - lustre_swab_llog_id(&d->lgdc_logid); - __swab32s(&d->lgdc_ctxt_idx); -} -EXPORT_SYMBOL(lustre_swab_llogd_conn_body); - -static void lustre_swab_ll_fid(struct ll_fid *fid) -{ - 
__swab64s(&fid->id); - __swab32s(&fid->generation); - __swab32s(&fid->f_type); -} - -void lustre_swab_lu_seq_range(struct lu_seq_range *range) -{ - __swab64s(&range->lsr_start); - __swab64s(&range->lsr_end); - __swab32s(&range->lsr_index); - __swab32s(&range->lsr_flags); -} -EXPORT_SYMBOL(lustre_swab_lu_seq_range); - -void lustre_swab_llog_rec(struct llog_rec_hdr *rec) -{ - struct llog_rec_tail *tail = NULL; - - __swab32s(&rec->lrh_len); - __swab32s(&rec->lrh_index); - __swab32s(&rec->lrh_type); - __swab32s(&rec->lrh_id); - - switch (rec->lrh_type) { - case OST_SZ_REC: - { - struct llog_size_change_rec *lsc = - (struct llog_size_change_rec *)rec; - - lustre_swab_ll_fid(&lsc->lsc_fid); - __swab32s(&lsc->lsc_ioepoch); - tail = &lsc->lsc_tail; - break; - } - case MDS_UNLINK_REC: - { - struct llog_unlink_rec *lur = (struct llog_unlink_rec *)rec; - - __swab64s(&lur->lur_oid); - __swab32s(&lur->lur_oseq); - __swab32s(&lur->lur_count); - tail = &lur->lur_tail; - break; - } - case MDS_UNLINK64_REC: - { - struct llog_unlink64_rec *lur = - (struct llog_unlink64_rec *)rec; - - lustre_swab_lu_fid(&lur->lur_fid); - __swab32s(&lur->lur_count); - tail = &lur->lur_tail; - break; - } - case CHANGELOG_REC: - { - struct llog_changelog_rec *cr = - (struct llog_changelog_rec *)rec; - - __swab16s(&cr->cr.cr_namelen); - __swab16s(&cr->cr.cr_flags); - __swab32s(&cr->cr.cr_type); - __swab64s(&cr->cr.cr_index); - __swab64s(&cr->cr.cr_prev); - __swab64s(&cr->cr.cr_time); - lustre_swab_lu_fid(&cr->cr.cr_tfid); - lustre_swab_lu_fid(&cr->cr.cr_pfid); - if (cr->cr.cr_flags & CLF_RENAME) { - struct changelog_ext_rename *rnm = - changelog_rec_rename(&cr->cr); - - lustre_swab_lu_fid(&rnm->cr_sfid); - lustre_swab_lu_fid(&rnm->cr_spfid); - } - /* - * Because the tail follows a variable-length structure we need - * to compute its location at runtime - */ - tail = (struct llog_rec_tail *)((char *)&cr->cr + - changelog_rec_size(&cr->cr) + - cr->cr.cr_namelen); - break; - } - - case CHANGELOG_USER_REC: - 
{ - struct llog_changelog_user_rec *cur = - (struct llog_changelog_user_rec *)rec; - - __swab32s(&cur->cur_id); - __swab64s(&cur->cur_endrec); - tail = &cur->cur_tail; - break; - } - - case HSM_AGENT_REC: { - struct llog_agent_req_rec *arr = - (struct llog_agent_req_rec *)rec; - - __swab32s(&arr->arr_hai.hai_len); - __swab32s(&arr->arr_hai.hai_action); - lustre_swab_lu_fid(&arr->arr_hai.hai_fid); - lustre_swab_lu_fid(&arr->arr_hai.hai_dfid); - __swab64s(&arr->arr_hai.hai_cookie); - __swab64s(&arr->arr_hai.hai_extent.offset); - __swab64s(&arr->arr_hai.hai_extent.length); - __swab64s(&arr->arr_hai.hai_gid); - /* no swabing for opaque data */ - /* hai_data[0]; */ - break; - } - - case MDS_SETATTR64_REC: - { - struct llog_setattr64_rec *lsr = - (struct llog_setattr64_rec *)rec; - - lustre_swab_ost_id(&lsr->lsr_oi); - __swab32s(&lsr->lsr_uid); - __swab32s(&lsr->lsr_uid_h); - __swab32s(&lsr->lsr_gid); - __swab32s(&lsr->lsr_gid_h); - __swab64s(&lsr->lsr_valid); - tail = &lsr->lsr_tail; - break; - } - case OBD_CFG_REC: - /* these are swabbed as they are consumed */ - break; - case LLOG_HDR_MAGIC: - { - struct llog_log_hdr *llh = (struct llog_log_hdr *)rec; - - __swab64s(&llh->llh_timestamp); - __swab32s(&llh->llh_count); - __swab32s(&llh->llh_bitmap_offset); - __swab32s(&llh->llh_flags); - __swab32s(&llh->llh_size); - __swab32s(&llh->llh_cat_idx); - tail = LLOG_HDR_TAIL(llh); - break; - } - case LLOG_LOGID_MAGIC: - { - struct llog_logid_rec *lid = (struct llog_logid_rec *)rec; - - lustre_swab_llog_id(&lid->lid_id); - tail = &lid->lid_tail; - break; - } - case LLOG_GEN_REC: - { - struct llog_gen_rec *lgr = (struct llog_gen_rec *)rec; - - __swab64s(&lgr->lgr_gen.mnt_cnt); - __swab64s(&lgr->lgr_gen.conn_cnt); - tail = &lgr->lgr_tail; - break; - } - case LLOG_PAD_MAGIC: - break; - default: - CERROR("Unknown llog rec type %#x swabbing rec %p\n", - rec->lrh_type, rec); - } - - if (tail) { - __swab32s(&tail->lrt_len); - __swab32s(&tail->lrt_index); - } -} 
-EXPORT_SYMBOL(lustre_swab_llog_rec); - -static void print_llog_hdr(struct llog_log_hdr *h) -{ - CDEBUG(D_OTHER, "llog header: %p\n", h); - CDEBUG(D_OTHER, "\tllh_hdr.lrh_index: %#x\n", h->llh_hdr.lrh_index); - CDEBUG(D_OTHER, "\tllh_hdr.lrh_len: %#x\n", h->llh_hdr.lrh_len); - CDEBUG(D_OTHER, "\tllh_hdr.lrh_type: %#x\n", h->llh_hdr.lrh_type); - CDEBUG(D_OTHER, "\tllh_timestamp: %#llx\n", h->llh_timestamp); - CDEBUG(D_OTHER, "\tllh_count: %#x\n", h->llh_count); - CDEBUG(D_OTHER, "\tllh_bitmap_offset: %#x\n", h->llh_bitmap_offset); - CDEBUG(D_OTHER, "\tllh_flags: %#x\n", h->llh_flags); - CDEBUG(D_OTHER, "\tllh_size: %#x\n", h->llh_size); - CDEBUG(D_OTHER, "\tllh_cat_idx: %#x\n", h->llh_cat_idx); - CDEBUG(D_OTHER, "\tllh_tail.lrt_index: %#x\n", - LLOG_HDR_TAIL(h)->lrt_index); - CDEBUG(D_OTHER, "\tllh_tail.lrt_len: %#x\n", - LLOG_HDR_TAIL(h)->lrt_len); -} - -void lustre_swab_llog_hdr(struct llog_log_hdr *h) -{ - print_llog_hdr(h); - - lustre_swab_llog_rec(&h->llh_hdr); - - print_llog_hdr(h); -} -EXPORT_SYMBOL(lustre_swab_llog_hdr); - -static void print_lustre_cfg(struct lustre_cfg *lcfg) -{ - int i; - - if (!(libcfs_debug & D_OTHER)) /* don't loop on nothing */ - return; - CDEBUG(D_OTHER, "lustre_cfg: %p\n", lcfg); - CDEBUG(D_OTHER, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version); - - CDEBUG(D_OTHER, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command); - CDEBUG(D_OTHER, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num); - CDEBUG(D_OTHER, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags); - CDEBUG(D_OTHER, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid)); - - CDEBUG(D_OTHER, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount); - if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT) - for (i = 0; i < lcfg->lcfg_bufcount; i++) - CDEBUG(D_OTHER, "\tlcfg->lcfg_buflens[%d]: %d\n", - i, lcfg->lcfg_buflens[i]); -} - -void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg) -{ - int i; - - __swab32s(&lcfg->lcfg_version); - - if (lcfg->lcfg_version != LUSTRE_CFG_VERSION) { - 
CERROR("not swabbing lustre_cfg version %#x (expecting %#x)\n", - lcfg->lcfg_version, LUSTRE_CFG_VERSION); - return; - } - - __swab32s(&lcfg->lcfg_command); - __swab32s(&lcfg->lcfg_num); - __swab32s(&lcfg->lcfg_flags); - __swab64s(&lcfg->lcfg_nid); - __swab32s(&lcfg->lcfg_bufcount); - for (i = 0; i < lcfg->lcfg_bufcount && i < LUSTRE_CFG_MAX_BUFCOUNT; i++) - __swab32s(&lcfg->lcfg_buflens[i]); - - print_lustre_cfg(lcfg); -} - -/* used only for compatibility with old on-disk cfg_marker data */ -struct cfg_marker32 { - __u32 cm_step; - __u32 cm_flags; - __u32 cm_vers; - __u32 padding; - __u32 cm_createtime; - __u32 cm_canceltime; - char cm_tgtname[MTI_NAME_MAXLEN]; - char cm_comment[MTI_NAME_MAXLEN]; -}; - -#define MTI_NAMELEN32 (MTI_NAME_MAXLEN - \ - (sizeof(struct cfg_marker) - sizeof(struct cfg_marker32))) - -void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size) -{ - struct cfg_marker32 *cm32 = (struct cfg_marker32 *)marker; - - if (swab) { - __swab32s(&marker->cm_step); - __swab32s(&marker->cm_flags); - __swab32s(&marker->cm_vers); - } - if (size == sizeof(*cm32)) { - __u32 createtime, canceltime; - /* There was a problem with the original declaration of - * cfg_marker on 32-bit systems because it used time_t as - * a wire protocol structure, and didn't verify this in - * wirecheck. We now have to convert the offsets of the - * later fields in order to work on 32- and 64-bit systems. - * - * Fortunately, the cm_comment field has no functional use - * so can be sacrificed when converting the timestamp size. - * - * Overwrite fields from the end first, so they are not - * clobbered, and use memmove() instead of memcpy() because - * the source and target buffers overlap. 
bug 16771 - */ - createtime = cm32->cm_createtime; - canceltime = cm32->cm_canceltime; - memmove(marker->cm_comment, cm32->cm_comment, MTI_NAMELEN32); - marker->cm_comment[MTI_NAMELEN32 - 1] = '\0'; - memmove(marker->cm_tgtname, cm32->cm_tgtname, - sizeof(marker->cm_tgtname)); - if (swab) { - __swab32s(&createtime); - __swab32s(&canceltime); - } - marker->cm_createtime = createtime; - marker->cm_canceltime = canceltime; - CDEBUG(D_CONFIG, "Find old cfg_marker(Srv32b,Clt64b) for target %s, converting\n", - marker->cm_tgtname); - } else if (swab) { - __swab64s(&marker->cm_createtime); - __swab64s(&marker->cm_canceltime); - } -} diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_counters.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_counters.c deleted file mode 100644 index 85f09aff6e83..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/lprocfs_counters.c +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2013, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/lprocfs_counters.c - * - * Lustre lprocfs counter routines - * - * Author: Andreas Dilger - */ - -#include -#include -#include -#include - -void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, long amount) -{ - struct lprocfs_counter *percpu_cntr; - struct lprocfs_counter_header *header; - int smp_id; - unsigned long flags = 0; - - if (!stats) - return; - - LASSERTF(0 <= idx && idx < stats->ls_num, - "idx %d, ls_num %hu\n", idx, stats->ls_num); - - /* With per-client stats, statistics are allocated only for - * single CPU area, so the smp_id should be 0 always. - */ - smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID, &flags); - if (smp_id < 0) - return; - - header = &stats->ls_cnt_header[idx]; - percpu_cntr = lprocfs_stats_counter_get(stats, smp_id, idx); - percpu_cntr->lc_count++; - - if (header->lc_config & LPROCFS_CNTR_AVGMINMAX) { - /* - * lprocfs_counter_add() can be called in interrupt context, - * as memory allocation could trigger memory shrinker call - * ldlm_pool_shrink(), which calls lprocfs_counter_add(). - * LU-1727. 
- * - */ - if (in_interrupt() && - (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq += amount; - else - percpu_cntr->lc_sum += amount; - - if (header->lc_config & LPROCFS_CNTR_STDDEV) - percpu_cntr->lc_sumsquare += (__s64)amount * amount; - if (amount < percpu_cntr->lc_min) - percpu_cntr->lc_min = amount; - if (amount > percpu_cntr->lc_max) - percpu_cntr->lc_max = amount; - } - lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags); -} -EXPORT_SYMBOL(lprocfs_counter_add); - -void lprocfs_counter_sub(struct lprocfs_stats *stats, int idx, long amount) -{ - struct lprocfs_counter *percpu_cntr; - struct lprocfs_counter_header *header; - int smp_id; - unsigned long flags = 0; - - if (!stats) - return; - - LASSERTF(0 <= idx && idx < stats->ls_num, - "idx %d, ls_num %hu\n", idx, stats->ls_num); - - /* With per-client stats, statistics are allocated only for - * single CPU area, so the smp_id should be 0 always. - */ - smp_id = lprocfs_stats_lock(stats, LPROCFS_GET_SMP_ID, &flags); - if (smp_id < 0) - return; - - header = &stats->ls_cnt_header[idx]; - percpu_cntr = lprocfs_stats_counter_get(stats, smp_id, idx); - if (header->lc_config & LPROCFS_CNTR_AVGMINMAX) { - /* - * Sometimes we use RCU callbacks to free memory which calls - * lprocfs_counter_sub(), and RCU callbacks may execute in - * softirq context - right now that's the only case we're in - * softirq context here, use separate counter for that. - * bz20650. 
- * - */ - if (in_interrupt() && - (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq -= amount; - else - percpu_cntr->lc_sum -= amount; - } - lprocfs_stats_unlock(stats, LPROCFS_GET_SMP_ID, &flags); -} -EXPORT_SYMBOL(lprocfs_counter_sub); diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c deleted file mode 100644 index bdbe6f52031a..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c +++ /dev/null @@ -1,1698 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/obdclass/lprocfs_status.c - * - * Author: Hariharan Thantry - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include -#include - -static const char * const obd_connect_names[] = { - "read_only", - "lov_index", - "connect_from_mds", - "write_grant", - "server_lock", - "version", - "request_portal", - "acl", - "xattr", - "create_on_write", - "truncate_lock", - "initial_transno", - "inode_bit_locks", - "join_file(obsolete)", - "getattr_by_fid", - "no_oh_for_devices", - "remote_client", - "remote_client_by_force", - "max_byte_per_rpc", - "64bit_qdata", - "mds_capability", - "oss_capability", - "early_lock_cancel", - "som", - "adaptive_timeouts", - "lru_resize", - "mds_mds_connection", - "real_conn", - "change_qunit_size", - "alt_checksum_algorithm", - "fid_is_enabled", - "version_recovery", - "pools", - "grant_shrink", - "skip_orphan", - "large_ea", - "full20", - "layout_lock", - "64bithash", - "object_max_bytes", - "imp_recov", - "jobstats", - "umask", - "einprogress", - "grant_param", - "flock_owner", - "lvb_type", - "nanoseconds_times", - "lightweight_conn", - "short_io", - "pingless", - "flock_deadlock", - "disp_stripe", - "open_by_fid", - "lfsck", - "unknown", - "unlink_close", - "multi_mod_rpcs", - "dir_stripe", - "subtree", - "lock_ahead", - "bulk_mbits", - "compact_obdo", - "second_flags", - NULL -}; - -int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep) -{ - __u64 mask = 1; - int i, ret = 0; - - for (i = 0; obd_connect_names[i]; i++, mask <<= 1) { - if (flags & mask) - ret += snprintf(page + ret, count - ret, "%s%s", - ret ? sep : "", obd_connect_names[i]); - } - if (flags & ~(mask - 1)) - ret += snprintf(page + ret, count - ret, - "%sunknown flags %#llx", - ret ? 
sep : "", flags & ~(mask - 1)); - return ret; -} -EXPORT_SYMBOL(obd_connect_flags2str); - -static void obd_connect_data_seqprint(struct seq_file *m, - struct obd_connect_data *ocd) -{ - u64 flags; - - LASSERT(ocd); - flags = ocd->ocd_connect_flags; - - seq_printf(m, " connect_data:\n" - " flags: %llx\n" - " instance: %u\n", - ocd->ocd_connect_flags, - ocd->ocd_instance); - if (flags & OBD_CONNECT_VERSION) - seq_printf(m, " target_version: %u.%u.%u.%u\n", - OBD_OCD_VERSION_MAJOR(ocd->ocd_version), - OBD_OCD_VERSION_MINOR(ocd->ocd_version), - OBD_OCD_VERSION_PATCH(ocd->ocd_version), - OBD_OCD_VERSION_FIX(ocd->ocd_version)); - if (flags & OBD_CONNECT_MDS) - seq_printf(m, " mdt_index: %d\n", ocd->ocd_group); - if (flags & OBD_CONNECT_GRANT) - seq_printf(m, " initial_grant: %d\n", ocd->ocd_grant); - if (flags & OBD_CONNECT_INDEX) - seq_printf(m, " target_index: %u\n", ocd->ocd_index); - if (flags & OBD_CONNECT_BRW_SIZE) - seq_printf(m, " max_brw_size: %d\n", ocd->ocd_brw_size); - if (flags & OBD_CONNECT_IBITS) - seq_printf(m, " ibits_known: %llx\n", - ocd->ocd_ibits_known); - if (flags & OBD_CONNECT_GRANT_PARAM) - seq_printf(m, " grant_block_size: %d\n" - " grant_inode_size: %d\n" - " grant_extent_overhead: %d\n", - ocd->ocd_blocksize, - ocd->ocd_inodespace, - ocd->ocd_grant_extent); - if (flags & OBD_CONNECT_TRANSNO) - seq_printf(m, " first_transno: %llx\n", - ocd->ocd_transno); - if (flags & OBD_CONNECT_CKSUM) - seq_printf(m, " cksum_types: %#x\n", - ocd->ocd_cksum_types); - if (flags & OBD_CONNECT_MAX_EASIZE) - seq_printf(m, " max_easize: %d\n", ocd->ocd_max_easize); - if (flags & OBD_CONNECT_MAXBYTES) - seq_printf(m, " max_object_bytes: %llx\n", - ocd->ocd_maxbytes); - if (flags & OBD_CONNECT_MULTIMODRPCS) - seq_printf(m, " max_mod_rpcs: %hu\n", - ocd->ocd_maxmodrpcs); -} - -int lprocfs_read_frac_helper(char *buffer, unsigned long count, long val, - int mult) -{ - long decimal_val, frac_val; - int prtn; - - if (count < 10) - return -EINVAL; - - decimal_val = val / 
mult; - prtn = snprintf(buffer, count, "%ld", decimal_val); - frac_val = val % mult; - - if (prtn < (count - 4) && frac_val > 0) { - long temp_frac; - int i, temp_mult = 1, frac_bits = 0; - - temp_frac = frac_val * 10; - buffer[prtn++] = '.'; - while (frac_bits < 2 && (temp_frac / mult) < 1) { - /* only reserved 2 bits fraction */ - buffer[prtn++] = '0'; - temp_frac *= 10; - frac_bits++; - } - /* - * Need to think these cases : - * 1. #echo x.00 > /sys/xxx output result : x - * 2. #echo x.0x > /sys/xxx output result : x.0x - * 3. #echo x.x0 > /sys/xxx output result : x.x - * 4. #echo x.xx > /sys/xxx output result : x.xx - * Only reserved 2 bits fraction. - */ - for (i = 0; i < (5 - prtn); i++) - temp_mult *= 10; - - frac_bits = min((int)count - prtn, 3 - frac_bits); - prtn += snprintf(buffer + prtn, frac_bits, "%ld", - frac_val * temp_mult / mult); - - prtn--; - while (buffer[prtn] < '1' || buffer[prtn] > '9') { - prtn--; - if (buffer[prtn] == '.') { - prtn--; - break; - } - } - prtn++; - } - buffer[prtn++] = '\n'; - return prtn; -} -EXPORT_SYMBOL(lprocfs_read_frac_helper); - -int lprocfs_write_frac_helper(const char __user *buffer, unsigned long count, - int *val, int mult) -{ - char kernbuf[20], *end, *pbuf; - - if (count > (sizeof(kernbuf) - 1)) - return -EINVAL; - - if (copy_from_user(kernbuf, buffer, count)) - return -EFAULT; - - kernbuf[count] = '\0'; - pbuf = kernbuf; - if (*pbuf == '-') { - mult = -mult; - pbuf++; - } - - *val = (int)simple_strtoul(pbuf, &end, 10) * mult; - if (pbuf == end) - return -EINVAL; - - if (end && *end == '.') { - int temp_val, pow = 1; - int i; - - pbuf = end + 1; - if (strlen(pbuf) > 5) - pbuf[5] = '\0'; /*only allow 5bits fractional*/ - - temp_val = (int)simple_strtoul(pbuf, &end, 10) * mult; - - if (pbuf < end) { - for (i = 0; i < (end - pbuf); i++) - pow *= 10; - - *val += temp_val / pow; - } - } - return 0; -} -EXPORT_SYMBOL(lprocfs_write_frac_helper); - -static int lprocfs_no_percpu_stats; 
-module_param(lprocfs_no_percpu_stats, int, 0644); -MODULE_PARM_DESC(lprocfs_no_percpu_stats, "Do not alloc percpu data for lprocfs stats"); - -#define MAX_STRING_SIZE 128 - -int lprocfs_single_release(struct inode *inode, struct file *file) -{ - return single_release(inode, file); -} -EXPORT_SYMBOL(lprocfs_single_release); - -int lprocfs_seq_release(struct inode *inode, struct file *file) -{ - return seq_release(inode, file); -} -EXPORT_SYMBOL(lprocfs_seq_release); - -/* lprocfs API calls */ - -static const struct file_operations lprocfs_generic_fops = { }; - -void ldebugfs_add_vars(struct dentry *parent, struct lprocfs_vars *list, - void *data) -{ - if (IS_ERR_OR_NULL(parent) || IS_ERR_OR_NULL(list)) - return; - - while (list->name) { - umode_t mode = 0; - - if (list->proc_mode != 0000) { - mode = list->proc_mode; - } else if (list->fops) { - if (list->fops->read) - mode = 0444; - if (list->fops->write) - mode |= 0200; - } - debugfs_create_file(list->name, mode, parent, - list->data ?: data, - list->fops ?: &lprocfs_generic_fops); - list++; - } - return; -} -EXPORT_SYMBOL_GPL(ldebugfs_add_vars); - -/* Generic callbacks */ -static ssize_t uuid_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - - return sprintf(buf, "%s\n", obd->obd_uuid.uuid); -} -LUSTRE_RO_ATTR(uuid); - -static ssize_t blocksize_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct obd_statfs osfs; - int rc = obd_statfs(NULL, obd->obd_self_export, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) - return sprintf(buf, "%u\n", osfs.os_bsize); - - return rc; -} -LUSTRE_RO_ATTR(blocksize); - -static ssize_t kbytestotal_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - 
struct obd_statfs osfs; - int rc = obd_statfs(NULL, obd->obd_self_export, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) { - __u32 blk_size = osfs.os_bsize >> 10; - __u64 result = osfs.os_blocks; - - while (blk_size >>= 1) - result <<= 1; - - return sprintf(buf, "%llu\n", result); - } - - return rc; -} -LUSTRE_RO_ATTR(kbytestotal); - -static ssize_t kbytesfree_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct obd_statfs osfs; - int rc = obd_statfs(NULL, obd->obd_self_export, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) { - __u32 blk_size = osfs.os_bsize >> 10; - __u64 result = osfs.os_bfree; - - while (blk_size >>= 1) - result <<= 1; - - return sprintf(buf, "%llu\n", result); - } - - return rc; -} -LUSTRE_RO_ATTR(kbytesfree); - -static ssize_t kbytesavail_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct obd_statfs osfs; - int rc = obd_statfs(NULL, obd->obd_self_export, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) { - __u32 blk_size = osfs.os_bsize >> 10; - __u64 result = osfs.os_bavail; - - while (blk_size >>= 1) - result <<= 1; - - return sprintf(buf, "%llu\n", result); - } - - return rc; -} -LUSTRE_RO_ATTR(kbytesavail); - -static ssize_t filestotal_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct obd_statfs osfs; - int rc = obd_statfs(NULL, obd->obd_self_export, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) - return sprintf(buf, "%llu\n", osfs.os_files); - - return rc; -} -LUSTRE_RO_ATTR(filestotal); - -static ssize_t filesfree_show(struct kobject *kobj, struct 
attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct obd_statfs osfs; - int rc = obd_statfs(NULL, obd->obd_self_export, &osfs, - get_jiffies_64() - OBD_STATFS_CACHE_SECONDS * HZ, - OBD_STATFS_NODELAY); - if (!rc) - return sprintf(buf, "%llu\n", osfs.os_ffree); - - return rc; -} -LUSTRE_RO_ATTR(filesfree); - -int lprocfs_rd_server_uuid(struct seq_file *m, void *data) -{ - struct obd_device *obd = data; - struct obd_import *imp; - char *imp_state_name = NULL; - int rc; - - LASSERT(obd); - rc = lprocfs_climp_check(obd); - if (rc) - return rc; - - imp = obd->u.cli.cl_import; - imp_state_name = ptlrpc_import_state_name(imp->imp_state); - seq_printf(m, "%s\t%s%s\n", - obd2cli_tgt(obd), imp_state_name, - imp->imp_deactive ? "\tDEACTIVATED" : ""); - - up_read(&obd->u.cli.cl_sem); - - return 0; -} -EXPORT_SYMBOL(lprocfs_rd_server_uuid); - -int lprocfs_rd_conn_uuid(struct seq_file *m, void *data) -{ - struct obd_device *obd = data; - struct ptlrpc_connection *conn; - int rc; - - LASSERT(obd); - - rc = lprocfs_climp_check(obd); - if (rc) - return rc; - - conn = obd->u.cli.cl_import->imp_connection; - if (conn && obd->u.cli.cl_import) - seq_printf(m, "%s\n", conn->c_remote_uuid.uuid); - else - seq_puts(m, "\n"); - - up_read(&obd->u.cli.cl_sem); - - return 0; -} -EXPORT_SYMBOL(lprocfs_rd_conn_uuid); - -/** - * Lock statistics structure for access, possibly only on this CPU. - * - * The statistics struct may be allocated with per-CPU structures for - * efficient concurrent update (usually only on server-wide stats), or - * as a single global struct (e.g. for per-client or per-job statistics), - * so the required locking depends on the type of structure allocated. - * - * For per-CPU statistics, pin the thread to the current cpuid so that - * will only access the statistics for that CPU. If the stats structure - * for the current CPU has not been allocated (or previously freed), - * allocate it now. 
The per-CPU statistics do not need locking since - * the thread is pinned to the CPU during update. - * - * For global statistics, lock the stats structure to prevent concurrent update. - * - * \param[in] stats statistics structure to lock - * \param[in] opc type of operation: - * LPROCFS_GET_SMP_ID: "lock" and return current CPU index - * for incrementing statistics for that CPU - * LPROCFS_GET_NUM_CPU: "lock" and return number of used - * CPU indices to iterate over all indices - * \param[out] flags CPU interrupt saved state for IRQ-safe locking - * - * \retval cpuid of current thread or number of allocated structs - * \retval negative on error (only for opc LPROCFS_GET_SMP_ID + per-CPU stats) - */ -int lprocfs_stats_lock(struct lprocfs_stats *stats, - enum lprocfs_stats_lock_ops opc, - unsigned long *flags) -{ - if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_lock_irqsave(&stats->ls_lock, *flags); - else - spin_lock(&stats->ls_lock); - return opc == LPROCFS_GET_NUM_CPU ? 1 : 0; - } - - switch (opc) { - case LPROCFS_GET_SMP_ID: { - unsigned int cpuid = get_cpu(); - - if (unlikely(!stats->ls_percpu[cpuid])) { - int rc = lprocfs_stats_alloc_one(stats, cpuid); - - if (rc < 0) { - put_cpu(); - return rc; - } - } - return cpuid; - } - case LPROCFS_GET_NUM_CPU: - return stats->ls_biggest_alloc_num; - default: - LBUG(); - } -} - -/** - * Unlock statistics structure after access. - * - * Unlock the lock acquired via lprocfs_stats_lock() for global statistics, - * or unpin this thread from the current cpuid for per-CPU statistics. - * - * This function must be called using the same arguments as used when calling - * lprocfs_stats_lock() so that the correct operation can be performed. 
- * - * \param[in] stats statistics structure to unlock - * \param[in] opc type of operation (current cpuid or number of structs) - * \param[in] flags CPU interrupt saved state for IRQ-safe locking - */ -void lprocfs_stats_unlock(struct lprocfs_stats *stats, - enum lprocfs_stats_lock_ops opc, - unsigned long *flags) -{ - if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_unlock_irqrestore(&stats->ls_lock, *flags); - else - spin_unlock(&stats->ls_lock); - } else if (opc == LPROCFS_GET_SMP_ID) { - put_cpu(); - } -} - -/** add up per-cpu counters */ -void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx, - struct lprocfs_counter *cnt) -{ - unsigned int num_entry; - struct lprocfs_counter *percpu_cntr; - int i; - unsigned long flags = 0; - - memset(cnt, 0, sizeof(*cnt)); - - if (!stats) { - /* set count to 1 to avoid divide-by-zero errs in callers */ - cnt->lc_count = 1; - return; - } - - cnt->lc_min = LC_MIN_INIT; - - num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags); - - for (i = 0; i < num_entry; i++) { - if (!stats->ls_percpu[i]) - continue; - percpu_cntr = lprocfs_stats_counter_get(stats, i, idx); - - cnt->lc_count += percpu_cntr->lc_count; - cnt->lc_sum += percpu_cntr->lc_sum; - if (percpu_cntr->lc_min < cnt->lc_min) - cnt->lc_min = percpu_cntr->lc_min; - if (percpu_cntr->lc_max > cnt->lc_max) - cnt->lc_max = percpu_cntr->lc_max; - cnt->lc_sumsquare += percpu_cntr->lc_sumsquare; - } - - lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags); -} - -/** - * Append a space separated list of current set flags to str. - */ -#define flag2str(flag, first) \ - do { \ - if (imp->imp_##flag) \ - seq_printf(m, "%s" #flag, first ? 
"" : ", "); \ - } while (0) -static int obd_import_flags2str(struct obd_import *imp, struct seq_file *m) -{ - bool first = true; - - if (imp->imp_obd->obd_no_recov) { - seq_puts(m, "no_recov"); - first = false; - } - - flag2str(invalid, first); - first = false; - flag2str(deactive, first); - flag2str(replayable, first); - flag2str(pingable, first); - return 0; -} - -#undef flags2str - -static void obd_connect_seq_flags2str(struct seq_file *m, __u64 flags, char *sep) -{ - __u64 mask = 1; - int i; - bool first = true; - - for (i = 0; obd_connect_names[i]; i++, mask <<= 1) { - if (flags & mask) { - seq_printf(m, "%s%s", - first ? sep : "", obd_connect_names[i]); - first = false; - } - } - if (flags & ~(mask - 1)) - seq_printf(m, "%sunknown flags %#llx", - first ? sep : "", flags & ~(mask - 1)); -} - -int lprocfs_rd_import(struct seq_file *m, void *data) -{ - char nidstr[LNET_NIDSTR_SIZE]; - struct lprocfs_counter ret; - struct lprocfs_counter_header *header; - struct obd_device *obd = data; - struct obd_import *imp; - struct obd_import_conn *conn; - struct obd_connect_data *ocd; - int j; - int k; - int rw = 0; - int rc; - - LASSERT(obd); - rc = lprocfs_climp_check(obd); - if (rc) - return rc; - - imp = obd->u.cli.cl_import; - ocd = &imp->imp_connect_data; - - seq_printf(m, "import:\n" - " name: %s\n" - " target: %s\n" - " state: %s\n" - " instance: %u\n" - " connect_flags: [ ", - obd->obd_name, - obd2cli_tgt(obd), - ptlrpc_import_state_name(imp->imp_state), - imp->imp_connect_data.ocd_instance); - obd_connect_seq_flags2str(m, imp->imp_connect_data.ocd_connect_flags, - ", "); - seq_puts(m, " ]\n"); - obd_connect_data_seqprint(m, ocd); - seq_puts(m, " import_flags: [ "); - obd_import_flags2str(imp, m); - - seq_puts(m, - " ]\n" - " connection:\n" - " failover_nids: [ "); - spin_lock(&imp->imp_lock); - j = 0; - list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { - libcfs_nid2str_r(conn->oic_conn->c_peer.nid, - nidstr, sizeof(nidstr)); - seq_printf(m, "%s%s", j ? 
", " : "", nidstr); - j++; - } - if (imp->imp_connection) - libcfs_nid2str_r(imp->imp_connection->c_peer.nid, - nidstr, sizeof(nidstr)); - else - strncpy(nidstr, "", sizeof(nidstr)); - seq_printf(m, - " ]\n" - " current_connection: %s\n" - " connection_attempts: %u\n" - " generation: %u\n" - " in-progress_invalidations: %u\n", - nidstr, - imp->imp_conn_cnt, - imp->imp_generation, - atomic_read(&imp->imp_inval_count)); - spin_unlock(&imp->imp_lock); - - if (!obd->obd_svc_stats) - goto out_climp; - - header = &obd->obd_svc_stats->ls_cnt_header[PTLRPC_REQWAIT_CNTR]; - lprocfs_stats_collect(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, &ret); - if (ret.lc_count != 0) { - /* first argument to do_div MUST be __u64 */ - __u64 sum = ret.lc_sum; - - do_div(sum, ret.lc_count); - ret.lc_sum = sum; - } else { - ret.lc_sum = 0; - } - seq_printf(m, - " rpcs:\n" - " inflight: %u\n" - " unregistering: %u\n" - " timeouts: %u\n" - " avg_waittime: %llu %s\n", - atomic_read(&imp->imp_inflight), - atomic_read(&imp->imp_unregistering), - atomic_read(&imp->imp_timeouts), - ret.lc_sum, header->lc_units); - - k = 0; - for (j = 0; j < IMP_AT_MAX_PORTALS; j++) { - if (imp->imp_at.iat_portal[j] == 0) - break; - k = max_t(unsigned int, k, - at_get(&imp->imp_at.iat_service_estimate[j])); - } - seq_printf(m, - " service_estimates:\n" - " services: %u sec\n" - " network: %u sec\n", - k, - at_get(&imp->imp_at.iat_net_latency)); - - seq_printf(m, - " transactions:\n" - " last_replay: %llu\n" - " peer_committed: %llu\n" - " last_checked: %llu\n", - imp->imp_last_replay_transno, - imp->imp_peer_committed_transno, - imp->imp_last_transno_checked); - - /* avg data rates */ - for (rw = 0; rw <= 1; rw++) { - lprocfs_stats_collect(obd->obd_svc_stats, - PTLRPC_LAST_CNTR + BRW_READ_BYTES + rw, - &ret); - if (ret.lc_sum > 0 && ret.lc_count > 0) { - /* first argument to do_div MUST be __u64 */ - __u64 sum = ret.lc_sum; - - do_div(sum, ret.lc_count); - ret.lc_sum = sum; - seq_printf(m, - " %s_data_averages:\n" - " 
bytes_per_rpc: %llu\n", - rw ? "write" : "read", - ret.lc_sum); - } - k = (int)ret.lc_sum; - j = opcode_offset(OST_READ + rw) + EXTRA_MAX_OPCODES; - header = &obd->obd_svc_stats->ls_cnt_header[j]; - lprocfs_stats_collect(obd->obd_svc_stats, j, &ret); - if (ret.lc_sum > 0 && ret.lc_count != 0) { - /* first argument to do_div MUST be __u64 */ - __u64 sum = ret.lc_sum; - - do_div(sum, ret.lc_count); - ret.lc_sum = sum; - seq_printf(m, - " %s_per_rpc: %llu\n", - header->lc_units, ret.lc_sum); - j = (int)ret.lc_sum; - if (j > 0) - seq_printf(m, - " MB_per_sec: %u.%.02u\n", - k / j, (100 * k / j) % 100); - } - } - -out_climp: - up_read(&obd->u.cli.cl_sem); - return 0; -} -EXPORT_SYMBOL(lprocfs_rd_import); - -int lprocfs_rd_state(struct seq_file *m, void *data) -{ - struct obd_device *obd = data; - struct obd_import *imp; - int j, k, rc; - - LASSERT(obd); - rc = lprocfs_climp_check(obd); - if (rc) - return rc; - - imp = obd->u.cli.cl_import; - - seq_printf(m, "current_state: %s\n", - ptlrpc_import_state_name(imp->imp_state)); - seq_puts(m, "state_history:\n"); - k = imp->imp_state_hist_idx; - for (j = 0; j < IMP_STATE_HIST_LEN; j++) { - struct import_state_hist *ish = - &imp->imp_state_hist[(k + j) % IMP_STATE_HIST_LEN]; - if (ish->ish_state == 0) - continue; - seq_printf(m, " - [ %lld, %s ]\n", (s64)ish->ish_time, - ptlrpc_import_state_name(ish->ish_state)); - } - - up_read(&obd->u.cli.cl_sem); - return 0; -} -EXPORT_SYMBOL(lprocfs_rd_state); - -int lprocfs_at_hist_helper(struct seq_file *m, struct adaptive_timeout *at) -{ - int i; - - for (i = 0; i < AT_BINS; i++) - seq_printf(m, "%3u ", at->at_hist[i]); - seq_puts(m, "\n"); - return 0; -} -EXPORT_SYMBOL(lprocfs_at_hist_helper); - -/* See also ptlrpc_lprocfs_rd_timeouts */ -int lprocfs_rd_timeouts(struct seq_file *m, void *data) -{ - struct obd_device *obd = data; - struct obd_import *imp; - unsigned int cur, worst; - time64_t now, worstt; - struct dhms ts; - int i, rc; - - LASSERT(obd); - rc = lprocfs_climp_check(obd); 
- if (rc) - return rc; - - imp = obd->u.cli.cl_import; - - now = ktime_get_real_seconds(); - - /* Some network health info for kicks */ - s2dhms(&ts, now - imp->imp_last_reply_time); - seq_printf(m, "%-10s : %lld, " DHMS_FMT " ago\n", - "last reply", (s64)imp->imp_last_reply_time, DHMS_VARS(&ts)); - - cur = at_get(&imp->imp_at.iat_net_latency); - worst = imp->imp_at.iat_net_latency.at_worst_ever; - worstt = imp->imp_at.iat_net_latency.at_worst_time; - s2dhms(&ts, now - worstt); - seq_printf(m, "%-10s : cur %3u worst %3u (at %lld, " DHMS_FMT " ago) ", - "network", cur, worst, (s64)worstt, DHMS_VARS(&ts)); - lprocfs_at_hist_helper(m, &imp->imp_at.iat_net_latency); - - for (i = 0; i < IMP_AT_MAX_PORTALS; i++) { - if (imp->imp_at.iat_portal[i] == 0) - break; - cur = at_get(&imp->imp_at.iat_service_estimate[i]); - worst = imp->imp_at.iat_service_estimate[i].at_worst_ever; - worstt = imp->imp_at.iat_service_estimate[i].at_worst_time; - s2dhms(&ts, now - worstt); - seq_printf(m, "portal %-2d : cur %3u worst %3u (at %lld, " - DHMS_FMT " ago) ", imp->imp_at.iat_portal[i], - cur, worst, (s64)worstt, DHMS_VARS(&ts)); - lprocfs_at_hist_helper(m, &imp->imp_at.iat_service_estimate[i]); - } - - up_read(&obd->u.cli.cl_sem); - return 0; -} -EXPORT_SYMBOL(lprocfs_rd_timeouts); - -int lprocfs_rd_connect_flags(struct seq_file *m, void *data) -{ - struct obd_device *obd = data; - __u64 flags; - int rc; - - rc = lprocfs_climp_check(obd); - if (rc) - return rc; - - flags = obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags; - seq_printf(m, "flags=%#llx\n", flags); - obd_connect_seq_flags2str(m, flags, "\n"); - seq_puts(m, "\n"); - up_read(&obd->u.cli.cl_sem); - return 0; -} -EXPORT_SYMBOL(lprocfs_rd_connect_flags); - -static struct attribute *obd_def_attrs[] = { - &lustre_attr_blocksize.attr, - &lustre_attr_kbytestotal.attr, - &lustre_attr_kbytesfree.attr, - &lustre_attr_kbytesavail.attr, - &lustre_attr_filestotal.attr, - &lustre_attr_filesfree.attr, - &lustre_attr_uuid.attr, - 
NULL, -}; - -static void obd_sysfs_release(struct kobject *kobj) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - - complete(&obd->obd_kobj_unregister); -} - -static struct kobj_type obd_ktype = { - .default_attrs = obd_def_attrs, - .sysfs_ops = &lustre_sysfs_ops, - .release = obd_sysfs_release, -}; - -int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list, - const struct attribute_group *attrs) -{ - int rc = 0; - - init_completion(&obd->obd_kobj_unregister); - rc = kobject_init_and_add(&obd->obd_kobj, &obd_ktype, - obd->obd_type->typ_kobj, - "%s", obd->obd_name); - if (rc) - return rc; - - if (attrs) { - rc = sysfs_create_group(&obd->obd_kobj, attrs); - if (rc) { - kobject_put(&obd->obd_kobj); - return rc; - } - } - - obd->obd_debugfs_entry = debugfs_create_dir(obd->obd_name, - obd->obd_type->typ_debugfs_entry); - ldebugfs_add_vars(obd->obd_debugfs_entry, list, obd); - - return rc; -} -EXPORT_SYMBOL_GPL(lprocfs_obd_setup); - -int lprocfs_obd_cleanup(struct obd_device *obd) -{ - if (!obd) - return -EINVAL; - - debugfs_remove_recursive(obd->obd_debugfs_entry); - - kobject_put(&obd->obd_kobj); - wait_for_completion(&obd->obd_kobj_unregister); - - return 0; -} -EXPORT_SYMBOL_GPL(lprocfs_obd_cleanup); - -int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid) -{ - struct lprocfs_counter *cntr; - unsigned int percpusize; - int rc = -ENOMEM; - unsigned long flags = 0; - int i; - - LASSERT(!stats->ls_percpu[cpuid]); - LASSERT((stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) == 0); - - percpusize = lprocfs_stats_counter_size(stats); - stats->ls_percpu[cpuid] = kzalloc(percpusize, GFP_ATOMIC); - if (stats->ls_percpu[cpuid]) { - rc = 0; - if (unlikely(stats->ls_biggest_alloc_num <= cpuid)) { - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_lock_irqsave(&stats->ls_lock, flags); - else - spin_lock(&stats->ls_lock); - if (stats->ls_biggest_alloc_num <= cpuid) - stats->ls_biggest_alloc_num = cpuid 
+ 1; - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - spin_unlock_irqrestore(&stats->ls_lock, flags); - else - spin_unlock(&stats->ls_lock); - } - /* initialize the ls_percpu[cpuid] non-zero counter */ - for (i = 0; i < stats->ls_num; ++i) { - cntr = lprocfs_stats_counter_get(stats, cpuid, i); - cntr->lc_min = LC_MIN_INIT; - } - } - return rc; -} - -struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num, - enum lprocfs_stats_flags flags) -{ - struct lprocfs_stats *stats; - unsigned int num_entry; - unsigned int percpusize = 0; - int i; - - if (num == 0) - return NULL; - - if (lprocfs_no_percpu_stats != 0) - flags |= LPROCFS_STATS_FLAG_NOPERCPU; - - if (flags & LPROCFS_STATS_FLAG_NOPERCPU) - num_entry = 1; - else - num_entry = num_possible_cpus(); - - /* alloc percpu pointers for all possible cpu slots */ - stats = kvzalloc(offsetof(typeof(*stats), ls_percpu[num_entry]), - GFP_KERNEL); - if (!stats) - return NULL; - - stats->ls_num = num; - stats->ls_flags = flags; - spin_lock_init(&stats->ls_lock); - - /* alloc num of counter headers */ - stats->ls_cnt_header = kvmalloc_array(stats->ls_num, - sizeof(struct lprocfs_counter_header), - GFP_KERNEL | __GFP_ZERO); - if (!stats->ls_cnt_header) - goto fail; - - if ((flags & LPROCFS_STATS_FLAG_NOPERCPU) != 0) { - /* contains only one set counters */ - percpusize = lprocfs_stats_counter_size(stats); - stats->ls_percpu[0] = kzalloc(percpusize, GFP_ATOMIC); - if (!stats->ls_percpu[0]) - goto fail; - stats->ls_biggest_alloc_num = 1; - } else if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) { - /* alloc all percpu data */ - for (i = 0; i < num_entry; ++i) - if (lprocfs_stats_alloc_one(stats, i) < 0) - goto fail; - } - - return stats; - -fail: - lprocfs_free_stats(&stats); - return NULL; -} -EXPORT_SYMBOL(lprocfs_alloc_stats); - -void lprocfs_free_stats(struct lprocfs_stats **statsh) -{ - struct lprocfs_stats *stats = *statsh; - unsigned int num_entry; - unsigned int percpusize; - unsigned int i; - - if (!stats || 
stats->ls_num == 0) - return; - *statsh = NULL; - - if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) - num_entry = 1; - else - num_entry = num_possible_cpus(); - - percpusize = lprocfs_stats_counter_size(stats); - for (i = 0; i < num_entry; i++) - kfree(stats->ls_percpu[i]); - kvfree(stats->ls_cnt_header); - kvfree(stats); -} -EXPORT_SYMBOL(lprocfs_free_stats); - -__u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx, - enum lprocfs_fields_flags field) -{ - unsigned int i; - unsigned int num_cpu; - unsigned long flags = 0; - __u64 ret = 0; - - LASSERT(stats); - - num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags); - for (i = 0; i < num_cpu; i++) { - if (!stats->ls_percpu[i]) - continue; - ret += lprocfs_read_helper( - lprocfs_stats_counter_get(stats, i, idx), - &stats->ls_cnt_header[idx], stats->ls_flags, - field); - } - lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags); - return ret; -} -EXPORT_SYMBOL(lprocfs_stats_collector); - -void lprocfs_clear_stats(struct lprocfs_stats *stats) -{ - struct lprocfs_counter *percpu_cntr; - int i; - int j; - unsigned int num_entry; - unsigned long flags = 0; - - num_entry = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags); - - for (i = 0; i < num_entry; i++) { - if (!stats->ls_percpu[i]) - continue; - for (j = 0; j < stats->ls_num; j++) { - percpu_cntr = lprocfs_stats_counter_get(stats, i, j); - percpu_cntr->lc_count = 0; - percpu_cntr->lc_min = LC_MIN_INIT; - percpu_cntr->lc_max = 0; - percpu_cntr->lc_sumsquare = 0; - percpu_cntr->lc_sum = 0; - if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) - percpu_cntr->lc_sum_irq = 0; - } - } - - lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags); -} -EXPORT_SYMBOL(lprocfs_clear_stats); - -static ssize_t lprocfs_stats_seq_write(struct file *file, - const char __user *buf, - size_t len, loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct lprocfs_stats *stats = seq->private; - - lprocfs_clear_stats(stats); - - return len; -} 
- -static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos) -{ - struct lprocfs_stats *stats = p->private; - - return (*pos < stats->ls_num) ? pos : NULL; -} - -static void lprocfs_stats_seq_stop(struct seq_file *p, void *v) -{ -} - -static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos) -{ - (*pos)++; - return lprocfs_stats_seq_start(p, pos); -} - -/* seq file export of one lprocfs counter */ -static int lprocfs_stats_seq_show(struct seq_file *p, void *v) -{ - struct lprocfs_stats *stats = p->private; - struct lprocfs_counter_header *hdr; - struct lprocfs_counter ctr; - int idx = *(loff_t *)v; - - if (idx == 0) { - struct timespec64 now; - - ktime_get_real_ts64(&now); - seq_printf(p, "%-25s %llu.%9lu secs.usecs\n", - "snapshot_time", - (s64)now.tv_sec, (unsigned long)now.tv_nsec); - } - - hdr = &stats->ls_cnt_header[idx]; - lprocfs_stats_collect(stats, idx, &ctr); - - if (ctr.lc_count != 0) { - seq_printf(p, "%-25s %lld samples [%s]", - hdr->lc_name, ctr.lc_count, hdr->lc_units); - - if ((hdr->lc_config & LPROCFS_CNTR_AVGMINMAX) && - (ctr.lc_count > 0)) { - seq_printf(p, " %lld %lld %lld", - ctr.lc_min, ctr.lc_max, ctr.lc_sum); - if (hdr->lc_config & LPROCFS_CNTR_STDDEV) - seq_printf(p, " %lld", ctr.lc_sumsquare); - } - seq_putc(p, '\n'); - } - - return 0; -} - -static const struct seq_operations lprocfs_stats_seq_sops = { - .start = lprocfs_stats_seq_start, - .stop = lprocfs_stats_seq_stop, - .next = lprocfs_stats_seq_next, - .show = lprocfs_stats_seq_show, -}; - -static int lprocfs_stats_seq_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int rc; - - rc = seq_open(file, &lprocfs_stats_seq_sops); - if (rc) - return rc; - - seq = file->private_data; - seq->private = inode->i_private; - - return 0; -} - -const struct file_operations lprocfs_stats_seq_fops = { - .owner = THIS_MODULE, - .open = lprocfs_stats_seq_open, - .read = seq_read, - .write = lprocfs_stats_seq_write, - .llseek = seq_lseek, - .release 
= lprocfs_seq_release, -}; -EXPORT_SYMBOL_GPL(lprocfs_stats_seq_fops); - -void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned int conf, const char *name, - const char *units) -{ - struct lprocfs_counter_header *header; - struct lprocfs_counter *percpu_cntr; - unsigned long flags = 0; - unsigned int i; - unsigned int num_cpu; - - header = &stats->ls_cnt_header[index]; - LASSERTF(header, "Failed to allocate stats header:[%d]%s/%s\n", - index, name, units); - - header->lc_config = conf; - header->lc_name = name; - header->lc_units = units; - - num_cpu = lprocfs_stats_lock(stats, LPROCFS_GET_NUM_CPU, &flags); - for (i = 0; i < num_cpu; ++i) { - if (!stats->ls_percpu[i]) - continue; - percpu_cntr = lprocfs_stats_counter_get(stats, i, index); - percpu_cntr->lc_count = 0; - percpu_cntr->lc_min = LC_MIN_INIT; - percpu_cntr->lc_max = 0; - percpu_cntr->lc_sumsquare = 0; - percpu_cntr->lc_sum = 0; - if ((stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - percpu_cntr->lc_sum_irq = 0; - } - lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags); -} -EXPORT_SYMBOL(lprocfs_counter_init); - -int lprocfs_exp_cleanup(struct obd_export *exp) -{ - return 0; -} -EXPORT_SYMBOL(lprocfs_exp_cleanup); - -__s64 lprocfs_read_helper(struct lprocfs_counter *lc, - struct lprocfs_counter_header *header, - enum lprocfs_stats_flags flags, - enum lprocfs_fields_flags field) -{ - __s64 ret = 0; - - if (!lc || !header) - return 0; - - switch (field) { - case LPROCFS_FIELDS_FLAGS_CONFIG: - ret = header->lc_config; - break; - case LPROCFS_FIELDS_FLAGS_SUM: - ret = lc->lc_sum; - if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) - ret += lc->lc_sum_irq; - break; - case LPROCFS_FIELDS_FLAGS_MIN: - ret = lc->lc_min; - break; - case LPROCFS_FIELDS_FLAGS_MAX: - ret = lc->lc_max; - break; - case LPROCFS_FIELDS_FLAGS_AVG: - ret = (lc->lc_max - lc->lc_min) / 2; - break; - case LPROCFS_FIELDS_FLAGS_SUMSQUARE: - ret = lc->lc_sumsquare; - break; - case LPROCFS_FIELDS_FLAGS_COUNT: - ret 
= lc->lc_count; - break; - default: - break; - } - - return 0; -} -EXPORT_SYMBOL(lprocfs_read_helper); - -int lprocfs_write_helper(const char __user *buffer, unsigned long count, - int *val) -{ - return lprocfs_write_frac_helper(buffer, count, val, 1); -} -EXPORT_SYMBOL(lprocfs_write_helper); - -int lprocfs_write_u64_helper(const char __user *buffer, unsigned long count, - __u64 *val) -{ - return lprocfs_write_frac_u64_helper(buffer, count, val, 1); -} -EXPORT_SYMBOL(lprocfs_write_u64_helper); - -int lprocfs_write_frac_u64_helper(const char __user *buffer, - unsigned long count, __u64 *val, int mult) -{ - char kernbuf[22], *end, *pbuf; - __u64 whole, frac = 0, units; - unsigned int frac_d = 1; - int sign = 1; - - if (count > (sizeof(kernbuf) - 1)) - return -EINVAL; - - if (copy_from_user(kernbuf, buffer, count)) - return -EFAULT; - - kernbuf[count] = '\0'; - pbuf = kernbuf; - if (*pbuf == '-') { - sign = -1; - pbuf++; - } - - whole = simple_strtoull(pbuf, &end, 10); - if (pbuf == end) - return -EINVAL; - - if (*end == '.') { - int i; - - pbuf = end + 1; - - /* need to limit frac_d to a __u32 */ - if (strlen(pbuf) > 10) - pbuf[10] = '\0'; - - frac = simple_strtoull(pbuf, &end, 10); - /* count decimal places */ - for (i = 0; i < (end - pbuf); i++) - frac_d *= 10; - } - - units = 1; - if (end) { - switch (tolower(*end)) { - case 'p': - units <<= 10; - /* fall through */ - case 't': - units <<= 10; - /* fall through */ - case 'g': - units <<= 10; - /* fall through */ - case 'm': - units <<= 10; - /* fall through */ - case 'k': - units <<= 10; - } - } - /* Specified units override the multiplier */ - if (units > 1) - mult = units; - - frac *= mult; - do_div(frac, frac_d); - *val = sign * (whole * mult + frac); - return 0; -} -EXPORT_SYMBOL(lprocfs_write_frac_u64_helper); - -static char *lprocfs_strnstr(const char *s1, const char *s2, size_t len) -{ - size_t l2; - - l2 = strlen(s2); - if (!l2) - return (char *)s1; - while (len >= l2) { - len--; - if (!memcmp(s1, s2, l2)) 
- return (char *)s1; - s1++; - } - return NULL; -} - -/** - * Find the string \a name in the input \a buffer, and return a pointer to the - * value immediately following \a name, reducing \a count appropriately. - * If \a name is not found the original \a buffer is returned. - */ -char *lprocfs_find_named_value(const char *buffer, const char *name, - size_t *count) -{ - char *val; - size_t buflen = *count; - - /* there is no strnstr() in rhel5 and ubuntu kernels */ - val = lprocfs_strnstr(buffer, name, buflen); - if (!val) - return (char *)buffer; - - val += strlen(name); /* skip prefix */ - while (val < buffer + buflen && isspace(*val)) /* skip separator */ - val++; - - *count = 0; - while (val < buffer + buflen && isalnum(*val)) { - ++*count; - ++val; - } - - return val - *count; -} -EXPORT_SYMBOL(lprocfs_find_named_value); - -void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) -{ - if (value >= OBD_HIST_MAX) - value = OBD_HIST_MAX - 1; - - spin_lock(&oh->oh_lock); - oh->oh_buckets[value]++; - spin_unlock(&oh->oh_lock); -} -EXPORT_SYMBOL(lprocfs_oh_tally); - -void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) -{ - unsigned int val = 0; - - if (likely(value != 0)) - val = min(fls(value - 1), OBD_HIST_MAX); - - lprocfs_oh_tally(oh, val); -} -EXPORT_SYMBOL(lprocfs_oh_tally_log2); - -unsigned long lprocfs_oh_sum(struct obd_histogram *oh) -{ - unsigned long ret = 0; - int i; - - for (i = 0; i < OBD_HIST_MAX; i++) - ret += oh->oh_buckets[i]; - return ret; -} -EXPORT_SYMBOL(lprocfs_oh_sum); - -void lprocfs_oh_clear(struct obd_histogram *oh) -{ - spin_lock(&oh->oh_lock); - memset(oh->oh_buckets, 0, sizeof(oh->oh_buckets)); - spin_unlock(&oh->oh_lock); -} -EXPORT_SYMBOL(lprocfs_oh_clear); - -int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count, - struct root_squash_info *squash, char *name) -{ - char kernbuf[64], *tmp, *errmsg; - unsigned long uid, gid; - int rc; - - if (count >= sizeof(kernbuf)) { - errmsg = 
"string too long"; - rc = -EINVAL; - goto failed_noprint; - } - if (copy_from_user(kernbuf, buffer, count)) { - errmsg = "bad address"; - rc = -EFAULT; - goto failed_noprint; - } - kernbuf[count] = '\0'; - - /* look for uid gid separator */ - tmp = strchr(kernbuf, ':'); - if (!tmp) { - errmsg = "needs uid:gid format"; - rc = -EINVAL; - goto failed; - } - *tmp = '\0'; - tmp++; - - /* parse uid */ - if (kstrtoul(kernbuf, 0, &uid) != 0) { - errmsg = "bad uid"; - rc = -EINVAL; - goto failed; - } - /* parse gid */ - if (kstrtoul(tmp, 0, &gid) != 0) { - errmsg = "bad gid"; - rc = -EINVAL; - goto failed; - } - - squash->rsi_uid = uid; - squash->rsi_gid = gid; - - LCONSOLE_INFO("%s: root_squash is set to %u:%u\n", - name, squash->rsi_uid, squash->rsi_gid); - return count; - -failed: - if (tmp) { - tmp--; - *tmp = ':'; - } - CWARN("%s: failed to set root_squash to \"%s\", %s, rc = %d\n", - name, kernbuf, errmsg, rc); - return rc; -failed_noprint: - CWARN("%s: failed to set root_squash due to %s, rc = %d\n", - name, errmsg, rc); - return rc; -} -EXPORT_SYMBOL(lprocfs_wr_root_squash); - -int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count, - struct root_squash_info *squash, char *name) -{ - char *kernbuf = NULL, *errmsg; - struct list_head tmp; - int len = count; - int rc; - - if (count > 4096) { - errmsg = "string too long"; - rc = -EINVAL; - goto failed; - } - - kernbuf = kzalloc(count + 1, GFP_NOFS); - if (!kernbuf) { - errmsg = "no memory"; - rc = -ENOMEM; - goto failed; - } - - if (copy_from_user(kernbuf, buffer, count)) { - errmsg = "bad address"; - rc = -EFAULT; - goto failed; - } - kernbuf[count] = '\0'; - - if (count > 0 && kernbuf[count - 1] == '\n') - len = count - 1; - - if ((len == 4 && !strncmp(kernbuf, "NONE", len)) || - (len == 5 && !strncmp(kernbuf, "clear", len))) { - /* empty string is special case */ - down_write(&squash->rsi_sem); - if (!list_empty(&squash->rsi_nosquash_nids)) - cfs_free_nidlist(&squash->rsi_nosquash_nids); - 
up_write(&squash->rsi_sem); - LCONSOLE_INFO("%s: nosquash_nids is cleared\n", name); - kfree(kernbuf); - return count; - } - - INIT_LIST_HEAD(&tmp); - if (cfs_parse_nidlist(kernbuf, count, &tmp) <= 0) { - errmsg = "can't parse"; - rc = -EINVAL; - goto failed; - } - LCONSOLE_INFO("%s: nosquash_nids set to %s\n", - name, kernbuf); - kfree(kernbuf); - kernbuf = NULL; - - down_write(&squash->rsi_sem); - if (!list_empty(&squash->rsi_nosquash_nids)) - cfs_free_nidlist(&squash->rsi_nosquash_nids); - list_splice(&tmp, &squash->rsi_nosquash_nids); - up_write(&squash->rsi_sem); - - return count; - -failed: - if (kernbuf) { - CWARN("%s: failed to set nosquash_nids to \"%s\", %s rc = %d\n", - name, kernbuf, errmsg, rc); - kfree(kernbuf); - kernbuf = NULL; - } else { - CWARN("%s: failed to set nosquash_nids due to %s rc = %d\n", - name, errmsg, rc); - } - return rc; -} -EXPORT_SYMBOL(lprocfs_wr_nosquash_nids); - -static ssize_t lustre_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct lustre_attr *a = container_of(attr, struct lustre_attr, attr); - - return a->show ? a->show(kobj, attr, buf) : 0; -} - -static ssize_t lustre_attr_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct lustre_attr *a = container_of(attr, struct lustre_attr, attr); - - return a->store ? a->store(kobj, attr, buf, len) : len; -} - -const struct sysfs_ops lustre_sysfs_ops = { - .show = lustre_attr_show, - .store = lustre_attr_store, -}; -EXPORT_SYMBOL_GPL(lustre_sysfs_ops); diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c deleted file mode 100644 index aa9d74e087f4..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/lu_object.c +++ /dev/null @@ -1,2056 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/lu_object.c - * - * Lustre Object. - * These are the only exported functions, they provide some generic - * infrastructure for managing object devices - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include - -/* hash_long() */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct lu_site_bkt_data { - /** - * LRU list, updated on each access to object. Protected by - * bucket lock of lu_site::ls_obj_hash. - * - * "Cold" end of LRU is lu_site::ls_lru.next. Accessed object are - * moved to the lu_site::ls_lru.prev (this is due to the non-existence - * of list_for_each_entry_safe_reverse()). - */ - struct list_head lsb_lru; - /** - * Wait-queue signaled when an object in this site is ultimately - * destroyed (lu_object_free()). It is used by lu_object_find() to - * wait before re-trying when object in the process of destruction is - * found in the hash table. 
- * - * \see htable_lookup(). - */ - wait_queue_head_t lsb_marche_funebre; -}; - -enum { - LU_CACHE_PERCENT_MAX = 50, - LU_CACHE_PERCENT_DEFAULT = 20 -}; - -#define LU_CACHE_NR_MAX_ADJUST 512 -#define LU_CACHE_NR_UNLIMITED -1 -#define LU_CACHE_NR_DEFAULT LU_CACHE_NR_UNLIMITED -#define LU_CACHE_NR_LDISKFS_LIMIT LU_CACHE_NR_UNLIMITED -#define LU_CACHE_NR_ZFS_LIMIT 256 - -#define LU_SITE_BITS_MIN 12 -#define LU_SITE_BITS_MAX 24 -#define LU_SITE_BITS_MAX_CL 19 -/** - * total 256 buckets, we don't want too many buckets because: - * - consume too much memory - * - avoid unbalanced LRU list - */ -#define LU_SITE_BKT_BITS 8 - -static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT; -module_param(lu_cache_percent, int, 0644); -MODULE_PARM_DESC(lu_cache_percent, "Percentage of memory to be used as lu_object cache"); - -static long lu_cache_nr = LU_CACHE_NR_DEFAULT; -module_param(lu_cache_nr, long, 0644); -MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache"); - -static void lu_object_free(const struct lu_env *env, struct lu_object *o); -static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx); - -wait_queue_head_t * -lu_site_wq_from_fid(struct lu_site *site, struct lu_fid *fid) -{ - struct cfs_hash_bd bd; - struct lu_site_bkt_data *bkt; - - cfs_hash_bd_get(site->ls_obj_hash, fid, &bd); - bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd); - return &bkt->lsb_marche_funebre; -} -EXPORT_SYMBOL(lu_site_wq_from_fid); - -/** - * Decrease reference counter on object. If last reference is freed, return - * object to the cache, unless lu_object_is_dying(o) holds. In the latter - * case, free object immediately. 
- */ -void lu_object_put(const struct lu_env *env, struct lu_object *o) -{ - struct lu_site_bkt_data *bkt; - struct lu_object_header *top; - struct lu_site *site; - struct lu_object *orig; - struct cfs_hash_bd bd; - const struct lu_fid *fid; - - top = o->lo_header; - site = o->lo_dev->ld_site; - orig = o; - - /* - * till we have full fids-on-OST implemented anonymous objects - * are possible in OSP. such an object isn't listed in the site - * so we should not remove it from the site. - */ - fid = lu_object_fid(o); - if (fid_is_zero(fid)) { - LASSERT(!top->loh_hash.next && !top->loh_hash.pprev); - LASSERT(list_empty(&top->loh_lru)); - if (!atomic_dec_and_test(&top->loh_ref)) - return; - list_for_each_entry_reverse(o, &top->loh_layers, lo_linkage) { - if (o->lo_ops->loo_object_release) - o->lo_ops->loo_object_release(env, o); - } - lu_object_free(env, orig); - return; - } - - cfs_hash_bd_get(site->ls_obj_hash, &top->loh_fid, &bd); - bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd); - - if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) { - if (lu_object_is_dying(top)) { - /* - * somebody may be waiting for this, currently only - * used for cl_object, see cl_object_put_last(). - */ - wake_up_all(&bkt->lsb_marche_funebre); - } - return; - } - - /* - * When last reference is released, iterate over object - * layers, and notify them that object is no longer busy. - */ - list_for_each_entry_reverse(o, &top->loh_layers, lo_linkage) { - if (o->lo_ops->loo_object_release) - o->lo_ops->loo_object_release(env, o); - } - - if (!lu_object_is_dying(top)) { - LASSERT(list_empty(&top->loh_lru)); - list_add_tail(&top->loh_lru, &bkt->lsb_lru); - percpu_counter_inc(&site->ls_lru_len_counter); - CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p\n", - o, site->ls_obj_hash, bkt); - cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1); - return; - } - - /* - * If object is dying (will not be cached), then removed it - * from hash table and LRU. 
- * - * This is done with hash table and LRU lists locked. As the only - * way to acquire first reference to previously unreferenced - * object is through hash-table lookup (lu_object_find()), - * or LRU scanning (lu_site_purge()), that are done under hash-table - * and LRU lock, no race with concurrent object lookup is possible - * and we can safely destroy object below. - */ - if (!test_and_set_bit(LU_OBJECT_UNHASHED, &top->loh_flags)) - cfs_hash_bd_del_locked(site->ls_obj_hash, &bd, &top->loh_hash); - cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1); - /* - * Object was already removed from hash and lru above, can - * kill it. - */ - lu_object_free(env, orig); -} -EXPORT_SYMBOL(lu_object_put); - -/** - * Kill the object and take it out of LRU cache. - * Currently used by client code for layout change. - */ -void lu_object_unhash(const struct lu_env *env, struct lu_object *o) -{ - struct lu_object_header *top; - - top = o->lo_header; - set_bit(LU_OBJECT_HEARD_BANSHEE, &top->loh_flags); - if (!test_and_set_bit(LU_OBJECT_UNHASHED, &top->loh_flags)) { - struct lu_site *site = o->lo_dev->ld_site; - struct cfs_hash *obj_hash = site->ls_obj_hash; - struct cfs_hash_bd bd; - - cfs_hash_bd_get_and_lock(obj_hash, &top->loh_fid, &bd, 1); - if (!list_empty(&top->loh_lru)) { - struct lu_site_bkt_data *bkt; - - list_del_init(&top->loh_lru); - bkt = cfs_hash_bd_extra_get(obj_hash, &bd); - percpu_counter_dec(&site->ls_lru_len_counter); - } - cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash); - cfs_hash_bd_unlock(obj_hash, &bd, 1); - } -} -EXPORT_SYMBOL(lu_object_unhash); - -/** - * Allocate new object. - * - * This follows object creation protocol, described in the comment within - * struct lu_device_operations definition. 
- */ -static struct lu_object *lu_object_alloc(const struct lu_env *env, - struct lu_device *dev, - const struct lu_fid *f, - const struct lu_object_conf *conf) -{ - struct lu_object *scan; - struct lu_object *top; - struct list_head *layers; - unsigned int init_mask = 0; - unsigned int init_flag; - int clean; - int result; - - /* - * Create top-level object slice. This will also create - * lu_object_header. - */ - top = dev->ld_ops->ldo_object_alloc(env, NULL, dev); - if (!top) - return ERR_PTR(-ENOMEM); - if (IS_ERR(top)) - return top; - /* - * This is the only place where object fid is assigned. It's constant - * after this point. - */ - top->lo_header->loh_fid = *f; - layers = &top->lo_header->loh_layers; - - do { - /* - * Call ->loo_object_init() repeatedly, until no more new - * object slices are created. - */ - clean = 1; - init_flag = 1; - list_for_each_entry(scan, layers, lo_linkage) { - if (init_mask & init_flag) - goto next; - clean = 0; - scan->lo_header = top->lo_header; - result = scan->lo_ops->loo_object_init(env, scan, conf); - if (result != 0) { - lu_object_free(env, top); - return ERR_PTR(result); - } - init_mask |= init_flag; -next: - init_flag <<= 1; - } - } while (!clean); - - list_for_each_entry_reverse(scan, layers, lo_linkage) { - if (scan->lo_ops->loo_object_start) { - result = scan->lo_ops->loo_object_start(env, scan); - if (result != 0) { - lu_object_free(env, top); - return ERR_PTR(result); - } - } - } - - lprocfs_counter_incr(dev->ld_site->ls_stats, LU_SS_CREATED); - return top; -} - -/** - * Free an object. - */ -static void lu_object_free(const struct lu_env *env, struct lu_object *o) -{ - wait_queue_head_t *wq; - struct lu_site *site; - struct lu_object *scan; - struct list_head *layers; - struct list_head splice; - - site = o->lo_dev->ld_site; - layers = &o->lo_header->loh_layers; - wq = lu_site_wq_from_fid(site, &o->lo_header->loh_fid); - /* - * First call ->loo_object_delete() method to release all resources. 
- */ - list_for_each_entry_reverse(scan, layers, lo_linkage) { - if (scan->lo_ops->loo_object_delete) - scan->lo_ops->loo_object_delete(env, scan); - } - - /* - * Then, splice object layers into stand-alone list, and call - * ->loo_object_free() on all layers to free memory. Splice is - * necessary, because lu_object_header is freed together with the - * top-level slice. - */ - INIT_LIST_HEAD(&splice); - list_splice_init(layers, &splice); - while (!list_empty(&splice)) { - /* - * Free layers in bottom-to-top order, so that object header - * lives as long as possible and ->loo_object_free() methods - * can look at its contents. - */ - o = container_of(splice.prev, struct lu_object, lo_linkage); - list_del_init(&o->lo_linkage); - o->lo_ops->loo_object_free(env, o); - } - - if (waitqueue_active(wq)) - wake_up_all(wq); -} - -/** - * Free \a nr objects from the cold end of the site LRU list. - * if canblock is false, then don't block awaiting for another - * instance of lu_site_purge() to complete - */ -int lu_site_purge_objects(const struct lu_env *env, struct lu_site *s, - int nr, bool canblock) -{ - struct lu_object_header *h; - struct lu_object_header *temp; - struct lu_site_bkt_data *bkt; - struct cfs_hash_bd bd; - struct cfs_hash_bd bd2; - struct list_head dispose; - int did_sth; - unsigned int start = 0; - int count; - int bnr; - unsigned int i; - - if (OBD_FAIL_CHECK(OBD_FAIL_OBD_NO_LRU)) - return 0; - - INIT_LIST_HEAD(&dispose); - /* - * Under LRU list lock, scan LRU list and move unreferenced objects to - * the dispose list, removing them from LRU and hash table. - */ - if (nr != ~0) - start = s->ls_purge_start; - bnr = (nr == ~0) ? -1 : nr / (int)CFS_HASH_NBKT(s->ls_obj_hash) + 1; - again: - /* - * It doesn't make any sense to make purge threads parallel, that can - * only bring troubles to us. See LU-5331. 
- */ - if (canblock) - mutex_lock(&s->ls_purge_mutex); - else if (!mutex_trylock(&s->ls_purge_mutex)) - goto out; - - did_sth = 0; - cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) { - if (i < start) - continue; - count = bnr; - cfs_hash_bd_lock(s->ls_obj_hash, &bd, 1); - bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd); - - list_for_each_entry_safe(h, temp, &bkt->lsb_lru, loh_lru) { - LASSERT(atomic_read(&h->loh_ref) == 0); - - cfs_hash_bd_get(s->ls_obj_hash, &h->loh_fid, &bd2); - LASSERT(bd.bd_bucket == bd2.bd_bucket); - - cfs_hash_bd_del_locked(s->ls_obj_hash, - &bd2, &h->loh_hash); - list_move(&h->loh_lru, &dispose); - percpu_counter_dec(&s->ls_lru_len_counter); - if (did_sth == 0) - did_sth = 1; - - if (nr != ~0 && --nr == 0) - break; - - if (count > 0 && --count == 0) - break; - } - cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1); - cond_resched(); - /* - * Free everything on the dispose list. This is safe against - * races due to the reasons described in lu_object_put(). - */ - while (!list_empty(&dispose)) { - h = container_of(dispose.next, - struct lu_object_header, loh_lru); - list_del_init(&h->loh_lru); - lu_object_free(env, lu_object_top(h)); - lprocfs_counter_incr(s->ls_stats, LU_SS_LRU_PURGED); - } - - if (nr == 0) - break; - } - mutex_unlock(&s->ls_purge_mutex); - - if (nr != 0 && did_sth && start != 0) { - start = 0; /* restart from the first bucket */ - goto again; - } - /* race on s->ls_purge_start, but nobody cares */ - s->ls_purge_start = i % CFS_HASH_NBKT(s->ls_obj_hash); -out: - return nr; -} -EXPORT_SYMBOL(lu_site_purge_objects); - -/* - * Object printing. - * - * Code below has to jump through certain loops to output object description - * into libcfs_debug_msg-based log. The problem is that lu_object_print() - * composes object description from strings that are parts of _lines_ of - * output (i.e., strings that are not terminated by newline). 
This doesn't fit - * very well into libcfs_debug_msg() interface that assumes that each message - * supplied to it is a self-contained output line. - * - * To work around this, strings are collected in a temporary buffer - * (implemented as a value of lu_cdebug_key key), until terminating newline - * character is detected. - * - */ - -enum { - /** - * Maximal line size. - * - * XXX overflow is not handled correctly. - */ - LU_CDEBUG_LINE = 512 -}; - -struct lu_cdebug_data { - /** - * Temporary buffer. - */ - char lck_area[LU_CDEBUG_LINE]; -}; - -/* context key constructor/destructor: lu_global_key_init, lu_global_key_fini */ -LU_KEY_INIT_FINI(lu_global, struct lu_cdebug_data); - -/** - * Key, holding temporary buffer. This key is registered very early by - * lu_global_init(). - */ -static struct lu_context_key lu_global_key = { - .lct_tags = LCT_MD_THREAD | LCT_DT_THREAD | - LCT_MG_THREAD | LCT_CL_THREAD | LCT_LOCAL, - .lct_init = lu_global_key_init, - .lct_fini = lu_global_key_fini -}; - -/** - * Printer function emitting messages through libcfs_debug_msg(). - */ -int lu_cdebug_printer(const struct lu_env *env, - void *cookie, const char *format, ...) -{ - struct libcfs_debug_msg_data *msgdata = cookie; - struct lu_cdebug_data *key; - int used; - int complete; - va_list args; - - va_start(args, format); - - key = lu_context_key_get(&env->le_ctx, &lu_global_key); - - used = strlen(key->lck_area); - complete = format[strlen(format) - 1] == '\n'; - /* - * Append new chunk to the buffer. - */ - vsnprintf(key->lck_area + used, - ARRAY_SIZE(key->lck_area) - used, format, args); - if (complete) { - if (cfs_cdebug_show(msgdata->msg_mask, msgdata->msg_subsys)) - libcfs_debug_msg(msgdata, "%s\n", key->lck_area); - key->lck_area[0] = 0; - } - va_end(args); - return 0; -} -EXPORT_SYMBOL(lu_cdebug_printer); - -/** - * Print object header. 
- */ -void lu_object_header_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, - const struct lu_object_header *hdr) -{ - (*printer)(env, cookie, "header@%p[%#lx, %d, " DFID "%s%s%s]", - hdr, hdr->loh_flags, atomic_read(&hdr->loh_ref), - PFID(&hdr->loh_fid), - hlist_unhashed(&hdr->loh_hash) ? "" : " hash", - list_empty((struct list_head *)&hdr->loh_lru) ? \ - "" : " lru", - hdr->loh_attr & LOHA_EXISTS ? " exist":""); -} -EXPORT_SYMBOL(lu_object_header_print); - -/** - * Print human readable representation of the \a o to the \a printer. - */ -void lu_object_print(const struct lu_env *env, void *cookie, - lu_printer_t printer, const struct lu_object *o) -{ - static const char ruler[] = "........................................"; - struct lu_object_header *top; - int depth = 4; - - top = o->lo_header; - lu_object_header_print(env, cookie, printer, top); - (*printer)(env, cookie, "{\n"); - - list_for_each_entry(o, &top->loh_layers, lo_linkage) { - /* - * print `.' \a depth times followed by type name and address - */ - (*printer)(env, cookie, "%*.*s%s@%p", depth, depth, ruler, - o->lo_dev->ld_type->ldt_name, o); - - if (o->lo_ops->loo_object_print) - (*o->lo_ops->loo_object_print)(env, cookie, printer, o); - - (*printer)(env, cookie, "\n"); - } - - (*printer)(env, cookie, "} header@%p\n", top); -} -EXPORT_SYMBOL(lu_object_print); - -/* - * NOTE: htable_lookup() is called with the relevant - * hash bucket locked, but might drop and re-acquire the lock. - */ -static struct lu_object *htable_lookup(struct lu_site *s, - struct cfs_hash_bd *bd, - const struct lu_fid *f, - __u64 *version) -{ - struct lu_site_bkt_data *bkt; - struct lu_object_header *h; - struct hlist_node *hnode; - u64 ver = cfs_hash_bd_version_get(bd); - - if (*version == ver) - return ERR_PTR(-ENOENT); - - *version = ver; - bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd); - /* cfs_hash_bd_peek_locked is a somehow "internal" function - * of cfs_hash, it doesn't add refcount on object. 
- */ - hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f); - if (!hnode) { - lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS); - return ERR_PTR(-ENOENT); - } - - h = container_of(hnode, struct lu_object_header, loh_hash); - cfs_hash_get(s->ls_obj_hash, hnode); - lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT); - if (!list_empty(&h->loh_lru)) { - list_del_init(&h->loh_lru); - percpu_counter_dec(&s->ls_lru_len_counter); - } - return lu_object_top(h); -} - -/** - * Search cache for an object with the fid \a f. If such object is found, - * return it. Otherwise, create new object, insert it into cache and return - * it. In any case, additional reference is acquired on the returned object. - */ -static struct lu_object *lu_object_find(const struct lu_env *env, - struct lu_device *dev, - const struct lu_fid *f, - const struct lu_object_conf *conf) -{ - return lu_object_find_at(env, dev->ld_site->ls_top_dev, f, conf); -} - -/* - * Limit the lu_object cache to a maximum of lu_cache_nr objects. Because - * the calculation for the number of objects to reclaim is not covered by - * a lock the maximum number of objects is capped by LU_CACHE_MAX_ADJUST. - * This ensures that many concurrent threads will not accidentally purge - * the entire cache. - */ -static void lu_object_limit(const struct lu_env *env, struct lu_device *dev) -{ - __u64 size, nr; - - if (lu_cache_nr == LU_CACHE_NR_UNLIMITED) - return; - - size = cfs_hash_size_get(dev->ld_site->ls_obj_hash); - nr = (__u64)lu_cache_nr; - if (size <= nr) - return; - - lu_site_purge_objects(env, dev->ld_site, - min_t(__u64, size - nr, LU_CACHE_NR_MAX_ADJUST), - false); -} - -/** - * Core logic of lu_object_find*() functions. - * - * Much like lu_object_find(), but top level device of object is specifically - * \a dev rather than top level device of the site. This interface allows - * objects of different "stacking" to be created within the same site. 
- */ -struct lu_object *lu_object_find_at(const struct lu_env *env, - struct lu_device *dev, - const struct lu_fid *f, - const struct lu_object_conf *conf) -{ - struct lu_object *o; - struct lu_object *shadow; - struct lu_site *s; - struct cfs_hash *hs; - struct cfs_hash_bd bd; - __u64 version = 0; - - /* - * This uses standard index maintenance protocol: - * - * - search index under lock, and return object if found; - * - otherwise, unlock index, allocate new object; - * - lock index and search again; - * - if nothing is found (usual case), insert newly created - * object into index; - * - otherwise (race: other thread inserted object), free - * object just allocated. - * - unlock index; - * - return object. - * - * For "LOC_F_NEW" case, we are sure the object is new established. - * It is unnecessary to perform lookup-alloc-lookup-insert, instead, - * just alloc and insert directly. - * - */ - s = dev->ld_site; - hs = s->ls_obj_hash; - - cfs_hash_bd_get(hs, f, &bd); - if (!(conf && conf->loc_flags & LOC_F_NEW)) { - cfs_hash_bd_lock(hs, &bd, 1); - o = htable_lookup(s, &bd, f, &version); - cfs_hash_bd_unlock(hs, &bd, 1); - - if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT) - return o; - } - /* - * Allocate new object. This may result in rather complicated - * operations, including fld queries, inode loading, etc. 
- */ - o = lu_object_alloc(env, dev, f, conf); - if (IS_ERR(o)) - return o; - - LASSERT(lu_fid_eq(lu_object_fid(o), f)); - - cfs_hash_bd_lock(hs, &bd, 1); - - if (conf && conf->loc_flags & LOC_F_NEW) - shadow = ERR_PTR(-ENOENT); - else - shadow = htable_lookup(s, &bd, f, &version); - if (likely(PTR_ERR(shadow) == -ENOENT)) { - cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash); - cfs_hash_bd_unlock(hs, &bd, 1); - - lu_object_limit(env, dev); - - return o; - } - - lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_RACE); - cfs_hash_bd_unlock(hs, &bd, 1); - lu_object_free(env, o); - return shadow; -} -EXPORT_SYMBOL(lu_object_find_at); - -/** - * Find object with given fid, and return its slice belonging to given device. - */ -struct lu_object *lu_object_find_slice(const struct lu_env *env, - struct lu_device *dev, - const struct lu_fid *f, - const struct lu_object_conf *conf) -{ - struct lu_object *top; - struct lu_object *obj; - - top = lu_object_find(env, dev, f, conf); - if (IS_ERR(top)) - return top; - - obj = lu_object_locate(top->lo_header, dev->ld_type); - if (unlikely(!obj)) { - lu_object_put(env, top); - obj = ERR_PTR(-ENOENT); - } - - return obj; -} -EXPORT_SYMBOL(lu_object_find_slice); - -/** - * Global list of all device types. 
- */ -static LIST_HEAD(lu_device_types); - -int lu_device_type_init(struct lu_device_type *ldt) -{ - int result = 0; - - atomic_set(&ldt->ldt_device_nr, 0); - INIT_LIST_HEAD(&ldt->ldt_linkage); - if (ldt->ldt_ops->ldto_init) - result = ldt->ldt_ops->ldto_init(ldt); - - if (!result) { - spin_lock(&obd_types_lock); - list_add(&ldt->ldt_linkage, &lu_device_types); - spin_unlock(&obd_types_lock); - } - - return result; -} -EXPORT_SYMBOL(lu_device_type_init); - -void lu_device_type_fini(struct lu_device_type *ldt) -{ - spin_lock(&obd_types_lock); - list_del_init(&ldt->ldt_linkage); - spin_unlock(&obd_types_lock); - if (ldt->ldt_ops->ldto_fini) - ldt->ldt_ops->ldto_fini(ldt); -} -EXPORT_SYMBOL(lu_device_type_fini); - -/** - * Global list of all sites on this node - */ -static LIST_HEAD(lu_sites); -static DECLARE_RWSEM(lu_sites_guard); - -/** - * Global environment used by site shrinker. - */ -static struct lu_env lu_shrink_env; - -struct lu_site_print_arg { - struct lu_env *lsp_env; - void *lsp_cookie; - lu_printer_t lsp_printer; -}; - -static int -lu_site_obj_print(struct cfs_hash *hs, struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *data) -{ - struct lu_site_print_arg *arg = (struct lu_site_print_arg *)data; - struct lu_object_header *h; - - h = hlist_entry(hnode, struct lu_object_header, loh_hash); - if (!list_empty(&h->loh_layers)) { - const struct lu_object *o; - - o = lu_object_top(h); - lu_object_print(arg->lsp_env, arg->lsp_cookie, - arg->lsp_printer, o); - } else { - lu_object_header_print(arg->lsp_env, arg->lsp_cookie, - arg->lsp_printer, h); - } - return 0; -} - -/** - * Print all objects in \a s. 
- */ -void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie, - lu_printer_t printer) -{ - struct lu_site_print_arg arg = { - .lsp_env = (struct lu_env *)env, - .lsp_cookie = cookie, - .lsp_printer = printer, - }; - - cfs_hash_for_each(s->ls_obj_hash, lu_site_obj_print, &arg); -} -EXPORT_SYMBOL(lu_site_print); - -/** - * Return desired hash table order. - */ -static unsigned long lu_htable_order(struct lu_device *top) -{ - unsigned long bits_max = LU_SITE_BITS_MAX; - unsigned long cache_size; - unsigned long bits; - - if (!strcmp(top->ld_type->ldt_name, LUSTRE_VVP_NAME)) - bits_max = LU_SITE_BITS_MAX_CL; - - /* - * Calculate hash table size, assuming that we want reasonable - * performance when 20% of total memory is occupied by cache of - * lu_objects. - * - * Size of lu_object is (arbitrary) taken as 1K (together with inode). - */ - cache_size = totalram_pages; - -#if BITS_PER_LONG == 32 - /* limit hashtable size for lowmem systems to low RAM */ - if (cache_size > 1 << (30 - PAGE_SHIFT)) - cache_size = 1 << (30 - PAGE_SHIFT) * 3 / 4; -#endif - - /* clear off unreasonable cache setting. */ - if (lu_cache_percent == 0 || lu_cache_percent > LU_CACHE_PERCENT_MAX) { - CWARN("obdclass: invalid lu_cache_percent: %u, it must be in the range of (0, %u]. 
Will use default value: %u.\n", - lu_cache_percent, LU_CACHE_PERCENT_MAX, - LU_CACHE_PERCENT_DEFAULT); - - lu_cache_percent = LU_CACHE_PERCENT_DEFAULT; - } - cache_size = cache_size / 100 * lu_cache_percent * - (PAGE_SIZE / 1024); - - for (bits = 1; (1 << bits) < cache_size; ++bits) - ; - return clamp_t(typeof(bits), bits, LU_SITE_BITS_MIN, bits_max); -} - -static unsigned int lu_obj_hop_hash(struct cfs_hash *hs, - const void *key, unsigned int mask) -{ - struct lu_fid *fid = (struct lu_fid *)key; - __u32 hash; - - hash = fid_flatten32(fid); - hash += (hash >> 4) + (hash << 12); /* mixing oid and seq */ - hash = hash_long(hash, hs->hs_bkt_bits); - - /* give me another random factor */ - hash -= hash_long((unsigned long)hs, fid_oid(fid) % 11 + 3); - - hash <<= hs->hs_cur_bits - hs->hs_bkt_bits; - hash |= (fid_seq(fid) + fid_oid(fid)) & (CFS_HASH_NBKT(hs) - 1); - - return hash & mask; -} - -static void *lu_obj_hop_object(struct hlist_node *hnode) -{ - return hlist_entry(hnode, struct lu_object_header, loh_hash); -} - -static void *lu_obj_hop_key(struct hlist_node *hnode) -{ - struct lu_object_header *h; - - h = hlist_entry(hnode, struct lu_object_header, loh_hash); - return &h->loh_fid; -} - -static int lu_obj_hop_keycmp(const void *key, struct hlist_node *hnode) -{ - struct lu_object_header *h; - - h = hlist_entry(hnode, struct lu_object_header, loh_hash); - return lu_fid_eq(&h->loh_fid, (struct lu_fid *)key); -} - -static void lu_obj_hop_get(struct cfs_hash *hs, struct hlist_node *hnode) -{ - struct lu_object_header *h; - - h = hlist_entry(hnode, struct lu_object_header, loh_hash); - atomic_inc(&h->loh_ref); -} - -static void lu_obj_hop_put_locked(struct cfs_hash *hs, struct hlist_node *hnode) -{ - LBUG(); /* we should never called it */ -} - -static struct cfs_hash_ops lu_site_hash_ops = { - .hs_hash = lu_obj_hop_hash, - .hs_key = lu_obj_hop_key, - .hs_keycmp = lu_obj_hop_keycmp, - .hs_object = lu_obj_hop_object, - .hs_get = lu_obj_hop_get, - .hs_put_locked = 
lu_obj_hop_put_locked, -}; - -static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d) -{ - spin_lock(&s->ls_ld_lock); - if (list_empty(&d->ld_linkage)) - list_add(&d->ld_linkage, &s->ls_ld_linkage); - spin_unlock(&s->ls_ld_lock); -} - -/** - * Initialize site \a s, with \a d as the top level device. - */ -int lu_site_init(struct lu_site *s, struct lu_device *top) -{ - struct lu_site_bkt_data *bkt; - struct cfs_hash_bd bd; - unsigned long bits; - unsigned long i; - char name[16]; - int rc; - - memset(s, 0, sizeof(*s)); - mutex_init(&s->ls_purge_mutex); - - rc = percpu_counter_init(&s->ls_lru_len_counter, 0, GFP_NOFS); - if (rc) - return -ENOMEM; - - snprintf(name, sizeof(name), "lu_site_%s", top->ld_type->ldt_name); - for (bits = lu_htable_order(top); bits >= LU_SITE_BITS_MIN; bits--) { - s->ls_obj_hash = cfs_hash_create(name, bits, bits, - bits - LU_SITE_BKT_BITS, - sizeof(*bkt), 0, 0, - &lu_site_hash_ops, - CFS_HASH_SPIN_BKTLOCK | - CFS_HASH_NO_ITEMREF | - CFS_HASH_DEPTH | - CFS_HASH_ASSERT_EMPTY | - CFS_HASH_COUNTER); - if (s->ls_obj_hash) - break; - } - - if (!s->ls_obj_hash) { - CERROR("failed to create lu_site hash with bits: %lu\n", bits); - return -ENOMEM; - } - - cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) { - bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd); - INIT_LIST_HEAD(&bkt->lsb_lru); - init_waitqueue_head(&bkt->lsb_marche_funebre); - } - - s->ls_stats = lprocfs_alloc_stats(LU_SS_LAST_STAT, 0); - if (!s->ls_stats) { - cfs_hash_putref(s->ls_obj_hash); - s->ls_obj_hash = NULL; - return -ENOMEM; - } - - lprocfs_counter_init(s->ls_stats, LU_SS_CREATED, - 0, "created", "created"); - lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_HIT, - 0, "cache_hit", "cache_hit"); - lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_MISS, - 0, "cache_miss", "cache_miss"); - lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_RACE, - 0, "cache_race", "cache_race"); - lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_DEATH_RACE, - 0, "cache_death_race", 
"cache_death_race"); - lprocfs_counter_init(s->ls_stats, LU_SS_LRU_PURGED, - 0, "lru_purged", "lru_purged"); - - INIT_LIST_HEAD(&s->ls_linkage); - s->ls_top_dev = top; - top->ld_site = s; - lu_device_get(top); - lu_ref_add(&top->ld_reference, "site-top", s); - - INIT_LIST_HEAD(&s->ls_ld_linkage); - spin_lock_init(&s->ls_ld_lock); - - lu_dev_add_linkage(s, top); - - return 0; -} -EXPORT_SYMBOL(lu_site_init); - -/** - * Finalize \a s and release its resources. - */ -void lu_site_fini(struct lu_site *s) -{ - down_write(&lu_sites_guard); - list_del_init(&s->ls_linkage); - up_write(&lu_sites_guard); - - percpu_counter_destroy(&s->ls_lru_len_counter); - - if (s->ls_obj_hash) { - cfs_hash_putref(s->ls_obj_hash); - s->ls_obj_hash = NULL; - } - - if (s->ls_top_dev) { - s->ls_top_dev->ld_site = NULL; - lu_ref_del(&s->ls_top_dev->ld_reference, "site-top", s); - lu_device_put(s->ls_top_dev); - s->ls_top_dev = NULL; - } - - if (s->ls_stats) - lprocfs_free_stats(&s->ls_stats); -} -EXPORT_SYMBOL(lu_site_fini); - -/** - * Called when initialization of stack for this site is completed. - */ -int lu_site_init_finish(struct lu_site *s) -{ - int result; - - down_write(&lu_sites_guard); - result = lu_context_refill(&lu_shrink_env.le_ctx); - if (result == 0) - list_add(&s->ls_linkage, &lu_sites); - up_write(&lu_sites_guard); - return result; -} -EXPORT_SYMBOL(lu_site_init_finish); - -/** - * Acquire additional reference on device \a d - */ -void lu_device_get(struct lu_device *d) -{ - atomic_inc(&d->ld_ref); -} -EXPORT_SYMBOL(lu_device_get); - -/** - * Release reference on device \a d. - */ -void lu_device_put(struct lu_device *d) -{ - LASSERT(atomic_read(&d->ld_ref) > 0); - atomic_dec(&d->ld_ref); -} -EXPORT_SYMBOL(lu_device_put); - -/** - * Initialize device \a d of type \a t. 
- */ -int lu_device_init(struct lu_device *d, struct lu_device_type *t) -{ - if (atomic_inc_return(&t->ldt_device_nr) == 1 && - t->ldt_ops->ldto_start) - t->ldt_ops->ldto_start(t); - - memset(d, 0, sizeof(*d)); - atomic_set(&d->ld_ref, 0); - d->ld_type = t; - lu_ref_init(&d->ld_reference); - INIT_LIST_HEAD(&d->ld_linkage); - return 0; -} -EXPORT_SYMBOL(lu_device_init); - -/** - * Finalize device \a d. - */ -void lu_device_fini(struct lu_device *d) -{ - struct lu_device_type *t = d->ld_type; - - if (d->ld_obd) { - d->ld_obd->obd_lu_dev = NULL; - d->ld_obd = NULL; - } - - lu_ref_fini(&d->ld_reference); - LASSERTF(atomic_read(&d->ld_ref) == 0, - "Refcount is %u\n", atomic_read(&d->ld_ref)); - LASSERT(atomic_read(&t->ldt_device_nr) > 0); - - if (atomic_dec_and_test(&t->ldt_device_nr) && - t->ldt_ops->ldto_stop) - t->ldt_ops->ldto_stop(t); -} -EXPORT_SYMBOL(lu_device_fini); - -/** - * Initialize object \a o that is part of compound object \a h and was created - * by device \a d. - */ -int lu_object_init(struct lu_object *o, struct lu_object_header *h, - struct lu_device *d) -{ - memset(o, 0, sizeof(*o)); - o->lo_header = h; - o->lo_dev = d; - lu_device_get(d); - lu_ref_add_at(&d->ld_reference, &o->lo_dev_ref, "lu_object", o); - INIT_LIST_HEAD(&o->lo_linkage); - - return 0; -} -EXPORT_SYMBOL(lu_object_init); - -/** - * Finalize object and release its resources. - */ -void lu_object_fini(struct lu_object *o) -{ - struct lu_device *dev = o->lo_dev; - - LASSERT(list_empty(&o->lo_linkage)); - - if (dev) { - lu_ref_del_at(&dev->ld_reference, &o->lo_dev_ref, - "lu_object", o); - lu_device_put(dev); - o->lo_dev = NULL; - } -} -EXPORT_SYMBOL(lu_object_fini); - -/** - * Add object \a o as first layer of compound object \a h - * - * This is typically called by the ->ldo_object_alloc() method of top-level - * device. 
- */ -void lu_object_add_top(struct lu_object_header *h, struct lu_object *o) -{ - list_move(&o->lo_linkage, &h->loh_layers); -} -EXPORT_SYMBOL(lu_object_add_top); - -/** - * Add object \a o as a layer of compound object, going after \a before. - * - * This is typically called by the ->ldo_object_alloc() method of \a - * before->lo_dev. - */ -void lu_object_add(struct lu_object *before, struct lu_object *o) -{ - list_move(&o->lo_linkage, &before->lo_linkage); -} -EXPORT_SYMBOL(lu_object_add); - -/** - * Initialize compound object. - */ -int lu_object_header_init(struct lu_object_header *h) -{ - memset(h, 0, sizeof(*h)); - atomic_set(&h->loh_ref, 1); - INIT_HLIST_NODE(&h->loh_hash); - INIT_LIST_HEAD(&h->loh_lru); - INIT_LIST_HEAD(&h->loh_layers); - lu_ref_init(&h->loh_reference); - return 0; -} -EXPORT_SYMBOL(lu_object_header_init); - -/** - * Finalize compound object. - */ -void lu_object_header_fini(struct lu_object_header *h) -{ - LASSERT(list_empty(&h->loh_layers)); - LASSERT(list_empty(&h->loh_lru)); - LASSERT(hlist_unhashed(&h->loh_hash)); - lu_ref_fini(&h->loh_reference); -} -EXPORT_SYMBOL(lu_object_header_fini); - -/** - * Given a compound object, find its slice, corresponding to the device type - * \a dtype. - */ -struct lu_object *lu_object_locate(struct lu_object_header *h, - const struct lu_device_type *dtype) -{ - struct lu_object *o; - - list_for_each_entry(o, &h->loh_layers, lo_linkage) { - if (o->lo_dev->ld_type == dtype) - return o; - } - return NULL; -} -EXPORT_SYMBOL(lu_object_locate); - -/** - * Finalize and free devices in the device stack. - * - * Finalize device stack by purging object cache, and calling - * lu_device_type_operations::ldto_device_fini() and - * lu_device_type_operations::ldto_device_free() on all devices in the stack. 
- */ -void lu_stack_fini(const struct lu_env *env, struct lu_device *top) -{ - struct lu_site *site = top->ld_site; - struct lu_device *scan; - struct lu_device *next; - - lu_site_purge(env, site, ~0); - for (scan = top; scan; scan = next) { - next = scan->ld_type->ldt_ops->ldto_device_fini(env, scan); - lu_ref_del(&scan->ld_reference, "lu-stack", &lu_site_init); - lu_device_put(scan); - } - - /* purge again. */ - lu_site_purge(env, site, ~0); - - for (scan = top; scan; scan = next) { - const struct lu_device_type *ldt = scan->ld_type; - struct obd_type *type; - - next = ldt->ldt_ops->ldto_device_free(env, scan); - type = ldt->ldt_obd_type; - if (type) { - type->typ_refcnt--; - class_put_type(type); - } - } -} - -enum { - /** - * Maximal number of tld slots. - */ - LU_CONTEXT_KEY_NR = 40 -}; - -static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, }; - -static DEFINE_RWLOCK(lu_keys_guard); -static atomic_t lu_key_initing_cnt = ATOMIC_INIT(0); - -/** - * Global counter incremented whenever key is registered, unregistered, - * revived or quiesced. This is used to void unnecessary calls to - * lu_context_refill(). No locking is provided, as initialization and shutdown - * are supposed to be externally serialized. - */ -static unsigned int key_set_version; - -/** - * Register new key. 
- */ -int lu_context_key_register(struct lu_context_key *key) -{ - int result; - unsigned int i; - - LASSERT(key->lct_init); - LASSERT(key->lct_fini); - LASSERT(key->lct_tags != 0); - - result = -ENFILE; - write_lock(&lu_keys_guard); - for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) { - if (!lu_keys[i]) { - key->lct_index = i; - atomic_set(&key->lct_used, 1); - lu_keys[i] = key; - lu_ref_init(&key->lct_reference); - result = 0; - ++key_set_version; - break; - } - } - write_unlock(&lu_keys_guard); - return result; -} -EXPORT_SYMBOL(lu_context_key_register); - -static void key_fini(struct lu_context *ctx, int index) -{ - if (ctx->lc_value && ctx->lc_value[index]) { - struct lu_context_key *key; - - key = lu_keys[index]; - LASSERT(atomic_read(&key->lct_used) > 1); - - key->lct_fini(ctx, key, ctx->lc_value[index]); - lu_ref_del(&key->lct_reference, "ctx", ctx); - atomic_dec(&key->lct_used); - - if ((ctx->lc_tags & LCT_NOREF) == 0) - module_put(key->lct_owner); - ctx->lc_value[index] = NULL; - } -} - -/** - * Deregister key. - */ -void lu_context_key_degister(struct lu_context_key *key) -{ - LASSERT(atomic_read(&key->lct_used) >= 1); - LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys)); - - lu_context_key_quiesce(key); - - write_lock(&lu_keys_guard); - ++key_set_version; - key_fini(&lu_shrink_env.le_ctx, key->lct_index); - - /** - * Wait until all transient contexts referencing this key have - * run lu_context_key::lct_fini() method. 
- */ - while (atomic_read(&key->lct_used) > 1) { - write_unlock(&lu_keys_guard); - CDEBUG(D_INFO, "%s: \"%s\" %p, %d\n", - __func__, module_name(key->lct_owner), - key, atomic_read(&key->lct_used)); - schedule(); - write_lock(&lu_keys_guard); - } - if (lu_keys[key->lct_index]) { - lu_keys[key->lct_index] = NULL; - lu_ref_fini(&key->lct_reference); - } - write_unlock(&lu_keys_guard); - - LASSERTF(atomic_read(&key->lct_used) == 1, - "key has instances: %d\n", - atomic_read(&key->lct_used)); -} -EXPORT_SYMBOL(lu_context_key_degister); - -/** - * Register a number of keys. This has to be called after all keys have been - * initialized by a call to LU_CONTEXT_KEY_INIT(). - */ -int lu_context_key_register_many(struct lu_context_key *k, ...) -{ - struct lu_context_key *key = k; - va_list args; - int result; - - va_start(args, k); - do { - result = lu_context_key_register(key); - if (result) - break; - key = va_arg(args, struct lu_context_key *); - } while (key); - va_end(args); - - if (result != 0) { - va_start(args, k); - while (k != key) { - lu_context_key_degister(k); - k = va_arg(args, struct lu_context_key *); - } - va_end(args); - } - - return result; -} -EXPORT_SYMBOL(lu_context_key_register_many); - -/** - * De-register a number of keys. This is a dual to - * lu_context_key_register_many(). - */ -void lu_context_key_degister_many(struct lu_context_key *k, ...) -{ - va_list args; - - va_start(args, k); - do { - lu_context_key_degister(k); - k = va_arg(args, struct lu_context_key*); - } while (k); - va_end(args); -} -EXPORT_SYMBOL(lu_context_key_degister_many); - -/** - * Revive a number of keys. - */ -void lu_context_key_revive_many(struct lu_context_key *k, ...) -{ - va_list args; - - va_start(args, k); - do { - lu_context_key_revive(k); - k = va_arg(args, struct lu_context_key*); - } while (k); - va_end(args); -} -EXPORT_SYMBOL(lu_context_key_revive_many); - -/** - * Quiescent a number of keys. - */ -void lu_context_key_quiesce_many(struct lu_context_key *k, ...) 
-{ - va_list args; - - va_start(args, k); - do { - lu_context_key_quiesce(k); - k = va_arg(args, struct lu_context_key*); - } while (k); - va_end(args); -} -EXPORT_SYMBOL(lu_context_key_quiesce_many); - -/** - * Return value associated with key \a key in context \a ctx. - */ -void *lu_context_key_get(const struct lu_context *ctx, - const struct lu_context_key *key) -{ - LINVRNT(ctx->lc_state == LCS_ENTERED); - LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys)); - LASSERT(lu_keys[key->lct_index] == key); - return ctx->lc_value[key->lct_index]; -} -EXPORT_SYMBOL(lu_context_key_get); - -/** - * List of remembered contexts. XXX document me. - */ -static LIST_HEAD(lu_context_remembered); - -/** - * Destroy \a key in all remembered contexts. This is used to destroy key - * values in "shared" contexts (like service threads), when a module owning - * the key is about to be unloaded. - */ -void lu_context_key_quiesce(struct lu_context_key *key) -{ - struct lu_context *ctx; - - if (!(key->lct_tags & LCT_QUIESCENT)) { - /* - * XXX memory barrier has to go here. - */ - write_lock(&lu_keys_guard); - key->lct_tags |= LCT_QUIESCENT; - - /** - * Wait until all lu_context_key::lct_init() methods - * have completed. 
- */ - while (atomic_read(&lu_key_initing_cnt) > 0) { - write_unlock(&lu_keys_guard); - CDEBUG(D_INFO, "%s: \"%s\" %p, %d (%d)\n", - __func__, - module_name(key->lct_owner), - key, atomic_read(&key->lct_used), - atomic_read(&lu_key_initing_cnt)); - schedule(); - write_lock(&lu_keys_guard); - } - - list_for_each_entry(ctx, &lu_context_remembered, lc_remember) - key_fini(ctx, key->lct_index); - - ++key_set_version; - write_unlock(&lu_keys_guard); - } -} - -void lu_context_key_revive(struct lu_context_key *key) -{ - write_lock(&lu_keys_guard); - key->lct_tags &= ~LCT_QUIESCENT; - ++key_set_version; - write_unlock(&lu_keys_guard); -} - -static void keys_fini(struct lu_context *ctx) -{ - unsigned int i; - - if (!ctx->lc_value) - return; - - for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) - key_fini(ctx, i); - - kfree(ctx->lc_value); - ctx->lc_value = NULL; -} - -static int keys_fill(struct lu_context *ctx) -{ - unsigned int pre_version; - unsigned int i; - - /* - * A serialisation with lu_context_key_quiesce() is needed, but some - * "key->lct_init()" are calling kernel memory allocation routine and - * can't be called while holding a spin_lock. - * "lu_keys_guard" is held while incrementing "lu_key_initing_cnt" - * to ensure the start of the serialisation. - * An atomic_t variable is still used, in order not to reacquire the - * lock when decrementing the counter. - */ - read_lock(&lu_keys_guard); - atomic_inc(&lu_key_initing_cnt); - pre_version = key_set_version; - read_unlock(&lu_keys_guard); - -refill: - LINVRNT(ctx->lc_value); - for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) { - struct lu_context_key *key; - - key = lu_keys[i]; - if (!ctx->lc_value[i] && key && - (key->lct_tags & ctx->lc_tags) && - /* - * Don't create values for a LCT_QUIESCENT key, as this - * will pin module owning a key. 
- */ - !(key->lct_tags & LCT_QUIESCENT)) { - void *value; - - LINVRNT(key->lct_init); - LINVRNT(key->lct_index == i); - - if (!(ctx->lc_tags & LCT_NOREF) && - !try_module_get(key->lct_owner)) { - /* module is unloading, skip this key */ - continue; - } - - value = key->lct_init(ctx, key); - if (unlikely(IS_ERR(value))) { - atomic_dec(&lu_key_initing_cnt); - return PTR_ERR(value); - } - - lu_ref_add_atomic(&key->lct_reference, "ctx", ctx); - atomic_inc(&key->lct_used); - /* - * This is the only place in the code, where an - * element of ctx->lc_value[] array is set to non-NULL - * value. - */ - ctx->lc_value[i] = value; - if (key->lct_exit) - ctx->lc_tags |= LCT_HAS_EXIT; - } - } - - read_lock(&lu_keys_guard); - if (pre_version != key_set_version) { - pre_version = key_set_version; - read_unlock(&lu_keys_guard); - goto refill; - } - ctx->lc_version = key_set_version; - atomic_dec(&lu_key_initing_cnt); - read_unlock(&lu_keys_guard); - return 0; -} - -static int keys_init(struct lu_context *ctx) -{ - ctx->lc_value = kcalloc(ARRAY_SIZE(lu_keys), sizeof(ctx->lc_value[0]), - GFP_NOFS); - if (likely(ctx->lc_value)) - return keys_fill(ctx); - - return -ENOMEM; -} - -/** - * Initialize context data-structure. Create values for all keys. - */ -int lu_context_init(struct lu_context *ctx, __u32 tags) -{ - int rc; - - memset(ctx, 0, sizeof(*ctx)); - ctx->lc_state = LCS_INITIALIZED; - ctx->lc_tags = tags; - if (tags & LCT_REMEMBER) { - write_lock(&lu_keys_guard); - list_add(&ctx->lc_remember, &lu_context_remembered); - write_unlock(&lu_keys_guard); - } else { - INIT_LIST_HEAD(&ctx->lc_remember); - } - - rc = keys_init(ctx); - if (rc != 0) - lu_context_fini(ctx); - - return rc; -} -EXPORT_SYMBOL(lu_context_init); - -/** - * Finalize context data-structure. Destroy key values. 
- */ -void lu_context_fini(struct lu_context *ctx) -{ - LINVRNT(ctx->lc_state == LCS_INITIALIZED || ctx->lc_state == LCS_LEFT); - ctx->lc_state = LCS_FINALIZED; - - if ((ctx->lc_tags & LCT_REMEMBER) == 0) { - LASSERT(list_empty(&ctx->lc_remember)); - keys_fini(ctx); - - } else { /* could race with key degister */ - write_lock(&lu_keys_guard); - keys_fini(ctx); - list_del_init(&ctx->lc_remember); - write_unlock(&lu_keys_guard); - } -} -EXPORT_SYMBOL(lu_context_fini); - -/** - * Called before entering context. - */ -void lu_context_enter(struct lu_context *ctx) -{ - LINVRNT(ctx->lc_state == LCS_INITIALIZED || ctx->lc_state == LCS_LEFT); - ctx->lc_state = LCS_ENTERED; -} -EXPORT_SYMBOL(lu_context_enter); - -/** - * Called after exiting from \a ctx - */ -void lu_context_exit(struct lu_context *ctx) -{ - unsigned int i; - - LINVRNT(ctx->lc_state == LCS_ENTERED); - ctx->lc_state = LCS_LEFT; - if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value) { - /* could race with key quiescency */ - if (ctx->lc_tags & LCT_REMEMBER) - read_lock(&lu_keys_guard); - - for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) { - if (ctx->lc_value[i]) { - struct lu_context_key *key; - - key = lu_keys[i]; - if (key->lct_exit) - key->lct_exit(ctx, - key, ctx->lc_value[i]); - } - } - - if (ctx->lc_tags & LCT_REMEMBER) - read_unlock(&lu_keys_guard); - } -} -EXPORT_SYMBOL(lu_context_exit); - -/** - * Allocate for context all missing keys that were registered after context - * creation. key_set_version is only changed in rare cases when modules - * are loaded and removed. - */ -int lu_context_refill(struct lu_context *ctx) -{ - read_lock(&lu_keys_guard); - if (likely(ctx->lc_version == key_set_version)) { - read_unlock(&lu_keys_guard); - return 0; - } - - read_unlock(&lu_keys_guard); - return keys_fill(ctx); -} - -/** - * lu_ctx_tags/lu_ses_tags will be updated if there are new types of - * obd being added. 
Currently, this is only used on client side, specifically - * for echo device client, for other stack (like ptlrpc threads), context are - * predefined when the lu_device type are registered, during the module probe - * phase. - */ -__u32 lu_context_tags_default; -__u32 lu_session_tags_default; - -int lu_env_init(struct lu_env *env, __u32 tags) -{ - int result; - - env->le_ses = NULL; - result = lu_context_init(&env->le_ctx, tags); - if (likely(result == 0)) - lu_context_enter(&env->le_ctx); - return result; -} -EXPORT_SYMBOL(lu_env_init); - -void lu_env_fini(struct lu_env *env) -{ - lu_context_exit(&env->le_ctx); - lu_context_fini(&env->le_ctx); - env->le_ses = NULL; -} -EXPORT_SYMBOL(lu_env_fini); - -int lu_env_refill(struct lu_env *env) -{ - int result; - - result = lu_context_refill(&env->le_ctx); - if (result == 0 && env->le_ses) - result = lu_context_refill(env->le_ses); - return result; -} -EXPORT_SYMBOL(lu_env_refill); - -struct lu_site_stats { - unsigned int lss_populated; - unsigned int lss_max_search; - unsigned int lss_total; - unsigned int lss_busy; -}; - -static void lu_site_stats_get(const struct lu_site *s, - struct lu_site_stats *stats, int populated) -{ - struct cfs_hash *hs = s->ls_obj_hash; - struct cfs_hash_bd bd; - unsigned int i; - /* - * percpu_counter_sum_positive() won't accept a const pointer - * as it does modify the struct by taking a spinlock - */ - struct lu_site *s2 = (struct lu_site *)s; - - stats->lss_busy += cfs_hash_size_get(hs) - - percpu_counter_sum_positive(&s2->ls_lru_len_counter); - cfs_hash_for_each_bucket(hs, &bd, i) { - struct hlist_head *hhead; - - cfs_hash_bd_lock(hs, &bd, 1); - stats->lss_total += cfs_hash_bd_count_get(&bd); - stats->lss_max_search = max((int)stats->lss_max_search, - cfs_hash_bd_depmax_get(&bd)); - if (!populated) { - cfs_hash_bd_unlock(hs, &bd, 1); - continue; - } - - cfs_hash_bd_for_each_hlist(hs, &bd, hhead) { - if (!hlist_empty(hhead)) - stats->lss_populated++; - } - cfs_hash_bd_unlock(hs, &bd, 1); 
- } -} - -/* - * lu_cache_shrink_count() returns an approximate number of cached objects - * that can be freed by shrink_slab(). A counter, which tracks the - * number of items in the site's lru, is maintained in a percpu_counter - * for each site. The percpu values are incremented and decremented as - * objects are added or removed from the lru. The percpu values are summed - * and saved whenever a percpu value exceeds a threshold. Thus the saved, - * summed value at any given time may not accurately reflect the current - * lru length. But this value is sufficiently accurate for the needs of - * a shrinker. - * - * Using a per cpu counter is a compromise solution to concurrent access: - * lu_object_put() can update the counter without locking the site and - * lu_cache_shrink_count can sum the counters without locking each - * ls_obj_hash bucket. - */ -static unsigned long lu_cache_shrink_count(struct shrinker *sk, - struct shrink_control *sc) -{ - struct lu_site *s; - struct lu_site *tmp; - unsigned long cached = 0; - - if (!(sc->gfp_mask & __GFP_FS)) - return 0; - - down_read(&lu_sites_guard); - list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) - cached += percpu_counter_read_positive(&s->ls_lru_len_counter); - up_read(&lu_sites_guard); - - cached = (cached / 100) * sysctl_vfs_cache_pressure; - CDEBUG(D_INODE, "%ld objects cached, cache pressure %d\n", - cached, sysctl_vfs_cache_pressure); - - return cached; -} - -static unsigned long lu_cache_shrink_scan(struct shrinker *sk, - struct shrink_control *sc) -{ - struct lu_site *s; - struct lu_site *tmp; - unsigned long remain = sc->nr_to_scan, freed = 0; - LIST_HEAD(splice); - - if (!(sc->gfp_mask & __GFP_FS)) - /* We must not take the lu_sites_guard lock when - * __GFP_FS is *not* set because of the deadlock - * possibility detailed above. Additionally, - * since we cannot determine the number of - * objects in the cache without taking this - * lock, we're in a particularly tough spot. 
As - * a result, we'll just lie and say our cache is - * empty. This _should_ be ok, as we can't - * reclaim objects when __GFP_FS is *not* set - * anyways. - */ - return SHRINK_STOP; - - down_write(&lu_sites_guard); - list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) { - freed = lu_site_purge(&lu_shrink_env, s, remain); - remain -= freed; - /* - * Move just shrunk site to the tail of site list to - * assure shrinking fairness. - */ - list_move_tail(&s->ls_linkage, &splice); - } - list_splice(&splice, lu_sites.prev); - up_write(&lu_sites_guard); - - return sc->nr_to_scan - remain; -} - -/** - * Debugging printer function using printk(). - */ -static struct shrinker lu_site_shrinker = { - .count_objects = lu_cache_shrink_count, - .scan_objects = lu_cache_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; - -/** - * Initialization of global lu_* data. - */ -int lu_global_init(void) -{ - int result; - - CDEBUG(D_INFO, "Lustre LU module (%p).\n", &lu_keys); - - result = lu_ref_global_init(); - if (result != 0) - return result; - - LU_CONTEXT_KEY_INIT(&lu_global_key); - result = lu_context_key_register(&lu_global_key); - if (result != 0) { - lu_ref_global_fini(); - return result; - } - - /* - * At this level, we don't know what tags are needed, so allocate them - * conservatively. This should not be too bad, because this - * environment is global. - */ - down_write(&lu_sites_guard); - result = lu_env_init(&lu_shrink_env, LCT_SHRINKER); - up_write(&lu_sites_guard); - if (result != 0) { - lu_context_key_degister(&lu_global_key); - lu_ref_global_fini(); - return result; - } - - /* - * seeks estimation: 3 seeks to read a record from oi, one to read - * inode, one for ea. Unfortunately setting this high value results in - * lu_object/inode cache consuming all the memory. - */ - result = register_shrinker(&lu_site_shrinker); - if (result != 0) { - /* Order explained in lu_global_fini(). 
*/ - lu_context_key_degister(&lu_global_key); - - down_write(&lu_sites_guard); - lu_env_fini(&lu_shrink_env); - up_write(&lu_sites_guard); - - lu_ref_global_fini(); - return result; - } - - return 0; -} - -/** - * Dual to lu_global_init(). - */ -void lu_global_fini(void) -{ - unregister_shrinker(&lu_site_shrinker); - lu_context_key_degister(&lu_global_key); - - /* - * Tear shrinker environment down _after_ de-registering - * lu_global_key, because the latter has a value in the former. - */ - down_write(&lu_sites_guard); - lu_env_fini(&lu_shrink_env); - up_write(&lu_sites_guard); - - lu_ref_global_fini(); -} - -static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx) -{ - struct lprocfs_counter ret; - - lprocfs_stats_collect(stats, idx, &ret); - return (__u32)ret.lc_count; -} - -/** - * Output site statistical counters into a buffer. Suitable for - * lprocfs_rd_*()-style functions. - */ -int lu_site_stats_print(const struct lu_site *s, struct seq_file *m) -{ - struct lu_site_stats stats; - - memset(&stats, 0, sizeof(stats)); - lu_site_stats_get(s, &stats, 1); - - seq_printf(m, "%d/%d %d/%ld %d %d %d %d %d %d %d\n", - stats.lss_busy, - stats.lss_total, - stats.lss_populated, - CFS_HASH_NHLIST(s->ls_obj_hash), - stats.lss_max_search, - ls_stats_read(s->ls_stats, LU_SS_CREATED), - ls_stats_read(s->ls_stats, LU_SS_CACHE_HIT), - ls_stats_read(s->ls_stats, LU_SS_CACHE_MISS), - ls_stats_read(s->ls_stats, LU_SS_CACHE_RACE), - ls_stats_read(s->ls_stats, LU_SS_CACHE_DEATH_RACE), - ls_stats_read(s->ls_stats, LU_SS_LRU_PURGED)); - return 0; -} -EXPORT_SYMBOL(lu_site_stats_print); - -/** - * Helper function to initialize a number of kmem slab caches at once. 
- */ -int lu_kmem_init(struct lu_kmem_descr *caches) -{ - int result; - struct lu_kmem_descr *iter = caches; - - for (result = 0; iter->ckd_cache; ++iter) { - *iter->ckd_cache = kmem_cache_create(iter->ckd_name, - iter->ckd_size, - 0, 0, NULL); - if (!*iter->ckd_cache) { - result = -ENOMEM; - /* free all previously allocated caches */ - lu_kmem_fini(caches); - break; - } - } - return result; -} -EXPORT_SYMBOL(lu_kmem_init); - -/** - * Helper function to finalize a number of kmem slab cached at once. Dual to - * lu_kmem_init(). - */ -void lu_kmem_fini(struct lu_kmem_descr *caches) -{ - for (; caches->ckd_cache; ++caches) { - kmem_cache_destroy(*caches->ckd_cache); - *caches->ckd_cache = NULL; - } -} -EXPORT_SYMBOL(lu_kmem_fini); diff --git a/drivers/staging/lustre/lustre/obdclass/lu_ref.c b/drivers/staging/lustre/lustre/obdclass/lu_ref.c deleted file mode 100644 index f67cb89ea0ba..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/lu_ref.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/lu_ref.c - * - * Lustre reference. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c deleted file mode 100644 index cdc8dc10690d..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c +++ /dev/null @@ -1,241 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/obdclass/lustre_handles.c - * - * Author: Phil Schwan - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include - -static __u64 handle_base; -#define HANDLE_INCR 7 -static spinlock_t handle_base_lock; - -static struct handle_bucket { - spinlock_t lock; - struct list_head head; -} *handle_hash; - -#define HANDLE_HASH_SIZE (1 << 16) -#define HANDLE_HASH_MASK (HANDLE_HASH_SIZE - 1) - -/* - * Generate a unique 64bit cookie (hash) for a handle and insert it into - * global (per-node) hash-table. - */ -void class_handle_hash(struct portals_handle *h, - struct portals_handle_ops *ops) -{ - struct handle_bucket *bucket; - - LASSERT(h); - LASSERT(list_empty(&h->h_link)); - - /* - * This is fast, but simplistic cookie generation algorithm, it will - * need a re-do at some point in the future for security. - */ - spin_lock(&handle_base_lock); - handle_base += HANDLE_INCR; - - if (unlikely(handle_base == 0)) { - /* - * Cookie of zero is "dangerous", because in many places it's - * assumed that 0 means "unassigned" handle, not bound to any - * object. 
- */ - CWARN("The universe has been exhausted: cookie wrap-around.\n"); - handle_base += HANDLE_INCR; - } - h->h_cookie = handle_base; - spin_unlock(&handle_base_lock); - - h->h_ops = ops; - spin_lock_init(&h->h_lock); - - bucket = &handle_hash[h->h_cookie & HANDLE_HASH_MASK]; - spin_lock(&bucket->lock); - list_add_rcu(&h->h_link, &bucket->head); - h->h_in = 1; - spin_unlock(&bucket->lock); - - CDEBUG(D_INFO, "added object %p with handle %#llx to hash\n", - h, h->h_cookie); -} -EXPORT_SYMBOL(class_handle_hash); - -static void class_handle_unhash_nolock(struct portals_handle *h) -{ - if (list_empty(&h->h_link)) { - CERROR("removing an already-removed handle (%#llx)\n", - h->h_cookie); - return; - } - - CDEBUG(D_INFO, "removing object %p with handle %#llx from hash\n", - h, h->h_cookie); - - spin_lock(&h->h_lock); - if (h->h_in == 0) { - spin_unlock(&h->h_lock); - return; - } - h->h_in = 0; - spin_unlock(&h->h_lock); - list_del_rcu(&h->h_link); -} - -void class_handle_unhash(struct portals_handle *h) -{ - struct handle_bucket *bucket; - - bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK); - - spin_lock(&bucket->lock); - class_handle_unhash_nolock(h); - spin_unlock(&bucket->lock); -} -EXPORT_SYMBOL(class_handle_unhash); - -void *class_handle2object(__u64 cookie, const void *owner) -{ - struct handle_bucket *bucket; - struct portals_handle *h; - void *retval = NULL; - - LASSERT(handle_hash); - - /* Be careful when you want to change this code. See the - * rcu_read_lock() definition on top this file. 
- jxiong - */ - bucket = handle_hash + (cookie & HANDLE_HASH_MASK); - - rcu_read_lock(); - list_for_each_entry_rcu(h, &bucket->head, h_link) { - if (h->h_cookie != cookie || h->h_owner != owner) - continue; - - spin_lock(&h->h_lock); - if (likely(h->h_in != 0)) { - h->h_ops->hop_addref(h); - retval = h; - } - spin_unlock(&h->h_lock); - break; - } - rcu_read_unlock(); - - return retval; -} -EXPORT_SYMBOL(class_handle2object); - -void class_handle_free_cb(struct rcu_head *rcu) -{ - struct portals_handle *h; - void *ptr; - - h = container_of(rcu, struct portals_handle, h_rcu); - ptr = (void *)(unsigned long)h->h_cookie; - - if (h->h_ops->hop_free) - h->h_ops->hop_free(ptr, h->h_size); - else - kfree(ptr); -} -EXPORT_SYMBOL(class_handle_free_cb); - -int class_handle_init(void) -{ - struct handle_bucket *bucket; - - LASSERT(!handle_hash); - - handle_hash = kvzalloc(sizeof(*bucket) * HANDLE_HASH_SIZE, - GFP_KERNEL); - if (!handle_hash) - return -ENOMEM; - - spin_lock_init(&handle_base_lock); - for (bucket = handle_hash + HANDLE_HASH_SIZE - 1; bucket >= handle_hash; - bucket--) { - INIT_LIST_HEAD(&bucket->head); - spin_lock_init(&bucket->lock); - } - - get_random_bytes(&handle_base, sizeof(handle_base)); - LASSERT(handle_base != 0ULL); - - return 0; -} - -static int cleanup_all_handles(void) -{ - int rc; - int i; - - for (rc = i = 0; i < HANDLE_HASH_SIZE; i++) { - struct portals_handle *h; - - spin_lock(&handle_hash[i].lock); - list_for_each_entry_rcu(h, &handle_hash[i].head, h_link) { - CERROR("force clean handle %#llx addr %p ops %p\n", - h->h_cookie, h, h->h_ops); - - class_handle_unhash_nolock(h); - rc++; - } - spin_unlock(&handle_hash[i].lock); - } - - return rc; -} - -void class_handle_cleanup(void) -{ - int count; - - LASSERT(handle_hash); - - count = cleanup_all_handles(); - - kvfree(handle_hash); - handle_hash = NULL; - - if (count != 0) - CERROR("handle_count at cleanup: %d\n", count); -} diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c 
b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c deleted file mode 100644 index e286a2665423..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c +++ /dev/null @@ -1,214 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -#include -#include -#include -#include -#include -#include - -#define NIDS_MAX 32 - -struct uuid_nid_data { - struct list_head un_list; - struct obd_uuid un_uuid; - int un_nid_count; - lnet_nid_t un_nids[NIDS_MAX]; -}; - -/* FIXME: This should probably become more elegant than a global linked list */ -static struct list_head g_uuid_list; -static spinlock_t g_uuid_lock; - -void class_init_uuidlist(void) -{ - INIT_LIST_HEAD(&g_uuid_list); - spin_lock_init(&g_uuid_lock); -} - -void class_exit_uuidlist(void) -{ - /* delete all */ - class_del_uuid(NULL); -} - -int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index) -{ - struct uuid_nid_data *data; - struct obd_uuid tmp; - int rc = -ENOENT; - - obd_str2uuid(&tmp, uuid); - spin_lock(&g_uuid_lock); - list_for_each_entry(data, &g_uuid_list, un_list) { - if (obd_uuid_equals(&data->un_uuid, &tmp)) { - if (index >= data->un_nid_count) - break; - - rc = 0; - *peer_nid = data->un_nids[index]; - break; - } - } - spin_unlock(&g_uuid_lock); - return rc; -} -EXPORT_SYMBOL(lustre_uuid_to_peer); - -/* Add a nid to a niduuid. Multiple nids can be added to a single uuid; - * LNET will choose the best one. 
- */ -int class_add_uuid(const char *uuid, __u64 nid) -{ - struct uuid_nid_data *data, *entry; - int found = 0; - - LASSERT(nid != 0); /* valid newconfig NID is never zero */ - - if (strlen(uuid) > UUID_MAX - 1) - return -EOVERFLOW; - - data = kzalloc(sizeof(*data), GFP_NOFS); - if (!data) - return -ENOMEM; - - obd_str2uuid(&data->un_uuid, uuid); - data->un_nids[0] = nid; - data->un_nid_count = 1; - - spin_lock(&g_uuid_lock); - list_for_each_entry(entry, &g_uuid_list, un_list) { - if (obd_uuid_equals(&entry->un_uuid, &data->un_uuid)) { - int i; - - found = 1; - for (i = 0; i < entry->un_nid_count; i++) - if (nid == entry->un_nids[i]) - break; - - if (i == entry->un_nid_count) { - LASSERT(entry->un_nid_count < NIDS_MAX); - entry->un_nids[entry->un_nid_count++] = nid; - } - break; - } - } - if (!found) - list_add(&data->un_list, &g_uuid_list); - spin_unlock(&g_uuid_lock); - - if (found) { - CDEBUG(D_INFO, "found uuid %s %s cnt=%d\n", uuid, - libcfs_nid2str(nid), entry->un_nid_count); - kfree(data); - } else { - CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid)); - } - return 0; -} - -/* Delete the nids for one uuid if specified, otherwise delete all */ -int class_del_uuid(const char *uuid) -{ - LIST_HEAD(deathrow); - struct uuid_nid_data *data; - struct uuid_nid_data *temp; - - spin_lock(&g_uuid_lock); - if (uuid) { - struct obd_uuid tmp; - - obd_str2uuid(&tmp, uuid); - list_for_each_entry(data, &g_uuid_list, un_list) { - if (obd_uuid_equals(&data->un_uuid, &tmp)) { - list_move(&data->un_list, &deathrow); - break; - } - } - } else { - list_splice_init(&g_uuid_list, &deathrow); - } - spin_unlock(&g_uuid_lock); - - if (uuid && list_empty(&deathrow)) { - CDEBUG(D_INFO, "Try to delete a non-existent uuid %s\n", uuid); - return -EINVAL; - } - - list_for_each_entry_safe(data, temp, &deathrow, un_list) { - list_del(&data->un_list); - - CDEBUG(D_INFO, "del uuid %s %s/%d\n", - obd_uuid2str(&data->un_uuid), - libcfs_nid2str(data->un_nids[0]), - data->un_nid_count); 
- - kfree(data); - } - - return 0; -} - -/* check if @nid exists in nid list of @uuid */ -int class_check_uuid(struct obd_uuid *uuid, __u64 nid) -{ - struct uuid_nid_data *entry; - int found = 0; - - CDEBUG(D_INFO, "check if uuid %s has %s.\n", - obd_uuid2str(uuid), libcfs_nid2str(nid)); - - spin_lock(&g_uuid_lock); - list_for_each_entry(entry, &g_uuid_list, un_list) { - int i; - - if (!obd_uuid_equals(&entry->un_uuid, uuid)) - continue; - - /* found the uuid, check if it has @nid */ - for (i = 0; i < entry->un_nid_count; i++) { - if (entry->un_nids[i] == nid) { - found = 1; - break; - } - } - break; - } - spin_unlock(&g_uuid_lock); - return found; -} -EXPORT_SYMBOL(class_check_uuid); diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c deleted file mode 100644 index ffc1814398a5..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/obd_config.c +++ /dev/null @@ -1,1538 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/obd_config.c - * - * Config API - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include - -#include -#include -#include -#include -#include -#include - -#include "llog_internal.h" - -/* - * uuid<->export lustre hash operations - */ -/* - * NOTE: It is impossible to find an export that is in failed - * state with this function - */ -static int -uuid_keycmp(struct rhashtable_compare_arg *arg, const void *obj) -{ - const struct obd_uuid *uuid = arg->key; - const struct obd_export *exp = obj; - - if (obd_uuid_equals(uuid, &exp->exp_client_uuid) && - !exp->exp_failed) - return 0; - return -ESRCH; -} - -static void -uuid_export_exit(void *vexport, void *data) -{ - struct obd_export *exp = vexport; - - class_export_put(exp); -} - -static const struct rhashtable_params uuid_hash_params = { - .key_len = sizeof(struct obd_uuid), - .key_offset = offsetof(struct obd_export, exp_client_uuid), - .head_offset = offsetof(struct obd_export, exp_uuid_hash), - .obj_cmpfn = uuid_keycmp, - .automatic_shrinking = true, -}; - -int obd_uuid_add(struct obd_device *obd, struct obd_export *export) -{ - int rc; - - rc = rhashtable_lookup_insert_fast(&obd->obd_uuid_hash, - &export->exp_uuid_hash, - uuid_hash_params); - if (rc == 0) - class_export_get(export); - else if (rc == -EEXIST) - rc = -EALREADY; - else - /* map obscure error codes to -ENOMEM */ - rc = -ENOMEM; - return rc; -} - -void obd_uuid_del(struct obd_device *obd, struct obd_export *export) -{ - int rc; - - rc = rhashtable_remove_fast(&obd->obd_uuid_hash, - &export->exp_uuid_hash, - uuid_hash_params); - - if (rc == 0) - class_export_put(export); -} - -/*********** string parsing utils *********/ - -/* returns 0 if we find this key in the buffer, else 1 */ -int class_find_param(char *buf, char *key, char **valp) -{ - char *ptr; - - if (!buf) - 
return 1; - - ptr = strstr(buf, key); - if (!ptr) - return 1; - - if (valp) - *valp = ptr + strlen(key); - - return 0; -} -EXPORT_SYMBOL(class_find_param); - -/* returns 0 if this is the first key in the buffer, else 1. - * valp points to first char after key. - */ -static int class_match_param(char *buf, const char *key, char **valp) -{ - if (!buf) - return 1; - - if (memcmp(buf, key, strlen(key)) != 0) - return 1; - - if (valp) - *valp = buf + strlen(key); - - return 0; -} - -static int parse_nid(char *buf, void *value, int quiet) -{ - lnet_nid_t *nid = value; - - *nid = libcfs_str2nid(buf); - if (*nid != LNET_NID_ANY) - return 0; - - if (!quiet) - LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", buf); - return -EINVAL; -} - -static int parse_net(char *buf, void *value) -{ - __u32 *net = value; - - *net = libcfs_str2net(buf); - CDEBUG(D_INFO, "Net %s\n", libcfs_net2str(*net)); - return 0; -} - -enum { - CLASS_PARSE_NID = 1, - CLASS_PARSE_NET, -}; - -/* 0 is good nid, - * 1 not found - * < 0 error - * endh is set to next separator - */ -static int class_parse_value(char *buf, int opc, void *value, char **endh, - int quiet) -{ - char *endp; - char tmp; - int rc = 0; - - if (!buf) - return 1; - while (*buf == ',' || *buf == ':') - buf++; - if (*buf == ' ' || *buf == '/' || *buf == '\0') - return 1; - - /* nid separators or end of nids */ - endp = strpbrk(buf, ",: /"); - if (!endp) - endp = buf + strlen(buf); - - tmp = *endp; - *endp = '\0'; - switch (opc) { - default: - LBUG(); - case CLASS_PARSE_NID: - rc = parse_nid(buf, value, quiet); - break; - case CLASS_PARSE_NET: - rc = parse_net(buf, value); - break; - } - *endp = tmp; - if (rc != 0) - return rc; - if (endh) - *endh = endp; - return 0; -} - -int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh) -{ - return class_parse_value(buf, CLASS_PARSE_NID, (void *)nid, endh, 0); -} -EXPORT_SYMBOL(class_parse_nid); - -int class_parse_nid_quiet(char *buf, lnet_nid_t *nid, char **endh) -{ - return 
class_parse_value(buf, CLASS_PARSE_NID, (void *)nid, endh, 1); -} -EXPORT_SYMBOL(class_parse_nid_quiet); - -char *lustre_cfg_string(struct lustre_cfg *lcfg, u32 index) -{ - char *s; - - if (!lcfg->lcfg_buflens[index]) - return NULL; - - s = lustre_cfg_buf(lcfg, index); - if (!s) - return NULL; - - /* - * make sure it's NULL terminated, even if this kills a char - * of data. Try to use the padding first though. - */ - if (s[lcfg->lcfg_buflens[index] - 1] != '\0') { - size_t last = ALIGN(lcfg->lcfg_buflens[index], 8) - 1; - char lost; - - /* Use the smaller value */ - if (last > lcfg->lcfg_buflens[index]) - last = lcfg->lcfg_buflens[index]; - - lost = s[last]; - s[last] = '\0'; - if (lost != '\0') { - CWARN("Truncated buf %d to '%s' (lost '%c'...)\n", - index, s, lost); - } - } - return s; -} -EXPORT_SYMBOL(lustre_cfg_string); - -/********************** class fns **********************/ - -/** - * Create a new obd device and set the type, name and uuid. If successful, - * the new device can be accessed by either name or uuid. 
- */ -static int class_attach(struct lustre_cfg *lcfg) -{ - struct obd_device *obd = NULL; - char *typename, *name, *uuid; - int rc, len; - - if (!LUSTRE_CFG_BUFLEN(lcfg, 1)) { - CERROR("No type passed!\n"); - return -EINVAL; - } - typename = lustre_cfg_string(lcfg, 1); - - if (!LUSTRE_CFG_BUFLEN(lcfg, 0)) { - CERROR("No name passed!\n"); - return -EINVAL; - } - name = lustre_cfg_string(lcfg, 0); - - if (!LUSTRE_CFG_BUFLEN(lcfg, 2)) { - CERROR("No UUID passed!\n"); - return -EINVAL; - } - uuid = lustre_cfg_string(lcfg, 2); - - CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n", - typename, name, uuid); - - obd = class_newdev(typename, name); - if (IS_ERR(obd)) { - /* Already exists or out of obds */ - rc = PTR_ERR(obd); - obd = NULL; - CERROR("Cannot create device %s of type %s : %d\n", - name, typename, rc); - goto out; - } - LASSERTF(obd, "Cannot get obd device %s of type %s\n", - name, typename); - LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, - "obd %p obd_magic %08X != %08X\n", - obd, obd->obd_magic, OBD_DEVICE_MAGIC); - LASSERTF(strncmp(obd->obd_name, name, strlen(name)) == 0, - "%p obd_name %s != %s\n", obd, obd->obd_name, name); - - rwlock_init(&obd->obd_pool_lock); - obd->obd_pool_limit = 0; - obd->obd_pool_slv = 0; - - INIT_LIST_HEAD(&obd->obd_exports); - INIT_LIST_HEAD(&obd->obd_unlinked_exports); - INIT_LIST_HEAD(&obd->obd_delayed_exports); - spin_lock_init(&obd->obd_nid_lock); - spin_lock_init(&obd->obd_dev_lock); - mutex_init(&obd->obd_dev_mutex); - spin_lock_init(&obd->obd_osfs_lock); - /* obd->obd_osfs_age must be set to a value in the distant - * past to guarantee a fresh statfs is fetched on mount. 
- */ - obd->obd_osfs_age = get_jiffies_64() - 1000 * HZ; - - /* XXX belongs in setup not attach */ - init_rwsem(&obd->obd_observer_link_sem); - /* recovery data */ - init_waitqueue_head(&obd->obd_evict_inprogress_waitq); - - llog_group_init(&obd->obd_olg); - - obd->obd_conn_inprogress = 0; - - len = strlen(uuid); - if (len >= sizeof(obd->obd_uuid)) { - CERROR("uuid must be < %d bytes long\n", - (int)sizeof(obd->obd_uuid)); - rc = -EINVAL; - goto out; - } - memcpy(obd->obd_uuid.uuid, uuid, len); - - /* Detach drops this */ - spin_lock(&obd->obd_dev_lock); - atomic_set(&obd->obd_refcount, 1); - spin_unlock(&obd->obd_dev_lock); - lu_ref_init(&obd->obd_reference); - lu_ref_add(&obd->obd_reference, "attach", obd); - - obd->obd_attached = 1; - CDEBUG(D_IOCTL, "OBD: dev %d attached type %s with refcount %d\n", - obd->obd_minor, typename, atomic_read(&obd->obd_refcount)); - return 0; - out: - if (obd) - class_release_dev(obd); - - return rc; -} - -/** Create hashes, self-export, and call type-specific setup. - * Setup is effectively the "start this obd" call. - */ -static int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - int err = 0; - struct obd_export *exp; - - LASSERT(obd); - LASSERTF(obd == class_num2obd(obd->obd_minor), - "obd %p != obd_devs[%d] %p\n", - obd, obd->obd_minor, class_num2obd(obd->obd_minor)); - LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, - "obd %p obd_magic %08x != %08x\n", - obd, obd->obd_magic, OBD_DEVICE_MAGIC); - - /* have we attached a type to this device? */ - if (!obd->obd_attached) { - CERROR("Device %d not attached\n", obd->obd_minor); - return -ENODEV; - } - - if (obd->obd_set_up) { - CERROR("Device %d already setup (type %s)\n", - obd->obd_minor, obd->obd_type->typ_name); - return -EEXIST; - } - - /* is someone else setting us up right now? 
(attach inits spinlock) */ - spin_lock(&obd->obd_dev_lock); - if (obd->obd_starting) { - spin_unlock(&obd->obd_dev_lock); - CERROR("Device %d setup in progress (type %s)\n", - obd->obd_minor, obd->obd_type->typ_name); - return -EEXIST; - } - /* just leave this on forever. I can't use obd_set_up here because - * other fns check that status, and we're not actually set up yet. - */ - obd->obd_starting = 1; - spin_unlock(&obd->obd_dev_lock); - - /* create an uuid-export lustre hash */ - err = rhashtable_init(&obd->obd_uuid_hash, &uuid_hash_params); - - if (err) - goto err_hash; - - exp = class_new_export(obd, &obd->obd_uuid); - if (IS_ERR(exp)) { - err = PTR_ERR(exp); - goto err_new; - } - - obd->obd_self_export = exp; - class_export_put(exp); - - err = obd_setup(obd, lcfg); - if (err) - goto err_exp; - - obd->obd_set_up = 1; - - spin_lock(&obd->obd_dev_lock); - /* cleanup drops this */ - class_incref(obd, "setup", obd); - spin_unlock(&obd->obd_dev_lock); - - CDEBUG(D_IOCTL, "finished setup of obd %s (uuid %s)\n", - obd->obd_name, obd->obd_uuid.uuid); - - return 0; -err_exp: - if (obd->obd_self_export) { - class_unlink_export(obd->obd_self_export); - obd->obd_self_export = NULL; - } -err_new: - rhashtable_destroy(&obd->obd_uuid_hash); -err_hash: - obd->obd_starting = 0; - CERROR("setup %s failed (%d)\n", obd->obd_name, err); - return err; -} - -/** We have finished using this obd and are ready to destroy it. - * There can be no more references to this obd. 
- */ -static int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - if (obd->obd_set_up) { - CERROR("OBD device %d still set up\n", obd->obd_minor); - return -EBUSY; - } - - spin_lock(&obd->obd_dev_lock); - if (!obd->obd_attached) { - spin_unlock(&obd->obd_dev_lock); - CERROR("OBD device %d not attached\n", obd->obd_minor); - return -ENODEV; - } - obd->obd_attached = 0; - spin_unlock(&obd->obd_dev_lock); - - CDEBUG(D_IOCTL, "detach on obd %s (uuid %s)\n", - obd->obd_name, obd->obd_uuid.uuid); - - class_decref(obd, "attach", obd); - return 0; -} - -/** Start shutting down the obd. There may be in-progress ops when - * this is called. We tell them to start shutting down with a call - * to class_disconnect_exports(). - */ -static int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - int err = 0; - char *flag; - - OBD_RACE(OBD_FAIL_LDLM_RECOV_CLIENTS); - - if (!obd->obd_set_up) { - CERROR("Device %d not setup\n", obd->obd_minor); - return -ENODEV; - } - - spin_lock(&obd->obd_dev_lock); - if (obd->obd_stopping) { - spin_unlock(&obd->obd_dev_lock); - CERROR("OBD %d already stopping\n", obd->obd_minor); - return -ENODEV; - } - /* Leave this on forever */ - obd->obd_stopping = 1; - spin_unlock(&obd->obd_dev_lock); - - while (obd->obd_conn_inprogress > 0) - cond_resched(); - smp_rmb(); - - if (lcfg->lcfg_bufcount >= 2 && LUSTRE_CFG_BUFLEN(lcfg, 1) > 0) { - for (flag = lustre_cfg_string(lcfg, 1); *flag != 0; flag++) - switch (*flag) { - case 'F': - obd->obd_force = 1; - break; - case 'A': - LCONSOLE_WARN("Failing over %s\n", - obd->obd_name); - obd->obd_fail = 1; - obd->obd_no_transno = 1; - obd->obd_no_recov = 1; - if (OBP(obd, iocontrol)) { - obd_iocontrol(OBD_IOC_SYNC, - obd->obd_self_export, - 0, NULL, NULL); - } - break; - default: - CERROR("Unrecognised flag '%c'\n", *flag); - } - } - - LASSERT(obd->obd_self_export); - - /* Precleanup, we must make sure all exports get destroyed. 
*/ - err = obd_precleanup(obd); - if (err) - CERROR("Precleanup %s returned %d\n", - obd->obd_name, err); - - /* destroy an uuid-export hash body */ - rhashtable_free_and_destroy(&obd->obd_uuid_hash, uuid_export_exit, NULL); - - class_decref(obd, "setup", obd); - obd->obd_set_up = 0; - - return 0; -} - -struct obd_device *class_incref(struct obd_device *obd, - const char *scope, const void *source) -{ - lu_ref_add_atomic(&obd->obd_reference, scope, source); - atomic_inc(&obd->obd_refcount); - CDEBUG(D_INFO, "incref %s (%p) now %d\n", obd->obd_name, obd, - atomic_read(&obd->obd_refcount)); - - return obd; -} -EXPORT_SYMBOL(class_incref); - -void class_decref(struct obd_device *obd, const char *scope, const void *source) -{ - int err; - int refs; - - spin_lock(&obd->obd_dev_lock); - atomic_dec(&obd->obd_refcount); - refs = atomic_read(&obd->obd_refcount); - spin_unlock(&obd->obd_dev_lock); - lu_ref_del(&obd->obd_reference, scope, source); - - CDEBUG(D_INFO, "Decref %s (%p) now %d\n", obd->obd_name, obd, refs); - - if ((refs == 1) && obd->obd_stopping) { - /* All exports have been destroyed; there should - * be no more in-progress ops by this point. - */ - - spin_lock(&obd->obd_self_export->exp_lock); - obd->obd_self_export->exp_flags |= exp_flags_from_obd(obd); - spin_unlock(&obd->obd_self_export->exp_lock); - - /* note that we'll recurse into class_decref again */ - class_unlink_export(obd->obd_self_export); - return; - } - - if (refs == 0) { - CDEBUG(D_CONFIG, "finishing cleanup of obd %s (%s)\n", - obd->obd_name, obd->obd_uuid.uuid); - LASSERT(!obd->obd_attached); - if (obd->obd_stopping) { - /* If we're not stopping, we were never set up */ - err = obd_cleanup(obd); - if (err) - CERROR("Cleanup %s returned %d\n", - obd->obd_name, err); - } - class_release_dev(obd); - } -} -EXPORT_SYMBOL(class_decref); - -/** Add a failover nid location. - * Client obd types contact server obd types using this nid list. 
- */ -static int class_add_conn(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - struct obd_import *imp; - struct obd_uuid uuid; - int rc; - - if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1 || - LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(struct obd_uuid)) { - CERROR("invalid conn_uuid\n"); - return -EINVAL; - } - if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) && - strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) && - strcmp(obd->obd_type->typ_name, LUSTRE_OSP_NAME) && - strcmp(obd->obd_type->typ_name, LUSTRE_LWP_NAME) && - strcmp(obd->obd_type->typ_name, LUSTRE_MGC_NAME)) { - CERROR("can't add connection on non-client dev\n"); - return -EINVAL; - } - - imp = obd->u.cli.cl_import; - if (!imp) { - CERROR("try to add conn on immature client dev\n"); - return -EINVAL; - } - - obd_str2uuid(&uuid, lustre_cfg_string(lcfg, 1)); - rc = obd_add_conn(imp, &uuid, lcfg->lcfg_num); - - return rc; -} - -/** Remove a failover nid location. - */ -static int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - struct obd_import *imp; - struct obd_uuid uuid; - int rc; - - if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1 || - LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(struct obd_uuid)) { - CERROR("invalid conn_uuid\n"); - return -EINVAL; - } - if (strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) && - strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME)) { - CERROR("can't del connection on non-client dev\n"); - return -EINVAL; - } - - imp = obd->u.cli.cl_import; - if (!imp) { - CERROR("try to del conn on immature client dev\n"); - return -EINVAL; - } - - obd_str2uuid(&uuid, lustre_cfg_string(lcfg, 1)); - rc = obd_del_conn(imp, &uuid); - - return rc; -} - -static LIST_HEAD(lustre_profile_list); -static DEFINE_SPINLOCK(lustre_profile_list_lock); - -struct lustre_profile *class_get_profile(const char *prof) -{ - struct lustre_profile *lprof; - - spin_lock(&lustre_profile_list_lock); - list_for_each_entry(lprof, &lustre_profile_list, lp_list) { - if (!strcmp(lprof->lp_profile, prof)) { - lprof->lp_refs++; 
- spin_unlock(&lustre_profile_list_lock); - return lprof; - } - } - spin_unlock(&lustre_profile_list_lock); - return NULL; -} -EXPORT_SYMBOL(class_get_profile); - -/** Create a named "profile". - * This defines the mdc and osc names to use for a client. - * This also is used to define the lov to be used by a mdt. - */ -static int class_add_profile(int proflen, char *prof, int osclen, char *osc, - int mdclen, char *mdc) -{ - struct lustre_profile *lprof; - int err = 0; - - CDEBUG(D_CONFIG, "Add profile %s\n", prof); - - lprof = kzalloc(sizeof(*lprof), GFP_NOFS); - if (!lprof) - return -ENOMEM; - INIT_LIST_HEAD(&lprof->lp_list); - - LASSERT(proflen == (strlen(prof) + 1)); - lprof->lp_profile = kmemdup(prof, proflen, GFP_NOFS); - if (!lprof->lp_profile) { - err = -ENOMEM; - goto free_lprof; - } - - LASSERT(osclen == (strlen(osc) + 1)); - lprof->lp_dt = kmemdup(osc, osclen, GFP_NOFS); - if (!lprof->lp_dt) { - err = -ENOMEM; - goto free_lp_profile; - } - - if (mdclen > 0) { - LASSERT(mdclen == (strlen(mdc) + 1)); - lprof->lp_md = kmemdup(mdc, mdclen, GFP_NOFS); - if (!lprof->lp_md) { - err = -ENOMEM; - goto free_lp_dt; - } - } - - spin_lock(&lustre_profile_list_lock); - lprof->lp_refs = 1; - lprof->lp_list_deleted = false; - list_add(&lprof->lp_list, &lustre_profile_list); - spin_unlock(&lustre_profile_list_lock); - return err; - -free_lp_dt: - kfree(lprof->lp_dt); -free_lp_profile: - kfree(lprof->lp_profile); -free_lprof: - kfree(lprof); - return err; -} - -void class_del_profile(const char *prof) -{ - struct lustre_profile *lprof; - - CDEBUG(D_CONFIG, "Del profile %s\n", prof); - - lprof = class_get_profile(prof); - if (lprof) { - spin_lock(&lustre_profile_list_lock); - /* because get profile increments the ref counter */ - lprof->lp_refs--; - list_del(&lprof->lp_list); - lprof->lp_list_deleted = true; - spin_unlock(&lustre_profile_list_lock); - - class_put_profile(lprof); - } -} -EXPORT_SYMBOL(class_del_profile); - -void class_put_profile(struct lustre_profile 
*lprof) -{ - spin_lock(&lustre_profile_list_lock); - if (--lprof->lp_refs > 0) { - LASSERT(lprof->lp_refs > 0); - spin_unlock(&lustre_profile_list_lock); - return; - } - spin_unlock(&lustre_profile_list_lock); - - /* confirm not a negative number */ - LASSERT(!lprof->lp_refs); - - /* - * At least one class_del_profile/profiles must be called - * on the target profile or lustre_profile_list will corrupt - */ - LASSERT(lprof->lp_list_deleted); - kfree(lprof->lp_profile); - kfree(lprof->lp_dt); - kfree(lprof->lp_md); - kfree(lprof); -} -EXPORT_SYMBOL(class_put_profile); - -/* COMPAT_146 */ -void class_del_profiles(void) -{ - struct lustre_profile *lprof, *n; - - spin_lock(&lustre_profile_list_lock); - list_for_each_entry_safe(lprof, n, &lustre_profile_list, lp_list) { - list_del(&lprof->lp_list); - lprof->lp_list_deleted = true; - spin_unlock(&lustre_profile_list_lock); - - class_put_profile(lprof); - - spin_lock(&lustre_profile_list_lock); - } - spin_unlock(&lustre_profile_list_lock); -} -EXPORT_SYMBOL(class_del_profiles); - -static int class_set_global(char *ptr, int val, struct lustre_cfg *lcfg) -{ - if (class_match_param(ptr, PARAM_AT_MIN, NULL) == 0) - at_min = val; - else if (class_match_param(ptr, PARAM_AT_MAX, NULL) == 0) - at_max = val; - else if (class_match_param(ptr, PARAM_AT_EXTRA, NULL) == 0) - at_extra = val; - else if (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, NULL) == 0) - at_early_margin = val; - else if (class_match_param(ptr, PARAM_AT_HISTORY, NULL) == 0) - at_history = val; - else if (class_match_param(ptr, PARAM_JOBID_VAR, NULL) == 0) - strlcpy(obd_jobid_var, lustre_cfg_string(lcfg, 2), - JOBSTATS_JOBID_VAR_MAX_LEN + 1); - else - return -EINVAL; - - CDEBUG(D_IOCTL, "global %s = %d\n", ptr, val); - return 0; -} - -/* We can't call ll_process_config or lquota_process_config directly because - * it lives in a module that must be loaded after this one. 
- */ -static int (*client_process_config)(struct lustre_cfg *lcfg); -static int (*quota_process_config)(struct lustre_cfg *lcfg); - -void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg)) -{ - client_process_config = cpc; -} -EXPORT_SYMBOL(lustre_register_client_process_config); - -static int process_param2_config(struct lustre_cfg *lcfg) -{ - char *param = lustre_cfg_string(lcfg, 1); - char *upcall = lustre_cfg_string(lcfg, 2); - char *argv[] = { - [0] = "/usr/sbin/lctl", - [1] = "set_param", - [2] = param, - [3] = NULL - }; - ktime_t start; - ktime_t end; - int rc; - - /* Add upcall processing here. Now only lctl is supported */ - if (strcmp(upcall, LCTL_UPCALL) != 0) { - CERROR("Unsupported upcall %s\n", upcall); - return -EINVAL; - } - - start = ktime_get(); - rc = call_usermodehelper(argv[0], argv, NULL, UMH_WAIT_PROC); - end = ktime_get(); - - if (rc < 0) { - CERROR( - "lctl: error invoking upcall %s %s %s: rc = %d; time %ldus\n", - argv[0], argv[1], argv[2], rc, - (long)ktime_us_delta(end, start)); - } else { - CDEBUG(D_HA, "lctl: invoked upcall %s %s %s, time %ldus\n", - argv[0], argv[1], argv[2], - (long)ktime_us_delta(end, start)); - rc = 0; - } - - return rc; -} - -/** Process configuration commands given in lustre_cfg form. - * These may come from direct calls (e.g. class_manual_cleanup) - * or processing the config llog, or ioctl from lctl. 
- */ -int class_process_config(struct lustre_cfg *lcfg) -{ - struct obd_device *obd; - int err; - - LASSERT(lcfg && !IS_ERR(lcfg)); - CDEBUG(D_IOCTL, "processing cmd: %x\n", lcfg->lcfg_command); - - /* Commands that don't need a device */ - switch (lcfg->lcfg_command) { - case LCFG_ATTACH: { - err = class_attach(lcfg); - goto out; - } - case LCFG_ADD_UUID: { - CDEBUG(D_IOCTL, "adding mapping from uuid %s to nid %#llx (%s)\n", - lustre_cfg_string(lcfg, 1), lcfg->lcfg_nid, - libcfs_nid2str(lcfg->lcfg_nid)); - - err = class_add_uuid(lustre_cfg_string(lcfg, 1), lcfg->lcfg_nid); - goto out; - } - case LCFG_DEL_UUID: { - CDEBUG(D_IOCTL, "removing mappings for uuid %s\n", - (lcfg->lcfg_bufcount < 2 || LUSTRE_CFG_BUFLEN(lcfg, 1) == 0) - ? "" : lustre_cfg_string(lcfg, 1)); - - err = class_del_uuid(lustre_cfg_string(lcfg, 1)); - goto out; - } - case LCFG_MOUNTOPT: { - CDEBUG(D_IOCTL, "mountopt: profile %s osc %s mdc %s\n", - lustre_cfg_string(lcfg, 1), - lustre_cfg_string(lcfg, 2), - lustre_cfg_string(lcfg, 3)); - /* set these mount options somewhere, so ll_fill_super - * can find them. 
- */ - err = class_add_profile(LUSTRE_CFG_BUFLEN(lcfg, 1), - lustre_cfg_string(lcfg, 1), - LUSTRE_CFG_BUFLEN(lcfg, 2), - lustre_cfg_string(lcfg, 2), - LUSTRE_CFG_BUFLEN(lcfg, 3), - lustre_cfg_string(lcfg, 3)); - goto out; - } - case LCFG_DEL_MOUNTOPT: { - CDEBUG(D_IOCTL, "mountopt: profile %s\n", - lustre_cfg_string(lcfg, 1)); - class_del_profile(lustre_cfg_string(lcfg, 1)); - err = 0; - goto out; - } - case LCFG_SET_TIMEOUT: { - CDEBUG(D_IOCTL, "changing lustre timeout from %d to %d\n", - obd_timeout, lcfg->lcfg_num); - obd_timeout = max(lcfg->lcfg_num, 1U); - obd_timeout_set = 1; - err = 0; - goto out; - } - case LCFG_SET_LDLM_TIMEOUT: { - /* ldlm_timeout is not used on the client */ - err = 0; - goto out; - } - case LCFG_SET_UPCALL: { - LCONSOLE_ERROR_MSG(0x15a, "recovery upcall is deprecated\n"); - /* COMPAT_146 Don't fail on old configs */ - err = 0; - goto out; - } - case LCFG_MARKER: { - struct cfg_marker *marker; - - marker = lustre_cfg_buf(lcfg, 1); - CDEBUG(D_IOCTL, "marker %d (%#x) %.16s %s\n", marker->cm_step, - marker->cm_flags, marker->cm_tgtname, marker->cm_comment); - err = 0; - goto out; - } - case LCFG_PARAM: { - char *tmp; - /* llite has no obd */ - if ((class_match_param(lustre_cfg_string(lcfg, 1), - PARAM_LLITE, NULL) == 0) && - client_process_config) { - err = (*client_process_config)(lcfg); - goto out; - } else if ((class_match_param(lustre_cfg_string(lcfg, 1), - PARAM_SYS, &tmp) == 0)) { - /* Global param settings */ - err = class_set_global(tmp, lcfg->lcfg_num, lcfg); - /* - * Client or server should not fail to mount if - * it hits an unknown configuration parameter. 
- */ - if (err != 0) - CWARN("Ignoring unknown param %s\n", tmp); - - err = 0; - goto out; - } else if ((class_match_param(lustre_cfg_string(lcfg, 1), - PARAM_QUOTA, &tmp) == 0) && - quota_process_config) { - err = (*quota_process_config)(lcfg); - goto out; - } - - break; - } - case LCFG_SET_PARAM: { - err = process_param2_config(lcfg); - goto out; - } - } - /* Commands that require a device */ - obd = class_name2obd(lustre_cfg_string(lcfg, 0)); - if (!obd) { - if (!LUSTRE_CFG_BUFLEN(lcfg, 0)) - CERROR("this lcfg command requires a device name\n"); - else - CERROR("no device for: %s\n", - lustre_cfg_string(lcfg, 0)); - - err = -EINVAL; - goto out; - } - - switch (lcfg->lcfg_command) { - case LCFG_SETUP: { - err = class_setup(obd, lcfg); - goto out; - } - case LCFG_DETACH: { - err = class_detach(obd, lcfg); - err = 0; - goto out; - } - case LCFG_CLEANUP: { - err = class_cleanup(obd, lcfg); - err = 0; - goto out; - } - case LCFG_ADD_CONN: { - err = class_add_conn(obd, lcfg); - err = 0; - goto out; - } - case LCFG_DEL_CONN: { - err = class_del_conn(obd, lcfg); - err = 0; - goto out; - } - case LCFG_POOL_NEW: { - err = obd_pool_new(obd, lustre_cfg_string(lcfg, 2)); - err = 0; - goto out; - } - case LCFG_POOL_ADD: { - err = obd_pool_add(obd, lustre_cfg_string(lcfg, 2), - lustre_cfg_string(lcfg, 3)); - err = 0; - goto out; - } - case LCFG_POOL_REM: { - err = obd_pool_rem(obd, lustre_cfg_string(lcfg, 2), - lustre_cfg_string(lcfg, 3)); - err = 0; - goto out; - } - case LCFG_POOL_DEL: { - err = obd_pool_del(obd, lustre_cfg_string(lcfg, 2)); - err = 0; - goto out; - } - default: { - err = obd_process_config(obd, sizeof(*lcfg), lcfg); - goto out; - } - } -out: - if ((err < 0) && !(lcfg->lcfg_command & LCFG_REQUIRED)) { - CWARN("Ignoring error %d on optional command %#x\n", err, - lcfg->lcfg_command); - err = 0; - } - return err; -} -EXPORT_SYMBOL(class_process_config); - -int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, - struct lustre_cfg *lcfg, void 
*data) -{ - struct lprocfs_vars *var; - struct file fakefile; - struct seq_file fake_seqfile; - char *key, *sval; - int i, keylen, vallen; - int matched = 0, j = 0; - int rc = 0; - int skip = 0; - - if (lcfg->lcfg_command != LCFG_PARAM) { - CERROR("Unknown command: %d\n", lcfg->lcfg_command); - return -EINVAL; - } - - /* fake a seq file so that var->fops->write can work... */ - fakefile.private_data = &fake_seqfile; - fake_seqfile.private = data; - /* e.g. tunefs.lustre --param mdt.group_upcall=foo /r/tmp/lustre-mdt - * or lctl conf_param lustre-MDT0000.mdt.group_upcall=bar - * or lctl conf_param lustre-OST0000.osc.max_dirty_mb=36 - */ - for (i = 1; i < lcfg->lcfg_bufcount; i++) { - key = lustre_cfg_buf(lcfg, i); - /* Strip off prefix */ - if (class_match_param(key, prefix, &key)) { - /* - * If the prefix doesn't match, return error so we - * can pass it down the stack - */ - return -ENOSYS; - } - sval = strchr(key, '='); - if (!sval || (*(sval + 1) == 0)) { - CERROR("Can't parse param %s (missing '=')\n", key); - /* rc = -EINVAL; continue parsing other params */ - continue; - } - keylen = sval - key; - sval++; - vallen = strlen(sval); - matched = 0; - j = 0; - /* Search proc entries */ - while (lvars[j].name) { - var = &lvars[j]; - if (!class_match_param(key, var->name, NULL) && - keylen == strlen(var->name)) { - matched++; - rc = -EROFS; - if (var->fops && var->fops->write) { - mm_segment_t oldfs; - - oldfs = get_fs(); - set_fs(KERNEL_DS); - rc = var->fops->write(&fakefile, - (const char __user *)sval, - vallen, NULL); - set_fs(oldfs); - } - break; - } - j++; - } - if (!matched) { - CERROR("%.*s: %s unknown param %s\n", - (int)strlen(prefix) - 1, prefix, - (char *)lustre_cfg_string(lcfg, 0), key); - /* rc = -EINVAL; continue parsing other params */ - skip++; - } else if (rc < 0) { - CERROR("%s: error writing proc entry '%s': rc = %d\n", - prefix, var->name, rc); - rc = 0; - } else { - CDEBUG(D_CONFIG, "%s.%.*s: Set parameter %.*s=%s\n", - lustre_cfg_string(lcfg, 
0), - (int)strlen(prefix) - 1, prefix, - (int)(sval - key - 1), key, sval); - } - } - - if (rc > 0) - rc = 0; - if (!rc && skip) - rc = skip; - return rc; -} -EXPORT_SYMBOL(class_process_proc_param); - -/** Parse a configuration llog, doing various manipulations on them - * for various reasons, (modifications for compatibility, skip obsolete - * records, change uuids, etc), then class_process_config() resulting - * net records. - */ -int class_config_llog_handler(const struct lu_env *env, - struct llog_handle *handle, - struct llog_rec_hdr *rec, void *data) -{ - struct config_llog_instance *clli = data; - int cfg_len = rec->lrh_len; - char *cfg_buf = (char *)(rec + 1); - int rc = 0; - - switch (rec->lrh_type) { - case OBD_CFG_REC: { - struct lustre_cfg *lcfg, *lcfg_new; - struct lustre_cfg_bufs bufs; - char *inst_name = NULL; - int inst_len = 0; - size_t lcfg_len; - int swab = 0; - - lcfg = (struct lustre_cfg *)cfg_buf; - if (lcfg->lcfg_version == __swab32(LUSTRE_CFG_VERSION)) { - lustre_swab_lustre_cfg(lcfg); - swab = 1; - } - - rc = lustre_cfg_sanity_check(cfg_buf, cfg_len); - if (rc) - goto out; - - /* Figure out config state info */ - if (lcfg->lcfg_command == LCFG_MARKER) { - struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1); - - lustre_swab_cfg_marker(marker, swab, - LUSTRE_CFG_BUFLEN(lcfg, 1)); - CDEBUG(D_CONFIG, "Marker, inst_flg=%#x mark_flg=%#x\n", - clli->cfg_flags, marker->cm_flags); - if (marker->cm_flags & CM_START) { - /* all previous flags off */ - clli->cfg_flags = CFG_F_MARKER; - if (marker->cm_flags & CM_SKIP) { - clli->cfg_flags |= CFG_F_SKIP; - CDEBUG(D_CONFIG, "SKIP #%d\n", - marker->cm_step); - } else if ((marker->cm_flags & CM_EXCLUDE) || - (clli->cfg_sb && - lustre_check_exclusion(clli->cfg_sb, - marker->cm_tgtname))) { - clli->cfg_flags |= CFG_F_EXCLUDE; - CDEBUG(D_CONFIG, "EXCLUDE %d\n", - marker->cm_step); - } - } else if (marker->cm_flags & CM_END) { - clli->cfg_flags = 0; - } - } - /* A config command without a start marker before it 
is - * illegal (post 146) - */ - if (!(clli->cfg_flags & CFG_F_COMPAT146) && - !(clli->cfg_flags & CFG_F_MARKER) && - (lcfg->lcfg_command != LCFG_MARKER)) { - CWARN("Config not inside markers, ignoring! (inst: %p, uuid: %s, flags: %#x)\n", - clli->cfg_instance, - clli->cfg_uuid.uuid, clli->cfg_flags); - clli->cfg_flags |= CFG_F_SKIP; - } - if (clli->cfg_flags & CFG_F_SKIP) { - CDEBUG(D_CONFIG, "skipping %#x\n", - clli->cfg_flags); - rc = 0; - /* No processing! */ - break; - } - - /* - * For interoperability between 1.8 and 2.0, - * rename "mds" obd device type to "mdt". - */ - { - char *typename = lustre_cfg_string(lcfg, 1); - char *index = lustre_cfg_string(lcfg, 2); - - if ((lcfg->lcfg_command == LCFG_ATTACH && typename && - strcmp(typename, "mds") == 0)) { - CWARN("For 1.8 interoperability, rename obd type from mds to mdt\n"); - typename[2] = 't'; - } - if ((lcfg->lcfg_command == LCFG_SETUP && index && - strcmp(index, "type") == 0)) { - CDEBUG(D_INFO, "For 1.8 interoperability, set this index to '0'\n"); - index[0] = '0'; - index[1] = 0; - } - } - - if (clli->cfg_flags & CFG_F_EXCLUDE) { - CDEBUG(D_CONFIG, "cmd: %x marked EXCLUDED\n", - lcfg->lcfg_command); - if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD) - /* Add inactive instead */ - lcfg->lcfg_command = LCFG_LOV_ADD_INA; - } - - lustre_cfg_bufs_init(&bufs, lcfg); - - if (clli && clli->cfg_instance && - LUSTRE_CFG_BUFLEN(lcfg, 0) > 0) { - inst_len = LUSTRE_CFG_BUFLEN(lcfg, 0) + - sizeof(clli->cfg_instance) * 2 + 4; - inst_name = kasprintf(GFP_NOFS, "%s-%p", - lustre_cfg_string(lcfg, 0), - clli->cfg_instance); - if (!inst_name) { - rc = -ENOMEM; - goto out; - } - lustre_cfg_bufs_set_string(&bufs, 0, inst_name); - CDEBUG(D_CONFIG, "cmd %x, instance name: %s\n", - lcfg->lcfg_command, inst_name); - } - - /* we override the llog's uuid for clients, to insure they - * are unique - */ - if (clli && clli->cfg_instance && - lcfg->lcfg_command == LCFG_ATTACH) { - lustre_cfg_bufs_set_string(&bufs, 2, - clli->cfg_uuid.uuid); 
- } - /* - * sptlrpc config record, we expect 2 data segments: - * [0]: fs_name/target_name, - * [1]: rule string - * moving them to index [1] and [2], and insert MGC's - * obdname at index [0]. - */ - if (clli && !clli->cfg_instance && - lcfg->lcfg_command == LCFG_SPTLRPC_CONF) { - lustre_cfg_bufs_set(&bufs, 2, bufs.lcfg_buf[1], - bufs.lcfg_buflen[1]); - lustre_cfg_bufs_set(&bufs, 1, bufs.lcfg_buf[0], - bufs.lcfg_buflen[0]); - lustre_cfg_bufs_set_string(&bufs, 0, - clli->cfg_obdname); - } - - lcfg_len = lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen); - lcfg_new = kzalloc(lcfg_len, GFP_NOFS); - if (!lcfg_new) { - rc = -ENOMEM; - goto out; - } - - lustre_cfg_init(lcfg_new, lcfg->lcfg_command, &bufs); - lcfg_new->lcfg_num = lcfg->lcfg_num; - lcfg_new->lcfg_flags = lcfg->lcfg_flags; - - /* XXX Hack to try to remain binary compatible with - * pre-newconfig logs - */ - if (lcfg->lcfg_nal != 0 && /* pre-newconfig log? */ - (lcfg->lcfg_nid >> 32) == 0) { - __u32 addr = (__u32)(lcfg->lcfg_nid & 0xffffffff); - - lcfg_new->lcfg_nid = - LNET_MKNID(LNET_MKNET(lcfg->lcfg_nal, 0), addr); - CWARN("Converted pre-newconfig NAL %d NID %x to %s\n", - lcfg->lcfg_nal, addr, - libcfs_nid2str(lcfg_new->lcfg_nid)); - } else { - lcfg_new->lcfg_nid = lcfg->lcfg_nid; - } - - lcfg_new->lcfg_nal = 0; /* illegal value for obsolete field */ - - rc = class_process_config(lcfg_new); - kfree(lcfg_new); - kfree(inst_name); - break; - } - default: - CERROR("Unknown llog record type %#x encountered\n", - rec->lrh_type); - break; - } -out: - if (rc) { - CERROR("%s: cfg command failed: rc = %d\n", - handle->lgh_ctxt->loc_obd->obd_name, rc); - class_config_dump_handler(NULL, handle, rec, data); - } - return rc; -} -EXPORT_SYMBOL(class_config_llog_handler); - -int class_config_parse_llog(const struct lu_env *env, struct llog_ctxt *ctxt, - char *name, struct config_llog_instance *cfg) -{ - struct llog_process_cat_data cd = {0, 0}; - struct llog_handle *llh; - llog_cb_t callback; - int rc; - - 
CDEBUG(D_INFO, "looking up llog %s\n", name); - rc = llog_open(env, ctxt, &llh, NULL, name, LLOG_OPEN_EXISTS); - if (rc) - return rc; - - rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL); - if (rc) - goto parse_out; - - /* continue processing from where we last stopped to end-of-log */ - if (cfg) { - cd.lpcd_first_idx = cfg->cfg_last_idx; - callback = cfg->cfg_callback; - LASSERT(callback); - } else { - callback = class_config_llog_handler; - } - - cd.lpcd_last_idx = 0; - - rc = llog_process(env, llh, callback, cfg, &cd); - - CDEBUG(D_CONFIG, "Processed log %s gen %d-%d (rc=%d)\n", name, - cd.lpcd_first_idx + 1, cd.lpcd_last_idx, rc); - if (cfg) - cfg->cfg_last_idx = cd.lpcd_last_idx; - -parse_out: - llog_close(env, llh); - return rc; -} -EXPORT_SYMBOL(class_config_parse_llog); - -/** - * parse config record and output dump in supplied buffer. - * This is separated from class_config_dump_handler() to use - * for ioctl needs as well - */ -static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf, - int size) -{ - struct lustre_cfg *lcfg = (struct lustre_cfg *)(rec + 1); - char *ptr = buf; - char *end = buf + size; - int rc = 0; - - LASSERT(rec->lrh_type == OBD_CFG_REC); - rc = lustre_cfg_sanity_check(lcfg, rec->lrh_len); - if (rc < 0) - return rc; - - ptr += snprintf(ptr, end - ptr, "cmd=%05x ", lcfg->lcfg_command); - if (lcfg->lcfg_flags) - ptr += snprintf(ptr, end - ptr, "flags=%#08x ", - lcfg->lcfg_flags); - - if (lcfg->lcfg_num) - ptr += snprintf(ptr, end - ptr, "num=%#08x ", lcfg->lcfg_num); - - if (lcfg->lcfg_nid) { - char nidstr[LNET_NIDSTR_SIZE]; - - libcfs_nid2str_r(lcfg->lcfg_nid, nidstr, sizeof(nidstr)); - ptr += snprintf(ptr, end - ptr, "nid=%s(%#llx)\n ", - nidstr, lcfg->lcfg_nid); - } - - if (lcfg->lcfg_command == LCFG_MARKER) { - struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1); - - ptr += snprintf(ptr, end - ptr, "marker=%d(%#x)%s '%s'", - marker->cm_step, marker->cm_flags, - marker->cm_tgtname, marker->cm_comment); - } else { - 
int i; - - for (i = 0; i < lcfg->lcfg_bufcount; i++) { - ptr += snprintf(ptr, end - ptr, "%d:%s ", i, - lustre_cfg_string(lcfg, i)); - } - } - ptr += snprintf(ptr, end - ptr, "\n"); - /* return consumed bytes */ - rc = ptr - buf; - return rc; -} - -int class_config_dump_handler(const struct lu_env *env, - struct llog_handle *handle, - struct llog_rec_hdr *rec, void *data) -{ - char *outstr; - int rc = 0; - - outstr = kzalloc(256, GFP_NOFS); - if (!outstr) - return -ENOMEM; - - if (rec->lrh_type == OBD_CFG_REC) { - class_config_parse_rec(rec, outstr, 256); - LCONSOLE(D_WARNING, " %s", outstr); - } else { - LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type); - rc = -EINVAL; - } - - kfree(outstr); - return rc; -} - -/** Call class_cleanup and class_detach. - * "Manual" only in the sense that we're faking lcfg commands. - */ -int class_manual_cleanup(struct obd_device *obd) -{ - char flags[3] = ""; - struct lustre_cfg *lcfg; - struct lustre_cfg_bufs bufs; - int rc; - - if (!obd) { - CERROR("empty cleanup\n"); - return -EALREADY; - } - - if (obd->obd_force) - strcat(flags, "F"); - if (obd->obd_fail) - strcat(flags, "A"); - - CDEBUG(D_CONFIG, "Manual cleanup of %s (flags='%s')\n", - obd->obd_name, flags); - - lustre_cfg_bufs_reset(&bufs, obd->obd_name); - lustre_cfg_bufs_set_string(&bufs, 1, flags); - lcfg = kzalloc(lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen), - GFP_NOFS); - if (!lcfg) - return -ENOMEM; - lustre_cfg_init(lcfg, LCFG_CLEANUP, &bufs); - - rc = class_process_config(lcfg); - if (rc) { - CERROR("cleanup failed %d: %s\n", rc, obd->obd_name); - goto out; - } - - /* the lcfg is almost the same for both ops */ - lcfg->lcfg_command = LCFG_DETACH; - rc = class_process_config(lcfg); - if (rc) - CERROR("detach failed %d: %s\n", rc, obd->obd_name); -out: - kfree(lcfg); - return rc; -} -EXPORT_SYMBOL(class_manual_cleanup); diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c deleted 
file mode 100644 index 06c38fdef7ba..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c +++ /dev/null @@ -1,1245 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/obd_mount.c - * - * Client mount routines - * - * Author: Nathan Rutman - */ - -#define DEBUG_SUBSYSTEM S_CLASS -#define D_MOUNT (D_SUPER | D_CONFIG/*|D_WARNING */) -#define PRINT_CMD CDEBUG - -#include -#include -#include -#include -#include -#include -#include -#include - -static DEFINE_SPINLOCK(client_lock); -static struct module *client_mod; -static int (*client_fill_super)(struct super_block *sb); -static void (*kill_super_cb)(struct super_block *sb); - -/**************** config llog ********************/ - -/** Get a config log from the MGS and process it. - * This func is called for both clients and servers. 
- * Continue to process new statements appended to the logs - * (whenever the config lock is revoked) until lustre_end_log - * is called. - * @param sb The superblock is used by the MGC to write to the local copy of - * the config log - * @param logname The name of the llog to replicate from the MGS - * @param cfg Since the same mgc may be used to follow multiple config logs - * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for - * this log, and is added to the mgc's list of logs to follow. - */ -int lustre_process_log(struct super_block *sb, char *logname, - struct config_llog_instance *cfg) -{ - struct lustre_cfg *lcfg; - struct lustre_cfg_bufs *bufs; - struct lustre_sb_info *lsi = s2lsi(sb); - struct obd_device *mgc = lsi->lsi_mgc; - int rc; - - LASSERT(mgc); - LASSERT(cfg); - - bufs = kzalloc(sizeof(*bufs), GFP_NOFS); - if (!bufs) - return -ENOMEM; - - /* mgc_process_config */ - lustre_cfg_bufs_reset(bufs, mgc->obd_name); - lustre_cfg_bufs_set_string(bufs, 1, logname); - lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg)); - lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb)); - lcfg = kzalloc(lustre_cfg_len(bufs->lcfg_bufcount, bufs->lcfg_buflen), - GFP_NOFS); - if (!lcfg) { - rc = -ENOMEM; - goto out; - } - lustre_cfg_init(lcfg, LCFG_LOG_START, bufs); - - rc = obd_process_config(mgc, sizeof(*lcfg), lcfg); - kfree(lcfg); -out: - kfree(bufs); - - if (rc == -EINVAL) - LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n", - mgc->obd_name, logname, rc); - - else if (rc) - LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. 
See the syslog for more information.\n", - mgc->obd_name, logname, - rc); - - /* class_obd_list(); */ - return rc; -} -EXPORT_SYMBOL(lustre_process_log); - -/* Stop watching this config log for updates */ -int lustre_end_log(struct super_block *sb, char *logname, - struct config_llog_instance *cfg) -{ - struct lustre_cfg *lcfg; - struct lustre_cfg_bufs bufs; - struct lustre_sb_info *lsi = s2lsi(sb); - struct obd_device *mgc = lsi->lsi_mgc; - int rc; - - if (!mgc) - return -ENOENT; - - /* mgc_process_config */ - lustre_cfg_bufs_reset(&bufs, mgc->obd_name); - lustre_cfg_bufs_set_string(&bufs, 1, logname); - if (cfg) - lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg)); - lcfg = kzalloc(lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen), - GFP_NOFS); - if (!lcfg) - return -ENOMEM; - lustre_cfg_init(lcfg, LCFG_LOG_END, &bufs); - - rc = obd_process_config(mgc, sizeof(*lcfg), lcfg); - kfree(lcfg); - return rc; -} -EXPORT_SYMBOL(lustre_end_log); - -/**************** obd start *******************/ - -/** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from - * lctl (and do for echo cli/srv. - */ -static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, - char *s1, char *s2, char *s3, char *s4) -{ - struct lustre_cfg_bufs bufs; - struct lustre_cfg *lcfg = NULL; - int rc; - - CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname, - cmd, s1, s2, s3, s4); - - lustre_cfg_bufs_reset(&bufs, cfgname); - if (s1) - lustre_cfg_bufs_set_string(&bufs, 1, s1); - if (s2) - lustre_cfg_bufs_set_string(&bufs, 2, s2); - if (s3) - lustre_cfg_bufs_set_string(&bufs, 3, s3); - if (s4) - lustre_cfg_bufs_set_string(&bufs, 4, s4); - - lcfg = kzalloc(lustre_cfg_len(bufs.lcfg_bufcount, bufs.lcfg_buflen), - GFP_NOFS); - if (!lcfg) - return -ENOMEM; - lustre_cfg_init(lcfg, cmd, &bufs); - lcfg->lcfg_nid = nid; - rc = class_process_config(lcfg); - kfree(lcfg); - return rc; -} - -/** Call class_attach and class_setup. These methods in turn call - * obd type-specific methods. 
- */ -static int lustre_start_simple(char *obdname, char *type, char *uuid, - char *s1, char *s2, char *s3, char *s4) -{ - int rc; - - CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type); - - rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL); - if (rc) { - CERROR("%s attach error %d\n", obdname, rc); - return rc; - } - rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4); - if (rc) { - CERROR("%s setup error %d\n", obdname, rc); - do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL); - } - return rc; -} - -static DEFINE_MUTEX(mgc_start_lock); - -/** Set up a mgc obd to process startup logs - * - * \param sb [in] super block of the mgc obd - * - * \retval 0 success, otherwise error code - */ -int lustre_start_mgc(struct super_block *sb) -{ - struct obd_connect_data *data = NULL; - struct lustre_sb_info *lsi = s2lsi(sb); - struct obd_device *obd; - struct obd_export *exp; - struct obd_uuid *uuid; - class_uuid_t uuidc; - lnet_nid_t nid; - char nidstr[LNET_NIDSTR_SIZE]; - char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL; - char *ptr; - int rc = 0, i = 0, j; - - LASSERT(lsi->lsi_lmd); - - /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */ - ptr = lsi->lsi_lmd->lmd_dev; - if (class_parse_nid(ptr, &nid, &ptr) == 0) - i++; - if (i == 0) { - CERROR("No valid MGS nids found.\n"); - return -EINVAL; - } - - mutex_lock(&mgc_start_lock); - - libcfs_nid2str_r(nid, nidstr, sizeof(nidstr)); - mgcname = kasprintf(GFP_NOFS, - "%s%s", LUSTRE_MGC_OBDNAME, nidstr); - niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, 0); - if (!mgcname || !niduuid) { - rc = -ENOMEM; - goto out_free; - } - - mgssec = lsi->lsi_lmd->lmd_mgssec ? 
lsi->lsi_lmd->lmd_mgssec : ""; - - data = kzalloc(sizeof(*data), GFP_NOFS); - if (!data) { - rc = -ENOMEM; - goto out_free; - } - - obd = class_name2obd(mgcname); - if (obd && !obd->obd_stopping) { - int recov_bk; - - rc = obd_set_info_async(NULL, obd->obd_self_export, - strlen(KEY_MGSSEC), KEY_MGSSEC, - strlen(mgssec), mgssec, NULL); - if (rc) - goto out_free; - - /* Re-using an existing MGC */ - atomic_inc(&obd->u.cli.cl_mgc_refcount); - - /* IR compatibility check, only for clients */ - if (lmd_is_client(lsi->lsi_lmd)) { - int has_ir; - int vallen = sizeof(*data); - __u32 *flags = &lsi->lsi_lmd->lmd_flags; - - rc = obd_get_info(NULL, obd->obd_self_export, - strlen(KEY_CONN_DATA), KEY_CONN_DATA, - &vallen, data); - LASSERT(rc == 0); - has_ir = OCD_HAS_FLAG(data, IMP_RECOV); - if (has_ir ^ !(*flags & LMD_FLG_NOIR)) { - /* LMD_FLG_NOIR is for test purpose only */ - LCONSOLE_WARN( - "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n", - has_ir ? "enabled" : "disabled"); - if (has_ir) - *flags &= ~LMD_FLG_NOIR; - else - *flags |= LMD_FLG_NOIR; - } - } - - recov_bk = 0; - - /* Try all connections, but only once (again). - * We don't want to block another target from starting - * (using its local copy of the log), but we do want to connect - * if at all possible. 
- */ - recov_bk++; - CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname, - recov_bk); - rc = obd_set_info_async(NULL, obd->obd_self_export, - sizeof(KEY_INIT_RECOV_BACKUP), - KEY_INIT_RECOV_BACKUP, - sizeof(recov_bk), &recov_bk, NULL); - rc = 0; - goto out; - } - - CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname); - - /* Add the primary nids for the MGS */ - i = 0; - /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */ - ptr = lsi->lsi_lmd->lmd_dev; - while (class_parse_nid(ptr, &nid, &ptr) == 0) { - rc = do_lcfg(mgcname, nid, - LCFG_ADD_UUID, niduuid, NULL, NULL, NULL); - if (!rc) - i++; - /* Stop at the first failover nid */ - if (*ptr == ':') - break; - } - if (i == 0) { - CERROR("No valid MGS nids found.\n"); - rc = -EINVAL; - goto out_free; - } - lsi->lsi_lmd->lmd_mgs_failnodes = 1; - - /* Random uuid for MGC allows easier reconnects */ - uuid = kzalloc(sizeof(*uuid), GFP_NOFS); - if (!uuid) { - rc = -ENOMEM; - goto out_free; - } - - ll_generate_random_uuid(uuidc); - class_uuid_unparse(uuidc, uuid); - - /* Start the MGC */ - rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME, - (char *)uuid->uuid, LUSTRE_MGS_OBDNAME, - niduuid, NULL, NULL); - kfree(uuid); - if (rc) - goto out_free; - - /* Add any failover MGS nids */ - i = 1; - while (ptr && ((*ptr == ':' || - class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) { - /* New failover node */ - sprintf(niduuid, "%s_%x", mgcname, i); - j = 0; - while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) { - rc = do_lcfg(mgcname, nid, LCFG_ADD_UUID, niduuid, - NULL, NULL, NULL); - if (!rc) - ++j; - if (*ptr == ':') - break; - } - if (j > 0) { - rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN, - niduuid, NULL, NULL, NULL); - if (!rc) - i++; - } else { - /* at ":/fsname" */ - break; - } - } - lsi->lsi_lmd->lmd_mgs_failnodes = i; - - obd = class_name2obd(mgcname); - if (!obd) { - CERROR("Can't find mgcobd %s\n", mgcname); - rc = -ENOTCONN; - goto out_free; - } - - rc = obd_set_info_async(NULL, obd->obd_self_export, - 
strlen(KEY_MGSSEC), KEY_MGSSEC, - strlen(mgssec), mgssec, NULL); - if (rc) - goto out_free; - - /* Keep a refcount of servers/clients who started with "mount", - * so we know when we can get rid of the mgc. - */ - atomic_set(&obd->u.cli.cl_mgc_refcount, 1); - - /* We connect to the MGS at setup, and don't disconnect until cleanup */ - data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT | - OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | - OBD_CONNECT_LVB_TYPE | OBD_CONNECT_BULK_MBITS; - -#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE - data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB; -#endif - - if (lmd_is_client(lsi->lsi_lmd) && - lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR) - data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV; - data->ocd_version = LUSTRE_VERSION_CODE; - rc = obd_connect(NULL, &exp, obd, &obd->obd_uuid, data, NULL); - if (rc) { - CERROR("connect failed %d\n", rc); - goto out; - } - - obd->u.cli.cl_mgc_mgsexp = exp; - -out: - /* Keep the mgc info in the sb. Note that many lsi's can point - * to the same mgc. - */ - lsi->lsi_mgc = obd; -out_free: - mutex_unlock(&mgc_start_lock); - - kfree(data); - kfree(mgcname); - kfree(niduuid); - return rc; -} - -static int lustre_stop_mgc(struct super_block *sb) -{ - struct lustre_sb_info *lsi = s2lsi(sb); - struct obd_device *obd; - char *niduuid = NULL, *ptr = NULL; - int i, rc = 0, len = 0; - - if (!lsi) - return -ENOENT; - obd = lsi->lsi_mgc; - if (!obd) - return -ENOENT; - lsi->lsi_mgc = NULL; - - mutex_lock(&mgc_start_lock); - LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0); - if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) { - /* This is not fatal, every client that stops - * will call in here. - */ - CDEBUG(D_MOUNT, "mgc still has %d references.\n", - atomic_read(&obd->u.cli.cl_mgc_refcount)); - rc = -EBUSY; - goto out; - } - - /* The MGC has no recoverable data in any case. 
- * force shutdown set in umount_begin - */ - obd->obd_no_recov = 1; - - if (obd->u.cli.cl_mgc_mgsexp) { - /* An error is not fatal, if we are unable to send the - * disconnect mgs ping evictor cleans up the export - */ - rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp); - if (rc) - CDEBUG(D_MOUNT, "disconnect failed %d\n", rc); - } - - /* Save the obdname for cleaning the nid uuids, which are obdname_XX */ - len = strlen(obd->obd_name) + 6; - niduuid = kzalloc(len, GFP_NOFS); - if (niduuid) { - strcpy(niduuid, obd->obd_name); - ptr = niduuid + strlen(niduuid); - } - - rc = class_manual_cleanup(obd); - if (rc) - goto out; - - /* Clean the nid uuids */ - if (!niduuid) { - rc = -ENOMEM; - goto out; - } - - for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) { - sprintf(ptr, "_%x", i); - rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID, - niduuid, NULL, NULL, NULL); - if (rc) - CERROR("del MDC UUID %s failed: rc = %d\n", - niduuid, rc); - } -out: - kfree(niduuid); - - /* class_import_put will get rid of the additional connections */ - mutex_unlock(&mgc_start_lock); - return rc; -} - -/***************** lustre superblock **************/ - -static struct lustre_sb_info *lustre_init_lsi(struct super_block *sb) -{ - struct lustre_sb_info *lsi; - - lsi = kzalloc(sizeof(*lsi), GFP_NOFS); - if (!lsi) - return NULL; - lsi->lsi_lmd = kzalloc(sizeof(*lsi->lsi_lmd), GFP_NOFS); - if (!lsi->lsi_lmd) { - kfree(lsi); - return NULL; - } - - lsi->lsi_lmd->lmd_exclude_count = 0; - lsi->lsi_lmd->lmd_recovery_time_soft = 0; - lsi->lsi_lmd->lmd_recovery_time_hard = 0; - s2lsi_nocast(sb) = lsi; - /* we take 1 extra ref for our setup */ - atomic_set(&lsi->lsi_mounts, 1); - - /* Default umount style */ - lsi->lsi_flags = LSI_UMOUNT_FAILOVER; - - return lsi; -} - -static int lustre_free_lsi(struct super_block *sb) -{ - struct lustre_sb_info *lsi = s2lsi(sb); - - CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi); - - /* someone didn't call server_put_mount. 
*/ - LASSERT(atomic_read(&lsi->lsi_mounts) == 0); - - if (lsi->lsi_lmd) { - kfree(lsi->lsi_lmd->lmd_dev); - kfree(lsi->lsi_lmd->lmd_profile); - kfree(lsi->lsi_lmd->lmd_mgssec); - kfree(lsi->lsi_lmd->lmd_opts); - if (lsi->lsi_lmd->lmd_exclude_count) - kfree(lsi->lsi_lmd->lmd_exclude); - kfree(lsi->lsi_lmd->lmd_mgs); - kfree(lsi->lsi_lmd->lmd_osd_type); - kfree(lsi->lsi_lmd->lmd_params); - - kfree(lsi->lsi_lmd); - } - - LASSERT(!lsi->lsi_llsbi); - kfree(lsi); - s2lsi_nocast(sb) = NULL; - - return 0; -} - -/* The lsi has one reference for every server that is using the disk - - * e.g. MDT, MGS, and potentially MGC - */ -static int lustre_put_lsi(struct super_block *sb) -{ - struct lustre_sb_info *lsi = s2lsi(sb); - - CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts)); - if (atomic_dec_and_test(&lsi->lsi_mounts)) { - lustre_free_lsi(sb); - return 1; - } - return 0; -} - -/*** SERVER NAME *** - * - * FSNAME is between 1 and 8 characters (inclusive). - * Excluded characters are '/' and ':' - * SEPARATOR is either ':' or '-' - * TYPE: "OST", "MDT", etc. - * INDEX: Hex representation of the index - */ - -/** Get the fsname ("lustre") from the server name ("lustre-OST003F"). - * @param [in] svname server name including type and index - * @param [out] fsname Buffer to copy filesystem name prefix into. - * Must have at least 'strlen(fsname) + 1' chars. - * @param [out] endptr if endptr isn't NULL it is set to end of fsname - * rc < 0 on error - */ -static int server_name2fsname(const char *svname, char *fsname, - const char **endptr) -{ - const char *dash; - - dash = svname + strnlen(svname, 8); /* max fsname length is 8 */ - for (; dash > svname && *dash != '-' && *dash != ':'; dash--) - ; - if (dash == svname) - return -EINVAL; - - if (fsname) { - strncpy(fsname, svname, dash - svname); - fsname[dash - svname] = '\0'; - } - - if (endptr) - *endptr = dash; - - return 0; -} - -/* Get the index from the obd name. 
- * rc = server type, or - * rc < 0 on error - * if endptr isn't NULL it is set to end of name - */ -static int server_name2index(const char *svname, __u32 *idx, - const char **endptr) -{ - unsigned long index; - int rc; - const char *dash; - - /* We use server_name2fsname() just for parsing */ - rc = server_name2fsname(svname, NULL, &dash); - if (rc != 0) - return rc; - - dash++; - - if (strncmp(dash, "MDT", 3) == 0) - rc = LDD_F_SV_TYPE_MDT; - else if (strncmp(dash, "OST", 3) == 0) - rc = LDD_F_SV_TYPE_OST; - else - return -EINVAL; - - dash += 3; - - if (strncmp(dash, "all", 3) == 0) { - if (endptr) - *endptr = dash + 3; - return rc | LDD_F_SV_ALL; - } - - index = simple_strtoul(dash, (char **)endptr, 16); - if (idx) - *idx = index; - - /* Account for -mdc after index that is possible when specifying mdt */ - if (endptr && strncmp(LUSTRE_MDC_NAME, *endptr + 1, - sizeof(LUSTRE_MDC_NAME) - 1) == 0) - *endptr += sizeof(LUSTRE_MDC_NAME); - - return rc; -} - -/*************** mount common between server and client ***************/ - -/* Common umount */ -int lustre_common_put_super(struct super_block *sb) -{ - int rc; - - CDEBUG(D_MOUNT, "dropping sb %p\n", sb); - - /* Drop a ref to the MGC */ - rc = lustre_stop_mgc(sb); - if (rc && (rc != -ENOENT)) { - if (rc != -EBUSY) { - CERROR("Can't stop MGC: %d\n", rc); - return rc; - } - /* BUSY just means that there's some other obd that - * needs the mgc. Let him clean it up. 
- */ - CDEBUG(D_MOUNT, "MGC still in use\n"); - } - /* Drop a ref to the mounted disk */ - lustre_put_lsi(sb); - return rc; -} -EXPORT_SYMBOL(lustre_common_put_super); - -static void lmd_print(struct lustre_mount_data *lmd) -{ - int i; - - PRINT_CMD(D_MOUNT, " mount data:\n"); - if (lmd_is_client(lmd)) - PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile); - PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev); - PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags); - - if (lmd->lmd_opts) - PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts); - - if (lmd->lmd_recovery_time_soft) - PRINT_CMD(D_MOUNT, "recovery time soft: %d\n", - lmd->lmd_recovery_time_soft); - - if (lmd->lmd_recovery_time_hard) - PRINT_CMD(D_MOUNT, "recovery time hard: %d\n", - lmd->lmd_recovery_time_hard); - - for (i = 0; i < lmd->lmd_exclude_count; i++) { - PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i, - lmd->lmd_exclude[i]); - } -} - -/* Is this server on the exclusion list */ -int lustre_check_exclusion(struct super_block *sb, char *svname) -{ - struct lustre_sb_info *lsi = s2lsi(sb); - struct lustre_mount_data *lmd = lsi->lsi_lmd; - __u32 index; - int i, rc; - - rc = server_name2index(svname, &index, NULL); - if (rc != LDD_F_SV_TYPE_OST) - /* Only exclude OSTs */ - return 0; - - CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname, - index, lmd->lmd_exclude_count, lmd->lmd_dev); - - for (i = 0; i < lmd->lmd_exclude_count; i++) { - if (index == lmd->lmd_exclude[i]) { - CWARN("Excluding %s (on exclusion list)\n", svname); - return 1; - } - } - return 0; -} - -/* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */ -static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr) -{ - const char *s1 = ptr, *s2; - __u32 index = 0, *exclude_list; - int rc = 0, devmax; - - /* The shortest an ost name can be is 8 chars: -OST0000. - * We don't actually know the fsname at this time, so in fact - * a user could specify any fsname. 
- */ - devmax = strlen(ptr) / 8 + 1; - - /* temp storage until we figure out how many we have */ - exclude_list = kcalloc(devmax, sizeof(index), GFP_NOFS); - if (!exclude_list) - return -ENOMEM; - - /* we enter this fn pointing at the '=' */ - while (*s1 && *s1 != ' ' && *s1 != ',') { - s1++; - rc = server_name2index(s1, &index, &s2); - if (rc < 0) { - CERROR("Can't parse server name '%s': rc = %d\n", - s1, rc); - break; - } - if (rc == LDD_F_SV_TYPE_OST) - exclude_list[lmd->lmd_exclude_count++] = index; - else - CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n", - (uint)(s2 - s1), s1, rc); - s1 = s2; - /* now we are pointing at ':' (next exclude) - * or ',' (end of excludes) - */ - if (lmd->lmd_exclude_count >= devmax) - break; - } - if (rc >= 0) /* non-err */ - rc = 0; - - if (lmd->lmd_exclude_count) { - /* permanent, freed in lustre_free_lsi */ - lmd->lmd_exclude = kcalloc(lmd->lmd_exclude_count, - sizeof(index), GFP_NOFS); - if (lmd->lmd_exclude) { - memcpy(lmd->lmd_exclude, exclude_list, - sizeof(index) * lmd->lmd_exclude_count); - } else { - rc = -ENOMEM; - lmd->lmd_exclude_count = 0; - } - } - kfree(exclude_list); - return rc; -} - -static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr) -{ - char *tail; - int length; - - kfree(lmd->lmd_mgssec); - lmd->lmd_mgssec = NULL; - - tail = strchr(ptr, ','); - if (!tail) - length = strlen(ptr); - else - length = tail - ptr; - - lmd->lmd_mgssec = kzalloc(length + 1, GFP_NOFS); - if (!lmd->lmd_mgssec) - return -ENOMEM; - - memcpy(lmd->lmd_mgssec, ptr, length); - lmd->lmd_mgssec[length] = '\0'; - return 0; -} - -static int lmd_parse_string(char **handle, char *ptr) -{ - char *tail; - int length; - - if (!handle || !ptr) - return -EINVAL; - - kfree(*handle); - *handle = NULL; - - tail = strchr(ptr, ','); - if (!tail) - length = strlen(ptr); - else - length = tail - ptr; - - *handle = kzalloc(length + 1, GFP_NOFS); - if (!*handle) - return -ENOMEM; - - memcpy(*handle, ptr, length); - (*handle)[length] = 
'\0'; - - return 0; -} - -/* Collect multiple values for mgsnid specifiers */ -static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr) -{ - lnet_nid_t nid; - char *tail = *ptr; - char *mgsnid; - int length; - int oldlen = 0; - - /* Find end of nidlist */ - while (class_parse_nid_quiet(tail, &nid, &tail) == 0) - ; - length = tail - *ptr; - if (length == 0) { - LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr); - return -EINVAL; - } - - if (lmd->lmd_mgs) - oldlen = strlen(lmd->lmd_mgs) + 1; - - mgsnid = kzalloc(oldlen + length + 1, GFP_NOFS); - if (!mgsnid) - return -ENOMEM; - - if (lmd->lmd_mgs) { - /* Multiple mgsnid= are taken to mean failover locations */ - memcpy(mgsnid, lmd->lmd_mgs, oldlen); - mgsnid[oldlen - 1] = ':'; - kfree(lmd->lmd_mgs); - } - memcpy(mgsnid + oldlen, *ptr, length); - mgsnid[oldlen + length] = '\0'; - lmd->lmd_mgs = mgsnid; - *ptr = tail; - - return 0; -} - -/** Parse mount line options - * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre - * dev is passed as device=uml1:/lustre by mount.lustre - */ -static int lmd_parse(char *options, struct lustre_mount_data *lmd) -{ - char *s1, *s2, *devname = NULL; - struct lustre_mount_data *raw = (struct lustre_mount_data *)options; - int rc = 0; - - LASSERT(lmd); - if (!options) { - LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that /sbin/mount.lustre is installed.\n"); - return -EINVAL; - } - - /* Options should be a string - try to detect old lmd data */ - if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) { - LCONSOLE_ERROR_MSG(0x163, "You're using an old version of /sbin/mount.lustre. 
Please install version %s\n", - LUSTRE_VERSION_STRING); - return -EINVAL; - } - lmd->lmd_magic = LMD_MAGIC; - - lmd->lmd_params = kzalloc(LMD_PARAMS_MAXLEN, GFP_NOFS); - if (!lmd->lmd_params) - return -ENOMEM; - lmd->lmd_params[0] = '\0'; - - /* Set default flags here */ - - s1 = options; - while (*s1) { - int clear = 0; - int time_min = OBD_RECOVERY_TIME_MIN; - char *s3; - - /* Skip whitespace and extra commas */ - while (*s1 == ' ' || *s1 == ',') - s1++; - s3 = s1; - - /* Client options are parsed in ll_options: eg. flock, - * user_xattr, acl - */ - - /* Parse non-ldiskfs options here. Rather than modifying - * ldiskfs, we just zero these out here - */ - if (strncmp(s1, "abort_recov", 11) == 0) { - lmd->lmd_flags |= LMD_FLG_ABORT_RECOV; - clear++; - } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) { - lmd->lmd_recovery_time_soft = max_t(int, - simple_strtoul(s1 + 19, NULL, 10), time_min); - clear++; - } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) { - lmd->lmd_recovery_time_hard = max_t(int, - simple_strtoul(s1 + 19, NULL, 10), time_min); - clear++; - } else if (strncmp(s1, "noir", 4) == 0) { - lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */ - clear++; - } else if (strncmp(s1, "nosvc", 5) == 0) { - lmd->lmd_flags |= LMD_FLG_NOSVC; - clear++; - } else if (strncmp(s1, "nomgs", 5) == 0) { - lmd->lmd_flags |= LMD_FLG_NOMGS; - clear++; - } else if (strncmp(s1, "noscrub", 7) == 0) { - lmd->lmd_flags |= LMD_FLG_NOSCRUB; - clear++; - } else if (strncmp(s1, PARAM_MGSNODE, - sizeof(PARAM_MGSNODE) - 1) == 0) { - s2 = s1 + sizeof(PARAM_MGSNODE) - 1; - /* Assume the next mount opt is the first - * invalid nid we get to. 
- */ - rc = lmd_parse_mgs(lmd, &s2); - if (rc) - goto invalid; - clear++; - } else if (strncmp(s1, "writeconf", 9) == 0) { - lmd->lmd_flags |= LMD_FLG_WRITECONF; - clear++; - } else if (strncmp(s1, "update", 6) == 0) { - lmd->lmd_flags |= LMD_FLG_UPDATE; - clear++; - } else if (strncmp(s1, "virgin", 6) == 0) { - lmd->lmd_flags |= LMD_FLG_VIRGIN; - clear++; - } else if (strncmp(s1, "noprimnode", 10) == 0) { - lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE; - clear++; - } else if (strncmp(s1, "mgssec=", 7) == 0) { - rc = lmd_parse_mgssec(lmd, s1 + 7); - if (rc) - goto invalid; - s3 = s2; - clear++; - /* ost exclusion list */ - } else if (strncmp(s1, "exclude=", 8) == 0) { - rc = lmd_make_exclusion(lmd, s1 + 7); - if (rc) - goto invalid; - clear++; - } else if (strncmp(s1, "mgs", 3) == 0) { - /* We are an MGS */ - lmd->lmd_flags |= LMD_FLG_MGS; - clear++; - } else if (strncmp(s1, "svname=", 7) == 0) { - rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7); - if (rc) - goto invalid; - clear++; - } else if (strncmp(s1, "param=", 6) == 0) { - size_t length, params_length; - char *tail = strchr(s1 + 6, ','); - - if (!tail) { - length = strlen(s1); - } else { - lnet_nid_t nid; - char *param_str = tail + 1; - int supplementary = 1; - - while (!class_parse_nid_quiet(param_str, &nid, - &param_str)) { - supplementary = 0; - } - length = param_str - s1 - supplementary; - } - length -= 6; - params_length = strlen(lmd->lmd_params); - if (params_length + length + 1 >= LMD_PARAMS_MAXLEN) - return -E2BIG; - strncat(lmd->lmd_params, s1 + 6, length); - lmd->lmd_params[params_length + length] = '\0'; - strlcat(lmd->lmd_params, " ", LMD_PARAMS_MAXLEN); - s3 = s1 + 6 + length; - clear++; - } else if (strncmp(s1, "osd=", 4) == 0) { - rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4); - if (rc) - goto invalid; - clear++; - } - /* Linux 2.4 doesn't pass the device, so we stuck it at the - * end of the options. 
- */ - else if (strncmp(s1, "device=", 7) == 0) { - devname = s1 + 7; - /* terminate options right before device. device - * must be the last one. - */ - *s1 = '\0'; - break; - } - - /* Find next opt */ - s2 = strchr(s1, ','); - if (!s2) { - if (clear) - *s1 = '\0'; - break; - } - s2++; - if (clear) - memmove(s1, s2, strlen(s2) + 1); - else - s1 = s2; - } - - if (!devname) { - LCONSOLE_ERROR_MSG(0x164, "Can't find the device name (need mount option 'device=...')\n"); - goto invalid; - } - - s1 = strstr(devname, ":/"); - if (s1) { - ++s1; - lmd->lmd_flags |= LMD_FLG_CLIENT; - /* Remove leading /s from fsname */ - while (*++s1 == '/') - ; - /* Freed in lustre_free_lsi */ - lmd->lmd_profile = kasprintf(GFP_NOFS, "%s-client", s1); - if (!lmd->lmd_profile) - return -ENOMEM; - } - - /* Freed in lustre_free_lsi */ - lmd->lmd_dev = kzalloc(strlen(devname) + 1, GFP_NOFS); - if (!lmd->lmd_dev) - return -ENOMEM; - strcpy(lmd->lmd_dev, devname); - - /* Save mount options */ - s1 = options + strlen(options) - 1; - while (s1 >= options && (*s1 == ',' || *s1 == ' ')) - *s1-- = 0; - if (*options != 0) { - /* Freed in lustre_free_lsi */ - lmd->lmd_opts = kzalloc(strlen(options) + 1, GFP_NOFS); - if (!lmd->lmd_opts) - return -ENOMEM; - strcpy(lmd->lmd_opts, options); - } - - lmd_print(lmd); - lmd->lmd_magic = LMD_MAGIC; - - return rc; - -invalid: - CERROR("Bad mount options %s\n", options); - return -EINVAL; -} - -/** This is the entry point for the mount call into Lustre. - * This is called when a server or client is mounted, - * and this is where we start setting things up. - * @param data Mount options (e.g. 
-o flock,abort_recov) - */ -static int lustre_fill_super(struct super_block *sb, void *lmd2_data, int silent) -{ - struct lustre_mount_data *lmd; - struct lustre_sb_info *lsi; - int rc; - - CDEBUG(D_MOUNT | D_VFSTRACE, "VFS Op: sb %p\n", sb); - - lsi = lustre_init_lsi(sb); - if (!lsi) - return -ENOMEM; - lmd = lsi->lsi_lmd; - - /* - * Disable lockdep during mount, because mount locking patterns are - * `special'. - */ - lockdep_off(); - - /* - * LU-639: the obd cleanup of last mount may not finish yet, wait here. - */ - obd_zombie_barrier(); - - /* Figure out the lmd from the mount options */ - if (lmd_parse(lmd2_data, lmd)) { - lustre_put_lsi(sb); - rc = -EINVAL; - goto out; - } - - if (lmd_is_client(lmd)) { - bool have_client = false; - CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile); - if (!client_fill_super) - request_module("lustre"); - spin_lock(&client_lock); - if (client_fill_super && try_module_get(client_mod)) - have_client = true; - spin_unlock(&client_lock); - if (!have_client) { - LCONSOLE_ERROR_MSG(0x165, "Nothing registered for client mount! Is the 'lustre' module loaded?\n"); - lustre_put_lsi(sb); - rc = -ENODEV; - } else { - rc = lustre_start_mgc(sb); - if (rc) { - lustre_common_put_super(sb); - goto out; - } - /* Connect and start */ - /* (should always be ll_fill_super) */ - rc = (*client_fill_super)(sb); - /* c_f_s will call lustre_common_put_super on failure, otherwise - * c_f_s will have taken another reference to the module */ - module_put(client_mod); - } - } else { - CERROR("This is client-side-only module, cannot handle server mount.\n"); - rc = -EINVAL; - } - - /* If error happens in fill_super() call, @lsi will be killed there. - * This is why we do not put it here. - */ - goto out; -out: - if (rc) { - CERROR("Unable to mount %s (%d)\n", - s2lsi(sb) ? 
lmd->lmd_dev : "", rc); - } else { - CDEBUG(D_SUPER, "Mount %s complete\n", - lmd->lmd_dev); - } - lockdep_on(); - return rc; -} - -/* We can't call ll_fill_super by name because it lives in a module that - * must be loaded after this one. - */ -void lustre_register_super_ops(struct module *mod, - int (*cfs)(struct super_block *sb), - void (*ksc)(struct super_block *sb)) -{ - spin_lock(&client_lock); - client_mod = mod; - client_fill_super = cfs; - kill_super_cb = ksc; - spin_unlock(&client_lock); -} -EXPORT_SYMBOL(lustre_register_super_ops); - -/***************** FS registration ******************/ -static struct dentry *lustre_mount(struct file_system_type *fs_type, int flags, - const char *devname, void *data) -{ - return mount_nodev(fs_type, flags, data, lustre_fill_super); -} - -static void lustre_kill_super(struct super_block *sb) -{ - struct lustre_sb_info *lsi = s2lsi(sb); - - if (kill_super_cb && lsi) - (*kill_super_cb)(sb); - - kill_anon_super(sb); -} - -/** Register the "lustre" fs type - */ -static struct file_system_type lustre_fs_type = { - .owner = THIS_MODULE, - .name = "lustre", - .mount = lustre_mount, - .kill_sb = lustre_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE, -}; -MODULE_ALIAS_FS("lustre"); - -int lustre_register_fs(void) -{ - return register_filesystem(&lustre_fs_type); -} - -int lustre_unregister_fs(void) -{ - return unregister_filesystem(&lustre_fs_type); -} diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c deleted file mode 100644 index c4503bc36591..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/obdo.c +++ /dev/null @@ -1,181 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/obdo.c - * - * Object Devices Class Driver - * These are the only exported functions, they provide some generic - * infrastructure for managing object devices - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include - -void obdo_set_parent_fid(struct obdo *dst, const struct lu_fid *parent) -{ - dst->o_parent_oid = fid_oid(parent); - dst->o_parent_seq = fid_seq(parent); - dst->o_parent_ver = fid_ver(parent); - dst->o_valid |= OBD_MD_FLGENER | OBD_MD_FLFID; -} -EXPORT_SYMBOL(obdo_set_parent_fid); - -/* WARNING: the file systems must take care not to tinker with - * attributes they don't manage (such as blocks). 
- */ -void obdo_from_inode(struct obdo *dst, struct inode *src, u32 valid) -{ - u32 newvalid = 0; - - if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n", - valid, LTIME_S(src->i_mtime), - LTIME_S(src->i_ctime)); - - if (valid & OBD_MD_FLATIME) { - dst->o_atime = LTIME_S(src->i_atime); - newvalid |= OBD_MD_FLATIME; - } - if (valid & OBD_MD_FLMTIME) { - dst->o_mtime = LTIME_S(src->i_mtime); - newvalid |= OBD_MD_FLMTIME; - } - if (valid & OBD_MD_FLCTIME) { - dst->o_ctime = LTIME_S(src->i_ctime); - newvalid |= OBD_MD_FLCTIME; - } - if (valid & OBD_MD_FLSIZE) { - dst->o_size = i_size_read(src); - newvalid |= OBD_MD_FLSIZE; - } - if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */ - dst->o_blocks = src->i_blocks; - newvalid |= OBD_MD_FLBLOCKS; - } - if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */ - dst->o_blksize = 1 << src->i_blkbits; - newvalid |= OBD_MD_FLBLKSZ; - } - if (valid & OBD_MD_FLTYPE) { - dst->o_mode = (dst->o_mode & S_IALLUGO) | - (src->i_mode & S_IFMT); - newvalid |= OBD_MD_FLTYPE; - } - if (valid & OBD_MD_FLMODE) { - dst->o_mode = (dst->o_mode & S_IFMT) | - (src->i_mode & S_IALLUGO); - newvalid |= OBD_MD_FLMODE; - } - if (valid & OBD_MD_FLUID) { - dst->o_uid = from_kuid(&init_user_ns, src->i_uid); - newvalid |= OBD_MD_FLUID; - } - if (valid & OBD_MD_FLGID) { - dst->o_gid = from_kgid(&init_user_ns, src->i_gid); - newvalid |= OBD_MD_FLGID; - } - if (valid & OBD_MD_FLFLAGS) { - dst->o_flags = src->i_flags; - newvalid |= OBD_MD_FLFLAGS; - } - dst->o_valid |= newvalid; -} -EXPORT_SYMBOL(obdo_from_inode); - -void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj) -{ - ioobj->ioo_oid = oa->o_oi; - if (unlikely(!(oa->o_valid & OBD_MD_FLGROUP))) - ostid_set_seq_mdt0(&ioobj->ioo_oid); - - /* Since 2.4 this does not contain o_mode in the low 16 bits. 
- * Instead, it holds (bd_md_max_brw - 1) for multi-bulk BRW RPCs - */ - ioobj->ioo_max_brw = 0; -} -EXPORT_SYMBOL(obdo_to_ioobj); - -/** - * Create an obdo to send over the wire - */ -void lustre_set_wire_obdo(const struct obd_connect_data *ocd, - struct obdo *wobdo, const struct obdo *lobdo) -{ - *wobdo = *lobdo; - wobdo->o_flags &= ~OBD_FL_LOCAL_MASK; - if (!ocd) - return; - - if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) && - fid_seq_is_echo(ostid_seq(&lobdo->o_oi))) { - /* - * Currently OBD_FL_OSTID will only be used when 2.4 echo - * client communicate with pre-2.4 server - */ - wobdo->o_oi.oi.oi_id = fid_oid(&lobdo->o_oi.oi_fid); - wobdo->o_oi.oi.oi_seq = fid_seq(&lobdo->o_oi.oi_fid); - } -} -EXPORT_SYMBOL(lustre_set_wire_obdo); - -/** - * Create a local obdo from a wire based odbo - */ -void lustre_get_wire_obdo(const struct obd_connect_data *ocd, - struct obdo *lobdo, const struct obdo *wobdo) -{ - u32 local_flags = 0; - - if (lobdo->o_valid & OBD_MD_FLFLAGS) - local_flags = lobdo->o_flags & OBD_FL_LOCAL_MASK; - - *lobdo = *wobdo; - if (local_flags) { - lobdo->o_valid |= OBD_MD_FLFLAGS; - lobdo->o_flags &= ~OBD_FL_LOCAL_MASK; - lobdo->o_flags |= local_flags; - } - if (!ocd) - return; - - if (unlikely(!(ocd->ocd_connect_flags & OBD_CONNECT_FID)) && - fid_seq_is_echo(wobdo->o_oi.oi.oi_seq)) { - /* see above */ - lobdo->o_oi.oi_fid.f_seq = wobdo->o_oi.oi.oi_seq; - lobdo->o_oi.oi_fid.f_oid = wobdo->o_oi.oi.oi_id; - lobdo->o_oi.oi_fid.f_ver = 0; - } -} -EXPORT_SYMBOL(lustre_get_wire_obdo); diff --git a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c b/drivers/staging/lustre/lustre/obdclass/statfs_pack.c deleted file mode 100644 index 355e888885f4..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/statfs_pack.c +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/statfs_pack.c - * - * (Un)packing of OST/MDS requests - * - * Author: Andreas Dilger - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include -#include - -void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs) -{ - memset(sfs, 0, sizeof(*sfs)); - sfs->f_type = osfs->os_type; - sfs->f_blocks = osfs->os_blocks; - sfs->f_bfree = osfs->os_bfree; - sfs->f_bavail = osfs->os_bavail; - sfs->f_files = osfs->os_files; - sfs->f_ffree = osfs->os_ffree; - sfs->f_bsize = osfs->os_bsize; - sfs->f_namelen = osfs->os_namelen; -} -EXPORT_SYMBOL(statfs_unpack); diff --git a/drivers/staging/lustre/lustre/obdclass/uuid.c b/drivers/staging/lustre/lustre/obdclass/uuid.c deleted file mode 100644 index ec8c6dc5c9a7..000000000000 --- a/drivers/staging/lustre/lustre/obdclass/uuid.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdclass/uuid.c - * - * Public include file for the UUID library - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include - -void class_uuid_unparse(class_uuid_t uu, struct obd_uuid *out) -{ - sprintf(out->uuid, "%pU", uu); -} -EXPORT_SYMBOL(class_uuid_unparse); diff --git a/drivers/staging/lustre/lustre/obdecho/Makefile b/drivers/staging/lustre/lustre/obdecho/Makefile deleted file mode 100644 index 6be66fbab872..000000000000 --- a/drivers/staging/lustre/lustre/obdecho/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LUSTRE_FS) += obdecho.o -obdecho-y := echo_client.o diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c deleted file mode 100644 index b692e76e7108..000000000000 --- a/drivers/staging/lustre/lustre/obdecho/echo_client.c +++ /dev/null @@ -1,1729 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_ECHO - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "echo_internal.h" - -/** \defgroup echo_client Echo Client - * @{ - */ - -struct echo_device { - struct cl_device ed_cl; - struct echo_client_obd *ed_ec; - - struct cl_site ed_site_myself; - struct lu_site *ed_site; - struct lu_device *ed_next; -}; - -struct echo_object { - struct cl_object eo_cl; - struct cl_object_header eo_hdr; - - struct echo_device *eo_dev; - struct list_head eo_obj_chain; - struct lov_oinfo *eo_oinfo; - atomic_t eo_npages; - int eo_deleted; -}; - -struct echo_object_conf { - struct cl_object_conf eoc_cl; - struct lov_oinfo **eoc_oinfo; -}; - -struct echo_page { - struct cl_page_slice ep_cl; - struct mutex ep_lock; -}; - -struct echo_lock { - struct cl_lock_slice el_cl; - struct list_head el_chain; - struct echo_object *el_object; - __u64 el_cookie; - atomic_t el_refcount; -}; - -static int echo_client_setup(const struct lu_env *env, - struct obd_device *obddev, - struct lustre_cfg *lcfg); -static int echo_client_cleanup(struct obd_device *obddev); - -/** \defgroup echo_helpers Helper functions - * @{ - */ -static inline struct echo_device *cl2echo_dev(const struct cl_device *dev) -{ - return container_of_safe(dev, struct echo_device, ed_cl); -} - -static inline struct cl_device *echo_dev2cl(struct echo_device *d) -{ - return &d->ed_cl; -} - -static inline struct echo_device *obd2echo_dev(const struct obd_device *obd) -{ - return cl2echo_dev(lu2cl_dev(obd->obd_lu_dev)); -} - -static inline struct cl_object *echo_obj2cl(struct echo_object *eco) -{ - return &eco->eo_cl; -} - -static inline struct echo_object *cl2echo_obj(const struct cl_object *o) -{ - return container_of(o, struct echo_object, eo_cl); -} - -static inline struct echo_page *cl2echo_page(const struct cl_page_slice *s) -{ - return container_of(s, struct echo_page, ep_cl); -} - -static inline struct echo_lock *cl2echo_lock(const 
struct cl_lock_slice *s) -{ - return container_of(s, struct echo_lock, el_cl); -} - -static inline struct cl_lock *echo_lock2cl(const struct echo_lock *ecl) -{ - return ecl->el_cl.cls_lock; -} - -static struct lu_context_key echo_thread_key; -static inline struct echo_thread_info *echo_env_info(const struct lu_env *env) -{ - struct echo_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &echo_thread_key); - LASSERT(info); - return info; -} - -static inline -struct echo_object_conf *cl2echo_conf(const struct cl_object_conf *c) -{ - return container_of(c, struct echo_object_conf, eoc_cl); -} - -/** @} echo_helpers */ -static int cl_echo_object_put(struct echo_object *eco); -static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, - struct page **pages, int npages, int async); - -struct echo_thread_info { - struct echo_object_conf eti_conf; - struct lustre_md eti_md; - - struct cl_2queue eti_queue; - struct cl_io eti_io; - struct cl_lock eti_lock; - struct lu_fid eti_fid; - struct lu_fid eti_fid2; -}; - -/* No session used right now */ -struct echo_session_info { - unsigned long dummy; -}; - -static struct kmem_cache *echo_lock_kmem; -static struct kmem_cache *echo_object_kmem; -static struct kmem_cache *echo_thread_kmem; -static struct kmem_cache *echo_session_kmem; - -static struct lu_kmem_descr echo_caches[] = { - { - .ckd_cache = &echo_lock_kmem, - .ckd_name = "echo_lock_kmem", - .ckd_size = sizeof(struct echo_lock) - }, - { - .ckd_cache = &echo_object_kmem, - .ckd_name = "echo_object_kmem", - .ckd_size = sizeof(struct echo_object) - }, - { - .ckd_cache = &echo_thread_kmem, - .ckd_name = "echo_thread_kmem", - .ckd_size = sizeof(struct echo_thread_info) - }, - { - .ckd_cache = &echo_session_kmem, - .ckd_name = "echo_session_kmem", - .ckd_size = sizeof(struct echo_session_info) - }, - { - .ckd_cache = NULL - } -}; - -/** \defgroup echo_page Page operations - * - * Echo page operations. 
- * - * @{ - */ -static int echo_page_own(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io, int nonblock) -{ - struct echo_page *ep = cl2echo_page(slice); - - if (!nonblock) - mutex_lock(&ep->ep_lock); - else if (!mutex_trylock(&ep->ep_lock)) - return -EAGAIN; - return 0; -} - -static void echo_page_disown(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) -{ - struct echo_page *ep = cl2echo_page(slice); - - LASSERT(mutex_is_locked(&ep->ep_lock)); - mutex_unlock(&ep->ep_lock); -} - -static void echo_page_discard(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - cl_page_delete(env, slice->cpl_page); -} - -static int echo_page_is_vmlocked(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - if (mutex_is_locked(&cl2echo_page(slice)->ep_lock)) - return -EBUSY; - return -ENODATA; -} - -static void echo_page_completion(const struct lu_env *env, - const struct cl_page_slice *slice, - int ioret) -{ - LASSERT(slice->cpl_page->cp_sync_io); -} - -static void echo_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ - struct echo_object *eco = cl2echo_obj(slice->cpl_obj); - - atomic_dec(&eco->eo_npages); - put_page(slice->cpl_page->cp_vmpage); -} - -static int echo_page_prep(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused) -{ - return 0; -} - -static int echo_page_print(const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t printer) -{ - struct echo_page *ep = cl2echo_page(slice); - - (*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME "-page@%p %d vm@%p\n", - ep, mutex_is_locked(&ep->ep_lock), - slice->cpl_page->cp_vmpage); - return 0; -} - -static const struct cl_page_operations echo_page_ops = { - .cpo_own = echo_page_own, - .cpo_disown = echo_page_disown, - .cpo_discard = echo_page_discard, - .cpo_fini = echo_page_fini, - .cpo_print = echo_page_print, - 
.cpo_is_vmlocked = echo_page_is_vmlocked, - .io = { - [CRT_READ] = { - .cpo_prep = echo_page_prep, - .cpo_completion = echo_page_completion, - }, - [CRT_WRITE] = { - .cpo_prep = echo_page_prep, - .cpo_completion = echo_page_completion, - } - } -}; - -/** @} echo_page */ - -/** \defgroup echo_lock Locking - * - * echo lock operations - * - * @{ - */ -static void echo_lock_fini(const struct lu_env *env, - struct cl_lock_slice *slice) -{ - struct echo_lock *ecl = cl2echo_lock(slice); - - LASSERT(list_empty(&ecl->el_chain)); - kmem_cache_free(echo_lock_kmem, ecl); -} - -static const struct cl_lock_operations echo_lock_ops = { - .clo_fini = echo_lock_fini, -}; - -/** @} echo_lock */ - -/** \defgroup echo_cl_ops cl_object operations - * - * operations for cl_object - * - * @{ - */ -static int echo_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) -{ - struct echo_page *ep = cl_object_page_slice(obj, page); - struct echo_object *eco = cl2echo_obj(obj); - - get_page(page->cp_vmpage); - mutex_init(&ep->ep_lock); - cl_page_slice_add(page, &ep->ep_cl, obj, index, &echo_page_ops); - atomic_inc(&eco->eo_npages); - return 0; -} - -static int echo_io_init(const struct lu_env *env, struct cl_object *obj, - struct cl_io *io) -{ - return 0; -} - -static int echo_lock_init(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *unused) -{ - struct echo_lock *el; - - el = kmem_cache_zalloc(echo_lock_kmem, GFP_NOFS); - if (el) { - cl_lock_slice_add(lock, &el->el_cl, obj, &echo_lock_ops); - el->el_object = cl2echo_obj(obj); - INIT_LIST_HEAD(&el->el_chain); - atomic_set(&el->el_refcount, 0); - } - return !el ? 
-ENOMEM : 0; -} - -static int echo_conf_set(const struct lu_env *env, struct cl_object *obj, - const struct cl_object_conf *conf) -{ - return 0; -} - -static const struct cl_object_operations echo_cl_obj_ops = { - .coo_page_init = echo_page_init, - .coo_lock_init = echo_lock_init, - .coo_io_init = echo_io_init, - .coo_conf_set = echo_conf_set -}; - -/** @} echo_cl_ops */ - -/** \defgroup echo_lu_ops lu_object operations - * - * operations for echo lu object. - * - * @{ - */ -static int echo_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf) -{ - struct echo_device *ed = cl2echo_dev(lu2cl_dev(obj->lo_dev)); - struct echo_client_obd *ec = ed->ed_ec; - struct echo_object *eco = cl2echo_obj(lu2cl(obj)); - const struct cl_object_conf *cconf; - struct echo_object_conf *econf; - - if (ed->ed_next) { - struct lu_object *below; - struct lu_device *under; - - under = ed->ed_next; - below = under->ld_ops->ldo_object_alloc(env, obj->lo_header, - under); - if (!below) - return -ENOMEM; - lu_object_add(obj, below); - } - - cconf = lu2cl_conf(conf); - econf = cl2echo_conf(cconf); - - LASSERT(econf->eoc_oinfo); - /* - * Transfer the oinfo pointer to eco that it won't be - * freed. 
- */ - eco->eo_oinfo = *econf->eoc_oinfo; - *econf->eoc_oinfo = NULL; - - eco->eo_dev = ed; - atomic_set(&eco->eo_npages, 0); - cl_object_page_init(lu2cl(obj), sizeof(struct echo_page)); - - spin_lock(&ec->ec_lock); - list_add_tail(&eco->eo_obj_chain, &ec->ec_objects); - spin_unlock(&ec->ec_lock); - - return 0; -} - -static void echo_object_free(const struct lu_env *env, struct lu_object *obj) -{ - struct echo_object *eco = cl2echo_obj(lu2cl(obj)); - struct echo_client_obd *ec = eco->eo_dev->ed_ec; - - LASSERT(atomic_read(&eco->eo_npages) == 0); - - spin_lock(&ec->ec_lock); - list_del_init(&eco->eo_obj_chain); - spin_unlock(&ec->ec_lock); - - lu_object_fini(obj); - lu_object_header_fini(obj->lo_header); - - kfree(eco->eo_oinfo); - kmem_cache_free(echo_object_kmem, eco); -} - -static int echo_object_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *o) -{ - struct echo_object *obj = cl2echo_obj(lu2cl(o)); - - return (*p)(env, cookie, "echoclient-object@%p", obj); -} - -static const struct lu_object_operations echo_lu_obj_ops = { - .loo_object_init = echo_object_init, - .loo_object_delete = NULL, - .loo_object_release = NULL, - .loo_object_free = echo_object_free, - .loo_object_print = echo_object_print, - .loo_object_invariant = NULL -}; - -/** @} echo_lu_ops */ - -/** \defgroup echo_lu_dev_ops lu_device operations - * - * Operations for echo lu device. - * - * @{ - */ -static struct lu_object *echo_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *dev) -{ - struct echo_object *eco; - struct lu_object *obj = NULL; - - /* we're the top dev. 
*/ - LASSERT(!hdr); - eco = kmem_cache_zalloc(echo_object_kmem, GFP_NOFS); - if (eco) { - struct cl_object_header *hdr = &eco->eo_hdr; - - obj = &echo_obj2cl(eco)->co_lu; - cl_object_header_init(hdr); - hdr->coh_page_bufsize = cfs_size_round(sizeof(struct cl_page)); - - lu_object_init(obj, &hdr->coh_lu, dev); - lu_object_add_top(&hdr->coh_lu, obj); - - eco->eo_cl.co_ops = &echo_cl_obj_ops; - obj->lo_ops = &echo_lu_obj_ops; - } - return obj; -} - -static const struct lu_device_operations echo_device_lu_ops = { - .ldo_object_alloc = echo_object_alloc, -}; - -/** @} echo_lu_dev_ops */ - -/** \defgroup echo_init Setup and teardown - * - * Init and fini functions for echo client. - * - * @{ - */ -static int echo_site_init(const struct lu_env *env, struct echo_device *ed) -{ - struct cl_site *site = &ed->ed_site_myself; - int rc; - - /* initialize site */ - rc = cl_site_init(site, &ed->ed_cl); - if (rc) { - CERROR("Cannot initialize site for echo client(%d)\n", rc); - return rc; - } - - rc = lu_site_init_finish(&site->cs_lu); - if (rc) { - cl_site_fini(site); - return rc; - } - - ed->ed_site = &site->cs_lu; - return 0; -} - -static void echo_site_fini(const struct lu_env *env, struct echo_device *ed) -{ - if (ed->ed_site) { - lu_site_fini(ed->ed_site); - ed->ed_site = NULL; - } -} - -static void *echo_thread_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct echo_thread_info *info; - - info = kmem_cache_zalloc(echo_thread_kmem, GFP_NOFS); - if (!info) - info = ERR_PTR(-ENOMEM); - return info; -} - -static void echo_thread_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct echo_thread_info *info = data; - - kmem_cache_free(echo_thread_kmem, info); -} - -static struct lu_context_key echo_thread_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = echo_thread_key_init, - .lct_fini = echo_thread_key_fini, -}; - -static void *echo_session_key_init(const struct lu_context *ctx, - struct lu_context_key *key) 
-{ - struct echo_session_info *session; - - session = kmem_cache_zalloc(echo_session_kmem, GFP_NOFS); - if (!session) - session = ERR_PTR(-ENOMEM); - return session; -} - -static void echo_session_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct echo_session_info *session = data; - - kmem_cache_free(echo_session_kmem, session); -} - -static struct lu_context_key echo_session_key = { - .lct_tags = LCT_SESSION, - .lct_init = echo_session_key_init, - .lct_fini = echo_session_key_fini, -}; - -LU_TYPE_INIT_FINI(echo, &echo_thread_key, &echo_session_key); - -static struct lu_device *echo_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg) -{ - struct lu_device *next; - struct echo_device *ed; - struct cl_device *cd; - struct obd_device *obd = NULL; /* to keep compiler happy */ - struct obd_device *tgt; - const char *tgt_type_name; - int rc, err; - - ed = kzalloc(sizeof(*ed), GFP_NOFS); - if (!ed) { - rc = -ENOMEM; - goto out; - } - - cd = &ed->ed_cl; - rc = cl_device_init(cd, t); - if (rc) - goto out_free; - - cd->cd_lu_dev.ld_ops = &echo_device_lu_ops; - - obd = class_name2obd(lustre_cfg_string(cfg, 0)); - LASSERT(obd); - LASSERT(env); - - tgt = class_name2obd(lustre_cfg_string(cfg, 1)); - if (!tgt) { - CERROR("Can not find tgt device %s\n", - lustre_cfg_string(cfg, 1)); - rc = -ENODEV; - goto out_device_fini; - } - - next = tgt->obd_lu_dev; - if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) { - CERROR("echo MDT client must be run on server\n"); - rc = -EOPNOTSUPP; - goto out_device_fini; - } - - rc = echo_site_init(env, ed); - if (rc) - goto out_device_fini; - - rc = echo_client_setup(env, obd, cfg); - if (rc) - goto out_site_fini; - - ed->ed_ec = &obd->u.echo_client; - - /* if echo client is to be stacked upon ost device, the next is - * NULL since ost is not a clio device so far - */ - if (next && !lu_device_is_cl(next)) - next = NULL; - - tgt_type_name = 
tgt->obd_type->typ_name; - if (next) { - if (next->ld_site) { - rc = -EBUSY; - goto out_cleanup; - } - - next->ld_site = ed->ed_site; - rc = next->ld_type->ldt_ops->ldto_device_init(env, next, - next->ld_type->ldt_name, - NULL); - if (rc) - goto out_cleanup; - - } else { - LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0); - } - - ed->ed_next = next; - return &cd->cd_lu_dev; - -out_cleanup: - err = echo_client_cleanup(obd); - if (err) - CERROR("Cleanup obd device %s error(%d)\n", - obd->obd_name, err); -out_site_fini: - echo_site_fini(env, ed); -out_device_fini: - cl_device_fini(&ed->ed_cl); -out_free: - kfree(ed); -out: - return ERR_PTR(rc); -} - -static int echo_device_init(const struct lu_env *env, struct lu_device *d, - const char *name, struct lu_device *next) -{ - LBUG(); - return 0; -} - -static struct lu_device *echo_device_fini(const struct lu_env *env, - struct lu_device *d) -{ - struct echo_device *ed = cl2echo_dev(lu2cl_dev(d)); - struct lu_device *next = ed->ed_next; - - while (next) - next = next->ld_type->ldt_ops->ldto_device_fini(env, next); - return NULL; -} - -static void echo_lock_release(const struct lu_env *env, - struct echo_lock *ecl, - int still_used) -{ - struct cl_lock *clk = echo_lock2cl(ecl); - - cl_lock_release(env, clk); -} - -static struct lu_device *echo_device_free(const struct lu_env *env, - struct lu_device *d) -{ - struct echo_device *ed = cl2echo_dev(lu2cl_dev(d)); - struct echo_client_obd *ec = ed->ed_ec; - struct echo_object *eco; - struct lu_device *next = ed->ed_next; - - CDEBUG(D_INFO, "echo device:%p is going to be freed, next = %p\n", - ed, next); - - lu_site_purge(env, ed->ed_site, -1); - - /* check if there are objects still alive. - * It shouldn't have any object because lu_site_purge would cleanup - * all of cached objects. Anyway, probably the echo device is being - * parallelly accessed. 
- */ - spin_lock(&ec->ec_lock); - list_for_each_entry(eco, &ec->ec_objects, eo_obj_chain) - eco->eo_deleted = 1; - spin_unlock(&ec->ec_lock); - - /* purge again */ - lu_site_purge(env, ed->ed_site, -1); - - CDEBUG(D_INFO, - "Waiting for the reference of echo object to be dropped\n"); - - /* Wait for the last reference to be dropped. */ - spin_lock(&ec->ec_lock); - while (!list_empty(&ec->ec_objects)) { - spin_unlock(&ec->ec_lock); - CERROR("echo_client still has objects at cleanup time, wait for 1 second\n"); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); - lu_site_purge(env, ed->ed_site, -1); - spin_lock(&ec->ec_lock); - } - spin_unlock(&ec->ec_lock); - - LASSERT(list_empty(&ec->ec_locks)); - - CDEBUG(D_INFO, "No object exists, exiting...\n"); - - echo_client_cleanup(d->ld_obd); - - while (next) - next = next->ld_type->ldt_ops->ldto_device_free(env, next); - - LASSERT(ed->ed_site == d->ld_site); - echo_site_fini(env, ed); - cl_device_fini(&ed->ed_cl); - kfree(ed); - - cl_env_cache_purge(~0); - - return NULL; -} - -static const struct lu_device_type_operations echo_device_type_ops = { - .ldto_init = echo_type_init, - .ldto_fini = echo_type_fini, - - .ldto_start = echo_type_start, - .ldto_stop = echo_type_stop, - - .ldto_device_alloc = echo_device_alloc, - .ldto_device_free = echo_device_free, - .ldto_device_init = echo_device_init, - .ldto_device_fini = echo_device_fini -}; - -static struct lu_device_type echo_device_type = { - .ldt_tags = LU_DEVICE_CL, - .ldt_name = LUSTRE_ECHO_CLIENT_NAME, - .ldt_ops = &echo_device_type_ops, - .ldt_ctx_tags = LCT_CL_THREAD, -}; - -/** @} echo_init */ - -/** \defgroup echo_exports Exported operations - * - * exporting functions to echo client - * - * @{ - */ - -/* Interfaces to echo client obd device */ -static struct echo_object * -cl_echo_object_find(struct echo_device *d, const struct ost_id *oi) -{ - struct lu_env *env; - struct echo_thread_info *info; - struct echo_object_conf *conf; - struct lov_oinfo 
*oinfo = NULL; - struct echo_object *eco; - struct cl_object *obj; - struct lu_fid *fid; - u16 refcheck; - int rc; - - LASSERTF(ostid_id(oi), DOSTID "\n", POSTID(oi)); - LASSERTF(ostid_seq(oi) == FID_SEQ_ECHO, DOSTID "\n", POSTID(oi)); - - /* Never return an object if the obd is to be freed. */ - if (echo_dev2cl(d)->cd_lu_dev.ld_obd->obd_stopping) - return ERR_PTR(-ENODEV); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return (void *)env; - - info = echo_env_info(env); - conf = &info->eti_conf; - if (d->ed_next) { - oinfo = kzalloc(sizeof(*oinfo), GFP_NOFS); - if (!oinfo) { - eco = ERR_PTR(-ENOMEM); - goto out; - } - - oinfo->loi_oi = *oi; - conf->eoc_cl.u.coc_oinfo = oinfo; - } - - /* - * If echo_object_init() is successful then ownership of oinfo - * is transferred to the object. - */ - conf->eoc_oinfo = &oinfo; - - fid = &info->eti_fid; - rc = ostid_to_fid(fid, (struct ost_id *)oi, 0); - if (rc != 0) { - eco = ERR_PTR(rc); - goto out; - } - - /* In the function below, .hs_keycmp resolves to - * lu_obj_hop_keycmp() - */ - /* coverity[overrun-buffer-val] */ - obj = cl_object_find(env, echo_dev2cl(d), fid, &conf->eoc_cl); - if (IS_ERR(obj)) { - eco = (void *)obj; - goto out; - } - - eco = cl2echo_obj(obj); - if (eco->eo_deleted) { - cl_object_put(env, obj); - eco = ERR_PTR(-EAGAIN); - } - -out: - kfree(oinfo); - cl_env_put(env, &refcheck); - return eco; -} - -static int cl_echo_object_put(struct echo_object *eco) -{ - struct lu_env *env; - struct cl_object *obj = echo_obj2cl(eco); - u16 refcheck; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - /* an external function to kill an object? 
*/ - if (eco->eo_deleted) { - struct lu_object_header *loh = obj->co_lu.lo_header; - - LASSERT(&eco->eo_hdr == luh2coh(loh)); - set_bit(LU_OBJECT_HEARD_BANSHEE, &loh->loh_flags); - } - - cl_object_put(env, obj); - cl_env_put(env, &refcheck); - return 0; -} - -static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco, - u64 start, u64 end, int mode, - __u64 *cookie, __u32 enqflags) -{ - struct cl_io *io; - struct cl_lock *lck; - struct cl_object *obj; - struct cl_lock_descr *descr; - struct echo_thread_info *info; - int rc = -ENOMEM; - - info = echo_env_info(env); - io = &info->eti_io; - lck = &info->eti_lock; - obj = echo_obj2cl(eco); - - memset(lck, 0, sizeof(*lck)); - descr = &lck->cll_descr; - descr->cld_obj = obj; - descr->cld_start = cl_index(obj, start); - descr->cld_end = cl_index(obj, end); - descr->cld_mode = mode == LCK_PW ? CLM_WRITE : CLM_READ; - descr->cld_enq_flags = enqflags; - io->ci_obj = obj; - - rc = cl_lock_request(env, io, lck); - if (rc == 0) { - struct echo_client_obd *ec = eco->eo_dev->ed_ec; - struct echo_lock *el; - - el = cl2echo_lock(cl_lock_at(lck, &echo_device_type)); - spin_lock(&ec->ec_lock); - if (list_empty(&el->el_chain)) { - list_add(&el->el_chain, &ec->ec_locks); - el->el_cookie = ++ec->ec_unique; - } - atomic_inc(&el->el_refcount); - *cookie = el->el_cookie; - spin_unlock(&ec->ec_lock); - } - return rc; -} - -static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed, - __u64 cookie) -{ - struct echo_client_obd *ec = ed->ed_ec; - struct echo_lock *ecl = NULL; - struct list_head *el; - int found = 0, still_used = 0; - - spin_lock(&ec->ec_lock); - list_for_each(el, &ec->ec_locks) { - ecl = list_entry(el, struct echo_lock, el_chain); - CDEBUG(D_INFO, "ecl: %p, cookie: %#llx\n", ecl, ecl->el_cookie); - found = (ecl->el_cookie == cookie); - if (found) { - if (atomic_dec_and_test(&ecl->el_refcount)) - list_del_init(&ecl->el_chain); - else - still_used = 1; - break; - } - } - spin_unlock(&ec->ec_lock); - - if 
(!found) - return -ENOENT; - - echo_lock_release(env, ecl, still_used); - return 0; -} - -static void echo_commit_callback(const struct lu_env *env, struct cl_io *io, - struct cl_page *page) -{ - struct echo_thread_info *info; - struct cl_2queue *queue; - - info = echo_env_info(env); - LASSERT(io == &info->eti_io); - - queue = &info->eti_queue; - cl_page_list_add(&queue->c2_qout, page); -} - -static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset, - struct page **pages, int npages, int async) -{ - struct lu_env *env; - struct echo_thread_info *info; - struct cl_object *obj = echo_obj2cl(eco); - struct echo_device *ed = eco->eo_dev; - struct cl_2queue *queue; - struct cl_io *io; - struct cl_page *clp; - struct lustre_handle lh = { 0 }; - size_t page_size = cl_page_size(obj); - u16 refcheck; - int rc; - int i; - - LASSERT((offset & ~PAGE_MASK) == 0); - LASSERT(ed->ed_next); - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - info = echo_env_info(env); - io = &info->eti_io; - queue = &info->eti_queue; - - cl_2queue_init(queue); - - io->ci_ignore_layout = 1; - rc = cl_io_init(env, io, CIT_MISC, obj); - if (rc < 0) - goto out; - LASSERT(rc == 0); - - rc = cl_echo_enqueue0(env, eco, offset, - offset + npages * PAGE_SIZE - 1, - rw == READ ? LCK_PR : LCK_PW, &lh.cookie, - CEF_NEVER); - if (rc < 0) - goto error_lock; - - for (i = 0; i < npages; i++) { - LASSERT(pages[i]); - clp = cl_page_find(env, obj, cl_index(obj, offset), - pages[i], CPT_TRANSIENT); - if (IS_ERR(clp)) { - rc = PTR_ERR(clp); - break; - } - LASSERT(clp->cp_type == CPT_TRANSIENT); - - rc = cl_page_own(env, io, clp); - if (rc) { - LASSERT(clp->cp_state == CPS_FREEING); - cl_page_put(env, clp); - break; - } - /* - * Add a page to the incoming page list of 2-queue. - */ - cl_page_list_add(&queue->c2_qin, clp); - - /* drop the reference count for cl_page_find, so that the page - * will be freed in cl_2queue_fini. 
- */ - cl_page_put(env, clp); - cl_page_clip(env, clp, 0, page_size); - - offset += page_size; - } - - if (rc == 0) { - enum cl_req_type typ = rw == READ ? CRT_READ : CRT_WRITE; - - async = async && (typ == CRT_WRITE); - if (async) - rc = cl_io_commit_async(env, io, &queue->c2_qin, - 0, PAGE_SIZE, - echo_commit_callback); - else - rc = cl_io_submit_sync(env, io, typ, queue, 0); - CDEBUG(D_INFO, "echo_client %s write returns %d\n", - async ? "async" : "sync", rc); - } - - cl_echo_cancel0(env, ed, lh.cookie); -error_lock: - cl_2queue_discard(env, io, queue); - cl_2queue_disown(env, io, queue); - cl_2queue_fini(env, queue); - cl_io_fini(env, io); -out: - cl_env_put(env, &refcheck); - return rc; -} - -/** @} echo_exports */ - -static u64 last_object_id; - -static int echo_create_object(const struct lu_env *env, struct echo_device *ed, - struct obdo *oa) -{ - struct echo_object *eco; - struct echo_client_obd *ec = ed->ed_ec; - int rc; - int created = 0; - - if (!(oa->o_valid & OBD_MD_FLID) || - !(oa->o_valid & OBD_MD_FLGROUP) || - !fid_seq_is_echo(ostid_seq(&oa->o_oi))) { - CERROR("invalid oid " DOSTID "\n", POSTID(&oa->o_oi)); - return -EINVAL; - } - - if (!ostid_id(&oa->o_oi)) { - rc = ostid_set_id(&oa->o_oi, ++last_object_id); - if (rc) - goto failed; - } - - rc = obd_create(env, ec->ec_exp, oa); - if (rc != 0) { - CERROR("Cannot create objects: rc = %d\n", rc); - goto failed; - } - created = 1; - - oa->o_valid |= OBD_MD_FLID; - - eco = cl_echo_object_find(ed, &oa->o_oi); - if (IS_ERR(eco)) { - rc = PTR_ERR(eco); - goto failed; - } - cl_echo_object_put(eco); - - CDEBUG(D_INFO, "oa oid " DOSTID "\n", POSTID(&oa->o_oi)); - - failed: - if (created && rc) - obd_destroy(env, ec->ec_exp, oa); - if (rc) - CERROR("create object failed with: rc = %d\n", rc); - return rc; -} - -static int echo_get_object(struct echo_object **ecop, struct echo_device *ed, - struct obdo *oa) -{ - struct echo_object *eco; - int rc; - - if (!(oa->o_valid & OBD_MD_FLID) || !(oa->o_valid & 
OBD_MD_FLGROUP) || - !ostid_id(&oa->o_oi)) { - CERROR("invalid oid " DOSTID "\n", POSTID(&oa->o_oi)); - return -EINVAL; - } - - rc = 0; - eco = cl_echo_object_find(ed, &oa->o_oi); - if (!IS_ERR(eco)) - *ecop = eco; - else - rc = PTR_ERR(eco); - return rc; -} - -static void echo_put_object(struct echo_object *eco) -{ - int rc; - - rc = cl_echo_object_put(eco); - if (rc) - CERROR("%s: echo client drop an object failed: rc = %d\n", - eco->eo_dev->ed_ec->ec_exp->exp_obd->obd_name, rc); -} - -static void -echo_client_page_debug_setup(struct page *page, int rw, u64 id, - u64 offset, u64 count) -{ - char *addr; - u64 stripe_off; - u64 stripe_id; - int delta; - - /* no partial pages on the client */ - LASSERT(count == PAGE_SIZE); - - addr = kmap(page); - - for (delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { - if (rw == OBD_BRW_WRITE) { - stripe_off = offset + delta; - stripe_id = id; - } else { - stripe_off = 0xdeadbeef00c0ffeeULL; - stripe_id = 0xdeadbeef00c0ffeeULL; - } - block_debug_setup(addr + delta, OBD_ECHO_BLOCK_SIZE, - stripe_off, stripe_id); - } - - kunmap(page); -} - -static int echo_client_page_debug_check(struct page *page, u64 id, - u64 offset, u64 count) -{ - u64 stripe_off; - u64 stripe_id; - char *addr; - int delta; - int rc; - int rc2; - - /* no partial pages on the client */ - LASSERT(count == PAGE_SIZE); - - addr = kmap(page); - - for (rc = delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { - stripe_off = offset + delta; - stripe_id = id; - - rc2 = block_debug_check("test_brw", - addr + delta, OBD_ECHO_BLOCK_SIZE, - stripe_off, stripe_id); - if (rc2 != 0) { - CERROR("Error in echo object %#llx\n", id); - rc = rc2; - } - } - - kunmap(page); - return rc; -} - -static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa, - struct echo_object *eco, u64 offset, - u64 count, int async) -{ - u32 npages; - struct brw_page *pga; - struct brw_page *pgp; - struct page **pages; - u64 off; - int i; - int rc; - int verify; - 
gfp_t gfp_mask; - int brw_flags = 0; - - verify = (ostid_id(&oa->o_oi) != ECHO_PERSISTENT_OBJID && - (oa->o_valid & OBD_MD_FLFLAGS) != 0 && - (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0); - - gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_KERNEL : GFP_HIGHUSER; - - LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); - - if (count <= 0 || - (count & (~PAGE_MASK)) != 0) - return -EINVAL; - - /* XXX think again with misaligned I/O */ - npages = count >> PAGE_SHIFT; - - if (rw == OBD_BRW_WRITE) - brw_flags = OBD_BRW_ASYNC; - - pga = kcalloc(npages, sizeof(*pga), GFP_NOFS); - if (!pga) - return -ENOMEM; - - pages = kcalloc(npages, sizeof(*pages), GFP_NOFS); - if (!pages) { - kfree(pga); - return -ENOMEM; - } - - for (i = 0, pgp = pga, off = offset; - i < npages; - i++, pgp++, off += PAGE_SIZE) { - LASSERT(!pgp->pg); /* for cleanup */ - - rc = -ENOMEM; - pgp->pg = alloc_page(gfp_mask); - if (!pgp->pg) - goto out; - - pages[i] = pgp->pg; - pgp->count = PAGE_SIZE; - pgp->off = off; - pgp->flag = brw_flags; - - if (verify) - echo_client_page_debug_setup(pgp->pg, rw, - ostid_id(&oa->o_oi), off, - pgp->count); - } - - /* brw mode can only be used at client */ - LASSERT(ed->ed_next); - rc = cl_echo_object_brw(eco, rw, offset, pages, npages, async); - - out: - if (rc != 0 || rw != OBD_BRW_READ) - verify = 0; - - for (i = 0, pgp = pga; i < npages; i++, pgp++) { - if (!pgp->pg) - continue; - - if (verify) { - int vrc; - - vrc = echo_client_page_debug_check(pgp->pg, - ostid_id(&oa->o_oi), - pgp->off, pgp->count); - if (vrc != 0 && rc == 0) - rc = vrc; - } - __free_page(pgp->pg); - } - kfree(pga); - kfree(pages); - return rc; -} - -static int echo_client_prep_commit(const struct lu_env *env, - struct obd_export *exp, int rw, - struct obdo *oa, struct echo_object *eco, - u64 offset, u64 count, - u64 batch, int async) -{ - struct obd_ioobj ioo; - struct niobuf_local *lnb; - struct niobuf_remote rnb; - u64 off; - u64 npages, tot_pages; - int i, ret = 0, brw_flags = 0; - - if (count <= 
0 || (count & (~PAGE_MASK)) != 0) - return -EINVAL; - - npages = batch >> PAGE_SHIFT; - tot_pages = count >> PAGE_SHIFT; - - lnb = kcalloc(npages, sizeof(struct niobuf_local), GFP_NOFS); - if (!lnb) { - ret = -ENOMEM; - goto out; - } - - if (rw == OBD_BRW_WRITE && async) - brw_flags |= OBD_BRW_ASYNC; - - obdo_to_ioobj(oa, &ioo); - - off = offset; - - for (; tot_pages > 0; tot_pages -= npages) { - int lpages; - - if (tot_pages < npages) - npages = tot_pages; - - rnb.rnb_offset = off; - rnb.rnb_len = npages * PAGE_SIZE; - rnb.rnb_flags = brw_flags; - ioo.ioo_bufcnt = 1; - off += npages * PAGE_SIZE; - - lpages = npages; - ret = obd_preprw(env, rw, exp, oa, 1, &ioo, &rnb, &lpages, lnb); - if (ret != 0) - goto out; - - for (i = 0; i < lpages; i++) { - struct page *page = lnb[i].lnb_page; - - /* read past eof? */ - if (!page && lnb[i].lnb_rc == 0) - continue; - - if (async) - lnb[i].lnb_flags |= OBD_BRW_ASYNC; - - if (ostid_id(&oa->o_oi) == ECHO_PERSISTENT_OBJID || - (oa->o_valid & OBD_MD_FLFLAGS) == 0 || - (oa->o_flags & OBD_FL_DEBUG_CHECK) == 0) - continue; - - if (rw == OBD_BRW_WRITE) - echo_client_page_debug_setup(page, rw, - ostid_id(&oa->o_oi), - lnb[i].lnb_file_offset, - lnb[i].lnb_len); - else - echo_client_page_debug_check(page, - ostid_id(&oa->o_oi), - lnb[i].lnb_file_offset, - lnb[i].lnb_len); - } - - ret = obd_commitrw(env, rw, exp, oa, 1, &ioo, &rnb, npages, lnb, - ret); - if (ret != 0) - goto out; - - /* Reuse env context. 
*/ - lu_context_exit((struct lu_context *)&env->le_ctx); - lu_context_enter((struct lu_context *)&env->le_ctx); - } - -out: - kfree(lnb); - return ret; -} - -static int echo_client_brw_ioctl(const struct lu_env *env, int rw, - struct obd_export *exp, - struct obd_ioctl_data *data) -{ - struct obd_device *obd = class_exp2obd(exp); - struct echo_device *ed = obd2echo_dev(obd); - struct echo_client_obd *ec = ed->ed_ec; - struct obdo *oa = &data->ioc_obdo1; - struct echo_object *eco; - int rc; - int async = 1; - long test_mode; - - LASSERT(oa->o_valid & OBD_MD_FLGROUP); - - rc = echo_get_object(&eco, ed, oa); - if (rc) - return rc; - - oa->o_valid &= ~OBD_MD_FLHANDLE; - - /* OFD/obdfilter works only via prep/commit */ - test_mode = (long)data->ioc_pbuf1; - if (test_mode == 1) - async = 0; - - if (!ed->ed_next && test_mode != 3) { - test_mode = 3; - data->ioc_plen1 = data->ioc_count; - } - - /* Truncate batch size to maximum */ - if (data->ioc_plen1 > PTLRPC_MAX_BRW_SIZE) - data->ioc_plen1 = PTLRPC_MAX_BRW_SIZE; - - switch (test_mode) { - case 1: - /* fall through */ - case 2: - rc = echo_client_kbrw(ed, rw, oa, eco, data->ioc_offset, - data->ioc_count, async); - break; - case 3: - rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa, eco, - data->ioc_offset, data->ioc_count, - data->ioc_plen1, async); - break; - default: - rc = -EINVAL; - } - echo_put_object(eco); - return rc; -} - -static int -echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void __user *uarg) -{ - struct obd_device *obd = exp->exp_obd; - struct echo_device *ed = obd2echo_dev(obd); - struct echo_client_obd *ec = ed->ed_ec; - struct echo_object *eco; - struct obd_ioctl_data *data = karg; - struct lu_env *env; - struct obdo *oa; - struct lu_fid fid; - int rw = OBD_BRW_READ; - int rc = 0; - - oa = &data->ioc_obdo1; - if (!(oa->o_valid & OBD_MD_FLGROUP)) { - oa->o_valid |= OBD_MD_FLGROUP; - ostid_set_seq_echo(&oa->o_oi); - } - - /* This FID is unpacked just for 
validation at this point */ - rc = ostid_to_fid(&fid, &oa->o_oi, 0); - if (rc < 0) - return rc; - - env = kzalloc(sizeof(*env), GFP_NOFS); - if (!env) - return -ENOMEM; - - rc = lu_env_init(env, LCT_DT_THREAD); - if (rc) { - rc = -ENOMEM; - goto out; - } - - switch (cmd) { - case OBD_IOC_CREATE: /* may create echo object */ - if (!capable(CAP_SYS_ADMIN)) { - rc = -EPERM; - goto out; - } - - rc = echo_create_object(env, ed, oa); - goto out; - - case OBD_IOC_DESTROY: - if (!capable(CAP_SYS_ADMIN)) { - rc = -EPERM; - goto out; - } - - rc = echo_get_object(&eco, ed, oa); - if (rc == 0) { - rc = obd_destroy(env, ec->ec_exp, oa); - if (rc == 0) - eco->eo_deleted = 1; - echo_put_object(eco); - } - goto out; - - case OBD_IOC_GETATTR: - rc = echo_get_object(&eco, ed, oa); - if (rc == 0) { - rc = obd_getattr(env, ec->ec_exp, oa); - echo_put_object(eco); - } - goto out; - - case OBD_IOC_SETATTR: - if (!capable(CAP_SYS_ADMIN)) { - rc = -EPERM; - goto out; - } - - rc = echo_get_object(&eco, ed, oa); - if (rc == 0) { - rc = obd_setattr(env, ec->ec_exp, oa); - echo_put_object(eco); - } - goto out; - - case OBD_IOC_BRW_WRITE: - if (!capable(CAP_SYS_ADMIN)) { - rc = -EPERM; - goto out; - } - - rw = OBD_BRW_WRITE; - /* fall through */ - case OBD_IOC_BRW_READ: - rc = echo_client_brw_ioctl(env, rw, exp, data); - goto out; - - default: - CERROR("echo_ioctl(): unrecognised ioctl %#x\n", cmd); - rc = -ENOTTY; - goto out; - } - -out: - lu_env_fini(env); - kfree(env); - - return rc; -} - -static int echo_client_setup(const struct lu_env *env, - struct obd_device *obddev, struct lustre_cfg *lcfg) -{ - struct echo_client_obd *ec = &obddev->u.echo_client; - struct obd_device *tgt; - struct obd_uuid echo_uuid = { "ECHO_UUID" }; - struct obd_connect_data *ocd = NULL; - int rc; - - if (lcfg->lcfg_bufcount < 2 || LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { - CERROR("requires a TARGET OBD name\n"); - return -EINVAL; - } - - tgt = class_name2obd(lustre_cfg_string(lcfg, 1)); - if (!tgt || !tgt->obd_attached 
|| !tgt->obd_set_up) { - CERROR("device not attached or not set up (%s)\n", - lustre_cfg_string(lcfg, 1)); - return -EINVAL; - } - - spin_lock_init(&ec->ec_lock); - INIT_LIST_HEAD(&ec->ec_objects); - INIT_LIST_HEAD(&ec->ec_locks); - ec->ec_unique = 0; - - ocd = kzalloc(sizeof(*ocd), GFP_NOFS); - if (!ocd) - return -ENOMEM; - - ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL | - OBD_CONNECT_BRW_SIZE | - OBD_CONNECT_GRANT | OBD_CONNECT_FULL20 | - OBD_CONNECT_64BITHASH | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_FID; - ocd->ocd_brw_size = DT_MAX_BRW_SIZE; - ocd->ocd_version = LUSTRE_VERSION_CODE; - ocd->ocd_group = FID_SEQ_ECHO; - - rc = obd_connect(env, &ec->ec_exp, tgt, &echo_uuid, ocd, NULL); - - kfree(ocd); - - if (rc != 0) { - CERROR("fail to connect to device %s\n", - lustre_cfg_string(lcfg, 1)); - return rc; - } - - return rc; -} - -static int echo_client_cleanup(struct obd_device *obddev) -{ - struct echo_client_obd *ec = &obddev->u.echo_client; - int rc; - - if (!list_empty(&obddev->obd_exports)) { - CERROR("still has clients!\n"); - return -EBUSY; - } - - LASSERT(atomic_read(&ec->ec_exp->exp_refcount) > 0); - rc = obd_disconnect(ec->ec_exp); - if (rc != 0) - CERROR("fail to disconnect device: %d\n", rc); - - return rc; -} - -static int echo_client_connect(const struct lu_env *env, - struct obd_export **exp, - struct obd_device *src, struct obd_uuid *cluuid, - struct obd_connect_data *data, void *localdata) -{ - int rc; - struct lustre_handle conn = { 0 }; - - rc = class_connect(&conn, src, cluuid); - if (rc == 0) - *exp = class_conn2export(&conn); - - return rc; -} - -static int echo_client_disconnect(struct obd_export *exp) -{ - int rc; - - if (!exp) { - rc = -EINVAL; - goto out; - } - - rc = class_disconnect(exp); - goto out; - out: - return rc; -} - -static struct obd_ops echo_client_obd_ops = { - .owner = THIS_MODULE, - .iocontrol = echo_client_iocontrol, - .connect = echo_client_connect, - .disconnect = echo_client_disconnect -}; - 
-static int echo_client_init(void) -{ - int rc; - - rc = lu_kmem_init(echo_caches); - if (rc == 0) { - rc = class_register_type(&echo_client_obd_ops, NULL, - LUSTRE_ECHO_CLIENT_NAME, - &echo_device_type); - if (rc) - lu_kmem_fini(echo_caches); - } - return rc; -} - -static void echo_client_exit(void) -{ - class_unregister_type(LUSTRE_ECHO_CLIENT_NAME); - lu_kmem_fini(echo_caches); -} - -static int __init obdecho_init(void) -{ - int rc; - - LCONSOLE_INFO("Echo OBD driver; http://www.lustre.org/\n"); - - LASSERT(PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0); - - rc = libcfs_setup(); - if (rc) - return rc; - - return echo_client_init(); -} - -static void /*__exit*/ obdecho_exit(void) -{ - echo_client_exit(); -} - -MODULE_AUTHOR("OpenSFS, Inc. "); -MODULE_DESCRIPTION("Lustre Echo Client test driver"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(obdecho_init); -module_exit(obdecho_exit); - -/** @} echo_client */ diff --git a/drivers/staging/lustre/lustre/obdecho/echo_internal.h b/drivers/staging/lustre/lustre/obdecho/echo_internal.h deleted file mode 100644 index 42faa164fabb..000000000000 --- a/drivers/staging/lustre/lustre/obdecho/echo_internal.h +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Use is subject to license terms. - * - * Copyright (c) 2012, Whamcloud, Inc. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/obdecho/echo_internal.h - */ - -#ifndef _ECHO_INTERNAL_H -#define _ECHO_INTERNAL_H - -/* The persistent object (i.e. actually stores stuff!) */ -#define ECHO_PERSISTENT_OBJID 1ULL -#define ECHO_PERSISTENT_SIZE ((__u64)(1 << 20)) - -/* block size to use for data verification */ -#define OBD_ECHO_BLOCK_SIZE (4 << 10) - -#endif diff --git a/drivers/staging/lustre/lustre/osc/Makefile b/drivers/staging/lustre/lustre/osc/Makefile deleted file mode 100644 index 30dec90e64e8..000000000000 --- a/drivers/staging/lustre/lustre/osc/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LUSTRE_FS) += osc.o -osc-y := osc_request.o osc_dev.o osc_object.o \ - osc_page.o osc_lock.o osc_io.o osc_quota.o osc_cache.o lproc_osc.o diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c deleted file mode 100644 index 6a705bc5420c..000000000000 --- a/drivers/staging/lustre/lustre/osc/lproc_osc.c +++ /dev/null @@ -1,838 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include -#include -#include "osc_internal.h" - -static ssize_t active_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - - return sprintf(buf, "%d\n", !dev->u.cli.cl_import->imp_deactive); -} - -static ssize_t active_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - if (val > 1) - return -ERANGE; - - /* opposite senses */ - if (dev->u.cli.cl_import->imp_deactive == val) - rc = ptlrpc_set_import_active(dev->u.cli.cl_import, val); - else - CDEBUG(D_CONFIG, "activate %ld: ignoring repeat request\n", - val); - - return count; -} -LUSTRE_RW_ATTR(active); - -static ssize_t max_rpcs_in_flight_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - - return sprintf(buf, "%u\n", cli->cl_max_rpcs_in_flight); -} - -static ssize_t max_rpcs_in_flight_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - int rc; - unsigned long val; - 
int adding, added, req_count; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - if (val < 1 || val > OSC_MAX_RIF_MAX) - return -ERANGE; - - adding = val - cli->cl_max_rpcs_in_flight; - req_count = atomic_read(&osc_pool_req_count); - if (adding > 0 && req_count < osc_reqpool_maxreqcount) { - /* - * There might be some race which will cause over-limit - * allocation, but it is fine. - */ - if (req_count + adding > osc_reqpool_maxreqcount) - adding = osc_reqpool_maxreqcount - req_count; - - added = osc_rq_pool->prp_populate(osc_rq_pool, adding); - atomic_add(added, &osc_pool_req_count); - } - - spin_lock(&cli->cl_loi_list_lock); - cli->cl_max_rpcs_in_flight = val; - client_adjust_max_dirty(cli); - spin_unlock(&cli->cl_loi_list_lock); - - return count; -} -LUSTRE_RW_ATTR(max_rpcs_in_flight); - -static ssize_t max_dirty_mb_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - long val; - int mult; - - spin_lock(&cli->cl_loi_list_lock); - val = cli->cl_dirty_max_pages; - spin_unlock(&cli->cl_loi_list_lock); - - mult = 1 << (20 - PAGE_SHIFT); - return lprocfs_read_frac_helper(buf, PAGE_SIZE, val, mult); -} - -static ssize_t max_dirty_mb_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - int rc; - unsigned long pages_number; - - rc = kstrtoul(buffer, 10, &pages_number); - if (rc) - return rc; - - pages_number *= 1 << (20 - PAGE_SHIFT); /* MB -> pages */ - - if (pages_number <= 0 || - pages_number >= OSC_MAX_DIRTY_MB_MAX << (20 - PAGE_SHIFT) || - pages_number > totalram_pages / 4) /* 1/4 of RAM */ - return -ERANGE; - - spin_lock(&cli->cl_loi_list_lock); - cli->cl_dirty_max_pages = pages_number; - osc_wake_cache_waiters(cli); - spin_unlock(&cli->cl_loi_list_lock); - - 
return count; -} -LUSTRE_RW_ATTR(max_dirty_mb); - -static int osc_cached_mb_seq_show(struct seq_file *m, void *v) -{ - struct obd_device *dev = m->private; - struct client_obd *cli = &dev->u.cli; - int shift = 20 - PAGE_SHIFT; - - seq_printf(m, - "used_mb: %ld\n" - "busy_cnt: %ld\n" - "reclaim: %llu\n", - (atomic_long_read(&cli->cl_lru_in_list) + - atomic_long_read(&cli->cl_lru_busy)) >> shift, - atomic_long_read(&cli->cl_lru_busy), - cli->cl_lru_reclaim); - - return 0; -} - -/* shrink the number of caching pages to a specific number */ -static ssize_t osc_cached_mb_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct obd_device *dev = ((struct seq_file *)file->private_data)->private; - struct client_obd *cli = &dev->u.cli; - long pages_number, rc; - char kernbuf[128]; - int mult; - u64 val; - - if (count >= sizeof(kernbuf)) - return -EINVAL; - - if (copy_from_user(kernbuf, buffer, count)) - return -EFAULT; - kernbuf[count] = 0; - - mult = 1 << (20 - PAGE_SHIFT); - buffer += lprocfs_find_named_value(kernbuf, "used_mb:", &count) - - kernbuf; - rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult); - if (rc) - return rc; - - if (val > LONG_MAX) - return -ERANGE; - pages_number = (long)val; - - if (pages_number < 0) - return -ERANGE; - - rc = atomic_long_read(&cli->cl_lru_in_list) - pages_number; - if (rc > 0) { - struct lu_env *env; - u16 refcheck; - - env = cl_env_get(&refcheck); - if (!IS_ERR(env)) { - (void)osc_lru_shrink(env, cli, rc, true); - cl_env_put(env, &refcheck); - } - } - - return count; -} - -LPROC_SEQ_FOPS(osc_cached_mb); - -static ssize_t cur_dirty_bytes_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - int len; - - spin_lock(&cli->cl_loi_list_lock); - len = sprintf(buf, "%lu\n", cli->cl_dirty_pages << PAGE_SHIFT); - spin_unlock(&cli->cl_loi_list_lock); - - 
return len; -} -LUSTRE_RO_ATTR(cur_dirty_bytes); - -static ssize_t cur_grant_bytes_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - int len; - - spin_lock(&cli->cl_loi_list_lock); - len = sprintf(buf, "%lu\n", cli->cl_avail_grant); - spin_unlock(&cli->cl_loi_list_lock); - - return len; -} - -static ssize_t cur_grant_bytes_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &obd->u.cli; - int rc; - unsigned long long val; - - rc = kstrtoull(buffer, 10, &val); - if (rc) - return rc; - - /* this is only for shrinking grant */ - spin_lock(&cli->cl_loi_list_lock); - if (val >= cli->cl_avail_grant) { - spin_unlock(&cli->cl_loi_list_lock); - return -EINVAL; - } - spin_unlock(&cli->cl_loi_list_lock); - - if (cli->cl_import->imp_state == LUSTRE_IMP_FULL) - rc = osc_shrink_grant_to_target(cli, val); - if (rc) - return rc; - return count; -} -LUSTRE_RW_ATTR(cur_grant_bytes); - -static ssize_t cur_lost_grant_bytes_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - int len; - - spin_lock(&cli->cl_loi_list_lock); - len = sprintf(buf, "%lu\n", cli->cl_lost_grant); - spin_unlock(&cli->cl_loi_list_lock); - - return len; -} -LUSTRE_RO_ATTR(cur_lost_grant_bytes); - -static ssize_t grant_shrink_interval_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - - return sprintf(buf, "%d\n", obd->u.cli.cl_grant_shrink_interval); -} - -static ssize_t grant_shrink_interval_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct 
obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - if (val <= 0) - return -ERANGE; - - obd->u.cli.cl_grant_shrink_interval = val; - - return count; -} -LUSTRE_RW_ATTR(grant_shrink_interval); - -static ssize_t checksums_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - - return sprintf(buf, "%d\n", obd->u.cli.cl_checksum ? 1 : 0); -} - -static ssize_t checksums_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - obd->u.cli.cl_checksum = (val ? 1 : 0); - - return count; -} -LUSTRE_RW_ATTR(checksums); - -static int osc_checksum_type_seq_show(struct seq_file *m, void *v) -{ - struct obd_device *obd = m->private; - int i; - - DECLARE_CKSUM_NAME; - - if (!obd) - return 0; - - for (i = 0; i < ARRAY_SIZE(cksum_name); i++) { - if (((1 << i) & obd->u.cli.cl_supp_cksum_types) == 0) - continue; - if (obd->u.cli.cl_cksum_type == (1 << i)) - seq_printf(m, "[%s] ", cksum_name[i]); - else - seq_printf(m, "%s ", cksum_name[i]); - } - seq_putc(m, '\n'); - return 0; -} - -static ssize_t osc_checksum_type_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct obd_device *obd = ((struct seq_file *)file->private_data)->private; - int i; - - DECLARE_CKSUM_NAME; - char kernbuf[10]; - - if (!obd) - return 0; - - if (count > sizeof(kernbuf) - 1) - return -EINVAL; - if (copy_from_user(kernbuf, buffer, count)) - return -EFAULT; - if (count > 0 && kernbuf[count - 1] == '\n') - kernbuf[count - 1] = '\0'; - else - kernbuf[count] = '\0'; - - for (i = 0; i < ARRAY_SIZE(cksum_name); i++) { - if (((1 << i) & 
obd->u.cli.cl_supp_cksum_types) == 0) - continue; - if (!strcmp(kernbuf, cksum_name[i])) { - obd->u.cli.cl_cksum_type = 1 << i; - return count; - } - } - return -EINVAL; -} - -LPROC_SEQ_FOPS(osc_checksum_type); - -static ssize_t resend_count_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - - return sprintf(buf, "%u\n", atomic_read(&obd->u.cli.cl_resends)); -} - -static ssize_t resend_count_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - int rc; - unsigned long val; - - rc = kstrtoul(buffer, 10, &val); - if (rc) - return rc; - - atomic_set(&obd->u.cli.cl_resends, val); - - return count; -} -LUSTRE_RW_ATTR(resend_count); - -static ssize_t contention_seconds_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct osc_device *od = obd2osc_dev(obd); - - return sprintf(buf, "%u\n", od->od_contention_time); -} - -static ssize_t contention_seconds_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct osc_device *od = obd2osc_dev(obd); - int rc; - int val; - - rc = kstrtoint(buffer, 10, &val); - if (rc) - return rc; - - if (val < 0) - return -EINVAL; - - od->od_contention_time = val; - - return count; -} -LUSTRE_RW_ATTR(contention_seconds); - -static ssize_t lockless_truncate_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct osc_device *od = obd2osc_dev(obd); - - return sprintf(buf, "%u\n", od->od_lockless_truncate); -} - -static ssize_t lockless_truncate_store(struct kobject *kobj, - struct attribute *attr, - const char 
*buffer, - size_t count) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - struct osc_device *od = obd2osc_dev(obd); - int rc; - unsigned int val; - - rc = kstrtouint(buffer, 10, &val); - if (rc) - return rc; - - od->od_lockless_truncate = val; - - return count; -} -LUSTRE_RW_ATTR(lockless_truncate); - -static ssize_t destroys_in_flight_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kobj); - - return sprintf(buf, "%u\n", - atomic_read(&obd->u.cli.cl_destroy_in_flight)); -} -LUSTRE_RO_ATTR(destroys_in_flight); - -static ssize_t max_pages_per_rpc_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - - return sprintf(buf, "%d\n", cli->cl_max_pages_per_rpc); -} - -static ssize_t max_pages_per_rpc_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - struct obd_connect_data *ocd = &cli->cl_import->imp_connect_data; - int chunk_mask, rc; - unsigned long long val; - - rc = kstrtoull(buffer, 10, &val); - if (rc) - return rc; - - /* if the max_pages is specified in bytes, convert to pages */ - if (val >= ONE_MB_BRW_SIZE) - val >>= PAGE_SHIFT; - - chunk_mask = ~((1 << (cli->cl_chunkbits - PAGE_SHIFT)) - 1); - /* max_pages_per_rpc must be chunk aligned */ - val = (val + ~chunk_mask) & chunk_mask; - if (!val || (ocd->ocd_brw_size && - val > ocd->ocd_brw_size >> PAGE_SHIFT)) { - return -ERANGE; - } - spin_lock(&cli->cl_loi_list_lock); - cli->cl_max_pages_per_rpc = val; - client_adjust_max_dirty(cli); - spin_unlock(&cli->cl_loi_list_lock); - - return count; -} -LUSTRE_RW_ATTR(max_pages_per_rpc); - -static ssize_t unstable_stats_show(struct kobject *kobj, - struct 
attribute *attr, - char *buf) -{ - struct obd_device *dev = container_of(kobj, struct obd_device, - obd_kobj); - struct client_obd *cli = &dev->u.cli; - long pages; - int mb; - - pages = atomic_long_read(&cli->cl_unstable_count); - mb = (pages * PAGE_SIZE) >> 20; - - return sprintf(buf, "unstable_pages: %20ld\n" - "unstable_mb: %10d\n", pages, mb); -} -LUSTRE_RO_ATTR(unstable_stats); - -LPROC_SEQ_FOPS_RO_TYPE(osc, connect_flags); -LPROC_SEQ_FOPS_RO_TYPE(osc, server_uuid); -LPROC_SEQ_FOPS_RO_TYPE(osc, conn_uuid); -LPROC_SEQ_FOPS_RO_TYPE(osc, timeouts); -LPROC_SEQ_FOPS_RO_TYPE(osc, state); - -LPROC_SEQ_FOPS_WR_ONLY(osc, ping); - -LPROC_SEQ_FOPS_RW_TYPE(osc, import); -LPROC_SEQ_FOPS_RW_TYPE(osc, pinger_recov); - -static struct lprocfs_vars lprocfs_osc_obd_vars[] = { - { "ping", &osc_ping_fops, NULL, 0222 }, - { "connect_flags", &osc_connect_flags_fops, NULL, 0 }, - /*{ "filegroups", lprocfs_rd_filegroups, NULL, 0 },*/ - { "ost_server_uuid", &osc_server_uuid_fops, NULL, 0 }, - { "ost_conn_uuid", &osc_conn_uuid_fops, NULL, 0 }, - { "osc_cached_mb", &osc_cached_mb_fops, NULL }, - { "checksum_type", &osc_checksum_type_fops, NULL }, - { "timeouts", &osc_timeouts_fops, NULL, 0 }, - { "import", &osc_import_fops, NULL }, - { "state", &osc_state_fops, NULL, 0 }, - { "pinger_recov", &osc_pinger_recov_fops, NULL }, - { NULL } -}; - -#define pct(a, b) (b ? 
a * 100 / b : 0) - -static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) -{ - struct timespec64 now; - struct obd_device *dev = seq->private; - struct client_obd *cli = &dev->u.cli; - unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; - int i; - - ktime_get_real_ts64(&now); - - spin_lock(&cli->cl_loi_list_lock); - - seq_printf(seq, "snapshot_time: %llu.%9lu (secs.usecs)\n", - (s64)now.tv_sec, (unsigned long)now.tv_nsec); - seq_printf(seq, "read RPCs in flight: %d\n", - cli->cl_r_in_flight); - seq_printf(seq, "write RPCs in flight: %d\n", - cli->cl_w_in_flight); - seq_printf(seq, "pending write pages: %d\n", - atomic_read(&cli->cl_pending_w_pages)); - seq_printf(seq, "pending read pages: %d\n", - atomic_read(&cli->cl_pending_r_pages)); - - seq_puts(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_puts(seq, "pages per rpc rpcs % cum % |"); - seq_puts(seq, " rpcs % cum %\n"); - - read_tot = lprocfs_oh_sum(&cli->cl_read_page_hist); - write_tot = lprocfs_oh_sum(&cli->cl_write_page_hist); - - read_cum = 0; - write_cum = 0; - for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = cli->cl_read_page_hist.oh_buckets[i]; - unsigned long w = cli->cl_write_page_hist.oh_buckets[i]; - - read_cum += r; - write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - 1 << i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, - pct(w, write_tot), - pct(write_cum, write_tot)); - if (read_cum == read_tot && write_cum == write_tot) - break; - } - - seq_puts(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_puts(seq, "rpcs in flight rpcs % cum % |"); - seq_puts(seq, " rpcs % cum %\n"); - - read_tot = lprocfs_oh_sum(&cli->cl_read_rpc_hist); - write_tot = lprocfs_oh_sum(&cli->cl_write_rpc_hist); - - read_cum = 0; - write_cum = 0; - for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = cli->cl_read_rpc_hist.oh_buckets[i]; - unsigned long w = cli->cl_write_rpc_hist.oh_buckets[i]; - - read_cum += r; - write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | 
%10lu %3lu %3lu\n", - i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, - pct(w, write_tot), - pct(write_cum, write_tot)); - if (read_cum == read_tot && write_cum == write_tot) - break; - } - - seq_puts(seq, "\n\t\t\tread\t\t\twrite\n"); - seq_puts(seq, "offset rpcs % cum % |"); - seq_puts(seq, " rpcs % cum %\n"); - - read_tot = lprocfs_oh_sum(&cli->cl_read_offset_hist); - write_tot = lprocfs_oh_sum(&cli->cl_write_offset_hist); - - read_cum = 0; - write_cum = 0; - for (i = 0; i < OBD_HIST_MAX; i++) { - unsigned long r = cli->cl_read_offset_hist.oh_buckets[i]; - unsigned long w = cli->cl_write_offset_hist.oh_buckets[i]; - - read_cum += r; - write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - (i == 0) ? 0 : 1 << (i - 1), - r, pct(r, read_tot), pct(read_cum, read_tot), - w, pct(w, write_tot), pct(write_cum, write_tot)); - if (read_cum == read_tot && write_cum == write_tot) - break; - } - - spin_unlock(&cli->cl_loi_list_lock); - - return 0; -} - -#undef pct - -static ssize_t osc_rpc_stats_seq_write(struct file *file, - const char __user *buf, - size_t len, loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct obd_device *dev = seq->private; - struct client_obd *cli = &dev->u.cli; - - lprocfs_oh_clear(&cli->cl_read_rpc_hist); - lprocfs_oh_clear(&cli->cl_write_rpc_hist); - lprocfs_oh_clear(&cli->cl_read_page_hist); - lprocfs_oh_clear(&cli->cl_write_page_hist); - lprocfs_oh_clear(&cli->cl_read_offset_hist); - lprocfs_oh_clear(&cli->cl_write_offset_hist); - - return len; -} - -LPROC_SEQ_FOPS(osc_rpc_stats); - -static int osc_stats_seq_show(struct seq_file *seq, void *v) -{ - struct timespec64 now; - struct obd_device *dev = seq->private; - struct osc_stats *stats = &obd2osc_dev(dev)->od_stats; - - ktime_get_real_ts64(&now); - - seq_printf(seq, "snapshot_time: %llu.%9lu (secs.usecs)\n", - (s64)now.tv_sec, (unsigned long)now.tv_nsec); - seq_printf(seq, "lockless_write_bytes\t\t%llu\n", - stats->os_lockless_writes); - 
seq_printf(seq, "lockless_read_bytes\t\t%llu\n", - stats->os_lockless_reads); - seq_printf(seq, "lockless_truncate\t\t%llu\n", - stats->os_lockless_truncates); - return 0; -} - -static ssize_t osc_stats_seq_write(struct file *file, - const char __user *buf, - size_t len, loff_t *off) -{ - struct seq_file *seq = file->private_data; - struct obd_device *dev = seq->private; - struct osc_stats *stats = &obd2osc_dev(dev)->od_stats; - - memset(stats, 0, sizeof(*stats)); - return len; -} - -LPROC_SEQ_FOPS(osc_stats); - -void lproc_osc_attach_seqstat(struct obd_device *dev) -{ - debugfs_create_file("osc_stats", 0644, dev->obd_debugfs_entry, dev, - &osc_stats_fops); - debugfs_create_file("rpc_stats", 0644, dev->obd_debugfs_entry, dev, - &osc_rpc_stats_fops); -} - -static struct attribute *osc_attrs[] = { - &lustre_attr_active.attr, - &lustre_attr_checksums.attr, - &lustre_attr_contention_seconds.attr, - &lustre_attr_cur_dirty_bytes.attr, - &lustre_attr_cur_grant_bytes.attr, - &lustre_attr_cur_lost_grant_bytes.attr, - &lustre_attr_destroys_in_flight.attr, - &lustre_attr_grant_shrink_interval.attr, - &lustre_attr_lockless_truncate.attr, - &lustre_attr_max_dirty_mb.attr, - &lustre_attr_max_pages_per_rpc.attr, - &lustre_attr_max_rpcs_in_flight.attr, - &lustre_attr_resend_count.attr, - &lustre_attr_unstable_stats.attr, - NULL, -}; - -static const struct attribute_group osc_attr_group = { - .attrs = osc_attrs, -}; - -void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars) -{ - lvars->sysfs_vars = &osc_attr_group; - lvars->obd_vars = lprocfs_osc_obd_vars; -} diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c deleted file mode 100644 index f26983004843..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_cache.c +++ /dev/null @@ -1,3306 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - * - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * osc cache management. 
- * - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_OSC - -#include "osc_cl_internal.h" -#include "osc_internal.h" - -static int extent_debug; /* set it to be true for more debug */ - -static void osc_update_pending(struct osc_object *obj, int cmd, int delta); -static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, - enum osc_extent_state state); -static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli, - struct osc_async_page *oap, int sent, int rc); -static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap, - int cmd); -static int osc_refresh_count(const struct lu_env *env, - struct osc_async_page *oap, int cmd); -static int osc_io_unplug_async(const struct lu_env *env, - struct client_obd *cli, struct osc_object *osc); -static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, - unsigned int lost_grant); - -static void osc_extent_tree_dump0(int level, struct osc_object *obj, - const char *func, int line); -#define osc_extent_tree_dump(lvl, obj) \ - osc_extent_tree_dump0(lvl, obj, __func__, __LINE__) - -/** \addtogroup osc - * @{ - */ - -/* ------------------ osc extent ------------------ */ -static inline char *ext_flags(struct osc_extent *ext, char *flags) -{ - char *buf = flags; - *buf++ = ext->oe_rw ? 'r' : 'w'; - if (ext->oe_intree) - *buf++ = 'i'; - if (ext->oe_sync) - *buf++ = 'S'; - if (ext->oe_srvlock) - *buf++ = 's'; - if (ext->oe_hp) - *buf++ = 'h'; - if (ext->oe_urgent) - *buf++ = 'u'; - if (ext->oe_memalloc) - *buf++ = 'm'; - if (ext->oe_trunc_pending) - *buf++ = 't'; - if (ext->oe_fsync_wait) - *buf++ = 'Y'; - *buf = 0; - return flags; -} - -static inline char list_empty_marker(struct list_head *list) -{ - return list_empty(list) ? 
'-' : '+'; -} - -#define EXTSTR "[%lu -> %lu/%lu]" -#define EXTPARA(ext) (ext)->oe_start, (ext)->oe_end, (ext)->oe_max_end -static const char *oes_strings[] = { - "inv", "active", "cache", "locking", "lockdone", "rpc", "trunc", NULL }; - -#define OSC_EXTENT_DUMP(lvl, extent, fmt, ...) do { \ - struct osc_extent *__ext = (extent); \ - char __buf[16]; \ - \ - CDEBUG(lvl, \ - "extent %p@{" EXTSTR ", " \ - "[%d|%d|%c|%s|%s|%p], [%d|%d|%c|%c|%p|%u|%p]} " fmt, \ - /* ----- extent part 0 ----- */ \ - __ext, EXTPARA(__ext), \ - /* ----- part 1 ----- */ \ - atomic_read(&__ext->oe_refc), \ - atomic_read(&__ext->oe_users), \ - list_empty_marker(&__ext->oe_link), \ - oes_strings[__ext->oe_state], ext_flags(__ext, __buf), \ - __ext->oe_obj, \ - /* ----- part 2 ----- */ \ - __ext->oe_grants, __ext->oe_nr_pages, \ - list_empty_marker(&__ext->oe_pages), \ - waitqueue_active(&__ext->oe_waitq) ? '+' : '-', \ - __ext->oe_dlmlock, __ext->oe_mppr, __ext->oe_owner, \ - /* ----- part 4 ----- */ \ - ## __VA_ARGS__); \ - if (lvl == D_ERROR && __ext->oe_dlmlock) \ - LDLM_ERROR(__ext->oe_dlmlock, "extent: %p", __ext); \ - else \ - LDLM_DEBUG(__ext->oe_dlmlock, "extent: %p", __ext); \ -} while (0) - -#undef EASSERTF -#define EASSERTF(expr, ext, fmt, args...) 
do { \ - if (!(expr)) { \ - OSC_EXTENT_DUMP(D_ERROR, (ext), fmt, ##args); \ - osc_extent_tree_dump(D_ERROR, (ext)->oe_obj); \ - LASSERT(expr); \ - } \ -} while (0) - -#undef EASSERT -#define EASSERT(expr, ext) EASSERTF(expr, ext, "\n") - -static inline struct osc_extent *rb_extent(struct rb_node *n) -{ - return rb_entry_safe(n, struct osc_extent, oe_node); -} - -static inline struct osc_extent *next_extent(struct osc_extent *ext) -{ - if (!ext) - return NULL; - - LASSERT(ext->oe_intree); - return rb_extent(rb_next(&ext->oe_node)); -} - -static inline struct osc_extent *prev_extent(struct osc_extent *ext) -{ - if (!ext) - return NULL; - - LASSERT(ext->oe_intree); - return rb_extent(rb_prev(&ext->oe_node)); -} - -static inline struct osc_extent *first_extent(struct osc_object *obj) -{ - return rb_extent(rb_first(&obj->oo_root)); -} - -/* object must be locked by caller. */ -static int osc_extent_sanity_check0(struct osc_extent *ext, - const char *func, const int line) -{ - struct osc_object *obj = ext->oe_obj; - struct osc_async_page *oap; - size_t page_count; - int rc = 0; - - if (!osc_object_is_locked(obj)) { - rc = 9; - goto out; - } - - if (ext->oe_state >= OES_STATE_MAX) { - rc = 10; - goto out; - } - - if (atomic_read(&ext->oe_refc) <= 0) { - rc = 20; - goto out; - } - - if (atomic_read(&ext->oe_refc) < atomic_read(&ext->oe_users)) { - rc = 30; - goto out; - } - - switch (ext->oe_state) { - case OES_INV: - if (ext->oe_nr_pages > 0 || !list_empty(&ext->oe_pages)) - rc = 35; - else - rc = 0; - goto out; - case OES_ACTIVE: - if (atomic_read(&ext->oe_users) == 0) { - rc = 40; - goto out; - } - if (ext->oe_hp) { - rc = 50; - goto out; - } - if (ext->oe_fsync_wait && !ext->oe_urgent) { - rc = 55; - goto out; - } - break; - case OES_CACHE: - if (ext->oe_grants == 0) { - rc = 60; - goto out; - } - if (ext->oe_fsync_wait && !ext->oe_urgent && !ext->oe_hp) { - rc = 65; - goto out; - } - /* fall through */ - default: - if (atomic_read(&ext->oe_users) > 0) { - rc = 70; - 
goto out; - } - } - - if (ext->oe_max_end < ext->oe_end || ext->oe_end < ext->oe_start) { - rc = 80; - goto out; - } - - if (ext->oe_sync && ext->oe_grants > 0) { - rc = 90; - goto out; - } - - if (ext->oe_dlmlock && !ldlm_is_failed(ext->oe_dlmlock)) { - struct ldlm_extent *extent; - - extent = &ext->oe_dlmlock->l_policy_data.l_extent; - if (!(extent->start <= cl_offset(osc2cl(obj), ext->oe_start) && - extent->end >= cl_offset(osc2cl(obj), ext->oe_max_end))) { - rc = 100; - goto out; - } - - if (!(ext->oe_dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP))) { - rc = 102; - goto out; - } - } - - if (ext->oe_nr_pages > ext->oe_mppr) { - rc = 105; - goto out; - } - - /* Do not verify page list if extent is in RPC. This is because an - * in-RPC extent is supposed to be exclusively accessible w/o lock. - */ - if (ext->oe_state > OES_CACHE) { - rc = 0; - goto out; - } - - if (!extent_debug) { - rc = 0; - goto out; - } - - page_count = 0; - list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { - pgoff_t index = osc_index(oap2osc(oap)); - ++page_count; - if (index > ext->oe_end || index < ext->oe_start) { - rc = 110; - goto out; - } - } - if (page_count != ext->oe_nr_pages) { - rc = 120; - goto out; - } - -out: - if (rc != 0) - OSC_EXTENT_DUMP(D_ERROR, ext, - "%s:%d sanity check %p failed with rc = %d\n", - func, line, ext, rc); - return rc; -} - -#define sanity_check_nolock(ext) \ - osc_extent_sanity_check0(ext, __func__, __LINE__) - -#define sanity_check(ext) ({ \ - int __res; \ - osc_object_lock((ext)->oe_obj); \ - __res = sanity_check_nolock(ext); \ - osc_object_unlock((ext)->oe_obj); \ - __res; \ -}) - -/** - * sanity check - to make sure there is no overlapped extent in the tree. 
- */ -static int osc_extent_is_overlapped(struct osc_object *obj, - struct osc_extent *ext) -{ - struct osc_extent *tmp; - - LASSERT(osc_object_is_locked(obj)); - - if (!extent_debug) - return 0; - - for (tmp = first_extent(obj); tmp; tmp = next_extent(tmp)) { - if (tmp == ext) - continue; - if (tmp->oe_end >= ext->oe_start && - tmp->oe_start <= ext->oe_end) - return 1; - } - return 0; -} - -static void osc_extent_state_set(struct osc_extent *ext, int state) -{ - LASSERT(osc_object_is_locked(ext->oe_obj)); - LASSERT(state >= OES_INV && state < OES_STATE_MAX); - - /* Never try to sanity check a state changing extent :-) */ - /* LASSERT(sanity_check_nolock(ext) == 0); */ - - /* TODO: validate the state machine */ - ext->oe_state = state; - wake_up_all(&ext->oe_waitq); -} - -static struct osc_extent *osc_extent_alloc(struct osc_object *obj) -{ - struct osc_extent *ext; - - ext = kmem_cache_zalloc(osc_extent_kmem, GFP_NOFS); - if (!ext) - return NULL; - - RB_CLEAR_NODE(&ext->oe_node); - ext->oe_obj = obj; - cl_object_get(osc2cl(obj)); - atomic_set(&ext->oe_refc, 1); - atomic_set(&ext->oe_users, 0); - INIT_LIST_HEAD(&ext->oe_link); - ext->oe_state = OES_INV; - INIT_LIST_HEAD(&ext->oe_pages); - init_waitqueue_head(&ext->oe_waitq); - ext->oe_dlmlock = NULL; - - return ext; -} - -static void osc_extent_free(struct osc_extent *ext) -{ - kmem_cache_free(osc_extent_kmem, ext); -} - -static struct osc_extent *osc_extent_get(struct osc_extent *ext) -{ - LASSERT(atomic_read(&ext->oe_refc) >= 0); - atomic_inc(&ext->oe_refc); - return ext; -} - -static void osc_extent_put(const struct lu_env *env, struct osc_extent *ext) -{ - LASSERT(atomic_read(&ext->oe_refc) > 0); - if (atomic_dec_and_test(&ext->oe_refc)) { - LASSERT(list_empty(&ext->oe_link)); - LASSERT(atomic_read(&ext->oe_users) == 0); - LASSERT(ext->oe_state == OES_INV); - LASSERT(!ext->oe_intree); - - if (ext->oe_dlmlock) { - lu_ref_add(&ext->oe_dlmlock->l_reference, - "osc_extent", ext); - LDLM_LOCK_PUT(ext->oe_dlmlock); - 
ext->oe_dlmlock = NULL; - } - cl_object_put(env, osc2cl(ext->oe_obj)); - osc_extent_free(ext); - } -} - -/** - * osc_extent_put_trust() is a special version of osc_extent_put() when - * it's known that the caller is not the last user. This is to address the - * problem of lacking of lu_env ;-). - */ -static void osc_extent_put_trust(struct osc_extent *ext) -{ - LASSERT(atomic_read(&ext->oe_refc) > 1); - LASSERT(osc_object_is_locked(ext->oe_obj)); - atomic_dec(&ext->oe_refc); -} - -/** - * Return the extent which includes pgoff @index, or return the greatest - * previous extent in the tree. - */ -static struct osc_extent *osc_extent_search(struct osc_object *obj, - pgoff_t index) -{ - struct rb_node *n = obj->oo_root.rb_node; - struct osc_extent *tmp, *p = NULL; - - LASSERT(osc_object_is_locked(obj)); - while (n) { - tmp = rb_extent(n); - if (index < tmp->oe_start) { - n = n->rb_left; - } else if (index > tmp->oe_end) { - p = rb_extent(n); - n = n->rb_right; - } else { - return tmp; - } - } - return p; -} - -/* - * Return the extent covering @index, otherwise return NULL. - * caller must have held object lock. - */ -static struct osc_extent *osc_extent_lookup(struct osc_object *obj, - pgoff_t index) -{ - struct osc_extent *ext; - - ext = osc_extent_search(obj, index); - if (ext && ext->oe_start <= index && index <= ext->oe_end) - return osc_extent_get(ext); - return NULL; -} - -/* caller must have held object lock. 
*/ -static void osc_extent_insert(struct osc_object *obj, struct osc_extent *ext) -{ - struct rb_node **n = &obj->oo_root.rb_node; - struct rb_node *parent = NULL; - struct osc_extent *tmp; - - LASSERT(ext->oe_intree == 0); - LASSERT(ext->oe_obj == obj); - LASSERT(osc_object_is_locked(obj)); - while (*n) { - tmp = rb_extent(*n); - parent = *n; - - if (ext->oe_end < tmp->oe_start) - n = &(*n)->rb_left; - else if (ext->oe_start > tmp->oe_end) - n = &(*n)->rb_right; - else - EASSERTF(0, tmp, EXTSTR "\n", EXTPARA(ext)); - } - rb_link_node(&ext->oe_node, parent, n); - rb_insert_color(&ext->oe_node, &obj->oo_root); - osc_extent_get(ext); - ext->oe_intree = 1; -} - -/* caller must have held object lock. */ -static void osc_extent_erase(struct osc_extent *ext) -{ - struct osc_object *obj = ext->oe_obj; - - LASSERT(osc_object_is_locked(obj)); - if (ext->oe_intree) { - rb_erase(&ext->oe_node, &obj->oo_root); - ext->oe_intree = 0; - /* rbtree held a refcount */ - osc_extent_put_trust(ext); - } -} - -static struct osc_extent *osc_extent_hold(struct osc_extent *ext) -{ - struct osc_object *obj = ext->oe_obj; - - LASSERT(osc_object_is_locked(obj)); - LASSERT(ext->oe_state == OES_ACTIVE || ext->oe_state == OES_CACHE); - if (ext->oe_state == OES_CACHE) { - osc_extent_state_set(ext, OES_ACTIVE); - osc_update_pending(obj, OBD_BRW_WRITE, -ext->oe_nr_pages); - } - atomic_inc(&ext->oe_users); - list_del_init(&ext->oe_link); - return osc_extent_get(ext); -} - -static void __osc_extent_remove(struct osc_extent *ext) -{ - LASSERT(osc_object_is_locked(ext->oe_obj)); - LASSERT(list_empty(&ext->oe_pages)); - osc_extent_erase(ext); - list_del_init(&ext->oe_link); - osc_extent_state_set(ext, OES_INV); - OSC_EXTENT_DUMP(D_CACHE, ext, "destroyed.\n"); -} - -static void osc_extent_remove(struct osc_extent *ext) -{ - struct osc_object *obj = ext->oe_obj; - - osc_object_lock(obj); - __osc_extent_remove(ext); - osc_object_unlock(obj); -} - -/** - * This function is used to merge extents to get 
better performance. It checks - * if @cur and @victim are contiguous at chunk level. - */ -static int osc_extent_merge(const struct lu_env *env, struct osc_extent *cur, - struct osc_extent *victim) -{ - struct osc_object *obj = cur->oe_obj; - pgoff_t chunk_start; - pgoff_t chunk_end; - int ppc_bits; - - LASSERT(cur->oe_state == OES_CACHE); - LASSERT(osc_object_is_locked(obj)); - if (!victim) - return -EINVAL; - - if (victim->oe_state != OES_CACHE || victim->oe_fsync_wait) - return -EBUSY; - - if (cur->oe_max_end != victim->oe_max_end) - return -ERANGE; - - LASSERT(cur->oe_dlmlock == victim->oe_dlmlock); - ppc_bits = osc_cli(obj)->cl_chunkbits - PAGE_SHIFT; - chunk_start = cur->oe_start >> ppc_bits; - chunk_end = cur->oe_end >> ppc_bits; - if (chunk_start != (victim->oe_end >> ppc_bits) + 1 && - chunk_end + 1 != victim->oe_start >> ppc_bits) - return -ERANGE; - - OSC_EXTENT_DUMP(D_CACHE, victim, "will be merged by %p.\n", cur); - - cur->oe_start = min(cur->oe_start, victim->oe_start); - cur->oe_end = max(cur->oe_end, victim->oe_end); - cur->oe_grants += victim->oe_grants; - cur->oe_nr_pages += victim->oe_nr_pages; - /* only the following bits are needed to merge */ - cur->oe_urgent |= victim->oe_urgent; - cur->oe_memalloc |= victim->oe_memalloc; - list_splice_init(&victim->oe_pages, &cur->oe_pages); - list_del_init(&victim->oe_link); - victim->oe_nr_pages = 0; - - osc_extent_get(victim); - __osc_extent_remove(victim); - osc_extent_put(env, victim); - - OSC_EXTENT_DUMP(D_CACHE, cur, "after merging %p.\n", victim); - return 0; -} - -/** - * Drop user count of osc_extent, and unplug IO asynchronously. 
- */ -void osc_extent_release(const struct lu_env *env, struct osc_extent *ext) -{ - struct osc_object *obj = ext->oe_obj; - - LASSERT(atomic_read(&ext->oe_users) > 0); - LASSERT(sanity_check(ext) == 0); - LASSERT(ext->oe_grants > 0); - - if (atomic_dec_and_lock(&ext->oe_users, &obj->oo_lock)) { - LASSERT(ext->oe_state == OES_ACTIVE); - if (ext->oe_trunc_pending) { - /* a truncate process is waiting for this extent. - * This may happen due to a race, check - * osc_cache_truncate_start(). - */ - osc_extent_state_set(ext, OES_TRUNC); - ext->oe_trunc_pending = 0; - } else { - osc_extent_state_set(ext, OES_CACHE); - osc_update_pending(obj, OBD_BRW_WRITE, - ext->oe_nr_pages); - - /* try to merge the previous and next extent. */ - osc_extent_merge(env, ext, prev_extent(ext)); - osc_extent_merge(env, ext, next_extent(ext)); - - if (ext->oe_urgent) - list_move_tail(&ext->oe_link, - &obj->oo_urgent_exts); - } - osc_object_unlock(obj); - - osc_io_unplug_async(env, osc_cli(obj), obj); - } - osc_extent_put(env, ext); -} - -static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2) -{ - return !(ex1->oe_end < ex2->oe_start || ex2->oe_end < ex1->oe_start); -} - -/** - * Find or create an extent which includes @index, core function to manage - * extent tree. 
- */ -static struct osc_extent *osc_extent_find(const struct lu_env *env, - struct osc_object *obj, pgoff_t index, - unsigned int *grants) -{ - struct client_obd *cli = osc_cli(obj); - struct osc_lock *olck; - struct cl_lock_descr *descr; - struct osc_extent *cur; - struct osc_extent *ext; - struct osc_extent *conflict = NULL; - struct osc_extent *found = NULL; - pgoff_t chunk; - pgoff_t max_end; - unsigned int max_pages; /* max_pages_per_rpc */ - unsigned int chunksize; - int ppc_bits; /* pages per chunk bits */ - pgoff_t chunk_mask; - int rc; - - cur = osc_extent_alloc(obj); - if (!cur) - return ERR_PTR(-ENOMEM); - - olck = osc_env_io(env)->oi_write_osclock; - LASSERTF(olck, "page %lu is not covered by lock\n", index); - LASSERT(olck->ols_state == OLS_GRANTED); - - descr = &olck->ols_cl.cls_lock->cll_descr; - LASSERT(descr->cld_mode >= CLM_WRITE); - - LASSERT(cli->cl_chunkbits >= PAGE_SHIFT); - ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; - chunk_mask = ~((1 << ppc_bits) - 1); - chunksize = 1 << cli->cl_chunkbits; - chunk = index >> ppc_bits; - - /* align end to rpc edge, rpc size may not be a power 2 integer. 
*/ - max_pages = cli->cl_max_pages_per_rpc; - LASSERT((max_pages & ~chunk_mask) == 0); - max_end = index - (index % max_pages) + max_pages - 1; - max_end = min_t(pgoff_t, max_end, descr->cld_end); - - /* initialize new extent by parameters so far */ - cur->oe_max_end = max_end; - cur->oe_start = index & chunk_mask; - cur->oe_end = ((index + ~chunk_mask + 1) & chunk_mask) - 1; - if (cur->oe_start < descr->cld_start) - cur->oe_start = descr->cld_start; - if (cur->oe_end > max_end) - cur->oe_end = max_end; - cur->oe_grants = 0; - cur->oe_mppr = max_pages; - if (olck->ols_dlmlock) { - LASSERT(olck->ols_hold); - cur->oe_dlmlock = LDLM_LOCK_GET(olck->ols_dlmlock); - lu_ref_add(&olck->ols_dlmlock->l_reference, "osc_extent", cur); - } - - /* grants has been allocated by caller */ - LASSERTF(*grants >= chunksize + cli->cl_extent_tax, - "%u/%u/%u.\n", *grants, chunksize, cli->cl_extent_tax); - LASSERTF((max_end - cur->oe_start) < max_pages, EXTSTR "\n", - EXTPARA(cur)); - -restart: - osc_object_lock(obj); - ext = osc_extent_search(obj, cur->oe_start); - if (!ext) - ext = first_extent(obj); - while (ext) { - pgoff_t ext_chk_start = ext->oe_start >> ppc_bits; - pgoff_t ext_chk_end = ext->oe_end >> ppc_bits; - - LASSERT(sanity_check_nolock(ext) == 0); - if (chunk > ext_chk_end + 1) - break; - - /* if covering by different locks, no chance to match */ - if (olck->ols_dlmlock != ext->oe_dlmlock) { - EASSERTF(!overlapped(ext, cur), ext, - EXTSTR "\n", EXTPARA(cur)); - - ext = next_extent(ext); - continue; - } - - /* discontiguous chunks? */ - if (chunk + 1 < ext_chk_start) { - ext = next_extent(ext); - continue; - } - - /* ok, from now on, ext and cur have these attrs: - * 1. covered by the same lock - * 2. contiguous at chunk level or overlapping. - */ - - if (overlapped(ext, cur)) { - /* cur is the minimum unit, so overlapping means - * full contain. 
- */ - EASSERTF((ext->oe_start <= cur->oe_start && - ext->oe_end >= cur->oe_end), - ext, EXTSTR "\n", EXTPARA(cur)); - - if (ext->oe_state > OES_CACHE || ext->oe_fsync_wait) { - /* for simplicity, we wait for this extent to - * finish before going forward. - */ - conflict = osc_extent_get(ext); - break; - } - - found = osc_extent_hold(ext); - break; - } - - /* non-overlapped extent */ - if (ext->oe_state != OES_CACHE || ext->oe_fsync_wait) { - /* we can't do anything for a non OES_CACHE extent, or - * if there is someone waiting for this extent to be - * flushed, try next one. - */ - ext = next_extent(ext); - continue; - } - - /* check if they belong to the same rpc slot before trying to - * merge. the extents are not overlapped and contiguous at - * chunk level to get here. - */ - if (ext->oe_max_end != max_end) { - /* if they don't belong to the same RPC slot or - * max_pages_per_rpc has ever changed, do not merge. - */ - ext = next_extent(ext); - continue; - } - - /* it's required that an extent must be contiguous at chunk - * level so that we know the whole extent is covered by grant - * (the pages in the extent are NOT required to be contiguous). - * Otherwise, it will be too much difficult to know which - * chunks have grants allocated. 
- */ - - /* try to do front merge - extend ext's start */ - if (chunk + 1 == ext_chk_start) { - /* ext must be chunk size aligned */ - EASSERT((ext->oe_start & ~chunk_mask) == 0, ext); - - /* pull ext's start back to cover cur */ - ext->oe_start = cur->oe_start; - ext->oe_grants += chunksize; - LASSERT(*grants >= chunksize); - *grants -= chunksize; - - found = osc_extent_hold(ext); - } else if (chunk == ext_chk_end + 1) { - /* rear merge */ - ext->oe_end = cur->oe_end; - ext->oe_grants += chunksize; - LASSERT(*grants >= chunksize); - *grants -= chunksize; - - /* try to merge with the next one because we just fill - * in a gap - */ - if (osc_extent_merge(env, ext, next_extent(ext)) == 0) - /* we can save extent tax from next extent */ - *grants += cli->cl_extent_tax; - - found = osc_extent_hold(ext); - } - if (found) - break; - - ext = next_extent(ext); - } - - osc_extent_tree_dump(D_CACHE, obj); - if (found) { - LASSERT(!conflict); - if (!IS_ERR(found)) { - LASSERT(found->oe_dlmlock == cur->oe_dlmlock); - OSC_EXTENT_DUMP(D_CACHE, found, - "found caching ext for %lu.\n", index); - } - } else if (!conflict) { - /* create a new extent */ - EASSERT(osc_extent_is_overlapped(obj, cur) == 0, cur); - cur->oe_grants = chunksize + cli->cl_extent_tax; - LASSERT(*grants >= cur->oe_grants); - *grants -= cur->oe_grants; - - cur->oe_state = OES_CACHE; - found = osc_extent_hold(cur); - osc_extent_insert(obj, cur); - OSC_EXTENT_DUMP(D_CACHE, cur, "add into tree %lu/%lu.\n", - index, descr->cld_end); - } - osc_object_unlock(obj); - - if (conflict) { - LASSERT(!found); - - /* waiting for IO to finish. Please notice that it's impossible - * to be an OES_TRUNC extent. - */ - rc = osc_extent_wait(env, conflict, OES_INV); - osc_extent_put(env, conflict); - conflict = NULL; - if (rc < 0) { - found = ERR_PTR(rc); - goto out; - } - - goto restart; - } - -out: - osc_extent_put(env, cur); - return found; -} - -/** - * Called when IO is finished to an extent. 
- */ -int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, - int sent, int rc) -{ - struct client_obd *cli = osc_cli(ext->oe_obj); - struct osc_async_page *oap; - struct osc_async_page *tmp; - int nr_pages = ext->oe_nr_pages; - int lost_grant = 0; - int blocksize = cli->cl_import->imp_obd->obd_osfs.os_bsize ? : 4096; - __u64 last_off = 0; - int last_count = -1; - - OSC_EXTENT_DUMP(D_CACHE, ext, "extent finished.\n"); - - ext->oe_rc = rc ?: ext->oe_nr_pages; - EASSERT(ergo(rc == 0, ext->oe_state == OES_RPC), ext); - - osc_lru_add_batch(cli, &ext->oe_pages); - list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) { - list_del_init(&oap->oap_rpc_item); - list_del_init(&oap->oap_pending_item); - if (last_off <= oap->oap_obj_off) { - last_off = oap->oap_obj_off; - last_count = oap->oap_count; - } - - --ext->oe_nr_pages; - osc_ap_completion(env, cli, oap, sent, rc); - } - EASSERT(ext->oe_nr_pages == 0, ext); - - if (!sent) { - lost_grant = ext->oe_grants; - } else if (blocksize < PAGE_SIZE && - last_count != PAGE_SIZE) { - /* For short writes we shouldn't count parts of pages that - * span a whole chunk on the OST side, or our accounting goes - * wrong. Should match the code in filter_grant_check. - */ - int offset = last_off & ~PAGE_MASK; - int count = last_count + (offset & (blocksize - 1)); - int end = (offset + last_count) & (blocksize - 1); - - if (end) - count += blocksize - end; - - lost_grant = PAGE_SIZE - count; - } - if (ext->oe_grants > 0) - osc_free_grant(cli, nr_pages, lost_grant); - - osc_extent_remove(ext); - /* put the refcount for RPC */ - osc_extent_put(env, ext); - return 0; -} - -static int extent_wait_cb(struct osc_extent *ext, enum osc_extent_state state) -{ - int ret; - - osc_object_lock(ext->oe_obj); - ret = ext->oe_state == state; - osc_object_unlock(ext->oe_obj); - - return ret; -} - -/** - * Wait for the extent's state to become @state. 
- */ -static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext, - enum osc_extent_state state) -{ - struct osc_object *obj = ext->oe_obj; - int rc = 0; - - osc_object_lock(obj); - LASSERT(sanity_check_nolock(ext) == 0); - /* `Kick' this extent only if the caller is waiting for it to be - * written out. - */ - if (state == OES_INV && !ext->oe_urgent && !ext->oe_hp && - !ext->oe_trunc_pending) { - if (ext->oe_state == OES_ACTIVE) { - ext->oe_urgent = 1; - } else if (ext->oe_state == OES_CACHE) { - ext->oe_urgent = 1; - osc_extent_hold(ext); - rc = 1; - } - } - osc_object_unlock(obj); - if (rc == 1) - osc_extent_release(env, ext); - - /* wait for the extent until its state becomes @state */ - rc = wait_event_idle_timeout(ext->oe_waitq, - extent_wait_cb(ext, state), 600 * HZ); - if (rc == 0) { - OSC_EXTENT_DUMP(D_ERROR, ext, - "%s: wait ext to %u timedout, recovery in progress?\n", - cli_name(osc_cli(obj)), state); - - wait_event_idle(ext->oe_waitq, extent_wait_cb(ext, state)); - } - if (ext->oe_rc < 0) - rc = ext->oe_rc; - else - rc = 0; - return rc; -} - -/** - * Discard pages with index greater than @size. If @ext is overlapped with - * @size, then partial truncate happens. - */ -static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, - bool partial) -{ - struct lu_env *env; - struct cl_io *io; - struct osc_object *obj = ext->oe_obj; - struct client_obd *cli = osc_cli(obj); - struct osc_async_page *oap; - struct osc_async_page *tmp; - int pages_in_chunk = 0; - int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; - __u64 trunc_chunk = trunc_index >> ppc_bits; - int grants = 0; - int nr_pages = 0; - int rc = 0; - u16 refcheck; - - LASSERT(sanity_check(ext) == 0); - EASSERT(ext->oe_state == OES_TRUNC, ext); - EASSERT(!ext->oe_urgent, ext); - - /* Request new lu_env. - * We can't use that env from osc_cache_truncate_start() because - * it's from lov_io_sub and not fully initialized. 
- */ - env = cl_env_get(&refcheck); - io = &osc_env_info(env)->oti_io; - io->ci_obj = cl_object_top(osc2cl(obj)); - io->ci_ignore_layout = 1; - rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (rc < 0) - goto out; - - /* discard all pages with index greater then trunc_index */ - list_for_each_entry_safe(oap, tmp, &ext->oe_pages, oap_pending_item) { - pgoff_t index = osc_index(oap2osc(oap)); - struct cl_page *page = oap2cl_page(oap); - - LASSERT(list_empty(&oap->oap_rpc_item)); - - /* only discard the pages with their index greater than - * trunc_index, and ... - */ - if (index < trunc_index || - (index == trunc_index && partial)) { - /* accounting how many pages remaining in the chunk - * so that we can calculate grants correctly. */ - if (index >> ppc_bits == trunc_chunk) - ++pages_in_chunk; - continue; - } - - list_del_init(&oap->oap_pending_item); - - cl_page_get(page); - lu_ref_add(&page->cp_reference, "truncate", current); - - if (cl_page_own(env, io, page) == 0) { - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); - LASSERT(0); - } - - lu_ref_del(&page->cp_reference, "truncate", current); - cl_page_put(env, page); - - --ext->oe_nr_pages; - ++nr_pages; - } - EASSERTF(ergo(ext->oe_start >= trunc_index + !!partial, - ext->oe_nr_pages == 0), - ext, "trunc_index %lu, partial %d\n", trunc_index, partial); - - osc_object_lock(obj); - if (ext->oe_nr_pages == 0) { - LASSERT(pages_in_chunk == 0); - grants = ext->oe_grants; - ext->oe_grants = 0; - } else { /* calculate how many grants we can free */ - int chunks = (ext->oe_end >> ppc_bits) - trunc_chunk; - pgoff_t last_index; - - /* if there is no pages in this chunk, we can also free grants - * for the last chunk - */ - if (pages_in_chunk == 0) { - /* if this is the 1st chunk and no pages in this chunk, - * ext->oe_nr_pages must be zero, so we should be in - * the other if-clause. 
- */ - LASSERT(trunc_chunk > 0); - --trunc_chunk; - ++chunks; - } - - /* this is what we can free from this extent */ - grants = chunks << cli->cl_chunkbits; - ext->oe_grants -= grants; - last_index = ((trunc_chunk + 1) << ppc_bits) - 1; - ext->oe_end = min(last_index, ext->oe_max_end); - LASSERT(ext->oe_end >= ext->oe_start); - LASSERT(ext->oe_grants > 0); - } - osc_object_unlock(obj); - - if (grants > 0 || nr_pages > 0) - osc_free_grant(cli, nr_pages, grants); - -out: - cl_io_fini(env, io); - cl_env_put(env, &refcheck); - return rc; -} - -/** - * This function is used to make the extent prepared for transfer. - * A race with flushing page - ll_writepage() has to be handled cautiously. - */ -static int osc_extent_make_ready(const struct lu_env *env, - struct osc_extent *ext) -{ - struct osc_async_page *oap; - struct osc_async_page *last = NULL; - struct osc_object *obj = ext->oe_obj; - unsigned int page_count = 0; - int rc; - - /* we're going to grab page lock, so object lock must not be taken. */ - LASSERT(sanity_check(ext) == 0); - /* in locking state, any process should not touch this extent. */ - EASSERT(ext->oe_state == OES_LOCKING, ext); - EASSERT(ext->oe_owner, ext); - - OSC_EXTENT_DUMP(D_CACHE, ext, "make ready\n"); - - list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { - ++page_count; - if (!last || last->oap_obj_off < oap->oap_obj_off) - last = oap; - - /* checking ASYNC_READY is race safe */ - if ((oap->oap_async_flags & ASYNC_READY) != 0) - continue; - - rc = osc_make_ready(env, oap, OBD_BRW_WRITE); - switch (rc) { - case 0: - spin_lock(&oap->oap_lock); - oap->oap_async_flags |= ASYNC_READY; - spin_unlock(&oap->oap_lock); - break; - case -EALREADY: - LASSERT((oap->oap_async_flags & ASYNC_READY) != 0); - break; - default: - LASSERTF(0, "unknown return code: %d\n", rc); - } - } - - LASSERT(page_count == ext->oe_nr_pages); - LASSERT(last); - /* the last page is the only one we need to refresh its count by - * the size of file. 
- */ - if (!(last->oap_async_flags & ASYNC_COUNT_STABLE)) { - int last_oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE); - - LASSERT(last_oap_count > 0); - LASSERT(last->oap_page_off + last_oap_count <= PAGE_SIZE); - last->oap_count = last_oap_count; - spin_lock(&last->oap_lock); - last->oap_async_flags |= ASYNC_COUNT_STABLE; - spin_unlock(&last->oap_lock); - } - - /* for the rest of pages, we don't need to call osf_refresh_count() - * because it's known they are not the last page - */ - list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { - if (!(oap->oap_async_flags & ASYNC_COUNT_STABLE)) { - oap->oap_count = PAGE_SIZE - oap->oap_page_off; - spin_lock(&last->oap_lock); - oap->oap_async_flags |= ASYNC_COUNT_STABLE; - spin_unlock(&last->oap_lock); - } - } - - osc_object_lock(obj); - osc_extent_state_set(ext, OES_RPC); - osc_object_unlock(obj); - /* get a refcount for RPC. */ - osc_extent_get(ext); - - return 0; -} - -/** - * Quick and simple version of osc_extent_find(). This function is frequently - * called to expand the extent for the same IO. To expand the extent, the - * page index must be in the same or next chunk of ext->oe_end. 
- */ -static int osc_extent_expand(struct osc_extent *ext, pgoff_t index, - unsigned int *grants) -{ - struct osc_object *obj = ext->oe_obj; - struct client_obd *cli = osc_cli(obj); - struct osc_extent *next; - int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; - pgoff_t chunk = index >> ppc_bits; - pgoff_t end_chunk; - pgoff_t end_index; - unsigned int chunksize = 1 << cli->cl_chunkbits; - int rc = 0; - - LASSERT(ext->oe_max_end >= index && ext->oe_start <= index); - osc_object_lock(obj); - LASSERT(sanity_check_nolock(ext) == 0); - end_chunk = ext->oe_end >> ppc_bits; - if (chunk > end_chunk + 1) { - rc = -ERANGE; - goto out; - } - - if (end_chunk >= chunk) { - rc = 0; - goto out; - } - - LASSERT(end_chunk + 1 == chunk); - /* try to expand this extent to cover @index */ - end_index = min(ext->oe_max_end, ((chunk + 1) << ppc_bits) - 1); - - next = next_extent(ext); - if (next && next->oe_start <= end_index) { - /* complex mode - overlapped with the next extent, - * this case will be handled by osc_extent_find() - */ - rc = -EAGAIN; - goto out; - } - - ext->oe_end = end_index; - ext->oe_grants += chunksize; - LASSERT(*grants >= chunksize); - *grants -= chunksize; - EASSERTF(osc_extent_is_overlapped(obj, ext) == 0, ext, - "overlapped after expanding for %lu.\n", index); - -out: - osc_object_unlock(obj); - return rc; -} - -static void osc_extent_tree_dump0(int level, struct osc_object *obj, - const char *func, int line) -{ - struct osc_extent *ext; - int cnt; - - CDEBUG(level, "Dump object %p extents at %s:%d, mppr: %u.\n", - obj, func, line, osc_cli(obj)->cl_max_pages_per_rpc); - - /* osc_object_lock(obj); */ - cnt = 1; - for (ext = first_extent(obj); ext; ext = next_extent(ext)) - OSC_EXTENT_DUMP(level, ext, "in tree %d.\n", cnt++); - - cnt = 1; - list_for_each_entry(ext, &obj->oo_hp_exts, oe_link) - OSC_EXTENT_DUMP(level, ext, "hp %d.\n", cnt++); - - cnt = 1; - list_for_each_entry(ext, &obj->oo_urgent_exts, oe_link) - OSC_EXTENT_DUMP(level, ext, "urgent %d.\n", cnt++); 
- - cnt = 1; - list_for_each_entry(ext, &obj->oo_reading_exts, oe_link) - OSC_EXTENT_DUMP(level, ext, "reading %d.\n", cnt++); - /* osc_object_unlock(obj); */ -} - -/* ------------------ osc extent end ------------------ */ - -static inline int osc_is_ready(struct osc_object *osc) -{ - return !list_empty(&osc->oo_ready_item) || - !list_empty(&osc->oo_hp_ready_item); -} - -#define OSC_IO_DEBUG(OSC, STR, args...) \ - CDEBUG(D_CACHE, "obj %p ready %d|%c|%c wr %d|%c|%c rd %d|%c " STR, \ - (OSC), osc_is_ready(OSC), \ - list_empty_marker(&(OSC)->oo_hp_ready_item), \ - list_empty_marker(&(OSC)->oo_ready_item), \ - atomic_read(&(OSC)->oo_nr_writes), \ - list_empty_marker(&(OSC)->oo_hp_exts), \ - list_empty_marker(&(OSC)->oo_urgent_exts), \ - atomic_read(&(OSC)->oo_nr_reads), \ - list_empty_marker(&(OSC)->oo_reading_exts), \ - ##args) - -static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap, - int cmd) -{ - struct osc_page *opg = oap2osc_page(oap); - struct cl_page *page = oap2cl_page(oap); - int result; - - LASSERT(cmd == OBD_BRW_WRITE); /* no cached reads */ - - result = cl_page_make_ready(env, page, CRT_WRITE); - if (result == 0) - opg->ops_submit_time = jiffies; - return result; -} - -static int osc_refresh_count(const struct lu_env *env, - struct osc_async_page *oap, int cmd) -{ - struct osc_page *opg = oap2osc_page(oap); - pgoff_t index = osc_index(oap2osc(oap)); - struct cl_object *obj; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - - int result; - loff_t kms; - - /* readpage queues with _COUNT_STABLE, shouldn't get here. 
*/ - LASSERT(!(cmd & OBD_BRW_READ)); - obj = opg->ops_cl.cpl_obj; - - cl_object_attr_lock(obj); - result = cl_object_attr_get(env, obj, attr); - cl_object_attr_unlock(obj); - if (result < 0) - return result; - kms = attr->cat_kms; - if (cl_offset(obj, index) >= kms) - /* catch race with truncate */ - return 0; - else if (cl_offset(obj, index + 1) > kms) - /* catch sub-page write at end of file */ - return kms % PAGE_SIZE; - else - return PAGE_SIZE; -} - -static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, - int cmd, int rc) -{ - struct osc_page *opg = oap2osc_page(oap); - struct cl_page *page = oap2cl_page(oap); - enum cl_req_type crt; - int srvlock; - - cmd &= ~OBD_BRW_NOQUOTA; - LASSERTF(equi(page->cp_state == CPS_PAGEIN, cmd == OBD_BRW_READ), - "cp_state:%u, cmd:%d\n", page->cp_state, cmd); - LASSERTF(equi(page->cp_state == CPS_PAGEOUT, cmd == OBD_BRW_WRITE), - "cp_state:%u, cmd:%d\n", page->cp_state, cmd); - LASSERT(opg->ops_transfer_pinned); - - crt = cmd == OBD_BRW_READ ? CRT_READ : CRT_WRITE; - /* Clear opg->ops_transfer_pinned before VM lock is released. */ - opg->ops_transfer_pinned = 0; - - opg->ops_submit_time = 0; - srvlock = oap->oap_brw_flags & OBD_BRW_SRVLOCK; - - /* statistic */ - if (rc == 0 && srvlock) { - struct lu_device *ld = opg->ops_cl.cpl_obj->co_lu.lo_dev; - struct osc_stats *stats = &lu2osc_dev(ld)->od_stats; - size_t bytes = oap->oap_count; - - if (crt == CRT_READ) - stats->os_lockless_reads += bytes; - else - stats->os_lockless_writes += bytes; - } - - /* - * This has to be the last operation with the page, as locks are - * released in cl_page_completion() and nothing except for the - * reference counter protects page from concurrent reclaim. - */ - lu_ref_del(&page->cp_reference, "transfer", page); - - cl_page_completion(env, page, crt, rc); - cl_page_put(env, page); - - return 0; -} - -#define OSC_DUMP_GRANT(lvl, cli, fmt, args...) 
do { \ - struct client_obd *__tmp = (cli); \ - CDEBUG(lvl, "%s: grant { dirty: %lu/%lu dirty_pages: %ld/%lu " \ - "dropped: %ld avail: %ld, reserved: %ld, flight: %d }" \ - "lru {in list: %ld, left: %ld, waiters: %d }" fmt "\n", \ - cli_name(__tmp), \ - __tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages, \ - atomic_long_read(&obd_dirty_pages), obd_max_dirty_pages, \ - __tmp->cl_lost_grant, __tmp->cl_avail_grant, \ - __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \ - atomic_long_read(&__tmp->cl_lru_in_list), \ - atomic_long_read(&__tmp->cl_lru_busy), \ - atomic_read(&__tmp->cl_lru_shrinkers), ##args); \ -} while (0) - -/* caller must hold loi_list_lock */ -static void osc_consume_write_grant(struct client_obd *cli, - struct brw_page *pga) -{ - assert_spin_locked(&cli->cl_loi_list_lock); - LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT)); - atomic_long_inc(&obd_dirty_pages); - cli->cl_dirty_pages++; - pga->flag |= OBD_BRW_FROM_GRANT; - CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n", - PAGE_SIZE, pga, pga->pg); - osc_update_next_shrink(cli); -} - -/* the companion to osc_consume_write_grant, called when a brw has completed. - * must be called with the loi lock held. - */ -static void osc_release_write_grant(struct client_obd *cli, - struct brw_page *pga) -{ - assert_spin_locked(&cli->cl_loi_list_lock); - if (!(pga->flag & OBD_BRW_FROM_GRANT)) - return; - - pga->flag &= ~OBD_BRW_FROM_GRANT; - atomic_long_dec(&obd_dirty_pages); - cli->cl_dirty_pages--; - if (pga->flag & OBD_BRW_NOCACHE) { - pga->flag &= ~OBD_BRW_NOCACHE; - atomic_long_dec(&obd_dirty_transit_pages); - cli->cl_dirty_transit--; - } -} - -/** - * To avoid sleeping with object lock held, it's good for us allocate enough - * grants before entering into critical section. 
- * - * spin_lock held by caller - */ -static int osc_reserve_grant(struct client_obd *cli, unsigned int bytes) -{ - int rc = -EDQUOT; - - if (cli->cl_avail_grant >= bytes) { - cli->cl_avail_grant -= bytes; - cli->cl_reserved_grant += bytes; - rc = 0; - } - return rc; -} - -static void __osc_unreserve_grant(struct client_obd *cli, - unsigned int reserved, unsigned int unused) -{ - /* it's quite normal for us to get more grant than reserved. - * Thinking about a case that two extents merged by adding a new - * chunk, we can save one extent tax. If extent tax is greater than - * one chunk, we can save more grant by adding a new chunk - */ - cli->cl_reserved_grant -= reserved; - if (unused > reserved) { - cli->cl_avail_grant += reserved; - cli->cl_lost_grant += unused - reserved; - } else { - cli->cl_avail_grant += unused; - } -} - -static void osc_unreserve_grant(struct client_obd *cli, - unsigned int reserved, unsigned int unused) -{ - spin_lock(&cli->cl_loi_list_lock); - __osc_unreserve_grant(cli, reserved, unused); - if (unused > 0) - osc_wake_cache_waiters(cli); - spin_unlock(&cli->cl_loi_list_lock); -} - -/** - * Free grant after IO is finished or canceled. - * - * @lost_grant is used to remember how many grants we have allocated but not - * used, we should return these grants to OST. There're two cases where grants - * can be lost: - * 1. truncate; - * 2. blocksize at OST is less than PAGE_SIZE and a partial page was - * written. In this case OST may use less chunks to serve this partial - * write. OSTs don't actually know the page size on the client side. so - * clients have to calculate lost grant by the blocksize on the OST. - * See filter_grant_check() for details. 
- */ -static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, - unsigned int lost_grant) -{ - unsigned long grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax; - - spin_lock(&cli->cl_loi_list_lock); - atomic_long_sub(nr_pages, &obd_dirty_pages); - cli->cl_dirty_pages -= nr_pages; - cli->cl_lost_grant += lost_grant; - if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) { - /* borrow some grant from truncate to avoid the case that - * truncate uses up all avail grant - */ - cli->cl_lost_grant -= grant; - cli->cl_avail_grant += grant; - } - osc_wake_cache_waiters(cli); - spin_unlock(&cli->cl_loi_list_lock); - CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n", - lost_grant, cli->cl_lost_grant, - cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_SHIFT); -} - -/** - * The companion to osc_enter_cache(), called when @oap is no longer part of - * the dirty accounting due to error. - */ -static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap) -{ - spin_lock(&cli->cl_loi_list_lock); - osc_release_write_grant(cli, &oap->oap_brw_page); - spin_unlock(&cli->cl_loi_list_lock); -} - -/** - * Non-blocking version of osc_enter_cache() that consumes grant only when it - * is available. 
- */ -static int osc_enter_cache_try(struct client_obd *cli, - struct osc_async_page *oap, - int bytes, int transient) -{ - int rc; - - OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes); - - rc = osc_reserve_grant(cli, bytes); - if (rc < 0) - return 0; - - if (cli->cl_dirty_pages < cli->cl_dirty_max_pages && - atomic_long_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) { - osc_consume_write_grant(cli, &oap->oap_brw_page); - if (transient) { - cli->cl_dirty_transit++; - atomic_long_inc(&obd_dirty_transit_pages); - oap->oap_brw_flags |= OBD_BRW_NOCACHE; - } - rc = 1; - } else { - __osc_unreserve_grant(cli, bytes, bytes); - rc = 0; - } - return rc; -} - -static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw) -{ - int rc; - - spin_lock(&cli->cl_loi_list_lock); - rc = list_empty(&ocw->ocw_entry); - spin_unlock(&cli->cl_loi_list_lock); - return rc; -} - -/** - * The main entry to reserve dirty page accounting. Usually the grant reserved - * in this function will be freed in bulk in osc_free_grant() unless it fails - * to add osc cache, in that case, it will be freed in osc_exit_cache(). - * - * The process will be put into sleep if it's already run out of grant. - */ -static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, - struct osc_async_page *oap, int bytes) -{ - struct osc_object *osc = oap->oap_obj; - struct lov_oinfo *loi = osc->oo_oinfo; - struct osc_cache_waiter ocw; - unsigned long timeout = (AT_OFF ? obd_timeout : at_max) * HZ; - int rc = -EDQUOT; - - OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes); - - spin_lock(&cli->cl_loi_list_lock); - - /* force the caller to try sync io. 
this can jump the list - * of queued writes and create a discontiguous rpc stream - */ - if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) || - !cli->cl_dirty_max_pages || cli->cl_ar.ar_force_sync || - loi->loi_ar.ar_force_sync) { - OSC_DUMP_GRANT(D_CACHE, cli, "forced sync i/o\n"); - rc = -EDQUOT; - goto out; - } - - /* Hopefully normal case - cache space and write credits available */ - if (osc_enter_cache_try(cli, oap, bytes, 0)) { - OSC_DUMP_GRANT(D_CACHE, cli, "granted from cache\n"); - rc = 0; - goto out; - } - - /* We can get here for two reasons: too many dirty pages in cache, or - * run out of grants. In both cases we should write dirty pages out. - * Adding a cache waiter will trigger urgent write-out no matter what - * RPC size will be. - * The exiting condition is no avail grants and no dirty pages caching, - * that really means there is no space on the OST. - */ - init_waitqueue_head(&ocw.ocw_waitq); - ocw.ocw_oap = oap; - ocw.ocw_grant = bytes; - while (cli->cl_dirty_pages > 0 || cli->cl_w_in_flight > 0) { - list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters); - ocw.ocw_rc = 0; - spin_unlock(&cli->cl_loi_list_lock); - - osc_io_unplug_async(env, cli, NULL); - - CDEBUG(D_CACHE, "%s: sleeping for cache space @ %p for %p\n", - cli_name(cli), &ocw, oap); - - rc = wait_event_idle_timeout(ocw.ocw_waitq, - ocw_granted(cli, &ocw), timeout); - - spin_lock(&cli->cl_loi_list_lock); - - if (rc == 0) { - /* wait_event is interrupted by signal, or timed out */ - list_del_init(&ocw.ocw_entry); - rc = -ETIMEDOUT; - break; - } - LASSERT(list_empty(&ocw.ocw_entry)); - rc = ocw.ocw_rc; - - if (rc != -EDQUOT) - break; - if (osc_enter_cache_try(cli, oap, bytes, 0)) { - rc = 0; - break; - } - } - - switch (rc) { - case 0: - OSC_DUMP_GRANT(D_CACHE, cli, "finally got grant space\n"); - break; - case -ETIMEDOUT: - OSC_DUMP_GRANT(D_CACHE, cli, - "timeout, fall back to sync i/o\n"); - osc_extent_tree_dump(D_CACHE, osc); - /* fall back to synchronous I/O */ - rc = -EDQUOT; - break; - 
case -EINTR: - /* Ensures restartability - LU-3581 */ - OSC_DUMP_GRANT(D_CACHE, cli, "interrupted\n"); - rc = -ERESTARTSYS; - break; - case -EDQUOT: - OSC_DUMP_GRANT(D_CACHE, cli, - "no grant space, fall back to sync i/o\n"); - break; - default: - CDEBUG(D_CACHE, "%s: event for cache space @ %p never arrived due to %d, fall back to sync i/o\n", - cli_name(cli), &ocw, rc); - break; - } -out: - spin_unlock(&cli->cl_loi_list_lock); - return rc; -} - -/* caller must hold loi_list_lock */ -void osc_wake_cache_waiters(struct client_obd *cli) -{ - struct list_head *l, *tmp; - struct osc_cache_waiter *ocw; - - list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { - ocw = list_entry(l, struct osc_cache_waiter, ocw_entry); - list_del_init(&ocw->ocw_entry); - - ocw->ocw_rc = -EDQUOT; - /* we can't dirty more */ - if ((cli->cl_dirty_pages > cli->cl_dirty_max_pages) || - (atomic_long_read(&obd_dirty_pages) + 1 > - obd_max_dirty_pages)) { - CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %ld\n", - cli->cl_dirty_pages, cli->cl_dirty_max_pages, - obd_max_dirty_pages); - goto wakeup; - } - - if (osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0)) - ocw->ocw_rc = 0; -wakeup: - CDEBUG(D_CACHE, "wake up %p for oap %p, avail grant %ld, %d\n", - ocw, ocw->ocw_oap, cli->cl_avail_grant, ocw->ocw_rc); - - wake_up(&ocw->ocw_waitq); - } -} - -static int osc_max_rpc_in_flight(struct client_obd *cli, struct osc_object *osc) -{ - int hprpc = !!list_empty(&osc->oo_hp_exts); - - return rpcs_in_flight(cli) >= cli->cl_max_rpcs_in_flight + hprpc; -} - -/* This maintains the lists of pending pages to read/write for a given object - * (lop). This is used by osc_check_rpcs->osc_next_obj() and osc_list_maint() - * to quickly find objects that are ready to send an RPC. 
- */ -static int osc_makes_rpc(struct client_obd *cli, struct osc_object *osc, - int cmd) -{ - int invalid_import = 0; - - /* if we have an invalid import we want to drain the queued pages - * by forcing them through rpcs that immediately fail and complete - * the pages. recovery relies on this to empty the queued pages - * before canceling the locks and evicting down the llite pages - */ - if (!cli->cl_import || cli->cl_import->imp_invalid) - invalid_import = 1; - - if (cmd & OBD_BRW_WRITE) { - if (atomic_read(&osc->oo_nr_writes) == 0) - return 0; - if (invalid_import) { - CDEBUG(D_CACHE, "invalid import forcing RPC\n"); - return 1; - } - if (!list_empty(&osc->oo_hp_exts)) { - CDEBUG(D_CACHE, "high prio request forcing RPC\n"); - return 1; - } - if (!list_empty(&osc->oo_urgent_exts)) { - CDEBUG(D_CACHE, "urgent request forcing RPC\n"); - return 1; - } - /* trigger a write rpc stream as long as there are dirtiers - * waiting for space. as they're waiting, they're not going to - * create more pages to coalesce with what's waiting.. - */ - if (!list_empty(&cli->cl_cache_waiters)) { - CDEBUG(D_CACHE, "cache waiters forcing RPC\n"); - return 1; - } - if (atomic_read(&osc->oo_nr_writes) >= - cli->cl_max_pages_per_rpc) - return 1; - } else { - if (atomic_read(&osc->oo_nr_reads) == 0) - return 0; - if (invalid_import) { - CDEBUG(D_CACHE, "invalid import forcing RPC\n"); - return 1; - } - /* all read are urgent. 
*/ - if (!list_empty(&osc->oo_reading_exts)) - return 1; - } - - return 0; -} - -static void osc_update_pending(struct osc_object *obj, int cmd, int delta) -{ - struct client_obd *cli = osc_cli(obj); - - if (cmd & OBD_BRW_WRITE) { - atomic_add(delta, &obj->oo_nr_writes); - atomic_add(delta, &cli->cl_pending_w_pages); - LASSERT(atomic_read(&obj->oo_nr_writes) >= 0); - } else { - atomic_add(delta, &obj->oo_nr_reads); - atomic_add(delta, &cli->cl_pending_r_pages); - LASSERT(atomic_read(&obj->oo_nr_reads) >= 0); - } - OSC_IO_DEBUG(obj, "update pending cmd %d delta %d.\n", cmd, delta); -} - -static int osc_makes_hprpc(struct osc_object *obj) -{ - return !list_empty(&obj->oo_hp_exts); -} - -static void on_list(struct list_head *item, struct list_head *list, int should_be_on) -{ - if (list_empty(item) && should_be_on) - list_add_tail(item, list); - else if (!list_empty(item) && !should_be_on) - list_del_init(item); -} - -/* maintain the osc's cli list membership invariants so that osc_send_oap_rpc - * can find pages to build into rpcs quickly - */ -static int __osc_list_maint(struct client_obd *cli, struct osc_object *osc) -{ - if (osc_makes_hprpc(osc)) { - /* HP rpc */ - on_list(&osc->oo_ready_item, &cli->cl_loi_ready_list, 0); - on_list(&osc->oo_hp_ready_item, &cli->cl_loi_hp_ready_list, 1); - } else { - on_list(&osc->oo_hp_ready_item, &cli->cl_loi_hp_ready_list, 0); - on_list(&osc->oo_ready_item, &cli->cl_loi_ready_list, - osc_makes_rpc(cli, osc, OBD_BRW_WRITE) || - osc_makes_rpc(cli, osc, OBD_BRW_READ)); - } - - on_list(&osc->oo_write_item, &cli->cl_loi_write_list, - atomic_read(&osc->oo_nr_writes) > 0); - - on_list(&osc->oo_read_item, &cli->cl_loi_read_list, - atomic_read(&osc->oo_nr_reads) > 0); - - return osc_is_ready(osc); -} - -static int osc_list_maint(struct client_obd *cli, struct osc_object *osc) -{ - int is_ready; - - spin_lock(&cli->cl_loi_list_lock); - is_ready = __osc_list_maint(cli, osc); - spin_unlock(&cli->cl_loi_list_lock); - - return is_ready; -} - 
-/* this is trying to propagate async writeback errors back up to the - * application. As an async write fails we record the error code for later if - * the app does an fsync. As long as errors persist we force future rpcs to be - * sync so that the app can get a sync error and break the cycle of queueing - * pages for which writeback will fail. - */ -static void osc_process_ar(struct osc_async_rc *ar, __u64 xid, - int rc) -{ - if (rc) { - if (!ar->ar_rc) - ar->ar_rc = rc; - - ar->ar_force_sync = 1; - ar->ar_min_xid = ptlrpc_sample_next_xid(); - return; - } - - if (ar->ar_force_sync && (xid >= ar->ar_min_xid)) - ar->ar_force_sync = 0; -} - -/* this must be called holding the loi list lock to give coverage to exit_cache, - * async_flag maintenance, and oap_request - */ -static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli, - struct osc_async_page *oap, int sent, int rc) -{ - struct osc_object *osc = oap->oap_obj; - struct lov_oinfo *loi = osc->oo_oinfo; - __u64 xid = 0; - - if (oap->oap_request) { - xid = ptlrpc_req_xid(oap->oap_request); - ptlrpc_req_finished(oap->oap_request); - oap->oap_request = NULL; - } - - /* As the transfer for this page is being done, clear the flags */ - spin_lock(&oap->oap_lock); - oap->oap_async_flags = 0; - spin_unlock(&oap->oap_lock); - oap->oap_interrupted = 0; - - if (oap->oap_cmd & OBD_BRW_WRITE && xid > 0) { - spin_lock(&cli->cl_loi_list_lock); - osc_process_ar(&cli->cl_ar, xid, rc); - osc_process_ar(&loi->loi_ar, xid, rc); - spin_unlock(&cli->cl_loi_list_lock); - } - - rc = osc_completion(env, oap, oap->oap_cmd, rc); - if (rc) - CERROR("completion on oap %p obj %p returns %d.\n", - oap, osc, rc); -} - -struct extent_rpc_data { - struct list_head *erd_rpc_list; - unsigned int erd_page_count; - unsigned int erd_max_pages; - unsigned int erd_max_chunks; - unsigned int erd_max_extents; -}; - -static inline unsigned int osc_extent_chunks(const struct osc_extent *ext) -{ - struct client_obd *cli = 
osc_cli(ext->oe_obj); - unsigned int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; - - return (ext->oe_end >> ppc_bits) - (ext->oe_start >> ppc_bits) + 1; -} - -/** - * Try to add extent to one RPC. We need to think about the following things: - * - # of pages must not be over max_pages_per_rpc - * - extent must be compatible with previous ones - */ -static int try_to_add_extent_for_io(struct client_obd *cli, - struct osc_extent *ext, - struct extent_rpc_data *data) -{ - struct osc_extent *tmp; - unsigned int chunk_count; - struct osc_async_page *oap = list_first_entry(&ext->oe_pages, - struct osc_async_page, - oap_pending_item); - - EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE), - ext); - - if (!data->erd_max_extents) - return 0; - - chunk_count = osc_extent_chunks(ext); - EASSERTF(data->erd_page_count != 0 || - chunk_count <= data->erd_max_chunks, ext, - "The first extent to be fit in a RPC contains %u chunks, which is over the limit %u.\n", - chunk_count, data->erd_max_chunks); - - if (chunk_count > data->erd_max_chunks) - return 0; - - data->erd_max_pages = max(ext->oe_mppr, data->erd_max_pages); - EASSERTF(data->erd_page_count != 0 || - ext->oe_nr_pages <= data->erd_max_pages, ext, - "The first extent to be fit in a RPC contains %u pages, which is over the limit %u.\n", - ext->oe_nr_pages, data->erd_max_pages); - if (data->erd_page_count + ext->oe_nr_pages > data->erd_max_pages) - return 0; - - list_for_each_entry(tmp, data->erd_rpc_list, oe_link) { - struct osc_async_page *oap2; - - oap2 = list_first_entry(&tmp->oe_pages, struct osc_async_page, - oap_pending_item); - EASSERT(tmp->oe_owner == current, tmp); - if (oap2cl_page(oap)->cp_type != oap2cl_page(oap2)->cp_type) { - CDEBUG(D_CACHE, "Do not permit different type of IO in one RPC\n"); - return 0; - } - - if (tmp->oe_srvlock != ext->oe_srvlock || - !tmp->oe_grants != !ext->oe_grants || - tmp->oe_no_merge || ext->oe_no_merge) - return 0; - - /* remove break for strict check */ - break; 
- } - - data->erd_max_extents--; - data->erd_max_chunks -= chunk_count; - data->erd_page_count += ext->oe_nr_pages; - list_move_tail(&ext->oe_link, data->erd_rpc_list); - ext->oe_owner = current; - return 1; -} - -static inline unsigned int osc_max_write_chunks(const struct client_obd *cli) -{ - /* - * LU-8135: - * - * The maximum size of a single transaction is about 64MB in ZFS. - * #define DMU_MAX_ACCESS (64 * 1024 * 1024) - * - * Since ZFS is a copy-on-write file system, a single dirty page in - * a chunk will result in the rewrite of the whole chunk, therefore - * an RPC shouldn't be allowed to contain too many chunks otherwise - * it will make transaction size much bigger than 64MB, especially - * with big block size for ZFS. - * - * This piece of code is to make sure that OSC won't send write RPCs - * with too many chunks. The maximum chunk size that an RPC can cover - * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally - * OST should tell the client what the biggest transaction size is, - * but it's good enough for now. - * - * This limitation doesn't apply to ldiskfs, which allows as many - * chunks in one RPC as we want. However, it won't have any benefits - * to have too many discontiguous pages in one RPC. - * - * An osc_extent won't cover over a RPC size, so the chunks in an - * osc_extent won't bigger than PTLRPC_MAX_BRW_SIZE >> chunkbits. - */ - return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits; -} - -/** - * In order to prevent multiple ptlrpcd from breaking contiguous extents, - * get_write_extent() takes all appropriate extents in atomic. - * - * The following policy is used to collect extents for IO: - * 1. Add as many HP extents as possible; - * 2. Add the first urgent extent in urgent extent list and take it out of - * urgent list; - * 3. Add subsequent extents of this urgent extent; - * 4. If urgent list is not empty, goto 2; - * 5. Traverse the extent tree from the 1st extent; - * 6. 
Above steps exit if there is no space in this RPC. - */ -static unsigned int get_write_extents(struct osc_object *obj, - struct list_head *rpclist) -{ - struct client_obd *cli = osc_cli(obj); - struct osc_extent *ext; - struct osc_extent *temp; - struct extent_rpc_data data = { - .erd_rpc_list = rpclist, - .erd_page_count = 0, - .erd_max_pages = cli->cl_max_pages_per_rpc, - .erd_max_chunks = osc_max_write_chunks(cli), - .erd_max_extents = 256, - }; - - LASSERT(osc_object_is_locked(obj)); - list_for_each_entry_safe(ext, temp, &obj->oo_hp_exts, oe_link) { - LASSERT(ext->oe_state == OES_CACHE); - if (!try_to_add_extent_for_io(cli, ext, &data)) - return data.erd_page_count; - EASSERT(ext->oe_nr_pages <= data.erd_max_pages, ext); - } - if (data.erd_page_count == data.erd_max_pages) - return data.erd_page_count; - - while (!list_empty(&obj->oo_urgent_exts)) { - ext = list_entry(obj->oo_urgent_exts.next, - struct osc_extent, oe_link); - if (!try_to_add_extent_for_io(cli, ext, &data)) - return data.erd_page_count; - - if (!ext->oe_intree) - continue; - - while ((ext = next_extent(ext)) != NULL) { - if ((ext->oe_state != OES_CACHE) || - (!list_empty(&ext->oe_link) && - ext->oe_owner)) - continue; - - if (!try_to_add_extent_for_io(cli, ext, &data)) - return data.erd_page_count; - } - } - if (data.erd_page_count == data.erd_max_pages) - return data.erd_page_count; - - ext = first_extent(obj); - while (ext) { - if ((ext->oe_state != OES_CACHE) || - /* this extent may be already in current rpclist */ - (!list_empty(&ext->oe_link) && ext->oe_owner)) { - ext = next_extent(ext); - continue; - } - - if (!try_to_add_extent_for_io(cli, ext, &data)) - return data.erd_page_count; - - ext = next_extent(ext); - } - return data.erd_page_count; -} - -static int -osc_send_write_rpc(const struct lu_env *env, struct client_obd *cli, - struct osc_object *osc) - __must_hold(osc) -{ - LIST_HEAD(rpclist); - struct osc_extent *ext; - struct osc_extent *tmp; - struct osc_extent *first = NULL; - u32 
page_count = 0; - int srvlock = 0; - int rc = 0; - - LASSERT(osc_object_is_locked(osc)); - - page_count = get_write_extents(osc, &rpclist); - LASSERT(equi(page_count == 0, list_empty(&rpclist))); - - if (list_empty(&rpclist)) - return 0; - - osc_update_pending(osc, OBD_BRW_WRITE, -page_count); - - list_for_each_entry(ext, &rpclist, oe_link) { - LASSERT(ext->oe_state == OES_CACHE || - ext->oe_state == OES_LOCK_DONE); - if (ext->oe_state == OES_CACHE) - osc_extent_state_set(ext, OES_LOCKING); - else - osc_extent_state_set(ext, OES_RPC); - } - - /* we're going to grab page lock, so release object lock because - * lock order is page lock -> object lock. - */ - osc_object_unlock(osc); - - list_for_each_entry_safe(ext, tmp, &rpclist, oe_link) { - if (ext->oe_state == OES_LOCKING) { - rc = osc_extent_make_ready(env, ext); - if (unlikely(rc < 0)) { - list_del_init(&ext->oe_link); - osc_extent_finish(env, ext, 0, rc); - continue; - } - } - if (!first) { - first = ext; - srvlock = ext->oe_srvlock; - } else { - LASSERT(srvlock == ext->oe_srvlock); - } - } - - if (!list_empty(&rpclist)) { - LASSERT(page_count > 0); - rc = osc_build_rpc(env, cli, &rpclist, OBD_BRW_WRITE); - LASSERT(list_empty(&rpclist)); - } - - osc_object_lock(osc); - return rc; -} - -/** - * prepare pages for ASYNC io and put pages in send queue. - * - * \param cmd OBD_BRW_* macroses - * \param lop pending pages - * - * \return zero if no page added to send queue. - * \return 1 if pages successfully added to send queue. - * \return negative on errors. 
- */ -static int -osc_send_read_rpc(const struct lu_env *env, struct client_obd *cli, - struct osc_object *osc) - __must_hold(osc) -{ - struct osc_extent *ext; - struct osc_extent *next; - LIST_HEAD(rpclist); - struct extent_rpc_data data = { - .erd_rpc_list = &rpclist, - .erd_page_count = 0, - .erd_max_pages = cli->cl_max_pages_per_rpc, - .erd_max_chunks = UINT_MAX, - .erd_max_extents = UINT_MAX, - }; - int rc = 0; - - LASSERT(osc_object_is_locked(osc)); - list_for_each_entry_safe(ext, next, &osc->oo_reading_exts, oe_link) { - EASSERT(ext->oe_state == OES_LOCK_DONE, ext); - if (!try_to_add_extent_for_io(cli, ext, &data)) - break; - osc_extent_state_set(ext, OES_RPC); - EASSERT(ext->oe_nr_pages <= data.erd_max_pages, ext); - } - LASSERT(data.erd_page_count <= data.erd_max_pages); - - osc_update_pending(osc, OBD_BRW_READ, -data.erd_page_count); - - if (!list_empty(&rpclist)) { - osc_object_unlock(osc); - - rc = osc_build_rpc(env, cli, &rpclist, OBD_BRW_READ); - LASSERT(list_empty(&rpclist)); - - osc_object_lock(osc); - } - return rc; -} - -#define list_to_obj(list, item) ({ \ - struct list_head *__tmp = (list)->next; \ - list_del_init(__tmp); \ - list_entry(__tmp, struct osc_object, oo_##item); \ -}) - -/* This is called by osc_check_rpcs() to find which objects have pages that - * we could be sending. These lists are maintained by osc_makes_rpc(). - */ -static struct osc_object *osc_next_obj(struct client_obd *cli) -{ - /* First return objects that have blocked locks so that they - * will be flushed quickly and other clients can get the lock, - * then objects which have pages ready to be stuffed into RPCs - */ - if (!list_empty(&cli->cl_loi_hp_ready_list)) - return list_to_obj(&cli->cl_loi_hp_ready_list, hp_ready_item); - if (!list_empty(&cli->cl_loi_ready_list)) - return list_to_obj(&cli->cl_loi_ready_list, ready_item); - - /* then if we have cache waiters, return all objects with queued - * writes. 
This is especially important when many small files - * have filled up the cache and not been fired into rpcs because - * they don't pass the nr_pending/object threshold - */ - if (!list_empty(&cli->cl_cache_waiters) && - !list_empty(&cli->cl_loi_write_list)) - return list_to_obj(&cli->cl_loi_write_list, write_item); - - /* then return all queued objects when we have an invalid import - * so that they get flushed - */ - if (!cli->cl_import || cli->cl_import->imp_invalid) { - if (!list_empty(&cli->cl_loi_write_list)) - return list_to_obj(&cli->cl_loi_write_list, write_item); - if (!list_empty(&cli->cl_loi_read_list)) - return list_to_obj(&cli->cl_loi_read_list, read_item); - } - return NULL; -} - -/* called with the loi list lock held */ -static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli) - __must_hold(&cli->cl_loi_list_lock) -{ - struct osc_object *osc; - int rc = 0; - - while ((osc = osc_next_obj(cli)) != NULL) { - struct cl_object *obj = osc2cl(osc); - struct lu_ref_link link; - - OSC_IO_DEBUG(osc, "%lu in flight\n", rpcs_in_flight(cli)); - - if (osc_max_rpc_in_flight(cli, osc)) { - __osc_list_maint(cli, osc); - break; - } - - cl_object_get(obj); - spin_unlock(&cli->cl_loi_list_lock); - lu_object_ref_add_at(&obj->co_lu, &link, "check", current); - - /* attempt some read/write balancing by alternating between - * reads and writes in an object. The makes_rpc checks here - * would be redundant if we were getting read/write work items - * instead of objects. we don't want send_oap_rpc to drain a - * partial read pending queue when we're given this object to - * do io on writes while there are cache waiters - */ - osc_object_lock(osc); - if (osc_makes_rpc(cli, osc, OBD_BRW_WRITE)) { - rc = osc_send_write_rpc(env, cli, osc); - if (rc < 0) { - CERROR("Write request failed with %d\n", rc); - - /* osc_send_write_rpc failed, mostly because of - * memory pressure. 
- * - * It can't break here, because if: - * - a page was submitted by osc_io_submit, so - * page locked; - * - no request in flight - * - no subsequent request - * The system will be in live-lock state, - * because there is no chance to call - * osc_io_unplug() and osc_check_rpcs() any - * more. pdflush can't help in this case, - * because it might be blocked at grabbing - * the page lock as we mentioned. - * - * Anyway, continue to drain pages. - */ - /* break; */ - } - } - if (osc_makes_rpc(cli, osc, OBD_BRW_READ)) { - rc = osc_send_read_rpc(env, cli, osc); - if (rc < 0) - CERROR("Read request failed with %d\n", rc); - } - osc_object_unlock(osc); - - osc_list_maint(cli, osc); - lu_object_ref_del_at(&obj->co_lu, &link, "check", current); - cl_object_put(env, obj); - - spin_lock(&cli->cl_loi_list_lock); - } -} - -static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli, - struct osc_object *osc, int async) -{ - int rc = 0; - - if (osc && osc_list_maint(cli, osc) == 0) - return 0; - - if (!async) { - spin_lock(&cli->cl_loi_list_lock); - osc_check_rpcs(env, cli); - spin_unlock(&cli->cl_loi_list_lock); - } else { - CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli); - LASSERT(cli->cl_writeback_work); - rc = ptlrpcd_queue_work(cli->cl_writeback_work); - } - return rc; -} - -static int osc_io_unplug_async(const struct lu_env *env, - struct client_obd *cli, struct osc_object *osc) -{ - return osc_io_unplug0(env, cli, osc, 1); -} - -void osc_io_unplug(const struct lu_env *env, struct client_obd *cli, - struct osc_object *osc) -{ - (void)osc_io_unplug0(env, cli, osc, 0); -} - -int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops, - struct page *page, loff_t offset) -{ - struct obd_export *exp = osc_export(osc); - struct osc_async_page *oap = &ops->ops_oap; - - if (!page) - return cfs_size_round(sizeof(*oap)); - - oap->oap_magic = OAP_MAGIC; - oap->oap_cli = &exp->exp_obd->u.cli; - oap->oap_obj = osc; - - oap->oap_page = page; - 
oap->oap_obj_off = offset; - LASSERT(!(offset & ~PAGE_MASK)); - - if (capable(CAP_SYS_RESOURCE)) - oap->oap_brw_flags = OBD_BRW_NOQUOTA; - - INIT_LIST_HEAD(&oap->oap_pending_item); - INIT_LIST_HEAD(&oap->oap_rpc_item); - - spin_lock_init(&oap->oap_lock); - CDEBUG(D_INFO, "oap %p page %p obj off %llu\n", - oap, page, oap->oap_obj_off); - return 0; -} - -int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops) -{ - struct osc_io *oio = osc_env_io(env); - struct osc_extent *ext = NULL; - struct osc_async_page *oap = &ops->ops_oap; - struct client_obd *cli = oap->oap_cli; - struct osc_object *osc = oap->oap_obj; - pgoff_t index; - unsigned int grants = 0, tmp; - int brw_flags = OBD_BRW_ASYNC; - int cmd = OBD_BRW_WRITE; - int need_release = 0; - int rc = 0; - - if (oap->oap_magic != OAP_MAGIC) - return -EINVAL; - - if (!cli->cl_import || cli->cl_import->imp_invalid) - return -EIO; - - if (!list_empty(&oap->oap_pending_item) || - !list_empty(&oap->oap_rpc_item)) - return -EBUSY; - - /* Set the OBD_BRW_SRVLOCK before the page is queued. */ - brw_flags |= ops->ops_srvlock ? OBD_BRW_SRVLOCK : 0; - if (capable(CAP_SYS_RESOURCE)) { - brw_flags |= OBD_BRW_NOQUOTA; - cmd |= OBD_BRW_NOQUOTA; - } - - /* check if the file's owner/group is over quota */ - if (!(cmd & OBD_BRW_NOQUOTA)) { - struct cl_object *obj; - struct cl_attr *attr; - unsigned int qid[MAXQUOTAS]; - - obj = cl_object_top(&osc->oo_cl); - attr = &osc_env_info(env)->oti_attr; - - cl_object_attr_lock(obj); - rc = cl_object_attr_get(env, obj, attr); - cl_object_attr_unlock(obj); - - qid[USRQUOTA] = attr->cat_uid; - qid[GRPQUOTA] = attr->cat_gid; - if (rc == 0 && osc_quota_chkdq(cli, qid) == NO_QUOTA) - rc = -EDQUOT; - if (rc) - return rc; - } - - oap->oap_cmd = cmd; - oap->oap_page_off = ops->ops_from; - oap->oap_count = ops->ops_to - ops->ops_from; - /* - * No need to hold a lock here, - * since this page is not in any list yet. 
- */ - oap->oap_async_flags = 0; - oap->oap_brw_flags = brw_flags; - - OSC_IO_DEBUG(osc, "oap %p page %p added for cmd %d\n", - oap, oap->oap_page, oap->oap_cmd & OBD_BRW_RWMASK); - - index = osc_index(oap2osc(oap)); - - /* Add this page into extent by the following steps: - * 1. if there exists an active extent for this IO, mostly this page - * can be added to the active extent and sometimes we need to - * expand extent to accommodate this page; - * 2. otherwise, a new extent will be allocated. - */ - - ext = oio->oi_active; - if (ext && ext->oe_start <= index && ext->oe_max_end >= index) { - /* one chunk plus extent overhead must be enough to write this - * page - */ - grants = (1 << cli->cl_chunkbits) + cli->cl_extent_tax; - if (ext->oe_end >= index) - grants = 0; - - /* it doesn't need any grant to dirty this page */ - spin_lock(&cli->cl_loi_list_lock); - rc = osc_enter_cache_try(cli, oap, grants, 0); - spin_unlock(&cli->cl_loi_list_lock); - if (rc == 0) { /* try failed */ - grants = 0; - need_release = 1; - } else if (ext->oe_end < index) { - tmp = grants; - /* try to expand this extent */ - rc = osc_extent_expand(ext, index, &tmp); - if (rc < 0) { - need_release = 1; - /* don't free reserved grant */ - } else { - OSC_EXTENT_DUMP(D_CACHE, ext, - "expanded for %lu.\n", index); - osc_unreserve_grant(cli, grants, tmp); - grants = 0; - } - } - rc = 0; - } else if (ext) { - /* index is located outside of active extent */ - need_release = 1; - } - if (need_release) { - osc_extent_release(env, ext); - oio->oi_active = NULL; - ext = NULL; - } - - if (!ext) { - tmp = (1 << cli->cl_chunkbits) + cli->cl_extent_tax; - - /* try to find new extent to cover this page */ - LASSERT(!oio->oi_active); - /* we may have allocated grant for this page if we failed - * to expand the previous active extent. - */ - LASSERT(ergo(grants > 0, grants >= tmp)); - - rc = 0; - if (grants == 0) { - /* we haven't allocated grant for this page. 
*/ - rc = osc_enter_cache(env, cli, oap, tmp); - if (rc == 0) - grants = tmp; - } - - tmp = grants; - if (rc == 0) { - ext = osc_extent_find(env, osc, index, &tmp); - if (IS_ERR(ext)) { - LASSERT(tmp == grants); - osc_exit_cache(cli, oap); - rc = PTR_ERR(ext); - ext = NULL; - } else { - oio->oi_active = ext; - } - } - if (grants > 0) - osc_unreserve_grant(cli, grants, tmp); - } - - LASSERT(ergo(rc == 0, ext)); - if (ext) { - EASSERTF(ext->oe_end >= index && ext->oe_start <= index, - ext, "index = %lu.\n", index); - LASSERT((oap->oap_brw_flags & OBD_BRW_FROM_GRANT) != 0); - - osc_object_lock(osc); - if (ext->oe_nr_pages == 0) - ext->oe_srvlock = ops->ops_srvlock; - else - LASSERT(ext->oe_srvlock == ops->ops_srvlock); - ++ext->oe_nr_pages; - list_add_tail(&oap->oap_pending_item, &ext->oe_pages); - osc_object_unlock(osc); - } - return rc; -} - -int osc_teardown_async_page(const struct lu_env *env, - struct osc_object *obj, struct osc_page *ops) -{ - struct osc_async_page *oap = &ops->ops_oap; - int rc = 0; - - LASSERT(oap->oap_magic == OAP_MAGIC); - - CDEBUG(D_INFO, "teardown oap %p page %p at index %lu.\n", - oap, ops, osc_index(oap2osc(oap))); - - if (!list_empty(&oap->oap_rpc_item)) { - CDEBUG(D_CACHE, "oap %p is not in cache.\n", oap); - rc = -EBUSY; - } else if (!list_empty(&oap->oap_pending_item)) { - struct osc_extent *ext = NULL; - - osc_object_lock(obj); - ext = osc_extent_lookup(obj, osc_index(oap2osc(oap))); - osc_object_unlock(obj); - /* only truncated pages are allowed to be taken out. - * See osc_extent_truncate() and osc_cache_truncate_start() - * for details. - */ - if (ext && ext->oe_state != OES_TRUNC) { - OSC_EXTENT_DUMP(D_ERROR, ext, "trunc at %lu.\n", - osc_index(oap2osc(oap))); - rc = -EBUSY; - } - if (ext) - osc_extent_put(env, ext); - } - return rc; -} - -/** - * This is called when a page is picked up by kernel to write out. - * - * We should find out the corresponding extent and add the whole extent - * into urgent list. 
The extent may be being truncated or used, handle it - * carefully. - */ -int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops) -{ - struct osc_extent *ext = NULL; - struct osc_object *obj = cl2osc(ops->ops_cl.cpl_obj); - struct cl_page *cp = ops->ops_cl.cpl_page; - pgoff_t index = osc_index(ops); - struct osc_async_page *oap = &ops->ops_oap; - bool unplug = false; - int rc = 0; - - osc_object_lock(obj); - ext = osc_extent_lookup(obj, index); - if (!ext) { - osc_extent_tree_dump(D_ERROR, obj); - LASSERTF(0, "page index %lu is NOT covered.\n", index); - } - - switch (ext->oe_state) { - case OES_RPC: - case OES_LOCK_DONE: - CL_PAGE_DEBUG(D_ERROR, env, cp, "flush an in-rpc page?\n"); - LASSERT(0); - break; - case OES_LOCKING: - /* If we know this extent is being written out, we should abort - * so that the writer can make this page ready. Otherwise, there - * exists a deadlock problem because other process can wait for - * page writeback bit holding page lock; and meanwhile in - * vvp_page_make_ready(), we need to grab page lock before - * really sending the RPC. - */ - case OES_TRUNC: - /* race with truncate, page will be redirtied */ - case OES_ACTIVE: - /* The extent is active so we need to abort and let the caller - * re-dirty the page. If we continued on here, and we were the - * one making the extent active, we could deadlock waiting for - * the page writeback to clear but it won't because the extent - * is active and won't be written out. 
- */ - rc = -EAGAIN; - goto out; - default: - break; - } - - rc = cl_page_prep(env, io, cp, CRT_WRITE); - if (rc) - goto out; - - spin_lock(&oap->oap_lock); - oap->oap_async_flags |= ASYNC_READY | ASYNC_URGENT; - spin_unlock(&oap->oap_lock); - - if (current->flags & PF_MEMALLOC) - ext->oe_memalloc = 1; - - ext->oe_urgent = 1; - if (ext->oe_state == OES_CACHE) { - OSC_EXTENT_DUMP(D_CACHE, ext, - "flush page %p make it urgent.\n", oap); - if (list_empty(&ext->oe_link)) - list_add_tail(&ext->oe_link, &obj->oo_urgent_exts); - unplug = true; - } - rc = 0; - -out: - osc_object_unlock(obj); - osc_extent_put(env, ext); - if (unplug) - osc_io_unplug_async(env, osc_cli(obj), obj); - return rc; -} - -/** - * this is called when a sync waiter receives an interruption. Its job is to - * get the caller woken as soon as possible. If its page hasn't been put in an - * rpc yet it can dequeue immediately. Otherwise it has to mark the rpc as - * desiring interruption which will forcefully complete the rpc once the rpc - * has timed out. - */ -int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops) -{ - struct osc_async_page *oap = &ops->ops_oap; - struct osc_object *obj = oap->oap_obj; - struct client_obd *cli = osc_cli(obj); - struct osc_extent *ext; - struct osc_extent *found = NULL; - struct list_head *plist; - pgoff_t index = osc_index(ops); - int rc = -EBUSY; - int cmd; - - LASSERT(!oap->oap_interrupted); - oap->oap_interrupted = 1; - - /* Find out the caching extent */ - osc_object_lock(obj); - if (oap->oap_cmd & OBD_BRW_WRITE) { - plist = &obj->oo_urgent_exts; - cmd = OBD_BRW_WRITE; - } else { - plist = &obj->oo_reading_exts; - cmd = OBD_BRW_READ; - } - list_for_each_entry(ext, plist, oe_link) { - if (ext->oe_start <= index && ext->oe_end >= index) { - LASSERT(ext->oe_state == OES_LOCK_DONE); - /* For OES_LOCK_DONE state extent, it has already held - * a refcount for RPC. 
- */ - found = osc_extent_get(ext); - break; - } - } - if (found) { - list_del_init(&found->oe_link); - osc_update_pending(obj, cmd, -found->oe_nr_pages); - osc_object_unlock(obj); - - osc_extent_finish(env, found, 0, -EINTR); - osc_extent_put(env, found); - rc = 0; - } else { - osc_object_unlock(obj); - /* ok, it's been put in an rpc. only one oap gets a request - * reference - */ - if (oap->oap_request) { - ptlrpc_mark_interrupted(oap->oap_request); - ptlrpcd_wake(oap->oap_request); - ptlrpc_req_finished(oap->oap_request); - oap->oap_request = NULL; - } - } - - osc_list_maint(cli, obj); - return rc; -} - -int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, - struct list_head *list, int cmd, int brw_flags) -{ - struct client_obd *cli = osc_cli(obj); - struct osc_extent *ext; - struct osc_async_page *oap, *tmp; - int page_count = 0; - int mppr = cli->cl_max_pages_per_rpc; - bool can_merge = true; - pgoff_t start = CL_PAGE_EOF; - pgoff_t end = 0; - - list_for_each_entry(oap, list, oap_pending_item) { - struct osc_page *opg = oap2osc_page(oap); - pgoff_t index = osc_index(opg); - - if (index > end) - end = index; - if (index < start) - start = index; - ++page_count; - mppr <<= (page_count > mppr); - - if (unlikely(opg->ops_from > 0 || opg->ops_to < PAGE_SIZE)) - can_merge = false; - } - - ext = osc_extent_alloc(obj); - if (!ext) { - list_for_each_entry_safe(oap, tmp, list, oap_pending_item) { - list_del_init(&oap->oap_pending_item); - osc_ap_completion(env, cli, oap, 0, -ENOMEM); - } - return -ENOMEM; - } - - ext->oe_rw = !!(cmd & OBD_BRW_READ); - ext->oe_sync = 1; - ext->oe_no_merge = !can_merge; - ext->oe_urgent = 1; - ext->oe_start = start; - ext->oe_end = end; - ext->oe_max_end = end; - ext->oe_obj = obj; - ext->oe_srvlock = !!(brw_flags & OBD_BRW_SRVLOCK); - ext->oe_nr_pages = page_count; - ext->oe_mppr = mppr; - list_splice_init(list, &ext->oe_pages); - - osc_object_lock(obj); - /* Reuse the initial refcount for RPC, don't drop it */ - 
osc_extent_state_set(ext, OES_LOCK_DONE); - if (cmd & OBD_BRW_WRITE) { - list_add_tail(&ext->oe_link, &obj->oo_urgent_exts); - osc_update_pending(obj, OBD_BRW_WRITE, page_count); - } else { - list_add_tail(&ext->oe_link, &obj->oo_reading_exts); - osc_update_pending(obj, OBD_BRW_READ, page_count); - } - osc_object_unlock(obj); - - osc_io_unplug_async(env, cli, obj); - return 0; -} - -/** - * Called by osc_io_setattr_start() to freeze and destroy covering extents. - */ -int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj, - u64 size, struct osc_extent **extp) -{ - struct client_obd *cli = osc_cli(obj); - struct osc_extent *ext; - struct osc_extent *temp; - struct osc_extent *waiting = NULL; - pgoff_t index; - LIST_HEAD(list); - int result = 0; - bool partial; - - /* pages with index greater or equal to index will be truncated. */ - index = cl_index(osc2cl(obj), size); - partial = size > cl_offset(osc2cl(obj), index); - -again: - osc_object_lock(obj); - ext = osc_extent_search(obj, index); - if (!ext) - ext = first_extent(obj); - else if (ext->oe_end < index) - ext = next_extent(ext); - while (ext) { - EASSERT(ext->oe_state != OES_TRUNC, ext); - - if (ext->oe_state > OES_CACHE || ext->oe_urgent) { - /* if ext is in urgent state, it means there must exist - * a page already having been flushed by write_page(). - * We have to wait for this extent because we can't - * truncate that page. - */ - OSC_EXTENT_DUMP(D_CACHE, ext, - "waiting for busy extent\n"); - waiting = osc_extent_get(ext); - break; - } - - OSC_EXTENT_DUMP(D_CACHE, ext, "try to trunc:%llu.\n", size); - - osc_extent_get(ext); - if (ext->oe_state == OES_ACTIVE) { - /* though we grab inode mutex for write path, but we - * release it before releasing extent(in osc_io_end()), - * so there is a race window that an extent is still - * in OES_ACTIVE when truncate starts. 
- */ - LASSERT(!ext->oe_trunc_pending); - ext->oe_trunc_pending = 1; - } else { - EASSERT(ext->oe_state == OES_CACHE, ext); - osc_extent_state_set(ext, OES_TRUNC); - osc_update_pending(obj, OBD_BRW_WRITE, - -ext->oe_nr_pages); - } - EASSERT(list_empty(&ext->oe_link), ext); - list_add_tail(&ext->oe_link, &list); - - ext = next_extent(ext); - } - osc_object_unlock(obj); - - osc_list_maint(cli, obj); - - list_for_each_entry_safe(ext, temp, &list, oe_link) { - int rc; - - list_del_init(&ext->oe_link); - - /* extent may be in OES_ACTIVE state because inode mutex - * is released before osc_io_end() in file write case - */ - if (ext->oe_state != OES_TRUNC) - osc_extent_wait(env, ext, OES_TRUNC); - - rc = osc_extent_truncate(ext, index, partial); - if (rc < 0) { - if (result == 0) - result = rc; - - OSC_EXTENT_DUMP(D_ERROR, ext, - "truncate error %d\n", rc); - } else if (ext->oe_nr_pages == 0) { - osc_extent_remove(ext); - } else { - /* this must be an overlapped extent which means only - * part of pages in this extent have been truncated. - */ - EASSERTF(ext->oe_start <= index, ext, - "trunc index = %lu/%d.\n", index, partial); - /* fix index to skip this partially truncated extent */ - index = ext->oe_end + 1; - partial = false; - - /* we need to hold this extent in OES_TRUNC state so - * that no writeback will happen. This is to avoid - * BUG 17397. - * Only partial truncate can reach here, if @size is - * not zero, the caller should provide a valid @extp. - */ - LASSERT(!*extp); - *extp = osc_extent_get(ext); - OSC_EXTENT_DUMP(D_CACHE, ext, - "trunc at %llu\n", size); - } - osc_extent_put(env, ext); - } - if (waiting) { - int rc; - - /* ignore the result of osc_extent_wait the write initiator - * should take care of it. 
- */ - rc = osc_extent_wait(env, waiting, OES_INV); - if (rc < 0) - OSC_EXTENT_DUMP(D_CACHE, waiting, "error: %d.\n", rc); - - osc_extent_put(env, waiting); - waiting = NULL; - goto again; - } - return result; -} - -/** - * Called after osc_io_setattr_end to add oio->oi_trunc back to cache. - */ -void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext) -{ - if (ext) { - struct osc_object *obj = ext->oe_obj; - bool unplug = false; - - EASSERT(ext->oe_nr_pages > 0, ext); - EASSERT(ext->oe_state == OES_TRUNC, ext); - EASSERT(!ext->oe_urgent, ext); - - OSC_EXTENT_DUMP(D_CACHE, ext, "trunc -> cache.\n"); - osc_object_lock(obj); - osc_extent_state_set(ext, OES_CACHE); - if (ext->oe_fsync_wait && !ext->oe_urgent) { - ext->oe_urgent = 1; - list_move_tail(&ext->oe_link, &obj->oo_urgent_exts); - unplug = true; - } - osc_update_pending(obj, OBD_BRW_WRITE, ext->oe_nr_pages); - osc_object_unlock(obj); - osc_extent_put(env, ext); - - if (unplug) - osc_io_unplug_async(env, osc_cli(obj), obj); - } -} - -/** - * Wait for extents in a specific range to be written out. - * The caller must have called osc_cache_writeback_range() to issue IO - * otherwise it will take a long time for this function to finish. - * - * Caller must hold inode_mutex , or cancel exclusive dlm lock so that - * nobody else can dirty this range of file while we're waiting for - * extents to be written. 
- */ -int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj, - pgoff_t start, pgoff_t end) -{ - struct osc_extent *ext; - pgoff_t index = start; - int result = 0; - -again: - osc_object_lock(obj); - ext = osc_extent_search(obj, index); - if (!ext) - ext = first_extent(obj); - else if (ext->oe_end < index) - ext = next_extent(ext); - while (ext) { - int rc; - - if (ext->oe_start > end) - break; - - if (!ext->oe_fsync_wait) { - ext = next_extent(ext); - continue; - } - - EASSERT(ergo(ext->oe_state == OES_CACHE, - ext->oe_hp || ext->oe_urgent), ext); - EASSERT(ergo(ext->oe_state == OES_ACTIVE, - !ext->oe_hp && ext->oe_urgent), ext); - - index = ext->oe_end + 1; - osc_extent_get(ext); - osc_object_unlock(obj); - - rc = osc_extent_wait(env, ext, OES_INV); - if (result == 0) - result = rc; - osc_extent_put(env, ext); - goto again; - } - osc_object_unlock(obj); - - OSC_IO_DEBUG(obj, "sync file range.\n"); - return result; -} - -/** - * Called to write out a range of osc object. - * - * @hp : should be set this is caused by lock cancel; - * @discard: is set if dirty pages should be dropped - file will be deleted or - * truncated, this implies there is no partially discarding extents. - * - * Return how many pages will be issued, or error code if error occurred. 
- */ -int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj, - pgoff_t start, pgoff_t end, int hp, int discard) -{ - struct osc_extent *ext; - LIST_HEAD(discard_list); - bool unplug = false; - int result = 0; - - osc_object_lock(obj); - ext = osc_extent_search(obj, start); - if (!ext) - ext = first_extent(obj); - else if (ext->oe_end < start) - ext = next_extent(ext); - while (ext) { - if (ext->oe_start > end) - break; - - ext->oe_fsync_wait = 1; - switch (ext->oe_state) { - case OES_CACHE: - result += ext->oe_nr_pages; - if (!discard) { - struct list_head *list = NULL; - - if (hp) { - EASSERT(!ext->oe_hp, ext); - ext->oe_hp = 1; - list = &obj->oo_hp_exts; - } else if (!ext->oe_urgent) { - ext->oe_urgent = 1; - list = &obj->oo_urgent_exts; - } - if (list) - list_move_tail(&ext->oe_link, list); - unplug = true; - } else { - /* the only discarder is lock cancelling, so - * [start, end] must contain this extent - */ - EASSERT(ext->oe_start >= start && - ext->oe_max_end <= end, ext); - osc_extent_state_set(ext, OES_LOCKING); - ext->oe_owner = current; - list_move_tail(&ext->oe_link, &discard_list); - osc_update_pending(obj, OBD_BRW_WRITE, - -ext->oe_nr_pages); - } - break; - case OES_ACTIVE: - /* It's pretty bad to wait for ACTIVE extents, because - * we don't know how long we will wait for it to be - * flushed since it may be blocked at awaiting more - * grants. We do this for the correctness of fsync. - */ - LASSERT(hp == 0 && discard == 0); - ext->oe_urgent = 1; - break; - case OES_TRUNC: - /* this extent is being truncated, can't do anything - * for it now. it will be set to urgent after truncate - * is finished in osc_cache_truncate_end(). 
- */ - default: - break; - } - ext = next_extent(ext); - } - osc_object_unlock(obj); - - LASSERT(ergo(!discard, list_empty(&discard_list))); - if (!list_empty(&discard_list)) { - struct osc_extent *tmp; - int rc; - - osc_list_maint(osc_cli(obj), obj); - list_for_each_entry_safe(ext, tmp, &discard_list, oe_link) { - list_del_init(&ext->oe_link); - EASSERT(ext->oe_state == OES_LOCKING, ext); - - /* Discard caching pages. We don't actually write this - * extent out but we complete it as if we did. - */ - rc = osc_extent_make_ready(env, ext); - if (unlikely(rc < 0)) { - OSC_EXTENT_DUMP(D_ERROR, ext, - "make_ready returned %d\n", rc); - if (result >= 0) - result = rc; - } - - /* finish the extent as if the pages were sent */ - osc_extent_finish(env, ext, 0, 0); - } - } - - if (unplug) - osc_io_unplug(env, osc_cli(obj), obj); - - if (hp || discard) { - int rc; - - rc = osc_cache_wait_range(env, obj, start, end); - if (result >= 0 && rc < 0) - result = rc; - } - - OSC_IO_DEBUG(obj, "pageout [%lu, %lu], %d.\n", start, end, result); - return result; -} - -/** - * Returns a list of pages by a given [start, end] of \a obj. - * - * \param resched If not NULL, then we give up before hogging CPU for too - * long and set *resched = 1, in that case caller should implement a retry - * logic. - * - * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely - * crucial in the face of [offset, EOF] locks. - * - * Return at least one page in @queue unless there is no covered page. 
- */ -int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, - struct osc_object *osc, pgoff_t start, pgoff_t end, - osc_page_gang_cbt cb, void *cbdata) -{ - struct osc_page *ops; - void **pvec; - pgoff_t idx; - unsigned int nr; - unsigned int i; - unsigned int j; - int res = CLP_GANG_OKAY; - bool tree_lock = true; - - idx = start; - pvec = osc_env_info(env)->oti_pvec; - spin_lock(&osc->oo_tree_lock); - while ((nr = radix_tree_gang_lookup(&osc->oo_tree, pvec, - idx, OTI_PVEC_SIZE)) > 0) { - struct cl_page *page; - bool end_of_region = false; - - for (i = 0, j = 0; i < nr; ++i) { - ops = pvec[i]; - pvec[i] = NULL; - - idx = osc_index(ops); - if (idx > end) { - end_of_region = true; - break; - } - - page = ops->ops_cl.cpl_page; - LASSERT(page->cp_type == CPT_CACHEABLE); - if (page->cp_state == CPS_FREEING) - continue; - - cl_page_get(page); - lu_ref_add_atomic(&page->cp_reference, - "gang_lookup", current); - pvec[j++] = ops; - } - ++idx; - - /* - * Here a delicate locking dance is performed. Current thread - * holds a reference to a page, but has to own it before it - * can be placed into queue. Owning implies waiting, so - * radix-tree lock is to be released. After a wait one has to - * check that pages weren't truncated (cl_page_own() returns - * error in the latter case). - */ - spin_unlock(&osc->oo_tree_lock); - tree_lock = false; - - for (i = 0; i < j; ++i) { - ops = pvec[i]; - if (res == CLP_GANG_OKAY) - res = (*cb)(env, io, ops, cbdata); - - page = ops->ops_cl.cpl_page; - lu_ref_del(&page->cp_reference, "gang_lookup", current); - cl_page_put(env, page); - } - if (nr < OTI_PVEC_SIZE || end_of_region) - break; - - if (res == CLP_GANG_OKAY && need_resched()) - res = CLP_GANG_RESCHED; - if (res != CLP_GANG_OKAY) - break; - - spin_lock(&osc->oo_tree_lock); - tree_lock = true; - } - if (tree_lock) - spin_unlock(&osc->oo_tree_lock); - return res; -} - -/** - * Check if page @page is covered by an extra lock or discard it. 
- */ -static int check_and_discard_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops, void *cbdata) -{ - struct osc_thread_info *info = osc_env_info(env); - struct osc_object *osc = cbdata; - pgoff_t index; - - index = osc_index(ops); - if (index >= info->oti_fn_index) { - struct ldlm_lock *tmp; - struct cl_page *page = ops->ops_cl.cpl_page; - - /* refresh non-overlapped index */ - tmp = osc_dlmlock_at_pgoff(env, osc, index, - OSC_DAP_FL_TEST_LOCK); - if (tmp) { - __u64 end = tmp->l_policy_data.l_extent.end; - /* Cache the first-non-overlapped index so as to skip - * all pages within [index, oti_fn_index). This is safe - * because if tmp lock is canceled, it will discard - * these pages. - */ - info->oti_fn_index = cl_index(osc2cl(osc), end + 1); - if (end == OBD_OBJECT_EOF) - info->oti_fn_index = CL_PAGE_EOF; - LDLM_LOCK_PUT(tmp); - } else if (cl_page_own(env, io, page) == 0) { - /* discard the page */ - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); - } - } - - info->oti_next_index = index + 1; - return CLP_GANG_OKAY; -} - -static int discard_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops, void *cbdata) -{ - struct osc_thread_info *info = osc_env_info(env); - struct cl_page *page = ops->ops_cl.cpl_page; - - /* page is top page. */ - info->oti_next_index = osc_index(ops) + 1; - if (cl_page_own(env, io, page) == 0) { - if (page->cp_type == CPT_CACHEABLE && - PageDirty(cl_page_vmpage(page))) - CL_PAGE_DEBUG(D_ERROR, env, page, - "discard dirty page?\n"); - - /* discard the page */ - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); - } - - return CLP_GANG_OKAY; -} - -/** - * Discard pages protected by the given lock. This function traverses radix - * tree to find all covering pages and discard them. If a page is being covered - * by other locks, it should remain in cache. 
- * - * If error happens on any step, the process continues anyway (the reasoning - * behind this being that lock cancellation cannot be delayed indefinitely). - */ -int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc, - pgoff_t start, pgoff_t end, enum cl_lock_mode mode) -{ - struct osc_thread_info *info = osc_env_info(env); - struct cl_io *io = &info->oti_io; - osc_page_gang_cbt cb; - int res; - int result; - - io->ci_obj = cl_object_top(osc2cl(osc)); - io->ci_ignore_layout = 1; - result = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (result != 0) - goto out; - - cb = mode == CLM_READ ? check_and_discard_cb : discard_cb; - info->oti_fn_index = start; - info->oti_next_index = start; - do { - res = osc_page_gang_lookup(env, io, osc, - info->oti_next_index, end, cb, osc); - if (info->oti_next_index > end) - break; - - if (res == CLP_GANG_RESCHED) - cond_resched(); - } while (res != CLP_GANG_OKAY); -out: - cl_io_fini(env, io); - return result; -} - -/** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h deleted file mode 100644 index 2d3cba16ef34..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h +++ /dev/null @@ -1,681 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Internal interfaces of OSC layer. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#ifndef OSC_CL_INTERNAL_H -#define OSC_CL_INTERNAL_H - -#include -/* osc_build_res_name() */ -#include -#include "osc_internal.h" - -/** \defgroup osc osc - * @{ - */ - -struct osc_extent; - -/** - * State maintained by osc layer for each IO context. - */ -struct osc_io { - /** super class */ - struct cl_io_slice oi_cl; - /** true if this io is lockless. */ - unsigned int oi_lockless:1, - /** true if this io is counted as active IO */ - oi_is_active:1; - /** how many LRU pages are reserved for this IO */ - unsigned long oi_lru_reserved; - - /** active extents, we know how many bytes is going to be written, - * so having an active extent will prevent it from being fragmented - */ - struct osc_extent *oi_active; - /** partially truncated extent, we need to hold this extent to prevent - * page writeback from happening. - */ - struct osc_extent *oi_trunc; - - /** write osc_lock for this IO, used by osc_extent_find(). */ - struct osc_lock *oi_write_osclock; - struct obdo oi_oa; - struct osc_async_cbargs { - bool opc_rpc_sent; - int opc_rc; - struct completion opc_sync; - } oi_cbarg; -}; - -/** - * State maintained by osc layer for the duration of a system call. 
- */ -struct osc_session { - struct osc_io os_io; -}; - -#define OTI_PVEC_SIZE 256 -struct osc_thread_info { - struct ldlm_res_id oti_resname; - union ldlm_policy_data oti_policy; - struct cl_lock_descr oti_descr; - struct cl_attr oti_attr; - struct lustre_handle oti_handle; - struct cl_page_list oti_plist; - struct cl_io oti_io; - void *oti_pvec[OTI_PVEC_SIZE]; - /** - * Fields used by cl_lock_discard_pages(). - */ - pgoff_t oti_next_index; - pgoff_t oti_fn_index; /* first non-overlapped index */ - struct cl_sync_io oti_anchor; - struct cl_req_attr oti_req_attr; -}; - -struct osc_object { - struct cl_object oo_cl; - struct lov_oinfo *oo_oinfo; - /** - * True if locking against this stripe got -EUSERS. - */ - int oo_contended; - unsigned long oo_contention_time; - /** - * used by the osc to keep track of what objects to build into rpcs. - * Protected by client_obd->cli_loi_list_lock. - */ - struct list_head oo_ready_item; - struct list_head oo_hp_ready_item; - struct list_head oo_write_item; - struct list_head oo_read_item; - - /** - * extent is a red black tree to manage (async) dirty pages. - */ - struct rb_root oo_root; - /** - * Manage write(dirty) extents. - */ - struct list_head oo_hp_exts; /* list of hp extents */ - struct list_head oo_urgent_exts; /* list of writeback extents */ - struct list_head oo_rpc_exts; - - struct list_head oo_reading_exts; - - atomic_t oo_nr_reads; - atomic_t oo_nr_writes; - - /** Protect extent tree. Will be used to protect - * oo_{read|write}_pages soon. 
- */ - spinlock_t oo_lock; - - /** - * Radix tree for caching pages - */ - struct radix_tree_root oo_tree; - spinlock_t oo_tree_lock; - unsigned long oo_npages; - - /* Protect osc_lock this osc_object has */ - spinlock_t oo_ol_spin; - struct list_head oo_ol_list; - - /** number of active IOs of this object */ - atomic_t oo_nr_ios; - wait_queue_head_t oo_io_waitq; -}; - -static inline void osc_object_lock(struct osc_object *obj) -{ - spin_lock(&obj->oo_lock); -} - -static inline int osc_object_trylock(struct osc_object *obj) -{ - return spin_trylock(&obj->oo_lock); -} - -static inline void osc_object_unlock(struct osc_object *obj) -{ - spin_unlock(&obj->oo_lock); -} - -static inline int osc_object_is_locked(struct osc_object *obj) -{ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) - return spin_is_locked(&obj->oo_lock); -#else - /* - * It is not perfect to return true all the time. - * But since this function is only used for assertion - * and checking, it seems OK. - */ - return 1; -#endif -} - -/* - * Lock "micro-states" for osc layer. - */ -enum osc_lock_state { - OLS_NEW, - OLS_ENQUEUED, - OLS_UPCALL_RECEIVED, - OLS_GRANTED, - OLS_CANCELLED -}; - -/** - * osc-private state of cl_lock. - * - * Interaction with DLM. - * - * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in - * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_dlmlock. - * - * This pointer is protected through a reference, acquired by - * osc_lock_upcall0(). Also, an additional reference is acquired by - * ldlm_lock_addref() call protecting the lock from cancellation, until - * osc_lock_unuse() releases it. - * - * Below is a description of how lock references are acquired and released - * inside of DLM. - * - * - When new lock is created and enqueued to the server (ldlm_cli_enqueue()) - * - ldlm_lock_create() - * - ldlm_lock_new(): initializes a lock with 2 references. 
One for - * the caller (released when reply from the server is received, or on - * error), and another for the hash table. - * - ldlm_lock_addref_internal(): protects the lock from cancellation. - * - * - When reply is received from the server (osc_enqueue_interpret()) - * - ldlm_cli_enqueue_fini() - * - LDLM_LOCK_PUT(): releases caller reference acquired by - * ldlm_lock_new(). - * - if (rc != 0) - * ldlm_lock_decref(): error case: matches ldlm_cli_enqueue(). - * - ldlm_lock_decref(): for async locks, matches ldlm_cli_enqueue(). - * - * - When lock is being cancelled (ldlm_lock_cancel()) - * - ldlm_lock_destroy() - * - LDLM_LOCK_PUT(): releases hash-table reference acquired by - * ldlm_lock_new(). - * - * osc_lock is detached from ldlm_lock by osc_lock_detach() that is called - * either when lock is cancelled (osc_lock_blocking()), or when locks is - * deleted without cancellation (e.g., from cl_locks_prune()). In the latter - * case ldlm lock remains in memory, and can be re-attached to osc_lock in the - * future. - */ -struct osc_lock { - struct cl_lock_slice ols_cl; - /** Internal lock to protect states, etc. */ - spinlock_t ols_lock; - /** Owner sleeps on this channel for state change */ - struct cl_sync_io *ols_owner; - /** waiting list for this lock to be cancelled */ - struct list_head ols_waiting_list; - /** wait entry of ols_waiting_list */ - struct list_head ols_wait_entry; - /** list entry for osc_object::oo_ol_list */ - struct list_head ols_nextlock_oscobj; - - /** underlying DLM lock */ - struct ldlm_lock *ols_dlmlock; - /** DLM flags with which osc_lock::ols_lock was enqueued */ - __u64 ols_flags; - /** osc_lock::ols_lock handle */ - struct lustre_handle ols_handle; - struct ldlm_enqueue_info ols_einfo; - enum osc_lock_state ols_state; - /** lock value block */ - struct ost_lvb ols_lvb; - - /** - * true, if ldlm_lock_addref() was called against - * osc_lock::ols_lock. This is used for sanity checking. 
- * - * \see osc_lock::ols_has_ref - */ - unsigned ols_hold :1, - /** - * this is much like osc_lock::ols_hold, except that this bit is - * cleared _after_ reference in released in osc_lock_unuse(). This - * fine distinction is needed because: - * - * - if ldlm lock still has a reference, osc_ast_data_get() needs - * to return associated cl_lock (so that a flag is needed that is - * cleared after ldlm_lock_decref() returned), and - * - * - ldlm_lock_decref() can invoke blocking ast (for a - * LDLM_FL_CBPENDING lock), and osc_lock functions like - * osc_lock_cancel() called from there need to know whether to - * release lock reference (so that a flag is needed that is - * cleared before ldlm_lock_decref() is called). - */ - ols_has_ref:1, - /** - * inherit the lockless attribute from top level cl_io. - * If true, osc_lock_enqueue is able to tolerate the -EUSERS error. - */ - ols_locklessable:1, - /** - * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat - * the EVAVAIL error as tolerable, this will make upper logic happy - * to wait all glimpse locks to each OSTs to be completed. - * Glimpse lock converts to normal lock if the server lock is - * granted. - * Glimpse lock should be destroyed immediately after use. - */ - ols_glimpse:1, - /** - * For async glimpse lock. - */ - ols_agl:1; -}; - -/** - * Page state private for osc layer. - */ -struct osc_page { - struct cl_page_slice ops_cl; - /** - * Page queues used by osc to detect when RPC can be formed. - */ - struct osc_async_page ops_oap; - /** - * An offset within page from which next transfer starts. This is used - * by cl_page_clip() to submit partial page transfers. - */ - int ops_from; - /** - * An offset within page at which next transfer ends. - * - * \see osc_page::ops_from. - */ - int ops_to; - /** - * Boolean, true iff page is under transfer. Used for sanity checking. - */ - unsigned ops_transfer_pinned:1, - /** - * in LRU? 
- */ - ops_in_lru:1, - /** - * Set if the page must be transferred with OBD_BRW_SRVLOCK. - */ - ops_srvlock:1; - /** - * lru page list. See osc_lru_{del|use}() in osc_page.c for usage. - */ - struct list_head ops_lru; - /** - * Submit time - the time when the page is starting RPC. For debugging. - */ - unsigned long ops_submit_time; -}; - -extern struct kmem_cache *osc_lock_kmem; -extern struct kmem_cache *osc_object_kmem; -extern struct kmem_cache *osc_thread_kmem; -extern struct kmem_cache *osc_session_kmem; -extern struct kmem_cache *osc_extent_kmem; - -extern struct lu_device_type osc_device_type; -extern struct lu_context_key osc_key; -extern struct lu_context_key osc_session_key; - -#define OSC_FLAGS (ASYNC_URGENT | ASYNC_READY) - -int osc_lock_init(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *io); -int osc_io_init(const struct lu_env *env, - struct cl_object *obj, struct cl_io *io); -struct lu_object *osc_object_alloc(const struct lu_env *env, - const struct lu_object_header *hdr, - struct lu_device *dev); -int osc_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t ind); - -void osc_index2policy(union ldlm_policy_data *policy, - const struct cl_object *obj, - pgoff_t start, pgoff_t end); -int osc_lvb_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct ost_lvb *lvb); - -void osc_lru_add_batch(struct client_obd *cli, struct list_head *list); -void osc_page_submit(const struct lu_env *env, struct osc_page *opg, - enum cl_req_type crt, int brw_flags); -int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops); -int osc_set_async_flags(struct osc_object *obj, struct osc_page *opg, - u32 async_flags); -int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops, - struct page *page, loff_t offset); -int osc_queue_async_io(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops); -int osc_page_cache_add(const struct 
lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io); -int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj, - struct osc_page *ops); -int osc_flush_async_page(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops); -int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj, - struct list_head *list, int cmd, int brw_flags); -int osc_cache_truncate_start(const struct lu_env *env, struct osc_object *obj, - u64 size, struct osc_extent **extp); -void osc_cache_truncate_end(const struct lu_env *env, struct osc_extent *ext); -int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj, - pgoff_t start, pgoff_t end, int hp, int discard); -int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj, - pgoff_t start, pgoff_t end); -void osc_io_unplug(const struct lu_env *env, struct client_obd *cli, - struct osc_object *osc); -int lru_queue_work(const struct lu_env *env, void *data); - -void osc_object_set_contended(struct osc_object *obj); -void osc_object_clear_contended(struct osc_object *obj); -int osc_object_is_contended(struct osc_object *obj); - -int osc_lock_is_lockless(const struct osc_lock *olck); - -/***************************************************************************** - * - * Accessors. 
- * - */ - -static inline struct osc_thread_info *osc_env_info(const struct lu_env *env) -{ - struct osc_thread_info *info; - - info = lu_context_key_get(&env->le_ctx, &osc_key); - LASSERT(info); - return info; -} - -static inline struct osc_session *osc_env_session(const struct lu_env *env) -{ - struct osc_session *ses; - - ses = lu_context_key_get(env->le_ses, &osc_session_key); - LASSERT(ses); - return ses; -} - -static inline struct osc_io *osc_env_io(const struct lu_env *env) -{ - return &osc_env_session(env)->os_io; -} - -static inline int osc_is_object(const struct lu_object *obj) -{ - return obj->lo_dev->ld_type == &osc_device_type; -} - -static inline struct osc_device *lu2osc_dev(const struct lu_device *d) -{ - LINVRNT(d->ld_type == &osc_device_type); - return container_of(d, struct osc_device, od_cl.cd_lu_dev); -} - -static inline struct obd_export *osc_export(const struct osc_object *obj) -{ - return lu2osc_dev(obj->oo_cl.co_lu.lo_dev)->od_exp; -} - -static inline struct client_obd *osc_cli(const struct osc_object *obj) -{ - return &osc_export(obj)->exp_obd->u.cli; -} - -static inline struct osc_object *cl2osc(const struct cl_object *obj) -{ - LINVRNT(osc_is_object(&obj->co_lu)); - return container_of(obj, struct osc_object, oo_cl); -} - -static inline struct cl_object *osc2cl(const struct osc_object *obj) -{ - return (struct cl_object *)&obj->oo_cl; -} - -static inline enum ldlm_mode osc_cl_lock2ldlm(enum cl_lock_mode mode) -{ - LASSERT(mode == CLM_READ || mode == CLM_WRITE || mode == CLM_GROUP); - if (mode == CLM_READ) - return LCK_PR; - else if (mode == CLM_WRITE) - return LCK_PW; - else - return LCK_GROUP; -} - -static inline enum cl_lock_mode osc_ldlm2cl_lock(enum ldlm_mode mode) -{ - LASSERT(mode == LCK_PR || mode == LCK_PW || mode == LCK_GROUP); - if (mode == LCK_PR) - return CLM_READ; - else if (mode == LCK_PW) - return CLM_WRITE; - else - return CLM_GROUP; -} - -static inline struct osc_page *cl2osc_page(const struct cl_page_slice *slice) -{ - 
LINVRNT(osc_is_object(&slice->cpl_obj->co_lu)); - return container_of(slice, struct osc_page, ops_cl); -} - -static inline struct osc_page *oap2osc(struct osc_async_page *oap) -{ - return container_of_safe(oap, struct osc_page, ops_oap); -} - -static inline pgoff_t osc_index(struct osc_page *opg) -{ - return opg->ops_cl.cpl_index; -} - -static inline struct cl_page *oap2cl_page(struct osc_async_page *oap) -{ - return oap2osc(oap)->ops_cl.cpl_page; -} - -static inline struct osc_page *oap2osc_page(struct osc_async_page *oap) -{ - return (struct osc_page *)container_of(oap, struct osc_page, ops_oap); -} - -static inline struct osc_page * -osc_cl_page_osc(struct cl_page *page, struct osc_object *osc) -{ - const struct cl_page_slice *slice; - - LASSERT(osc); - slice = cl_object_page_slice(&osc->oo_cl, page); - return cl2osc_page(slice); -} - -static inline struct osc_lock *cl2osc_lock(const struct cl_lock_slice *slice) -{ - LINVRNT(osc_is_object(&slice->cls_obj->co_lu)); - return container_of(slice, struct osc_lock, ols_cl); -} - -static inline struct osc_lock *osc_lock_at(const struct cl_lock *lock) -{ - return cl2osc_lock(cl_lock_at(lock, &osc_device_type)); -} - -static inline int osc_io_srvlock(struct osc_io *oio) -{ - return (oio->oi_lockless && !oio->oi_cl.cis_io->ci_no_srvlock); -} - -enum osc_extent_state { - OES_INV = 0, /** extent is just initialized or destroyed */ - OES_ACTIVE = 1, /** process is using this extent */ - OES_CACHE = 2, /** extent is ready for IO */ - OES_LOCKING = 3, /** locking page to prepare IO */ - OES_LOCK_DONE = 4, /** locking finished, ready to send */ - OES_RPC = 5, /** in RPC */ - OES_TRUNC = 6, /** being truncated */ - OES_STATE_MAX -}; - -/** - * osc_extent data to manage dirty pages. - * osc_extent has the following attributes: - * 1. all pages in the same must be in one RPC in write back; - * 2. # of pages must be less than max_pages_per_rpc - implied by 1; - * 3. must be covered by only 1 osc_lock; - * 4. exclusive. 
It's impossible to have overlapped osc_extent. - * - * The lifetime of an extent is from when the 1st page is dirtied to when - * all pages inside it are written out. - * - * LOCKING ORDER - * ============= - * page lock -> cl_loi_list_lock -> object lock(osc_object::oo_lock) - */ -struct osc_extent { - /** red-black tree node */ - struct rb_node oe_node; - /** osc_object of this extent */ - struct osc_object *oe_obj; - /** refcount, removed from red-black tree if reaches zero. */ - atomic_t oe_refc; - /** busy if non-zero */ - atomic_t oe_users; - /** link list of osc_object's oo_{hp|urgent|locking}_exts. */ - struct list_head oe_link; - /** state of this extent */ - enum osc_extent_state oe_state; - /** flags for this extent. */ - unsigned int oe_intree:1, - /** 0 is write, 1 is read */ - oe_rw:1, - /** sync extent, queued by osc_queue_sync_pages() */ - oe_sync:1, - /** set if this extent has partial, sync pages. - * Extents with partial page(s) can't merge with others in RPC - */ - oe_no_merge:1, - oe_srvlock:1, - oe_memalloc:1, - /** an ACTIVE extent is going to be truncated, so when this extent - * is released, it will turn into TRUNC state instead of CACHE. - */ - oe_trunc_pending:1, - /** this extent should be written asap and someone may wait for the - * write to finish. This bit is usually set along with urgent if - * the extent was CACHE state. - * fsync_wait extent can't be merged because new extent region may - * exceed fsync range. - */ - oe_fsync_wait:1, - /** covering lock is being canceled */ - oe_hp:1, - /** this extent should be written back asap. set if one of pages is - * called by page WB daemon, or sync write or reading requests. - */ - oe_urgent:1; - /** how many grants allocated for this extent. - * Grant allocated for this extent. There is no grant allocated - * for reading extents and sync write extents. - */ - unsigned int oe_grants; - /** # of dirty pages in this extent */ - unsigned int oe_nr_pages; - /** list of pending oap pages. 
Pages in this list are NOT sorted. */ - struct list_head oe_pages; - /** Since an extent has to be written out in atomic, this is used to - * remember the next page need to be locked to write this extent out. - * Not used right now. - */ - struct osc_page *oe_next_page; - /** start and end index of this extent, include start and end - * themselves. Page offset here is the page index of osc_pages. - * oe_start is used as keyword for red-black tree. - */ - pgoff_t oe_start; - pgoff_t oe_end; - /** maximum ending index of this extent, this is limited by - * max_pages_per_rpc, lock extent and chunk size. - */ - pgoff_t oe_max_end; - /** waitqueue - for those who want to be notified if this extent's - * state has changed. - */ - wait_queue_head_t oe_waitq; - /** lock covering this extent */ - struct ldlm_lock *oe_dlmlock; - /** terminator of this extent. Must be true if this extent is in IO. */ - struct task_struct *oe_owner; - /** return value of writeback. If somebody is waiting for this extent, - * this value can be known by outside world. 
- */ - int oe_rc; - /** max pages per rpc when this extent was created */ - unsigned int oe_mppr; -}; - -int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext, - int sent, int rc); -void osc_extent_release(const struct lu_env *env, struct osc_extent *ext); - -int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc, - pgoff_t start, pgoff_t end, enum cl_lock_mode mode); - -typedef int (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *, - struct osc_page *, void *); -int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, - struct osc_object *osc, pgoff_t start, pgoff_t end, - osc_page_gang_cbt cb, void *cbdata); -/** @} osc */ - -#endif /* OSC_CL_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/osc/osc_dev.c b/drivers/staging/lustre/lustre/osc/osc_dev.c deleted file mode 100644 index 2b5f324743e2..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_dev.c +++ /dev/null @@ -1,246 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_device, for OSC layer. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_OSC - -/* class_name2obd() */ -#include - -#include "osc_cl_internal.h" - -/** \addtogroup osc - * @{ - */ - -struct kmem_cache *osc_lock_kmem; -struct kmem_cache *osc_object_kmem; -struct kmem_cache *osc_thread_kmem; -struct kmem_cache *osc_session_kmem; -struct kmem_cache *osc_extent_kmem; -struct kmem_cache *osc_quota_kmem; - -struct lu_kmem_descr osc_caches[] = { - { - .ckd_cache = &osc_lock_kmem, - .ckd_name = "osc_lock_kmem", - .ckd_size = sizeof(struct osc_lock) - }, - { - .ckd_cache = &osc_object_kmem, - .ckd_name = "osc_object_kmem", - .ckd_size = sizeof(struct osc_object) - }, - { - .ckd_cache = &osc_thread_kmem, - .ckd_name = "osc_thread_kmem", - .ckd_size = sizeof(struct osc_thread_info) - }, - { - .ckd_cache = &osc_session_kmem, - .ckd_name = "osc_session_kmem", - .ckd_size = sizeof(struct osc_session) - }, - { - .ckd_cache = &osc_extent_kmem, - .ckd_name = "osc_extent_kmem", - .ckd_size = sizeof(struct osc_extent) - }, - { - .ckd_cache = &osc_quota_kmem, - .ckd_name = "osc_quota_kmem", - .ckd_size = sizeof(struct osc_quota_info) - }, - { - .ckd_cache = NULL - } -}; - -/***************************************************************************** - * - * Type conversions. - * - */ - -static struct lu_device *osc2lu_dev(struct osc_device *osc) -{ - return &osc->od_cl.cd_lu_dev; -} - -/***************************************************************************** - * - * Osc device and device type functions. 
- * - */ - -static void *osc_key_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct osc_thread_info *info; - - info = kmem_cache_zalloc(osc_thread_kmem, GFP_NOFS); - if (!info) - info = ERR_PTR(-ENOMEM); - return info; -} - -static void osc_key_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct osc_thread_info *info = data; - - kmem_cache_free(osc_thread_kmem, info); -} - -struct lu_context_key osc_key = { - .lct_tags = LCT_CL_THREAD, - .lct_init = osc_key_init, - .lct_fini = osc_key_fini -}; - -static void *osc_session_init(const struct lu_context *ctx, - struct lu_context_key *key) -{ - struct osc_session *info; - - info = kmem_cache_zalloc(osc_session_kmem, GFP_NOFS); - if (!info) - info = ERR_PTR(-ENOMEM); - return info; -} - -static void osc_session_fini(const struct lu_context *ctx, - struct lu_context_key *key, void *data) -{ - struct osc_session *info = data; - - kmem_cache_free(osc_session_kmem, info); -} - -struct lu_context_key osc_session_key = { - .lct_tags = LCT_SESSION, - .lct_init = osc_session_init, - .lct_fini = osc_session_fini -}; - -/* type constructor/destructor: osc_type_{init,fini,start,stop}(). 
*/ -LU_TYPE_INIT_FINI(osc, &osc_key, &osc_session_key); - -static int osc_cl_process_config(const struct lu_env *env, - struct lu_device *d, struct lustre_cfg *cfg) -{ - return osc_process_config_base(d->ld_obd, cfg); -} - -static const struct lu_device_operations osc_lu_ops = { - .ldo_object_alloc = osc_object_alloc, - .ldo_process_config = osc_cl_process_config, - .ldo_recovery_complete = NULL -}; - -static int osc_device_init(const struct lu_env *env, struct lu_device *d, - const char *name, struct lu_device *next) -{ - return 0; -} - -static struct lu_device *osc_device_fini(const struct lu_env *env, - struct lu_device *d) -{ - return NULL; -} - -static struct lu_device *osc_device_free(const struct lu_env *env, - struct lu_device *d) -{ - struct osc_device *od = lu2osc_dev(d); - - cl_device_fini(lu2cl_dev(d)); - kfree(od); - return NULL; -} - -static struct lu_device *osc_device_alloc(const struct lu_env *env, - struct lu_device_type *t, - struct lustre_cfg *cfg) -{ - struct lu_device *d; - struct osc_device *od; - struct obd_device *obd; - int rc; - - od = kzalloc(sizeof(*od), GFP_NOFS); - if (!od) - return ERR_PTR(-ENOMEM); - - cl_device_init(&od->od_cl, t); - d = osc2lu_dev(od); - d->ld_ops = &osc_lu_ops; - - /* Setup OSC OBD */ - obd = class_name2obd(lustre_cfg_string(cfg, 0)); - LASSERT(obd); - rc = osc_setup(obd, cfg); - if (rc) { - osc_device_free(env, d); - return ERR_PTR(rc); - } - od->od_exp = obd->obd_self_export; - return d; -} - -static const struct lu_device_type_operations osc_device_type_ops = { - .ldto_init = osc_type_init, - .ldto_fini = osc_type_fini, - - .ldto_start = osc_type_start, - .ldto_stop = osc_type_stop, - - .ldto_device_alloc = osc_device_alloc, - .ldto_device_free = osc_device_free, - - .ldto_device_init = osc_device_init, - .ldto_device_fini = osc_device_fini -}; - -struct lu_device_type osc_device_type = { - .ldt_tags = LU_DEVICE_CL, - .ldt_name = LUSTRE_OSC_NAME, - .ldt_ops = &osc_device_type_ops, - .ldt_ctx_tags = 
LCT_CL_THREAD -}; - -/** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h deleted file mode 100644 index 4ddba1354bef..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_internal.h +++ /dev/null @@ -1,237 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#ifndef OSC_INTERNAL_H -#define OSC_INTERNAL_H - -#define OAP_MAGIC 8675309 - -extern atomic_t osc_pool_req_count; -extern unsigned int osc_reqpool_maxreqcount; -extern struct ptlrpc_request_pool *osc_rq_pool; - -struct lu_env; - -enum async_flags { - ASYNC_READY = 0x1, /* ap_make_ready will not be called before this - * page is added to an rpc - */ - ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */ - ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called - * to give the caller a chance to update - * or cancel the size of the io - */ - ASYNC_HP = 0x10, -}; - -struct osc_async_page { - int oap_magic; - unsigned short oap_cmd; - unsigned short oap_interrupted:1; - - struct list_head oap_pending_item; - struct list_head oap_rpc_item; - - u64 oap_obj_off; - unsigned int oap_page_off; - enum async_flags oap_async_flags; - - struct brw_page oap_brw_page; - - struct ptlrpc_request *oap_request; - struct client_obd *oap_cli; - struct osc_object *oap_obj; - - spinlock_t oap_lock; -}; - -#define oap_page oap_brw_page.pg -#define oap_count oap_brw_page.count -#define oap_brw_flags oap_brw_page.flag - -static inline struct osc_async_page *brw_page2oap(struct brw_page *pga) -{ - return (struct osc_async_page *)container_of(pga, struct osc_async_page, - oap_brw_page); -} - -struct osc_cache_waiter { - struct list_head ocw_entry; - wait_queue_head_t ocw_waitq; - struct osc_async_page *ocw_oap; - int ocw_grant; - int ocw_rc; -}; - -void osc_wake_cache_waiters(struct client_obd *cli); -int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes); -void osc_update_next_shrink(struct client_obd *cli); - -/* - * cl integration. 
- */ -#include - -extern struct ptlrpc_request_set *PTLRPCD_SET; - -typedef int (*osc_enqueue_upcall_f)(void *cookie, struct lustre_handle *lockh, - int rc); - -int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u64 *flags, union ldlm_policy_data *policy, - struct ost_lvb *lvb, int kms_valid, - osc_enqueue_upcall_f upcall, - void *cookie, struct ldlm_enqueue_info *einfo, - struct ptlrpc_request_set *rqset, int async, int agl); - -int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, - enum ldlm_type type, union ldlm_policy_data *policy, - enum ldlm_mode mode, __u64 *flags, void *data, - struct lustre_handle *lockh, int unref); - -int osc_setattr_async(struct obd_export *exp, struct obdo *oa, - obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset); -int osc_punch_base(struct obd_export *exp, struct obdo *oa, - obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset); -int osc_sync_base(struct osc_object *exp, struct obdo *oa, - obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset); - -int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg); -int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, - struct list_head *ext_list, int cmd); -long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, - long target, bool force); -unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages); -void osc_lru_unreserve(struct client_obd *cli, unsigned long npages); - -unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock); - -int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg); - -void lproc_osc_attach_seqstat(struct obd_device *dev); -void lprocfs_osc_init_vars(struct lprocfs_static_vars *lvars); - -extern struct lu_device_type osc_device_type; - -static inline int osc_recoverable_error(int rc) -{ - return (rc == -EIO || rc == -EROFS || rc == -ENOMEM || - rc == -EAGAIN || rc == 
-EINPROGRESS); -} - -static inline unsigned long rpcs_in_flight(struct client_obd *cli) -{ - return cli->cl_r_in_flight + cli->cl_w_in_flight; -} - -static inline char *cli_name(struct client_obd *cli) -{ - return cli->cl_import->imp_obd->obd_name; -} - -struct osc_device { - struct cl_device od_cl; - struct obd_export *od_exp; - - /* Write stats is actually protected by client_obd's lock. */ - struct osc_stats { - u64 os_lockless_writes; /* by bytes */ - u64 os_lockless_reads; /* by bytes */ - u64 os_lockless_truncates; /* by times */ - } od_stats; - - /* configuration item(s) */ - int od_contention_time; - int od_lockless_truncate; -}; - -static inline struct osc_device *obd2osc_dev(const struct obd_device *d) -{ - return container_of_safe(d->obd_lu_dev, struct osc_device, od_cl.cd_lu_dev); -} - -extern struct lu_kmem_descr osc_caches[]; - -extern struct kmem_cache *osc_quota_kmem; -struct osc_quota_info { - /** linkage for quota hash table */ - struct rhash_head oqi_hash; - u32 oqi_id; - struct rcu_head rcu; -}; - -int osc_quota_setup(struct obd_device *obd); -int osc_quota_cleanup(struct obd_device *obd); -int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[], - u32 valid, u32 flags); -int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]); -int osc_quotactl(struct obd_device *unused, struct obd_export *exp, - struct obd_quotactl *oqctl); -void osc_inc_unstable_pages(struct ptlrpc_request *req); -void osc_dec_unstable_pages(struct ptlrpc_request *req); -bool osc_over_unstable_soft_limit(struct client_obd *cli); - -/** - * Bit flags for osc_dlm_lock_at_pageoff(). - */ -enum osc_dap_flags { - /** - * Just check if the desired lock exists, it won't hold reference - * count on lock. - */ - OSC_DAP_FL_TEST_LOCK = BIT(0), - /** - * Return the lock even if it is being canceled. 
- */ - OSC_DAP_FL_CANCELING = BIT(1), -}; - -struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, - struct osc_object *obj, pgoff_t index, - enum osc_dap_flags flags); - -int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc); - -/** osc shrink list to link all osc client obd */ -extern struct list_head osc_shrink_list; -/** spin lock to protect osc_shrink_list */ -extern spinlock_t osc_shrink_lock; -unsigned long osc_cache_shrink_count(struct shrinker *sk, - struct shrink_control *sc); -unsigned long osc_cache_shrink_scan(struct shrinker *sk, - struct shrink_control *sc); - -#endif /* OSC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c deleted file mode 100644 index 67734a8ed331..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_io.c +++ /dev/null @@ -1,918 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_io for OSC layer. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_OSC - -#include - -#include "osc_cl_internal.h" - -/** \addtogroup osc - * @{ - */ - -/***************************************************************************** - * - * Type conversions. - * - */ - -static struct osc_io *cl2osc_io(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct osc_io *oio = container_of_safe(slice, struct osc_io, oi_cl); - - LINVRNT(oio == osc_env_io(env)); - return oio; -} - -/***************************************************************************** - * - * io operations. - * - */ - -static void osc_io_fini(const struct lu_env *env, const struct cl_io_slice *io) -{ -} - -static void osc_read_ahead_release(const struct lu_env *env, void *cbdata) -{ - struct ldlm_lock *dlmlock = cbdata; - struct lustre_handle lockh; - - ldlm_lock2handle(dlmlock, &lockh); - ldlm_lock_decref(&lockh, LCK_PR); - LDLM_LOCK_PUT(dlmlock); -} - -static int osc_io_read_ahead(const struct lu_env *env, - const struct cl_io_slice *ios, - pgoff_t start, struct cl_read_ahead *ra) -{ - struct osc_object *osc = cl2osc(ios->cis_obj); - struct ldlm_lock *dlmlock; - int result = -ENODATA; - - dlmlock = osc_dlmlock_at_pgoff(env, osc, start, 0); - if (dlmlock) { - LASSERT(dlmlock->l_ast_data == osc); - if (dlmlock->l_req_mode != LCK_PR) { - struct lustre_handle lockh; - - ldlm_lock2handle(dlmlock, &lockh); - ldlm_lock_addref(&lockh, LCK_PR); - ldlm_lock_decref(&lockh, dlmlock->l_req_mode); - } - - ra->cra_rpc_size = osc_cli(osc)->cl_max_pages_per_rpc; - ra->cra_end = cl_index(osc2cl(osc), - dlmlock->l_policy_data.l_extent.end); - ra->cra_release = osc_read_ahead_release; - ra->cra_cbdata = dlmlock; - result = 0; - } - - return result; -} - -/** - * An implementation of cl_io_operations::cio_io_submit() 
method for osc - * layer. Iterates over pages in the in-queue, prepares each for io by calling - * cl_page_prep() and then either submits them through osc_io_submit_page() - * or, if page is already submitted, changes osc flags through - * osc_set_async_flags(). - */ -static int osc_io_submit(const struct lu_env *env, - const struct cl_io_slice *ios, - enum cl_req_type crt, struct cl_2queue *queue) -{ - struct cl_page *page; - struct cl_page *tmp; - struct client_obd *cli = NULL; - struct osc_object *osc = NULL; /* to keep gcc happy */ - struct osc_page *opg; - struct cl_io *io; - LIST_HEAD(list); - - struct cl_page_list *qin = &queue->c2_qin; - struct cl_page_list *qout = &queue->c2_qout; - unsigned int queued = 0; - int result = 0; - int cmd; - int brw_flags; - unsigned int max_pages; - - LASSERT(qin->pl_nr > 0); - - CDEBUG(D_CACHE | D_READA, "%d %d\n", qin->pl_nr, crt); - - osc = cl2osc(ios->cis_obj); - cli = osc_cli(osc); - max_pages = cli->cl_max_pages_per_rpc; - - cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ; - brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0; - - /* - * NOTE: here @page is a top-level page. This is done to avoid - * creation of sub-page-list. - */ - cl_page_list_for_each_safe(page, tmp, qin) { - struct osc_async_page *oap; - - /* Top level IO. */ - io = page->cp_owner; - LASSERT(io); - - opg = osc_cl_page_osc(page, osc); - oap = &opg->ops_oap; - LASSERT(osc == oap->oap_obj); - - if (!list_empty(&oap->oap_pending_item) || - !list_empty(&oap->oap_rpc_item)) { - CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n", - oap, opg); - result = -EBUSY; - break; - } - - result = cl_page_prep(env, io, page, crt); - if (result != 0) { - LASSERT(result < 0); - if (result != -EALREADY) - break; - /* - * Handle -EALREADY error: for read case, the page is - * already in UPTODATE state; for write, the page - * is not dirty. 
- */ - result = 0; - continue; - } - - spin_lock(&oap->oap_lock); - oap->oap_async_flags = ASYNC_URGENT | ASYNC_READY; - oap->oap_async_flags |= ASYNC_COUNT_STABLE; - spin_unlock(&oap->oap_lock); - - osc_page_submit(env, opg, crt, brw_flags); - list_add_tail(&oap->oap_pending_item, &list); - - if (page->cp_sync_io) - cl_page_list_move(qout, qin, page); - else /* async IO */ - cl_page_list_del(env, qin, page); - - if (++queued == max_pages) { - queued = 0; - result = osc_queue_sync_pages(env, osc, &list, cmd, - brw_flags); - if (result < 0) - break; - } - } - - if (queued > 0) - result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags); - - /* Update c/mtime for sync write. LU-7310 */ - if (qout->pl_nr > 0 && !result) { - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - struct cl_object *obj = ios->cis_obj; - - cl_object_attr_lock(obj); - attr->cat_mtime = ktime_get_real_seconds(); - attr->cat_ctime = attr->cat_mtime; - cl_object_attr_update(env, obj, attr, CAT_MTIME | CAT_CTIME); - cl_object_attr_unlock(obj); - } - - CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result); - return qout->pl_nr > 0 ? 0 : result; -} - -/** - * This is called when a page is accessed within file in a way that creates - * new page, if one were missing (i.e., if there were a hole at that place in - * the file, or accessed page is beyond the current file size). - * - * Expand stripe KMS if necessary. - */ -static void osc_page_touch_at(const struct lu_env *env, - struct cl_object *obj, pgoff_t idx, size_t to) -{ - struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - int valid; - __u64 kms; - - /* offset within stripe */ - kms = cl_offset(obj, idx) + to; - - cl_object_attr_lock(obj); - /* - * XXX old code used - * - * ll_inode_size_lock(inode, 0); lov_stripe_lock(lsm); - * - * here - */ - CDEBUG(D_INODE, "stripe KMS %sincreasing %llu->%llu %llu\n", - kms > loi->loi_kms ? 
"" : "not ", loi->loi_kms, kms, - loi->loi_lvb.lvb_size); - - attr->cat_ctime = ktime_get_real_seconds(); - attr->cat_mtime = attr->cat_ctime; - valid = CAT_MTIME | CAT_CTIME; - if (kms > loi->loi_kms) { - attr->cat_kms = kms; - valid |= CAT_KMS; - } - if (kms > loi->loi_lvb.lvb_size) { - attr->cat_size = kms; - valid |= CAT_SIZE; - } - cl_object_attr_update(env, obj, attr, valid); - cl_object_attr_unlock(obj); -} - -static int osc_io_commit_async(const struct lu_env *env, - const struct cl_io_slice *ios, - struct cl_page_list *qin, int from, int to, - cl_commit_cbt cb) -{ - struct cl_io *io = ios->cis_io; - struct osc_io *oio = cl2osc_io(env, ios); - struct osc_object *osc = cl2osc(ios->cis_obj); - struct cl_page *page; - struct cl_page *last_page; - struct osc_page *opg; - int result = 0; - - LASSERT(qin->pl_nr > 0); - - /* Handle partial page cases */ - last_page = cl_page_list_last(qin); - if (oio->oi_lockless) { - page = cl_page_list_first(qin); - if (page == last_page) { - cl_page_clip(env, page, from, to); - } else { - if (from != 0) - cl_page_clip(env, page, from, PAGE_SIZE); - if (to != PAGE_SIZE) - cl_page_clip(env, last_page, 0, to); - } - } - - while (qin->pl_nr > 0) { - struct osc_async_page *oap; - - page = cl_page_list_first(qin); - opg = osc_cl_page_osc(page, osc); - oap = &opg->ops_oap; - - if (!list_empty(&oap->oap_rpc_item)) { - CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n", - oap, opg); - result = -EBUSY; - break; - } - - /* The page may be already in dirty cache. */ - if (list_empty(&oap->oap_pending_item)) { - result = osc_page_cache_add(env, &opg->ops_cl, io); - if (result != 0) - break; - } - - osc_page_touch_at(env, osc2cl(osc), osc_index(opg), - page == last_page ? to : PAGE_SIZE); - - cl_page_list_del(env, qin, page); - - (*cb)(env, io, page); - /* Can't access page any more. Page can be in transfer and - * complete at any time. 
- */ - } - - /* for sync write, kernel will wait for this page to be flushed before - * osc_io_end() is called, so release it earlier. - * for mkwrite(), it's known there is no further pages. - */ - if (cl_io_is_sync_write(io) && oio->oi_active) { - osc_extent_release(env, oio->oi_active); - oio->oi_active = NULL; - } - - CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, result); - return result; -} - -static int osc_io_iter_init(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct osc_object *osc = cl2osc(ios->cis_obj); - struct obd_import *imp = osc_cli(osc)->cl_import; - int rc = -EIO; - - spin_lock(&imp->imp_lock); - if (likely(!imp->imp_invalid)) { - struct osc_io *oio = osc_env_io(env); - - atomic_inc(&osc->oo_nr_ios); - oio->oi_is_active = 1; - rc = 0; - } - spin_unlock(&imp->imp_lock); - - return rc; -} - -static int osc_io_write_iter_init(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io = ios->cis_io; - struct osc_io *oio = osc_env_io(env); - struct osc_object *osc = cl2osc(ios->cis_obj); - unsigned long npages; - - if (cl_io_is_append(io)) - return osc_io_iter_init(env, ios); - - npages = io->u.ci_rw.crw_count >> PAGE_SHIFT; - if (io->u.ci_rw.crw_pos & ~PAGE_MASK) - ++npages; - - oio->oi_lru_reserved = osc_lru_reserve(osc_cli(osc), npages); - - return osc_io_iter_init(env, ios); -} - -static void osc_io_iter_fini(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct osc_io *oio = osc_env_io(env); - - if (oio->oi_is_active) { - struct osc_object *osc = cl2osc(ios->cis_obj); - - oio->oi_is_active = 0; - LASSERT(atomic_read(&osc->oo_nr_ios) > 0); - if (atomic_dec_and_test(&osc->oo_nr_ios)) - wake_up_all(&osc->oo_io_waitq); - } -} - -static void osc_io_write_iter_fini(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct osc_io *oio = osc_env_io(env); - struct osc_object *osc = cl2osc(ios->cis_obj); - - if (oio->oi_lru_reserved > 0) { - osc_lru_unreserve(osc_cli(osc), 
oio->oi_lru_reserved); - oio->oi_lru_reserved = 0; - } - oio->oi_write_osclock = NULL; - - osc_io_iter_fini(env, ios); -} - -static int osc_io_fault_start(const struct lu_env *env, - const struct cl_io_slice *ios) -{ - struct cl_io *io; - struct cl_fault_io *fio; - - io = ios->cis_io; - fio = &io->u.ci_fault; - CDEBUG(D_INFO, "%lu %d %zu\n", - fio->ft_index, fio->ft_writable, fio->ft_nob); - /* - * If mapping is writeable, adjust kms to cover this page, - * but do not extend kms beyond actual file size. - * See bug 10919. - */ - if (fio->ft_writable) - osc_page_touch_at(env, ios->cis_obj, - fio->ft_index, fio->ft_nob); - return 0; -} - -static int osc_async_upcall(void *a, int rc) -{ - struct osc_async_cbargs *args = a; - - args->opc_rc = rc; - complete(&args->opc_sync); - return 0; -} - -/** - * Checks that there are no pages being written in the extent being truncated. - */ -static int trunc_check_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops, void *cbdata) -{ - struct cl_page *page = ops->ops_cl.cpl_page; - struct osc_async_page *oap; - __u64 start = *(__u64 *)cbdata; - - oap = &ops->ops_oap; - if (oap->oap_cmd & OBD_BRW_WRITE && - !list_empty(&oap->oap_pending_item)) - CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n", - start, current->comm); - - if (PageLocked(page->cp_vmpage)) - CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n", - ops, osc_index(ops), oap->oap_cmd & OBD_BRW_RWMASK); - - return CLP_GANG_OKAY; -} - -static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, - struct osc_io *oio, __u64 size) -{ - struct cl_object *clob; - int partial; - pgoff_t start; - - clob = oio->oi_cl.cis_obj; - start = cl_index(clob, size); - partial = cl_offset(clob, start) < size; - - /* - * Complain if there are pages in the truncated region. 
- */ - osc_page_gang_lookup(env, io, cl2osc(clob), - start + partial, CL_PAGE_EOF, - trunc_check_cb, (void *)&size); -} - -static int osc_io_setattr_start(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct cl_io *io = slice->cis_io; - struct osc_io *oio = cl2osc_io(env, slice); - struct cl_object *obj = slice->cis_obj; - struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - struct obdo *oa = &oio->oi_oa; - struct osc_async_cbargs *cbargs = &oio->oi_cbarg; - __u64 size = io->u.ci_setattr.sa_attr.lvb_size; - unsigned int ia_valid = io->u.ci_setattr.sa_valid; - int result = 0; - - /* truncate cache dirty pages first */ - if (cl_io_is_trunc(io)) - result = osc_cache_truncate_start(env, cl2osc(obj), size, - &oio->oi_trunc); - - if (result == 0 && oio->oi_lockless == 0) { - cl_object_attr_lock(obj); - result = cl_object_attr_get(env, obj, attr); - if (result == 0) { - struct ost_lvb *lvb = &io->u.ci_setattr.sa_attr; - unsigned int cl_valid = 0; - - if (ia_valid & ATTR_SIZE) { - attr->cat_size = size; - attr->cat_kms = size; - cl_valid = CAT_SIZE | CAT_KMS; - } - if (ia_valid & ATTR_MTIME_SET) { - attr->cat_mtime = lvb->lvb_mtime; - cl_valid |= CAT_MTIME; - } - if (ia_valid & ATTR_ATIME_SET) { - attr->cat_atime = lvb->lvb_atime; - cl_valid |= CAT_ATIME; - } - if (ia_valid & ATTR_CTIME_SET) { - attr->cat_ctime = lvb->lvb_ctime; - cl_valid |= CAT_CTIME; - } - result = cl_object_attr_update(env, obj, attr, - cl_valid); - } - cl_object_attr_unlock(obj); - } - memset(oa, 0, sizeof(*oa)); - if (result == 0) { - oa->o_oi = loi->loi_oi; - obdo_set_parent_fid(oa, io->u.ci_setattr.sa_parent_fid); - oa->o_stripe_idx = io->u.ci_setattr.sa_stripe_index; - oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP; - if (ia_valid & ATTR_CTIME) { - oa->o_valid |= OBD_MD_FLCTIME; - oa->o_ctime = attr->cat_ctime; - } - if (ia_valid & ATTR_ATIME) { - oa->o_valid |= OBD_MD_FLATIME; - oa->o_atime = attr->cat_atime; - } - if (ia_valid & 
ATTR_MTIME) { - oa->o_valid |= OBD_MD_FLMTIME; - oa->o_mtime = attr->cat_mtime; - } - if (ia_valid & ATTR_SIZE) { - oa->o_size = size; - oa->o_blocks = OBD_OBJECT_EOF; - oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - - if (oio->oi_lockless) { - oa->o_flags = OBD_FL_SRVLOCK; - oa->o_valid |= OBD_MD_FLFLAGS; - } - } else { - LASSERT(oio->oi_lockless == 0); - } - if (ia_valid & ATTR_ATTR_FLAG) { - oa->o_flags = io->u.ci_setattr.sa_attr_flags; - oa->o_valid |= OBD_MD_FLFLAGS; - } - - init_completion(&cbargs->opc_sync); - - if (ia_valid & ATTR_SIZE) - result = osc_punch_base(osc_export(cl2osc(obj)), - oa, osc_async_upcall, - cbargs, PTLRPCD_SET); - else - result = osc_setattr_async(osc_export(cl2osc(obj)), - oa, osc_async_upcall, - cbargs, PTLRPCD_SET); - cbargs->opc_rpc_sent = result == 0; - } - return result; -} - -static void osc_io_setattr_end(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct cl_io *io = slice->cis_io; - struct osc_io *oio = cl2osc_io(env, slice); - struct cl_object *obj = slice->cis_obj; - struct osc_async_cbargs *cbargs = &oio->oi_cbarg; - int result = 0; - - if (cbargs->opc_rpc_sent) { - wait_for_completion(&cbargs->opc_sync); - result = cbargs->opc_rc; - io->ci_result = cbargs->opc_rc; - } - if (result == 0) { - if (oio->oi_lockless) { - /* lockless truncate */ - struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev); - - LASSERT(cl_io_is_trunc(io)); - /* XXX: Need a lock. 
*/ - osd->od_stats.os_lockless_truncates++; - } - } - - if (cl_io_is_trunc(io)) { - __u64 size = io->u.ci_setattr.sa_attr.lvb_size; - - osc_trunc_check(env, io, oio, size); - osc_cache_truncate_end(env, oio->oi_trunc); - oio->oi_trunc = NULL; - } -} - -struct osc_data_version_args { - struct osc_io *dva_oio; -}; - -static int -osc_data_version_interpret(const struct lu_env *env, struct ptlrpc_request *req, - void *arg, int rc) -{ - struct osc_data_version_args *dva = arg; - struct osc_io *oio = dva->dva_oio; - const struct ost_body *body; - - if (rc < 0) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); - if (!body) { - rc = -EPROTO; - goto out; - } - - lustre_get_wire_obdo(&req->rq_import->imp_connect_data, &oio->oi_oa, - &body->oa); -out: - oio->oi_cbarg.opc_rc = rc; - complete(&oio->oi_cbarg.opc_sync); - - return 0; -} - -static int osc_io_data_version_start(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version; - struct osc_io *oio = cl2osc_io(env, slice); - struct osc_async_cbargs *cbargs = &oio->oi_cbarg; - struct osc_object *obj = cl2osc(slice->cis_obj); - struct obd_export *exp = osc_export(obj); - struct lov_oinfo *loi = obj->oo_oinfo; - struct osc_data_version_args *dva; - struct obdo *oa = &oio->oi_oa; - struct ptlrpc_request *req; - struct ost_body *body; - int rc; - - memset(oa, 0, sizeof(*oa)); - oa->o_oi = loi->loi_oi; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; - - if (dv->dv_flags & (LL_DV_RD_FLUSH | LL_DV_WR_FLUSH)) { - oa->o_valid |= OBD_MD_FLFLAGS; - oa->o_flags |= OBD_FL_SRVLOCK; - if (dv->dv_flags & LL_DV_WR_FLUSH) - oa->o_flags |= OBD_FL_FLUSH; - } - - init_completion(&cbargs->opc_sync); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR); - if (rc < 0) { - ptlrpc_request_free(req); - return rc; - } - - body = 
req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); - - ptlrpc_request_set_replen(req); - req->rq_interpret_reply = osc_data_version_interpret; - BUILD_BUG_ON(sizeof(*dva) > sizeof(req->rq_async_args)); - dva = ptlrpc_req_async_args(req); - dva->dva_oio = oio; - - ptlrpcd_add_req(req); - - return 0; -} - -static void osc_io_data_version_end(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version; - struct osc_io *oio = cl2osc_io(env, slice); - struct osc_async_cbargs *cbargs = &oio->oi_cbarg; - - wait_for_completion(&cbargs->opc_sync); - - if (cbargs->opc_rc) { - slice->cis_io->ci_result = cbargs->opc_rc; - } else if (!(oio->oi_oa.o_valid & OBD_MD_FLDATAVERSION)) { - slice->cis_io->ci_result = -EOPNOTSUPP; - } else { - dv->dv_data_version = oio->oi_oa.o_data_version; - slice->cis_io->ci_result = 0; - } -} - -static int osc_io_read_start(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct cl_object *obj = slice->cis_obj; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - int rc = 0; - - if (!slice->cis_io->ci_noatime) { - cl_object_attr_lock(obj); - attr->cat_atime = ktime_get_real_seconds(); - rc = cl_object_attr_update(env, obj, attr, CAT_ATIME); - cl_object_attr_unlock(obj); - } - return rc; -} - -static int osc_io_write_start(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct cl_object *obj = slice->cis_obj; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - int rc = 0; - - OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_SETTIME, 1); - cl_object_attr_lock(obj); - attr->cat_ctime = ktime_get_real_seconds(); - attr->cat_mtime = attr->cat_ctime; - rc = cl_object_attr_update(env, obj, attr, CAT_MTIME | CAT_CTIME); - cl_object_attr_unlock(obj); - - return rc; -} - -static int osc_fsync_ost(const struct lu_env *env, struct osc_object *obj, - struct cl_fsync_io *fio) -{ - 
struct osc_io *oio = osc_env_io(env); - struct obdo *oa = &oio->oi_oa; - struct lov_oinfo *loi = obj->oo_oinfo; - struct osc_async_cbargs *cbargs = &oio->oi_cbarg; - int rc = 0; - - memset(oa, 0, sizeof(*oa)); - oa->o_oi = loi->loi_oi; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; - - /* reload size abd blocks for start and end of sync range */ - oa->o_size = fio->fi_start; - oa->o_blocks = fio->fi_end; - oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - - obdo_set_parent_fid(oa, fio->fi_fid); - - init_completion(&cbargs->opc_sync); - - rc = osc_sync_base(obj, oa, osc_async_upcall, cbargs, PTLRPCD_SET); - return rc; -} - -static int osc_io_fsync_start(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct cl_io *io = slice->cis_io; - struct cl_fsync_io *fio = &io->u.ci_fsync; - struct cl_object *obj = slice->cis_obj; - struct osc_object *osc = cl2osc(obj); - pgoff_t start = cl_index(obj, fio->fi_start); - pgoff_t end = cl_index(obj, fio->fi_end); - int result = 0; - - if (fio->fi_end == OBD_OBJECT_EOF) - end = CL_PAGE_EOF; - - result = osc_cache_writeback_range(env, osc, start, end, 0, - fio->fi_mode == CL_FSYNC_DISCARD); - if (result > 0) { - fio->fi_nr_written += result; - result = 0; - } - if (fio->fi_mode == CL_FSYNC_ALL) { - int rc; - - /* we have to wait for writeback to finish before we can - * send OST_SYNC RPC. This is bad because it causes extents - * to be written osc by osc. However, we usually start - * writeback before CL_FSYNC_ALL so this won't have any real - * problem. 
- */ - rc = osc_cache_wait_range(env, osc, start, end); - if (result == 0) - result = rc; - rc = osc_fsync_ost(env, osc, fio); - if (result == 0) - result = rc; - } - - return result; -} - -static void osc_io_fsync_end(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct cl_fsync_io *fio = &slice->cis_io->u.ci_fsync; - struct cl_object *obj = slice->cis_obj; - pgoff_t start = cl_index(obj, fio->fi_start); - pgoff_t end = cl_index(obj, fio->fi_end); - int result = 0; - - if (fio->fi_mode == CL_FSYNC_LOCAL) { - result = osc_cache_wait_range(env, cl2osc(obj), start, end); - } else if (fio->fi_mode == CL_FSYNC_ALL) { - struct osc_io *oio = cl2osc_io(env, slice); - struct osc_async_cbargs *cbargs = &oio->oi_cbarg; - - wait_for_completion(&cbargs->opc_sync); - if (result == 0) - result = cbargs->opc_rc; - } - slice->cis_io->ci_result = result; -} - -static void osc_io_end(const struct lu_env *env, - const struct cl_io_slice *slice) -{ - struct osc_io *oio = cl2osc_io(env, slice); - - if (oio->oi_active) { - osc_extent_release(env, oio->oi_active); - oio->oi_active = NULL; - } -} - -static const struct cl_io_operations osc_io_ops = { - .op = { - [CIT_READ] = { - .cio_iter_init = osc_io_iter_init, - .cio_iter_fini = osc_io_iter_fini, - .cio_start = osc_io_read_start, - .cio_fini = osc_io_fini - }, - [CIT_WRITE] = { - .cio_iter_init = osc_io_write_iter_init, - .cio_iter_fini = osc_io_write_iter_fini, - .cio_start = osc_io_write_start, - .cio_end = osc_io_end, - .cio_fini = osc_io_fini - }, - [CIT_SETATTR] = { - .cio_iter_init = osc_io_iter_init, - .cio_iter_fini = osc_io_iter_fini, - .cio_start = osc_io_setattr_start, - .cio_end = osc_io_setattr_end - }, - [CIT_DATA_VERSION] = { - .cio_start = osc_io_data_version_start, - .cio_end = osc_io_data_version_end, - }, - [CIT_FAULT] = { - .cio_iter_init = osc_io_iter_init, - .cio_iter_fini = osc_io_iter_fini, - .cio_start = osc_io_fault_start, - .cio_end = osc_io_end, - .cio_fini = osc_io_fini - }, - 
[CIT_FSYNC] = { - .cio_start = osc_io_fsync_start, - .cio_end = osc_io_fsync_end, - .cio_fini = osc_io_fini - }, - [CIT_MISC] = { - .cio_fini = osc_io_fini - } - }, - .cio_read_ahead = osc_io_read_ahead, - .cio_submit = osc_io_submit, - .cio_commit_async = osc_io_commit_async -}; - -/***************************************************************************** - * - * Transfer operations. - * - */ - -int osc_io_init(const struct lu_env *env, - struct cl_object *obj, struct cl_io *io) -{ - struct osc_io *oio = osc_env_io(env); - - CL_IO_SLICE_CLEAN(oio, oi_cl); - cl_io_slice_add(io, &oio->oi_cl, obj, &osc_io_ops); - return 0; -} - -/** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c deleted file mode 100644 index d93d33dc8dc4..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_lock.c +++ /dev/null @@ -1,1230 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_lock for OSC layer. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_OSC - -/* fid_build_reg_res_name() */ -#include - -#include "osc_cl_internal.h" - -/** \addtogroup osc - * @{ - */ - -/***************************************************************************** - * - * Type conversions. - * - */ - -static const struct cl_lock_operations osc_lock_ops; -static const struct cl_lock_operations osc_lock_lockless_ops; -static void osc_lock_to_lockless(const struct lu_env *env, - struct osc_lock *ols, int force); - -int osc_lock_is_lockless(const struct osc_lock *olck) -{ - return (olck->ols_cl.cls_ops == &osc_lock_lockless_ops); -} - -/** - * Returns a weak pointer to the ldlm lock identified by a handle. Returned - * pointer cannot be dereferenced, as lock is not protected from concurrent - * reclaim. This function is a helper for osc_lock_invariant(). - */ -static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle) -{ - struct ldlm_lock *lock; - - lock = ldlm_handle2lock(handle); - if (lock) - LDLM_LOCK_PUT(lock); - return lock; -} - -/** - * Invariant that has to be true all of the time. 
- */ -static int osc_lock_invariant(struct osc_lock *ols) -{ - struct ldlm_lock *lock = osc_handle_ptr(&ols->ols_handle); - struct ldlm_lock *olock = ols->ols_dlmlock; - int handle_used = lustre_handle_is_used(&ols->ols_handle); - - if (ergo(osc_lock_is_lockless(ols), - ols->ols_locklessable && !ols->ols_dlmlock)) - return 1; - - /* - * If all the following "ergo"s are true, return 1, otherwise 0 - */ - if (!ergo(olock, handle_used)) - return 0; - - if (!ergo(olock, olock->l_handle.h_cookie == ols->ols_handle.cookie)) - return 0; - - if (!ergo(handle_used, - ergo(lock && olock, lock == olock) && - ergo(!lock, !olock))) - return 0; - /* - * Check that ->ols_handle and ->ols_dlmlock are consistent, but - * take into account that they are set at the different time. - */ - if (!ergo(ols->ols_state == OLS_CANCELLED, - !olock && !handle_used)) - return 0; - /* - * DLM lock is destroyed only after we have seen cancellation - * ast. - */ - if (!ergo(olock && ols->ols_state < OLS_CANCELLED, - !ldlm_is_destroyed(olock))) - return 0; - - if (!ergo(ols->ols_state == OLS_GRANTED, - olock && olock->l_req_mode == olock->l_granted_mode && - ols->ols_hold)) - return 0; - return 1; -} - -/***************************************************************************** - * - * Lock operations. 
- * - */ - -static void osc_lock_fini(const struct lu_env *env, - struct cl_lock_slice *slice) -{ - struct osc_lock *ols = cl2osc_lock(slice); - - LINVRNT(osc_lock_invariant(ols)); - LASSERT(!ols->ols_dlmlock); - - kmem_cache_free(osc_lock_kmem, ols); -} - -static void osc_lock_build_policy(const struct lu_env *env, - const struct cl_lock *lock, - union ldlm_policy_data *policy) -{ - const struct cl_lock_descr *d = &lock->cll_descr; - - osc_index2policy(policy, d->cld_obj, d->cld_start, d->cld_end); - policy->l_extent.gid = d->cld_gid; -} - -static __u64 osc_enq2ldlm_flags(__u32 enqflags) -{ - __u64 result = 0; - - LASSERT((enqflags & ~CEF_MASK) == 0); - - if (enqflags & CEF_NONBLOCK) - result |= LDLM_FL_BLOCK_NOWAIT; - if (enqflags & CEF_ASYNC) - result |= LDLM_FL_HAS_INTENT; - if (enqflags & CEF_DISCARD_DATA) - result |= LDLM_FL_AST_DISCARD_DATA; - if (enqflags & CEF_PEEK) - result |= LDLM_FL_TEST_LOCK; - if (enqflags & CEF_LOCK_MATCH) - result |= LDLM_FL_MATCH_LOCK; - return result; -} - -/** - * Updates object attributes from a lock value block (lvb) received together - * with the DLM lock reply from the server. Copy of osc_update_enqueue() - * logic. - * - * This can be optimized to not update attributes when lock is a result of a - * local match. - * - * Called under lock and resource spin-locks. 
- */ -static void osc_lock_lvb_update(const struct lu_env *env, - struct osc_object *osc, - struct ldlm_lock *dlmlock, - struct ost_lvb *lvb) -{ - struct cl_object *obj = osc2cl(osc); - struct lov_oinfo *oinfo = osc->oo_oinfo; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - unsigned int valid; - - valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE; - if (!lvb) - lvb = dlmlock->l_lvb_data; - - cl_lvb2attr(attr, lvb); - - cl_object_attr_lock(obj); - if (dlmlock) { - __u64 size; - - check_res_locked(dlmlock->l_resource); - LASSERT(lvb == dlmlock->l_lvb_data); - size = lvb->lvb_size; - - /* Extend KMS up to the end of this lock and no further - * A lock on [x,y] means a KMS of up to y + 1 bytes! - */ - if (size > dlmlock->l_policy_data.l_extent.end) - size = dlmlock->l_policy_data.l_extent.end + 1; - if (size >= oinfo->loi_kms) { - LDLM_DEBUG(dlmlock, "lock acquired, setting rss=%llu, kms=%llu", - lvb->lvb_size, size); - valid |= CAT_KMS; - attr->cat_kms = size; - } else { - LDLM_DEBUG(dlmlock, "lock acquired, setting rss=%llu; leaving kms=%llu, end=%llu", - lvb->lvb_size, oinfo->loi_kms, - dlmlock->l_policy_data.l_extent.end); - } - ldlm_lock_allow_match_locked(dlmlock); - } - - cl_object_attr_update(env, obj, attr, valid); - cl_object_attr_unlock(obj); -} - -static void osc_lock_granted(const struct lu_env *env, struct osc_lock *oscl, - struct lustre_handle *lockh, bool lvb_update) -{ - struct ldlm_lock *dlmlock; - - dlmlock = ldlm_handle2lock_long(lockh, 0); - LASSERT(dlmlock); - - /* lock reference taken by ldlm_handle2lock_long() is - * owned by osc_lock and released in osc_lock_detach() - */ - lu_ref_add(&dlmlock->l_reference, "osc_lock", oscl); - oscl->ols_has_ref = 1; - - LASSERT(!oscl->ols_dlmlock); - oscl->ols_dlmlock = dlmlock; - - /* This may be a matched lock for glimpse request, do not hold - * lock reference in that case. 
- */ - if (!oscl->ols_glimpse) { - /* hold a refc for non glimpse lock which will - * be released in osc_lock_cancel() - */ - lustre_handle_copy(&oscl->ols_handle, lockh); - ldlm_lock_addref(lockh, oscl->ols_einfo.ei_mode); - oscl->ols_hold = 1; - } - - /* Lock must have been granted. */ - lock_res_and_lock(dlmlock); - if (dlmlock->l_granted_mode == dlmlock->l_req_mode) { - struct ldlm_extent *ext = &dlmlock->l_policy_data.l_extent; - struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr; - - /* extend the lock extent, otherwise it will have problem when - * we decide whether to grant a lockless lock. - */ - descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode); - descr->cld_start = cl_index(descr->cld_obj, ext->start); - descr->cld_end = cl_index(descr->cld_obj, ext->end); - descr->cld_gid = ext->gid; - - /* no lvb update for matched lock */ - if (lvb_update) { - LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY); - osc_lock_lvb_update(env, cl2osc(oscl->ols_cl.cls_obj), - dlmlock, NULL); - } - LINVRNT(osc_lock_invariant(oscl)); - } - unlock_res_and_lock(dlmlock); - - LASSERT(oscl->ols_state != OLS_GRANTED); - oscl->ols_state = OLS_GRANTED; -} - -/** - * Lock upcall function that is executed either when a reply to ENQUEUE rpc is - * received from a server, or after osc_enqueue_base() matched a local DLM - * lock. - */ -static int osc_lock_upcall(void *cookie, struct lustre_handle *lockh, - int errcode) -{ - struct osc_lock *oscl = cookie; - struct cl_lock_slice *slice = &oscl->ols_cl; - struct lu_env *env; - int rc; - u16 refcheck; - - env = cl_env_get(&refcheck); - /* should never happen, similar to osc_ldlm_blocking_ast(). 
*/ - LASSERT(!IS_ERR(env)); - - rc = ldlm_error2errno(errcode); - if (oscl->ols_state == OLS_ENQUEUED) { - oscl->ols_state = OLS_UPCALL_RECEIVED; - } else if (oscl->ols_state == OLS_CANCELLED) { - rc = -EIO; - } else { - CERROR("Impossible state: %d\n", oscl->ols_state); - LBUG(); - } - - if (rc == 0) - osc_lock_granted(env, oscl, lockh, errcode == ELDLM_OK); - - /* Error handling, some errors are tolerable. */ - if (oscl->ols_locklessable && rc == -EUSERS) { - /* This is a tolerable error, turn this lock into - * lockless lock. - */ - osc_object_set_contended(cl2osc(slice->cls_obj)); - LASSERT(slice->cls_ops == &osc_lock_ops); - - /* Change this lock to ldlmlock-less lock. */ - osc_lock_to_lockless(env, oscl, 1); - oscl->ols_state = OLS_GRANTED; - rc = 0; - } else if (oscl->ols_glimpse && rc == -ENAVAIL) { - LASSERT(oscl->ols_flags & LDLM_FL_LVB_READY); - osc_lock_lvb_update(env, cl2osc(slice->cls_obj), - NULL, &oscl->ols_lvb); - /* Hide the error. */ - rc = 0; - } - - if (oscl->ols_owner) - cl_sync_io_note(env, oscl->ols_owner, rc); - cl_env_put(env, &refcheck); - - return rc; -} - -static int osc_lock_upcall_agl(void *cookie, struct lustre_handle *lockh, - int errcode) -{ - struct osc_object *osc = cookie; - struct ldlm_lock *dlmlock; - struct lu_env *env; - u16 refcheck; - - env = cl_env_get(&refcheck); - LASSERT(!IS_ERR(env)); - - if (errcode == ELDLM_LOCK_MATCHED) { - errcode = ELDLM_OK; - goto out; - } - - if (errcode != ELDLM_OK) - goto out; - - dlmlock = ldlm_handle2lock(lockh); - LASSERT(dlmlock); - - lock_res_and_lock(dlmlock); - LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode); - - /* there is no osc_lock associated with AGL lock */ - osc_lock_lvb_update(env, osc, dlmlock, NULL); - - unlock_res_and_lock(dlmlock); - LDLM_LOCK_PUT(dlmlock); - -out: - cl_object_put(env, osc2cl(osc)); - cl_env_put(env, &refcheck); - return ldlm_error2errno(errcode); -} - -static int osc_lock_flush(struct osc_object *obj, pgoff_t start, pgoff_t end, - enum 
cl_lock_mode mode, int discard) -{ - struct lu_env *env; - u16 refcheck; - int rc = 0; - int rc2 = 0; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return PTR_ERR(env); - - if (mode == CLM_WRITE) { - rc = osc_cache_writeback_range(env, obj, start, end, 1, - discard); - CDEBUG(D_CACHE, "object %p: [%lu -> %lu] %d pages were %s.\n", - obj, start, end, rc, - discard ? "discarded" : "written back"); - if (rc > 0) - rc = 0; - } - - rc2 = osc_lock_discard_pages(env, obj, start, end, mode); - if (rc == 0 && rc2 < 0) - rc = rc2; - - cl_env_put(env, &refcheck); - return rc; -} - -/** - * Helper for osc_dlm_blocking_ast() handling discrepancies between cl_lock - * and ldlm_lock caches. - */ -static int osc_dlm_blocking_ast0(const struct lu_env *env, - struct ldlm_lock *dlmlock, - void *data, int flag) -{ - struct cl_object *obj = NULL; - int result = 0; - int discard; - enum cl_lock_mode mode = CLM_READ; - - LASSERT(flag == LDLM_CB_CANCELING); - - lock_res_and_lock(dlmlock); - if (dlmlock->l_granted_mode != dlmlock->l_req_mode) { - dlmlock->l_ast_data = NULL; - unlock_res_and_lock(dlmlock); - return 0; - } - - discard = ldlm_is_discard_data(dlmlock); - if (dlmlock->l_granted_mode & (LCK_PW | LCK_GROUP)) - mode = CLM_WRITE; - - if (dlmlock->l_ast_data) { - obj = osc2cl(dlmlock->l_ast_data); - dlmlock->l_ast_data = NULL; - - cl_object_get(obj); - } - - unlock_res_and_lock(dlmlock); - - /* if l_ast_data is NULL, the dlmlock was enqueued by AGL or - * the object has been destroyed. - */ - if (obj) { - struct ldlm_extent *extent = &dlmlock->l_policy_data.l_extent; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - __u64 old_kms; - - /* Destroy pages covered by the extent of the DLM lock */ - result = osc_lock_flush(cl2osc(obj), - cl_index(obj, extent->start), - cl_index(obj, extent->end), - mode, discard); - - /* losing a lock, update kms */ - lock_res_and_lock(dlmlock); - cl_object_attr_lock(obj); - /* Must get the value under the lock to avoid race. 
*/ - old_kms = cl2osc(obj)->oo_oinfo->loi_kms; - /* Update the kms. Need to loop all granted locks. - * Not a problem for the client - */ - attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms); - - cl_object_attr_update(env, obj, attr, CAT_KMS); - cl_object_attr_unlock(obj); - unlock_res_and_lock(dlmlock); - - cl_object_put(env, obj); - } - return result; -} - -/** - * Blocking ast invoked by ldlm when dlm lock is either blocking progress of - * some other lock, or is canceled. This function is installed as a - * ldlm_lock::l_blocking_ast() for client extent locks. - * - * Control flow is tricky, because ldlm uses the same call-back - * (ldlm_lock::l_blocking_ast()) for both blocking and cancellation ast's. - * - * \param dlmlock lock for which ast occurred. - * - * \param new description of a conflicting lock in case of blocking ast. - * - * \param data value of dlmlock->l_ast_data - * - * \param flag LDLM_CB_BLOCKING or LDLM_CB_CANCELING. Used to distinguish - * cancellation and blocking ast's. - * - * Possible use cases: - * - * - ldlm calls dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING) to cancel - * lock due to lock lru pressure, or explicit user request to purge - * locks. - * - * - ldlm calls dlmlock->l_blocking_ast(..., LDLM_CB_BLOCKING) to notify - * us that dlmlock conflicts with another lock that some client is - * enqueing. Lock is canceled. - * - * - cl_lock_cancel() is called. osc_lock_cancel() calls - * ldlm_cli_cancel() that calls - * - * dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING) - * - * recursively entering osc_ldlm_blocking_ast(). 
- * - * - client cancels lock voluntary (e.g., as a part of early cancellation): - * - * cl_lock_cancel()-> - * osc_lock_cancel()-> - * ldlm_cli_cancel()-> - * dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING) - * - */ -static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock, - struct ldlm_lock_desc *new, void *data, - int flag) -{ - int result = 0; - - switch (flag) { - case LDLM_CB_BLOCKING: { - struct lustre_handle lockh; - - ldlm_lock2handle(dlmlock, &lockh); - result = ldlm_cli_cancel(&lockh, LCF_ASYNC); - if (result == -ENODATA) - result = 0; - break; - } - case LDLM_CB_CANCELING: { - struct lu_env *env; - u16 refcheck; - - /* - * This can be called in the context of outer IO, e.g., - * - * osc_enqueue_base()->... - * ->ldlm_prep_elc_req()->... - * ->ldlm_cancel_callback()->... - * ->osc_ldlm_blocking_ast() - * - * new environment has to be created to not corrupt outer - * context. - */ - env = cl_env_get(&refcheck); - if (IS_ERR(env)) { - result = PTR_ERR(env); - break; - } - - result = osc_dlm_blocking_ast0(env, dlmlock, data, flag); - cl_env_put(env, &refcheck); - break; - } - default: - LBUG(); - } - return result; -} - -static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data) -{ - struct ptlrpc_request *req = data; - struct lu_env *env; - struct ost_lvb *lvb; - struct req_capsule *cap; - struct cl_object *obj = NULL; - int result; - u16 refcheck; - - LASSERT(lustre_msg_get_opc(req->rq_reqmsg) == LDLM_GL_CALLBACK); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) { - result = PTR_ERR(env); - goto out; - } - - lock_res_and_lock(dlmlock); - if (dlmlock->l_ast_data) { - obj = osc2cl(dlmlock->l_ast_data); - cl_object_get(obj); - } - unlock_res_and_lock(dlmlock); - - if (obj) { - /* Do not grab the mutex of cl_lock for glimpse. - * See LU-1274 for details. - * BTW, it's okay for cl_lock to be cancelled during - * this period because server can handle this race. - * See ldlm_server_glimpse_ast() for details. 
- * cl_lock_mutex_get(env, lock); - */ - cap = &req->rq_pill; - req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK); - req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER, - sizeof(*lvb)); - result = req_capsule_server_pack(cap); - if (result == 0) { - lvb = req_capsule_server_get(cap, &RMF_DLM_LVB); - result = cl_object_glimpse(env, obj, lvb); - } - if (!exp_connect_lvb_type(req->rq_export)) { - req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, - sizeof(struct ost_lvb_v1), - RCL_SERVER); - } - cl_object_put(env, obj); - } else { - /* - * These errors are normal races, so we don't want to - * fill the console with messages by calling - * ptlrpc_error() - */ - lustre_pack_reply(req, 1, NULL, NULL); - result = -ELDLM_NO_LOCK_DATA; - } - cl_env_put(env, &refcheck); - -out: - req->rq_status = result; - return result; -} - -static int weigh_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops, void *cbdata) -{ - struct cl_page *page = ops->ops_cl.cpl_page; - - if (cl_page_is_vmlocked(env, page) || - PageDirty(page->cp_vmpage) || PageWriteback(page->cp_vmpage) - ) - return CLP_GANG_ABORT; - - *(pgoff_t *)cbdata = osc_index(ops) + 1; - return CLP_GANG_OKAY; -} - -static unsigned long osc_lock_weight(const struct lu_env *env, - struct osc_object *oscobj, - struct ldlm_extent *extent) -{ - struct cl_io *io = &osc_env_info(env)->oti_io; - struct cl_object *obj = cl_object_top(&oscobj->oo_cl); - pgoff_t page_index; - int result; - - io->ci_obj = obj; - io->ci_ignore_layout = 1; - result = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (result != 0) - return result; - - page_index = cl_index(obj, extent->start); - do { - result = osc_page_gang_lookup(env, io, oscobj, - page_index, - cl_index(obj, extent->end), - weigh_cb, (void *)&page_index); - if (result == CLP_GANG_ABORT) - break; - if (result == CLP_GANG_RESCHED) - cond_resched(); - } while (result != CLP_GANG_OKAY); - cl_io_fini(env, io); - - return result == CLP_GANG_ABORT ? 
1 : 0; -} - -/** - * Get the weight of dlm lock for early cancellation. - */ -unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock) -{ - struct lu_env *env; - struct osc_object *obj; - struct osc_lock *oscl; - unsigned long weight; - bool found = false; - u16 refcheck; - - might_sleep(); - /* - * osc_ldlm_weigh_ast has a complex context since it might be called - * because of lock canceling, or from user's input. We have to make - * a new environment for it. Probably it is implementation safe to use - * the upper context because cl_lock_put don't modify environment - * variables. But just in case .. - */ - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - /* Mostly because lack of memory, do not eliminate this lock */ - return 1; - - LASSERT(dlmlock->l_resource->lr_type == LDLM_EXTENT); - obj = dlmlock->l_ast_data; - if (!obj) { - weight = 1; - goto out; - } - - spin_lock(&obj->oo_ol_spin); - list_for_each_entry(oscl, &obj->oo_ol_list, ols_nextlock_oscobj) { - if (oscl->ols_dlmlock && oscl->ols_dlmlock != dlmlock) - continue; - found = true; - } - spin_unlock(&obj->oo_ol_spin); - if (found) { - /* - * If the lock is being used by an IO, definitely not cancel it. - */ - weight = 1; - goto out; - } - - weight = osc_lock_weight(env, obj, &dlmlock->l_policy_data.l_extent); - -out: - cl_env_put(env, &refcheck); - return weight; -} - -static void osc_lock_build_einfo(const struct lu_env *env, - const struct cl_lock *lock, - struct osc_object *osc, - struct ldlm_enqueue_info *einfo) -{ - einfo->ei_type = LDLM_EXTENT; - einfo->ei_mode = osc_cl_lock2ldlm(lock->cll_descr.cld_mode); - einfo->ei_cb_bl = osc_ldlm_blocking_ast; - einfo->ei_cb_cp = ldlm_completion_ast; - einfo->ei_cb_gl = osc_ldlm_glimpse_ast; - einfo->ei_cbdata = osc; /* value to be put into ->l_ast_data */ -} - -/** - * Determine if the lock should be converted into a lockless lock. 
- * - * Steps to check: - * - if the lock has an explicit requirement for a non-lockless lock; - * - if the io lock request type ci_lockreq; - * - send the enqueue rpc to ost to make the further decision; - * - special treat to truncate lockless lock - * - * Additional policy can be implemented here, e.g., never do lockless-io - * for large extents. - */ -static void osc_lock_to_lockless(const struct lu_env *env, - struct osc_lock *ols, int force) -{ - struct cl_lock_slice *slice = &ols->ols_cl; - - LASSERT(ols->ols_state == OLS_NEW || - ols->ols_state == OLS_UPCALL_RECEIVED); - - if (force) { - ols->ols_locklessable = 1; - slice->cls_ops = &osc_lock_lockless_ops; - } else { - struct osc_io *oio = osc_env_io(env); - struct cl_io *io = oio->oi_cl.cis_io; - struct cl_object *obj = slice->cls_obj; - struct osc_object *oob = cl2osc(obj); - const struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev); - struct obd_connect_data *ocd; - - LASSERT(io->ci_lockreq == CILR_MANDATORY || - io->ci_lockreq == CILR_MAYBE || - io->ci_lockreq == CILR_NEVER); - - ocd = &class_exp2cliimp(osc_export(oob))->imp_connect_data; - ols->ols_locklessable = (io->ci_type != CIT_SETATTR) && - (io->ci_lockreq == CILR_MAYBE) && - (ocd->ocd_connect_flags & OBD_CONNECT_SRVLOCK); - if (io->ci_lockreq == CILR_NEVER || - /* lockless IO */ - (ols->ols_locklessable && osc_object_is_contended(oob)) || - /* lockless truncate */ - (cl_io_is_trunc(io) && - (ocd->ocd_connect_flags & OBD_CONNECT_TRUNCLOCK) && - osd->od_lockless_truncate)) { - ols->ols_locklessable = 1; - slice->cls_ops = &osc_lock_lockless_ops; - } - } - LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols))); -} - -static bool osc_lock_compatible(const struct osc_lock *qing, - const struct osc_lock *qed) -{ - struct cl_lock_descr *qed_descr = &qed->ols_cl.cls_lock->cll_descr; - struct cl_lock_descr *qing_descr = &qing->ols_cl.cls_lock->cll_descr; - - if (qed->ols_glimpse) - return true; - - if (qing_descr->cld_mode == CLM_READ && 
qed_descr->cld_mode == CLM_READ) - return true; - - if (qed->ols_state < OLS_GRANTED) - return true; - - if (qed_descr->cld_mode >= qing_descr->cld_mode && - qed_descr->cld_start <= qing_descr->cld_start && - qed_descr->cld_end >= qing_descr->cld_end) - return true; - - return false; -} - -static void osc_lock_wake_waiters(const struct lu_env *env, - struct osc_object *osc, - struct osc_lock *oscl) -{ - spin_lock(&osc->oo_ol_spin); - list_del_init(&oscl->ols_nextlock_oscobj); - spin_unlock(&osc->oo_ol_spin); - - spin_lock(&oscl->ols_lock); - while (!list_empty(&oscl->ols_waiting_list)) { - struct osc_lock *scan; - - scan = list_entry(oscl->ols_waiting_list.next, struct osc_lock, - ols_wait_entry); - list_del_init(&scan->ols_wait_entry); - - cl_sync_io_note(env, scan->ols_owner, 0); - } - spin_unlock(&oscl->ols_lock); -} - -static int osc_lock_enqueue_wait(const struct lu_env *env, - struct osc_object *obj, - struct osc_lock *oscl) -{ - struct osc_lock *tmp_oscl; - struct cl_lock_descr *need = &oscl->ols_cl.cls_lock->cll_descr; - struct cl_sync_io *waiter = &osc_env_info(env)->oti_anchor; - int rc = 0; - - spin_lock(&obj->oo_ol_spin); - list_add_tail(&oscl->ols_nextlock_oscobj, &obj->oo_ol_list); - -restart: - list_for_each_entry(tmp_oscl, &obj->oo_ol_list, - ols_nextlock_oscobj) { - struct cl_lock_descr *descr; - - if (tmp_oscl == oscl) - break; - - descr = &tmp_oscl->ols_cl.cls_lock->cll_descr; - if (descr->cld_start > need->cld_end || - descr->cld_end < need->cld_start) - continue; - - /* We're not supposed to give up group lock */ - if (descr->cld_mode == CLM_GROUP) - break; - - if (!osc_lock_is_lockless(oscl) && - osc_lock_compatible(oscl, tmp_oscl)) - continue; - - /* wait for conflicting lock to be canceled */ - cl_sync_io_init(waiter, 1, cl_sync_io_end); - oscl->ols_owner = waiter; - - spin_lock(&tmp_oscl->ols_lock); - /* add oscl into tmp's ols_waiting list */ - list_add_tail(&oscl->ols_wait_entry, - &tmp_oscl->ols_waiting_list); - 
spin_unlock(&tmp_oscl->ols_lock); - - spin_unlock(&obj->oo_ol_spin); - rc = cl_sync_io_wait(env, waiter, 0); - spin_lock(&obj->oo_ol_spin); - if (rc < 0) - break; - - oscl->ols_owner = NULL; - goto restart; - } - spin_unlock(&obj->oo_ol_spin); - - return rc; -} - -/** - * Implementation of cl_lock_operations::clo_enqueue() method for osc - * layer. This initiates ldlm enqueue: - * - * - cancels conflicting locks early (osc_lock_enqueue_wait()); - * - * - calls osc_enqueue_base() to do actual enqueue. - * - * osc_enqueue_base() is supplied with an upcall function that is executed - * when lock is received either after a local cached ldlm lock is matched, or - * when a reply from the server is received. - * - * This function does not wait for the network communication to complete. - */ -static int osc_lock_enqueue(const struct lu_env *env, - const struct cl_lock_slice *slice, - struct cl_io *unused, struct cl_sync_io *anchor) -{ - struct osc_thread_info *info = osc_env_info(env); - struct osc_io *oio = osc_env_io(env); - struct osc_object *osc = cl2osc(slice->cls_obj); - struct osc_lock *oscl = cl2osc_lock(slice); - struct cl_lock *lock = slice->cls_lock; - struct ldlm_res_id *resname = &info->oti_resname; - union ldlm_policy_data *policy = &info->oti_policy; - osc_enqueue_upcall_f upcall = osc_lock_upcall; - void *cookie = oscl; - bool async = false; - int result; - - LASSERTF(ergo(oscl->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ), - "lock = %p, ols = %p\n", lock, oscl); - - if (oscl->ols_state == OLS_GRANTED) - return 0; - - if (oscl->ols_flags & LDLM_FL_TEST_LOCK) - goto enqueue_base; - - if (oscl->ols_glimpse) { - LASSERT(equi(oscl->ols_agl, !anchor)); - async = true; - goto enqueue_base; - } - - result = osc_lock_enqueue_wait(env, osc, oscl); - if (result < 0) - goto out; - - /* we can grant lockless lock right after all conflicting locks - * are canceled. 
- */ - if (osc_lock_is_lockless(oscl)) { - oscl->ols_state = OLS_GRANTED; - oio->oi_lockless = 1; - return 0; - } - -enqueue_base: - oscl->ols_state = OLS_ENQUEUED; - if (anchor) { - atomic_inc(&anchor->csi_sync_nr); - oscl->ols_owner = anchor; - } - - /** - * DLM lock's ast data must be osc_object; - * if glimpse or AGL lock, async of osc_enqueue_base() must be true, - * DLM's enqueue callback set to osc_lock_upcall() with cookie as - * osc_lock. - */ - ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname); - osc_lock_build_policy(env, lock, policy); - if (oscl->ols_agl) { - oscl->ols_einfo.ei_cbdata = NULL; - /* hold a reference for callback */ - cl_object_get(osc2cl(osc)); - upcall = osc_lock_upcall_agl; - cookie = osc; - } - result = osc_enqueue_base(osc_export(osc), resname, &oscl->ols_flags, - policy, &oscl->ols_lvb, - osc->oo_oinfo->loi_kms_valid, - upcall, cookie, - &oscl->ols_einfo, PTLRPCD_SET, async, - oscl->ols_agl); - if (!result) { - if (osc_lock_is_lockless(oscl)) { - oio->oi_lockless = 1; - } else if (!async) { - LASSERT(oscl->ols_state == OLS_GRANTED); - LASSERT(oscl->ols_hold); - LASSERT(oscl->ols_dlmlock); - } - } else if (oscl->ols_agl) { - cl_object_put(env, osc2cl(osc)); - result = 0; - } - -out: - if (result < 0) { - oscl->ols_state = OLS_CANCELLED; - osc_lock_wake_waiters(env, osc, oscl); - - if (anchor) - cl_sync_io_note(env, anchor, result); - } - return result; -} - -/** - * Breaks a link between osc_lock and dlm_lock. - */ -static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck) -{ - struct ldlm_lock *dlmlock; - - dlmlock = olck->ols_dlmlock; - if (!dlmlock) - return; - - if (olck->ols_hold) { - olck->ols_hold = 0; - ldlm_lock_decref(&olck->ols_handle, olck->ols_einfo.ei_mode); - olck->ols_handle.cookie = 0ULL; - } - - olck->ols_dlmlock = NULL; - - /* release a reference taken in osc_lock_upcall(). 
*/ - LASSERT(olck->ols_has_ref); - lu_ref_del(&dlmlock->l_reference, "osc_lock", olck); - LDLM_LOCK_RELEASE(dlmlock); - olck->ols_has_ref = 0; -} - -/** - * Implements cl_lock_operations::clo_cancel() method for osc layer. This is - * called (as part of cl_lock_cancel()) when lock is canceled either voluntary - * (LRU pressure, early cancellation, umount, etc.) or due to the conflict - * with some other lock some where in the cluster. This function does the - * following: - * - * - invalidates all pages protected by this lock (after sending dirty - * ones to the server, as necessary); - * - * - decref's underlying ldlm lock; - * - * - cancels ldlm lock (ldlm_cli_cancel()). - */ -static void osc_lock_cancel(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_object *obj = cl2osc(slice->cls_obj); - struct osc_lock *oscl = cl2osc_lock(slice); - - LINVRNT(osc_lock_invariant(oscl)); - - osc_lock_detach(env, oscl); - oscl->ols_state = OLS_CANCELLED; - oscl->ols_flags &= ~LDLM_FL_LVB_READY; - - osc_lock_wake_waiters(env, obj, oscl); -} - -static int osc_lock_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct cl_lock_slice *slice) -{ - struct osc_lock *lock = cl2osc_lock(slice); - - (*p)(env, cookie, "%p %#16llx %#llx %d %p ", - lock->ols_dlmlock, lock->ols_flags, lock->ols_handle.cookie, - lock->ols_state, lock->ols_owner); - osc_lvb_print(env, cookie, p, &lock->ols_lvb); - return 0; -} - -static const struct cl_lock_operations osc_lock_ops = { - .clo_fini = osc_lock_fini, - .clo_enqueue = osc_lock_enqueue, - .clo_cancel = osc_lock_cancel, - .clo_print = osc_lock_print, -}; - -static void osc_lock_lockless_cancel(const struct lu_env *env, - const struct cl_lock_slice *slice) -{ - struct osc_lock *ols = cl2osc_lock(slice); - struct osc_object *osc = cl2osc(slice->cls_obj); - struct cl_lock_descr *descr = &slice->cls_lock->cll_descr; - int result; - - LASSERT(!ols->ols_dlmlock); - result = osc_lock_flush(osc, 
descr->cld_start, descr->cld_end, - descr->cld_mode, 0); - if (result) - CERROR("Pages for lockless lock %p were not purged(%d)\n", - ols, result); - - osc_lock_wake_waiters(env, osc, ols); -} - -static const struct cl_lock_operations osc_lock_lockless_ops = { - .clo_fini = osc_lock_fini, - .clo_enqueue = osc_lock_enqueue, - .clo_cancel = osc_lock_lockless_cancel, - .clo_print = osc_lock_print -}; - -static void osc_lock_set_writer(const struct lu_env *env, - const struct cl_io *io, - struct cl_object *obj, struct osc_lock *oscl) -{ - struct cl_lock_descr *descr = &oscl->ols_cl.cls_lock->cll_descr; - pgoff_t io_start; - pgoff_t io_end; - - if (!cl_object_same(io->ci_obj, obj)) - return; - - if (likely(io->ci_type == CIT_WRITE)) { - io_start = cl_index(obj, io->u.ci_rw.crw_pos); - io_end = cl_index(obj, io->u.ci_rw.crw_pos + - io->u.ci_rw.crw_count - 1); - if (cl_io_is_append(io)) { - io_start = 0; - io_end = CL_PAGE_EOF; - } - } else { - LASSERT(cl_io_is_mkwrite(io)); - io_start = io->u.ci_fault.ft_index; - io_end = io->u.ci_fault.ft_index; - } - - if (descr->cld_mode >= CLM_WRITE && - descr->cld_start <= io_start && descr->cld_end >= io_end) { - struct osc_io *oio = osc_env_io(env); - - /* There must be only one lock to match the write region */ - LASSERT(!oio->oi_write_osclock); - oio->oi_write_osclock = oscl; - } -} - -int osc_lock_init(const struct lu_env *env, - struct cl_object *obj, struct cl_lock *lock, - const struct cl_io *io) -{ - struct osc_lock *oscl; - __u32 enqflags = lock->cll_descr.cld_enq_flags; - - oscl = kmem_cache_zalloc(osc_lock_kmem, GFP_NOFS); - if (!oscl) - return -ENOMEM; - - oscl->ols_state = OLS_NEW; - spin_lock_init(&oscl->ols_lock); - INIT_LIST_HEAD(&oscl->ols_waiting_list); - INIT_LIST_HEAD(&oscl->ols_wait_entry); - INIT_LIST_HEAD(&oscl->ols_nextlock_oscobj); - - oscl->ols_flags = osc_enq2ldlm_flags(enqflags); - oscl->ols_agl = !!(enqflags & CEF_AGL); - if (oscl->ols_agl) - oscl->ols_flags |= LDLM_FL_BLOCK_NOWAIT; - if 
(oscl->ols_flags & LDLM_FL_HAS_INTENT) { - oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED; - oscl->ols_glimpse = 1; - } - osc_lock_build_einfo(env, lock, cl2osc(obj), &oscl->ols_einfo); - - cl_lock_slice_add(lock, &oscl->ols_cl, obj, &osc_lock_ops); - - if (!(enqflags & CEF_MUST)) - /* try to convert this lock to a lockless lock */ - osc_lock_to_lockless(env, oscl, (enqflags & CEF_NEVER)); - if (oscl->ols_locklessable && !(enqflags & CEF_DISCARD_DATA)) - oscl->ols_flags |= LDLM_FL_DENY_ON_CONTENTION; - - if (io->ci_type == CIT_WRITE || cl_io_is_mkwrite(io)) - osc_lock_set_writer(env, io, obj, oscl); - - - LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx", - lock, oscl, oscl->ols_flags); - - return 0; -} - -/** - * Finds an existing lock covering given index and optionally different from a - * given \a except lock. - */ -struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, - struct osc_object *obj, pgoff_t index, - enum osc_dap_flags dap_flags) -{ - struct osc_thread_info *info = osc_env_info(env); - struct ldlm_res_id *resname = &info->oti_resname; - union ldlm_policy_data *policy = &info->oti_policy; - struct lustre_handle lockh; - struct ldlm_lock *lock = NULL; - enum ldlm_mode mode; - __u64 flags; - - ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname); - osc_index2policy(policy, osc2cl(obj), index, index); - policy->l_extent.gid = LDLM_GID_ANY; - - flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING; - if (dap_flags & OSC_DAP_FL_TEST_LOCK) - flags |= LDLM_FL_TEST_LOCK; - - /* - * It is fine to match any group lock since there could be only one - * with a uniq gid and it conflicts with all other lock modes too - */ -again: - mode = osc_match_base(osc_export(obj), resname, LDLM_EXTENT, policy, - LCK_PR | LCK_PW | LCK_GROUP, &flags, obj, &lockh, - dap_flags & OSC_DAP_FL_CANCELING); - if (mode != 0) { - lock = ldlm_handle2lock(&lockh); - /* RACE: the lock is cancelled so let's try again */ - if (unlikely(!lock)) - goto again; - } - return lock; -} - -/** 
@} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c deleted file mode 100644 index 84240181c7ea..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_object.c +++ /dev/null @@ -1,473 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_object for OSC layer. - * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_OSC - -#include "osc_cl_internal.h" - -/** \addtogroup osc - * @{ - */ - -/***************************************************************************** - * - * Type conversions. 
- * - */ - -static struct lu_object *osc2lu(struct osc_object *osc) -{ - return &osc->oo_cl.co_lu; -} - -static struct osc_object *lu2osc(const struct lu_object *obj) -{ - LINVRNT(osc_is_object(obj)); - return container_of(obj, struct osc_object, oo_cl.co_lu); -} - -/***************************************************************************** - * - * Object operations. - * - */ - -static int osc_object_init(const struct lu_env *env, struct lu_object *obj, - const struct lu_object_conf *conf) -{ - struct osc_object *osc = lu2osc(obj); - const struct cl_object_conf *cconf = lu2cl_conf(conf); - - osc->oo_oinfo = cconf->u.coc_oinfo; - INIT_LIST_HEAD(&osc->oo_ready_item); - INIT_LIST_HEAD(&osc->oo_hp_ready_item); - INIT_LIST_HEAD(&osc->oo_write_item); - INIT_LIST_HEAD(&osc->oo_read_item); - - atomic_set(&osc->oo_nr_ios, 0); - init_waitqueue_head(&osc->oo_io_waitq); - - osc->oo_root.rb_node = NULL; - INIT_LIST_HEAD(&osc->oo_hp_exts); - INIT_LIST_HEAD(&osc->oo_urgent_exts); - INIT_LIST_HEAD(&osc->oo_rpc_exts); - INIT_LIST_HEAD(&osc->oo_reading_exts); - atomic_set(&osc->oo_nr_reads, 0); - atomic_set(&osc->oo_nr_writes, 0); - spin_lock_init(&osc->oo_lock); - spin_lock_init(&osc->oo_tree_lock); - spin_lock_init(&osc->oo_ol_spin); - INIT_LIST_HEAD(&osc->oo_ol_list); - - cl_object_page_init(lu2cl(obj), sizeof(struct osc_page)); - - return 0; -} - -static void osc_object_free(const struct lu_env *env, struct lu_object *obj) -{ - struct osc_object *osc = lu2osc(obj); - - LASSERT(list_empty(&osc->oo_ready_item)); - LASSERT(list_empty(&osc->oo_hp_ready_item)); - LASSERT(list_empty(&osc->oo_write_item)); - LASSERT(list_empty(&osc->oo_read_item)); - - LASSERT(!osc->oo_root.rb_node); - LASSERT(list_empty(&osc->oo_hp_exts)); - LASSERT(list_empty(&osc->oo_urgent_exts)); - LASSERT(list_empty(&osc->oo_rpc_exts)); - LASSERT(list_empty(&osc->oo_reading_exts)); - LASSERT(atomic_read(&osc->oo_nr_reads) == 0); - LASSERT(atomic_read(&osc->oo_nr_writes) == 0); - 
LASSERT(list_empty(&osc->oo_ol_list)); - LASSERT(!atomic_read(&osc->oo_nr_ios)); - - lu_object_fini(obj); - kmem_cache_free(osc_object_kmem, osc); -} - -int osc_lvb_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct ost_lvb *lvb) -{ - return (*p)(env, cookie, "size: %llu mtime: %llu atime: %llu ctime: %llu blocks: %llu", - lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime, - lvb->lvb_ctime, lvb->lvb_blocks); -} - -static int osc_object_print(const struct lu_env *env, void *cookie, - lu_printer_t p, const struct lu_object *obj) -{ - struct osc_object *osc = lu2osc(obj); - struct lov_oinfo *oinfo = osc->oo_oinfo; - struct osc_async_rc *ar = &oinfo->loi_ar; - - (*p)(env, cookie, "id: " DOSTID " idx: %d gen: %d kms_valid: %u kms %llu rc: %d force_sync: %d min_xid: %llu ", - POSTID(&oinfo->loi_oi), oinfo->loi_ost_idx, - oinfo->loi_ost_gen, oinfo->loi_kms_valid, oinfo->loi_kms, - ar->ar_rc, ar->ar_force_sync, ar->ar_min_xid); - osc_lvb_print(env, cookie, p, &oinfo->loi_lvb); - return 0; -} - -static int osc_attr_get(const struct lu_env *env, struct cl_object *obj, - struct cl_attr *attr) -{ - struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo; - - cl_lvb2attr(attr, &oinfo->loi_lvb); - attr->cat_kms = oinfo->loi_kms_valid ? 
oinfo->loi_kms : 0; - return 0; -} - -static int osc_attr_update(const struct lu_env *env, struct cl_object *obj, - const struct cl_attr *attr, unsigned int valid) -{ - struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo; - struct ost_lvb *lvb = &oinfo->loi_lvb; - - if (valid & CAT_SIZE) - lvb->lvb_size = attr->cat_size; - if (valid & CAT_MTIME) - lvb->lvb_mtime = attr->cat_mtime; - if (valid & CAT_ATIME) - lvb->lvb_atime = attr->cat_atime; - if (valid & CAT_CTIME) - lvb->lvb_ctime = attr->cat_ctime; - if (valid & CAT_BLOCKS) - lvb->lvb_blocks = attr->cat_blocks; - if (valid & CAT_KMS) { - CDEBUG(D_CACHE, "set kms from %llu to %llu\n", - oinfo->loi_kms, (__u64)attr->cat_kms); - loi_kms_set(oinfo, attr->cat_kms); - } - return 0; -} - -static int osc_object_glimpse(const struct lu_env *env, - const struct cl_object *obj, struct ost_lvb *lvb) -{ - struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo; - - lvb->lvb_size = oinfo->loi_kms; - lvb->lvb_blocks = oinfo->loi_lvb.lvb_blocks; - return 0; -} - -static int osc_object_ast_clear(struct ldlm_lock *lock, void *data) -{ - if (lock->l_ast_data == data) - lock->l_ast_data = NULL; - return LDLM_ITER_CONTINUE; -} - -static int osc_object_prune(const struct lu_env *env, struct cl_object *obj) -{ - struct osc_object *osc = cl2osc(obj); - struct ldlm_res_id *resname = &osc_env_info(env)->oti_resname; - - /* DLM locks don't hold a reference of osc_object so we have to - * clear it before the object is being destroyed. 
- */ - ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname); - ldlm_resource_iterate(osc_export(osc)->exp_obd->obd_namespace, resname, - osc_object_ast_clear, osc); - return 0; -} - -static int osc_object_fiemap(const struct lu_env *env, struct cl_object *obj, - struct ll_fiemap_info_key *fmkey, - struct fiemap *fiemap, size_t *buflen) -{ - struct obd_export *exp = osc_export(cl2osc(obj)); - union ldlm_policy_data policy; - struct ptlrpc_request *req; - struct lustre_handle lockh; - struct ldlm_res_id resid; - enum ldlm_mode mode = 0; - struct fiemap *reply; - char *tmp; - int rc; - - fmkey->lfik_oa.o_oi = cl2osc(obj)->oo_oinfo->loi_oi; - if (!(fmkey->lfik_fiemap.fm_flags & FIEMAP_FLAG_SYNC)) - goto skip_locking; - - policy.l_extent.start = fmkey->lfik_fiemap.fm_start & PAGE_MASK; - - if (OBD_OBJECT_EOF - fmkey->lfik_fiemap.fm_length <= - fmkey->lfik_fiemap.fm_start + PAGE_SIZE - 1) - policy.l_extent.end = OBD_OBJECT_EOF; - else - policy.l_extent.end = (fmkey->lfik_fiemap.fm_start + - fmkey->lfik_fiemap.fm_length + - PAGE_SIZE - 1) & PAGE_MASK; - - ostid_build_res_name(&fmkey->lfik_oa.o_oi, &resid); - mode = ldlm_lock_match(exp->exp_obd->obd_namespace, - LDLM_FL_BLOCK_GRANTED | LDLM_FL_LVB_READY, - &resid, LDLM_EXTENT, &policy, - LCK_PR | LCK_PW, &lockh, 0); - if (mode) { /* lock is cached on client */ - if (mode != LCK_PR) { - ldlm_lock_addref(&lockh, LCK_PR); - ldlm_lock_decref(&lockh, LCK_PW); - } - } else { /* no cached lock, needs acquire lock on server side */ - fmkey->lfik_oa.o_valid |= OBD_MD_FLFLAGS; - fmkey->lfik_oa.o_flags |= OBD_FL_SRVLOCK; - } - -skip_locking: - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_OST_GET_INFO_FIEMAP); - if (!req) { - rc = -ENOMEM; - goto drop_lock; - } - - req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY, RCL_CLIENT, - sizeof(*fmkey)); - req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_CLIENT, - *buflen); - req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_SERVER, - *buflen); - - rc = 
ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO); - if (rc) { - ptlrpc_request_free(req); - goto drop_lock; - } - tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY); - memcpy(tmp, fmkey, sizeof(*fmkey)); - tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL); - memcpy(tmp, fiemap, *buflen); - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - if (rc) - goto fini_req; - - reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL); - if (!reply) { - rc = -EPROTO; - goto fini_req; - } - - memcpy(fiemap, reply, *buflen); -fini_req: - ptlrpc_req_finished(req); -drop_lock: - if (mode) - ldlm_lock_decref(&lockh, LCK_PR); - return rc; -} - -void osc_object_set_contended(struct osc_object *obj) -{ - obj->oo_contention_time = jiffies; - /* mb(); */ - obj->oo_contended = 1; -} - -void osc_object_clear_contended(struct osc_object *obj) -{ - obj->oo_contended = 0; -} - -int osc_object_is_contended(struct osc_object *obj) -{ - struct osc_device *dev = lu2osc_dev(obj->oo_cl.co_lu.lo_dev); - int osc_contention_time = dev->od_contention_time; - unsigned long cur_time = jiffies; - unsigned long retry_time; - - if (OBD_FAIL_CHECK(OBD_FAIL_OSC_OBJECT_CONTENTION)) - return 1; - - if (!obj->oo_contended) - return 0; - - /* - * I like copy-paste. the code is copied from - * ll_file_is_contended. - */ - retry_time = obj->oo_contention_time + osc_contention_time * HZ; - if (time_after(cur_time, retry_time)) { - osc_object_clear_contended(obj); - return 0; - } - return 1; -} - -/** - * Implementation of struct cl_object_operations::coo_req_attr_set() for osc - * layer. osc is responsible for struct obdo::o_id and struct obdo::o_seq - * fields. 
- */ -static void osc_req_attr_set(const struct lu_env *env, struct cl_object *obj, - struct cl_req_attr *attr) -{ - u64 flags = attr->cra_flags; - struct lov_oinfo *oinfo; - struct ost_lvb *lvb; - struct obdo *oa; - - oinfo = cl2osc(obj)->oo_oinfo; - lvb = &oinfo->loi_lvb; - oa = attr->cra_oa; - - if (flags & OBD_MD_FLMTIME) { - oa->o_mtime = lvb->lvb_mtime; - oa->o_valid |= OBD_MD_FLMTIME; - } - if (flags & OBD_MD_FLATIME) { - oa->o_atime = lvb->lvb_atime; - oa->o_valid |= OBD_MD_FLATIME; - } - if (flags & OBD_MD_FLCTIME) { - oa->o_ctime = lvb->lvb_ctime; - oa->o_valid |= OBD_MD_FLCTIME; - } - if (flags & OBD_MD_FLGROUP) { - ostid_set_seq(&oa->o_oi, ostid_seq(&oinfo->loi_oi)); - oa->o_valid |= OBD_MD_FLGROUP; - } - if (flags & OBD_MD_FLID) { - int rc; - - rc = ostid_set_id(&oa->o_oi, ostid_id(&oinfo->loi_oi)); - if (rc) { - CERROR("Bad %llu to set " DOSTID " : rc %d\n", - (unsigned long long)ostid_id(&oinfo->loi_oi), - POSTID(&oa->o_oi), rc); - } - oa->o_valid |= OBD_MD_FLID; - } - if (flags & OBD_MD_FLHANDLE) { - struct ldlm_lock *lock; - struct osc_page *opg; - - opg = osc_cl_page_osc(attr->cra_page, cl2osc(obj)); - lock = osc_dlmlock_at_pgoff(env, cl2osc(obj), osc_index(opg), - OSC_DAP_FL_TEST_LOCK | OSC_DAP_FL_CANCELING); - if (!lock && !opg->ops_srvlock) { - struct ldlm_resource *res; - struct ldlm_res_id *resname; - - CL_PAGE_DEBUG(D_ERROR, env, attr->cra_page, - "uncovered page!\n"); - - resname = &osc_env_info(env)->oti_resname; - ostid_build_res_name(&oinfo->loi_oi, resname); - res = ldlm_resource_get( - osc_export(cl2osc(obj))->exp_obd->obd_namespace, - NULL, resname, LDLM_EXTENT, 0); - ldlm_resource_dump(D_ERROR, res); - - LBUG(); - } - - /* check for lockless io. 
*/ - if (lock) { - oa->o_handle = lock->l_remote_handle; - oa->o_valid |= OBD_MD_FLHANDLE; - LDLM_LOCK_PUT(lock); - } - } -} - -static const struct cl_object_operations osc_ops = { - .coo_page_init = osc_page_init, - .coo_lock_init = osc_lock_init, - .coo_io_init = osc_io_init, - .coo_attr_get = osc_attr_get, - .coo_attr_update = osc_attr_update, - .coo_glimpse = osc_object_glimpse, - .coo_prune = osc_object_prune, - .coo_fiemap = osc_object_fiemap, - .coo_req_attr_set = osc_req_attr_set -}; - -static const struct lu_object_operations osc_lu_obj_ops = { - .loo_object_init = osc_object_init, - .loo_object_release = NULL, - .loo_object_free = osc_object_free, - .loo_object_print = osc_object_print, - .loo_object_invariant = NULL -}; - -struct lu_object *osc_object_alloc(const struct lu_env *env, - const struct lu_object_header *unused, - struct lu_device *dev) -{ - struct osc_object *osc; - struct lu_object *obj; - - osc = kmem_cache_zalloc(osc_object_kmem, GFP_NOFS); - if (osc) { - obj = osc2lu(osc); - lu_object_init(obj, NULL, dev); - osc->oo_cl.co_ops = &osc_ops; - obj->lo_ops = &osc_lu_obj_ops; - } else { - obj = NULL; - } - return obj; -} - -int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc) -{ - CDEBUG(D_INODE, "Invalidate osc object: %p, # of active IOs: %d\n", - osc, atomic_read(&osc->oo_nr_ios)); - - wait_event_idle(osc->oo_io_waitq, !atomic_read(&osc->oo_nr_ios)); - - /* Discard all dirty pages of this object. */ - osc_cache_truncate_start(env, osc, 0, NULL); - - /* Discard all caching pages */ - osc_lock_discard_pages(env, osc, 0, CL_PAGE_EOF, CLM_WRITE); - - /* Clear ast data of dlm lock. 
Do this after discarding all pages */ - osc_object_prune(env, osc2cl(osc)); - - return 0; -} - -/** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c deleted file mode 100644 index 20c553ef3a5e..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ /dev/null @@ -1,1094 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_page for OSC layer. 
- * - * Author: Nikita Danilov - * Author: Jinshan Xiong - */ - -#define DEBUG_SUBSYSTEM S_OSC - -#include -#include "osc_cl_internal.h" - -static void osc_lru_del(struct client_obd *cli, struct osc_page *opg); -static void osc_lru_use(struct client_obd *cli, struct osc_page *opg); -static int osc_lru_alloc(const struct lu_env *env, struct client_obd *cli, - struct osc_page *opg); - -/** \addtogroup osc - * @{ - */ - -/***************************************************************************** - * - * Page operations. - * - */ -static void osc_page_transfer_get(struct osc_page *opg, const char *label) -{ - struct cl_page *page = opg->ops_cl.cpl_page; - - LASSERT(!opg->ops_transfer_pinned); - cl_page_get(page); - lu_ref_add_atomic(&page->cp_reference, label, page); - opg->ops_transfer_pinned = 1; -} - -static void osc_page_transfer_put(const struct lu_env *env, - struct osc_page *opg) -{ - struct cl_page *page = opg->ops_cl.cpl_page; - - if (opg->ops_transfer_pinned) { - opg->ops_transfer_pinned = 0; - lu_ref_del(&page->cp_reference, "transfer", page); - cl_page_put(env, page); - } -} - -/** - * This is called once for every page when it is submitted for a transfer - * either opportunistic (osc_page_cache_add()), or immediate - * (osc_page_submit()). 
- */ -static void osc_page_transfer_add(const struct lu_env *env, - struct osc_page *opg, enum cl_req_type crt) -{ - struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); - - osc_lru_use(osc_cli(obj), opg); -} - -int osc_page_cache_add(const struct lu_env *env, - const struct cl_page_slice *slice, struct cl_io *io) -{ - struct osc_page *opg = cl2osc_page(slice); - int result; - - osc_page_transfer_get(opg, "transfer\0cache"); - result = osc_queue_async_io(env, io, opg); - if (result != 0) - osc_page_transfer_put(env, opg); - else - osc_page_transfer_add(env, opg, CRT_WRITE); - - return result; -} - -void osc_index2policy(union ldlm_policy_data *policy, - const struct cl_object *obj, - pgoff_t start, pgoff_t end) -{ - memset(policy, 0, sizeof(*policy)); - policy->l_extent.start = cl_offset(obj, start); - policy->l_extent.end = cl_offset(obj, end + 1) - 1; -} - -static const char *osc_list(struct list_head *head) -{ - return list_empty(head) ? "-" : "+"; -} - -static inline unsigned long osc_submit_duration(struct osc_page *opg) -{ - if (opg->ops_submit_time == 0) - return 0; - - return (jiffies - opg->ops_submit_time); -} - -static int osc_page_print(const struct lu_env *env, - const struct cl_page_slice *slice, - void *cookie, lu_printer_t printer) -{ - struct osc_page *opg = cl2osc_page(slice); - struct osc_async_page *oap = &opg->ops_oap; - struct osc_object *obj = cl2osc(slice->cpl_obj); - struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli; - - return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n", - opg, osc_index(opg), - /* 1 */ - oap->oap_magic, oap->oap_cmd, - oap->oap_interrupted, - osc_list(&oap->oap_pending_item), - osc_list(&oap->oap_rpc_item), - /* 2 */ - oap->oap_obj_off, oap->oap_page_off, oap->oap_count, - oap->oap_async_flags, oap->oap_brw_flags, - oap->oap_request, oap->oap_cli, obj, - /* 3 
*/ - opg->ops_transfer_pinned, - osc_submit_duration(opg), opg->ops_srvlock, - /* 4 */ - cli->cl_r_in_flight, cli->cl_w_in_flight, - cli->cl_max_rpcs_in_flight, - cli->cl_avail_grant, - osc_list(&cli->cl_cache_waiters), - osc_list(&cli->cl_loi_ready_list), - osc_list(&cli->cl_loi_hp_ready_list), - osc_list(&cli->cl_loi_write_list), - osc_list(&cli->cl_loi_read_list), - /* 5 */ - osc_list(&obj->oo_ready_item), - osc_list(&obj->oo_hp_ready_item), - osc_list(&obj->oo_write_item), - osc_list(&obj->oo_read_item), - atomic_read(&obj->oo_nr_reads), - osc_list(&obj->oo_reading_exts), - atomic_read(&obj->oo_nr_writes), - osc_list(&obj->oo_hp_exts), - osc_list(&obj->oo_urgent_exts)); -} - -static void osc_page_delete(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - struct osc_page *opg = cl2osc_page(slice); - struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); - int rc; - - CDEBUG(D_TRACE, "%p\n", opg); - osc_page_transfer_put(env, opg); - rc = osc_teardown_async_page(env, obj, opg); - if (rc) { - CL_PAGE_DEBUG(D_ERROR, env, slice->cpl_page, - "Trying to teardown failed: %d\n", rc); - LASSERT(0); - } - - osc_lru_del(osc_cli(obj), opg); - - if (slice->cpl_page->cp_type == CPT_CACHEABLE) { - void *value; - - spin_lock(&obj->oo_tree_lock); - value = radix_tree_delete(&obj->oo_tree, osc_index(opg)); - if (value) - --obj->oo_npages; - spin_unlock(&obj->oo_tree_lock); - - LASSERT(ergo(value, value == opg)); - } -} - -static void osc_page_clip(const struct lu_env *env, - const struct cl_page_slice *slice, int from, int to) -{ - struct osc_page *opg = cl2osc_page(slice); - struct osc_async_page *oap = &opg->ops_oap; - - opg->ops_from = from; - opg->ops_to = to; - spin_lock(&oap->oap_lock); - oap->oap_async_flags |= ASYNC_COUNT_STABLE; - spin_unlock(&oap->oap_lock); -} - -static int osc_page_cancel(const struct lu_env *env, - const struct cl_page_slice *slice) -{ - struct osc_page *opg = cl2osc_page(slice); - int rc = 0; - - /* Check if the transferring against 
this page - * is completed, or not even queued. - */ - if (opg->ops_transfer_pinned) - /* FIXME: may not be interrupted.. */ - rc = osc_cancel_async_page(env, opg); - LASSERT(ergo(rc == 0, opg->ops_transfer_pinned == 0)); - return rc; -} - -static int osc_page_flush(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *io) -{ - struct osc_page *opg = cl2osc_page(slice); - int rc; - - rc = osc_flush_async_page(env, io, opg); - return rc; -} - -static const struct cl_page_operations osc_page_ops = { - .cpo_print = osc_page_print, - .cpo_delete = osc_page_delete, - .cpo_clip = osc_page_clip, - .cpo_cancel = osc_page_cancel, - .cpo_flush = osc_page_flush -}; - -int osc_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) -{ - struct osc_object *osc = cl2osc(obj); - struct osc_page *opg = cl_object_page_slice(obj, page); - int result; - - opg->ops_from = 0; - opg->ops_to = PAGE_SIZE; - - result = osc_prep_async_page(osc, opg, page->cp_vmpage, - cl_offset(obj, index)); - if (result == 0) { - struct osc_io *oio = osc_env_io(env); - - opg->ops_srvlock = osc_io_srvlock(oio); - cl_page_slice_add(page, &opg->ops_cl, obj, index, - &osc_page_ops); - } - INIT_LIST_HEAD(&opg->ops_lru); - - /* reserve an LRU space for this page */ - if (page->cp_type == CPT_CACHEABLE && result == 0) { - result = osc_lru_alloc(env, osc_cli(osc), opg); - if (result == 0) { - spin_lock(&osc->oo_tree_lock); - result = radix_tree_insert(&osc->oo_tree, index, opg); - if (result == 0) - ++osc->oo_npages; - spin_unlock(&osc->oo_tree_lock); - LASSERT(result == 0); - } - } - - return result; -} - -/** - * Helper function called by osc_io_submit() for every page in an immediate - * transfer (i.e., transferred synchronously). 
- */ -void osc_page_submit(const struct lu_env *env, struct osc_page *opg, - enum cl_req_type crt, int brw_flags) -{ - struct osc_async_page *oap = &opg->ops_oap; - - LASSERTF(oap->oap_magic == OAP_MAGIC, "Bad oap magic: oap %p, magic 0x%x\n", - oap, oap->oap_magic); - LASSERT(oap->oap_async_flags & ASYNC_READY); - LASSERT(oap->oap_async_flags & ASYNC_COUNT_STABLE); - - oap->oap_cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ; - oap->oap_page_off = opg->ops_from; - oap->oap_count = opg->ops_to - opg->ops_from; - oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC; - - if (capable(CAP_SYS_RESOURCE)) { - oap->oap_brw_flags |= OBD_BRW_NOQUOTA; - oap->oap_cmd |= OBD_BRW_NOQUOTA; - } - - opg->ops_submit_time = jiffies; - osc_page_transfer_get(opg, "transfer\0imm"); - osc_page_transfer_add(env, opg, crt); -} - -/* --------------- LRU page management ------------------ */ - -/* OSC is a natural place to manage LRU pages as applications are specialized - * to write OSC by OSC. Ideally, if one OSC is used more frequently it should - * occupy more LRU slots. On the other hand, we should avoid using up all LRU - * slots (client_obd::cl_lru_left) otherwise process has to be put into sleep - * for free LRU slots - this will be very bad so the algorithm requires each - * OSC to free slots voluntarily to maintain a reasonable number of free slots - * at any time. - */ -static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq); - -/** - * LRU pages are freed in batch mode. OSC should at least free this - * number of pages to avoid running out of LRU slots. - */ -static inline int lru_shrink_min(struct client_obd *cli) -{ - return cli->cl_max_pages_per_rpc * 2; -} - -/** - * free this number at most otherwise it will take too long time to finish. - */ -static inline int lru_shrink_max(struct client_obd *cli) -{ - return cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight; -} - -/** - * Check if we can free LRU slots from this OSC. 
If there exists LRU waiters, - * we should free slots aggressively. In this way, slots are freed in a steady - * step to maintain fairness among OSCs. - * - * Return how many LRU pages should be freed. - */ -static int osc_cache_too_much(struct client_obd *cli) -{ - struct cl_client_cache *cache = cli->cl_cache; - long pages = atomic_long_read(&cli->cl_lru_in_list); - unsigned long budget; - - budget = cache->ccc_lru_max / (atomic_read(&cache->ccc_users) - 2); - - /* if it's going to run out LRU slots, we should free some, but not - * too much to maintain fairness among OSCs. - */ - if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 2) { - if (pages >= budget) - return lru_shrink_max(cli); - else if (pages >= budget / 2) - return lru_shrink_min(cli); - } else { - time64_t duration = ktime_get_real_seconds(); - long timediff; - - /* knock out pages by duration of no IO activity */ - duration -= cli->cl_lru_last_used; - /* - * The difference shouldn't be more than 70 years - * so we can safely case to a long. Round to - * approximately 1 minute. 
- */ - timediff = (long)(duration >> 6); - if (timediff > 0 && pages >= budget / timediff) - return lru_shrink_min(cli); - } - return 0; -} - -int lru_queue_work(const struct lu_env *env, void *data) -{ - struct client_obd *cli = data; - int count; - - CDEBUG(D_CACHE, "%s: run LRU work for client obd\n", cli_name(cli)); - - count = osc_cache_too_much(cli); - if (count > 0) { - int rc = osc_lru_shrink(env, cli, count, false); - - CDEBUG(D_CACHE, "%s: shrank %d/%d pages from client obd\n", - cli_name(cli), rc, count); - if (rc >= count) { - CDEBUG(D_CACHE, "%s: queue again\n", cli_name(cli)); - ptlrpcd_queue_work(cli->cl_lru_work); - } - } - - return 0; -} - -void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist) -{ - LIST_HEAD(lru); - struct osc_async_page *oap; - long npages = 0; - - list_for_each_entry(oap, plist, oap_pending_item) { - struct osc_page *opg = oap2osc_page(oap); - - if (!opg->ops_in_lru) - continue; - - ++npages; - LASSERT(list_empty(&opg->ops_lru)); - list_add(&opg->ops_lru, &lru); - } - - if (npages > 0) { - spin_lock(&cli->cl_lru_list_lock); - list_splice_tail(&lru, &cli->cl_lru_list); - atomic_long_sub(npages, &cli->cl_lru_busy); - atomic_long_add(npages, &cli->cl_lru_in_list); - cli->cl_lru_last_used = ktime_get_real_seconds(); - spin_unlock(&cli->cl_lru_list_lock); - - if (waitqueue_active(&osc_lru_waitq)) - (void)ptlrpcd_queue_work(cli->cl_lru_work); - } -} - -static void __osc_lru_del(struct client_obd *cli, struct osc_page *opg) -{ - LASSERT(atomic_long_read(&cli->cl_lru_in_list) > 0); - list_del_init(&opg->ops_lru); - atomic_long_dec(&cli->cl_lru_in_list); -} - -/** - * Page is being destroyed. The page may be not in LRU list, if the transfer - * has never finished(error occurred). 
- */ -static void osc_lru_del(struct client_obd *cli, struct osc_page *opg) -{ - if (opg->ops_in_lru) { - spin_lock(&cli->cl_lru_list_lock); - if (!list_empty(&opg->ops_lru)) { - __osc_lru_del(cli, opg); - } else { - LASSERT(atomic_long_read(&cli->cl_lru_busy) > 0); - atomic_long_dec(&cli->cl_lru_busy); - } - spin_unlock(&cli->cl_lru_list_lock); - - atomic_long_inc(cli->cl_lru_left); - /* this is a great place to release more LRU pages if - * this osc occupies too many LRU pages and kernel is - * stealing one of them. - */ - if (osc_cache_too_much(cli)) { - CDEBUG(D_CACHE, "%s: queue LRU work\n", cli_name(cli)); - (void)ptlrpcd_queue_work(cli->cl_lru_work); - } - wake_up(&osc_lru_waitq); - } else { - LASSERT(list_empty(&opg->ops_lru)); - } -} - -/** - * Delete page from LRUlist for redirty. - */ -static void osc_lru_use(struct client_obd *cli, struct osc_page *opg) -{ - /* If page is being transferred for the first time, - * ops_lru should be empty - */ - if (opg->ops_in_lru && !list_empty(&opg->ops_lru)) { - spin_lock(&cli->cl_lru_list_lock); - __osc_lru_del(cli, opg); - spin_unlock(&cli->cl_lru_list_lock); - atomic_long_inc(&cli->cl_lru_busy); - } -} - -static void discard_pagevec(const struct lu_env *env, struct cl_io *io, - struct cl_page **pvec, int max_index) -{ - int i; - - for (i = 0; i < max_index; i++) { - struct cl_page *page = pvec[i]; - - LASSERT(cl_page_is_owned(page, io)); - cl_page_delete(env, page); - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - cl_page_put(env, page); - - pvec[i] = NULL; - } -} - -/** - * Check if a cl_page can be released, i.e, it's not being used. - * - * If unstable account is turned on, bulk transfer may hold one refcount - * for recovery so we need to check vmpage refcount as well; otherwise, - * even we can destroy cl_page but the corresponding vmpage can't be reused. 
- */ -static inline bool lru_page_busy(struct client_obd *cli, struct cl_page *page) -{ - if (cl_page_in_use_noref(page)) - return true; - - if (cli->cl_cache->ccc_unstable_check) { - struct page *vmpage = cl_page_vmpage(page); - - /* vmpage have two known users: cl_page and VM page cache */ - if (page_count(vmpage) - page_mapcount(vmpage) > 2) - return true; - } - return false; -} - -/** - * Drop @target of pages from LRU at most. - */ -long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, - long target, bool force) -{ - struct cl_io *io; - struct cl_object *clobj = NULL; - struct cl_page **pvec; - struct osc_page *opg; - int maxscan = 0; - long count = 0; - int index = 0; - int rc = 0; - - LASSERT(atomic_long_read(&cli->cl_lru_in_list) >= 0); - if (atomic_long_read(&cli->cl_lru_in_list) == 0 || target <= 0) - return 0; - - CDEBUG(D_CACHE, "%s: shrinkers: %d, force: %d\n", - cli_name(cli), atomic_read(&cli->cl_lru_shrinkers), force); - if (!force) { - if (atomic_read(&cli->cl_lru_shrinkers) > 0) - return -EBUSY; - - if (atomic_inc_return(&cli->cl_lru_shrinkers) > 1) { - atomic_dec(&cli->cl_lru_shrinkers); - return -EBUSY; - } - } else { - atomic_inc(&cli->cl_lru_shrinkers); - } - - pvec = (struct cl_page **)osc_env_info(env)->oti_pvec; - io = &osc_env_info(env)->oti_io; - - spin_lock(&cli->cl_lru_list_lock); - if (force) - cli->cl_lru_reclaim++; - maxscan = min(target << 1, atomic_long_read(&cli->cl_lru_in_list)); - while (!list_empty(&cli->cl_lru_list)) { - struct cl_page *page; - bool will_free = false; - - if (!force && atomic_read(&cli->cl_lru_shrinkers) > 1) - break; - - if (--maxscan < 0) - break; - - opg = list_entry(cli->cl_lru_list.next, struct osc_page, - ops_lru); - page = opg->ops_cl.cpl_page; - if (lru_page_busy(cli, page)) { - list_move_tail(&opg->ops_lru, &cli->cl_lru_list); - continue; - } - - LASSERT(page->cp_obj); - if (clobj != page->cp_obj) { - struct cl_object *tmp = page->cp_obj; - - cl_object_get(tmp); - 
spin_unlock(&cli->cl_lru_list_lock); - - if (clobj) { - discard_pagevec(env, io, pvec, index); - index = 0; - - cl_io_fini(env, io); - cl_object_put(env, clobj); - clobj = NULL; - } - - clobj = tmp; - io->ci_obj = clobj; - io->ci_ignore_layout = 1; - rc = cl_io_init(env, io, CIT_MISC, clobj); - - spin_lock(&cli->cl_lru_list_lock); - - if (rc != 0) - break; - - ++maxscan; - continue; - } - - if (cl_page_own_try(env, io, page) == 0) { - if (!lru_page_busy(cli, page)) { - /* remove it from lru list earlier to avoid - * lock contention - */ - __osc_lru_del(cli, opg); - opg->ops_in_lru = 0; /* will be discarded */ - - cl_page_get(page); - will_free = true; - } else { - cl_page_disown(env, io, page); - } - } - - if (!will_free) { - list_move_tail(&opg->ops_lru, &cli->cl_lru_list); - continue; - } - - /* Don't discard and free the page with cl_lru_list held */ - pvec[index++] = page; - if (unlikely(index == OTI_PVEC_SIZE)) { - spin_unlock(&cli->cl_lru_list_lock); - discard_pagevec(env, io, pvec, index); - index = 0; - - spin_lock(&cli->cl_lru_list_lock); - } - - if (++count >= target) - break; - } - spin_unlock(&cli->cl_lru_list_lock); - - if (clobj) { - discard_pagevec(env, io, pvec, index); - - cl_io_fini(env, io); - cl_object_put(env, clobj); - } - - atomic_dec(&cli->cl_lru_shrinkers); - if (count > 0) { - atomic_long_add(count, cli->cl_lru_left); - wake_up_all(&osc_lru_waitq); - } - return count > 0 ? count : rc; -} - -/** - * Reclaim LRU pages by an IO thread. The caller wants to reclaim at least - * \@npages of LRU slots. For performance consideration, it's better to drop - * LRU pages in batch. Therefore, the actual number is adjusted at least - * max_pages_per_rpc. 
- */ -static long osc_lru_reclaim(struct client_obd *cli, unsigned long npages) -{ - struct lu_env *env; - struct cl_client_cache *cache = cli->cl_cache; - int max_scans; - u16 refcheck; - long rc = 0; - - LASSERT(cache); - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return 0; - - npages = max_t(int, npages, cli->cl_max_pages_per_rpc); - CDEBUG(D_CACHE, "%s: start to reclaim %ld pages from LRU\n", - cli_name(cli), npages); - rc = osc_lru_shrink(env, cli, npages, true); - if (rc >= npages) { - CDEBUG(D_CACHE, "%s: reclaimed %ld/%ld pages from LRU\n", - cli_name(cli), rc, npages); - if (osc_cache_too_much(cli) > 0) - ptlrpcd_queue_work(cli->cl_lru_work); - goto out; - } else if (rc > 0) { - npages -= rc; - } - - CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld/%ld, want: %ld\n", - cli_name(cli), cli, atomic_long_read(&cli->cl_lru_in_list), - atomic_long_read(&cli->cl_lru_busy), npages); - - /* Reclaim LRU slots from other client_obd as it can't free enough - * from its own. This should rarely happen. 
- */ - spin_lock(&cache->ccc_lru_lock); - LASSERT(!list_empty(&cache->ccc_lru)); - - cache->ccc_lru_shrinkers++; - list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru); - - max_scans = atomic_read(&cache->ccc_users) - 2; - while (--max_scans > 0 && !list_empty(&cache->ccc_lru)) { - cli = list_entry(cache->ccc_lru.next, struct client_obd, - cl_lru_osc); - - CDEBUG(D_CACHE, "%s: cli %p LRU pages: %ld, busy: %ld.\n", - cli_name(cli), cli, - atomic_long_read(&cli->cl_lru_in_list), - atomic_long_read(&cli->cl_lru_busy)); - - list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru); - if (osc_cache_too_much(cli) > 0) { - spin_unlock(&cache->ccc_lru_lock); - - rc = osc_lru_shrink(env, cli, npages, true); - spin_lock(&cache->ccc_lru_lock); - if (rc >= npages) - break; - if (rc > 0) - npages -= rc; - } - } - spin_unlock(&cache->ccc_lru_lock); - -out: - cl_env_put(env, &refcheck); - CDEBUG(D_CACHE, "%s: cli %p freed %ld pages.\n", - cli_name(cli), cli, rc); - return rc; -} - -/** - * osc_lru_alloc() is called to reserve an LRU slot for a cl_page. - * - * Usually the LRU slots are reserved in osc_io_iter_rw_init(). - * Only in the case that the LRU slots are in extreme shortage, it should - * have reserved enough slots for an IO. 
- */ -static int osc_lru_alloc(const struct lu_env *env, struct client_obd *cli, - struct osc_page *opg) -{ - struct osc_io *oio = osc_env_io(env); - int rc = 0; - - if (!cli->cl_cache) /* shall not be in LRU */ - return 0; - - if (oio->oi_lru_reserved > 0) { - --oio->oi_lru_reserved; - goto out; - } - - LASSERT(atomic_long_read(cli->cl_lru_left) >= 0); - while (!atomic_long_add_unless(cli->cl_lru_left, -1, 0)) { - /* run out of LRU spaces, try to drop some by itself */ - rc = osc_lru_reclaim(cli, 1); - if (rc < 0) - break; - if (rc > 0) - continue; - - cond_resched(); - - rc = l_wait_event_abortable(osc_lru_waitq, - atomic_long_read(cli->cl_lru_left) > 0); - - if (rc < 0) - break; - } - -out: - if (rc >= 0) { - atomic_long_inc(&cli->cl_lru_busy); - opg->ops_in_lru = 1; - rc = 0; - } - - return rc; -} - -/** - * osc_lru_reserve() is called to reserve enough LRU slots for I/O. - * - * The benefit of doing this is to reduce contention against atomic counter - * cl_lru_left by changing it from per-page access to per-IO access. - */ -unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages) -{ - unsigned long reserved = 0; - unsigned long max_pages; - unsigned long c; - - /* - * reserve a full RPC window at most to avoid that a thread accidentally - * consumes too many LRU slots - */ - max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight; - if (npages > max_pages) - npages = max_pages; - - c = atomic_long_read(cli->cl_lru_left); - if (c < npages && osc_lru_reclaim(cli, npages) > 0) - c = atomic_long_read(cli->cl_lru_left); - while (c >= npages) { - if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) { - reserved = npages; - break; - } - c = atomic_long_read(cli->cl_lru_left); - } - if (atomic_long_read(cli->cl_lru_left) < max_pages) { - /* - * If there aren't enough pages in the per-OSC LRU then - * wake up the LRU thread to try and clear out space, so - * we don't block if pages are being dirtied quickly. 
- */ - CDEBUG(D_CACHE, "%s: queue LRU, left: %lu/%ld.\n", - cli_name(cli), atomic_long_read(cli->cl_lru_left), - max_pages); - (void)ptlrpcd_queue_work(cli->cl_lru_work); - } - - return reserved; -} - -/** - * osc_lru_unreserve() is called to unreserve LRU slots. - * - * LRU slots reserved by osc_lru_reserve() may have entries left due to several - * reasons such as page already existing or I/O error. Those reserved slots - * should be freed by calling this function. - */ -void osc_lru_unreserve(struct client_obd *cli, unsigned long npages) -{ - atomic_long_add(npages, cli->cl_lru_left); - wake_up_all(&osc_lru_waitq); -} - -/** - * Atomic operations are expensive. We accumulate the accounting for the - * same page pgdat to get better performance. - * In practice this can work pretty good because the pages in the same RPC - * are likely from the same page zone. - */ -static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc, - int factor) -{ - int page_count = desc->bd_iov_count; - pg_data_t *last = NULL; - int count = 0; - int i; - - LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type)); - - for (i = 0; i < page_count; i++) { - pg_data_t *pgdat = page_pgdat(BD_GET_KIOV(desc, i).bv_page); - - if (likely(pgdat == last)) { - ++count; - continue; - } - - if (count > 0) { - mod_node_page_state(pgdat, NR_UNSTABLE_NFS, - factor * count); - count = 0; - } - last = pgdat; - ++count; - } - if (count > 0) - mod_node_page_state(last, NR_UNSTABLE_NFS, factor * count); -} - -static inline void add_unstable_page_accounting(struct ptlrpc_bulk_desc *desc) -{ - unstable_page_accounting(desc, 1); -} - -static inline void dec_unstable_page_accounting(struct ptlrpc_bulk_desc *desc) -{ - unstable_page_accounting(desc, -1); -} - -/** - * Performs "unstable" page accounting. This function balances the - * increment operations performed in osc_inc_unstable_pages. It is - * registered as the RPC request callback, and is executed when the - * bulk RPC is committed on the server. 
Thus at this point, the pages - * involved in the bulk transfer are no longer considered unstable. - * - * If this function is called, the request should have been committed - * or req:rq_unstable must have been set; it implies that the unstable - * statistic have been added. - */ -void osc_dec_unstable_pages(struct ptlrpc_request *req) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - struct ptlrpc_bulk_desc *desc = req->rq_bulk; - int page_count = desc->bd_iov_count; - long unstable_count; - - LASSERT(page_count >= 0); - dec_unstable_page_accounting(desc); - - unstable_count = atomic_long_sub_return(page_count, - &cli->cl_unstable_count); - LASSERT(unstable_count >= 0); - - unstable_count = atomic_long_sub_return(page_count, - &cli->cl_cache->ccc_unstable_nr); - LASSERT(unstable_count >= 0); - if (!unstable_count) - wake_up_all(&cli->cl_cache->ccc_unstable_waitq); - - if (waitqueue_active(&osc_lru_waitq)) - (void)ptlrpcd_queue_work(cli->cl_lru_work); -} - -/** - * "unstable" page accounting. See: osc_dec_unstable_pages. - */ -void osc_inc_unstable_pages(struct ptlrpc_request *req) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - struct ptlrpc_bulk_desc *desc = req->rq_bulk; - long page_count = desc->bd_iov_count; - - /* No unstable page tracking */ - if (!cli->cl_cache || !cli->cl_cache->ccc_unstable_check) - return; - - add_unstable_page_accounting(desc); - atomic_long_add(page_count, &cli->cl_unstable_count); - atomic_long_add(page_count, &cli->cl_cache->ccc_unstable_nr); - - /* - * If the request has already been committed (i.e. brw_commit - * called via rq_commit_cb), we need to undo the unstable page - * increments we just performed because rq_commit_cb wont be - * called again. 
- */ - spin_lock(&req->rq_lock); - if (unlikely(req->rq_committed)) { - spin_unlock(&req->rq_lock); - - osc_dec_unstable_pages(req); - } else { - req->rq_unstable = 1; - spin_unlock(&req->rq_lock); - } -} - -/** - * Check if it piggybacks SOFT_SYNC flag to OST from this OSC. - * This function will be called by every BRW RPC so it's critical - * to make this function fast. - */ -bool osc_over_unstable_soft_limit(struct client_obd *cli) -{ - long unstable_nr, osc_unstable_count; - - /* Can't check cli->cl_unstable_count, therefore, no soft limit */ - if (!cli->cl_cache || !cli->cl_cache->ccc_unstable_check) - return false; - - osc_unstable_count = atomic_long_read(&cli->cl_unstable_count); - unstable_nr = atomic_long_read(&cli->cl_cache->ccc_unstable_nr); - - CDEBUG(D_CACHE, - "%s: cli: %p unstable pages: %lu, osc unstable pages: %lu\n", - cli_name(cli), cli, unstable_nr, osc_unstable_count); - - /* - * If the LRU slots are in shortage - 25% remaining AND this OSC - * has one full RPC window of unstable pages, it's a good chance - * to piggyback a SOFT_SYNC flag. - * Please notice that the OST won't take immediate response for the - * SOFT_SYNC request so active OSCs will have more chance to carry - * the flag, this is reasonable. 
- */ - return unstable_nr > cli->cl_cache->ccc_lru_max >> 2 && - osc_unstable_count > cli->cl_max_pages_per_rpc * - cli->cl_max_rpcs_in_flight; -} - -/** - * Return how many LRU pages in the cache of all OSC devices - * - * Return: return # of cached LRU pages times reclaimation tendency - * SHRINK_STOP if it cannot do any scanning in this time - */ -unsigned long osc_cache_shrink_count(struct shrinker *sk, - struct shrink_control *sc) -{ - struct client_obd *cli; - unsigned long cached = 0; - - spin_lock(&osc_shrink_lock); - list_for_each_entry(cli, &osc_shrink_list, cl_shrink_list) - cached += atomic_long_read(&cli->cl_lru_in_list); - spin_unlock(&osc_shrink_lock); - - return (cached * sysctl_vfs_cache_pressure) / 100; -} - -/** - * Scan and try to reclaim sc->nr_to_scan cached LRU pages - * - * Return: number of cached LRU pages reclaimed - * SHRINK_STOP if it cannot do any scanning in this time - * - * Linux kernel will loop calling this shrinker scan routine with - * sc->nr_to_scan = SHRINK_BATCH(128 for now) until kernel got enough memory. - * - * If sc->nr_to_scan is 0, the VM is querying the cache size, we don't need - * to scan and try to reclaim LRU pages, just return 0 and - * osc_cache_shrink_count() will report the LRU page number. 
- */ -unsigned long osc_cache_shrink_scan(struct shrinker *sk, - struct shrink_control *sc) -{ - struct client_obd *stop_anchor = NULL; - struct client_obd *cli; - struct lu_env *env; - long shrank = 0; - u16 refcheck; - int rc; - - if (!sc->nr_to_scan) - return 0; - - if (!(sc->gfp_mask & __GFP_FS)) - return SHRINK_STOP; - - env = cl_env_get(&refcheck); - if (IS_ERR(env)) - return SHRINK_STOP; - - spin_lock(&osc_shrink_lock); - while (!list_empty(&osc_shrink_list)) { - cli = list_entry(osc_shrink_list.next, struct client_obd, - cl_shrink_list); - - if (!stop_anchor) - stop_anchor = cli; - else if (cli == stop_anchor) - break; - - list_move_tail(&cli->cl_shrink_list, &osc_shrink_list); - spin_unlock(&osc_shrink_lock); - - /* shrink no more than max_pages_per_rpc for an OSC */ - rc = osc_lru_shrink(env, cli, (sc->nr_to_scan - shrank) > - cli->cl_max_pages_per_rpc ? - cli->cl_max_pages_per_rpc : - sc->nr_to_scan - shrank, true); - if (rc > 0) - shrank += rc; - - if (shrank >= sc->nr_to_scan) - goto out; - - spin_lock(&osc_shrink_lock); - } - spin_unlock(&osc_shrink_lock); - -out: - cl_env_put(env, &refcheck); - - return shrank; -} - -/** @} osc */ diff --git a/drivers/staging/lustre/lustre/osc/osc_quota.c b/drivers/staging/lustre/lustre/osc/osc_quota.c deleted file mode 100644 index 723ec2fb18bf..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_quota.c +++ /dev/null @@ -1,236 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * GPL HEADER END - */ -/* - * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * - * Copyright (c) 2011, 2015, Intel Corporation. - * - * Code originally extracted from quota directory - */ - -#include -#include "osc_internal.h" - -static const struct rhashtable_params quota_hash_params = { - .key_len = sizeof(u32), - .key_offset = offsetof(struct osc_quota_info, oqi_id), - .head_offset = offsetof(struct osc_quota_info, oqi_hash), - .automatic_shrinking = true, -}; - -static inline struct osc_quota_info *osc_oqi_alloc(u32 id) -{ - struct osc_quota_info *oqi; - - oqi = kmem_cache_zalloc(osc_quota_kmem, GFP_NOFS); - if (oqi) - oqi->oqi_id = id; - - return oqi; -} - -int osc_quota_chkdq(struct client_obd *cli, const unsigned int qid[]) -{ - int type; - - for (type = 0; type < MAXQUOTAS; type++) { - struct osc_quota_info *oqi; - - oqi = rhashtable_lookup_fast(&cli->cl_quota_hash[type], &qid[type], - quota_hash_params); - if (oqi) { - /* Must not access oqi here, it could have been - * freed by osc_quota_setdq() - */ - - /* the slot is busy, the user is about to run out of - * quota space on this OST - */ - CDEBUG(D_QUOTA, "chkdq found noquota for %s %d\n", - type == USRQUOTA ? "user" : "grout", qid[type]); - return NO_QUOTA; - } - } - - return QUOTA_OK; -} - -static void osc_quota_free(struct rcu_head *head) -{ - struct osc_quota_info *oqi = container_of(head, struct osc_quota_info, rcu); - - kmem_cache_free(osc_quota_kmem, oqi); -} - - -#define MD_QUOTA_FLAG(type) ((type == USRQUOTA) ? OBD_MD_FLUSRQUOTA \ - : OBD_MD_FLGRPQUOTA) -#define FL_QUOTA_FLAG(type) ((type == USRQUOTA) ? 
OBD_FL_NO_USRQUOTA \ - : OBD_FL_NO_GRPQUOTA) - -int osc_quota_setdq(struct client_obd *cli, const unsigned int qid[], - u32 valid, u32 flags) -{ - int type; - int rc = 0; - - if ((valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) == 0) - return 0; - - for (type = 0; type < MAXQUOTAS; type++) { - struct osc_quota_info *oqi; - - if ((valid & MD_QUOTA_FLAG(type)) == 0) - continue; - - /* lookup the ID in the per-type hash table */ - rcu_read_lock(); - oqi = rhashtable_lookup_fast(&cli->cl_quota_hash[type], &qid[type], - quota_hash_params); - if ((flags & FL_QUOTA_FLAG(type)) != 0) { - /* This ID is getting close to its quota limit, let's - * switch to sync I/O - */ - rcu_read_unlock(); - if (oqi) - continue; - - oqi = osc_oqi_alloc(qid[type]); - if (!oqi) { - rc = -ENOMEM; - break; - } - - rc = rhashtable_lookup_insert_fast(&cli->cl_quota_hash[type], - &oqi->oqi_hash, quota_hash_params); - /* race with others? */ - if (rc) { - kmem_cache_free(osc_quota_kmem, oqi); - if (rc != -EEXIST) { - rc = -ENOMEM; - break; - } - rc = 0; - } - - CDEBUG(D_QUOTA, "%s: setdq to insert for %s %d (%d)\n", - cli_name(cli), - type == USRQUOTA ? "user" : "group", - qid[type], rc); - } else { - /* This ID is now off the hook, let's remove it from - * the hash table - */ - if (!oqi) { - rcu_read_unlock(); - continue; - } - if (rhashtable_remove_fast(&cli->cl_quota_hash[type], - &oqi->oqi_hash, quota_hash_params) == 0) - call_rcu(&oqi->rcu, osc_quota_free); - rcu_read_unlock(); - CDEBUG(D_QUOTA, "%s: setdq to remove for %s %d (%p)\n", - cli_name(cli), - type == USRQUOTA ? 
"user" : "group", - qid[type], oqi); - } - } - - return rc; -} - -static void -oqi_exit(void *vquota, void *data) -{ - struct osc_quota_info *oqi = vquota; - - osc_quota_free(&oqi->rcu); -} - -int osc_quota_setup(struct obd_device *obd) -{ - struct client_obd *cli = &obd->u.cli; - int i, type; - - for (type = 0; type < MAXQUOTAS; type++) { - if (rhashtable_init(&cli->cl_quota_hash[type], "a_hash_params) != 0) - break; - } - - if (type == MAXQUOTAS) - return 0; - - for (i = 0; i < type; i++) - rhashtable_destroy(&cli->cl_quota_hash[i]); - - return -ENOMEM; -} - -int osc_quota_cleanup(struct obd_device *obd) -{ - struct client_obd *cli = &obd->u.cli; - int type; - - for (type = 0; type < MAXQUOTAS; type++) - rhashtable_free_and_destroy(&cli->cl_quota_hash[type], - oqi_exit, NULL); - - return 0; -} - -int osc_quotactl(struct obd_device *unused, struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - struct ptlrpc_request *req; - struct obd_quotactl *oqc; - int rc; - - req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), - &RQF_OST_QUOTACTL, LUSTRE_OST_VERSION, - OST_QUOTACTL); - if (!req) - return -ENOMEM; - - oqc = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - *oqc = *oqctl; - - ptlrpc_request_set_replen(req); - ptlrpc_at_set_req_timeout(req); - req->rq_no_resend = 1; - - rc = ptlrpc_queue_wait(req); - if (rc) - CERROR("ptlrpc_queue_wait failed, rc: %d\n", rc); - - if (req->rq_repmsg) { - oqc = req_capsule_server_get(&req->rq_pill, &RMF_OBD_QUOTACTL); - if (oqc) { - *oqctl = *oqc; - } else if (!rc) { - CERROR("Can't unpack obd_quotactl\n"); - rc = -EPROTO; - } - } else if (!rc) { - CERROR("Can't unpack obd_quotactl\n"); - rc = -EPROTO; - } - ptlrpc_req_finished(req); - - return rc; -} diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c deleted file mode 100644 index 61ef6c8d7a12..000000000000 --- a/drivers/staging/lustre/lustre/osc/osc_request.c +++ /dev/null @@ -1,2907 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_OSC - -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "osc_internal.h" -#include "osc_cl_internal.h" - -atomic_t osc_pool_req_count; -unsigned int osc_reqpool_maxreqcount; -struct ptlrpc_request_pool *osc_rq_pool; - -/* max memory used for request pool, unit is MB */ -static unsigned int osc_reqpool_mem_max = 5; -module_param(osc_reqpool_mem_max, uint, 0444); - -struct osc_brw_async_args { - struct obdo *aa_oa; - int aa_requested_nob; - int aa_nio_count; - u32 aa_page_count; - int aa_resends; - struct brw_page **aa_ppga; - struct client_obd *aa_cli; - struct list_head aa_oaps; - struct list_head aa_exts; -}; - -struct osc_async_args { - struct obd_info *aa_oi; -}; - -struct osc_setattr_args { - struct obdo *sa_oa; - obd_enqueue_update_f sa_upcall; - void *sa_cookie; -}; - -struct osc_fsync_args { - struct osc_object *fa_obj; - struct obdo *fa_oa; - obd_enqueue_update_f fa_upcall; - void *fa_cookie; -}; - -struct osc_enqueue_args { - struct obd_export *oa_exp; - enum ldlm_type oa_type; - enum ldlm_mode oa_mode; - __u64 *oa_flags; - osc_enqueue_upcall_f oa_upcall; - void *oa_cookie; - struct ost_lvb *oa_lvb; - struct lustre_handle oa_lockh; - unsigned int oa_agl:1; -}; - -static void osc_release_ppga(struct brw_page **ppga, u32 count); -static int brw_interpret(const struct lu_env *env, - struct ptlrpc_request *req, void *data, int rc); - -static inline void osc_pack_req_body(struct ptlrpc_request *req, - struct obdo *oa) -{ - struct ost_body *body; - - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); - - lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); -} - -static int osc_getattr(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa) -{ - struct ptlrpc_request *req; - struct ost_body *body; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), 
&RQF_OST_GETATTR); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - osc_pack_req_body(req, oa); - - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - if (rc) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); - if (!body) { - rc = -EPROTO; - goto out; - } - - CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); - lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, - &body->oa); - - oa->o_blksize = cli_brw_size(exp->exp_obd); - oa->o_valid |= OBD_MD_FLBLKSZ; - - out: - ptlrpc_req_finished(req); - return rc; -} - -static int osc_setattr(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa) -{ - struct ptlrpc_request *req; - struct ost_body *body; - int rc; - - LASSERT(oa->o_valid & OBD_MD_FLGROUP); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - osc_pack_req_body(req, oa); - - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - if (rc) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); - if (!body) { - rc = -EPROTO; - goto out; - } - - lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, - &body->oa); - -out: - ptlrpc_req_finished(req); - return rc; -} - -static int osc_setattr_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - struct osc_setattr_args *sa, int rc) -{ - struct ost_body *body; - - if (rc != 0) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); - if (!body) { - rc = -EPROTO; - goto out; - } - - lustre_get_wire_obdo(&req->rq_import->imp_connect_data, sa->sa_oa, - &body->oa); -out: - rc = sa->sa_upcall(sa->sa_cookie, rc); - return rc; -} - -int osc_setattr_async(struct obd_export *exp, struct obdo *oa, - 
obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset) -{ - struct ptlrpc_request *req; - struct osc_setattr_args *sa; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - osc_pack_req_body(req, oa); - - ptlrpc_request_set_replen(req); - - /* do mds to ost setattr asynchronously */ - if (!rqset) { - /* Do not wait for response. */ - ptlrpcd_add_req(req); - } else { - req->rq_interpret_reply = - (ptlrpc_interpterer_t)osc_setattr_interpret; - - BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args)); - sa = ptlrpc_req_async_args(req); - sa->sa_oa = oa; - sa->sa_upcall = upcall; - sa->sa_cookie = cookie; - - if (rqset == PTLRPCD_SET) - ptlrpcd_add_req(req); - else - ptlrpc_set_add_req(rqset, req); - } - - return 0; -} - -static int osc_create(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa) -{ - struct ptlrpc_request *req; - struct ost_body *body; - int rc; - - LASSERT(oa); - LASSERT(oa->o_valid & OBD_MD_FLGROUP); - LASSERT(fid_seq_is_echo(ostid_seq(&oa->o_oi))); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_CREATE); - if (!req) { - rc = -ENOMEM; - goto out; - } - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE); - if (rc) { - ptlrpc_request_free(req); - goto out; - } - - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); - - lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); - - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - if (rc) - goto out_req; - - body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); - if (!body) { - rc = -EPROTO; - goto out_req; - } - - CDEBUG(D_INFO, "oa flags %x\n", oa->o_flags); - lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, &body->oa); - - oa->o_blksize = cli_brw_size(exp->exp_obd); - 
oa->o_valid |= OBD_MD_FLBLKSZ; - - CDEBUG(D_HA, "transno: %lld\n", - lustre_msg_get_transno(req->rq_repmsg)); -out_req: - ptlrpc_req_finished(req); -out: - return rc; -} - -int osc_punch_base(struct obd_export *exp, struct obdo *oa, - obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset) -{ - struct ptlrpc_request *req; - struct osc_setattr_args *sa; - struct ost_body *body; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_PUNCH); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_PUNCH); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ - ptlrpc_at_set_req_timeout(req); - - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); - lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, - oa); - - ptlrpc_request_set_replen(req); - - req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret; - BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args)); - sa = ptlrpc_req_async_args(req); - sa->sa_oa = oa; - sa->sa_upcall = upcall; - sa->sa_cookie = cookie; - if (rqset == PTLRPCD_SET) - ptlrpcd_add_req(req); - else - ptlrpc_set_add_req(rqset, req); - - return 0; -} - -static int osc_sync_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - void *arg, int rc) -{ - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - struct osc_fsync_args *fa = arg; - unsigned long valid = 0; - struct ost_body *body; - struct cl_object *obj; - - if (rc) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); - if (!body) { - CERROR("can't unpack ost_body\n"); - rc = -EPROTO; - goto out; - } - - *fa->fa_oa = body->oa; - obj = osc2cl(fa->fa_obj); - - /* Update osc object's blocks attribute */ - cl_object_attr_lock(obj); - if (body->oa.o_valid & OBD_MD_FLBLOCKS) { - attr->cat_blocks = body->oa.o_blocks; - valid |= CAT_BLOCKS; - } - - if (valid) - 
cl_object_attr_update(env, obj, attr, valid); - cl_object_attr_unlock(obj); - -out: - rc = fa->fa_upcall(fa->fa_cookie, rc); - return rc; -} - -int osc_sync_base(struct osc_object *obj, struct obdo *oa, - obd_enqueue_update_f upcall, void *cookie, - struct ptlrpc_request_set *rqset) -{ - struct obd_export *exp = osc_export(obj); - struct ptlrpc_request *req; - struct ost_body *body; - struct osc_fsync_args *fa; - int rc; - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SYNC); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SYNC); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - /* overload the size and blocks fields in the oa with start/end */ - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); - lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, - oa); - - ptlrpc_request_set_replen(req); - req->rq_interpret_reply = osc_sync_interpret; - - BUILD_BUG_ON(sizeof(*fa) > sizeof(req->rq_async_args)); - fa = ptlrpc_req_async_args(req); - fa->fa_obj = obj; - fa->fa_oa = oa; - fa->fa_upcall = upcall; - fa->fa_cookie = cookie; - - if (rqset == PTLRPCD_SET) - ptlrpcd_add_req(req); - else - ptlrpc_set_add_req(rqset, req); - - return 0; -} - -/* Find and cancel locally locks matched by @mode in the resource found by - * @objid. Found locks are added into @cancel list. Returns the amount of - * locks added to @cancels list. - */ -static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa, - struct list_head *cancels, - enum ldlm_mode mode, __u64 lock_flags) -{ - struct ldlm_namespace *ns = exp->exp_obd->obd_namespace; - struct ldlm_res_id res_id; - struct ldlm_resource *res; - int count; - - /* Return, i.e. cancel nothing, only if ELC is supported (flag in - * export) but disabled through procfs (flag in NS). 
- * - * This distinguishes from a case when ELC is not supported originally, - * when we still want to cancel locks in advance and just cancel them - * locally, without sending any RPC. - */ - if (exp_connect_cancelset(exp) && !ns_connect_cancelset(ns)) - return 0; - - ostid_build_res_name(&oa->o_oi, &res_id); - res = ldlm_resource_get(ns, NULL, &res_id, 0, 0); - if (IS_ERR(res)) - return 0; - - LDLM_RESOURCE_ADDREF(res); - count = ldlm_cancel_resource_local(res, cancels, NULL, mode, - lock_flags, 0, NULL); - LDLM_RESOURCE_DELREF(res); - ldlm_resource_putref(res); - return count; -} - -static int osc_destroy_interpret(const struct lu_env *env, - struct ptlrpc_request *req, void *data, - int rc) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - - atomic_dec(&cli->cl_destroy_in_flight); - wake_up(&cli->cl_destroy_waitq); - return 0; -} - -static int osc_can_send_destroy(struct client_obd *cli) -{ - if (atomic_inc_return(&cli->cl_destroy_in_flight) <= - cli->cl_max_rpcs_in_flight) { - /* The destroy request can be sent */ - return 1; - } - if (atomic_dec_return(&cli->cl_destroy_in_flight) < - cli->cl_max_rpcs_in_flight) { - /* - * The counter has been modified between the two atomic - * operations. 
- */ - wake_up(&cli->cl_destroy_waitq); - } - return 0; -} - -static int osc_destroy(const struct lu_env *env, struct obd_export *exp, - struct obdo *oa) -{ - struct client_obd *cli = &exp->exp_obd->u.cli; - struct ptlrpc_request *req; - struct ost_body *body; - LIST_HEAD(cancels); - int rc, count; - - if (!oa) { - CDEBUG(D_INFO, "oa NULL\n"); - return -EINVAL; - } - - count = osc_resource_get_unused(exp, oa, &cancels, LCK_PW, - LDLM_FL_DISCARD_DATA); - - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_DESTROY); - if (!req) { - ldlm_lock_list_put(&cancels, l_bl_ast, count); - return -ENOMEM; - } - - rc = ldlm_prep_elc_req(exp, req, LUSTRE_OST_VERSION, OST_DESTROY, - 0, &cancels, count); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ - ptlrpc_at_set_req_timeout(req); - - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); - lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); - - ptlrpc_request_set_replen(req); - - req->rq_interpret_reply = osc_destroy_interpret; - if (!osc_can_send_destroy(cli)) { - /* - * Wait until the number of on-going destroy RPCs drops - * under max_rpc_in_flight - */ - l_wait_event_abortable_exclusive(cli->cl_destroy_waitq, - osc_can_send_destroy(cli)); - } - - /* Do not wait for response */ - ptlrpcd_add_req(req); - return 0; -} - -static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, - long writing_bytes) -{ - u32 bits = OBD_MD_FLBLOCKS | OBD_MD_FLGRANT; - - LASSERT(!(oa->o_valid & bits)); - - oa->o_valid |= bits; - spin_lock(&cli->cl_loi_list_lock); - oa->o_dirty = cli->cl_dirty_pages << PAGE_SHIFT; - if (unlikely(cli->cl_dirty_pages - cli->cl_dirty_transit > - cli->cl_dirty_max_pages)) { - CERROR("dirty %lu - %lu > dirty_max %lu\n", - cli->cl_dirty_pages, cli->cl_dirty_transit, - cli->cl_dirty_max_pages); - oa->o_undirty = 0; - } else if (unlikely(atomic_long_read(&obd_dirty_pages) - - 
atomic_long_read(&obd_dirty_transit_pages) > - (long)(obd_max_dirty_pages + 1))) { - /* The atomic_read() allowing the atomic_inc() are - * not covered by a lock thus they may safely race and trip - * this CERROR() unless we add in a small fudge factor (+1). - */ - CERROR("%s: dirty %ld + %ld > system dirty_max %ld\n", - cli_name(cli), atomic_long_read(&obd_dirty_pages), - atomic_long_read(&obd_dirty_transit_pages), - obd_max_dirty_pages); - oa->o_undirty = 0; - } else if (unlikely(cli->cl_dirty_max_pages - cli->cl_dirty_pages > - 0x7fffffff)) { - CERROR("dirty %lu - dirty_max %lu too big???\n", - cli->cl_dirty_pages, cli->cl_dirty_max_pages); - oa->o_undirty = 0; - } else { - unsigned long max_in_flight; - - max_in_flight = (cli->cl_max_pages_per_rpc << PAGE_SHIFT) * - (cli->cl_max_rpcs_in_flight + 1); - oa->o_undirty = max(cli->cl_dirty_max_pages << PAGE_SHIFT, - max_in_flight); - } - oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant; - oa->o_dropped = cli->cl_lost_grant; - cli->cl_lost_grant = 0; - spin_unlock(&cli->cl_loi_list_lock); - CDEBUG(D_CACHE, "dirty: %llu undirty: %u dropped %u grant: %llu\n", - oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant); -} - -void osc_update_next_shrink(struct client_obd *cli) -{ - cli->cl_next_shrink_grant = - jiffies + cli->cl_grant_shrink_interval * HZ; - CDEBUG(D_CACHE, "next time %ld to shrink grant\n", - cli->cl_next_shrink_grant); -} - -static void __osc_update_grant(struct client_obd *cli, u64 grant) -{ - spin_lock(&cli->cl_loi_list_lock); - cli->cl_avail_grant += grant; - spin_unlock(&cli->cl_loi_list_lock); -} - -static void osc_update_grant(struct client_obd *cli, struct ost_body *body) -{ - if (body->oa.o_valid & OBD_MD_FLGRANT) { - CDEBUG(D_CACHE, "got %llu extra grant\n", body->oa.o_grant); - __osc_update_grant(cli, body->oa.o_grant); - } -} - -static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, - u32 keylen, void *key, u32 vallen, - void *val, struct ptlrpc_request_set 
*set); - -static int osc_shrink_grant_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - void *aa, int rc) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - struct obdo *oa = ((struct osc_brw_async_args *)aa)->aa_oa; - struct ost_body *body; - - if (rc != 0) { - __osc_update_grant(cli, oa->o_grant); - goto out; - } - - body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); - osc_update_grant(cli, body); -out: - kmem_cache_free(obdo_cachep, oa); - return rc; -} - -static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa) -{ - spin_lock(&cli->cl_loi_list_lock); - oa->o_grant = cli->cl_avail_grant / 4; - cli->cl_avail_grant -= oa->o_grant; - spin_unlock(&cli->cl_loi_list_lock); - if (!(oa->o_valid & OBD_MD_FLFLAGS)) { - oa->o_valid |= OBD_MD_FLFLAGS; - oa->o_flags = 0; - } - oa->o_flags |= OBD_FL_SHRINK_GRANT; - osc_update_next_shrink(cli); -} - -/* Shrink the current grant, either from some large amount to enough for a - * full set of in-flight RPCs, or if we have already shrunk to that limit - * then to enough for a single RPC. This avoids keeping more grant than - * needed, and avoids shrinking the grant piecemeal. - */ -static int osc_shrink_grant(struct client_obd *cli) -{ - __u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) * - (cli->cl_max_pages_per_rpc << PAGE_SHIFT); - - spin_lock(&cli->cl_loi_list_lock); - if (cli->cl_avail_grant <= target_bytes) - target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT; - spin_unlock(&cli->cl_loi_list_lock); - - return osc_shrink_grant_to_target(cli, target_bytes); -} - -int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes) -{ - int rc = 0; - struct ost_body *body; - - spin_lock(&cli->cl_loi_list_lock); - /* Don't shrink if we are already above or below the desired limit - * We don't want to shrink below a single RPC, as that will negatively - * impact block allocation and long-term performance. 
- */ - if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_SHIFT) - target_bytes = cli->cl_max_pages_per_rpc << PAGE_SHIFT; - - if (target_bytes >= cli->cl_avail_grant) { - spin_unlock(&cli->cl_loi_list_lock); - return 0; - } - spin_unlock(&cli->cl_loi_list_lock); - - body = kzalloc(sizeof(*body), GFP_NOFS); - if (!body) - return -ENOMEM; - - osc_announce_cached(cli, &body->oa, 0); - - spin_lock(&cli->cl_loi_list_lock); - body->oa.o_grant = cli->cl_avail_grant - target_bytes; - cli->cl_avail_grant = target_bytes; - spin_unlock(&cli->cl_loi_list_lock); - if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) { - body->oa.o_valid |= OBD_MD_FLFLAGS; - body->oa.o_flags = 0; - } - body->oa.o_flags |= OBD_FL_SHRINK_GRANT; - osc_update_next_shrink(cli); - - rc = osc_set_info_async(NULL, cli->cl_import->imp_obd->obd_self_export, - sizeof(KEY_GRANT_SHRINK), KEY_GRANT_SHRINK, - sizeof(*body), body, NULL); - if (rc != 0) - __osc_update_grant(cli, body->oa.o_grant); - kfree(body); - return rc; -} - -static int osc_should_shrink_grant(struct client_obd *client) -{ - unsigned long time = jiffies; - unsigned long next_shrink = client->cl_next_shrink_grant; - - if ((client->cl_import->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_GRANT_SHRINK) == 0) - return 0; - - if (time_after_eq(time, next_shrink - 5)) { - /* Get the current RPC size directly, instead of going via: - * cli_brw_size(obd->u.cli.cl_import->imp_obd->obd_self_export) - * Keep comment here so that it can be found by searching. 
- */ - int brw_size = client->cl_max_pages_per_rpc << PAGE_SHIFT; - - if (client->cl_import->imp_state == LUSTRE_IMP_FULL && - client->cl_avail_grant > brw_size) - return 1; - - osc_update_next_shrink(client); - } - return 0; -} - -static int osc_grant_shrink_grant_cb(struct timeout_item *item, void *data) -{ - struct client_obd *client; - - list_for_each_entry(client, &item->ti_obd_list, cl_grant_shrink_list) { - if (osc_should_shrink_grant(client)) - osc_shrink_grant(client); - } - return 0; -} - -static int osc_add_shrink_grant(struct client_obd *client) -{ - int rc; - - rc = ptlrpc_add_timeout_client(client->cl_grant_shrink_interval, - TIMEOUT_GRANT, - osc_grant_shrink_grant_cb, NULL, - &client->cl_grant_shrink_list); - if (rc) { - CERROR("add grant client %s error %d\n", cli_name(client), rc); - return rc; - } - CDEBUG(D_CACHE, "add grant client %s\n", cli_name(client)); - osc_update_next_shrink(client); - return 0; -} - -static int osc_del_shrink_grant(struct client_obd *client) -{ - return ptlrpc_del_timeout_client(&client->cl_grant_shrink_list, - TIMEOUT_GRANT); -} - -static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) -{ - /* - * ocd_grant is the total grant amount we're expect to hold: if we've - * been evicted, it's the new avail_grant amount, cl_dirty_pages will - * drop to 0 as inflight RPCs fail out; otherwise, it's avail_grant + - * dirty. - * - * race is tolerable here: if we're evicted, but imp_state already - * left EVICTED state, then cl_dirty_pages must be 0 already. - */ - spin_lock(&cli->cl_loi_list_lock); - if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED) - cli->cl_avail_grant = ocd->ocd_grant; - else - cli->cl_avail_grant = ocd->ocd_grant - - (cli->cl_dirty_pages << PAGE_SHIFT); - - /* determine the appropriate chunk size used by osc_extent. 
*/ - cli->cl_chunkbits = max_t(int, PAGE_SHIFT, ocd->ocd_blocksize); - spin_unlock(&cli->cl_loi_list_lock); - - CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld chunk bits: %d\n", - cli_name(cli), cli->cl_avail_grant, cli->cl_lost_grant, - cli->cl_chunkbits); - - if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK && - list_empty(&cli->cl_grant_shrink_list)) - osc_add_shrink_grant(cli); -} - -/* We assume that the reason this OSC got a short read is because it read - * beyond the end of a stripe file; i.e. lustre is reading a sparse file - * via the LOV, and it _knows_ it's reading inside the file, it's just that - * this stripe never got written at or beyond this stripe offset yet. - */ -static void handle_short_read(int nob_read, u32 page_count, - struct brw_page **pga) -{ - char *ptr; - int i = 0; - - /* skip bytes read OK */ - while (nob_read > 0) { - LASSERT(page_count > 0); - - if (pga[i]->count > nob_read) { - /* EOF inside this page */ - ptr = kmap(pga[i]->pg) + - (pga[i]->off & ~PAGE_MASK); - memset(ptr + nob_read, 0, pga[i]->count - nob_read); - kunmap(pga[i]->pg); - page_count--; - i++; - break; - } - - nob_read -= pga[i]->count; - page_count--; - i++; - } - - /* zero remaining pages */ - while (page_count-- > 0) { - ptr = kmap(pga[i]->pg) + (pga[i]->off & ~PAGE_MASK); - memset(ptr, 0, pga[i]->count); - kunmap(pga[i]->pg); - i++; - } -} - -static int check_write_rcs(struct ptlrpc_request *req, - int requested_nob, int niocount, - u32 page_count, struct brw_page **pga) -{ - int i; - __u32 *remote_rcs; - - remote_rcs = req_capsule_server_sized_get(&req->rq_pill, &RMF_RCS, - sizeof(*remote_rcs) * - niocount); - if (!remote_rcs) { - CDEBUG(D_INFO, "Missing/short RC vector on BRW_WRITE reply\n"); - return -EPROTO; - } - - /* return error if any niobuf was in error */ - for (i = 0; i < niocount; i++) { - if ((int)remote_rcs[i] < 0) - return remote_rcs[i]; - - if (remote_rcs[i] != 0) { - CDEBUG(D_INFO, "rc[%d] invalid (%d) req %p\n", - i, 
remote_rcs[i], req); - return -EPROTO; - } - } - - if (req->rq_bulk->bd_nob_transferred != requested_nob) { - CERROR("Unexpected # bytes transferred: %d (requested %d)\n", - req->rq_bulk->bd_nob_transferred, requested_nob); - return -EPROTO; - } - - return 0; -} - -static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2) -{ - if (p1->flag != p2->flag) { - unsigned int mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE | - OBD_BRW_SYNC | OBD_BRW_ASYNC | - OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC); - - /* warn if we try to combine flags that we don't know to be - * safe to combine - */ - if (unlikely((p1->flag & mask) != (p2->flag & mask))) { - CWARN("Saw flags 0x%x and 0x%x in the same brw, please report this at http://bugs.whamcloud.com/\n", - p1->flag, p2->flag); - } - return 0; - } - - return (p1->off + p1->count == p2->off); -} - -static u32 osc_checksum_bulk(int nob, u32 pg_count, - struct brw_page **pga, int opc, - enum cksum_type cksum_type) -{ - __u32 cksum; - int i = 0; - struct ahash_request *hdesc; - unsigned int bufsize; - unsigned char cfs_alg = cksum_obd2cfs(cksum_type); - - LASSERT(pg_count > 0); - - hdesc = cfs_crypto_hash_init(cfs_alg, NULL, 0); - if (IS_ERR(hdesc)) { - CERROR("Unable to initialize checksum hash %s\n", - cfs_crypto_hash_name(cfs_alg)); - return PTR_ERR(hdesc); - } - - while (nob > 0 && pg_count > 0) { - unsigned int count = pga[i]->count > nob ? 
nob : pga[i]->count; - - /* corrupt the data before we compute the checksum, to - * simulate an OST->client data error - */ - if (i == 0 && opc == OST_READ && - OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) { - unsigned char *ptr = kmap(pga[i]->pg); - int off = pga[i]->off & ~PAGE_MASK; - - memcpy(ptr + off, "bad1", min_t(typeof(nob), 4, nob)); - kunmap(pga[i]->pg); - } - cfs_crypto_hash_update_page(hdesc, pga[i]->pg, - pga[i]->off & ~PAGE_MASK, - count); - CDEBUG(D_PAGE, - "page %p map %p index %lu flags %lx count %u priv %0lx: off %d\n", - pga[i]->pg, pga[i]->pg->mapping, pga[i]->pg->index, - (long)pga[i]->pg->flags, page_count(pga[i]->pg), - page_private(pga[i]->pg), - (int)(pga[i]->off & ~PAGE_MASK)); - - nob -= pga[i]->count; - pg_count--; - i++; - } - - bufsize = sizeof(cksum); - cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize); - - /* For sending we only compute the wrong checksum instead - * of corrupting the data so it is still correct on a redo - */ - if (opc == OST_WRITE && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND)) - cksum++; - - return cksum; -} - -static int osc_brw_prep_request(int cmd, struct client_obd *cli, - struct obdo *oa, u32 page_count, - struct brw_page **pga, - struct ptlrpc_request **reqp, - int reserve, - int resend) -{ - struct ptlrpc_request *req; - struct ptlrpc_bulk_desc *desc; - struct ost_body *body; - struct obd_ioobj *ioobj; - struct niobuf_remote *niobuf; - int niocount, i, requested_nob, opc, rc; - struct osc_brw_async_args *aa; - struct req_capsule *pill; - struct brw_page *pg_prev; - - if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ)) - return -ENOMEM; /* Recoverable */ - if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ2)) - return -EINVAL; /* Fatal */ - - if ((cmd & OBD_BRW_WRITE) != 0) { - opc = OST_WRITE; - req = ptlrpc_request_alloc_pool(cli->cl_import, - osc_rq_pool, - &RQF_OST_BRW_WRITE); - } else { - opc = OST_READ; - req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_BRW_READ); - } - if (!req) - return 
-ENOMEM; - - for (niocount = i = 1; i < page_count; i++) { - if (!can_merge_pages(pga[i - 1], pga[i])) - niocount++; - } - - pill = &req->rq_pill; - req_capsule_set_size(pill, &RMF_OBD_IOOBJ, RCL_CLIENT, - sizeof(*ioobj)); - req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT, - niocount * sizeof(*niobuf)); - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, opc); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ - ptlrpc_at_set_req_timeout(req); - /* ask ptlrpc not to resend on EINPROGRESS since BRWs have their own - * retry logic - */ - req->rq_no_retry_einprogress = 1; - - desc = ptlrpc_prep_bulk_imp(req, page_count, - cli->cl_import->imp_connect_data.ocd_brw_size >> LNET_MTU_BITS, - (opc == OST_WRITE ? PTLRPC_BULK_GET_SOURCE : - PTLRPC_BULK_PUT_SINK) | PTLRPC_BULK_BUF_KIOV, OST_BULK_PORTAL, - &ptlrpc_bulk_kiov_pin_ops); - - if (!desc) { - rc = -ENOMEM; - goto out; - } - /* NB request now owns desc and will free it when it gets freed */ - - body = req_capsule_client_get(pill, &RMF_OST_BODY); - ioobj = req_capsule_client_get(pill, &RMF_OBD_IOOBJ); - niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE); - LASSERT(body && ioobj && niobuf); - - lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); - - obdo_to_ioobj(oa, ioobj); - ioobj->ioo_bufcnt = niocount; - /* The high bits of ioo_max_brw tells server _maximum_ number of bulks - * that might be send for this request. The actual number is decided - * when the RPC is finally sent in ptlrpc_register_bulk(). It sends - * "max - 1" for old client compatibility sending "0", and also so the - * the actual maximum is a power-of-two number, not one less. 
LU-1431 - */ - ioobj_max_brw_set(ioobj, desc->bd_md_max_brw); - LASSERT(page_count > 0); - pg_prev = pga[0]; - for (requested_nob = i = 0; i < page_count; i++, niobuf++) { - struct brw_page *pg = pga[i]; - int poff = pg->off & ~PAGE_MASK; - - LASSERT(pg->count > 0); - /* make sure there is no gap in the middle of page array */ - LASSERTF(page_count == 1 || - (ergo(i == 0, poff + pg->count == PAGE_SIZE) && - ergo(i > 0 && i < page_count - 1, - poff == 0 && pg->count == PAGE_SIZE) && - ergo(i == page_count - 1, poff == 0)), - "i: %d/%d pg: %p off: %llu, count: %u\n", - i, page_count, pg, pg->off, pg->count); - LASSERTF(i == 0 || pg->off > pg_prev->off, - "i %d p_c %u pg %p [pri %lu ind %lu] off %llu prev_pg %p [pri %lu ind %lu] off %llu\n", - i, page_count, - pg->pg, page_private(pg->pg), pg->pg->index, pg->off, - pg_prev->pg, page_private(pg_prev->pg), - pg_prev->pg->index, pg_prev->off); - LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) == - (pg->flag & OBD_BRW_SRVLOCK)); - - desc->bd_frag_ops->add_kiov_frag(desc, pg->pg, poff, pg->count); - requested_nob += pg->count; - - if (i > 0 && can_merge_pages(pg_prev, pg)) { - niobuf--; - niobuf->rnb_len += pg->count; - } else { - niobuf->rnb_offset = pg->off; - niobuf->rnb_len = pg->count; - niobuf->rnb_flags = pg->flag; - } - pg_prev = pg; - } - - LASSERTF((void *)(niobuf - niocount) == - req_capsule_client_get(&req->rq_pill, &RMF_NIOBUF_REMOTE), - "want %p - real %p\n", req_capsule_client_get(&req->rq_pill, - &RMF_NIOBUF_REMOTE), (void *)(niobuf - niocount)); - - osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? 
requested_nob:0); - if (resend) { - if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) { - body->oa.o_valid |= OBD_MD_FLFLAGS; - body->oa.o_flags = 0; - } - body->oa.o_flags |= OBD_FL_RECOV_RESEND; - } - - if (osc_should_shrink_grant(cli)) - osc_shrink_grant_local(cli, &body->oa); - - /* size[REQ_REC_OFF] still sizeof (*body) */ - if (opc == OST_WRITE) { - if (cli->cl_checksum && - !sptlrpc_flavor_has_bulk(&req->rq_flvr)) { - /* store cl_cksum_type in a local variable since - * it can be changed via lprocfs - */ - enum cksum_type cksum_type = cli->cl_cksum_type; - - if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) { - oa->o_flags &= OBD_FL_LOCAL_MASK; - body->oa.o_flags = 0; - } - body->oa.o_flags |= cksum_type_pack(cksum_type); - body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS; - body->oa.o_cksum = osc_checksum_bulk(requested_nob, - page_count, pga, - OST_WRITE, - cksum_type); - CDEBUG(D_PAGE, "checksum at write origin: %x\n", - body->oa.o_cksum); - /* save this in 'oa', too, for later checking */ - oa->o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS; - oa->o_flags |= cksum_type_pack(cksum_type); - } else { - /* clear out the checksum flag, in case this is a - * resend but cl_checksum is no longer set. 
b=11238 - */ - oa->o_valid &= ~OBD_MD_FLCKSUM; - } - oa->o_cksum = body->oa.o_cksum; - /* 1 RC per niobuf */ - req_capsule_set_size(pill, &RMF_RCS, RCL_SERVER, - sizeof(__u32) * niocount); - } else { - if (cli->cl_checksum && - !sptlrpc_flavor_has_bulk(&req->rq_flvr)) { - if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) - body->oa.o_flags = 0; - body->oa.o_flags |= cksum_type_pack(cli->cl_cksum_type); - body->oa.o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS; - } - } - ptlrpc_request_set_replen(req); - - BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); - aa = ptlrpc_req_async_args(req); - aa->aa_oa = oa; - aa->aa_requested_nob = requested_nob; - aa->aa_nio_count = niocount; - aa->aa_page_count = page_count; - aa->aa_resends = 0; - aa->aa_ppga = pga; - aa->aa_cli = cli; - INIT_LIST_HEAD(&aa->aa_oaps); - - *reqp = req; - niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE); - CDEBUG(D_RPCTRACE, "brw rpc %p - object " DOSTID " offset %lld<>%lld\n", - req, POSTID(&oa->o_oi), niobuf[0].rnb_offset, - niobuf[niocount - 1].rnb_offset + niobuf[niocount - 1].rnb_len); - - return 0; - - out: - ptlrpc_req_finished(req); - return rc; -} - -static int check_write_checksum(struct obdo *oa, - const struct lnet_process_id *peer, - __u32 client_cksum, __u32 server_cksum, int nob, - u32 page_count, struct brw_page **pga, - enum cksum_type client_cksum_type) -{ - __u32 new_cksum; - char *msg; - enum cksum_type cksum_type; - - if (server_cksum == client_cksum) { - CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum); - return 0; - } - - cksum_type = cksum_type_unpack(oa->o_valid & OBD_MD_FLFLAGS ? 
- oa->o_flags : 0); - new_cksum = osc_checksum_bulk(nob, page_count, pga, OST_WRITE, - cksum_type); - - if (cksum_type != client_cksum_type) - msg = "the server did not use the checksum type specified in the original request - likely a protocol problem" - ; - else if (new_cksum == server_cksum) - msg = "changed on the client after we checksummed it - likely false positive due to mmap IO (bug 11742)" - ; - else if (new_cksum == client_cksum) - msg = "changed in transit before arrival at OST"; - else - msg = "changed in transit AND doesn't match the original - likely false positive due to mmap IO (bug 11742)" - ; - - LCONSOLE_ERROR_MSG(0x132, "BAD WRITE CHECKSUM: %s: from %s inode " DFID " object " DOSTID " extent [%llu-%llu]\n", - msg, libcfs_nid2str(peer->nid), - oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : (__u64)0, - oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0, - oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0, - POSTID(&oa->o_oi), pga[0]->off, - pga[page_count - 1]->off + - pga[page_count - 1]->count - 1); - CERROR("original client csum %x (type %x), server csum %x (type %x), client csum now %x\n", - client_cksum, client_cksum_type, - server_cksum, cksum_type, new_cksum); - return 1; -} - -/* Note rc enters this function as number of bytes transferred */ -static int osc_brw_fini_request(struct ptlrpc_request *req, int rc) -{ - struct osc_brw_async_args *aa = (void *)&req->rq_async_args; - const struct lnet_process_id *peer = - &req->rq_import->imp_connection->c_peer; - struct client_obd *cli = aa->aa_cli; - struct ost_body *body; - __u32 client_cksum = 0; - - if (rc < 0 && rc != -EDQUOT) { - DEBUG_REQ(D_INFO, req, "Failed request with rc = %d\n", rc); - return rc; - } - - LASSERTF(req->rq_repmsg, "rc = %d\n", rc); - body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); - if (!body) { - DEBUG_REQ(D_INFO, req, "Can't unpack body\n"); - return -EPROTO; - } - - /* set/clear over quota flag for a uid/gid */ - if (lustre_msg_get_opc(req->rq_reqmsg) 
== OST_WRITE && - body->oa.o_valid & (OBD_MD_FLUSRQUOTA | OBD_MD_FLGRPQUOTA)) { - unsigned int qid[MAXQUOTAS] = { body->oa.o_uid, body->oa.o_gid }; - - CDEBUG(D_QUOTA, "setdq for [%u %u] with valid %#llx, flags %x\n", - body->oa.o_uid, body->oa.o_gid, body->oa.o_valid, - body->oa.o_flags); - osc_quota_setdq(cli, qid, body->oa.o_valid, body->oa.o_flags); - } - - osc_update_grant(cli, body); - - if (rc < 0) - return rc; - - if (aa->aa_oa->o_valid & OBD_MD_FLCKSUM) - client_cksum = aa->aa_oa->o_cksum; /* save for later */ - - if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) { - if (rc > 0) { - CERROR("Unexpected +ve rc %d\n", rc); - return -EPROTO; - } - LASSERT(req->rq_bulk->bd_nob == aa->aa_requested_nob); - - if (sptlrpc_cli_unwrap_bulk_write(req, req->rq_bulk)) - return -EAGAIN; - - if ((aa->aa_oa->o_valid & OBD_MD_FLCKSUM) && client_cksum && - check_write_checksum(&body->oa, peer, client_cksum, - body->oa.o_cksum, aa->aa_requested_nob, - aa->aa_page_count, aa->aa_ppga, - cksum_type_unpack(aa->aa_oa->o_flags))) - return -EAGAIN; - - rc = check_write_rcs(req, aa->aa_requested_nob, - aa->aa_nio_count, - aa->aa_page_count, aa->aa_ppga); - goto out; - } - - /* The rest of this function executes only for OST_READs */ - - /* if unwrap_bulk failed, return -EAGAIN to retry */ - rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, rc); - if (rc < 0) { - rc = -EAGAIN; - goto out; - } - - if (rc > aa->aa_requested_nob) { - CERROR("Unexpected rc %d (%d requested)\n", rc, - aa->aa_requested_nob); - return -EPROTO; - } - - if (rc != req->rq_bulk->bd_nob_transferred) { - CERROR("Unexpected rc %d (%d transferred)\n", - rc, req->rq_bulk->bd_nob_transferred); - return -EPROTO; - } - - if (rc < aa->aa_requested_nob) - handle_short_read(rc, aa->aa_page_count, aa->aa_ppga); - - if (body->oa.o_valid & OBD_MD_FLCKSUM) { - static int cksum_counter; - __u32 server_cksum = body->oa.o_cksum; - char *via = ""; - char *router = ""; - enum cksum_type cksum_type; - - cksum_type = 
cksum_type_unpack(body->oa.o_valid & - OBD_MD_FLFLAGS ? - body->oa.o_flags : 0); - client_cksum = osc_checksum_bulk(rc, aa->aa_page_count, - aa->aa_ppga, OST_READ, - cksum_type); - - if (peer->nid != req->rq_bulk->bd_sender) { - via = " via "; - router = libcfs_nid2str(req->rq_bulk->bd_sender); - } - - if (server_cksum != client_cksum) { - LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from %s%s%s inode " DFID " object " DOSTID " extent [%llu-%llu]\n", - req->rq_import->imp_obd->obd_name, - libcfs_nid2str(peer->nid), - via, router, - body->oa.o_valid & OBD_MD_FLFID ? - body->oa.o_parent_seq : (__u64)0, - body->oa.o_valid & OBD_MD_FLFID ? - body->oa.o_parent_oid : 0, - body->oa.o_valid & OBD_MD_FLFID ? - body->oa.o_parent_ver : 0, - POSTID(&body->oa.o_oi), - aa->aa_ppga[0]->off, - aa->aa_ppga[aa->aa_page_count-1]->off + - aa->aa_ppga[aa->aa_page_count-1]->count - - 1); - CERROR("client %x, server %x, cksum_type %x\n", - client_cksum, server_cksum, cksum_type); - cksum_counter = 0; - aa->aa_oa->o_cksum = client_cksum; - rc = -EAGAIN; - } else { - cksum_counter++; - CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum); - rc = 0; - } - } else if (unlikely(client_cksum)) { - static int cksum_missed; - - cksum_missed++; - if ((cksum_missed & (-cksum_missed)) == cksum_missed) - CERROR("Checksum %u requested from %s but not sent\n", - cksum_missed, libcfs_nid2str(peer->nid)); - } else { - rc = 0; - } -out: - if (rc >= 0) - lustre_get_wire_obdo(&req->rq_import->imp_connect_data, - aa->aa_oa, &body->oa); - - return rc; -} - -static int osc_brw_redo_request(struct ptlrpc_request *request, - struct osc_brw_async_args *aa, int rc) -{ - struct ptlrpc_request *new_req; - struct osc_brw_async_args *new_aa; - struct osc_async_page *oap; - - DEBUG_REQ(rc == -EINPROGRESS ? D_RPCTRACE : D_ERROR, request, - "redo for recoverable error %d", rc); - - rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) == - OST_WRITE ? 
OBD_BRW_WRITE : OBD_BRW_READ, - aa->aa_cli, aa->aa_oa, - aa->aa_page_count, aa->aa_ppga, - &new_req, 0, 1); - if (rc) - return rc; - - list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) { - if (oap->oap_request) { - LASSERTF(request == oap->oap_request, - "request %p != oap_request %p\n", - request, oap->oap_request); - if (oap->oap_interrupted) { - ptlrpc_req_finished(new_req); - return -EINTR; - } - } - } - /* New request takes over pga and oaps from old request. - * Note that copying a list_head doesn't work, need to move it... - */ - aa->aa_resends++; - new_req->rq_interpret_reply = request->rq_interpret_reply; - new_req->rq_async_args = request->rq_async_args; - new_req->rq_commit_cb = request->rq_commit_cb; - /* cap resend delay to the current request timeout, this is similar to - * what ptlrpc does (see after_reply()) - */ - if (aa->aa_resends > new_req->rq_timeout) - new_req->rq_sent = ktime_get_real_seconds() + new_req->rq_timeout; - else - new_req->rq_sent = ktime_get_real_seconds() + aa->aa_resends; - new_req->rq_generation_set = 1; - new_req->rq_import_generation = request->rq_import_generation; - - new_aa = ptlrpc_req_async_args(new_req); - - INIT_LIST_HEAD(&new_aa->aa_oaps); - list_splice_init(&aa->aa_oaps, &new_aa->aa_oaps); - INIT_LIST_HEAD(&new_aa->aa_exts); - list_splice_init(&aa->aa_exts, &new_aa->aa_exts); - new_aa->aa_resends = aa->aa_resends; - - list_for_each_entry(oap, &new_aa->aa_oaps, oap_rpc_item) { - if (oap->oap_request) { - ptlrpc_req_finished(oap->oap_request); - oap->oap_request = ptlrpc_request_addref(new_req); - } - } - - /* XXX: This code will run into problem if we're going to support - * to add a series of BRW RPCs into a self-defined ptlrpc_request_set - * and wait for all of them to be finished. We should inherit request - * set from old request. - */ - ptlrpcd_add_req(new_req); - - DEBUG_REQ(D_INFO, new_req, "new request"); - return 0; -} - -/* - * ugh, we want disk allocation on the target to happen in offset order. 
we'll - * follow sedgewicks advice and stick to the dead simple shellsort -- it'll do - * fine for our small page arrays and doesn't require allocation. its an - * insertion sort that swaps elements that are strides apart, shrinking the - * stride down until its '1' and the array is sorted. - */ -static void sort_brw_pages(struct brw_page **array, int num) -{ - int stride, i, j; - struct brw_page *tmp; - - if (num == 1) - return; - for (stride = 1; stride < num ; stride = (stride * 3) + 1) - ; - - do { - stride /= 3; - for (i = stride ; i < num ; i++) { - tmp = array[i]; - j = i; - while (j >= stride && array[j - stride]->off > tmp->off) { - array[j] = array[j - stride]; - j -= stride; - } - array[j] = tmp; - } - } while (stride > 1); -} - -static void osc_release_ppga(struct brw_page **ppga, u32 count) -{ - LASSERT(ppga); - kfree(ppga); -} - -static int brw_interpret(const struct lu_env *env, - struct ptlrpc_request *req, void *data, int rc) -{ - struct osc_brw_async_args *aa = data; - struct osc_extent *ext; - struct osc_extent *tmp; - struct client_obd *cli = aa->aa_cli; - - rc = osc_brw_fini_request(req, rc); - CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc); - /* When server return -EINPROGRESS, client should always retry - * regardless of the number of times the bulk was resent already. 
- */ - if (osc_recoverable_error(rc)) { - if (req->rq_import_generation != - req->rq_import->imp_generation) { - CDEBUG(D_HA, "%s: resend cross eviction for object: " DOSTID ", rc = %d.\n", - req->rq_import->imp_obd->obd_name, - POSTID(&aa->aa_oa->o_oi), rc); - } else if (rc == -EINPROGRESS || - client_should_resend(aa->aa_resends, aa->aa_cli)) { - rc = osc_brw_redo_request(req, aa, rc); - } else { - CERROR("%s: too many resent retries for object: %llu:%llu, rc = %d.\n", - req->rq_import->imp_obd->obd_name, - POSTID(&aa->aa_oa->o_oi), rc); - } - - if (rc == 0) - return 0; - else if (rc == -EAGAIN || rc == -EINPROGRESS) - rc = -EIO; - } - - if (rc == 0) { - struct obdo *oa = aa->aa_oa; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - unsigned long valid = 0; - struct cl_object *obj; - struct osc_async_page *last; - - last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]); - obj = osc2cl(last->oap_obj); - - cl_object_attr_lock(obj); - if (oa->o_valid & OBD_MD_FLBLOCKS) { - attr->cat_blocks = oa->o_blocks; - valid |= CAT_BLOCKS; - } - if (oa->o_valid & OBD_MD_FLMTIME) { - attr->cat_mtime = oa->o_mtime; - valid |= CAT_MTIME; - } - if (oa->o_valid & OBD_MD_FLATIME) { - attr->cat_atime = oa->o_atime; - valid |= CAT_ATIME; - } - if (oa->o_valid & OBD_MD_FLCTIME) { - attr->cat_ctime = oa->o_ctime; - valid |= CAT_CTIME; - } - - if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) { - struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; - loff_t last_off = last->oap_count + last->oap_obj_off + - last->oap_page_off; - - /* Change file size if this is an out of quota or - * direct IO write and it extends the file size - */ - if (loi->loi_lvb.lvb_size < last_off) { - attr->cat_size = last_off; - valid |= CAT_SIZE; - } - /* Extend KMS if it's not a lockless write */ - if (loi->loi_kms < last_off && - oap2osc_page(last)->ops_srvlock == 0) { - attr->cat_kms = last_off; - valid |= CAT_KMS; - } - } - - if (valid != 0) - cl_object_attr_update(env, obj, attr, valid); - 
cl_object_attr_unlock(obj); - } - kmem_cache_free(obdo_cachep, aa->aa_oa); - - if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE && rc == 0) - osc_inc_unstable_pages(req); - - list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) { - list_del_init(&ext->oe_link); - osc_extent_finish(env, ext, 1, rc); - } - LASSERT(list_empty(&aa->aa_exts)); - LASSERT(list_empty(&aa->aa_oaps)); - - osc_release_ppga(aa->aa_ppga, aa->aa_page_count); - ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred); - - spin_lock(&cli->cl_loi_list_lock); - /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters - * is called so we know whether to go to sync BRWs or wait for more - * RPCs to complete - */ - if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) - cli->cl_w_in_flight--; - else - cli->cl_r_in_flight--; - osc_wake_cache_waiters(cli); - spin_unlock(&cli->cl_loi_list_lock); - - osc_io_unplug(env, cli, NULL); - return rc; -} - -static void brw_commit(struct ptlrpc_request *req) -{ - /* - * If osc_inc_unstable_pages (via osc_extent_finish) races with - * this called via the rq_commit_cb, I need to ensure - * osc_dec_unstable_pages is still called. Otherwise unstable - * pages may be leaked. - */ - spin_lock(&req->rq_lock); - if (unlikely(req->rq_unstable)) { - req->rq_unstable = 0; - spin_unlock(&req->rq_lock); - osc_dec_unstable_pages(req); - } else { - req->rq_committed = 1; - spin_unlock(&req->rq_lock); - } -} - -/** - * Build an RPC by the list of extent @ext_list. The caller must ensure - * that the total pages in this list are NOT over max pages per RPC. - * Extents in the list must be in OES_RPC state. 
- */ -int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, - struct list_head *ext_list, int cmd) -{ - struct ptlrpc_request *req = NULL; - struct osc_extent *ext; - struct brw_page **pga = NULL; - struct osc_brw_async_args *aa = NULL; - struct obdo *oa = NULL; - struct osc_async_page *oap; - struct osc_object *obj = NULL; - struct cl_req_attr *crattr = NULL; - u64 starting_offset = OBD_OBJECT_EOF; - u64 ending_offset = 0; - unsigned int mpflag = 0; - int mem_tight = 0; - int page_count = 0; - bool soft_sync = false; - bool interrupted = false; - int i; - int rc; - struct ost_body *body; - LIST_HEAD(rpc_list); - - LASSERT(!list_empty(ext_list)); - - /* add pages into rpc_list to build BRW rpc */ - list_for_each_entry(ext, ext_list, oe_link) { - LASSERT(ext->oe_state == OES_RPC); - mem_tight |= ext->oe_memalloc; - page_count += ext->oe_nr_pages; - if (!obj) - obj = ext->oe_obj; - } - - soft_sync = osc_over_unstable_soft_limit(cli); - if (mem_tight) - mpflag = memalloc_noreclaim_save(); - - pga = kcalloc(page_count, sizeof(*pga), GFP_NOFS); - if (!pga) { - rc = -ENOMEM; - goto out; - } - - oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); - if (!oa) { - rc = -ENOMEM; - goto out; - } - - i = 0; - list_for_each_entry(ext, ext_list, oe_link) { - list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { - if (mem_tight) - oap->oap_brw_flags |= OBD_BRW_MEMALLOC; - if (soft_sync) - oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC; - pga[i] = &oap->oap_brw_page; - pga[i]->off = oap->oap_obj_off + oap->oap_page_off; - i++; - - list_add_tail(&oap->oap_rpc_item, &rpc_list); - if (starting_offset == OBD_OBJECT_EOF || - starting_offset > oap->oap_obj_off) - starting_offset = oap->oap_obj_off; - else - LASSERT(!oap->oap_page_off); - if (ending_offset < oap->oap_obj_off + oap->oap_count) - ending_offset = oap->oap_obj_off + - oap->oap_count; - else - LASSERT(oap->oap_page_off + oap->oap_count == - PAGE_SIZE); - if (oap->oap_interrupted) - interrupted = true; - } - } - - /* 
first page in the list */ - oap = list_entry(rpc_list.next, typeof(*oap), oap_rpc_item); - - crattr = &osc_env_info(env)->oti_req_attr; - memset(crattr, 0, sizeof(*crattr)); - crattr->cra_type = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ; - crattr->cra_flags = ~0ULL; - crattr->cra_page = oap2cl_page(oap); - crattr->cra_oa = oa; - cl_req_attr_set(env, osc2cl(obj), crattr); - - sort_brw_pages(pga, page_count); - rc = osc_brw_prep_request(cmd, cli, oa, page_count, pga, &req, 1, 0); - if (rc != 0) { - CERROR("prep_req failed: %d\n", rc); - goto out; - } - - req->rq_commit_cb = brw_commit; - req->rq_interpret_reply = brw_interpret; - - req->rq_memalloc = mem_tight != 0; - oap->oap_request = ptlrpc_request_addref(req); - if (interrupted && !req->rq_intr) - ptlrpc_mark_interrupted(req); - - /* Need to update the timestamps after the request is built in case - * we race with setattr (locally or in queue at OST). If OST gets - * later setattr before earlier BRW (as determined by the request xid), - * the OST will not use BRW timestamps. Sadly, there is no obvious - * way to do this in a single call. 
bug 10150 - */ - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - crattr->cra_oa = &body->oa; - crattr->cra_flags = OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME; - cl_req_attr_set(env, osc2cl(obj), crattr); - lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid); - - BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); - aa = ptlrpc_req_async_args(req); - INIT_LIST_HEAD(&aa->aa_oaps); - list_splice_init(&rpc_list, &aa->aa_oaps); - INIT_LIST_HEAD(&aa->aa_exts); - list_splice_init(ext_list, &aa->aa_exts); - - spin_lock(&cli->cl_loi_list_lock); - starting_offset >>= PAGE_SHIFT; - if (cmd == OBD_BRW_READ) { - cli->cl_r_in_flight++; - lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count); - lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_r_in_flight); - lprocfs_oh_tally_log2(&cli->cl_read_offset_hist, - starting_offset + 1); - } else { - cli->cl_w_in_flight++; - lprocfs_oh_tally_log2(&cli->cl_write_page_hist, page_count); - lprocfs_oh_tally(&cli->cl_write_rpc_hist, cli->cl_w_in_flight); - lprocfs_oh_tally_log2(&cli->cl_write_offset_hist, - starting_offset + 1); - } - spin_unlock(&cli->cl_loi_list_lock); - - DEBUG_REQ(D_INODE, req, "%d pages, aa %p. 
now %ur/%dw in flight", - page_count, aa, cli->cl_r_in_flight, - cli->cl_w_in_flight); - OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_IO, cfs_fail_val); - - ptlrpcd_add_req(req); - rc = 0; - -out: - if (mem_tight != 0) - memalloc_noreclaim_restore(mpflag); - - if (rc != 0) { - LASSERT(!req); - - if (oa) - kmem_cache_free(obdo_cachep, oa); - kfree(pga); - /* this should happen rarely and is pretty bad, it makes the - * pending list not follow the dirty order - */ - while (!list_empty(ext_list)) { - ext = list_entry(ext_list->next, struct osc_extent, - oe_link); - list_del_init(&ext->oe_link); - osc_extent_finish(env, ext, 0, rc); - } - } - return rc; -} - -static int osc_set_lock_data(struct ldlm_lock *lock, void *data) -{ - int set = 0; - - LASSERT(lock); - - lock_res_and_lock(lock); - - if (!lock->l_ast_data) - lock->l_ast_data = data; - if (lock->l_ast_data == data) - set = 1; - - unlock_res_and_lock(lock); - - return set; -} - -static int osc_enqueue_fini(struct ptlrpc_request *req, - osc_enqueue_upcall_f upcall, void *cookie, - struct lustre_handle *lockh, enum ldlm_mode mode, - __u64 *flags, int agl, int errcode) -{ - bool intent = *flags & LDLM_FL_HAS_INTENT; - int rc; - - /* The request was created before ldlm_cli_enqueue call. */ - if (intent && errcode == ELDLM_LOCK_ABORTED) { - struct ldlm_reply *rep; - - rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); - - rep->lock_policy_res1 = - ptlrpc_status_ntoh(rep->lock_policy_res1); - if (rep->lock_policy_res1) - errcode = rep->lock_policy_res1; - if (!agl) - *flags |= LDLM_FL_LVB_READY; - } else if (errcode == ELDLM_OK) { - *flags |= LDLM_FL_LVB_READY; - } - - /* Call the update callback. 
*/ - rc = (*upcall)(cookie, lockh, errcode); - /* release the reference taken in ldlm_cli_enqueue() */ - if (errcode == ELDLM_LOCK_MATCHED) - errcode = ELDLM_OK; - if (errcode == ELDLM_OK && lustre_handle_is_used(lockh)) - ldlm_lock_decref(lockh, mode); - - return rc; -} - -static int osc_enqueue_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - struct osc_enqueue_args *aa, int rc) -{ - struct ldlm_lock *lock; - struct lustre_handle *lockh = &aa->oa_lockh; - enum ldlm_mode mode = aa->oa_mode; - struct ost_lvb *lvb = aa->oa_lvb; - __u32 lvb_len = sizeof(*lvb); - __u64 flags = 0; - - - /* ldlm_cli_enqueue is holding a reference on the lock, so it must - * be valid. - */ - lock = ldlm_handle2lock(lockh); - LASSERTF(lock, "lockh %llx, req %p, aa %p - client evicted?\n", - lockh->cookie, req, aa); - - /* Take an additional reference so that a blocking AST that - * ldlm_cli_enqueue_fini() might post for a failed lock, is guaranteed - * to arrive after an upcall has been executed by - * osc_enqueue_fini(). - */ - ldlm_lock_addref(lockh, mode); - - /* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */ - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2); - - /* Let CP AST to grant the lock first. */ - OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1); - - if (aa->oa_agl) { - LASSERT(!aa->oa_lvb); - LASSERT(!aa->oa_flags); - aa->oa_flags = &flags; - } - - /* Complete obtaining the lock procedure. */ - rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, aa->oa_type, 1, - aa->oa_mode, aa->oa_flags, lvb, lvb_len, - lockh, rc); - /* Complete osc stuff. 
*/ - rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode, - aa->oa_flags, aa->oa_agl, rc); - - OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10); - - ldlm_lock_decref(lockh, mode); - LDLM_LOCK_PUT(lock); - return rc; -} - -struct ptlrpc_request_set *PTLRPCD_SET = (void *)1; - -/* When enqueuing asynchronously, locks are not ordered, we can obtain a lock - * from the 2nd OSC before a lock from the 1st one. This does not deadlock with - * other synchronous requests, however keeping some locks and trying to obtain - * others may take a considerable amount of time in a case of ost failure; and - * when other sync requests do not get released lock from a client, the client - * is evicted from the cluster -- such scenaries make the life difficult, so - * release locks just after they are obtained. - */ -int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u64 *flags, union ldlm_policy_data *policy, - struct ost_lvb *lvb, int kms_valid, - osc_enqueue_upcall_f upcall, void *cookie, - struct ldlm_enqueue_info *einfo, - struct ptlrpc_request_set *rqset, int async, int agl) -{ - struct obd_device *obd = exp->exp_obd; - struct lustre_handle lockh = { 0 }; - struct ptlrpc_request *req = NULL; - int intent = *flags & LDLM_FL_HAS_INTENT; - __u64 match_flags = *flags; - enum ldlm_mode mode; - int rc; - - /* Filesystem lock extents are extended to page boundaries so that - * dealing with the page cache is a little smoother. - */ - policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK; - policy->l_extent.end |= ~PAGE_MASK; - - /* - * kms is not valid when either object is completely fresh (so that no - * locks are cached), or object was evicted. In the latter case cached - * lock cannot be used, because it would prime inode state with - * potentially stale LVB. 
- */ - if (!kms_valid) - goto no_match; - - /* Next, search for already existing extent locks that will cover us */ - /* If we're trying to read, we also search for an existing PW lock. The - * VFS and page cache already protect us locally, so lots of readers/ - * writers can share a single PW lock. - * - * There are problems with conversion deadlocks, so instead of - * converting a read lock to a write lock, we'll just enqueue a new - * one. - * - * At some point we should cancel the read lock instead of making them - * send us a blocking callback, but there are problems with canceling - * locks out from other users right now, too. - */ - mode = einfo->ei_mode; - if (einfo->ei_mode == LCK_PR) - mode |= LCK_PW; - if (agl == 0) - match_flags |= LDLM_FL_LVB_READY; - if (intent != 0) - match_flags |= LDLM_FL_BLOCK_GRANTED; - mode = ldlm_lock_match(obd->obd_namespace, match_flags, res_id, - einfo->ei_type, policy, mode, &lockh, 0); - if (mode) { - struct ldlm_lock *matched; - - if (*flags & LDLM_FL_TEST_LOCK) - return ELDLM_OK; - - matched = ldlm_handle2lock(&lockh); - if (agl) { - /* AGL enqueues DLM locks speculatively. Therefore if - * it already exists a DLM lock, it wll just inform the - * caller to cancel the AGL process for this stripe. - */ - ldlm_lock_decref(&lockh, mode); - LDLM_LOCK_PUT(matched); - return -ECANCELED; - } else if (osc_set_lock_data(matched, einfo->ei_cbdata)) { - *flags |= LDLM_FL_LVB_READY; - /* We already have a lock, and it's referenced. 
*/ - (*upcall)(cookie, &lockh, ELDLM_LOCK_MATCHED); - - ldlm_lock_decref(&lockh, mode); - LDLM_LOCK_PUT(matched); - return ELDLM_OK; - } else { - ldlm_lock_decref(&lockh, mode); - LDLM_LOCK_PUT(matched); - } - } - -no_match: - if (*flags & (LDLM_FL_TEST_LOCK | LDLM_FL_MATCH_LOCK)) - return -ENOLCK; - if (intent) { - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_LDLM_ENQUEUE_LVB); - if (!req) - return -ENOMEM; - - rc = ldlm_prep_enqueue_req(exp, req, NULL, 0); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, - sizeof(*lvb)); - ptlrpc_request_set_replen(req); - } - - /* users of osc_enqueue() can pass this flag for ldlm_lock_match() */ - *flags &= ~LDLM_FL_BLOCK_GRANTED; - - rc = ldlm_cli_enqueue(exp, &req, einfo, res_id, policy, flags, lvb, - sizeof(*lvb), LVB_T_OST, &lockh, async); - if (async) { - if (!rc) { - struct osc_enqueue_args *aa; - - BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); - aa = ptlrpc_req_async_args(req); - aa->oa_exp = exp; - aa->oa_mode = einfo->ei_mode; - aa->oa_type = einfo->ei_type; - lustre_handle_copy(&aa->oa_lockh, &lockh); - aa->oa_upcall = upcall; - aa->oa_cookie = cookie; - aa->oa_agl = !!agl; - if (!agl) { - aa->oa_flags = flags; - aa->oa_lvb = lvb; - } else { - /* AGL is essentially to enqueue an DLM lock - * in advance, so we don't care about the - * result of AGL enqueue. 
- */ - aa->oa_lvb = NULL; - aa->oa_flags = NULL; - } - - req->rq_interpret_reply = - (ptlrpc_interpterer_t)osc_enqueue_interpret; - if (rqset == PTLRPCD_SET) - ptlrpcd_add_req(req); - else - ptlrpc_set_add_req(rqset, req); - } else if (intent) { - ptlrpc_req_finished(req); - } - return rc; - } - - rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode, - flags, agl, rc); - if (intent) - ptlrpc_req_finished(req); - - return rc; -} - -int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, - enum ldlm_type type, union ldlm_policy_data *policy, - enum ldlm_mode mode, __u64 *flags, void *data, - struct lustre_handle *lockh, int unref) -{ - struct obd_device *obd = exp->exp_obd; - __u64 lflags = *flags; - enum ldlm_mode rc; - - if (OBD_FAIL_CHECK(OBD_FAIL_OSC_MATCH)) - return -EIO; - - /* Filesystem lock extents are extended to page boundaries so that - * dealing with the page cache is a little smoother - */ - policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK; - policy->l_extent.end |= ~PAGE_MASK; - - /* Next, search for already existing extent locks that will cover us */ - /* If we're trying to read, we also search for an existing PW lock. The - * VFS and page cache already protect us locally, so lots of readers/ - * writers can share a single PW lock. - */ - rc = mode; - if (mode == LCK_PR) - rc |= LCK_PW; - rc = ldlm_lock_match(obd->obd_namespace, lflags, - res_id, type, policy, rc, lockh, unref); - if (!rc || lflags & LDLM_FL_TEST_LOCK) - return rc; - - if (data) { - struct ldlm_lock *lock = ldlm_handle2lock(lockh); - - LASSERT(lock); - if (!osc_set_lock_data(lock, data)) { - ldlm_lock_decref(lockh, rc); - rc = 0; - } - LDLM_LOCK_PUT(lock); - } - return rc; -} - -static int osc_statfs_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - struct osc_async_args *aa, int rc) -{ - struct obd_statfs *msfs; - - if (rc == -EBADR) - /* The request has in fact never been sent - * due to issues at a higher level (LOV). 
- * Exit immediately since the caller is - * aware of the problem and takes care - * of the clean up - */ - return rc; - - if ((rc == -ENOTCONN || rc == -EAGAIN) && - (aa->aa_oi->oi_flags & OBD_STATFS_NODELAY)) { - rc = 0; - goto out; - } - - if (rc != 0) - goto out; - - msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS); - if (!msfs) { - rc = -EPROTO; - goto out; - } - - *aa->aa_oi->oi_osfs = *msfs; -out: - rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc); - return rc; -} - -static int osc_statfs_async(struct obd_export *exp, - struct obd_info *oinfo, __u64 max_age, - struct ptlrpc_request_set *rqset) -{ - struct obd_device *obd = class_exp2obd(exp); - struct ptlrpc_request *req; - struct osc_async_args *aa; - int rc; - - /* We could possibly pass max_age in the request (as an absolute - * timestamp or a "seconds.usec ago") so the target can avoid doing - * extra calls into the filesystem if that isn't necessary (e.g. - * during mount that would help a bit). Having relative timestamps - * is not so great if request processing is slow, while absolute - * timestamps are not ideal because they need time synchronization. 
- */ - req = ptlrpc_request_alloc(obd->u.cli.cl_import, &RQF_OST_STATFS); - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - ptlrpc_request_set_replen(req); - req->rq_request_portal = OST_CREATE_PORTAL; - ptlrpc_at_set_req_timeout(req); - - if (oinfo->oi_flags & OBD_STATFS_NODELAY) { - /* procfs requests not want stat in wait for avoid deadlock */ - req->rq_no_resend = 1; - req->rq_no_delay = 1; - } - - req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_statfs_interpret; - BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); - aa = ptlrpc_req_async_args(req); - aa->aa_oi = oinfo; - - ptlrpc_set_add_req(rqset, req); - return 0; -} - -static int osc_statfs(const struct lu_env *env, struct obd_export *exp, - struct obd_statfs *osfs, __u64 max_age, __u32 flags) -{ - struct obd_device *obd = class_exp2obd(exp); - struct obd_statfs *msfs; - struct ptlrpc_request *req; - struct obd_import *imp = NULL; - int rc; - - /* Since the request might also come from lprocfs, so we need - * sync this with client_disconnect_export Bug15684 - */ - down_read(&obd->u.cli.cl_sem); - if (obd->u.cli.cl_import) - imp = class_import_get(obd->u.cli.cl_import); - up_read(&obd->u.cli.cl_sem); - if (!imp) - return -ENODEV; - - /* We could possibly pass max_age in the request (as an absolute - * timestamp or a "seconds.usec ago") so the target can avoid doing - * extra calls into the filesystem if that isn't necessary (e.g. - * during mount that would help a bit). Having relative timestamps - * is not so great if request processing is slow, while absolute - * timestamps are not ideal because they need time synchronization. 
- */ - req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS); - - class_import_put(imp); - - if (!req) - return -ENOMEM; - - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - ptlrpc_request_set_replen(req); - req->rq_request_portal = OST_CREATE_PORTAL; - ptlrpc_at_set_req_timeout(req); - - if (flags & OBD_STATFS_NODELAY) { - /* procfs requests not want stat in wait for avoid deadlock */ - req->rq_no_resend = 1; - req->rq_no_delay = 1; - } - - rc = ptlrpc_queue_wait(req); - if (rc) - goto out; - - msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS); - if (!msfs) { - rc = -EPROTO; - goto out; - } - - *osfs = *msfs; - - out: - ptlrpc_req_finished(req); - return rc; -} - -static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void __user *uarg) -{ - struct obd_device *obd = exp->exp_obd; - struct obd_ioctl_data *data = karg; - int err = 0; - - if (!try_module_get(THIS_MODULE)) { - CERROR("%s: cannot get module '%s'\n", obd->obd_name, - module_name(THIS_MODULE)); - return -EINVAL; - } - switch (cmd) { - case OBD_IOC_CLIENT_RECOVER: - err = ptlrpc_recover_import(obd->u.cli.cl_import, - data->ioc_inlbuf1, 0); - if (err > 0) - err = 0; - goto out; - case IOC_OSC_SET_ACTIVE: - err = ptlrpc_set_import_active(obd->u.cli.cl_import, - data->ioc_offset); - goto out; - case OBD_IOC_PING_TARGET: - err = ptlrpc_obd_ping(obd); - goto out; - default: - CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n", - cmd, current->comm); - err = -ENOTTY; - goto out; - } -out: - module_put(THIS_MODULE); - return err; -} - -static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, - u32 keylen, void *key, u32 vallen, - void *val, struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *req; - struct obd_device *obd = exp->exp_obd; - struct obd_import *imp = class_exp2cliimp(exp); - char *tmp; - int rc; - - OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SHUTDOWN, 10); - - if 
(KEY_IS(KEY_CHECKSUM)) { - if (vallen != sizeof(int)) - return -EINVAL; - exp->exp_obd->u.cli.cl_checksum = (*(int *)val) ? 1 : 0; - return 0; - } - - if (KEY_IS(KEY_SPTLRPC_CONF)) { - sptlrpc_conf_client_adapt(obd); - return 0; - } - - if (KEY_IS(KEY_FLUSH_CTX)) { - sptlrpc_import_flush_my_ctx(imp); - return 0; - } - - if (KEY_IS(KEY_CACHE_SET)) { - struct client_obd *cli = &obd->u.cli; - - LASSERT(!cli->cl_cache); /* only once */ - cli->cl_cache = val; - cl_cache_incref(cli->cl_cache); - cli->cl_lru_left = &cli->cl_cache->ccc_lru_left; - - /* add this osc into entity list */ - LASSERT(list_empty(&cli->cl_lru_osc)); - spin_lock(&cli->cl_cache->ccc_lru_lock); - list_add(&cli->cl_lru_osc, &cli->cl_cache->ccc_lru); - spin_unlock(&cli->cl_cache->ccc_lru_lock); - - return 0; - } - - if (KEY_IS(KEY_CACHE_LRU_SHRINK)) { - struct client_obd *cli = &obd->u.cli; - long nr = atomic_long_read(&cli->cl_lru_in_list) >> 1; - long target = *(long *)val; - - nr = osc_lru_shrink(env, cli, min(nr, target), true); - *(long *)val -= nr; - return 0; - } - - if (!set && !KEY_IS(KEY_GRANT_SHRINK)) - return -EINVAL; - - /* We pass all other commands directly to OST. Since nobody calls osc - * methods directly and everybody is supposed to go through LOV, we - * assume lov checked invalid values for us. - * The only recognised values so far are evict_by_nid and mds_conn. - * Even if something bad goes through, we'd get a -EINVAL from OST - * anyway. - */ - - req = ptlrpc_request_alloc(imp, KEY_IS(KEY_GRANT_SHRINK) ? 
- &RQF_OST_SET_GRANT_INFO : - &RQF_OBD_SET_INFO); - if (!req) - return -ENOMEM; - - req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY, - RCL_CLIENT, keylen); - if (!KEY_IS(KEY_GRANT_SHRINK)) - req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL, - RCL_CLIENT, vallen); - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SET_INFO); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY); - memcpy(tmp, key, keylen); - tmp = req_capsule_client_get(&req->rq_pill, KEY_IS(KEY_GRANT_SHRINK) ? - &RMF_OST_BODY : - &RMF_SETINFO_VAL); - memcpy(tmp, val, vallen); - - if (KEY_IS(KEY_GRANT_SHRINK)) { - struct osc_brw_async_args *aa; - struct obdo *oa; - - BUILD_BUG_ON(sizeof(*aa) > sizeof(req->rq_async_args)); - aa = ptlrpc_req_async_args(req); - oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS); - if (!oa) { - ptlrpc_req_finished(req); - return -ENOMEM; - } - *oa = ((struct ost_body *)val)->oa; - aa->aa_oa = oa; - req->rq_interpret_reply = osc_shrink_grant_interpret; - } - - ptlrpc_request_set_replen(req); - if (!KEY_IS(KEY_GRANT_SHRINK)) { - LASSERT(set); - ptlrpc_set_add_req(set, req); - ptlrpc_check_set(NULL, set); - } else { - ptlrpcd_add_req(req); - } - - return 0; -} - -static int osc_reconnect(const struct lu_env *env, - struct obd_export *exp, struct obd_device *obd, - struct obd_uuid *cluuid, - struct obd_connect_data *data, - void *localdata) -{ - struct client_obd *cli = &obd->u.cli; - - if (data && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) { - long lost_grant; - - spin_lock(&cli->cl_loi_list_lock); - data->ocd_grant = (cli->cl_avail_grant + - (cli->cl_dirty_pages << PAGE_SHIFT)) ?: - 2 * cli_brw_size(obd); - lost_grant = cli->cl_lost_grant; - cli->cl_lost_grant = 0; - spin_unlock(&cli->cl_loi_list_lock); - - CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d ocd_grant: %d, lost: %ld.\n", - data->ocd_connect_flags, - data->ocd_version, data->ocd_grant, lost_grant); - } - - return 0; 
-} - -static int osc_disconnect(struct obd_export *exp) -{ - struct obd_device *obd = class_exp2obd(exp); - int rc; - - rc = client_disconnect_export(exp); - /** - * Initially we put del_shrink_grant before disconnect_export, but it - * causes the following problem if setup (connect) and cleanup - * (disconnect) are tangled together. - * connect p1 disconnect p2 - * ptlrpc_connect_import - * ............... class_manual_cleanup - * osc_disconnect - * del_shrink_grant - * ptlrpc_connect_interrupt - * init_grant_shrink - * add this client to shrink list - * cleanup_osc - * Bang! pinger trigger the shrink. - * So the osc should be disconnected from the shrink list, after we - * are sure the import has been destroyed. BUG18662 - */ - if (!obd->u.cli.cl_import) - osc_del_shrink_grant(&obd->u.cli); - return rc; -} - -static int osc_ldlm_resource_invalidate(struct cfs_hash *hs, - struct cfs_hash_bd *bd, - struct hlist_node *hnode, void *arg) -{ - struct ldlm_resource *res = cfs_hash_object(hs, hnode); - struct osc_object *osc = NULL; - struct lu_env *env = arg; - struct ldlm_lock *lock; - - lock_res(res); - list_for_each_entry(lock, &res->lr_granted, l_res_link) { - if (lock->l_ast_data && !osc) { - osc = lock->l_ast_data; - cl_object_get(osc2cl(osc)); - } - - /* - * clear LDLM_FL_CLEANED flag to make sure it will be canceled - * by the 2nd round of ldlm_namespace_clean() call in - * osc_import_event(). 
- */ - ldlm_clear_cleaned(lock); - } - unlock_res(res); - - if (osc) { - osc_object_invalidate(env, osc); - cl_object_put(env, osc2cl(osc)); - } - - return 0; -} - -static int osc_import_event(struct obd_device *obd, - struct obd_import *imp, - enum obd_import_event event) -{ - struct client_obd *cli; - int rc = 0; - - LASSERT(imp->imp_obd == obd); - - switch (event) { - case IMP_EVENT_DISCON: { - cli = &obd->u.cli; - spin_lock(&cli->cl_loi_list_lock); - cli->cl_avail_grant = 0; - cli->cl_lost_grant = 0; - spin_unlock(&cli->cl_loi_list_lock); - break; - } - case IMP_EVENT_INACTIVE: { - rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL); - break; - } - case IMP_EVENT_INVALIDATE: { - struct ldlm_namespace *ns = obd->obd_namespace; - struct lu_env *env; - u16 refcheck; - - ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); - - env = cl_env_get(&refcheck); - if (!IS_ERR(env)) { - osc_io_unplug(env, &obd->u.cli, NULL); - - cfs_hash_for_each_nolock(ns->ns_rs_hash, - osc_ldlm_resource_invalidate, - env, 0); - cl_env_put(env, &refcheck); - - ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); - } else { - rc = PTR_ERR(env); - } - break; - } - case IMP_EVENT_ACTIVE: { - rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL); - break; - } - case IMP_EVENT_OCD: { - struct obd_connect_data *ocd = &imp->imp_connect_data; - - if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT) - osc_init_grant(&obd->u.cli, ocd); - - /* See bug 7198 */ - if (ocd->ocd_connect_flags & OBD_CONNECT_REQPORTAL) - imp->imp_client->cli_request_portal = OST_REQUEST_PORTAL; - - rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL); - break; - } - case IMP_EVENT_DEACTIVATE: { - rc = obd_notify_observer(obd, obd, OBD_NOTIFY_DEACTIVATE, NULL); - break; - } - case IMP_EVENT_ACTIVATE: { - rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVATE, NULL); - break; - } - default: - CERROR("Unknown import event %d\n", event); - LBUG(); - } - return rc; -} - -/** - * Determine whether the lock can be canceled 
before replaying the lock - * during recovery, see bug16774 for detailed information. - * - * \retval zero the lock can't be canceled - * \retval other ok to cancel - */ -static int osc_cancel_weight(struct ldlm_lock *lock) -{ - /* - * Cancel all unused and granted extent lock. - */ - if (lock->l_resource->lr_type == LDLM_EXTENT && - lock->l_granted_mode == lock->l_req_mode && - osc_ldlm_weigh_ast(lock) == 0) - return 1; - - return 0; -} - -static int brw_queue_work(const struct lu_env *env, void *data) -{ - struct client_obd *cli = data; - - CDEBUG(D_CACHE, "Run writeback work for client obd %p.\n", cli); - - osc_io_unplug(env, cli, NULL); - return 0; -} - -int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - struct lprocfs_static_vars lvars = { NULL }; - struct client_obd *cli = &obd->u.cli; - void *handler; - int rc; - int adding; - int added; - int req_count; - - rc = ptlrpcd_addref(); - if (rc) - return rc; - - rc = client_obd_setup(obd, lcfg); - if (rc) - goto out_ptlrpcd; - - handler = ptlrpcd_alloc_work(cli->cl_import, brw_queue_work, cli); - if (IS_ERR(handler)) { - rc = PTR_ERR(handler); - goto out_client_setup; - } - cli->cl_writeback_work = handler; - - handler = ptlrpcd_alloc_work(cli->cl_import, lru_queue_work, cli); - if (IS_ERR(handler)) { - rc = PTR_ERR(handler); - goto out_ptlrpcd_work; - } - - cli->cl_lru_work = handler; - - rc = osc_quota_setup(obd); - if (rc) - goto out_ptlrpcd_work; - - cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL; - lprocfs_osc_init_vars(&lvars); - if (lprocfs_obd_setup(obd, lvars.obd_vars, lvars.sysfs_vars) == 0) { - lproc_osc_attach_seqstat(obd); - sptlrpc_lprocfs_cliobd_attach(obd); - ptlrpc_lprocfs_register_obd(obd); - } - - /* - * We try to control the total number of requests with a upper limit - * osc_reqpool_maxreqcount. There might be some race which will cause - * over-limit allocation, but it is fine. 
- */ - req_count = atomic_read(&osc_pool_req_count); - if (req_count < osc_reqpool_maxreqcount) { - adding = cli->cl_max_rpcs_in_flight + 2; - if (req_count + adding > osc_reqpool_maxreqcount) - adding = osc_reqpool_maxreqcount - req_count; - - added = ptlrpc_add_rqs_to_pool(osc_rq_pool, adding); - atomic_add(added, &osc_pool_req_count); - } - - INIT_LIST_HEAD(&cli->cl_grant_shrink_list); - ns_register_cancel(obd->obd_namespace, osc_cancel_weight); - - spin_lock(&osc_shrink_lock); - list_add_tail(&cli->cl_shrink_list, &osc_shrink_list); - spin_unlock(&osc_shrink_lock); - - return rc; - -out_ptlrpcd_work: - if (cli->cl_writeback_work) { - ptlrpcd_destroy_work(cli->cl_writeback_work); - cli->cl_writeback_work = NULL; - } - if (cli->cl_lru_work) { - ptlrpcd_destroy_work(cli->cl_lru_work); - cli->cl_lru_work = NULL; - } -out_client_setup: - client_obd_cleanup(obd); -out_ptlrpcd: - ptlrpcd_decref(); - return rc; -} - -static int osc_precleanup(struct obd_device *obd) -{ - struct client_obd *cli = &obd->u.cli; - - /* LU-464 - * for echo client, export may be on zombie list, wait for - * zombie thread to cull it, because cli.cl_import will be - * cleared in client_disconnect_export(): - * class_export_destroy() -> obd_cleanup() -> - * echo_device_free() -> echo_client_cleanup() -> - * obd_disconnect() -> osc_disconnect() -> - * client_disconnect_export() - */ - obd_zombie_barrier(); - if (cli->cl_writeback_work) { - ptlrpcd_destroy_work(cli->cl_writeback_work); - cli->cl_writeback_work = NULL; - } - - if (cli->cl_lru_work) { - ptlrpcd_destroy_work(cli->cl_lru_work); - cli->cl_lru_work = NULL; - } - - obd_cleanup_client_import(obd); - ptlrpc_lprocfs_unregister_obd(obd); - lprocfs_obd_cleanup(obd); - return 0; -} - -static int osc_cleanup(struct obd_device *obd) -{ - struct client_obd *cli = &obd->u.cli; - int rc; - - spin_lock(&osc_shrink_lock); - list_del(&cli->cl_shrink_list); - spin_unlock(&osc_shrink_lock); - - /* lru cleanup */ - if (cli->cl_cache) { - 
LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0); - spin_lock(&cli->cl_cache->ccc_lru_lock); - list_del_init(&cli->cl_lru_osc); - spin_unlock(&cli->cl_cache->ccc_lru_lock); - cli->cl_lru_left = NULL; - cl_cache_decref(cli->cl_cache); - cli->cl_cache = NULL; - } - - /* free memory of osc quota cache */ - osc_quota_cleanup(obd); - - rc = client_obd_cleanup(obd); - - ptlrpcd_decref(); - return rc; -} - -int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg) -{ - struct lprocfs_static_vars lvars = { NULL }; - int rc = 0; - - lprocfs_osc_init_vars(&lvars); - - switch (lcfg->lcfg_command) { - default: - rc = class_process_proc_param(PARAM_OSC, lvars.obd_vars, - lcfg, obd); - if (rc > 0) - rc = 0; - break; - } - - return rc; -} - -static int osc_process_config(struct obd_device *obd, u32 len, void *buf) -{ - return osc_process_config_base(obd, buf); -} - -static struct obd_ops osc_obd_ops = { - .owner = THIS_MODULE, - .setup = osc_setup, - .precleanup = osc_precleanup, - .cleanup = osc_cleanup, - .add_conn = client_import_add_conn, - .del_conn = client_import_del_conn, - .connect = client_connect_import, - .reconnect = osc_reconnect, - .disconnect = osc_disconnect, - .statfs = osc_statfs, - .statfs_async = osc_statfs_async, - .create = osc_create, - .destroy = osc_destroy, - .getattr = osc_getattr, - .setattr = osc_setattr, - .iocontrol = osc_iocontrol, - .set_info_async = osc_set_info_async, - .import_event = osc_import_event, - .process_config = osc_process_config, - .quotactl = osc_quotactl, -}; - -struct list_head osc_shrink_list = LIST_HEAD_INIT(osc_shrink_list); -DEFINE_SPINLOCK(osc_shrink_lock); - -static struct shrinker osc_cache_shrinker = { - .count_objects = osc_cache_shrink_count, - .scan_objects = osc_cache_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; - -static int __init osc_init(void) -{ - struct lprocfs_static_vars lvars = { NULL }; - unsigned int reqpool_size; - unsigned int reqsize; - int rc; - - /* print an address of _any_ 
initialized kernel symbol from this - * module, to allow debugging with gdb that doesn't support data - * symbols from modules. - */ - CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches); - - rc = libcfs_setup(); - if (rc) - return rc; - - rc = lu_kmem_init(osc_caches); - if (rc) - return rc; - - lprocfs_osc_init_vars(&lvars); - - rc = register_shrinker(&osc_cache_shrinker); - if (rc) - goto err; - - /* This is obviously too much memory, only prevent overflow here */ - if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) { - rc = -EINVAL; - goto err; - } - - reqpool_size = osc_reqpool_mem_max << 20; - - reqsize = 1; - while (reqsize < OST_MAXREQSIZE) - reqsize = reqsize << 1; - - /* - * We don't enlarge the request count in OSC pool according to - * cl_max_rpcs_in_flight. The allocation from the pool will only be - * tried after normal allocation failed. So a small OSC pool won't - * cause much performance degression in most of cases. - */ - osc_reqpool_maxreqcount = reqpool_size / reqsize; - - atomic_set(&osc_pool_req_count, 0); - osc_rq_pool = ptlrpc_init_rq_pool(0, OST_MAXREQSIZE, - ptlrpc_add_rqs_to_pool); - - rc = -ENOMEM; - - if (!osc_rq_pool) - goto err; - - rc = class_register_type(&osc_obd_ops, NULL, - LUSTRE_OSC_NAME, &osc_device_type); - if (rc) - goto err; - - return rc; - -err: - if (osc_rq_pool) - ptlrpc_free_rq_pool(osc_rq_pool); - unregister_shrinker(&osc_cache_shrinker); - lu_kmem_fini(osc_caches); - return rc; -} - -static void /*__exit*/ osc_exit(void) -{ - unregister_shrinker(&osc_cache_shrinker); - class_unregister_type(LUSTRE_OSC_NAME); - lu_kmem_fini(osc_caches); - ptlrpc_free_rq_pool(osc_rq_pool); -} - -MODULE_AUTHOR("OpenSFS, Inc. 
"); -MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(LUSTRE_VERSION_STRING); - -module_init(osc_init); -module_exit(osc_exit); diff --git a/drivers/staging/lustre/lustre/ptlrpc/Makefile b/drivers/staging/lustre/lustre/ptlrpc/Makefile deleted file mode 100644 index 1deb1971b39e..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/include -subdir-ccflags-y += -I$(srctree)/drivers/staging/lustre/lustre/include - -obj-$(CONFIG_LUSTRE_FS) += ptlrpc.o -LDLM := ../../lustre/ldlm/ - -ldlm_objs := $(LDLM)l_lock.o $(LDLM)ldlm_lock.o -ldlm_objs += $(LDLM)ldlm_resource.o $(LDLM)ldlm_lib.o -ldlm_objs += $(LDLM)ldlm_plain.o $(LDLM)ldlm_extent.o -ldlm_objs += $(LDLM)ldlm_request.o $(LDLM)ldlm_lockd.o -ldlm_objs += $(LDLM)ldlm_flock.o $(LDLM)ldlm_inodebits.o -ldlm_objs += $(LDLM)ldlm_pool.o -ldlm_objs += $(LDLM)interval_tree.o -ptlrpc_objs := client.o recover.o connection.o niobuf.o pack_generic.o -ptlrpc_objs += events.o ptlrpc_module.o service.o pinger.o -ptlrpc_objs += llog_net.o llog_client.o import.o ptlrpcd.o -ptlrpc_objs += pers.o lproc_ptlrpc.o wiretest.o layout.o -ptlrpc_objs += sec.o sec_bulk.o sec_gc.o sec_config.o -ptlrpc_objs += sec_null.o sec_plain.o nrs.o nrs_fifo.o - -ptlrpc-y := $(ldlm_objs) $(ptlrpc_objs) sec_lproc.o -ptlrpc-$(CONFIG_LUSTRE_TRANSLATE_ERRNOS) += errno.o diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c deleted file mode 100644 index c1b82bf20f08..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/client.c +++ /dev/null @@ -1,3271 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -/** Implementation of client-side PortalRPC interfaces */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_pin_ops = { - .add_kiov_frag = ptlrpc_prep_bulk_page_pin, - .release_frags = ptlrpc_release_bulk_page_pin, -}; -EXPORT_SYMBOL(ptlrpc_bulk_kiov_pin_ops); - -const struct ptlrpc_bulk_frag_ops ptlrpc_bulk_kiov_nopin_ops = { - .add_kiov_frag = ptlrpc_prep_bulk_page_nopin, - .release_frags = NULL, -}; -EXPORT_SYMBOL(ptlrpc_bulk_kiov_nopin_ops); - -static int ptlrpc_send_new_req(struct ptlrpc_request *req); -static int ptlrpcd_check_work(struct ptlrpc_request *req); -static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async); - -/** - * Initialize passed in client structure \a cl. 
- */ -void ptlrpc_init_client(int req_portal, int rep_portal, char *name, - struct ptlrpc_client *cl) -{ - cl->cli_request_portal = req_portal; - cl->cli_reply_portal = rep_portal; - cl->cli_name = name; -} -EXPORT_SYMBOL(ptlrpc_init_client); - -/** - * Return PortalRPC connection for remote uud \a uuid - */ -struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid) -{ - struct ptlrpc_connection *c; - lnet_nid_t self; - struct lnet_process_id peer; - int err; - - /* - * ptlrpc_uuid_to_peer() initializes its 2nd parameter - * before accessing its values. - * coverity[uninit_use_in_call] - */ - err = ptlrpc_uuid_to_peer(uuid, &peer, &self); - if (err != 0) { - CNETERR("cannot find peer %s!\n", uuid->uuid); - return NULL; - } - - c = ptlrpc_connection_get(peer, self, uuid); - if (c) { - memcpy(c->c_remote_uuid.uuid, - uuid->uuid, sizeof(c->c_remote_uuid.uuid)); - } - - CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c); - - return c; -} - -/** - * Allocate and initialize new bulk descriptor on the sender. - * Returns pointer to the descriptor or NULL on error. 
- */ -struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned int nfrags, - unsigned int max_brw, - enum ptlrpc_bulk_op_type type, - unsigned int portal, - const struct ptlrpc_bulk_frag_ops *ops) -{ - struct ptlrpc_bulk_desc *desc; - int i; - - /* ensure that only one of KIOV or IOVEC is set but not both */ - LASSERT((ptlrpc_is_bulk_desc_kiov(type) && ops->add_kiov_frag) || - (ptlrpc_is_bulk_desc_kvec(type) && ops->add_iov_frag)); - - desc = kzalloc(sizeof(*desc), GFP_NOFS); - if (!desc) - return NULL; - - if (type & PTLRPC_BULK_BUF_KIOV) { - GET_KIOV(desc) = kcalloc(nfrags, sizeof(*GET_KIOV(desc)), - GFP_NOFS); - if (!GET_KIOV(desc)) - goto free_desc; - } else { - GET_KVEC(desc) = kcalloc(nfrags, sizeof(*GET_KVEC(desc)), - GFP_NOFS); - if (!GET_KVEC(desc)) - goto free_desc; - } - - spin_lock_init(&desc->bd_lock); - init_waitqueue_head(&desc->bd_waitq); - desc->bd_max_iov = nfrags; - desc->bd_iov_count = 0; - desc->bd_portal = portal; - desc->bd_type = type; - desc->bd_md_count = 0; - desc->bd_frag_ops = (struct ptlrpc_bulk_frag_ops *)ops; - LASSERT(max_brw > 0); - desc->bd_md_max_brw = min(max_brw, PTLRPC_BULK_OPS_COUNT); - /* - * PTLRPC_BULK_OPS_COUNT is the compile-time transfer limit for this - * node. Negotiated ocd_brw_size will always be <= this number. - */ - for (i = 0; i < PTLRPC_BULK_OPS_COUNT; i++) - LNetInvalidateMDHandle(&desc->bd_mds[i]); - - return desc; -free_desc: - kfree(desc); - return NULL; -} - -/** - * Prepare bulk descriptor for specified outgoing request \a req that - * can fit \a nfrags * pages. \a type is bulk type. \a portal is where - * the bulk to be sent. Used on client-side. - * Returns pointer to newly allocated initialized bulk descriptor or NULL on - * error. 
- */ -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req, - unsigned int nfrags, - unsigned int max_brw, - unsigned int type, - unsigned int portal, - const struct ptlrpc_bulk_frag_ops *ops) -{ - struct obd_import *imp = req->rq_import; - struct ptlrpc_bulk_desc *desc; - - LASSERT(ptlrpc_is_bulk_op_passive(type)); - - desc = ptlrpc_new_bulk(nfrags, max_brw, type, portal, ops); - if (!desc) - return NULL; - - desc->bd_import_generation = req->rq_import_generation; - desc->bd_import = class_import_get(imp); - desc->bd_req = req; - - desc->bd_cbid.cbid_fn = client_bulk_callback; - desc->bd_cbid.cbid_arg = desc; - - /* This makes req own desc, and free it when she frees herself */ - req->rq_bulk = desc; - - return desc; -} -EXPORT_SYMBOL(ptlrpc_prep_bulk_imp); - -void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len, int pin) -{ - struct bio_vec *kiov; - - LASSERT(desc->bd_iov_count < desc->bd_max_iov); - LASSERT(page); - LASSERT(pageoffset >= 0); - LASSERT(len > 0); - LASSERT(pageoffset + len <= PAGE_SIZE); - LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type)); - - kiov = &BD_GET_KIOV(desc, desc->bd_iov_count); - - desc->bd_nob += len; - - if (pin) - get_page(page); - - kiov->bv_page = page; - kiov->bv_offset = pageoffset; - kiov->bv_len = len; - - desc->bd_iov_count++; -} -EXPORT_SYMBOL(__ptlrpc_prep_bulk_page); - -int ptlrpc_prep_bulk_frag(struct ptlrpc_bulk_desc *desc, - void *frag, int len) -{ - struct kvec *iovec; - - LASSERT(desc->bd_iov_count < desc->bd_max_iov); - LASSERT(frag); - LASSERT(len > 0); - LASSERT(ptlrpc_is_bulk_desc_kvec(desc->bd_type)); - - iovec = &BD_GET_KVEC(desc, desc->bd_iov_count); - - desc->bd_nob += len; - - iovec->iov_base = frag; - iovec->iov_len = len; - - desc->bd_iov_count++; - - return desc->bd_nob; -} -EXPORT_SYMBOL(ptlrpc_prep_bulk_frag); - -void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) -{ - LASSERT(desc->bd_iov_count != LI_POISON); /* not freed already */ 
- LASSERT(desc->bd_md_count == 0); /* network hands off */ - LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL)); - LASSERT(desc->bd_frag_ops); - - if (ptlrpc_is_bulk_desc_kiov(desc->bd_type)) - sptlrpc_enc_pool_put_pages(desc); - - if (desc->bd_export) - class_export_put(desc->bd_export); - else - class_import_put(desc->bd_import); - - if (desc->bd_frag_ops->release_frags) - desc->bd_frag_ops->release_frags(desc); - - if (ptlrpc_is_bulk_desc_kiov(desc->bd_type)) - kfree(GET_KIOV(desc)); - else - kfree(GET_KVEC(desc)); - - kfree(desc); -} -EXPORT_SYMBOL(ptlrpc_free_bulk); - -/** - * Set server timelimit for this req, i.e. how long are we willing to wait - * for reply before timing out this request. - */ -void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req) -{ - __u32 serv_est; - int idx; - struct imp_at *at; - - LASSERT(req->rq_import); - - if (AT_OFF) { - /* - * non-AT settings - * - * \a imp_server_timeout means this is reverse import and - * we send (currently only) ASTs to the client and cannot afford - * to wait too long for the reply, otherwise the other client - * (because of which we are sending this request) would - * timeout waiting for us - */ - req->rq_timeout = req->rq_import->imp_server_timeout ? - obd_timeout / 2 : obd_timeout; - } else { - at = &req->rq_import->imp_at; - idx = import_at_get_index(req->rq_import, - req->rq_request_portal); - serv_est = at_get(&at->iat_service_estimate[idx]); - req->rq_timeout = at_est2timeout(serv_est); - } - /* - * We could get even fancier here, using history to predict increased - * loading... 
- */ - - /* - * Let the server know what this RPC timeout is by putting it in the - * reqmsg - */ - lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout); -} -EXPORT_SYMBOL(ptlrpc_at_set_req_timeout); - -/* Adjust max service estimate based on server value */ -static void ptlrpc_at_adj_service(struct ptlrpc_request *req, - unsigned int serv_est) -{ - int idx; - unsigned int oldse; - struct imp_at *at; - - LASSERT(req->rq_import); - at = &req->rq_import->imp_at; - - idx = import_at_get_index(req->rq_import, req->rq_request_portal); - /* - * max service estimates are tracked on the server side, - * so just keep minimal history here - */ - oldse = at_measured(&at->iat_service_estimate[idx], serv_est); - if (oldse != 0) - CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d has changed from %d to %d\n", - req->rq_import->imp_obd->obd_name, req->rq_request_portal, - oldse, at_get(&at->iat_service_estimate[idx])); -} - -/* Expected network latency per remote node (secs) */ -int ptlrpc_at_get_net_latency(struct ptlrpc_request *req) -{ - return AT_OFF ? 0 : at_get(&req->rq_import->imp_at.iat_net_latency); -} - -/* Adjust expected network latency */ -void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req, - unsigned int service_time) -{ - unsigned int nl, oldnl; - struct imp_at *at; - time64_t now = ktime_get_real_seconds(); - - LASSERT(req->rq_import); - - if (service_time > now - req->rq_sent + 3) { - /* - * bz16408, however, this can also happen if early reply - * is lost and client RPC is expired and resent, early reply - * or reply of original RPC can still be fit in reply buffer - * of resent RPC, now client is measuring time from the - * resent time, but server sent back service time of original - * RPC. - */ - CDEBUG((lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) ? 
- D_ADAPTTO : D_WARNING, - "Reported service time %u > total measured time %lld\n", - service_time, now - req->rq_sent); - return; - } - - /* Network latency is total time less server processing time */ - nl = max_t(int, now - req->rq_sent - - service_time, 0) + 1; /* st rounding */ - at = &req->rq_import->imp_at; - - oldnl = at_measured(&at->iat_net_latency, nl); - if (oldnl != 0) - CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) has changed from %d to %d\n", - req->rq_import->imp_obd->obd_name, - obd_uuid2str( - &req->rq_import->imp_connection->c_remote_uuid), - oldnl, at_get(&at->iat_net_latency)); -} - -static int unpack_reply(struct ptlrpc_request *req) -{ - int rc; - - if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) { - rc = ptlrpc_unpack_rep_msg(req, req->rq_replen); - if (rc) { - DEBUG_REQ(D_ERROR, req, "unpack_rep failed: %d", rc); - return -EPROTO; - } - } - - rc = lustre_unpack_rep_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF); - if (rc) { - DEBUG_REQ(D_ERROR, req, "unpack ptlrpc body failed: %d", rc); - return -EPROTO; - } - return 0; -} - -/** - * Handle an early reply message, called with the rq_lock held. - * If anything goes wrong just ignore it - same as if it never happened - */ -static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req) - __must_hold(&req->rq_lock) -{ - struct ptlrpc_request *early_req; - time64_t olddl; - int rc; - - req->rq_early = 0; - spin_unlock(&req->rq_lock); - - rc = sptlrpc_cli_unwrap_early_reply(req, &early_req); - if (rc) { - spin_lock(&req->rq_lock); - return rc; - } - - rc = unpack_reply(early_req); - if (rc) { - sptlrpc_cli_finish_early_reply(early_req); - spin_lock(&req->rq_lock); - return rc; - } - - /* - * Use new timeout value just to adjust the local value for this - * request, don't include it into at_history. It is unclear yet why - * service time increased and should it be counted or skipped, e.g. 
- * that can be recovery case or some error or server, the real reply - * will add all new data if it is worth to add. - */ - req->rq_timeout = lustre_msg_get_timeout(early_req->rq_repmsg); - lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout); - - /* Network latency can be adjusted, it is pure network delays */ - ptlrpc_at_adj_net_latency(req, - lustre_msg_get_service_time(early_req->rq_repmsg)); - - sptlrpc_cli_finish_early_reply(early_req); - - spin_lock(&req->rq_lock); - olddl = req->rq_deadline; - /* - * server assumes it now has rq_timeout from when the request - * arrived, so the client should give it at least that long. - * since we don't know the arrival time we'll use the original - * sent time - */ - req->rq_deadline = req->rq_sent + req->rq_timeout + - ptlrpc_at_get_net_latency(req); - - DEBUG_REQ(D_ADAPTTO, req, - "Early reply #%d, new deadline in %lds (%lds)", - req->rq_early_count, - (long)(req->rq_deadline - ktime_get_real_seconds()), - (long)(req->rq_deadline - olddl)); - - return rc; -} - -static struct kmem_cache *request_cache; - -int ptlrpc_request_cache_init(void) -{ - request_cache = kmem_cache_create("ptlrpc_cache", - sizeof(struct ptlrpc_request), - 0, SLAB_HWCACHE_ALIGN, NULL); - return !request_cache ? -ENOMEM : 0; -} - -void ptlrpc_request_cache_fini(void) -{ - kmem_cache_destroy(request_cache); -} - -struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags) -{ - struct ptlrpc_request *req; - - req = kmem_cache_zalloc(request_cache, flags); - return req; -} - -void ptlrpc_request_cache_free(struct ptlrpc_request *req) -{ - kmem_cache_free(request_cache, req); -} - -/** - * Wind down request pool \a pool. 
- * Frees all requests from the pool too - */ -void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool) -{ - struct ptlrpc_request *req; - - while ((req = list_first_entry_or_null(&pool->prp_req_list, - struct ptlrpc_request, rq_list))) { - list_del(&req->rq_list); - LASSERT(req->rq_reqbuf); - LASSERT(req->rq_reqbuf_len == pool->prp_rq_size); - kvfree(req->rq_reqbuf); - ptlrpc_request_cache_free(req); - } - kfree(pool); -} -EXPORT_SYMBOL(ptlrpc_free_rq_pool); - -/** - * Allocates, initializes and adds \a num_rq requests to the pool \a pool - */ -int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq) -{ - int i; - int size = 1; - - while (size < pool->prp_rq_size) - size <<= 1; - - LASSERTF(list_empty(&pool->prp_req_list) || - size == pool->prp_rq_size, - "Trying to change pool size with nonempty pool from %d to %d bytes\n", - pool->prp_rq_size, size); - - spin_lock(&pool->prp_lock); - pool->prp_rq_size = size; - for (i = 0; i < num_rq; i++) { - struct ptlrpc_request *req; - struct lustre_msg *msg; - - spin_unlock(&pool->prp_lock); - req = ptlrpc_request_cache_alloc(GFP_KERNEL); - if (!req) - return i; - msg = kvzalloc(size, GFP_KERNEL); - if (!msg) { - ptlrpc_request_cache_free(req); - return i; - } - req->rq_reqbuf = msg; - req->rq_reqbuf_len = size; - req->rq_pool = pool; - spin_lock(&pool->prp_lock); - list_add_tail(&req->rq_list, &pool->prp_req_list); - } - spin_unlock(&pool->prp_lock); - return num_rq; -} -EXPORT_SYMBOL(ptlrpc_add_rqs_to_pool); - -/** - * Create and initialize new request pool with given attributes: - * \a num_rq - initial number of requests to create for the pool - * \a msgsize - maximum message size possible for requests in thid pool - * \a populate_pool - function to be called when more requests need to be added - * to the pool - * Returns pointer to newly created pool or NULL on error. 
- */ -struct ptlrpc_request_pool * -ptlrpc_init_rq_pool(int num_rq, int msgsize, - int (*populate_pool)(struct ptlrpc_request_pool *, int)) -{ - struct ptlrpc_request_pool *pool; - - pool = kzalloc(sizeof(struct ptlrpc_request_pool), GFP_NOFS); - if (!pool) - return NULL; - - /* - * Request next power of two for the allocation, because internally - * kernel would do exactly this - */ - - spin_lock_init(&pool->prp_lock); - INIT_LIST_HEAD(&pool->prp_req_list); - pool->prp_rq_size = msgsize + SPTLRPC_MAX_PAYLOAD; - pool->prp_populate = populate_pool; - - populate_pool(pool, num_rq); - - return pool; -} -EXPORT_SYMBOL(ptlrpc_init_rq_pool); - -/** - * Fetches one request from pool \a pool - */ -static struct ptlrpc_request * -ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool) -{ - struct ptlrpc_request *request; - struct lustre_msg *reqbuf; - - if (!pool) - return NULL; - - spin_lock(&pool->prp_lock); - - /* - * See if we have anything in a pool, and bail out if nothing, - * in writeout path, where this matters, this is safe to do, because - * nothing is lost in this case, and when some in-flight requests - * complete, this code will be called again. - */ - if (unlikely(list_empty(&pool->prp_req_list))) { - spin_unlock(&pool->prp_lock); - return NULL; - } - - request = list_entry(pool->prp_req_list.next, struct ptlrpc_request, - rq_list); - list_del_init(&request->rq_list); - spin_unlock(&pool->prp_lock); - - LASSERT(request->rq_reqbuf); - LASSERT(request->rq_pool); - - reqbuf = request->rq_reqbuf; - memset(request, 0, sizeof(*request)); - request->rq_reqbuf = reqbuf; - request->rq_reqbuf_len = pool->prp_rq_size; - request->rq_pool = pool; - - return request; -} - -/** - * Returns freed \a request to pool. 
- */ -static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request) -{ - struct ptlrpc_request_pool *pool = request->rq_pool; - - spin_lock(&pool->prp_lock); - LASSERT(list_empty(&request->rq_list)); - LASSERT(!request->rq_receiving_reply); - list_add_tail(&request->rq_list, &pool->prp_req_list); - spin_unlock(&pool->prp_lock); -} - -void ptlrpc_add_unreplied(struct ptlrpc_request *req) -{ - struct obd_import *imp = req->rq_import; - struct ptlrpc_request *iter; - - assert_spin_locked(&imp->imp_lock); - LASSERT(list_empty(&req->rq_unreplied_list)); - - /* unreplied list is sorted by xid in ascending order */ - list_for_each_entry_reverse(iter, &imp->imp_unreplied_list, rq_unreplied_list) { - - LASSERT(req->rq_xid != iter->rq_xid); - if (req->rq_xid < iter->rq_xid) - continue; - list_add(&req->rq_unreplied_list, &iter->rq_unreplied_list); - return; - } - list_add(&req->rq_unreplied_list, &imp->imp_unreplied_list); -} - -void ptlrpc_assign_next_xid_nolock(struct ptlrpc_request *req) -{ - req->rq_xid = ptlrpc_next_xid(); - ptlrpc_add_unreplied(req); -} - -static inline void ptlrpc_assign_next_xid(struct ptlrpc_request *req) -{ - spin_lock(&req->rq_import->imp_lock); - ptlrpc_assign_next_xid_nolock(req); - spin_unlock(&req->rq_import->imp_lock); -} - -int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, - __u32 version, int opcode, char **bufs, - struct ptlrpc_cli_ctx *ctx) -{ - int count; - struct obd_import *imp; - __u32 *lengths; - int rc; - - count = req_capsule_filled_sizes(&request->rq_pill, RCL_CLIENT); - imp = request->rq_import; - lengths = request->rq_pill.rc_area[RCL_CLIENT]; - - if (unlikely(ctx)) { - request->rq_cli_ctx = sptlrpc_cli_ctx_get(ctx); - } else { - rc = sptlrpc_req_get_ctx(request); - if (rc) - goto out_free; - } - sptlrpc_req_set_flavor(request, opcode); - - rc = lustre_pack_request(request, imp->imp_msg_magic, count, - lengths, bufs); - if (rc) - goto out_ctx; - - lustre_msg_add_version(request->rq_reqmsg, version); - 
request->rq_send_state = LUSTRE_IMP_FULL; - request->rq_type = PTL_RPC_MSG_REQUEST; - - request->rq_req_cbid.cbid_fn = request_out_callback; - request->rq_req_cbid.cbid_arg = request; - - request->rq_reply_cbid.cbid_fn = reply_in_callback; - request->rq_reply_cbid.cbid_arg = request; - - request->rq_reply_deadline = 0; - request->rq_bulk_deadline = 0; - request->rq_req_deadline = 0; - request->rq_phase = RQ_PHASE_NEW; - request->rq_next_phase = RQ_PHASE_UNDEFINED; - - request->rq_request_portal = imp->imp_client->cli_request_portal; - request->rq_reply_portal = imp->imp_client->cli_reply_portal; - - ptlrpc_at_set_req_timeout(request); - - lustre_msg_set_opc(request->rq_reqmsg, opcode); - ptlrpc_assign_next_xid(request); - - /* Let's setup deadline for req/reply/bulk unlink for opcode. */ - if (cfs_fail_val == opcode) { - time64_t *fail_t = NULL, *fail2_t = NULL; - - if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) { - fail_t = &request->rq_bulk_deadline; - } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) { - fail_t = &request->rq_reply_deadline; - } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) { - fail_t = &request->rq_req_deadline; - } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) { - fail_t = &request->rq_reply_deadline; - fail2_t = &request->rq_bulk_deadline; - } - - if (fail_t) { - *fail_t = ktime_get_real_seconds() + LONG_UNLINK; - - if (fail2_t) - *fail2_t = ktime_get_real_seconds() + - LONG_UNLINK; - - /* The RPC is infected, let the test change the - * fail_loc - */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(2 * HZ); - set_current_state(TASK_RUNNING); - } - } - - return 0; - -out_ctx: - LASSERT(!request->rq_pool); - sptlrpc_cli_ctx_put(request->rq_cli_ctx, 1); -out_free: - class_import_put(imp); - return rc; -} -EXPORT_SYMBOL(ptlrpc_request_bufs_pack); - -/** - * Pack request buffers for network transfer, performing necessary encryption - * steps if necessary. 
- */ -int ptlrpc_request_pack(struct ptlrpc_request *request, - __u32 version, int opcode) -{ - int rc; - - rc = ptlrpc_request_bufs_pack(request, version, opcode, NULL, NULL); - if (rc) - return rc; - - /* - * For some old 1.8 clients (< 1.8.7), they will LASSERT the size of - * ptlrpc_body sent from server equal to local ptlrpc_body size, so we - * have to send old ptlrpc_body to keep interoperability with these - * clients. - * - * Only three kinds of server->client RPCs so far: - * - LDLM_BL_CALLBACK - * - LDLM_CP_CALLBACK - * - LDLM_GL_CALLBACK - * - * XXX This should be removed whenever we drop the interoperability with - * the these old clients. - */ - if (opcode == LDLM_BL_CALLBACK || opcode == LDLM_CP_CALLBACK || - opcode == LDLM_GL_CALLBACK) - req_capsule_shrink(&request->rq_pill, &RMF_PTLRPC_BODY, - sizeof(struct ptlrpc_body_v2), RCL_CLIENT); - - return rc; -} -EXPORT_SYMBOL(ptlrpc_request_pack); - -/** - * Helper function to allocate new request on import \a imp - * and possibly using existing request from pool \a pool if provided. - * Returns allocated request structure with import field filled or - * NULL on error. - */ -static inline -struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp, - struct ptlrpc_request_pool *pool) -{ - struct ptlrpc_request *request; - - request = ptlrpc_request_cache_alloc(GFP_NOFS); - - if (!request && pool) - request = ptlrpc_prep_req_from_pool(pool); - - if (request) { - ptlrpc_cli_req_init(request); - - LASSERTF((unsigned long)imp > 0x1000, "%p", imp); - LASSERT(imp != LP_POISON); - LASSERTF((unsigned long)imp->imp_client > 0x1000, "%p\n", - imp->imp_client); - LASSERT(imp->imp_client != LP_POISON); - - request->rq_import = class_import_get(imp); - } else { - CERROR("request allocation out of memory\n"); - } - - return request; -} - -/** - * Helper function for creating a request. 
- * Calls __ptlrpc_request_alloc to allocate new request structure and inits - * buffer structures according to capsule template \a format. - * Returns allocated request structure pointer or NULL on error. - */ -static struct ptlrpc_request * -ptlrpc_request_alloc_internal(struct obd_import *imp, - struct ptlrpc_request_pool *pool, - const struct req_format *format) -{ - struct ptlrpc_request *request; - - request = __ptlrpc_request_alloc(imp, pool); - if (!request) - return NULL; - - req_capsule_init(&request->rq_pill, request, RCL_CLIENT); - req_capsule_set(&request->rq_pill, format); - return request; -} - -/** - * Allocate new request structure for import \a imp and initialize its - * buffer structure according to capsule template \a format. - */ -struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp, - const struct req_format *format) -{ - return ptlrpc_request_alloc_internal(imp, NULL, format); -} -EXPORT_SYMBOL(ptlrpc_request_alloc); - -/** - * Allocate new request structure for import \a imp from pool \a pool and - * initialize its buffer structure according to capsule template \a format. - */ -struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp, - struct ptlrpc_request_pool *pool, - const struct req_format *format) -{ - return ptlrpc_request_alloc_internal(imp, pool, format); -} -EXPORT_SYMBOL(ptlrpc_request_alloc_pool); - -/** - * For requests not from pool, free memory of the request structure. - * For requests obtained from a pool earlier, return request back to pool. - */ -void ptlrpc_request_free(struct ptlrpc_request *request) -{ - if (request->rq_pool) - __ptlrpc_free_req_to_pool(request); - else - ptlrpc_request_cache_free(request); -} -EXPORT_SYMBOL(ptlrpc_request_free); - -/** - * Allocate new request for operation \a opcode and immediately pack it for - * network transfer. - * Only used for simple requests like OBD_PING where the only important - * part of the request is operation itself. 
- * Returns allocated request or NULL on error. - */ -struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp, - const struct req_format *format, - __u32 version, int opcode) -{ - struct ptlrpc_request *req = ptlrpc_request_alloc(imp, format); - int rc; - - if (req) { - rc = ptlrpc_request_pack(req, version, opcode); - if (rc) { - ptlrpc_request_free(req); - req = NULL; - } - } - return req; -} -EXPORT_SYMBOL(ptlrpc_request_alloc_pack); - -/** - * Allocate and initialize new request set structure on the current CPT. - * Returns a pointer to the newly allocated set structure or NULL on error. - */ -struct ptlrpc_request_set *ptlrpc_prep_set(void) -{ - struct ptlrpc_request_set *set; - int cpt; - - cpt = cfs_cpt_current(cfs_cpt_tab, 0); - set = kzalloc_node(sizeof(*set), GFP_NOFS, - cfs_cpt_spread_node(cfs_cpt_tab, cpt)); - if (!set) - return NULL; - atomic_set(&set->set_refcount, 1); - INIT_LIST_HEAD(&set->set_requests); - init_waitqueue_head(&set->set_waitq); - atomic_set(&set->set_new_count, 0); - atomic_set(&set->set_remaining, 0); - spin_lock_init(&set->set_new_req_lock); - INIT_LIST_HEAD(&set->set_new_requests); - INIT_LIST_HEAD(&set->set_cblist); - set->set_max_inflight = UINT_MAX; - set->set_producer = NULL; - set->set_producer_arg = NULL; - set->set_rc = 0; - - return set; -} -EXPORT_SYMBOL(ptlrpc_prep_set); - -/** - * Allocate and initialize new request set structure with flow control - * extension. This extension allows to control the number of requests in-flight - * for the whole set. A callback function to generate requests must be provided - * and the request set will keep the number of requests sent over the wire to - * @max_inflight. - * Returns a pointer to the newly allocated set structure or NULL on error. 
- */ -struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func, - void *arg) - -{ - struct ptlrpc_request_set *set; - - set = ptlrpc_prep_set(); - if (!set) - return NULL; - - set->set_max_inflight = max; - set->set_producer = func; - set->set_producer_arg = arg; - - return set; -} - -/** - * Wind down and free request set structure previously allocated with - * ptlrpc_prep_set. - * Ensures that all requests on the set have completed and removes - * all requests from the request list in a set. - * If any unsent request happen to be on the list, pretends that they got - * an error in flight and calls their completion handler. - */ -void ptlrpc_set_destroy(struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *req; - int expected_phase; - int n = 0; - - /* Requests on the set should either all be completed, or all be new */ - expected_phase = (atomic_read(&set->set_remaining) == 0) ? - RQ_PHASE_COMPLETE : RQ_PHASE_NEW; - list_for_each_entry(req, &set->set_requests, rq_set_chain) { - LASSERT(req->rq_phase == expected_phase); - n++; - } - - LASSERTF(atomic_read(&set->set_remaining) == 0 || - atomic_read(&set->set_remaining) == n, "%d / %d\n", - atomic_read(&set->set_remaining), n); - - while ((req = list_first_entry_or_null(&set->set_requests, - struct ptlrpc_request, - rq_set_chain))) { - list_del_init(&req->rq_set_chain); - - LASSERT(req->rq_phase == expected_phase); - - if (req->rq_phase == RQ_PHASE_NEW) { - ptlrpc_req_interpret(NULL, req, -EBADR); - atomic_dec(&set->set_remaining); - } - - spin_lock(&req->rq_lock); - req->rq_set = NULL; - req->rq_invalid_rqset = 0; - spin_unlock(&req->rq_lock); - - ptlrpc_req_finished(req); - } - - LASSERT(atomic_read(&set->set_remaining) == 0); - - ptlrpc_reqset_put(set); -} -EXPORT_SYMBOL(ptlrpc_set_destroy); - -/** - * Add a new request to the general purpose request set. - * Assumes request reference from the caller. 
- */ -void ptlrpc_set_add_req(struct ptlrpc_request_set *set, - struct ptlrpc_request *req) -{ - LASSERT(list_empty(&req->rq_set_chain)); - - /* The set takes over the caller's request reference */ - list_add_tail(&req->rq_set_chain, &set->set_requests); - req->rq_set = set; - atomic_inc(&set->set_remaining); - req->rq_queued_time = jiffies; - - if (req->rq_reqmsg) - lustre_msg_set_jobid(req->rq_reqmsg, NULL); - - if (set->set_producer) - /* - * If the request set has a producer callback, the RPC must be - * sent straight away - */ - ptlrpc_send_new_req(req); -} -EXPORT_SYMBOL(ptlrpc_set_add_req); - -/** - * Add a request to a request with dedicated server thread - * and wake the thread to make any necessary processing. - * Currently only used for ptlrpcd. - */ -void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc, - struct ptlrpc_request *req) -{ - struct ptlrpc_request_set *set = pc->pc_set; - int count, i; - - LASSERT(!req->rq_set); - LASSERT(test_bit(LIOD_STOP, &pc->pc_flags) == 0); - - spin_lock(&set->set_new_req_lock); - /* The set takes over the caller's request reference. */ - req->rq_set = set; - req->rq_queued_time = jiffies; - list_add_tail(&req->rq_set_chain, &set->set_new_requests); - count = atomic_inc_return(&set->set_new_count); - spin_unlock(&set->set_new_req_lock); - - /* Only need to call wakeup once for the first entry. */ - if (count == 1) { - wake_up(&set->set_waitq); - - /* - * XXX: It maybe unnecessary to wakeup all the partners. But to - * guarantee the async RPC can be processed ASAP, we have - * no other better choice. It maybe fixed in future. - */ - for (i = 0; i < pc->pc_npartners; i++) - wake_up(&pc->pc_partners[i]->pc_set->set_waitq); - } -} - -/** - * Based on the current state of the import, determine if the request - * can be sent, is an error, or should be delayed. - * - * Returns true if this request should be delayed. If false, and - * *status is set, then the request can not be sent and *status is the - * error code. 
If false and status is 0, then request can be sent. - * - * The imp->imp_lock must be held. - */ -static int ptlrpc_import_delay_req(struct obd_import *imp, - struct ptlrpc_request *req, int *status) -{ - int delay = 0; - - *status = 0; - - if (req->rq_ctx_init || req->rq_ctx_fini) { - /* always allow ctx init/fini rpc go through */ - } else if (imp->imp_state == LUSTRE_IMP_NEW) { - DEBUG_REQ(D_ERROR, req, "Uninitialized import."); - *status = -EIO; - } else if (imp->imp_state == LUSTRE_IMP_CLOSED) { - /* pings may safely race with umount */ - DEBUG_REQ(lustre_msg_get_opc(req->rq_reqmsg) == OBD_PING ? - D_HA : D_ERROR, req, "IMP_CLOSED "); - *status = -EIO; - } else if (ptlrpc_send_limit_expired(req)) { - /* probably doesn't need to be a D_ERROR after initial testing */ - DEBUG_REQ(D_HA, req, "send limit expired "); - *status = -ETIMEDOUT; - } else if (req->rq_send_state == LUSTRE_IMP_CONNECTING && - imp->imp_state == LUSTRE_IMP_CONNECTING) { - /* allow CONNECT even if import is invalid */ - if (atomic_read(&imp->imp_inval_count) != 0) { - DEBUG_REQ(D_ERROR, req, "invalidate in flight"); - *status = -EIO; - } - } else if (imp->imp_invalid || imp->imp_obd->obd_no_recov) { - if (!imp->imp_deactive) - DEBUG_REQ(D_NET, req, "IMP_INVALID"); - *status = -ESHUTDOWN; /* bz 12940 */ - } else if (req->rq_import_generation != imp->imp_generation) { - DEBUG_REQ(D_ERROR, req, "req wrong generation:"); - *status = -EIO; - } else if (req->rq_send_state != imp->imp_state) { - /* invalidate in progress - any requests should be drop */ - if (atomic_read(&imp->imp_inval_count) != 0) { - DEBUG_REQ(D_ERROR, req, "invalidate in flight"); - *status = -EIO; - } else if (req->rq_no_delay) { - *status = -EWOULDBLOCK; - } else if (req->rq_allow_replay && - (imp->imp_state == LUSTRE_IMP_REPLAY || - imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS || - imp->imp_state == LUSTRE_IMP_REPLAY_WAIT || - imp->imp_state == LUSTRE_IMP_RECOVER)) { - DEBUG_REQ(D_HA, req, "allow during recovery.\n"); - } else { 
- delay = 1; - } - } - - return delay; -} - -/** - * Decide if the error message should be printed to the console or not. - * Makes its decision based on request type, status, and failure frequency. - * - * \param[in] req request that failed and may need a console message - * - * \retval false if no message should be printed - * \retval true if console message should be printed - */ -static bool ptlrpc_console_allow(struct ptlrpc_request *req) -{ - __u32 opc; - - LASSERT(req->rq_reqmsg); - opc = lustre_msg_get_opc(req->rq_reqmsg); - - /* Suppress particular reconnect errors which are to be expected. */ - if (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT) { - int err; - - /* Suppress timed out reconnect requests */ - if (lustre_handle_is_used(&req->rq_import->imp_remote_handle) || - req->rq_timedout) - return false; - - /* - * Suppress most unavailable/again reconnect requests, but - * print occasionally so it is clear client is trying to - * connect to a server where no target is running. - */ - err = lustre_msg_get_status(req->rq_repmsg); - if ((err == -ENODEV || err == -EAGAIN) && - req->rq_import->imp_conn_cnt % 30 != 20) - return false; - } - - return true; -} - -/** - * Check request processing status. - * Returns the status. - */ -static int ptlrpc_check_status(struct ptlrpc_request *req) -{ - int err; - - err = lustre_msg_get_status(req->rq_repmsg); - if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) { - struct obd_import *imp = req->rq_import; - lnet_nid_t nid = imp->imp_connection->c_peer.nid; - __u32 opc = lustre_msg_get_opc(req->rq_reqmsg); - - /* -EAGAIN is normal when using POSIX flocks */ - if (ptlrpc_console_allow(req) && - !(opc == LDLM_ENQUEUE && err == -EAGAIN)) - LCONSOLE_ERROR_MSG(0x011, "%s: operation %s to node %s failed: rc = %d\n", - imp->imp_obd->obd_name, - ll_opcode2str(opc), - libcfs_nid2str(nid), err); - return err < 0 ? 
err : -EINVAL; - } - - if (err < 0) - DEBUG_REQ(D_INFO, req, "status is %d", err); - else if (err > 0) - /* XXX: translate this error from net to host */ - DEBUG_REQ(D_INFO, req, "status is %d", err); - - return err; -} - -/** - * save pre-versions of objects into request for replay. - * Versions are obtained from server reply. - * used for VBR. - */ -static void ptlrpc_save_versions(struct ptlrpc_request *req) -{ - struct lustre_msg *repmsg = req->rq_repmsg; - struct lustre_msg *reqmsg = req->rq_reqmsg; - __u64 *versions = lustre_msg_get_versions(repmsg); - - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) - return; - - LASSERT(versions); - lustre_msg_set_versions(reqmsg, versions); - CDEBUG(D_INFO, "Client save versions [%#llx/%#llx]\n", - versions[0], versions[1]); -} - -__u64 ptlrpc_known_replied_xid(struct obd_import *imp) -{ - struct ptlrpc_request *req; - - assert_spin_locked(&imp->imp_lock); - if (list_empty(&imp->imp_unreplied_list)) - return 0; - - req = list_entry(imp->imp_unreplied_list.next, struct ptlrpc_request, - rq_unreplied_list); - LASSERTF(req->rq_xid >= 1, "XID:%llu\n", req->rq_xid); - - if (imp->imp_known_replied_xid < req->rq_xid - 1) - imp->imp_known_replied_xid = req->rq_xid - 1; - - return req->rq_xid - 1; -} - -/** - * Callback function called when client receives RPC reply for \a req. - * Returns 0 on success or error code. - * The return value would be assigned to req->rq_status by the caller - * as request processing status. - * This function also decides if the request needs to be saved for later replay. 
- */ -static int after_reply(struct ptlrpc_request *req) -{ - struct obd_import *imp = req->rq_import; - struct obd_device *obd = req->rq_import->imp_obd; - int rc; - struct timespec64 work_start; - long timediff; - u64 committed; - - LASSERT(obd); - /* repbuf must be unlinked */ - LASSERT(!req->rq_receiving_reply && req->rq_reply_unlinked); - - if (req->rq_reply_truncated) { - if (ptlrpc_no_resend(req)) { - DEBUG_REQ(D_ERROR, req, "reply buffer overflow, expected: %d, actual size: %d", - req->rq_nob_received, req->rq_repbuf_len); - return -EOVERFLOW; - } - - sptlrpc_cli_free_repbuf(req); - /* - * Pass the required reply buffer size (include space for early - * reply). NB: no need to round up because alloc_repbuf will - * round it up - */ - req->rq_replen = req->rq_nob_received; - req->rq_nob_received = 0; - spin_lock(&req->rq_lock); - req->rq_resend = 1; - spin_unlock(&req->rq_lock); - return 0; - } - - ktime_get_real_ts64(&work_start); - timediff = (work_start.tv_sec - req->rq_sent_tv.tv_sec) * USEC_PER_SEC + - (work_start.tv_nsec - req->rq_sent_tv.tv_nsec) / - NSEC_PER_USEC; - /* - * NB Until this point, the whole of the incoming message, - * including buflens, status etc is in the sender's byte order. - */ - rc = sptlrpc_cli_unwrap_reply(req); - if (rc) { - DEBUG_REQ(D_ERROR, req, "unwrap reply failed (%d):", rc); - return rc; - } - - /* Security layer unwrap might ask resend this request. 
*/ - if (req->rq_resend) - return 0; - - rc = unpack_reply(req); - if (rc) - return rc; - - /* retry indefinitely on EINPROGRESS */ - if (lustre_msg_get_status(req->rq_repmsg) == -EINPROGRESS && - ptlrpc_no_resend(req) == 0 && !req->rq_no_retry_einprogress) { - time64_t now = ktime_get_real_seconds(); - - DEBUG_REQ(D_RPCTRACE, req, "Resending request on EINPROGRESS"); - spin_lock(&req->rq_lock); - req->rq_resend = 1; - spin_unlock(&req->rq_lock); - req->rq_nr_resend++; - - /* Readjust the timeout for current conditions */ - ptlrpc_at_set_req_timeout(req); - /* - * delay resend to give a chance to the server to get ready. - * The delay is increased by 1s on every resend and is capped to - * the current request timeout (i.e. obd_timeout if AT is off, - * or AT service time x 125% + 5s, see at_est2timeout) - */ - if (req->rq_nr_resend > req->rq_timeout) - req->rq_sent = now + req->rq_timeout; - else - req->rq_sent = now + req->rq_nr_resend; - - /* Resend for EINPROGRESS will use a new XID */ - spin_lock(&imp->imp_lock); - list_del_init(&req->rq_unreplied_list); - spin_unlock(&imp->imp_lock); - - return 0; - } - - if (obd->obd_svc_stats) { - lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, - timediff); - ptlrpc_lprocfs_rpc_sent(req, timediff); - } - - if (lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_REPLY && - lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_ERR) { - DEBUG_REQ(D_ERROR, req, "invalid packet received (type=%u)", - lustre_msg_get_type(req->rq_repmsg)); - return -EPROTO; - } - - if (lustre_msg_get_opc(req->rq_reqmsg) != OBD_PING) - CFS_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_PAUSE_REP, cfs_fail_val); - ptlrpc_at_adj_service(req, lustre_msg_get_timeout(req->rq_repmsg)); - ptlrpc_at_adj_net_latency(req, - lustre_msg_get_service_time(req->rq_repmsg)); - - rc = ptlrpc_check_status(req); - imp->imp_connect_error = rc; - - if (rc) { - /* - * Either we've been evicted, or the server has failed for - * some reason. 
Try to reconnect, and if that fails, punt to - * the upcall. - */ - if (ptlrpc_recoverable_error(rc)) { - if (req->rq_send_state != LUSTRE_IMP_FULL || - imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) { - return rc; - } - ptlrpc_request_handle_notconn(req); - return rc; - } - } else { - /* - * Let's look if server sent slv. Do it only for RPC with - * rc == 0. - */ - ldlm_cli_update_pool(req); - } - - /* Store transno in reqmsg for replay. */ - if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) { - req->rq_transno = lustre_msg_get_transno(req->rq_repmsg); - lustre_msg_set_transno(req->rq_reqmsg, req->rq_transno); - } - - if (imp->imp_replayable) { - spin_lock(&imp->imp_lock); - /* - * No point in adding already-committed requests to the replay - * list, we will just remove them immediately. b=9829 - */ - if (req->rq_transno != 0 && - (req->rq_transno > - lustre_msg_get_last_committed(req->rq_repmsg) || - req->rq_replay)) { - /* version recovery */ - ptlrpc_save_versions(req); - ptlrpc_retain_replayable_request(req, imp); - } else if (req->rq_commit_cb && - list_empty(&req->rq_replay_list)) { - /* - * NB: don't call rq_commit_cb if it's already on - * rq_replay_list, ptlrpc_free_committed() will call - * it later, see LU-3618 for details - */ - spin_unlock(&imp->imp_lock); - req->rq_commit_cb(req); - spin_lock(&imp->imp_lock); - } - - /* Replay-enabled imports return commit-status information. */ - committed = lustre_msg_get_last_committed(req->rq_repmsg); - if (likely(committed > imp->imp_peer_committed_transno)) - imp->imp_peer_committed_transno = committed; - - ptlrpc_free_committed(imp); - - if (!list_empty(&imp->imp_replay_list)) { - struct ptlrpc_request *last; - - last = list_entry(imp->imp_replay_list.prev, - struct ptlrpc_request, - rq_replay_list); - /* - * Requests with rq_replay stay on the list even if no - * commit is expected. 
- */ - if (last->rq_transno > imp->imp_peer_committed_transno) - ptlrpc_pinger_commit_expected(imp); - } - - spin_unlock(&imp->imp_lock); - } - - return rc; -} - -/** - * Helper function to send request \a req over the network for the first time - * Also adjusts request phase. - * Returns 0 on success or error code. - */ -static int ptlrpc_send_new_req(struct ptlrpc_request *req) -{ - struct obd_import *imp = req->rq_import; - u64 min_xid = 0; - int rc; - - LASSERT(req->rq_phase == RQ_PHASE_NEW); - - /* do not try to go further if there is not enough memory in enc_pool */ - if (req->rq_sent && req->rq_bulk) - if (req->rq_bulk->bd_iov_count > get_free_pages_in_pool() && - pool_is_at_full_capacity()) - return -ENOMEM; - - if (req->rq_sent && (req->rq_sent > ktime_get_real_seconds()) && - (!req->rq_generation_set || - req->rq_import_generation == imp->imp_generation)) - return 0; - - ptlrpc_rqphase_move(req, RQ_PHASE_RPC); - - spin_lock(&imp->imp_lock); - - LASSERT(req->rq_xid); - LASSERT(!list_empty(&req->rq_unreplied_list)); - - if (!req->rq_generation_set) - req->rq_import_generation = imp->imp_generation; - - if (ptlrpc_import_delay_req(imp, req, &rc)) { - spin_lock(&req->rq_lock); - req->rq_waiting = 1; - spin_unlock(&req->rq_lock); - - DEBUG_REQ(D_HA, req, "req from PID %d waiting for recovery: (%s != %s)", - lustre_msg_get_status(req->rq_reqmsg), - ptlrpc_import_state_name(req->rq_send_state), - ptlrpc_import_state_name(imp->imp_state)); - LASSERT(list_empty(&req->rq_list)); - list_add_tail(&req->rq_list, &imp->imp_delayed_list); - atomic_inc(&req->rq_import->imp_inflight); - spin_unlock(&imp->imp_lock); - return 0; - } - - if (rc != 0) { - spin_unlock(&imp->imp_lock); - req->rq_status = rc; - ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); - return rc; - } - - LASSERT(list_empty(&req->rq_list)); - list_add_tail(&req->rq_list, &imp->imp_sending_list); - atomic_inc(&req->rq_import->imp_inflight); - - /* find the known replied XID from the unreplied list, CONNECT - 
* and DISCONNECT requests are skipped to make the sanity check - * on server side happy. see process_req_last_xid(). - * - * For CONNECT: Because replay requests have lower XID, it'll - * break the sanity check if CONNECT bump the exp_last_xid on - * server. - * - * For DISCONNECT: Since client will abort inflight RPC before - * sending DISCONNECT, DISCONNECT may carry an XID which higher - * than the inflight RPC. - */ - if (!ptlrpc_req_is_connect(req) && !ptlrpc_req_is_disconnect(req)) - min_xid = ptlrpc_known_replied_xid(imp); - spin_unlock(&imp->imp_lock); - - lustre_msg_set_last_xid(req->rq_reqmsg, min_xid); - - lustre_msg_set_status(req->rq_reqmsg, current->pid); - - rc = sptlrpc_req_refresh_ctx(req, -1); - if (rc) { - if (req->rq_err) { - req->rq_status = rc; - return 1; - } - spin_lock(&req->rq_lock); - req->rq_wait_ctx = 1; - spin_unlock(&req->rq_lock); - return 0; - } - - CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n", - current->comm, - imp->imp_obd->obd_uuid.uuid, - lustre_msg_get_status(req->rq_reqmsg), req->rq_xid, - libcfs_nid2str(imp->imp_connection->c_peer.nid), - lustre_msg_get_opc(req->rq_reqmsg)); - - rc = ptl_send_rpc(req, 0); - if (rc == -ENOMEM) { - spin_lock(&imp->imp_lock); - if (!list_empty(&req->rq_list)) { - list_del_init(&req->rq_list); - if (atomic_dec_and_test(&req->rq_import->imp_inflight)) - wake_up_all(&req->rq_import->imp_recovery_waitq); - } - spin_unlock(&imp->imp_lock); - ptlrpc_rqphase_move(req, RQ_PHASE_NEW); - return rc; - } - if (rc) { - DEBUG_REQ(D_HA, req, "send failed (%d); expect timeout", rc); - spin_lock(&req->rq_lock); - req->rq_net_err = 1; - spin_unlock(&req->rq_lock); - return rc; - } - return 0; -} - -static inline int ptlrpc_set_producer(struct ptlrpc_request_set *set) -{ - int remaining, rc; - - LASSERT(set->set_producer); - - remaining = atomic_read(&set->set_remaining); - - /* - * populate the ->set_requests list with requests until we - * reach the maximum number of RPCs 
in flight for this set - */ - while (atomic_read(&set->set_remaining) < set->set_max_inflight) { - rc = set->set_producer(set, set->set_producer_arg); - if (rc == -ENOENT) { - /* no more RPC to produce */ - set->set_producer = NULL; - set->set_producer_arg = NULL; - return 0; - } - } - - return (atomic_read(&set->set_remaining) - remaining); -} - -/** - * this sends any unsent RPCs in \a set and returns 1 if all are sent - * and no more replies are expected. - * (it is possible to get less replies than requests sent e.g. due to timed out - * requests or requests that we had trouble to send out) - * - * NOTE: This function contains a potential schedule point (cond_resched()). - */ -int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *req, *next; - struct list_head comp_reqs; - int force_timer_recalc = 0; - - if (atomic_read(&set->set_remaining) == 0) - return 1; - - INIT_LIST_HEAD(&comp_reqs); - list_for_each_entry_safe(req, next, &set->set_requests, rq_set_chain) { - struct obd_import *imp = req->rq_import; - int unregistered = 0; - int rc = 0; - - /* - * This schedule point is mainly for the ptlrpcd caller of this - * function. Most ptlrpc sets are not long-lived and unbounded - * in length, but at the least the set used by the ptlrpcd is. - * Since the processing time is unbounded, we need to insert an - * explicit schedule point to make the thread well-behaved. 
- */ - cond_resched(); - - if (req->rq_phase == RQ_PHASE_NEW && - ptlrpc_send_new_req(req)) { - force_timer_recalc = 1; - } - - /* delayed send - skip */ - if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent) - continue; - - /* delayed resend - skip */ - if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend && - req->rq_sent > ktime_get_real_seconds()) - continue; - - if (!(req->rq_phase == RQ_PHASE_RPC || - req->rq_phase == RQ_PHASE_BULK || - req->rq_phase == RQ_PHASE_INTERPRET || - req->rq_phase == RQ_PHASE_UNREG_RPC || - req->rq_phase == RQ_PHASE_UNREG_BULK || - req->rq_phase == RQ_PHASE_COMPLETE)) { - DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase); - LBUG(); - } - - if (req->rq_phase == RQ_PHASE_UNREG_RPC || - req->rq_phase == RQ_PHASE_UNREG_BULK) { - LASSERT(req->rq_next_phase != req->rq_phase); - LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED); - - if (req->rq_req_deadline && - !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) - req->rq_req_deadline = 0; - if (req->rq_reply_deadline && - !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) - req->rq_reply_deadline = 0; - if (req->rq_bulk_deadline && - !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) - req->rq_bulk_deadline = 0; - - /* - * Skip processing until reply is unlinked. We - * can't return to pool before that and we can't - * call interpret before that. We need to make - * sure that all rdma transfers finished and will - * not corrupt any data. - */ - if (req->rq_phase == RQ_PHASE_UNREG_RPC && - ptlrpc_client_recv_or_unlink(req)) - continue; - if (req->rq_phase == RQ_PHASE_UNREG_BULK && - ptlrpc_client_bulk_active(req)) - continue; - - /* - * Turn fail_loc off to prevent it from looping - * forever. 
- */ - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) { - OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK, - OBD_FAIL_ONCE); - } - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) { - OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK, - OBD_FAIL_ONCE); - } - - /* Move to next phase if reply was successfully - * unlinked. - */ - ptlrpc_rqphase_move(req, req->rq_next_phase); - } - - if (req->rq_phase == RQ_PHASE_COMPLETE) { - list_move_tail(&req->rq_set_chain, &comp_reqs); - continue; - } - - if (req->rq_phase == RQ_PHASE_INTERPRET) - goto interpret; - - /* Note that this also will start async reply unlink. */ - if (req->rq_net_err && !req->rq_timedout) { - ptlrpc_expire_one_request(req, 1); - - /* Check if we still need to wait for unlink. */ - if (ptlrpc_client_recv_or_unlink(req) || - ptlrpc_client_bulk_active(req)) - continue; - /* If there is no need to resend, fail it now. */ - if (req->rq_no_resend) { - if (req->rq_status == 0) - req->rq_status = -EIO; - ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); - goto interpret; - } else { - continue; - } - } - - if (req->rq_err) { - spin_lock(&req->rq_lock); - req->rq_replied = 0; - spin_unlock(&req->rq_lock); - if (req->rq_status == 0) - req->rq_status = -EIO; - ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); - goto interpret; - } - - /* - * ptlrpc_set_wait allow signal to abort the timeout - * so it sets rq_intr regardless of individual rpc - * timeouts. The synchronous IO waiting path sets - * rq_intr irrespective of whether ptlrpcd - * has seen a timeout. Our policy is to only interpret - * interrupted rpcs after they have timed out, so we - * need to enforce that here. 
- */ - - if (req->rq_intr && (req->rq_timedout || req->rq_waiting || - req->rq_wait_ctx)) { - req->rq_status = -EINTR; - ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); - goto interpret; - } - - if (req->rq_phase == RQ_PHASE_RPC) { - if (req->rq_timedout || req->rq_resend || - req->rq_waiting || req->rq_wait_ctx) { - int status; - - if (!ptlrpc_unregister_reply(req, 1)) { - ptlrpc_unregister_bulk(req, 1); - continue; - } - - spin_lock(&imp->imp_lock); - if (ptlrpc_import_delay_req(imp, req, - &status)) { - /* - * put on delay list - only if we wait - * recovery finished - before send - */ - list_del_init(&req->rq_list); - list_add_tail(&req->rq_list, - &imp->imp_delayed_list); - spin_unlock(&imp->imp_lock); - continue; - } - - if (status != 0) { - req->rq_status = status; - ptlrpc_rqphase_move(req, - RQ_PHASE_INTERPRET); - spin_unlock(&imp->imp_lock); - goto interpret; - } - if (ptlrpc_no_resend(req) && - !req->rq_wait_ctx) { - req->rq_status = -ENOTCONN; - ptlrpc_rqphase_move(req, - RQ_PHASE_INTERPRET); - spin_unlock(&imp->imp_lock); - goto interpret; - } - - list_del_init(&req->rq_list); - list_add_tail(&req->rq_list, - &imp->imp_sending_list); - - spin_unlock(&imp->imp_lock); - - spin_lock(&req->rq_lock); - req->rq_waiting = 0; - spin_unlock(&req->rq_lock); - - if (req->rq_timedout || req->rq_resend) { - /* This is re-sending anyway, let's mark req as resend. */ - spin_lock(&req->rq_lock); - req->rq_resend = 1; - spin_unlock(&req->rq_lock); - if (req->rq_bulk && - !ptlrpc_unregister_bulk(req, 1)) - continue; - } - /* - * rq_wait_ctx is only touched by ptlrpcd, - * so no lock is needed here. 
- */ - status = sptlrpc_req_refresh_ctx(req, -1); - if (status) { - if (req->rq_err) { - req->rq_status = status; - spin_lock(&req->rq_lock); - req->rq_wait_ctx = 0; - spin_unlock(&req->rq_lock); - force_timer_recalc = 1; - } else { - spin_lock(&req->rq_lock); - req->rq_wait_ctx = 1; - spin_unlock(&req->rq_lock); - } - - continue; - } else { - spin_lock(&req->rq_lock); - req->rq_wait_ctx = 0; - spin_unlock(&req->rq_lock); - } - - rc = ptl_send_rpc(req, 0); - if (rc == -ENOMEM) { - spin_lock(&imp->imp_lock); - if (!list_empty(&req->rq_list)) - list_del_init(&req->rq_list); - spin_unlock(&imp->imp_lock); - ptlrpc_rqphase_move(req, RQ_PHASE_NEW); - continue; - } - if (rc) { - DEBUG_REQ(D_HA, req, - "send failed: rc = %d", rc); - force_timer_recalc = 1; - spin_lock(&req->rq_lock); - req->rq_net_err = 1; - spin_unlock(&req->rq_lock); - continue; - } - /* need to reset the timeout */ - force_timer_recalc = 1; - } - - spin_lock(&req->rq_lock); - - if (ptlrpc_client_early(req)) { - ptlrpc_at_recv_early_reply(req); - spin_unlock(&req->rq_lock); - continue; - } - - /* Still waiting for a reply? */ - if (ptlrpc_client_recv(req)) { - spin_unlock(&req->rq_lock); - continue; - } - - /* Did we actually receive a reply? */ - if (!ptlrpc_client_replied(req)) { - spin_unlock(&req->rq_lock); - continue; - } - - spin_unlock(&req->rq_lock); - - /* - * unlink from net because we are going to - * swab in-place of reply buffer - */ - unregistered = ptlrpc_unregister_reply(req, 1); - if (!unregistered) - continue; - - req->rq_status = after_reply(req); - if (req->rq_resend) - continue; - - /* - * If there is no bulk associated with this request, - * then we're done and should let the interpreter - * process the reply. Similarly if the RPC returned - * an error, and therefore the bulk will never arrive. 
- */ - if (!req->rq_bulk || req->rq_status < 0) { - ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); - goto interpret; - } - - ptlrpc_rqphase_move(req, RQ_PHASE_BULK); - } - - LASSERT(req->rq_phase == RQ_PHASE_BULK); - if (ptlrpc_client_bulk_active(req)) - continue; - - if (req->rq_bulk->bd_failure) { - /* - * The RPC reply arrived OK, but the bulk screwed - * up! Dead weird since the server told us the RPC - * was good after getting the REPLY for her GET or - * the ACK for her PUT. - */ - DEBUG_REQ(D_ERROR, req, "bulk transfer failed"); - req->rq_status = -EIO; - } - - ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); - -interpret: - LASSERT(req->rq_phase == RQ_PHASE_INTERPRET); - - /* - * This moves to "unregistering" phase we need to wait for - * reply unlink. - */ - if (!unregistered && !ptlrpc_unregister_reply(req, 1)) { - /* start async bulk unlink too */ - ptlrpc_unregister_bulk(req, 1); - continue; - } - - if (!ptlrpc_unregister_bulk(req, 1)) - continue; - - /* When calling interpret receive should already be finished. */ - LASSERT(!req->rq_receiving_reply); - - ptlrpc_req_interpret(env, req, req->rq_status); - - if (ptlrpcd_check_work(req)) { - atomic_dec(&set->set_remaining); - continue; - } - ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE); - - CDEBUG(req->rq_reqmsg ? D_RPCTRACE : 0, - "Completed RPC pname:cluuid:pid:xid:nid:opc %s:%s:%d:%llu:%s:%d\n", - current->comm, imp->imp_obd->obd_uuid.uuid, - lustre_msg_get_status(req->rq_reqmsg), req->rq_xid, - libcfs_nid2str(imp->imp_connection->c_peer.nid), - lustre_msg_get_opc(req->rq_reqmsg)); - - spin_lock(&imp->imp_lock); - /* - * Request already may be not on sending or delaying list. This - * may happen in the case of marking it erroneous for the case - * ptlrpc_import_delay_req(req, status) find it impossible to - * allow sending this rpc and returns *status != 0. 
- */ - if (!list_empty(&req->rq_list)) { - list_del_init(&req->rq_list); - atomic_dec(&imp->imp_inflight); - } - list_del_init(&req->rq_unreplied_list); - spin_unlock(&imp->imp_lock); - - atomic_dec(&set->set_remaining); - wake_up_all(&imp->imp_recovery_waitq); - - if (set->set_producer) { - /* produce a new request if possible */ - if (ptlrpc_set_producer(set) > 0) - force_timer_recalc = 1; - - /* - * free the request that has just been completed - * in order not to pollute set->set_requests - */ - list_del_init(&req->rq_set_chain); - spin_lock(&req->rq_lock); - req->rq_set = NULL; - req->rq_invalid_rqset = 0; - spin_unlock(&req->rq_lock); - - /* record rq_status to compute the final status later */ - if (req->rq_status != 0) - set->set_rc = req->rq_status; - ptlrpc_req_finished(req); - } else { - list_move_tail(&req->rq_set_chain, &comp_reqs); - } - } - - /* - * move completed request at the head of list so it's easier for - * caller to find them - */ - list_splice(&comp_reqs, &set->set_requests); - - /* If we hit an error, we want to recover promptly. */ - return atomic_read(&set->set_remaining) == 0 || force_timer_recalc; -} -EXPORT_SYMBOL(ptlrpc_check_set); - -/** - * Time out request \a req. is \a async_unlink is set, that means do not wait - * until LNet actually confirms network buffer unlinking. - * Return 1 if we should give up further retrying attempts or 0 otherwise. - */ -int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink) -{ - struct obd_import *imp = req->rq_import; - int rc = 0; - - spin_lock(&req->rq_lock); - req->rq_timedout = 1; - spin_unlock(&req->rq_lock); - - DEBUG_REQ(D_WARNING, req, "Request sent has %s: [sent %lld/real %lld]", - req->rq_net_err ? "failed due to network error" : - ((req->rq_real_sent == 0 || - req->rq_real_sent < req->rq_sent || - req->rq_real_sent >= req->rq_deadline) ? 
- "timed out for sent delay" : "timed out for slow reply"), - (s64)req->rq_sent, (s64)req->rq_real_sent); - - if (imp && obd_debug_peer_on_timeout) - LNetDebugPeer(imp->imp_connection->c_peer); - - ptlrpc_unregister_reply(req, async_unlink); - ptlrpc_unregister_bulk(req, async_unlink); - - if (obd_dump_on_timeout) - libcfs_debug_dumplog(); - - if (!imp) { - DEBUG_REQ(D_HA, req, "NULL import: already cleaned up?"); - return 1; - } - - atomic_inc(&imp->imp_timeouts); - - /* The DLM server doesn't want recovery run on its imports. */ - if (imp->imp_dlm_fake) - return 1; - - /* - * If this request is for recovery or other primordial tasks, - * then error it out here. - */ - if (req->rq_ctx_init || req->rq_ctx_fini || - req->rq_send_state != LUSTRE_IMP_FULL || - imp->imp_obd->obd_no_recov) { - DEBUG_REQ(D_RPCTRACE, req, "err -110, sent_state=%s (now=%s)", - ptlrpc_import_state_name(req->rq_send_state), - ptlrpc_import_state_name(imp->imp_state)); - spin_lock(&req->rq_lock); - req->rq_status = -ETIMEDOUT; - req->rq_err = 1; - spin_unlock(&req->rq_lock); - return 1; - } - - /* - * if a request can't be resent we can't wait for an answer after - * the timeout - */ - if (ptlrpc_no_resend(req)) { - DEBUG_REQ(D_RPCTRACE, req, "TIMEOUT-NORESEND:"); - rc = 1; - } - - ptlrpc_fail_import(imp, lustre_msg_get_conn_cnt(req->rq_reqmsg)); - - return rc; -} - -/** - * Time out all uncompleted requests in request set pointed by \a data - * Called when wait_event_idle_timeout times out. - */ -void ptlrpc_expired_set(struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *req; - time64_t now = ktime_get_real_seconds(); - - /* A timeout expired. See which reqs it applies to... */ - list_for_each_entry(req, &set->set_requests, rq_set_chain) { - - /* don't expire request waiting for context */ - if (req->rq_wait_ctx) - continue; - - /* Request in-flight? 
*/ - if (!((req->rq_phase == RQ_PHASE_RPC && - !req->rq_waiting && !req->rq_resend) || - (req->rq_phase == RQ_PHASE_BULK))) - continue; - - if (req->rq_timedout || /* already dealt with */ - req->rq_deadline > now) /* not expired */ - continue; - - /* - * Deal with this guy. Do it asynchronously to not block - * ptlrpcd thread. - */ - ptlrpc_expire_one_request(req, 1); - } -} - -/** - * Sets rq_intr flag in \a req under spinlock. - */ -void ptlrpc_mark_interrupted(struct ptlrpc_request *req) -{ - spin_lock(&req->rq_lock); - req->rq_intr = 1; - spin_unlock(&req->rq_lock); -} -EXPORT_SYMBOL(ptlrpc_mark_interrupted); - -/** - * Interrupts (sets interrupted flag) all uncompleted requests in - * a set \a data. Called when l_wait_event_abortable_timeout receives signal. - */ -static void ptlrpc_interrupted_set(struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *req; - CDEBUG(D_RPCTRACE, "INTERRUPTED SET %p\n", set); - - list_for_each_entry(req, &set->set_requests, rq_set_chain) { - - if (req->rq_phase != RQ_PHASE_RPC && - req->rq_phase != RQ_PHASE_UNREG_RPC) - continue; - - ptlrpc_mark_interrupted(req); - } -} - -/** - * Get the smallest timeout in the set; this does NOT set a timeout. - */ -int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) -{ - time64_t now = ktime_get_real_seconds(); - int timeout = 0; - struct ptlrpc_request *req; - time64_t deadline; - - list_for_each_entry(req, &set->set_requests, rq_set_chain) { - - /* Request in-flight? */ - if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) || - (req->rq_phase == RQ_PHASE_BULK) || - (req->rq_phase == RQ_PHASE_NEW))) - continue; - - /* Already timed out. */ - if (req->rq_timedout) - continue; - - /* Waiting for ctx. 
*/ - if (req->rq_wait_ctx) - continue; - - if (req->rq_phase == RQ_PHASE_NEW) - deadline = req->rq_sent; - else if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend) - deadline = req->rq_sent; - else - deadline = req->rq_sent + req->rq_timeout; - - if (deadline <= now) /* actually expired already */ - timeout = 1; /* ASAP */ - else if (timeout == 0 || timeout > deadline - now) - timeout = deadline - now; - } - return timeout; -} - -/** - * Send all unset request from the set and then wait until all - * requests in the set complete (either get a reply, timeout, get an - * error or otherwise be interrupted). - * Returns 0 on success or error code otherwise. - */ -int ptlrpc_set_wait(struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *req; - int rc, timeout; - - if (set->set_producer) - (void)ptlrpc_set_producer(set); - else - list_for_each_entry(req, &set->set_requests, rq_set_chain) { - if (req->rq_phase == RQ_PHASE_NEW) - (void)ptlrpc_send_new_req(req); - } - - if (list_empty(&set->set_requests)) - return 0; - - do { - timeout = ptlrpc_set_next_timeout(set); - - /* - * wait until all complete, interrupted, or an in-flight - * req times out - */ - CDEBUG(D_RPCTRACE, "set %p going to sleep for %d seconds\n", - set, timeout); - - if (timeout == 0 && !signal_pending(current)) { - /* - * No requests are in-flight (ether timed out - * or delayed), so we can allow interrupts. - * We still want to block for a limited time, - * so we allow interrupts during the timeout. - */ - rc = l_wait_event_abortable_timeout(set->set_waitq, - ptlrpc_check_set(NULL, set), - HZ); - if (rc == 0) { - rc = -ETIMEDOUT; - ptlrpc_expired_set(set); - } else if (rc < 0) { - rc = -EINTR; - ptlrpc_interrupted_set(set); - } else - rc = 0; - } else { - /* - * At least one request is in flight, so no - * interrupts are allowed. Wait until all - * complete, or an in-flight req times out. - */ - rc = wait_event_idle_timeout(set->set_waitq, - ptlrpc_check_set(NULL, set), - (timeout ? 
timeout : 1) * HZ); - if (rc == 0) { - ptlrpc_expired_set(set); - rc = -ETIMEDOUT; - /* - * LU-769 - if we ignored the signal - * because it was already pending when - * we started, we need to handle it - * now or we risk it being ignored - * forever - */ - if (l_fatal_signal_pending(current)) - ptlrpc_interrupted_set(set); - } else - rc = 0; - } - - LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT); - - /* - * -EINTR => all requests have been flagged rq_intr so next - * check completes. - * -ETIMEDOUT => someone timed out. When all reqs have - * timed out, signals are enabled allowing completion with - * EINTR. - * I don't really care if we go once more round the loop in - * the error cases -eeb. - */ - if (rc == 0 && atomic_read(&set->set_remaining) == 0) { - list_for_each_entry(req, &set->set_requests, rq_set_chain) { - spin_lock(&req->rq_lock); - req->rq_invalid_rqset = 1; - spin_unlock(&req->rq_lock); - } - } - } while (rc != 0 || atomic_read(&set->set_remaining) != 0); - - LASSERT(atomic_read(&set->set_remaining) == 0); - - rc = set->set_rc; /* rq_status of already freed requests if any */ - list_for_each_entry(req, &set->set_requests, rq_set_chain) { - LASSERT(req->rq_phase == RQ_PHASE_COMPLETE); - if (req->rq_status != 0) - rc = req->rq_status; - } - - if (set->set_interpret) { - int (*interpreter)(struct ptlrpc_request_set *set, void *, int) = - set->set_interpret; - rc = interpreter(set, set->set_arg, rc); - } else { - struct ptlrpc_set_cbdata *cbdata, *n; - int err; - - list_for_each_entry_safe(cbdata, n, - &set->set_cblist, psc_item) { - list_del_init(&cbdata->psc_item); - err = cbdata->psc_interpret(set, cbdata->psc_data, rc); - if (err && !rc) - rc = err; - kfree(cbdata); - } - } - - return rc; -} -EXPORT_SYMBOL(ptlrpc_set_wait); - -/** - * Helper function for request freeing. - * Called when request count reached zero and request needs to be freed. 
- * Removes request from all sorts of sending/replay lists it might be on, - * frees network buffers if any are present. - * If \a locked is set, that means caller is already holding import imp_lock - * and so we no longer need to reobtain it (for certain lists manipulations) - */ -static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) -{ - if (!request) - return; - LASSERT(!request->rq_srv_req); - LASSERT(!request->rq_export); - LASSERTF(!request->rq_receiving_reply, "req %p\n", request); - LASSERTF(list_empty(&request->rq_list), "req %p\n", request); - LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request); - LASSERTF(!request->rq_replay, "req %p\n", request); - - req_capsule_fini(&request->rq_pill); - - /* - * We must take it off the imp_replay_list first. Otherwise, we'll set - * request->rq_reqmsg to NULL while osc_close is dereferencing it. - */ - if (request->rq_import) { - if (!locked) - spin_lock(&request->rq_import->imp_lock); - list_del_init(&request->rq_replay_list); - list_del_init(&request->rq_unreplied_list); - if (!locked) - spin_unlock(&request->rq_import->imp_lock); - } - LASSERTF(list_empty(&request->rq_replay_list), "req %p\n", request); - - if (atomic_read(&request->rq_refcount) != 0) { - DEBUG_REQ(D_ERROR, request, - "freeing request with nonzero refcount"); - LBUG(); - } - - if (request->rq_repbuf) - sptlrpc_cli_free_repbuf(request); - - if (request->rq_import) { - class_import_put(request->rq_import); - request->rq_import = NULL; - } - if (request->rq_bulk) - ptlrpc_free_bulk(request->rq_bulk); - - if (request->rq_reqbuf || request->rq_clrbuf) - sptlrpc_cli_free_reqbuf(request); - - if (request->rq_cli_ctx) - sptlrpc_req_put_ctx(request, !locked); - - if (request->rq_pool) - __ptlrpc_free_req_to_pool(request); - else - ptlrpc_request_cache_free(request); -} - -/** - * Helper function - * Drops one reference count for request \a request. - * \a locked set indicates that caller holds import imp_lock. 
- * Frees the request when reference count reaches zero. - */ -static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked) -{ - if (!request) - return 1; - - if (request == LP_POISON || - request->rq_reqmsg == LP_POISON) { - CERROR("dereferencing freed request (bug 575)\n"); - LBUG(); - return 1; - } - - DEBUG_REQ(D_INFO, request, "refcount now %u", - atomic_read(&request->rq_refcount) - 1); - - if (atomic_dec_and_test(&request->rq_refcount)) { - __ptlrpc_free_req(request, locked); - return 1; - } - - return 0; -} - -/** - * Drops one reference count for a request. - */ -void ptlrpc_req_finished(struct ptlrpc_request *request) -{ - __ptlrpc_req_finished(request, 0); -} -EXPORT_SYMBOL(ptlrpc_req_finished); - -/** - * Returns xid of a \a request - */ -__u64 ptlrpc_req_xid(struct ptlrpc_request *request) -{ - return request->rq_xid; -} -EXPORT_SYMBOL(ptlrpc_req_xid); - -/** - * Disengage the client's reply buffer from the network - * NB does _NOT_ unregister any client-side bulk. - * IDEMPOTENT, but _not_ safe against concurrent callers. - * The request owner (i.e. the thread doing the I/O) must call... - * Returns 0 on success or 1 if unregistering cannot be made. - */ -static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) -{ - int rc; - wait_queue_head_t *wq; - - /* Might sleep. */ - LASSERT(!in_interrupt()); - - /* Let's setup deadline for reply unlink. */ - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - async && request->rq_reply_deadline == 0 && cfs_fail_val == 0) - request->rq_reply_deadline = - ktime_get_real_seconds() + LONG_UNLINK; - - /* Nothing left to do. */ - if (!ptlrpc_client_recv_or_unlink(request)) - return 1; - - LNetMDUnlink(request->rq_reply_md_h); - - /* Let's check it once again. */ - if (!ptlrpc_client_recv_or_unlink(request)) - return 1; - - /* Move to "Unregistering" phase as reply was not unlinked yet. 
*/ - ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC); - - /* Do not wait for unlink to finish. */ - if (async) - return 0; - - /* - * We have to wait_event_idle_timeout() whatever the result, to give liblustre - * a chance to run reply_in_callback(), and to make sure we've - * unlinked before returning a req to the pool. - */ - if (request->rq_set) - wq = &request->rq_set->set_waitq; - else - wq = &request->rq_reply_waitq; - - for (;;) { - /* - * Network access will complete in finite time but the HUGE - * timeout lets us CWARN for visibility of sluggish NALs - */ - int cnt = 0; - while (cnt < LONG_UNLINK && - (rc = wait_event_idle_timeout(*wq, - !ptlrpc_client_recv_or_unlink(request), - HZ)) == 0) - cnt += 1; - if (rc > 0) { - ptlrpc_rqphase_move(request, request->rq_next_phase); - return 1; - } - - DEBUG_REQ(D_WARNING, request, - "Unexpectedly long timeout receiving_reply=%d req_unlinked=%d reply_unlinked=%d", - request->rq_receiving_reply, - request->rq_req_unlinked, - request->rq_reply_unlinked); - } - return 0; -} - -static void ptlrpc_free_request(struct ptlrpc_request *req) -{ - spin_lock(&req->rq_lock); - req->rq_replay = 0; - spin_unlock(&req->rq_lock); - - if (req->rq_commit_cb) - req->rq_commit_cb(req); - list_del_init(&req->rq_replay_list); - - __ptlrpc_req_finished(req, 1); -} - -/** - * the request is committed and dropped from the replay list of its import - */ -void ptlrpc_request_committed(struct ptlrpc_request *req, int force) -{ - struct obd_import *imp = req->rq_import; - - spin_lock(&imp->imp_lock); - if (list_empty(&req->rq_replay_list)) { - spin_unlock(&imp->imp_lock); - return; - } - - if (force || req->rq_transno <= imp->imp_peer_committed_transno) - ptlrpc_free_request(req); - - spin_unlock(&imp->imp_lock); -} -EXPORT_SYMBOL(ptlrpc_request_committed); - -/** - * Iterates through replay_list on import and prunes - * all requests have transno smaller than last_committed for the - * import and don't have rq_replay set. 
- * Since requests are sorted in transno order, stops when meeting first - * transno bigger than last_committed. - * caller must hold imp->imp_lock - */ -void ptlrpc_free_committed(struct obd_import *imp) -{ - struct ptlrpc_request *req, *saved; - struct ptlrpc_request *last_req = NULL; /* temporary fire escape */ - bool skip_committed_list = true; - - assert_spin_locked(&imp->imp_lock); - - if (imp->imp_peer_committed_transno == imp->imp_last_transno_checked && - imp->imp_generation == imp->imp_last_generation_checked) { - CDEBUG(D_INFO, "%s: skip recheck: last_committed %llu\n", - imp->imp_obd->obd_name, imp->imp_peer_committed_transno); - return; - } - CDEBUG(D_RPCTRACE, "%s: committing for last_committed %llu gen %d\n", - imp->imp_obd->obd_name, imp->imp_peer_committed_transno, - imp->imp_generation); - - if (imp->imp_generation != imp->imp_last_generation_checked || - !imp->imp_last_transno_checked) - skip_committed_list = false; - - imp->imp_last_transno_checked = imp->imp_peer_committed_transno; - imp->imp_last_generation_checked = imp->imp_generation; - - list_for_each_entry_safe(req, saved, &imp->imp_replay_list, - rq_replay_list) { - /* XXX ok to remove when 1357 resolved - rread 05/29/03 */ - LASSERT(req != last_req); - last_req = req; - - if (req->rq_transno == 0) { - DEBUG_REQ(D_EMERG, req, "zero transno during replay"); - LBUG(); - } - if (req->rq_import_generation < imp->imp_generation) { - DEBUG_REQ(D_RPCTRACE, req, "free request with old gen"); - goto free_req; - } - - /* not yet committed */ - if (req->rq_transno > imp->imp_peer_committed_transno) { - DEBUG_REQ(D_RPCTRACE, req, "stopping search"); - break; - } - - if (req->rq_replay) { - DEBUG_REQ(D_RPCTRACE, req, "keeping (FL_REPLAY)"); - list_move_tail(&req->rq_replay_list, - &imp->imp_committed_list); - continue; - } - - DEBUG_REQ(D_INFO, req, "commit (last_committed %llu)", - imp->imp_peer_committed_transno); -free_req: - ptlrpc_free_request(req); - } - if (skip_committed_list) - return; - - 
list_for_each_entry_safe(req, saved, &imp->imp_committed_list, - rq_replay_list) { - LASSERT(req->rq_transno != 0); - if (req->rq_import_generation < imp->imp_generation || - !req->rq_replay) { - DEBUG_REQ(D_RPCTRACE, req, "free %s open request", - req->rq_import_generation < - imp->imp_generation ? "stale" : "closed"); - - if (imp->imp_replay_cursor == &req->rq_replay_list) - imp->imp_replay_cursor = - req->rq_replay_list.next; - - ptlrpc_free_request(req); - } - } -} - -/** - * Schedule previously sent request for resend. - * For bulk requests we assign new xid (to avoid problems with - * lost replies and therefore several transfers landing into same buffer - * from different sending attempts). - */ -void ptlrpc_resend_req(struct ptlrpc_request *req) -{ - DEBUG_REQ(D_HA, req, "going to resend"); - spin_lock(&req->rq_lock); - - /* - * Request got reply but linked to the import list still. - * Let ptlrpc_check_set() to process it. - */ - if (ptlrpc_client_replied(req)) { - spin_unlock(&req->rq_lock); - DEBUG_REQ(D_HA, req, "it has reply, so skip it"); - return; - } - - lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 }); - req->rq_status = -EAGAIN; - - req->rq_resend = 1; - req->rq_net_err = 0; - req->rq_timedout = 0; - ptlrpc_client_wake_req(req); - spin_unlock(&req->rq_lock); -} - -/** - * Grab additional reference on a request \a req - */ -struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req) -{ - atomic_inc(&req->rq_refcount); - return req; -} -EXPORT_SYMBOL(ptlrpc_request_addref); - -/** - * Add a request to import replay_list. 
- * Must be called under imp_lock - */ -void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, - struct obd_import *imp) -{ - struct ptlrpc_request *iter; - assert_spin_locked(&imp->imp_lock); - - if (req->rq_transno == 0) { - DEBUG_REQ(D_EMERG, req, "saving request with zero transno"); - LBUG(); - } - - /* - * clear this for new requests that were resent as well - * as resent replayed requests. - */ - lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT); - - /* don't re-add requests that have been replayed */ - if (!list_empty(&req->rq_replay_list)) - return; - - lustre_msg_add_flags(req->rq_reqmsg, MSG_REPLAY); - - spin_lock(&req->rq_lock); - req->rq_resend = 0; - spin_unlock(&req->rq_lock); - - LASSERT(imp->imp_replayable); - /* Balanced in ptlrpc_free_committed, usually. */ - ptlrpc_request_addref(req); - list_for_each_entry_reverse(iter, &imp->imp_replay_list, rq_replay_list) { - /* - * We may have duplicate transnos if we create and then - * open a file, or for closes retained if to match creating - * opens, so use req->rq_xid as a secondary key. - * (See bugs 684, 685, and 428.) - * XXX no longer needed, but all opens need transnos! - */ - if (iter->rq_transno > req->rq_transno) - continue; - - if (iter->rq_transno == req->rq_transno) { - LASSERT(iter->rq_xid != req->rq_xid); - if (iter->rq_xid > req->rq_xid) - continue; - } - - list_add(&req->rq_replay_list, &iter->rq_replay_list); - return; - } - - list_add(&req->rq_replay_list, &imp->imp_replay_list); -} - -/** - * Send request and wait until it completes. - * Returns request processing status. 
- */ -int ptlrpc_queue_wait(struct ptlrpc_request *req) -{ - struct ptlrpc_request_set *set; - int rc; - - LASSERT(!req->rq_set); - LASSERT(!req->rq_receiving_reply); - - set = ptlrpc_prep_set(); - if (!set) { - CERROR("cannot allocate ptlrpc set: rc = %d\n", -ENOMEM); - return -ENOMEM; - } - - /* for distributed debugging */ - lustre_msg_set_status(req->rq_reqmsg, current->pid); - - /* add a ref for the set (see comment in ptlrpc_set_add_req) */ - ptlrpc_request_addref(req); - ptlrpc_set_add_req(set, req); - rc = ptlrpc_set_wait(set); - ptlrpc_set_destroy(set); - - return rc; -} -EXPORT_SYMBOL(ptlrpc_queue_wait); - -/** - * Callback used for replayed requests reply processing. - * In case of successful reply calls registered request replay callback. - * In case of error restart replay process. - */ -static int ptlrpc_replay_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - void *data, int rc) -{ - struct ptlrpc_replay_async_args *aa = data; - struct obd_import *imp = req->rq_import; - - atomic_dec(&imp->imp_replay_inflight); - - /* - * Note: if it is bulk replay (MDS-MDS replay), then even if - * server got the request, but bulk transfer timeout, let's - * replay the bulk req again - */ - if (!ptlrpc_client_replied(req) || - (req->rq_bulk && - lustre_msg_get_status(req->rq_repmsg) == -ETIMEDOUT)) { - DEBUG_REQ(D_ERROR, req, "request replay timed out.\n"); - rc = -ETIMEDOUT; - goto out; - } - - if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR && - (lustre_msg_get_status(req->rq_repmsg) == -ENOTCONN || - lustre_msg_get_status(req->rq_repmsg) == -ENODEV)) { - rc = lustre_msg_get_status(req->rq_repmsg); - goto out; - } - - /** VBR: check version failure */ - if (lustre_msg_get_status(req->rq_repmsg) == -EOVERFLOW) { - /** replay was failed due to version mismatch */ - DEBUG_REQ(D_WARNING, req, "Version mismatch during replay\n"); - spin_lock(&imp->imp_lock); - imp->imp_vbr_failed = 1; - imp->imp_no_lock_replay = 1; - 
spin_unlock(&imp->imp_lock); - lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status); - } else { - /** The transno had better not change over replay. */ - LASSERTF(lustre_msg_get_transno(req->rq_reqmsg) == - lustre_msg_get_transno(req->rq_repmsg) || - lustre_msg_get_transno(req->rq_repmsg) == 0, - "%#llx/%#llx\n", - lustre_msg_get_transno(req->rq_reqmsg), - lustre_msg_get_transno(req->rq_repmsg)); - } - - spin_lock(&imp->imp_lock); - /** if replays by version then gap occur on server, no trust to locks */ - if (lustre_msg_get_flags(req->rq_repmsg) & MSG_VERSION_REPLAY) - imp->imp_no_lock_replay = 1; - imp->imp_last_replay_transno = lustre_msg_get_transno(req->rq_reqmsg); - spin_unlock(&imp->imp_lock); - LASSERT(imp->imp_last_replay_transno); - - /* transaction number shouldn't be bigger than the latest replayed */ - if (req->rq_transno > lustre_msg_get_transno(req->rq_reqmsg)) { - DEBUG_REQ(D_ERROR, req, - "Reported transno %llu is bigger than the replayed one: %llu", - req->rq_transno, - lustre_msg_get_transno(req->rq_reqmsg)); - rc = -EINVAL; - goto out; - } - - DEBUG_REQ(D_HA, req, "got rep"); - - /* let the callback do fixups, possibly including in the request */ - if (req->rq_replay_cb) - req->rq_replay_cb(req); - - if (ptlrpc_client_replied(req) && - lustre_msg_get_status(req->rq_repmsg) != aa->praa_old_status) { - DEBUG_REQ(D_ERROR, req, "status %d, old was %d", - lustre_msg_get_status(req->rq_repmsg), - aa->praa_old_status); - } else { - /* Put it back for re-replay. 
*/ - lustre_msg_set_status(req->rq_repmsg, aa->praa_old_status); - } - - /* - * Errors while replay can set transno to 0, but - * imp_last_replay_transno shouldn't be set to 0 anyway - */ - if (req->rq_transno == 0) - CERROR("Transno is 0 during replay!\n"); - - /* continue with recovery */ - rc = ptlrpc_import_recovery_state_machine(imp); - out: - req->rq_send_state = aa->praa_old_state; - - if (rc != 0) - /* this replay failed, so restart recovery */ - ptlrpc_connect_import(imp); - - return rc; -} - -/** - * Prepares and queues request for replay. - * Adds it to ptlrpcd queue for actual sending. - * Returns 0 on success. - */ -int ptlrpc_replay_req(struct ptlrpc_request *req) -{ - struct ptlrpc_replay_async_args *aa; - - LASSERT(req->rq_import->imp_state == LUSTRE_IMP_REPLAY); - - LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); - aa = ptlrpc_req_async_args(req); - memset(aa, 0, sizeof(*aa)); - - /* Prepare request to be resent with ptlrpcd */ - aa->praa_old_state = req->rq_send_state; - req->rq_send_state = LUSTRE_IMP_REPLAY; - req->rq_phase = RQ_PHASE_NEW; - req->rq_next_phase = RQ_PHASE_UNDEFINED; - if (req->rq_repmsg) - aa->praa_old_status = lustre_msg_get_status(req->rq_repmsg); - req->rq_status = 0; - req->rq_interpret_reply = ptlrpc_replay_interpret; - /* Readjust the timeout for current conditions */ - ptlrpc_at_set_req_timeout(req); - - /* - * Tell server the net_latency, so the server can calculate how long - * it should wait for next replay - */ - lustre_msg_set_service_time(req->rq_reqmsg, - ptlrpc_at_get_net_latency(req)); - DEBUG_REQ(D_HA, req, "REPLAY"); - - atomic_inc(&req->rq_import->imp_replay_inflight); - ptlrpc_request_addref(req); /* ptlrpcd needs a ref */ - - ptlrpcd_add_req(req); - return 0; -} - -/** - * Aborts all in-flight request on import \a imp sending and delayed lists - */ -void ptlrpc_abort_inflight(struct obd_import *imp) -{ - struct ptlrpc_request *req, *n; - - /* - * Make sure that no new requests get processed for this 
import. - * ptlrpc_{queue,set}_wait must (and does) hold imp_lock while testing - * this flag and then putting requests on sending_list or delayed_list. - */ - spin_lock(&imp->imp_lock); - - /* - * XXX locking? Maybe we should remove each request with the list - * locked? Also, how do we know if the requests on the list are - * being freed at this time? - */ - list_for_each_entry_safe(req, n, &imp->imp_sending_list, rq_list) { - DEBUG_REQ(D_RPCTRACE, req, "inflight"); - - spin_lock(&req->rq_lock); - if (req->rq_import_generation < imp->imp_generation) { - req->rq_err = 1; - req->rq_status = -EIO; - ptlrpc_client_wake_req(req); - } - spin_unlock(&req->rq_lock); - } - - list_for_each_entry_safe(req, n, &imp->imp_delayed_list, rq_list) { - DEBUG_REQ(D_RPCTRACE, req, "aborting waiting req"); - - spin_lock(&req->rq_lock); - if (req->rq_import_generation < imp->imp_generation) { - req->rq_err = 1; - req->rq_status = -EIO; - ptlrpc_client_wake_req(req); - } - spin_unlock(&req->rq_lock); - } - - /* - * Last chance to free reqs left on the replay list, but we - * will still leak reqs that haven't committed. - */ - if (imp->imp_replayable) - ptlrpc_free_committed(imp); - - spin_unlock(&imp->imp_lock); -} - -/** - * Abort all uncompleted requests in request set \a set - */ -void ptlrpc_abort_set(struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *req, *tmp; - - list_for_each_entry_safe(req, tmp, &set->set_requests, rq_set_chain) { - spin_lock(&req->rq_lock); - if (req->rq_phase != RQ_PHASE_RPC) { - spin_unlock(&req->rq_lock); - continue; - } - - req->rq_err = 1; - req->rq_status = -EINTR; - ptlrpc_client_wake_req(req); - spin_unlock(&req->rq_lock); - } -} - -static __u64 ptlrpc_last_xid; -static spinlock_t ptlrpc_last_xid_lock; - -/** - * Initialize the XID for the node. This is common among all requests on - * this node, and only requires the property that it is monotonically - * increasing. It does not need to be sequential. 
Since this is also used - * as the RDMA match bits, it is important that a single client NOT have - * the same match bits for two different in-flight requests, hence we do - * NOT want to have an XID per target or similar. - * - * To avoid an unlikely collision between match bits after a client reboot - * (which would deliver old data into the wrong RDMA buffer) initialize - * the XID based on the current time, assuming a maximum RPC rate of 1M RPC/s. - * If the time is clearly incorrect, we instead use a 62-bit random number. - * In the worst case the random number will overflow 1M RPCs per second in - * 9133 years, or permutations thereof. - */ -#define YEAR_2004 (1ULL << 30) -void ptlrpc_init_xid(void) -{ - time64_t now = ktime_get_real_seconds(); - - spin_lock_init(&ptlrpc_last_xid_lock); - if (now < YEAR_2004) { - get_random_bytes(&ptlrpc_last_xid, sizeof(ptlrpc_last_xid)); - ptlrpc_last_xid >>= 2; - ptlrpc_last_xid |= (1ULL << 61); - } else { - ptlrpc_last_xid = (__u64)now << 20; - } - - /* Always need to be aligned to a power-of-two for multi-bulk BRW */ - BUILD_BUG_ON(((PTLRPC_BULK_OPS_COUNT - 1) & PTLRPC_BULK_OPS_COUNT) != 0); - ptlrpc_last_xid &= PTLRPC_BULK_OPS_MASK; -} - -/** - * Increase xid and returns resulting new value to the caller. - * - * Multi-bulk BRW RPCs consume multiple XIDs for each bulk transfer, starting - * at the returned xid, up to xid + PTLRPC_BULK_OPS_COUNT - 1. The BRW RPC - * itself uses the last bulk xid needed, so the server can determine the - * the number of bulk transfers from the RPC XID and a bitmask. The starting - * xid must align to a power-of-two value. - * - * This is assumed to be true due to the initial ptlrpc_last_xid - * value also being initialized to a power-of-two value. 
LU-1431 - */ -__u64 ptlrpc_next_xid(void) -{ - __u64 next; - - spin_lock(&ptlrpc_last_xid_lock); - next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT; - ptlrpc_last_xid = next; - spin_unlock(&ptlrpc_last_xid_lock); - - return next; -} - -/** - * If request has a new allocated XID (new request or EINPROGRESS resend), - * use this XID as matchbits of bulk, otherwise allocate a new matchbits for - * request to ensure previous bulk fails and avoid problems with lost replies - * and therefore several transfers landing into the same buffer from different - * sending attempts. - */ -void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req) -{ - struct ptlrpc_bulk_desc *bd = req->rq_bulk; - - LASSERT(bd); - - /* - * Generate new matchbits for all resend requests, including - * resend replay. - */ - if (req->rq_resend) { - u64 old_mbits = req->rq_mbits; - - /* - * First time resend on -EINPROGRESS will generate new xid, - * so we can actually use the rq_xid as rq_mbits in such case, - * however, it's bit hard to distinguish such resend with a - * 'resend for the -EINPROGRESS resend'. To make it simple, - * we opt to generate mbits for all resend cases. - */ - if ((bd->bd_import->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_BULK_MBITS)) { - req->rq_mbits = ptlrpc_next_xid(); - } else { - /* old version transfers rq_xid to peer as matchbits */ - spin_lock(&req->rq_import->imp_lock); - list_del_init(&req->rq_unreplied_list); - ptlrpc_assign_next_xid_nolock(req); - spin_unlock(&req->rq_import->imp_lock); - req->rq_mbits = req->rq_xid; - } - - CDEBUG(D_HA, "resend bulk old x%llu new x%llu\n", - old_mbits, req->rq_mbits); - } else if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) { - /* Request being sent first time, use xid as matchbits. */ - req->rq_mbits = req->rq_xid; - } else { - /* - * Replay request, xid and matchbits have already been - * correctly assigned. 
- */ - return; - } - - /* - * For multi-bulk RPCs, rq_mbits is the last mbits needed for bulks so - * that server can infer the number of bulks that were prepared, - * see LU-1431 - */ - req->rq_mbits += DIV_ROUND_UP(bd->bd_iov_count, LNET_MAX_IOV) - 1; -} - -/** - * Get a glimpse at what next xid value might have been. - * Returns possible next xid. - */ -__u64 ptlrpc_sample_next_xid(void) -{ -#if BITS_PER_LONG == 32 - /* need to avoid possible word tearing on 32-bit systems */ - __u64 next; - - spin_lock(&ptlrpc_last_xid_lock); - next = ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT; - spin_unlock(&ptlrpc_last_xid_lock); - - return next; -#else - /* No need to lock, since returned value is racy anyways */ - return ptlrpc_last_xid + PTLRPC_BULK_OPS_COUNT; -#endif -} -EXPORT_SYMBOL(ptlrpc_sample_next_xid); - -/** - * Functions for operating ptlrpc workers. - * - * A ptlrpc work is a function which will be running inside ptlrpc context. - * The callback shouldn't sleep otherwise it will block that ptlrpcd thread. - * - * 1. after a work is created, it can be used many times, that is: - * handler = ptlrpcd_alloc_work(); - * ptlrpcd_queue_work(); - * - * queue it again when necessary: - * ptlrpcd_queue_work(); - * ptlrpcd_destroy_work(); - * 2. ptlrpcd_queue_work() can be called by multiple processes meanwhile, but - * it will only be queued once in any time. Also as its name implies, it may - * have delay before it really runs by ptlrpcd thread. 
- */ -struct ptlrpc_work_async_args { - int (*cb)(const struct lu_env *, void *); - void *cbdata; -}; - -static void ptlrpcd_add_work_req(struct ptlrpc_request *req) -{ - /* re-initialize the req */ - req->rq_timeout = obd_timeout; - req->rq_sent = ktime_get_real_seconds(); - req->rq_deadline = req->rq_sent + req->rq_timeout; - req->rq_phase = RQ_PHASE_INTERPRET; - req->rq_next_phase = RQ_PHASE_COMPLETE; - req->rq_xid = ptlrpc_next_xid(); - req->rq_import_generation = req->rq_import->imp_generation; - - ptlrpcd_add_req(req); -} - -static int work_interpreter(const struct lu_env *env, - struct ptlrpc_request *req, void *data, int rc) -{ - struct ptlrpc_work_async_args *arg = data; - - LASSERT(ptlrpcd_check_work(req)); - - rc = arg->cb(env, arg->cbdata); - - list_del_init(&req->rq_set_chain); - req->rq_set = NULL; - - if (atomic_dec_return(&req->rq_refcount) > 1) { - atomic_set(&req->rq_refcount, 2); - ptlrpcd_add_work_req(req); - } - return rc; -} - -static int worker_format; - -static int ptlrpcd_check_work(struct ptlrpc_request *req) -{ - return req->rq_pill.rc_fmt == (void *)&worker_format; -} - -/** - * Create a work for ptlrpc. - */ -void *ptlrpcd_alloc_work(struct obd_import *imp, - int (*cb)(const struct lu_env *, void *), void *cbdata) -{ - struct ptlrpc_request *req = NULL; - struct ptlrpc_work_async_args *args; - - might_sleep(); - - if (!cb) - return ERR_PTR(-EINVAL); - - /* copy some code from deprecated fakereq. 
*/ - req = ptlrpc_request_cache_alloc(GFP_NOFS); - if (!req) { - CERROR("ptlrpc: run out of memory!\n"); - return ERR_PTR(-ENOMEM); - } - - ptlrpc_cli_req_init(req); - - req->rq_send_state = LUSTRE_IMP_FULL; - req->rq_type = PTL_RPC_MSG_REQUEST; - req->rq_import = class_import_get(imp); - req->rq_interpret_reply = work_interpreter; - /* don't want reply */ - req->rq_no_delay = 1; - req->rq_no_resend = 1; - req->rq_pill.rc_fmt = (void *)&worker_format; - - BUILD_BUG_ON(sizeof(*args) > sizeof(req->rq_async_args)); - args = ptlrpc_req_async_args(req); - args->cb = cb; - args->cbdata = cbdata; - - return req; -} -EXPORT_SYMBOL(ptlrpcd_alloc_work); - -void ptlrpcd_destroy_work(void *handler) -{ - struct ptlrpc_request *req = handler; - - if (req) - ptlrpc_req_finished(req); -} -EXPORT_SYMBOL(ptlrpcd_destroy_work); - -int ptlrpcd_queue_work(void *handler) -{ - struct ptlrpc_request *req = handler; - - /* - * Check if the req is already being queued. - * - * Here comes a trick: it lacks a way of checking if a req is being - * processed reliably in ptlrpc. Here I have to use refcount of req - * for this purpose. This is okay because the caller should use this - * req as opaque data. - Jinshan - */ - LASSERT(atomic_read(&req->rq_refcount) > 0); - if (atomic_inc_return(&req->rq_refcount) == 2) - ptlrpcd_add_work_req(req); - return 0; -} -EXPORT_SYMBOL(ptlrpcd_queue_work); diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c deleted file mode 100644 index fb35a89ca6c6..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/connection.c +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_RPC -#include -#include -#include - -#include "ptlrpc_internal.h" - -static struct rhashtable conn_hash; - -/* - * struct lnet_process_id may contain unassigned bytes which might not - * be zero, so we cannot just hash and compare bytes. 
- */ - -static u32 lnet_process_id_hash(const void *data, u32 len, u32 seed) -{ - const struct lnet_process_id *lpi = data; - - seed = hash_32(seed ^ lpi->pid, 32); - seed ^= hash_64(lpi->nid, 32); - return seed; -} - -static int lnet_process_id_cmp(struct rhashtable_compare_arg *arg, - const void *obj) -{ - const struct lnet_process_id *lpi = arg->key; - const struct ptlrpc_connection *con = obj; - - if (lpi->nid == con->c_peer.nid && - lpi->pid == con->c_peer.pid) - return 0; - return -ESRCH; -} - -static const struct rhashtable_params conn_hash_params = { - .key_len = 1, /* actually variable-length */ - .key_offset = offsetof(struct ptlrpc_connection, c_peer), - .head_offset = offsetof(struct ptlrpc_connection, c_hash), - .hashfn = lnet_process_id_hash, - .obj_cmpfn = lnet_process_id_cmp, -}; - -struct ptlrpc_connection * -ptlrpc_connection_get(struct lnet_process_id peer, lnet_nid_t self, - struct obd_uuid *uuid) -{ - struct ptlrpc_connection *conn, *conn2; - - conn = rhashtable_lookup_fast(&conn_hash, &peer, conn_hash_params); - if (conn) { - ptlrpc_connection_addref(conn); - goto out; - } - - conn = kzalloc(sizeof(*conn), GFP_NOFS); - if (!conn) - return NULL; - - conn->c_peer = peer; - conn->c_self = self; - atomic_set(&conn->c_refcount, 1); - if (uuid) - obd_str2uuid(&conn->c_remote_uuid, uuid->uuid); - - /* - * Add the newly created conn to the hash, on key collision we - * lost a racing addition and must destroy our newly allocated - * connection. The object which exists in the hash will be - * returned, otherwise NULL is returned on success. 
- */ - conn2 = rhashtable_lookup_get_insert_fast(&conn_hash, &conn->c_hash, - conn_hash_params); - if (conn2 != NULL) { - /* insertion failed */ - kfree(conn); - if (IS_ERR(conn2)) - return NULL; - conn = conn2; - ptlrpc_connection_addref(conn); - } -out: - CDEBUG(D_INFO, "conn=%p refcount %d to %s\n", - conn, atomic_read(&conn->c_refcount), - libcfs_nid2str(conn->c_peer.nid)); - return conn; -} - -int ptlrpc_connection_put(struct ptlrpc_connection *conn) -{ - int rc = 0; - - if (!conn) - return rc; - - LASSERT(atomic_read(&conn->c_refcount) > 0); - - /* - * We do not remove connection from hashtable and - * do not free it even if last caller released ref, - * as we want to have it cached for the case it is - * needed again. - * - * Deallocating it and later creating new connection - * again would be wastful. This way we also avoid - * expensive locking to protect things from get/put - * race when found cached connection is freed by - * ptlrpc_connection_put(). - * - * It will be freed later in module unload time, - * when ptlrpc_connection_fini()->lh_exit->conn_exit() - * path is called. - */ - if (atomic_dec_return(&conn->c_refcount) == 0) - rc = 1; - - CDEBUG(D_INFO, "PUT conn=%p refcount %d to %s\n", - conn, atomic_read(&conn->c_refcount), - libcfs_nid2str(conn->c_peer.nid)); - - return rc; -} - -struct ptlrpc_connection * -ptlrpc_connection_addref(struct ptlrpc_connection *conn) -{ - atomic_inc(&conn->c_refcount); - CDEBUG(D_INFO, "conn=%p refcount %d to %s\n", - conn, atomic_read(&conn->c_refcount), - libcfs_nid2str(conn->c_peer.nid)); - - return conn; -} - -static void -conn_exit(void *vconn, void *data) -{ - struct ptlrpc_connection *conn = vconn; - - /* - * Nothing should be left. Connection user put it and - * connection also was deleted from table by this time - * so we should have 0 refs. 
- */ - LASSERTF(atomic_read(&conn->c_refcount) == 0, - "Busy connection with %d refs\n", - atomic_read(&conn->c_refcount)); - kfree(conn); -} - -int ptlrpc_connection_init(void) -{ - return rhashtable_init(&conn_hash, &conn_hash_params); -} - -void ptlrpc_connection_fini(void) -{ - rhashtable_free_and_destroy(&conn_hash, conn_exit, NULL); -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/errno.c b/drivers/staging/lustre/lustre/ptlrpc/errno.c deleted file mode 100644 index b904524fc1c6..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/errno.c +++ /dev/null @@ -1,383 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.txt - * - * GPL HEADER END - */ -/* - * Copyright (C) 2011 FUJITSU LIMITED. All rights reserved. - * - * Copyright (c) 2013, Intel Corporation. - */ - -#include -#include -#include -#include - -/* - * The two translation tables below must define a one-to-one mapping between - * host and network errnos. - * - * EWOULDBLOCK is equal to EAGAIN on all architectures except for parisc, which - * appears irrelevant. Thus, existing references to EWOULDBLOCK are fine. - * - * EDEADLOCK is equal to EDEADLK on x86 but not on sparc, at least. 
A sparc - * host has no context-free way to determine if a LUSTRE_EDEADLK represents an - * EDEADLK or an EDEADLOCK. Therefore, all existing references to EDEADLOCK - * that need to be transferred on wire have been replaced with EDEADLK. - */ -static int lustre_errno_hton_mapping[] = { - [EPERM] = LUSTRE_EPERM, - [ENOENT] = LUSTRE_ENOENT, - [ESRCH] = LUSTRE_ESRCH, - [EINTR] = LUSTRE_EINTR, - [EIO] = LUSTRE_EIO, - [ENXIO] = LUSTRE_ENXIO, - [E2BIG] = LUSTRE_E2BIG, - [ENOEXEC] = LUSTRE_ENOEXEC, - [EBADF] = LUSTRE_EBADF, - [ECHILD] = LUSTRE_ECHILD, - [EAGAIN] = LUSTRE_EAGAIN, - [ENOMEM] = LUSTRE_ENOMEM, - [EACCES] = LUSTRE_EACCES, - [EFAULT] = LUSTRE_EFAULT, - [ENOTBLK] = LUSTRE_ENOTBLK, - [EBUSY] = LUSTRE_EBUSY, - [EEXIST] = LUSTRE_EEXIST, - [EXDEV] = LUSTRE_EXDEV, - [ENODEV] = LUSTRE_ENODEV, - [ENOTDIR] = LUSTRE_ENOTDIR, - [EISDIR] = LUSTRE_EISDIR, - [EINVAL] = LUSTRE_EINVAL, - [ENFILE] = LUSTRE_ENFILE, - [EMFILE] = LUSTRE_EMFILE, - [ENOTTY] = LUSTRE_ENOTTY, - [ETXTBSY] = LUSTRE_ETXTBSY, - [EFBIG] = LUSTRE_EFBIG, - [ENOSPC] = LUSTRE_ENOSPC, - [ESPIPE] = LUSTRE_ESPIPE, - [EROFS] = LUSTRE_EROFS, - [EMLINK] = LUSTRE_EMLINK, - [EPIPE] = LUSTRE_EPIPE, - [EDOM] = LUSTRE_EDOM, - [ERANGE] = LUSTRE_ERANGE, - [EDEADLK] = LUSTRE_EDEADLK, - [ENAMETOOLONG] = LUSTRE_ENAMETOOLONG, - [ENOLCK] = LUSTRE_ENOLCK, - [ENOSYS] = LUSTRE_ENOSYS, - [ENOTEMPTY] = LUSTRE_ENOTEMPTY, - [ELOOP] = LUSTRE_ELOOP, - [ENOMSG] = LUSTRE_ENOMSG, - [EIDRM] = LUSTRE_EIDRM, - [ECHRNG] = LUSTRE_ECHRNG, - [EL2NSYNC] = LUSTRE_EL2NSYNC, - [EL3HLT] = LUSTRE_EL3HLT, - [EL3RST] = LUSTRE_EL3RST, - [ELNRNG] = LUSTRE_ELNRNG, - [EUNATCH] = LUSTRE_EUNATCH, - [ENOCSI] = LUSTRE_ENOCSI, - [EL2HLT] = LUSTRE_EL2HLT, - [EBADE] = LUSTRE_EBADE, - [EBADR] = LUSTRE_EBADR, - [EXFULL] = LUSTRE_EXFULL, - [ENOANO] = LUSTRE_ENOANO, - [EBADRQC] = LUSTRE_EBADRQC, - [EBADSLT] = LUSTRE_EBADSLT, - [EBFONT] = LUSTRE_EBFONT, - [ENOSTR] = LUSTRE_ENOSTR, - [ENODATA] = LUSTRE_ENODATA, - [ETIME] = LUSTRE_ETIME, - [ENOSR] = LUSTRE_ENOSR, - 
[ENONET] = LUSTRE_ENONET, - [ENOPKG] = LUSTRE_ENOPKG, - [EREMOTE] = LUSTRE_EREMOTE, - [ENOLINK] = LUSTRE_ENOLINK, - [EADV] = LUSTRE_EADV, - [ESRMNT] = LUSTRE_ESRMNT, - [ECOMM] = LUSTRE_ECOMM, - [EPROTO] = LUSTRE_EPROTO, - [EMULTIHOP] = LUSTRE_EMULTIHOP, - [EDOTDOT] = LUSTRE_EDOTDOT, - [EBADMSG] = LUSTRE_EBADMSG, - [EOVERFLOW] = LUSTRE_EOVERFLOW, - [ENOTUNIQ] = LUSTRE_ENOTUNIQ, - [EBADFD] = LUSTRE_EBADFD, - [EREMCHG] = LUSTRE_EREMCHG, - [ELIBACC] = LUSTRE_ELIBACC, - [ELIBBAD] = LUSTRE_ELIBBAD, - [ELIBSCN] = LUSTRE_ELIBSCN, - [ELIBMAX] = LUSTRE_ELIBMAX, - [ELIBEXEC] = LUSTRE_ELIBEXEC, - [EILSEQ] = LUSTRE_EILSEQ, - [ERESTART] = LUSTRE_ERESTART, - [ESTRPIPE] = LUSTRE_ESTRPIPE, - [EUSERS] = LUSTRE_EUSERS, - [ENOTSOCK] = LUSTRE_ENOTSOCK, - [EDESTADDRREQ] = LUSTRE_EDESTADDRREQ, - [EMSGSIZE] = LUSTRE_EMSGSIZE, - [EPROTOTYPE] = LUSTRE_EPROTOTYPE, - [ENOPROTOOPT] = LUSTRE_ENOPROTOOPT, - [EPROTONOSUPPORT] = LUSTRE_EPROTONOSUPPORT, - [ESOCKTNOSUPPORT] = LUSTRE_ESOCKTNOSUPPORT, - [EOPNOTSUPP] = LUSTRE_EOPNOTSUPP, - [EPFNOSUPPORT] = LUSTRE_EPFNOSUPPORT, - [EAFNOSUPPORT] = LUSTRE_EAFNOSUPPORT, - [EADDRINUSE] = LUSTRE_EADDRINUSE, - [EADDRNOTAVAIL] = LUSTRE_EADDRNOTAVAIL, - [ENETDOWN] = LUSTRE_ENETDOWN, - [ENETUNREACH] = LUSTRE_ENETUNREACH, - [ENETRESET] = LUSTRE_ENETRESET, - [ECONNABORTED] = LUSTRE_ECONNABORTED, - [ECONNRESET] = LUSTRE_ECONNRESET, - [ENOBUFS] = LUSTRE_ENOBUFS, - [EISCONN] = LUSTRE_EISCONN, - [ENOTCONN] = LUSTRE_ENOTCONN, - [ESHUTDOWN] = LUSTRE_ESHUTDOWN, - [ETOOMANYREFS] = LUSTRE_ETOOMANYREFS, - [ETIMEDOUT] = LUSTRE_ETIMEDOUT, - [ECONNREFUSED] = LUSTRE_ECONNREFUSED, - [EHOSTDOWN] = LUSTRE_EHOSTDOWN, - [EHOSTUNREACH] = LUSTRE_EHOSTUNREACH, - [EALREADY] = LUSTRE_EALREADY, - [EINPROGRESS] = LUSTRE_EINPROGRESS, - [ESTALE] = LUSTRE_ESTALE, - [EUCLEAN] = LUSTRE_EUCLEAN, - [ENOTNAM] = LUSTRE_ENOTNAM, - [ENAVAIL] = LUSTRE_ENAVAIL, - [EISNAM] = LUSTRE_EISNAM, - [EREMOTEIO] = LUSTRE_EREMOTEIO, - [EDQUOT] = LUSTRE_EDQUOT, - [ENOMEDIUM] = LUSTRE_ENOMEDIUM, - [EMEDIUMTYPE] = 
LUSTRE_EMEDIUMTYPE, - [ECANCELED] = LUSTRE_ECANCELED, - [ENOKEY] = LUSTRE_ENOKEY, - [EKEYEXPIRED] = LUSTRE_EKEYEXPIRED, - [EKEYREVOKED] = LUSTRE_EKEYREVOKED, - [EKEYREJECTED] = LUSTRE_EKEYREJECTED, - [EOWNERDEAD] = LUSTRE_EOWNERDEAD, - [ENOTRECOVERABLE] = LUSTRE_ENOTRECOVERABLE, - [ERESTARTSYS] = LUSTRE_ERESTARTSYS, - [ERESTARTNOINTR] = LUSTRE_ERESTARTNOINTR, - [ERESTARTNOHAND] = LUSTRE_ERESTARTNOHAND, - [ENOIOCTLCMD] = LUSTRE_ENOIOCTLCMD, - [ERESTART_RESTARTBLOCK] = LUSTRE_ERESTART_RESTARTBLOCK, - [EBADHANDLE] = LUSTRE_EBADHANDLE, - [ENOTSYNC] = LUSTRE_ENOTSYNC, - [EBADCOOKIE] = LUSTRE_EBADCOOKIE, - [ENOTSUPP] = LUSTRE_ENOTSUPP, - [ETOOSMALL] = LUSTRE_ETOOSMALL, - [ESERVERFAULT] = LUSTRE_ESERVERFAULT, - [EBADTYPE] = LUSTRE_EBADTYPE, - [EJUKEBOX] = LUSTRE_EJUKEBOX, - [EIOCBQUEUED] = LUSTRE_EIOCBQUEUED, -}; - -static int lustre_errno_ntoh_mapping[] = { - [LUSTRE_EPERM] = EPERM, - [LUSTRE_ENOENT] = ENOENT, - [LUSTRE_ESRCH] = ESRCH, - [LUSTRE_EINTR] = EINTR, - [LUSTRE_EIO] = EIO, - [LUSTRE_ENXIO] = ENXIO, - [LUSTRE_E2BIG] = E2BIG, - [LUSTRE_ENOEXEC] = ENOEXEC, - [LUSTRE_EBADF] = EBADF, - [LUSTRE_ECHILD] = ECHILD, - [LUSTRE_EAGAIN] = EAGAIN, - [LUSTRE_ENOMEM] = ENOMEM, - [LUSTRE_EACCES] = EACCES, - [LUSTRE_EFAULT] = EFAULT, - [LUSTRE_ENOTBLK] = ENOTBLK, - [LUSTRE_EBUSY] = EBUSY, - [LUSTRE_EEXIST] = EEXIST, - [LUSTRE_EXDEV] = EXDEV, - [LUSTRE_ENODEV] = ENODEV, - [LUSTRE_ENOTDIR] = ENOTDIR, - [LUSTRE_EISDIR] = EISDIR, - [LUSTRE_EINVAL] = EINVAL, - [LUSTRE_ENFILE] = ENFILE, - [LUSTRE_EMFILE] = EMFILE, - [LUSTRE_ENOTTY] = ENOTTY, - [LUSTRE_ETXTBSY] = ETXTBSY, - [LUSTRE_EFBIG] = EFBIG, - [LUSTRE_ENOSPC] = ENOSPC, - [LUSTRE_ESPIPE] = ESPIPE, - [LUSTRE_EROFS] = EROFS, - [LUSTRE_EMLINK] = EMLINK, - [LUSTRE_EPIPE] = EPIPE, - [LUSTRE_EDOM] = EDOM, - [LUSTRE_ERANGE] = ERANGE, - [LUSTRE_EDEADLK] = EDEADLK, - [LUSTRE_ENAMETOOLONG] = ENAMETOOLONG, - [LUSTRE_ENOLCK] = ENOLCK, - [LUSTRE_ENOSYS] = ENOSYS, - [LUSTRE_ENOTEMPTY] = ENOTEMPTY, - [LUSTRE_ELOOP] = ELOOP, - [LUSTRE_ENOMSG] = 
ENOMSG, - [LUSTRE_EIDRM] = EIDRM, - [LUSTRE_ECHRNG] = ECHRNG, - [LUSTRE_EL2NSYNC] = EL2NSYNC, - [LUSTRE_EL3HLT] = EL3HLT, - [LUSTRE_EL3RST] = EL3RST, - [LUSTRE_ELNRNG] = ELNRNG, - [LUSTRE_EUNATCH] = EUNATCH, - [LUSTRE_ENOCSI] = ENOCSI, - [LUSTRE_EL2HLT] = EL2HLT, - [LUSTRE_EBADE] = EBADE, - [LUSTRE_EBADR] = EBADR, - [LUSTRE_EXFULL] = EXFULL, - [LUSTRE_ENOANO] = ENOANO, - [LUSTRE_EBADRQC] = EBADRQC, - [LUSTRE_EBADSLT] = EBADSLT, - [LUSTRE_EBFONT] = EBFONT, - [LUSTRE_ENOSTR] = ENOSTR, - [LUSTRE_ENODATA] = ENODATA, - [LUSTRE_ETIME] = ETIME, - [LUSTRE_ENOSR] = ENOSR, - [LUSTRE_ENONET] = ENONET, - [LUSTRE_ENOPKG] = ENOPKG, - [LUSTRE_EREMOTE] = EREMOTE, - [LUSTRE_ENOLINK] = ENOLINK, - [LUSTRE_EADV] = EADV, - [LUSTRE_ESRMNT] = ESRMNT, - [LUSTRE_ECOMM] = ECOMM, - [LUSTRE_EPROTO] = EPROTO, - [LUSTRE_EMULTIHOP] = EMULTIHOP, - [LUSTRE_EDOTDOT] = EDOTDOT, - [LUSTRE_EBADMSG] = EBADMSG, - [LUSTRE_EOVERFLOW] = EOVERFLOW, - [LUSTRE_ENOTUNIQ] = ENOTUNIQ, - [LUSTRE_EBADFD] = EBADFD, - [LUSTRE_EREMCHG] = EREMCHG, - [LUSTRE_ELIBACC] = ELIBACC, - [LUSTRE_ELIBBAD] = ELIBBAD, - [LUSTRE_ELIBSCN] = ELIBSCN, - [LUSTRE_ELIBMAX] = ELIBMAX, - [LUSTRE_ELIBEXEC] = ELIBEXEC, - [LUSTRE_EILSEQ] = EILSEQ, - [LUSTRE_ERESTART] = ERESTART, - [LUSTRE_ESTRPIPE] = ESTRPIPE, - [LUSTRE_EUSERS] = EUSERS, - [LUSTRE_ENOTSOCK] = ENOTSOCK, - [LUSTRE_EDESTADDRREQ] = EDESTADDRREQ, - [LUSTRE_EMSGSIZE] = EMSGSIZE, - [LUSTRE_EPROTOTYPE] = EPROTOTYPE, - [LUSTRE_ENOPROTOOPT] = ENOPROTOOPT, - [LUSTRE_EPROTONOSUPPORT] = EPROTONOSUPPORT, - [LUSTRE_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT, - [LUSTRE_EOPNOTSUPP] = EOPNOTSUPP, - [LUSTRE_EPFNOSUPPORT] = EPFNOSUPPORT, - [LUSTRE_EAFNOSUPPORT] = EAFNOSUPPORT, - [LUSTRE_EADDRINUSE] = EADDRINUSE, - [LUSTRE_EADDRNOTAVAIL] = EADDRNOTAVAIL, - [LUSTRE_ENETDOWN] = ENETDOWN, - [LUSTRE_ENETUNREACH] = ENETUNREACH, - [LUSTRE_ENETRESET] = ENETRESET, - [LUSTRE_ECONNABORTED] = ECONNABORTED, - [LUSTRE_ECONNRESET] = ECONNRESET, - [LUSTRE_ENOBUFS] = ENOBUFS, - [LUSTRE_EISCONN] = EISCONN, - 
[LUSTRE_ENOTCONN] = ENOTCONN, - [LUSTRE_ESHUTDOWN] = ESHUTDOWN, - [LUSTRE_ETOOMANYREFS] = ETOOMANYREFS, - [LUSTRE_ETIMEDOUT] = ETIMEDOUT, - [LUSTRE_ECONNREFUSED] = ECONNREFUSED, - [LUSTRE_EHOSTDOWN] = EHOSTDOWN, - [LUSTRE_EHOSTUNREACH] = EHOSTUNREACH, - [LUSTRE_EALREADY] = EALREADY, - [LUSTRE_EINPROGRESS] = EINPROGRESS, - [LUSTRE_ESTALE] = ESTALE, - [LUSTRE_EUCLEAN] = EUCLEAN, - [LUSTRE_ENOTNAM] = ENOTNAM, - [LUSTRE_ENAVAIL] = ENAVAIL, - [LUSTRE_EISNAM] = EISNAM, - [LUSTRE_EREMOTEIO] = EREMOTEIO, - [LUSTRE_EDQUOT] = EDQUOT, - [LUSTRE_ENOMEDIUM] = ENOMEDIUM, - [LUSTRE_EMEDIUMTYPE] = EMEDIUMTYPE, - [LUSTRE_ECANCELED] = ECANCELED, - [LUSTRE_ENOKEY] = ENOKEY, - [LUSTRE_EKEYEXPIRED] = EKEYEXPIRED, - [LUSTRE_EKEYREVOKED] = EKEYREVOKED, - [LUSTRE_EKEYREJECTED] = EKEYREJECTED, - [LUSTRE_EOWNERDEAD] = EOWNERDEAD, - [LUSTRE_ENOTRECOVERABLE] = ENOTRECOVERABLE, - [LUSTRE_ERESTARTSYS] = ERESTARTSYS, - [LUSTRE_ERESTARTNOINTR] = ERESTARTNOINTR, - [LUSTRE_ERESTARTNOHAND] = ERESTARTNOHAND, - [LUSTRE_ENOIOCTLCMD] = ENOIOCTLCMD, - [LUSTRE_ERESTART_RESTARTBLOCK] = ERESTART_RESTARTBLOCK, - [LUSTRE_EBADHANDLE] = EBADHANDLE, - [LUSTRE_ENOTSYNC] = ENOTSYNC, - [LUSTRE_EBADCOOKIE] = EBADCOOKIE, - [LUSTRE_ENOTSUPP] = ENOTSUPP, - [LUSTRE_ETOOSMALL] = ETOOSMALL, - [LUSTRE_ESERVERFAULT] = ESERVERFAULT, - [LUSTRE_EBADTYPE] = EBADTYPE, - [LUSTRE_EJUKEBOX] = EJUKEBOX, - [LUSTRE_EIOCBQUEUED] = EIOCBQUEUED, -}; - -unsigned int lustre_errno_hton(unsigned int h) -{ - unsigned int n; - - if (h == 0) { - n = 0; - } else if (h < ARRAY_SIZE(lustre_errno_hton_mapping)) { - n = lustre_errno_hton_mapping[h]; - if (n == 0) - goto generic; - } else { -generic: - /* - * A generic errno is better than the unknown one that could - * mean anything to a different host. 
- */ - n = LUSTRE_EIO; - } - - return n; -} -EXPORT_SYMBOL(lustre_errno_hton); - -unsigned int lustre_errno_ntoh(unsigned int n) -{ - unsigned int h; - - if (n == 0) { - h = 0; - } else if (n < ARRAY_SIZE(lustre_errno_ntoh_mapping)) { - h = lustre_errno_ntoh_mapping[n]; - if (h == 0) - goto generic; - } else { -generic: - /* - * Similar to the situation in lustre_errno_hton(), an unknown - * network errno could coincide with anything. Hence, it is - * better to return a generic errno. - */ - h = EIO; - } - - return h; -} -EXPORT_SYMBOL(lustre_errno_ntoh); diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c deleted file mode 100644 index 130bacc2c891..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/events.c +++ /dev/null @@ -1,585 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015 Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -# ifdef __mips64__ -# include -# endif - -#include -#include -#include -#include "ptlrpc_internal.h" - -struct lnet_handle_eq ptlrpc_eq_h; - -/* - * Client's outgoing request callback - */ -void request_out_callback(struct lnet_event *ev) -{ - struct ptlrpc_cb_id *cbid = ev->md.user_ptr; - struct ptlrpc_request *req = cbid->cbid_arg; - bool wakeup = false; - - LASSERT(ev->type == LNET_EVENT_SEND || ev->type == LNET_EVENT_UNLINK); - LASSERT(ev->unlinked); - - DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status); - - sptlrpc_request_out_callback(req); - - spin_lock(&req->rq_lock); - req->rq_real_sent = ktime_get_real_seconds(); - req->rq_req_unlinked = 1; - /* reply_in_callback happened before request_out_callback? */ - if (req->rq_reply_unlinked) - wakeup = true; - - if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { - /* Failed send: make it seem like the reply timed out, just - * like failing sends in client.c does currently... - */ - req->rq_net_err = 1; - wakeup = true; - } - - if (wakeup) - ptlrpc_client_wake_req(req); - - spin_unlock(&req->rq_lock); - - ptlrpc_req_finished(req); -} - -/* - * Client's incoming reply callback - */ -void reply_in_callback(struct lnet_event *ev) -{ - struct ptlrpc_cb_id *cbid = ev->md.user_ptr; - struct ptlrpc_request *req = cbid->cbid_arg; - - DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status); - - LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK); - LASSERT(ev->md.start == req->rq_repbuf); - LASSERT(ev->offset + ev->mlength <= req->rq_repbuf_len); - /* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests - * for adaptive timeouts' early reply. 
- */ - LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0); - - spin_lock(&req->rq_lock); - - req->rq_receiving_reply = 0; - req->rq_early = 0; - if (ev->unlinked) - req->rq_reply_unlinked = 1; - - if (ev->status) - goto out_wake; - - if (ev->type == LNET_EVENT_UNLINK) { - LASSERT(ev->unlinked); - DEBUG_REQ(D_NET, req, "unlink"); - goto out_wake; - } - - if (ev->mlength < ev->rlength) { - CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req, - req->rq_replen, ev->rlength, ev->offset); - req->rq_reply_truncated = 1; - req->rq_replied = 1; - req->rq_status = -EOVERFLOW; - req->rq_nob_received = ev->rlength + ev->offset; - goto out_wake; - } - - if ((ev->offset == 0) && - ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) { - /* Early reply */ - DEBUG_REQ(D_ADAPTTO, req, - "Early reply received: mlen=%u offset=%d replen=%d replied=%d unlinked=%d", - ev->mlength, ev->offset, - req->rq_replen, req->rq_replied, ev->unlinked); - - req->rq_early_count++; /* number received, client side */ - - /* already got the real reply or buffers are already unlinked */ - if (req->rq_replied || req->rq_reply_unlinked == 1) - goto out_wake; - - req->rq_early = 1; - req->rq_reply_off = ev->offset; - req->rq_nob_received = ev->mlength; - /* And we're still receiving */ - req->rq_receiving_reply = 1; - } else { - /* Real reply */ - req->rq_rep_swab_mask = 0; - req->rq_replied = 1; - /* Got reply, no resend required */ - req->rq_resend = 0; - req->rq_reply_off = ev->offset; - req->rq_nob_received = ev->mlength; - /* LNetMDUnlink can't be called under the LNET_LOCK, - * so we must unlink in ptlrpc_unregister_reply - */ - DEBUG_REQ(D_INFO, req, - "reply in flags=%x mlen=%u offset=%d replen=%d", - lustre_msg_get_flags(req->rq_reqmsg), - ev->mlength, ev->offset, req->rq_replen); - } - - req->rq_import->imp_last_reply_time = ktime_get_real_seconds(); - -out_wake: - /* NB don't unlock till after wakeup; req can disappear under us - * since we don't have our own ref - */ - 
ptlrpc_client_wake_req(req); - spin_unlock(&req->rq_lock); -} - -/* - * Client's bulk has been written/read - */ -void client_bulk_callback(struct lnet_event *ev) -{ - struct ptlrpc_cb_id *cbid = ev->md.user_ptr; - struct ptlrpc_bulk_desc *desc = cbid->cbid_arg; - struct ptlrpc_request *req; - - LASSERT((ptlrpc_is_bulk_put_sink(desc->bd_type) && - ev->type == LNET_EVENT_PUT) || - (ptlrpc_is_bulk_get_source(desc->bd_type) && - ev->type == LNET_EVENT_GET) || - ev->type == LNET_EVENT_UNLINK); - LASSERT(ev->unlinked); - - if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB, CFS_FAIL_ONCE)) - ev->status = -EIO; - - if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2, - CFS_FAIL_ONCE)) - ev->status = -EIO; - - CDEBUG((ev->status == 0) ? D_NET : D_ERROR, - "event type %d, status %d, desc %p\n", - ev->type, ev->status, desc); - - spin_lock(&desc->bd_lock); - req = desc->bd_req; - LASSERT(desc->bd_md_count > 0); - desc->bd_md_count--; - - if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) { - desc->bd_nob_transferred += ev->mlength; - desc->bd_sender = ev->sender; - } else { - /* start reconnect and resend if network error hit */ - spin_lock(&req->rq_lock); - req->rq_net_err = 1; - spin_unlock(&req->rq_lock); - } - - if (ev->status != 0) - desc->bd_failure = 1; - - /* NB don't unlock till after wakeup; desc can disappear under us - * otherwise - */ - if (desc->bd_md_count == 0) - ptlrpc_client_wake_req(desc->bd_req); - - spin_unlock(&desc->bd_lock); -} - -/* - * We will have percpt request history list for ptlrpc service in upcoming - * patches because we don't want to be serialized by current per-service - * history operations. So we require history ID can (somehow) show arriving - * order w/o grabbing global lock, and user can sort them in userspace. 
- * - * This is how we generate history ID for ptlrpc_request: - * ---------------------------------------------------- - * | 32 bits | 16 bits | (16 - X)bits | X bits | - * ---------------------------------------------------- - * | seconds | usec / 16 | sequence | CPT id | - * ---------------------------------------------------- - * - * it might not be precise but should be good enough. - */ - -#define REQS_CPT_BITS(svcpt) ((svcpt)->scp_service->srv_cpt_bits) - -#define REQS_SEC_SHIFT 32 -#define REQS_USEC_SHIFT 16 -#define REQS_SEQ_SHIFT(svcpt) REQS_CPT_BITS(svcpt) - -static void ptlrpc_req_add_history(struct ptlrpc_service_part *svcpt, - struct ptlrpc_request *req) -{ - __u64 sec = req->rq_arrival_time.tv_sec; - __u32 usec = req->rq_arrival_time.tv_nsec / NSEC_PER_USEC / 16; /* usec / 16 */ - __u64 new_seq; - - /* set sequence ID for request and add it to history list, - * it must be called with hold svcpt::scp_lock - */ - - new_seq = (sec << REQS_SEC_SHIFT) | - (usec << REQS_USEC_SHIFT) | - (svcpt->scp_cpt < 0 ? 
0 : svcpt->scp_cpt); - - if (new_seq > svcpt->scp_hist_seq) { - /* This handles the initial case of scp_hist_seq == 0 or - * we just jumped into a new time window - */ - svcpt->scp_hist_seq = new_seq; - } else { - LASSERT(REQS_SEQ_SHIFT(svcpt) < REQS_USEC_SHIFT); - /* NB: increase sequence number in current usec bucket, - * however, it's possible that we used up all bits for - * sequence and jumped into the next usec bucket (future time), - * then we hope there will be less RPCs per bucket at some - * point, and sequence will catch up again - */ - svcpt->scp_hist_seq += (1ULL << REQS_SEQ_SHIFT(svcpt)); - new_seq = svcpt->scp_hist_seq; - } - - req->rq_history_seq = new_seq; - - list_add_tail(&req->rq_history_list, &svcpt->scp_hist_reqs); -} - -/* - * Server's incoming request callback - */ -void request_in_callback(struct lnet_event *ev) -{ - struct ptlrpc_cb_id *cbid = ev->md.user_ptr; - struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg; - struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; - struct ptlrpc_service *service = svcpt->scp_service; - struct ptlrpc_request *req; - - LASSERT(ev->type == LNET_EVENT_PUT || - ev->type == LNET_EVENT_UNLINK); - LASSERT((char *)ev->md.start >= rqbd->rqbd_buffer); - LASSERT((char *)ev->md.start + ev->offset + ev->mlength <= - rqbd->rqbd_buffer + service->srv_buf_size); - - CDEBUG((ev->status == 0) ? D_NET : D_ERROR, - "event type %d, status %d, service %s\n", - ev->type, ev->status, service->srv_name); - - if (ev->unlinked) { - /* If this is the last request message to fit in the - * request buffer we can use the request object embedded in - * rqbd. Note that if we failed to allocate a request, - * we'd have to re-post the rqbd, which we can't do in this - * context. - */ - req = &rqbd->rqbd_req; - memset(req, 0, sizeof(*req)); - } else { - LASSERT(ev->type == LNET_EVENT_PUT); - if (ev->status != 0) { - /* We moaned above already... 
*/ - return; - } - req = ptlrpc_request_cache_alloc(GFP_ATOMIC); - if (!req) { - CERROR("Can't allocate incoming request descriptor: Dropping %s RPC from %s\n", - service->srv_name, - libcfs_id2str(ev->initiator)); - return; - } - } - - ptlrpc_srv_req_init(req); - /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL, - * flags are reset and scalars are zero. We only set the message - * size to non-zero if this was a successful receive. - */ - req->rq_xid = ev->match_bits; - req->rq_reqbuf = ev->md.start + ev->offset; - if (ev->type == LNET_EVENT_PUT && ev->status == 0) - req->rq_reqdata_len = ev->mlength; - ktime_get_real_ts64(&req->rq_arrival_time); - req->rq_peer = ev->initiator; - req->rq_self = ev->target.nid; - req->rq_rqbd = rqbd; - req->rq_phase = RQ_PHASE_NEW; - if (ev->type == LNET_EVENT_PUT) - CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n", - req, req->rq_xid, ev->mlength); - - CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer)); - - spin_lock(&svcpt->scp_lock); - - ptlrpc_req_add_history(svcpt, req); - - if (ev->unlinked) { - svcpt->scp_nrqbds_posted--; - CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n", - svcpt->scp_nrqbds_posted); - - /* Normally, don't complain about 0 buffers posted; LNET won't - * drop incoming reqs since we set the portal lazy - */ - if (test_req_buffer_pressure && - ev->type != LNET_EVENT_UNLINK && - svcpt->scp_nrqbds_posted == 0) - CWARN("All %s request buffers busy\n", - service->srv_name); - - /* req takes over the network's ref on rqbd */ - } else { - /* req takes a ref on rqbd */ - rqbd->rqbd_refcount++; - } - - list_add_tail(&req->rq_list, &svcpt->scp_req_incoming); - svcpt->scp_nreqs_incoming++; - - /* NB everything can disappear under us once the request - * has been queued and we unlock, so do the wake now... 
- */ - wake_up(&svcpt->scp_waitq); - - spin_unlock(&svcpt->scp_lock); -} - -/* - * Server's outgoing reply callback - */ -void reply_out_callback(struct lnet_event *ev) -{ - struct ptlrpc_cb_id *cbid = ev->md.user_ptr; - struct ptlrpc_reply_state *rs = cbid->cbid_arg; - struct ptlrpc_service_part *svcpt = rs->rs_svcpt; - - LASSERT(ev->type == LNET_EVENT_SEND || - ev->type == LNET_EVENT_ACK || - ev->type == LNET_EVENT_UNLINK); - - if (!rs->rs_difficult) { - /* 'Easy' replies have no further processing so I drop the - * net's ref on 'rs' - */ - LASSERT(ev->unlinked); - ptlrpc_rs_decref(rs); - return; - } - - LASSERT(rs->rs_on_net); - - if (ev->unlinked) { - /* Last network callback. The net's ref on 'rs' stays put - * until ptlrpc_handle_rs() is done with it - */ - spin_lock(&svcpt->scp_rep_lock); - spin_lock(&rs->rs_lock); - - rs->rs_on_net = 0; - if (!rs->rs_no_ack || - rs->rs_transno <= - rs->rs_export->exp_obd->obd_last_committed || - list_empty(&rs->rs_obd_list)) - ptlrpc_schedule_difficult_reply(rs); - - spin_unlock(&rs->rs_lock); - spin_unlock(&svcpt->scp_rep_lock); - } -} - -static void ptlrpc_master_callback(struct lnet_event *ev) -{ - struct ptlrpc_cb_id *cbid = ev->md.user_ptr; - void (*callback)(struct lnet_event *ev) = cbid->cbid_fn; - - /* Honestly, it's best to find out early. 
*/ - LASSERT(cbid->cbid_arg != LP_POISON); - LASSERT(callback == request_out_callback || - callback == reply_in_callback || - callback == client_bulk_callback || - callback == request_in_callback || - callback == reply_out_callback); - - callback(ev); -} - -int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, - struct lnet_process_id *peer, lnet_nid_t *self) -{ - int best_dist = 0; - __u32 best_order = 0; - int count = 0; - int rc = -ENOENT; - int dist; - __u32 order; - lnet_nid_t dst_nid; - lnet_nid_t src_nid; - - peer->pid = LNET_PID_LUSTRE; - - /* Choose the matching UUID that's closest */ - while (lustre_uuid_to_peer(uuid->uuid, &dst_nid, count++) == 0) { - dist = LNetDist(dst_nid, &src_nid, &order); - if (dist < 0) - continue; - - if (dist == 0) { /* local! use loopback LND */ - peer->nid = *self = LNET_MKNID(LNET_MKNET(LOLND, 0), 0); - rc = 0; - break; - } - - if (rc < 0 || - dist < best_dist || - (dist == best_dist && order < best_order)) { - best_dist = dist; - best_order = order; - - peer->nid = dst_nid; - *self = src_nid; - rc = 0; - } - } - - CDEBUG(D_NET, "%s->%s\n", uuid->uuid, libcfs_id2str(*peer)); - return rc; -} - -static void ptlrpc_ni_fini(void) -{ - int rc; - int retries; - - /* Wait for the event queue to become idle since there may still be - * messages in flight with pending events (i.e. 
the fire-and-forget - * messages == client requests and "non-difficult" server - * replies - */ - - for (retries = 0;; retries++) { - rc = LNetEQFree(ptlrpc_eq_h); - switch (rc) { - default: - LBUG(); - - case 0: - LNetNIFini(); - return; - - case -EBUSY: - if (retries != 0) - CWARN("Event queue still busy\n"); - - schedule_timeout_uninterruptible(2 * HZ); - break; - } - } - /* notreached */ -} - -static lnet_pid_t ptl_get_pid(void) -{ - lnet_pid_t pid; - - pid = LNET_PID_LUSTRE; - return pid; -} - -static int ptlrpc_ni_init(void) -{ - int rc; - lnet_pid_t pid; - - pid = ptl_get_pid(); - CDEBUG(D_NET, "My pid is: %x\n", pid); - - /* We're not passing any limits yet... */ - rc = LNetNIInit(pid); - if (rc < 0) { - CDEBUG(D_NET, "Can't init network interface: %d\n", rc); - return rc; - } - - /* CAVEAT EMPTOR: how we process portals events is _radically_ - * different depending on... - */ - /* kernel LNet calls our master callback when there are new event, - * because we are guaranteed to get every event via callback, - * so we just set EQ size to 0 to avoid overhead of serializing - * enqueue/dequeue operations in LNet. 
- */ - rc = LNetEQAlloc(0, ptlrpc_master_callback, &ptlrpc_eq_h); - if (rc == 0) - return 0; - - CERROR("Failed to allocate event queue: %d\n", rc); - LNetNIFini(); - - return rc; -} - -int ptlrpc_init_portals(void) -{ - int rc = ptlrpc_ni_init(); - - if (rc != 0) { - CERROR("network initialisation failed\n"); - return rc; - } - rc = ptlrpcd_addref(); - if (rc == 0) - return 0; - - CERROR("rpcd initialisation failed\n"); - ptlrpc_ni_fini(); - return rc; -} - -void ptlrpc_exit_portals(void) -{ - ptlrpcd_decref(); - ptlrpc_ni_fini(); -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c deleted file mode 100644 index 1a0f35dfab97..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/import.c +++ /dev/null @@ -1,1677 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/ptlrpc/import.c - * - * Author: Mike Shaver - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -struct ptlrpc_connect_async_args { - __u64 pcaa_peer_committed; - int pcaa_initial_connect; -}; - -/** - * Updates import \a imp current state to provided \a state value - * Helper function. Must be called under imp_lock. - */ -static void __import_set_state(struct obd_import *imp, - enum lustre_imp_state state) -{ - switch (state) { - case LUSTRE_IMP_CLOSED: - case LUSTRE_IMP_NEW: - case LUSTRE_IMP_DISCON: - case LUSTRE_IMP_CONNECTING: - break; - case LUSTRE_IMP_REPLAY_WAIT: - imp->imp_replay_state = LUSTRE_IMP_REPLAY_LOCKS; - break; - default: - imp->imp_replay_state = LUSTRE_IMP_REPLAY; - } - - imp->imp_state = state; - imp->imp_state_hist[imp->imp_state_hist_idx].ish_state = state; - imp->imp_state_hist[imp->imp_state_hist_idx].ish_time = - ktime_get_real_seconds(); - imp->imp_state_hist_idx = (imp->imp_state_hist_idx + 1) % - IMP_STATE_HIST_LEN; -} - -/* A CLOSED import should remain so. */ -#define IMPORT_SET_STATE_NOLOCK(imp, state) \ -do { \ - if (imp->imp_state != LUSTRE_IMP_CLOSED) { \ - CDEBUG(D_HA, "%p %s: changing import state from %s to %s\n", \ - imp, obd2cli_tgt(imp->imp_obd), \ - ptlrpc_import_state_name(imp->imp_state), \ - ptlrpc_import_state_name(state)); \ - __import_set_state(imp, state); \ - } \ -} while (0) - -#define IMPORT_SET_STATE(imp, state) \ -do { \ - spin_lock(&imp->imp_lock); \ - IMPORT_SET_STATE_NOLOCK(imp, state); \ - spin_unlock(&imp->imp_lock); \ -} while (0) - -static int ptlrpc_connect_interpret(const struct lu_env *env, - struct ptlrpc_request *request, - void *data, int rc); -int ptlrpc_import_recovery_state_machine(struct obd_import *imp); - -/* Only this function is allowed to change the import state when it is - * CLOSED. 
I would rather refcount the import and free it after - * disconnection like we do with exports. To do that, the client_obd - * will need to save the peer info somewhere other than in the import, - * though. - */ -int ptlrpc_init_import(struct obd_import *imp) -{ - spin_lock(&imp->imp_lock); - - imp->imp_generation++; - imp->imp_state = LUSTRE_IMP_NEW; - - spin_unlock(&imp->imp_lock); - - return 0; -} -EXPORT_SYMBOL(ptlrpc_init_import); - -#define UUID_STR "_UUID" -static void deuuidify(char *uuid, const char *prefix, char **uuid_start, - int *uuid_len) -{ - *uuid_start = !prefix || strncmp(uuid, prefix, strlen(prefix)) - ? uuid : uuid + strlen(prefix); - - *uuid_len = strlen(*uuid_start); - - if (*uuid_len < strlen(UUID_STR)) - return; - - if (!strncmp(*uuid_start + *uuid_len - strlen(UUID_STR), - UUID_STR, strlen(UUID_STR))) - *uuid_len -= strlen(UUID_STR); -} - -/** - * Returns true if import was FULL, false if import was already not - * connected. - * @imp - import to be disconnected - * @conn_cnt - connection count (epoch) of the request that timed out - * and caused the disconnection. In some cases, multiple - * inflight requests can fail to a single target (e.g. OST - * bulk requests) and if one has already caused a reconnection - * (increasing the import->conn_cnt) the older failure should - * not also cause a reconnection. If zero it forces a reconnect. 
- */ -int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt) -{ - int rc = 0; - - spin_lock(&imp->imp_lock); - - if (imp->imp_state == LUSTRE_IMP_FULL && - (conn_cnt == 0 || conn_cnt == imp->imp_conn_cnt)) { - char *target_start; - int target_len; - - deuuidify(obd2cli_tgt(imp->imp_obd), NULL, - &target_start, &target_len); - - if (imp->imp_replayable) { - LCONSOLE_WARN("%s: Connection to %.*s (at %s) was lost; in progress operations using this service will wait for recovery to complete\n", - imp->imp_obd->obd_name, target_len, target_start, - libcfs_nid2str(imp->imp_connection->c_peer.nid)); - } else { - LCONSOLE_ERROR_MSG(0x166, "%s: Connection to %.*s (at %s) was lost; in progress operations using this service will fail\n", - imp->imp_obd->obd_name, - target_len, target_start, - libcfs_nid2str(imp->imp_connection->c_peer.nid)); - } - IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); - spin_unlock(&imp->imp_lock); - - if (obd_dump_on_timeout) - libcfs_debug_dumplog(); - - obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON); - rc = 1; - } else { - spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "%s: import %p already %s (conn %u, was %u): %s\n", - imp->imp_client->cli_name, imp, - (imp->imp_state == LUSTRE_IMP_FULL && - imp->imp_conn_cnt > conn_cnt) ? - "reconnected" : "not connected", imp->imp_conn_cnt, - conn_cnt, ptlrpc_import_state_name(imp->imp_state)); - } - - return rc; -} - -/* - * This acts as a barrier; all existing requests are rejected, and - * no new requests will be accepted until the import is valid again. 
- */ -void ptlrpc_deactivate_import(struct obd_import *imp) -{ - CDEBUG(D_HA, "setting import %s INVALID\n", obd2cli_tgt(imp->imp_obd)); - - spin_lock(&imp->imp_lock); - imp->imp_invalid = 1; - imp->imp_generation++; - spin_unlock(&imp->imp_lock); - - ptlrpc_abort_inflight(imp); - obd_import_event(imp->imp_obd, imp, IMP_EVENT_INACTIVE); -} -EXPORT_SYMBOL(ptlrpc_deactivate_import); - -static unsigned int -ptlrpc_inflight_deadline(struct ptlrpc_request *req, time64_t now) -{ - long dl; - - if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) || - (req->rq_phase == RQ_PHASE_BULK) || - (req->rq_phase == RQ_PHASE_NEW))) - return 0; - - if (req->rq_timedout) - return 0; - - if (req->rq_phase == RQ_PHASE_NEW) - dl = req->rq_sent; - else - dl = req->rq_deadline; - - if (dl <= now) - return 0; - - return dl - now; -} - -static unsigned int ptlrpc_inflight_timeout(struct obd_import *imp) -{ - time64_t now = ktime_get_real_seconds(); - struct ptlrpc_request *req, *n; - unsigned int timeout = 0; - - spin_lock(&imp->imp_lock); - list_for_each_entry_safe(req, n, &imp->imp_sending_list, rq_list) - timeout = max(ptlrpc_inflight_deadline(req, now), timeout); - - spin_unlock(&imp->imp_lock); - return timeout; -} - -/** - * This function will invalidate the import, if necessary, then block - * for all the RPC completions, and finally notify the obd to - * invalidate its state (ie cancel locks, clear pending requests, - * etc). - */ -void ptlrpc_invalidate_import(struct obd_import *imp) -{ - struct ptlrpc_request *req, *n; - unsigned int timeout; - int rc; - - atomic_inc(&imp->imp_inval_count); - - if (!imp->imp_invalid || imp->imp_obd->obd_no_recov) - ptlrpc_deactivate_import(imp); - - CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, 3 * cfs_fail_val / 2); - LASSERT(imp->imp_invalid); - - /* Wait forever until inflight == 0. We really can't do it another - * way because in some cases we need to wait for very long reply - * unlink. 
We can't do anything before that because there is really - * no guarantee that some rdma transfer is not in progress right now. - */ - do { - /* Calculate max timeout for waiting on rpcs to error - * out. Use obd_timeout if calculated value is smaller - * than it. - */ - if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) { - timeout = ptlrpc_inflight_timeout(imp); - timeout += timeout / 3; - - if (timeout == 0) - timeout = obd_timeout; - } else { - /* decrease the interval to increase race condition */ - timeout = 1; - } - - CDEBUG(D_RPCTRACE, - "Sleeping %d sec for inflight to error out\n", - timeout); - - /* Wait for all requests to error out and call completion - * callbacks. Cap it at obd_timeout -- these should all - * have been locally cancelled by ptlrpc_abort_inflight. - */ - rc = wait_event_idle_timeout(imp->imp_recovery_waitq, - atomic_read(&imp->imp_inflight) == 0, - obd_timeout * HZ); - - if (rc == 0) { - const char *cli_tgt = obd2cli_tgt(imp->imp_obd); - - CERROR("%s: timeout waiting for callback (%d != 0)\n", - cli_tgt, - atomic_read(&imp->imp_inflight)); - - spin_lock(&imp->imp_lock); - if (atomic_read(&imp->imp_inflight) == 0) { - int count = atomic_read(&imp->imp_unregistering); - - /* We know that "unregistering" rpcs only can - * survive in sending or delaying lists (they - * maybe waiting for long reply unlink in - * sluggish nets). Let's check this. If there - * is no inflight and unregistering != 0, this - * is bug. - */ - LASSERTF(count == 0, "Some RPCs are still unregistering: %d\n", - count); - - /* Let's save one loop as soon as inflight have - * dropped to zero. No new inflights possible at - * this point. - */ - rc = 0; - } else { - list_for_each_entry_safe(req, n, - &imp->imp_sending_list, rq_list) { - DEBUG_REQ(D_ERROR, req, - "still on sending list"); - } - list_for_each_entry_safe(req, n, - &imp->imp_delayed_list, rq_list) { - DEBUG_REQ(D_ERROR, req, - "still on delayed list"); - } - - CERROR("%s: Unregistering RPCs found (%d). 
Network is sluggish? Waiting them to error out.\n", - cli_tgt, - atomic_read(&imp-> - imp_unregistering)); - } - spin_unlock(&imp->imp_lock); - } - } while (rc == 0); - - /* - * Let's additionally check that no new rpcs added to import in - * "invalidate" state. - */ - LASSERT(atomic_read(&imp->imp_inflight) == 0); - obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE); - sptlrpc_import_flush_all_ctx(imp); - - atomic_dec(&imp->imp_inval_count); - wake_up_all(&imp->imp_recovery_waitq); -} -EXPORT_SYMBOL(ptlrpc_invalidate_import); - -/* unset imp_invalid */ -void ptlrpc_activate_import(struct obd_import *imp) -{ - struct obd_device *obd = imp->imp_obd; - - spin_lock(&imp->imp_lock); - if (imp->imp_deactive != 0) { - spin_unlock(&imp->imp_lock); - return; - } - - imp->imp_invalid = 0; - spin_unlock(&imp->imp_lock); - obd_import_event(obd, imp, IMP_EVENT_ACTIVE); -} -EXPORT_SYMBOL(ptlrpc_activate_import); - -void ptlrpc_pinger_force(struct obd_import *imp) -{ - CDEBUG(D_HA, "%s: waking up pinger s:%s\n", obd2cli_tgt(imp->imp_obd), - ptlrpc_import_state_name(imp->imp_state)); - - spin_lock(&imp->imp_lock); - imp->imp_force_verify = 1; - spin_unlock(&imp->imp_lock); - - if (imp->imp_state != LUSTRE_IMP_CONNECTING) - ptlrpc_pinger_wake_up(); -} -EXPORT_SYMBOL(ptlrpc_pinger_force); - -void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt) -{ - LASSERT(!imp->imp_dlm_fake); - - if (ptlrpc_set_import_discon(imp, conn_cnt)) { - if (!imp->imp_replayable) { - CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid, - imp->imp_obd->obd_name); - ptlrpc_deactivate_import(imp); - } - - ptlrpc_pinger_force(imp); - } -} - -int ptlrpc_reconnect_import(struct obd_import *imp) -{ - int rc; - - ptlrpc_pinger_force(imp); - - CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n", - obd2cli_tgt(imp->imp_obd), obd_timeout); - - rc = wait_event_idle_timeout(imp->imp_recovery_waitq, - 
!ptlrpc_import_in_recovery(imp), - obd_timeout * HZ); - CDEBUG(D_HA, "%s: recovery finished s:%s\n", obd2cli_tgt(imp->imp_obd), - ptlrpc_import_state_name(imp->imp_state)); - return rc == 0 ? -ETIMEDOUT : 0; -} -EXPORT_SYMBOL(ptlrpc_reconnect_import); - -/** - * Connection on import \a imp is changed to another one (if more than one is - * present). We typically chose connection that we have not tried to connect to - * the longest - */ -static int import_select_connection(struct obd_import *imp) -{ - struct obd_import_conn *imp_conn = NULL, *conn; - struct obd_export *dlmexp; - char *target_start; - int target_len, tried_all = 1; - - spin_lock(&imp->imp_lock); - - if (list_empty(&imp->imp_conn_list)) { - CERROR("%s: no connections available\n", - imp->imp_obd->obd_name); - spin_unlock(&imp->imp_lock); - return -EINVAL; - } - - list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { - CDEBUG(D_HA, "%s: connect to NID %s last attempt %llu\n", - imp->imp_obd->obd_name, - libcfs_nid2str(conn->oic_conn->c_peer.nid), - conn->oic_last_attempt); - - /* If we have not tried this connection since - * the last successful attempt, go with this one - */ - if ((conn->oic_last_attempt == 0) || - time_before_eq64(conn->oic_last_attempt, - imp->imp_last_success_conn)) { - imp_conn = conn; - tried_all = 0; - break; - } - - /* If all of the connections have already been tried - * since the last successful connection; just choose the - * least recently used - */ - if (!imp_conn) - imp_conn = conn; - else if (time_before64(conn->oic_last_attempt, - imp_conn->oic_last_attempt)) - imp_conn = conn; - } - - /* if not found, simply choose the current one */ - if (!imp_conn || imp->imp_force_reconnect) { - LASSERT(imp->imp_conn_current); - imp_conn = imp->imp_conn_current; - tried_all = 0; - } - LASSERT(imp_conn->oic_conn); - - /* If we've tried everything, and we're back to the beginning of the - * list, increase our timeout and try again. It will be reset when - * we do finally connect. 
(FIXME: really we should wait for all network - * state associated with the last connection attempt to drain before - * trying to reconnect on it.) - */ - if (tried_all && (imp->imp_conn_list.next == &imp_conn->oic_item)) { - struct adaptive_timeout *at = &imp->imp_at.iat_net_latency; - - if (at_get(at) < CONNECTION_SWITCH_MAX) { - at_measured(at, at_get(at) + CONNECTION_SWITCH_INC); - if (at_get(at) > CONNECTION_SWITCH_MAX) - at_reset(at, CONNECTION_SWITCH_MAX); - } - LASSERT(imp_conn->oic_last_attempt); - CDEBUG(D_HA, "%s: tried all connections, increasing latency to %ds\n", - imp->imp_obd->obd_name, at_get(at)); - } - - imp_conn->oic_last_attempt = get_jiffies_64(); - - /* switch connection, don't mind if it's same as the current one */ - ptlrpc_connection_put(imp->imp_connection); - imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn); - - dlmexp = class_conn2export(&imp->imp_dlm_handle); - ptlrpc_connection_put(dlmexp->exp_connection); - dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn); - class_export_put(dlmexp); - - if (imp->imp_conn_current != imp_conn) { - if (imp->imp_conn_current) { - deuuidify(obd2cli_tgt(imp->imp_obd), NULL, - &target_start, &target_len); - - CDEBUG(D_HA, "%s: Connection changing to %.*s (at %s)\n", - imp->imp_obd->obd_name, - target_len, target_start, - libcfs_nid2str(imp_conn->oic_conn->c_peer.nid)); - } - - imp->imp_conn_current = imp_conn; - } - - CDEBUG(D_HA, "%s: import %p using connection %s/%s\n", - imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid, - libcfs_nid2str(imp_conn->oic_conn->c_peer.nid)); - - spin_unlock(&imp->imp_lock); - - return 0; -} - -/* - * must be called under imp_lock - */ -static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno) -{ - struct ptlrpc_request *req; - - /* The requests in committed_list always have smaller transnos than - * the requests in replay_list - */ - if (!list_empty(&imp->imp_committed_list)) { - req = 
list_first_entry(&imp->imp_committed_list, - struct ptlrpc_request, rq_replay_list); - *transno = req->rq_transno; - if (req->rq_transno == 0) { - DEBUG_REQ(D_ERROR, req, - "zero transno in committed_list"); - LBUG(); - } - return 1; - } - if (!list_empty(&imp->imp_replay_list)) { - req = list_first_entry(&imp->imp_replay_list, - struct ptlrpc_request, rq_replay_list); - *transno = req->rq_transno; - if (req->rq_transno == 0) { - DEBUG_REQ(D_ERROR, req, "zero transno in replay_list"); - LBUG(); - } - return 1; - } - return 0; -} - -/** - * Attempt to (re)connect import \a imp. This includes all preparations, - * initializing CONNECT RPC request and passing it to ptlrpcd for - * actual sending. - * Returns 0 on success or error code. - */ -int ptlrpc_connect_import(struct obd_import *imp) -{ - struct obd_device *obd = imp->imp_obd; - int initial_connect = 0; - int set_transno = 0; - __u64 committed_before_reconnect = 0; - struct ptlrpc_request *request; - char *bufs[] = { NULL, - obd2cli_tgt(imp->imp_obd), - obd->obd_uuid.uuid, - (char *)&imp->imp_dlm_handle, - (char *)&imp->imp_connect_data }; - struct ptlrpc_connect_async_args *aa; - int rc; - - spin_lock(&imp->imp_lock); - if (imp->imp_state == LUSTRE_IMP_CLOSED) { - spin_unlock(&imp->imp_lock); - CERROR("can't connect to a closed import\n"); - return -EINVAL; - } else if (imp->imp_state == LUSTRE_IMP_FULL) { - spin_unlock(&imp->imp_lock); - CERROR("already connected\n"); - return 0; - } else if (imp->imp_state == LUSTRE_IMP_CONNECTING || - imp->imp_connected) { - spin_unlock(&imp->imp_lock); - CERROR("already connecting\n"); - return -EALREADY; - } - - IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING); - - imp->imp_conn_cnt++; - imp->imp_resend_replay = 0; - - if (!lustre_handle_is_used(&imp->imp_remote_handle)) - initial_connect = 1; - else - committed_before_reconnect = imp->imp_peer_committed_transno; - - set_transno = ptlrpc_first_transno(imp, - &imp->imp_connect_data.ocd_transno); - 
spin_unlock(&imp->imp_lock); - - rc = import_select_connection(imp); - if (rc) - goto out; - - rc = sptlrpc_import_sec_adapt(imp, NULL, NULL); - if (rc) - goto out; - - /* Reset connect flags to the originally requested flags, in case - * the server is updated on-the-fly we will get the new features. - */ - imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig; - /* Reset ocd_version each time so the server knows the exact versions */ - imp->imp_connect_data.ocd_version = LUSTRE_VERSION_CODE; - imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT; - imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18; - - rc = obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd, - &obd->obd_uuid, &imp->imp_connect_data, NULL); - if (rc) - goto out; - - request = ptlrpc_request_alloc(imp, &RQF_MDS_CONNECT); - if (!request) { - rc = -ENOMEM; - goto out; - } - - rc = ptlrpc_request_bufs_pack(request, LUSTRE_OBD_VERSION, - imp->imp_connect_op, bufs, NULL); - if (rc) { - ptlrpc_request_free(request); - goto out; - } - - /* Report the rpc service time to the server so that it knows how long - * to wait for clients to join recovery - */ - lustre_msg_set_service_time(request->rq_reqmsg, - at_timeout2est(request->rq_timeout)); - - /* The amount of time we give the server to process the connect req. - * import_select_connection will increase the net latency on - * repeated reconnect attempts to cover slow networks. 
- * We override/ignore the server rpc completion estimate here, - * which may be large if this is a reconnect attempt - */ - request->rq_timeout = INITIAL_CONNECT_TIMEOUT; - lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout); - - request->rq_no_resend = 1; - request->rq_no_delay = 1; - request->rq_send_state = LUSTRE_IMP_CONNECTING; - /* Allow a slightly larger reply for future growth compatibility */ - req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER, - sizeof(struct obd_connect_data) + - 16 * sizeof(__u64)); - ptlrpc_request_set_replen(request); - request->rq_interpret_reply = ptlrpc_connect_interpret; - - BUILD_BUG_ON(sizeof(*aa) > sizeof(request->rq_async_args)); - aa = ptlrpc_req_async_args(request); - memset(aa, 0, sizeof(*aa)); - - aa->pcaa_peer_committed = committed_before_reconnect; - aa->pcaa_initial_connect = initial_connect; - - if (aa->pcaa_initial_connect) { - spin_lock(&imp->imp_lock); - imp->imp_replayable = 1; - spin_unlock(&imp->imp_lock); - lustre_msg_add_op_flags(request->rq_reqmsg, - MSG_CONNECT_INITIAL); - } - - if (set_transno) - lustre_msg_add_op_flags(request->rq_reqmsg, - MSG_CONNECT_TRANSNO); - - DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)", - request->rq_timeout); - ptlrpcd_add_req(request); - rc = 0; -out: - if (rc != 0) - IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); - - return rc; -} -EXPORT_SYMBOL(ptlrpc_connect_import); - -static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp) -{ - int force_verify; - - spin_lock(&imp->imp_lock); - force_verify = imp->imp_force_verify != 0; - spin_unlock(&imp->imp_lock); - - if (force_verify) - ptlrpc_pinger_wake_up(); -} - -static int ptlrpc_busy_reconnect(int rc) -{ - return (rc == -EBUSY) || (rc == -EAGAIN); -} - -static int ptlrpc_connect_set_flags(struct obd_import *imp, - struct obd_connect_data *ocd, - u64 old_connect_flags, - struct obd_export *exp, int init_connect) -{ - struct client_obd *cli = &imp->imp_obd->u.cli; - static 
bool warned; - - if ((imp->imp_connect_flags_orig & OBD_CONNECT_IBITS) && - !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) { - LCONSOLE_WARN("%s: MDS %s does not support ibits lock, either very old or invalid: requested %#llx, replied %#llx\n", - imp->imp_obd->obd_name, - imp->imp_connection->c_remote_uuid.uuid, - imp->imp_connect_flags_orig, - ocd->ocd_connect_flags); - return -EPROTO; - } - - spin_lock(&imp->imp_lock); - list_del(&imp->imp_conn_current->oic_item); - list_add(&imp->imp_conn_current->oic_item, &imp->imp_conn_list); - imp->imp_last_success_conn = imp->imp_conn_current->oic_last_attempt; - - spin_unlock(&imp->imp_lock); - - if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && - (ocd->ocd_version > LUSTRE_VERSION_CODE + - LUSTRE_VERSION_OFFSET_WARN || - ocd->ocd_version < LUSTRE_VERSION_CODE - - LUSTRE_VERSION_OFFSET_WARN)) { - /* - * Sigh, some compilers do not like #ifdef in the middle - * of macro arguments - */ - const char *older = "older than client. Consider upgrading server"; - const char *newer = "newer than client. Consider recompiling application"; - - LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n", - obd2cli_tgt(imp->imp_obd), - OBD_OCD_VERSION_MAJOR(ocd->ocd_version), - OBD_OCD_VERSION_MINOR(ocd->ocd_version), - OBD_OCD_VERSION_PATCH(ocd->ocd_version), - OBD_OCD_VERSION_FIX(ocd->ocd_version), - ocd->ocd_version > LUSTRE_VERSION_CODE ? - newer : older, LUSTRE_VERSION_STRING); - warned = true; - } - -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0) - /* - * Check if server has LU-1252 fix applied to not always swab - * the IR MNE entries. Do this only once per connection. This - * fixup is version-limited, because we don't want to carry the - * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we - * need interop with unpatched 2.2 servers. For newer servers, - * the client will do MNE swabbing only as needed. 
LU-1644 - */ - if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && - !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) && - OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 && - OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 && - OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 && - !strcmp(imp->imp_obd->obd_type->typ_name, - LUSTRE_MGC_NAME))) - imp->imp_need_mne_swab = 1; - else /* clear if server was upgraded since last connect */ - imp->imp_need_mne_swab = 0; -#endif - - if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) { - /* - * We sent to the server ocd_cksum_types with bits set - * for algorithms we understand. The server masked off - * the checksum types it doesn't support - */ - if (!(ocd->ocd_cksum_types & cksum_types_supported_client())) { - LCONSOLE_WARN("The negotiation of the checksum algorithm to use with server %s failed (%x/%x), disabling checksums\n", - obd2cli_tgt(imp->imp_obd), - ocd->ocd_cksum_types, - cksum_types_supported_client()); - cli->cl_checksum = 0; - cli->cl_supp_cksum_types = OBD_CKSUM_ADLER; - } else { - cli->cl_supp_cksum_types = ocd->ocd_cksum_types; - } - } else { - /* - * The server does not support OBD_CONNECT_CKSUM. 
- * Enforce ADLER for backward compatibility - */ - cli->cl_supp_cksum_types = OBD_CKSUM_ADLER; - } - cli->cl_cksum_type = cksum_type_select(cli->cl_supp_cksum_types); - - if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) - cli->cl_max_pages_per_rpc = - min(ocd->ocd_brw_size >> PAGE_SHIFT, - cli->cl_max_pages_per_rpc); - else if (imp->imp_connect_op == MDS_CONNECT || - imp->imp_connect_op == MGS_CONNECT) - cli->cl_max_pages_per_rpc = 1; - - LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) && - (cli->cl_max_pages_per_rpc > 0)); - - client_adjust_max_dirty(cli); - - /* - * Update client max modify RPCs in flight with value returned - * by the server - */ - if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) - cli->cl_max_mod_rpcs_in_flight = min( - cli->cl_max_mod_rpcs_in_flight, - ocd->ocd_maxmodrpcs); - else - cli->cl_max_mod_rpcs_in_flight = 1; - - /* - * Reset ns_connect_flags only for initial connect. It might be - * changed in while using FS and if we reset it in reconnect - * this leads to losing user settings done before such as - * disable lru_resize, etc. - */ - if (old_connect_flags != exp_connect_flags(exp) || init_connect) { - CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server flags: %#llx\n", - imp->imp_obd->obd_name, ocd->ocd_connect_flags); - imp->imp_obd->obd_namespace->ns_connect_flags = - ocd->ocd_connect_flags; - imp->imp_obd->obd_namespace->ns_orig_connect_flags = - ocd->ocd_connect_flags; - } - - if (ocd->ocd_connect_flags & OBD_CONNECT_AT) - /* - * We need a per-message support flag, because - * a. we don't know if the incoming connect reply - * supports AT or not (in reply_in_callback) - * until we unpack it. - * b. failovered server means export and flags are gone - * (in ptlrpc_send_reply). 
- * Can only be set when we know AT is supported at - * both ends - */ - imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT; - else - imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT; - - imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18; - - return 0; -} - -/** - * Add all replay requests back to unreplied list before start replay, - * so that we can make sure the known replied XID is always increased - * only even if when replaying requests. - */ -static void ptlrpc_prepare_replay(struct obd_import *imp) -{ - struct ptlrpc_request *req; - - if (imp->imp_state != LUSTRE_IMP_REPLAY || - imp->imp_resend_replay) - return; - - /* - * If the server was restart during repaly, the requests may - * have been added to the unreplied list in former replay. - */ - spin_lock(&imp->imp_lock); - - list_for_each_entry(req, &imp->imp_committed_list, rq_replay_list) { - if (list_empty(&req->rq_unreplied_list)) - ptlrpc_add_unreplied(req); - } - - list_for_each_entry(req, &imp->imp_replay_list, rq_replay_list) { - if (list_empty(&req->rq_unreplied_list)) - ptlrpc_add_unreplied(req); - } - - imp->imp_known_replied_xid = ptlrpc_known_replied_xid(imp); - spin_unlock(&imp->imp_lock); -} - -/** - * interpret_reply callback for connect RPCs. - * Looks into returned status of connect operation and decides - * what to do with the import - i.e enter recovery, promote it to - * full state for normal operations of disconnect it due to an error. 
- */ -static int ptlrpc_connect_interpret(const struct lu_env *env, - struct ptlrpc_request *request, - void *data, int rc) -{ - struct ptlrpc_connect_async_args *aa = data; - struct obd_import *imp = request->rq_import; - struct lustre_handle old_hdl; - __u64 old_connect_flags; - int msg_flags; - struct obd_connect_data *ocd; - struct obd_export *exp; - int ret; - - spin_lock(&imp->imp_lock); - if (imp->imp_state == LUSTRE_IMP_CLOSED) { - imp->imp_connect_tried = 1; - spin_unlock(&imp->imp_lock); - return 0; - } - - if (rc) { - /* if this reconnect to busy export - not need select new target - * for connecting - */ - imp->imp_force_reconnect = ptlrpc_busy_reconnect(rc); - spin_unlock(&imp->imp_lock); - ptlrpc_maybe_ping_import_soon(imp); - goto out; - } - - /* - * LU-7558: indicate that we are interpretting connect reply, - * pltrpc_connect_import() will not try to reconnect until - * interpret will finish. - */ - imp->imp_connected = 1; - spin_unlock(&imp->imp_lock); - - LASSERT(imp->imp_conn_current); - - msg_flags = lustre_msg_get_op_flags(request->rq_repmsg); - - ret = req_capsule_get_size(&request->rq_pill, &RMF_CONNECT_DATA, - RCL_SERVER); - /* server replied obd_connect_data is always bigger */ - ocd = req_capsule_server_sized_get(&request->rq_pill, - &RMF_CONNECT_DATA, ret); - - if (!ocd) { - CERROR("%s: no connect data from server\n", - imp->imp_obd->obd_name); - rc = -EPROTO; - goto out; - } - - spin_lock(&imp->imp_lock); - - /* All imports are pingable */ - imp->imp_pingable = 1; - imp->imp_force_reconnect = 0; - imp->imp_force_verify = 0; - - imp->imp_connect_data = *ocd; - - CDEBUG(D_HA, "%s: connect to target with instance %u\n", - imp->imp_obd->obd_name, ocd->ocd_instance); - exp = class_conn2export(&imp->imp_dlm_handle); - - spin_unlock(&imp->imp_lock); - - if (!exp) { - /* This could happen if export is cleaned during the - * connect attempt - */ - CERROR("%s: missing export after connect\n", - imp->imp_obd->obd_name); - rc = -ENODEV; - goto out; 
- } - - /* check that server granted subset of flags we asked for. */ - if ((ocd->ocd_connect_flags & imp->imp_connect_flags_orig) != - ocd->ocd_connect_flags) { - CERROR("%s: Server didn't grant the asked for subset of flags: asked=%#llx granted=%#llx\n", - imp->imp_obd->obd_name, imp->imp_connect_flags_orig, - ocd->ocd_connect_flags); - rc = -EPROTO; - goto out; - } - - old_connect_flags = exp_connect_flags(exp); - exp->exp_connect_data = *ocd; - imp->imp_obd->obd_self_export->exp_connect_data = *ocd; - - /* - * The net statistics after (re-)connect is not valid anymore, - * because may reflect other routing, etc. - */ - at_init(&imp->imp_at.iat_net_latency, 0, 0); - ptlrpc_at_adj_net_latency(request, - lustre_msg_get_service_time(request->rq_repmsg)); - - /* Import flags should be updated before waking import at FULL state */ - rc = ptlrpc_connect_set_flags(imp, ocd, old_connect_flags, exp, - aa->pcaa_initial_connect); - class_export_put(exp); - if (rc) - goto out; - - obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD); - - if (aa->pcaa_initial_connect) { - spin_lock(&imp->imp_lock); - if (msg_flags & MSG_CONNECT_REPLAYABLE) { - imp->imp_replayable = 1; - spin_unlock(&imp->imp_lock); - CDEBUG(D_HA, "connected to replayable target: %s\n", - obd2cli_tgt(imp->imp_obd)); - } else { - imp->imp_replayable = 0; - spin_unlock(&imp->imp_lock); - } - - /* if applies, adjust the imp->imp_msg_magic here - * according to reply flags - */ - - imp->imp_remote_handle = - *lustre_msg_get_handle(request->rq_repmsg); - - /* Initial connects are allowed for clients with non-random - * uuids when servers are in recovery. Simply signal the - * servers replay is complete and wait in REPLAY_WAIT. 
- */ - if (msg_flags & MSG_CONNECT_RECOVERING) { - CDEBUG(D_HA, "connect to %s during recovery\n", - obd2cli_tgt(imp->imp_obd)); - IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS); - } else { - IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL); - ptlrpc_activate_import(imp); - } - - rc = 0; - goto finish; - } - - /* Determine what recovery state to move the import to. */ - if (msg_flags & MSG_CONNECT_RECONNECT) { - memset(&old_hdl, 0, sizeof(old_hdl)); - if (!memcmp(&old_hdl, lustre_msg_get_handle(request->rq_repmsg), - sizeof(old_hdl))) { - LCONSOLE_WARN("Reconnect to %s (at @%s) failed due bad handle %#llx\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid, - imp->imp_dlm_handle.cookie); - rc = -ENOTCONN; - goto out; - } - - if (memcmp(&imp->imp_remote_handle, - lustre_msg_get_handle(request->rq_repmsg), - sizeof(imp->imp_remote_handle))) { - int level = msg_flags & MSG_CONNECT_RECOVERING ? - D_HA : D_WARNING; - - /* Bug 16611/14775: if server handle have changed, - * that means some sort of disconnection happened. - * If the server is not in recovery, that also means it - * already erased all of our state because of previous - * eviction. 
If it is in recovery - we are safe to - * participate since we can reestablish all of our state - * with server again - */ - if ((msg_flags & MSG_CONNECT_RECOVERING)) { - CDEBUG(level, "%s@%s changed server handle from %#llx to %#llx but is still in recovery\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid, - imp->imp_remote_handle.cookie, - lustre_msg_get_handle( - request->rq_repmsg)->cookie); - } else { - LCONSOLE_WARN("Evicted from %s (at %s) after server handle changed from %#llx to %#llx\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection-> \ - c_remote_uuid.uuid, - imp->imp_remote_handle.cookie, - lustre_msg_get_handle( - request->rq_repmsg)->cookie); - } - - imp->imp_remote_handle = - *lustre_msg_get_handle(request->rq_repmsg); - - if (!(msg_flags & MSG_CONNECT_RECOVERING)) { - IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); - rc = 0; - goto finish; - } - - } else { - CDEBUG(D_HA, "reconnected to %s@%s after partition\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid); - } - - if (imp->imp_invalid) { - CDEBUG(D_HA, "%s: reconnected but import is invalid; marking evicted\n", - imp->imp_obd->obd_name); - IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); - } else if (msg_flags & MSG_CONNECT_RECOVERING) { - CDEBUG(D_HA, "%s: reconnected to %s during replay\n", - imp->imp_obd->obd_name, - obd2cli_tgt(imp->imp_obd)); - - spin_lock(&imp->imp_lock); - imp->imp_resend_replay = 1; - spin_unlock(&imp->imp_lock); - - IMPORT_SET_STATE(imp, imp->imp_replay_state); - } else { - IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); - } - } else if ((msg_flags & MSG_CONNECT_RECOVERING) && !imp->imp_invalid) { - LASSERT(imp->imp_replayable); - imp->imp_remote_handle = - *lustre_msg_get_handle(request->rq_repmsg); - imp->imp_last_replay_transno = 0; - imp->imp_replay_cursor = &imp->imp_committed_list; - IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY); - } else { - DEBUG_REQ(D_HA, request, "%s: evicting (reconnect/recover flags not set: %x)", - 
imp->imp_obd->obd_name, msg_flags); - imp->imp_remote_handle = - *lustre_msg_get_handle(request->rq_repmsg); - IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED); - } - - /* Sanity checks for a reconnected import. */ - if (!(imp->imp_replayable) != !(msg_flags & MSG_CONNECT_REPLAYABLE)) - CERROR("imp_replayable flag does not match server after reconnect. We should LBUG right here.\n"); - - if (lustre_msg_get_last_committed(request->rq_repmsg) > 0 && - lustre_msg_get_last_committed(request->rq_repmsg) < - aa->pcaa_peer_committed) - CERROR("%s went back in time (transno %lld was previously committed, server now claims %lld)! See https://bugzilla.lustre.org/show_bug.cgi?id=9646\n", - obd2cli_tgt(imp->imp_obd), aa->pcaa_peer_committed, - lustre_msg_get_last_committed(request->rq_repmsg)); - -finish: - ptlrpc_prepare_replay(imp); - rc = ptlrpc_import_recovery_state_machine(imp); - if (rc == -ENOTCONN) { - CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid); - ptlrpc_connect_import(imp); - spin_lock(&imp->imp_lock); - imp->imp_connected = 0; - imp->imp_connect_tried = 1; - spin_unlock(&imp->imp_lock); - return 0; - } - -out: - spin_lock(&imp->imp_lock); - imp->imp_connected = 0; - imp->imp_connect_tried = 1; - spin_unlock(&imp->imp_lock); - - if (rc != 0) { - IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); - if (rc == -EACCES) { - /* - * Give up trying to reconnect - * EACCES means client has no permission for connection - */ - imp->imp_obd->obd_no_recov = 1; - ptlrpc_deactivate_import(imp); - } - - if (rc == -EPROTO) { - struct obd_connect_data *ocd; - - /* reply message might not be ready */ - if (!request->rq_repmsg) - return -EPROTO; - - ocd = req_capsule_server_get(&request->rq_pill, - &RMF_CONNECT_DATA); - if (ocd && - (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && - (ocd->ocd_version != LUSTRE_VERSION_CODE)) { - /* - * Actually servers are only supposed to refuse - * 
connection from liblustre clients, so we - * should never see this from VFS context - */ - LCONSOLE_ERROR_MSG(0x16a, "Server %s version (%d.%d.%d.%d) refused connection from this client with an incompatible version (%s). Client must be recompiled\n", - obd2cli_tgt(imp->imp_obd), - OBD_OCD_VERSION_MAJOR(ocd->ocd_version), - OBD_OCD_VERSION_MINOR(ocd->ocd_version), - OBD_OCD_VERSION_PATCH(ocd->ocd_version), - OBD_OCD_VERSION_FIX(ocd->ocd_version), - LUSTRE_VERSION_STRING); - ptlrpc_deactivate_import(imp); - IMPORT_SET_STATE(imp, LUSTRE_IMP_CLOSED); - } - return -EPROTO; - } - - ptlrpc_maybe_ping_import_soon(imp); - - CDEBUG(D_HA, "recovery of %s on %s failed (%d)\n", - obd2cli_tgt(imp->imp_obd), - (char *)imp->imp_connection->c_remote_uuid.uuid, rc); - } - - wake_up_all(&imp->imp_recovery_waitq); - return rc; -} - -/** - * interpret callback for "completed replay" RPCs. - * \see signal_completed_replay - */ -static int completed_replay_interpret(const struct lu_env *env, - struct ptlrpc_request *req, - void *data, int rc) -{ - atomic_dec(&req->rq_import->imp_replay_inflight); - if (req->rq_status == 0 && - !req->rq_import->imp_vbr_failed) { - ptlrpc_import_recovery_state_machine(req->rq_import); - } else { - if (req->rq_import->imp_vbr_failed) { - CDEBUG(D_WARNING, - "%s: version recovery fails, reconnecting\n", - req->rq_import->imp_obd->obd_name); - } else { - CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, reconnecting\n", - req->rq_import->imp_obd->obd_name, - req->rq_status); - } - ptlrpc_connect_import(req->rq_import); - } - - return 0; -} - -/** - * Let server know that we have no requests to replay anymore. 
- * Achieved by just sending a PING request - */ -static int signal_completed_replay(struct obd_import *imp) -{ - struct ptlrpc_request *req; - - if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_FINISH_REPLAY))) - return 0; - - LASSERT(atomic_read(&imp->imp_replay_inflight) == 0); - atomic_inc(&imp->imp_replay_inflight); - - req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, LUSTRE_OBD_VERSION, - OBD_PING); - if (!req) { - atomic_dec(&imp->imp_replay_inflight); - return -ENOMEM; - } - - ptlrpc_request_set_replen(req); - req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT; - lustre_msg_add_flags(req->rq_reqmsg, - MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE); - if (AT_OFF) - req->rq_timeout *= 3; - req->rq_interpret_reply = completed_replay_interpret; - - ptlrpcd_add_req(req); - return 0; -} - -/** - * In kernel code all import invalidation happens in its own - * separate thread, so that whatever application happened to encounter - * a problem could still be killed or otherwise continue - */ -static int ptlrpc_invalidate_import_thread(void *data) -{ - struct obd_import *imp = data; - - unshare_fs_struct(); - - CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n", - imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid); - - ptlrpc_invalidate_import(imp); - - if (obd_dump_on_eviction) { - CERROR("dump the log upon eviction\n"); - libcfs_debug_dumplog(); - } - - IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); - ptlrpc_import_recovery_state_machine(imp); - - class_import_put(imp); - return 0; -} - -/** - * This is the state machine for client-side recovery on import. - * - * Typically we have two possibly paths. If we came to server and it is not - * in recovery, we just enter IMP_EVICTED state, invalidate our import - * state and reconnect from scratch. - * If we came to server that is in recovery, we enter IMP_REPLAY import state. - * We go through our list of requests to replay and send them to server one by - * one. 
- * After sending all request from the list we change import state to - * IMP_REPLAY_LOCKS and re-request all the locks we believe we have from server - * and also all the locks we don't yet have and wait for server to grant us. - * After that we send a special "replay completed" request and change import - * state to IMP_REPLAY_WAIT. - * Upon receiving reply to that "replay completed" RPC we enter IMP_RECOVER - * state and resend all requests from sending list. - * After that we promote import to FULL state and send all delayed requests - * and import is fully operational after that. - * - */ -int ptlrpc_import_recovery_state_machine(struct obd_import *imp) -{ - int rc = 0; - int inflight; - char *target_start; - int target_len; - - if (imp->imp_state == LUSTRE_IMP_EVICTED) { - deuuidify(obd2cli_tgt(imp->imp_obd), NULL, - &target_start, &target_len); - /* Don't care about MGC eviction */ - if (strcmp(imp->imp_obd->obd_type->typ_name, - LUSTRE_MGC_NAME) != 0) { - LCONSOLE_ERROR_MSG(0x167, "%s: This client was evicted by %.*s; in progress operations using this service will fail.\n", - imp->imp_obd->obd_name, target_len, - target_start); - } - CDEBUG(D_HA, "evicted from %s@%s; invalidating\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid); - /* reset vbr_failed flag upon eviction */ - spin_lock(&imp->imp_lock); - imp->imp_vbr_failed = 0; - spin_unlock(&imp->imp_lock); - - { - struct task_struct *task; - /* bug 17802: XXX client_disconnect_export vs connect request - * race. if client is evicted at this time, we start - * invalidate thread without reference to import and import can - * be freed at same time. 
- */ - class_import_get(imp); - task = kthread_run(ptlrpc_invalidate_import_thread, imp, - "ll_imp_inval"); - if (IS_ERR(task)) { - class_import_put(imp); - CERROR("error starting invalidate thread: %d\n", rc); - rc = PTR_ERR(task); - } else { - rc = 0; - } - return rc; - } - } - - if (imp->imp_state == LUSTRE_IMP_REPLAY) { - CDEBUG(D_HA, "replay requested by %s\n", - obd2cli_tgt(imp->imp_obd)); - rc = ptlrpc_replay_next(imp, &inflight); - if (inflight == 0 && - atomic_read(&imp->imp_replay_inflight) == 0) { - IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS); - rc = ldlm_replay_locks(imp); - if (rc) - goto out; - } - rc = 0; - } - - if (imp->imp_state == LUSTRE_IMP_REPLAY_LOCKS) - if (atomic_read(&imp->imp_replay_inflight) == 0) { - IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_WAIT); - rc = signal_completed_replay(imp); - if (rc) - goto out; - } - - if (imp->imp_state == LUSTRE_IMP_REPLAY_WAIT) - if (atomic_read(&imp->imp_replay_inflight) == 0) - IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); - - if (imp->imp_state == LUSTRE_IMP_RECOVER) { - CDEBUG(D_HA, "reconnected to %s@%s\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid); - - rc = ptlrpc_resend(imp); - if (rc) - goto out; - IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL); - ptlrpc_activate_import(imp); - - deuuidify(obd2cli_tgt(imp->imp_obd), NULL, - &target_start, &target_len); - LCONSOLE_INFO("%s: Connection restored to %.*s (at %s)\n", - imp->imp_obd->obd_name, - target_len, target_start, - libcfs_nid2str(imp->imp_connection->c_peer.nid)); - } - - if (imp->imp_state == LUSTRE_IMP_FULL) { - wake_up_all(&imp->imp_recovery_waitq); - ptlrpc_wake_delayed(imp); - } - -out: - return rc; -} - -int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) -{ - struct ptlrpc_request *req; - int rq_opc, rc = 0; - - if (imp->imp_obd->obd_force) - goto set_state; - - switch (imp->imp_connect_op) { - case OST_CONNECT: - rq_opc = OST_DISCONNECT; - break; - case MDS_CONNECT: - rq_opc = MDS_DISCONNECT; - break; - 
case MGS_CONNECT: - rq_opc = MGS_DISCONNECT; - break; - default: - rc = -EINVAL; - CERROR("%s: don't know how to disconnect from %s (connect_op %d): rc = %d\n", - imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd), - imp->imp_connect_op, rc); - return rc; - } - - if (ptlrpc_import_in_recovery(imp)) { - unsigned long timeout; - - if (AT_OFF) { - if (imp->imp_server_timeout) - timeout = obd_timeout * HZ / 2; - else - timeout = obd_timeout * HZ; - } else { - int idx = import_at_get_index(imp, - imp->imp_client->cli_request_portal); - timeout = at_get(&imp->imp_at.iat_service_estimate[idx]) * HZ; - } - - if (wait_event_idle_timeout(imp->imp_recovery_waitq, - !ptlrpc_import_in_recovery(imp), - max(timeout, 1UL)) == 0) - l_wait_event_abortable( - imp->imp_recovery_waitq, - !ptlrpc_import_in_recovery(imp)); - } - - spin_lock(&imp->imp_lock); - if (imp->imp_state != LUSTRE_IMP_FULL) - goto out; - spin_unlock(&imp->imp_lock); - - req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_DISCONNECT, - LUSTRE_OBD_VERSION, rq_opc); - if (req) { - /* We are disconnecting, do not retry a failed DISCONNECT rpc if - * it fails. We can get through the above with a down server - * if the client doesn't know the server is gone yet. - */ - req->rq_no_resend = 1; - - /* We want client umounts to happen quickly, no matter the - * server state... 
- */ - req->rq_timeout = min_t(int, req->rq_timeout, - INITIAL_CONNECT_TIMEOUT); - - IMPORT_SET_STATE(imp, LUSTRE_IMP_CONNECTING); - req->rq_send_state = LUSTRE_IMP_CONNECTING; - ptlrpc_request_set_replen(req); - rc = ptlrpc_queue_wait(req); - ptlrpc_req_finished(req); - } - -set_state: - spin_lock(&imp->imp_lock); -out: - if (noclose) - IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); - else - IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED); - memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle)); - spin_unlock(&imp->imp_lock); - - if (rc == -ETIMEDOUT || rc == -ENOTCONN || rc == -ESHUTDOWN) - rc = 0; - - return rc; -} -EXPORT_SYMBOL(ptlrpc_disconnect_import); - -/* Adaptive Timeout utils */ -extern unsigned int at_min, at_max, at_history; - -/* - *Update at_current with the specified value (bounded by at_min and at_max), - * as well as the AT history "bins". - * - Bin into timeslices using AT_BINS bins. - * - This gives us a max of the last at_history seconds without the storage, - * but still smoothing out a return to normalcy from a slow response. - * - (E.g. remember the maximum latency in each minute of the last 4 minutes.) 
- */ -int at_measured(struct adaptive_timeout *at, unsigned int val) -{ - unsigned int old = at->at_current; - time64_t now = ktime_get_real_seconds(); - long binlimit = max_t(long, at_history / AT_BINS, 1); - - LASSERT(at); - CDEBUG(D_OTHER, "add %u to %p time=%lu v=%u (%u %u %u %u)\n", - val, at, (long)(now - at->at_binstart), at->at_current, - at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]); - - if (val == 0) - /* 0's don't count, because we never want our timeout to - * drop to 0, and because 0 could mean an error - */ - return 0; - - spin_lock(&at->at_lock); - - if (unlikely(at->at_binstart == 0)) { - /* Special case to remove default from history */ - at->at_current = val; - at->at_worst_ever = val; - at->at_worst_time = now; - at->at_hist[0] = val; - at->at_binstart = now; - } else if (now - at->at_binstart < binlimit) { - /* in bin 0 */ - at->at_hist[0] = max(val, at->at_hist[0]); - at->at_current = max(val, at->at_current); - } else { - int i, shift; - unsigned int maxv = val; - /* move bins over */ - shift = (u32)(now - at->at_binstart) / binlimit; - LASSERT(shift > 0); - for (i = AT_BINS - 1; i >= 0; i--) { - if (i >= shift) { - at->at_hist[i] = at->at_hist[i - shift]; - maxv = max(maxv, at->at_hist[i]); - } else { - at->at_hist[i] = 0; - } - } - at->at_hist[0] = val; - at->at_current = maxv; - at->at_binstart += shift * binlimit; - } - - if (at->at_current > at->at_worst_ever) { - at->at_worst_ever = at->at_current; - at->at_worst_time = now; - } - - if (at->at_flags & AT_FLG_NOHIST) - /* Only keep last reported val; keeping the rest of the history - * for debugfs only - */ - at->at_current = val; - - if (at_max > 0) - at->at_current = min(at->at_current, at_max); - at->at_current = max(at->at_current, at_min); - - if (at->at_current != old) - CDEBUG(D_OTHER, "AT %p change: old=%u new=%u delta=%d (val=%u) hist %u %u %u %u\n", - at, - old, at->at_current, at->at_current - old, val, - at->at_hist[0], at->at_hist[1], at->at_hist[2], - 
at->at_hist[3]); - - /* if we changed, report the old value */ - old = (at->at_current != old) ? old : 0; - - spin_unlock(&at->at_lock); - return old; -} - -/* Find the imp_at index for a given portal; assign if space available */ -int import_at_get_index(struct obd_import *imp, int portal) -{ - struct imp_at *at = &imp->imp_at; - int i; - - for (i = 0; i < IMP_AT_MAX_PORTALS; i++) { - if (at->iat_portal[i] == portal) - return i; - if (at->iat_portal[i] == 0) - /* unused */ - break; - } - - /* Not found in list, add it under a lock */ - spin_lock(&imp->imp_lock); - - /* Check unused under lock */ - for (; i < IMP_AT_MAX_PORTALS; i++) { - if (at->iat_portal[i] == portal) - goto out; - if (at->iat_portal[i] == 0) - /* unused */ - break; - } - - /* Not enough portals? */ - LASSERT(i < IMP_AT_MAX_PORTALS); - - at->iat_portal[i] = portal; -out: - spin_unlock(&imp->imp_lock); - return i; -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c deleted file mode 100644 index 417d4a151433..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/layout.c +++ /dev/null @@ -1,2232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/layout.c - * - * Lustre Metadata Target (mdt) request handler - * - * Author: Nikita Danilov - */ -/* - * This file contains the "capsule/pill" abstraction layered above PTLRPC. - * - * Every struct ptlrpc_request contains a "pill", which points to a description - * of the format that the request conforms to. - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include - -#include - -#include -#include -#include -#include -#include -#include - -/* struct ptlrpc_request, lustre_msg* */ -#include -#include - -/* - * RQFs (see below) refer to two struct req_msg_field arrays describing the - * client request and server reply, respectively. - */ -/* empty set of fields... for suitable definition of emptiness. 
*/ -static const struct req_msg_field *empty[] = { - &RMF_PTLRPC_BODY -}; - -static const struct req_msg_field *mgs_target_info_only[] = { - &RMF_PTLRPC_BODY, - &RMF_MGS_TARGET_INFO -}; - -static const struct req_msg_field *mgs_set_info[] = { - &RMF_PTLRPC_BODY, - &RMF_MGS_SEND_PARAM -}; - -static const struct req_msg_field *mgs_config_read_client[] = { - &RMF_PTLRPC_BODY, - &RMF_MGS_CONFIG_BODY -}; - -static const struct req_msg_field *mgs_config_read_server[] = { - &RMF_PTLRPC_BODY, - &RMF_MGS_CONFIG_RES -}; - -static const struct req_msg_field *log_cancel_client[] = { - &RMF_PTLRPC_BODY, - &RMF_LOGCOOKIES -}; - -static const struct req_msg_field *mdt_body_only[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY -}; - -static const struct req_msg_field *mdt_body_capa[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_CAPA1 -}; - -static const struct req_msg_field *quotactl_only[] = { - &RMF_PTLRPC_BODY, - &RMF_OBD_QUOTACTL -}; - -static const struct req_msg_field *mdt_close_client[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_EPOCH, - &RMF_REC_REINT, - &RMF_CAPA1 -}; - -static const struct req_msg_field *mdt_intent_close_client[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_EPOCH, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_CLOSE_DATA -}; - -static const struct req_msg_field *obd_statfs_server[] = { - &RMF_PTLRPC_BODY, - &RMF_OBD_STATFS -}; - -static const struct req_msg_field *seq_query_client[] = { - &RMF_PTLRPC_BODY, - &RMF_SEQ_OPC, - &RMF_SEQ_RANGE -}; - -static const struct req_msg_field *seq_query_server[] = { - &RMF_PTLRPC_BODY, - &RMF_SEQ_RANGE -}; - -static const struct req_msg_field *fld_query_client[] = { - &RMF_PTLRPC_BODY, - &RMF_FLD_OPC, - &RMF_FLD_MDFLD -}; - -static const struct req_msg_field *fld_query_server[] = { - &RMF_PTLRPC_BODY, - &RMF_FLD_MDFLD -}; - -static const struct req_msg_field *fld_read_client[] = { - &RMF_PTLRPC_BODY, - &RMF_FLD_MDFLD -}; - -static const struct req_msg_field *fld_read_server[] = { - &RMF_PTLRPC_BODY, - &RMF_GENERIC_DATA -}; - -static const struct 
req_msg_field *mds_getattr_name_client[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_CAPA1, - &RMF_NAME -}; - -static const struct req_msg_field *mds_reint_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT -}; - -static const struct req_msg_field *mds_reint_create_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_NAME -}; - -static const struct req_msg_field *mds_reint_create_slave_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_NAME, - &RMF_EADATA, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *mds_reint_create_acl_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_NAME, - &RMF_EADATA, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *mds_reint_create_sym_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_NAME, - &RMF_SYMTGT, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *mds_reint_open_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_CAPA2, - &RMF_NAME, - &RMF_EADATA -}; - -static const struct req_msg_field *mds_reint_open_server[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_MDT_MD, - &RMF_ACL, - &RMF_CAPA1, - &RMF_CAPA2 -}; - -static const struct req_msg_field *mds_reint_unlink_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_NAME, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *mds_reint_link_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_CAPA2, - &RMF_NAME, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *mds_reint_rename_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_CAPA2, - &RMF_NAME, - &RMF_SYMTGT, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *mds_reint_migrate_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_CAPA2, - &RMF_NAME, - &RMF_SYMTGT, - &RMF_DLM_REQ, - &RMF_MDT_EPOCH, - &RMF_CLOSE_DATA -}; - -static const struct req_msg_field *mds_last_unlink_server[] = { - &RMF_PTLRPC_BODY, - 
&RMF_MDT_BODY, - &RMF_MDT_MD, - &RMF_LOGCOOKIES, - &RMF_CAPA1, - &RMF_CAPA2 -}; - -static const struct req_msg_field *mds_reint_setattr_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_MDT_EPOCH, - &RMF_EADATA, - &RMF_LOGCOOKIES, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *mds_reint_setxattr_client[] = { - &RMF_PTLRPC_BODY, - &RMF_REC_REINT, - &RMF_CAPA1, - &RMF_NAME, - &RMF_EADATA, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *mdt_swap_layouts[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_SWAP_LAYOUTS, - &RMF_CAPA1, - &RMF_CAPA2, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *obd_connect_client[] = { - &RMF_PTLRPC_BODY, - &RMF_TGTUUID, - &RMF_CLUUID, - &RMF_CONN, - &RMF_CONNECT_DATA -}; - -static const struct req_msg_field *obd_connect_server[] = { - &RMF_PTLRPC_BODY, - &RMF_CONNECT_DATA -}; - -static const struct req_msg_field *obd_set_info_client[] = { - &RMF_PTLRPC_BODY, - &RMF_SETINFO_KEY, - &RMF_SETINFO_VAL -}; - -static const struct req_msg_field *ost_grant_shrink_client[] = { - &RMF_PTLRPC_BODY, - &RMF_SETINFO_KEY, - &RMF_OST_BODY -}; - -static const struct req_msg_field *mds_getinfo_client[] = { - &RMF_PTLRPC_BODY, - &RMF_GETINFO_KEY, - &RMF_GETINFO_VALLEN -}; - -static const struct req_msg_field *mds_getinfo_server[] = { - &RMF_PTLRPC_BODY, - &RMF_GETINFO_VAL, -}; - -static const struct req_msg_field *ldlm_enqueue_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ -}; - -static const struct req_msg_field *ldlm_enqueue_server[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REP -}; - -static const struct req_msg_field *ldlm_enqueue_lvb_server[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REP, - &RMF_DLM_LVB -}; - -static const struct req_msg_field *ldlm_cp_callback_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_DLM_LVB -}; - -static const struct req_msg_field *ldlm_gl_callback_desc_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_DLM_GL_DESC -}; - -static const struct req_msg_field 
*ldlm_gl_callback_server[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_LVB -}; - -static const struct req_msg_field *ldlm_intent_basic_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_LDLM_INTENT, -}; - -static const struct req_msg_field *ldlm_intent_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_LDLM_INTENT, - &RMF_REC_REINT -}; - -static const struct req_msg_field *ldlm_intent_server[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REP, - &RMF_MDT_BODY, - &RMF_MDT_MD, - &RMF_ACL -}; - -static const struct req_msg_field *ldlm_intent_layout_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_LDLM_INTENT, - &RMF_LAYOUT_INTENT, - &RMF_EADATA /* for new layout to be set up */ -}; - -static const struct req_msg_field *ldlm_intent_open_server[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REP, - &RMF_MDT_BODY, - &RMF_MDT_MD, - &RMF_ACL, - &RMF_CAPA1, - &RMF_CAPA2 -}; - -static const struct req_msg_field *ldlm_intent_getattr_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_LDLM_INTENT, - &RMF_MDT_BODY, /* coincides with mds_getattr_name_client[] */ - &RMF_CAPA1, - &RMF_NAME -}; - -static const struct req_msg_field *ldlm_intent_getattr_server[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REP, - &RMF_MDT_BODY, - &RMF_MDT_MD, - &RMF_ACL, - &RMF_CAPA1 -}; - -static const struct req_msg_field *ldlm_intent_create_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_LDLM_INTENT, - &RMF_REC_REINT, /* coincides with mds_reint_create_client[] */ - &RMF_CAPA1, - &RMF_NAME, - &RMF_EADATA -}; - -static const struct req_msg_field *ldlm_intent_open_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_LDLM_INTENT, - &RMF_REC_REINT, /* coincides with mds_reint_open_client[] */ - &RMF_CAPA1, - &RMF_CAPA2, - &RMF_NAME, - &RMF_EADATA -}; - -static const struct req_msg_field *ldlm_intent_unlink_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_LDLM_INTENT, - &RMF_REC_REINT, /* coincides with mds_reint_unlink_client[] */ - &RMF_CAPA1, - &RMF_NAME -}; - -static const struct req_msg_field 
*ldlm_intent_getxattr_client[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REQ, - &RMF_LDLM_INTENT, - &RMF_MDT_BODY, - &RMF_CAPA1, -}; - -static const struct req_msg_field *ldlm_intent_getxattr_server[] = { - &RMF_PTLRPC_BODY, - &RMF_DLM_REP, - &RMF_MDT_BODY, - &RMF_MDT_MD, - &RMF_ACL, /* for req_capsule_extend/mdt_intent_policy */ - &RMF_EADATA, - &RMF_EAVALS, - &RMF_EAVALS_LENS -}; - -static const struct req_msg_field *mds_getxattr_client[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_CAPA1, - &RMF_NAME, - &RMF_EADATA -}; - -static const struct req_msg_field *mds_getxattr_server[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_EADATA -}; - -static const struct req_msg_field *mds_getattr_server[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_MDT_MD, - &RMF_ACL, - &RMF_CAPA1, - &RMF_CAPA2 -}; - -static const struct req_msg_field *mds_setattr_server[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_MDT_MD, - &RMF_ACL, - &RMF_CAPA1, - &RMF_CAPA2 -}; - -static const struct req_msg_field *llog_origin_handle_create_client[] = { - &RMF_PTLRPC_BODY, - &RMF_LLOGD_BODY, - &RMF_NAME -}; - -static const struct req_msg_field *llogd_body_only[] = { - &RMF_PTLRPC_BODY, - &RMF_LLOGD_BODY -}; - -static const struct req_msg_field *llog_log_hdr_only[] = { - &RMF_PTLRPC_BODY, - &RMF_LLOG_LOG_HDR -}; - -static const struct req_msg_field *llogd_conn_body_only[] = { - &RMF_PTLRPC_BODY, - &RMF_LLOGD_CONN_BODY -}; - -static const struct req_msg_field *llog_origin_handle_next_block_server[] = { - &RMF_PTLRPC_BODY, - &RMF_LLOGD_BODY, - &RMF_EADATA -}; - -static const struct req_msg_field *ost_body_only[] = { - &RMF_PTLRPC_BODY, - &RMF_OST_BODY -}; - -static const struct req_msg_field *ost_body_capa[] = { - &RMF_PTLRPC_BODY, - &RMF_OST_BODY, - &RMF_CAPA1 -}; - -static const struct req_msg_field *ost_destroy_client[] = { - &RMF_PTLRPC_BODY, - &RMF_OST_BODY, - &RMF_DLM_REQ, - &RMF_CAPA1 -}; - -static const struct req_msg_field *ost_brw_client[] = { - &RMF_PTLRPC_BODY, - &RMF_OST_BODY, - 
&RMF_OBD_IOOBJ, - &RMF_NIOBUF_REMOTE, - &RMF_CAPA1 -}; - -static const struct req_msg_field *ost_brw_read_server[] = { - &RMF_PTLRPC_BODY, - &RMF_OST_BODY -}; - -static const struct req_msg_field *ost_brw_write_server[] = { - &RMF_PTLRPC_BODY, - &RMF_OST_BODY, - &RMF_RCS -}; - -static const struct req_msg_field *ost_get_info_generic_server[] = { - &RMF_PTLRPC_BODY, - &RMF_GENERIC_DATA, -}; - -static const struct req_msg_field *ost_get_info_generic_client[] = { - &RMF_PTLRPC_BODY, - &RMF_GETINFO_KEY -}; - -static const struct req_msg_field *ost_get_last_id_server[] = { - &RMF_PTLRPC_BODY, - &RMF_OBD_ID -}; - -static const struct req_msg_field *ost_get_last_fid_client[] = { - &RMF_PTLRPC_BODY, - &RMF_GETINFO_KEY, - &RMF_FID, -}; - -static const struct req_msg_field *ost_get_last_fid_server[] = { - &RMF_PTLRPC_BODY, - &RMF_FID, -}; - -static const struct req_msg_field *ost_get_fiemap_client[] = { - &RMF_PTLRPC_BODY, - &RMF_FIEMAP_KEY, - &RMF_FIEMAP_VAL -}; - -static const struct req_msg_field *ost_get_fiemap_server[] = { - &RMF_PTLRPC_BODY, - &RMF_FIEMAP_VAL -}; - -static const struct req_msg_field *mdt_hsm_progress[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_MDS_HSM_PROGRESS, -}; - -static const struct req_msg_field *mdt_hsm_ct_register[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_MDS_HSM_ARCHIVE, -}; - -static const struct req_msg_field *mdt_hsm_ct_unregister[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, -}; - -static const struct req_msg_field *mdt_hsm_action_server[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_MDS_HSM_CURRENT_ACTION, -}; - -static const struct req_msg_field *mdt_hsm_state_get_server[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_HSM_USER_STATE, -}; - -static const struct req_msg_field *mdt_hsm_state_set[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_CAPA1, - &RMF_HSM_STATE_SET, -}; - -static const struct req_msg_field *mdt_hsm_request[] = { - &RMF_PTLRPC_BODY, - &RMF_MDT_BODY, - &RMF_MDS_HSM_REQUEST, - &RMF_MDS_HSM_USER_ITEM, - 
&RMF_GENERIC_DATA, -}; - -static struct req_format *req_formats[] = { - &RQF_OBD_PING, - &RQF_OBD_SET_INFO, - &RQF_SEC_CTX, - &RQF_MGS_TARGET_REG, - &RQF_MGS_SET_INFO, - &RQF_MGS_CONFIG_READ, - &RQF_SEQ_QUERY, - &RQF_FLD_QUERY, - &RQF_FLD_READ, - &RQF_MDS_CONNECT, - &RQF_MDS_DISCONNECT, - &RQF_MDS_GET_INFO, - &RQF_MDS_GETSTATUS, - &RQF_MDS_STATFS, - &RQF_MDS_GETATTR, - &RQF_MDS_GETATTR_NAME, - &RQF_MDS_GETXATTR, - &RQF_MDS_SYNC, - &RQF_MDS_CLOSE, - &RQF_MDS_INTENT_CLOSE, - &RQF_MDS_READPAGE, - &RQF_MDS_WRITEPAGE, - &RQF_MDS_REINT, - &RQF_MDS_REINT_CREATE, - &RQF_MDS_REINT_CREATE_ACL, - &RQF_MDS_REINT_CREATE_SLAVE, - &RQF_MDS_REINT_CREATE_SYM, - &RQF_MDS_REINT_OPEN, - &RQF_MDS_REINT_UNLINK, - &RQF_MDS_REINT_LINK, - &RQF_MDS_REINT_RENAME, - &RQF_MDS_REINT_MIGRATE, - &RQF_MDS_REINT_SETATTR, - &RQF_MDS_REINT_SETXATTR, - &RQF_MDS_QUOTACTL, - &RQF_MDS_HSM_PROGRESS, - &RQF_MDS_HSM_CT_REGISTER, - &RQF_MDS_HSM_CT_UNREGISTER, - &RQF_MDS_HSM_STATE_GET, - &RQF_MDS_HSM_STATE_SET, - &RQF_MDS_HSM_ACTION, - &RQF_MDS_HSM_REQUEST, - &RQF_MDS_SWAP_LAYOUTS, - &RQF_OST_CONNECT, - &RQF_OST_DISCONNECT, - &RQF_OST_QUOTACTL, - &RQF_OST_GETATTR, - &RQF_OST_SETATTR, - &RQF_OST_CREATE, - &RQF_OST_PUNCH, - &RQF_OST_SYNC, - &RQF_OST_DESTROY, - &RQF_OST_BRW_READ, - &RQF_OST_BRW_WRITE, - &RQF_OST_STATFS, - &RQF_OST_SET_GRANT_INFO, - &RQF_OST_GET_INFO, - &RQF_OST_GET_INFO_LAST_ID, - &RQF_OST_GET_INFO_LAST_FID, - &RQF_OST_SET_INFO_LAST_FID, - &RQF_OST_GET_INFO_FIEMAP, - &RQF_LDLM_ENQUEUE, - &RQF_LDLM_ENQUEUE_LVB, - &RQF_LDLM_CONVERT, - &RQF_LDLM_CANCEL, - &RQF_LDLM_CALLBACK, - &RQF_LDLM_CP_CALLBACK, - &RQF_LDLM_BL_CALLBACK, - &RQF_LDLM_GL_CALLBACK, - &RQF_LDLM_GL_DESC_CALLBACK, - &RQF_LDLM_INTENT, - &RQF_LDLM_INTENT_BASIC, - &RQF_LDLM_INTENT_LAYOUT, - &RQF_LDLM_INTENT_GETATTR, - &RQF_LDLM_INTENT_OPEN, - &RQF_LDLM_INTENT_CREATE, - &RQF_LDLM_INTENT_UNLINK, - &RQF_LDLM_INTENT_GETXATTR, - &RQF_LOG_CANCEL, - &RQF_LLOG_ORIGIN_HANDLE_CREATE, - &RQF_LLOG_ORIGIN_HANDLE_DESTROY, - 
&RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK, - &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK, - &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER, - &RQF_LLOG_ORIGIN_CONNECT, - &RQF_CONNECT, -}; - -struct req_msg_field { - const __u32 rmf_flags; - const char *rmf_name; - /** - * Field length. (-1) means "variable length". If the - * \a RMF_F_STRUCT_ARRAY flag is set the field is also variable-length, - * but the actual size must be a whole multiple of \a rmf_size. - */ - const int rmf_size; - void (*rmf_swabber)(void *); - void (*rmf_dumper)(void *); - int rmf_offset[ARRAY_SIZE(req_formats)][RCL_NR]; -}; - -enum rmf_flags { - /** - * The field is a string, must be NUL-terminated. - */ - RMF_F_STRING = BIT(0), - /** - * The field's buffer size need not match the declared \a rmf_size. - */ - RMF_F_NO_SIZE_CHECK = BIT(1), - /** - * The field's buffer size must be a whole multiple of the declared \a - * rmf_size and the \a rmf_swabber function must work on the declared \a - * rmf_size worth of bytes. - */ - RMF_F_STRUCT_ARRAY = BIT(2) -}; - -struct req_capsule; - -/* - * Request fields. 
- */ -#define DEFINE_MSGF(name, flags, size, swabber, dumper) { \ - .rmf_name = (name), \ - .rmf_flags = (flags), \ - .rmf_size = (size), \ - .rmf_swabber = (void (*)(void *))(swabber), \ - .rmf_dumper = (void (*)(void *))(dumper) \ -} - -struct req_msg_field RMF_GENERIC_DATA = - DEFINE_MSGF("generic_data", 0, - -1, NULL, NULL); -EXPORT_SYMBOL(RMF_GENERIC_DATA); - -struct req_msg_field RMF_MGS_TARGET_INFO = - DEFINE_MSGF("mgs_target_info", 0, - sizeof(struct mgs_target_info), - lustre_swab_mgs_target_info, NULL); -EXPORT_SYMBOL(RMF_MGS_TARGET_INFO); - -struct req_msg_field RMF_MGS_SEND_PARAM = - DEFINE_MSGF("mgs_send_param", 0, - sizeof(struct mgs_send_param), - NULL, NULL); -EXPORT_SYMBOL(RMF_MGS_SEND_PARAM); - -struct req_msg_field RMF_MGS_CONFIG_BODY = - DEFINE_MSGF("mgs_config_read request", 0, - sizeof(struct mgs_config_body), - lustre_swab_mgs_config_body, NULL); -EXPORT_SYMBOL(RMF_MGS_CONFIG_BODY); - -struct req_msg_field RMF_MGS_CONFIG_RES = - DEFINE_MSGF("mgs_config_read reply ", 0, - sizeof(struct mgs_config_res), - lustre_swab_mgs_config_res, NULL); -EXPORT_SYMBOL(RMF_MGS_CONFIG_RES); - -struct req_msg_field RMF_U32 = - DEFINE_MSGF("generic u32", 0, - sizeof(__u32), lustre_swab_generic_32s, NULL); -EXPORT_SYMBOL(RMF_U32); - -struct req_msg_field RMF_SETINFO_VAL = - DEFINE_MSGF("setinfo_val", 0, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_SETINFO_VAL); - -struct req_msg_field RMF_GETINFO_KEY = - DEFINE_MSGF("getinfo_key", 0, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_GETINFO_KEY); - -struct req_msg_field RMF_GETINFO_VALLEN = - DEFINE_MSGF("getinfo_vallen", 0, - sizeof(__u32), lustre_swab_generic_32s, NULL); -EXPORT_SYMBOL(RMF_GETINFO_VALLEN); - -struct req_msg_field RMF_GETINFO_VAL = - DEFINE_MSGF("getinfo_val", 0, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_GETINFO_VAL); - -struct req_msg_field RMF_SEQ_OPC = - DEFINE_MSGF("seq_query_opc", 0, - sizeof(__u32), lustre_swab_generic_32s, NULL); -EXPORT_SYMBOL(RMF_SEQ_OPC); - -struct req_msg_field RMF_SEQ_RANGE = - 
DEFINE_MSGF("seq_query_range", 0, - sizeof(struct lu_seq_range), - lustre_swab_lu_seq_range, NULL); -EXPORT_SYMBOL(RMF_SEQ_RANGE); - -struct req_msg_field RMF_FLD_OPC = - DEFINE_MSGF("fld_query_opc", 0, - sizeof(__u32), lustre_swab_generic_32s, NULL); -EXPORT_SYMBOL(RMF_FLD_OPC); - -struct req_msg_field RMF_FLD_MDFLD = - DEFINE_MSGF("fld_query_mdfld", 0, - sizeof(struct lu_seq_range), - lustre_swab_lu_seq_range, NULL); -EXPORT_SYMBOL(RMF_FLD_MDFLD); - -struct req_msg_field RMF_MDT_BODY = - DEFINE_MSGF("mdt_body", 0, - sizeof(struct mdt_body), lustre_swab_mdt_body, NULL); -EXPORT_SYMBOL(RMF_MDT_BODY); - -struct req_msg_field RMF_OBD_QUOTACTL = - DEFINE_MSGF("obd_quotactl", 0, - sizeof(struct obd_quotactl), - lustre_swab_obd_quotactl, NULL); -EXPORT_SYMBOL(RMF_OBD_QUOTACTL); - -struct req_msg_field RMF_MDT_EPOCH = - DEFINE_MSGF("mdt_ioepoch", 0, - sizeof(struct mdt_ioepoch), lustre_swab_mdt_ioepoch, NULL); -EXPORT_SYMBOL(RMF_MDT_EPOCH); - -struct req_msg_field RMF_PTLRPC_BODY = - DEFINE_MSGF("ptlrpc_body", 0, - sizeof(struct ptlrpc_body), lustre_swab_ptlrpc_body, NULL); -EXPORT_SYMBOL(RMF_PTLRPC_BODY); - -struct req_msg_field RMF_CLOSE_DATA = - DEFINE_MSGF("data_version", 0, - sizeof(struct close_data), lustre_swab_close_data, NULL); -EXPORT_SYMBOL(RMF_CLOSE_DATA); - -struct req_msg_field RMF_OBD_STATFS = - DEFINE_MSGF("obd_statfs", 0, - sizeof(struct obd_statfs), lustre_swab_obd_statfs, NULL); -EXPORT_SYMBOL(RMF_OBD_STATFS); - -struct req_msg_field RMF_SETINFO_KEY = - DEFINE_MSGF("setinfo_key", 0, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_SETINFO_KEY); - -struct req_msg_field RMF_NAME = - DEFINE_MSGF("name", RMF_F_STRING, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_NAME); - -struct req_msg_field RMF_SYMTGT = - DEFINE_MSGF("symtgt", RMF_F_STRING, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_SYMTGT); - -struct req_msg_field RMF_TGTUUID = - DEFINE_MSGF("tgtuuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL, - NULL); -EXPORT_SYMBOL(RMF_TGTUUID); - -struct req_msg_field RMF_CLUUID = - 
DEFINE_MSGF("cluuid", RMF_F_STRING, sizeof(struct obd_uuid) - 1, NULL, - NULL); -EXPORT_SYMBOL(RMF_CLUUID); - -struct req_msg_field RMF_STRING = - DEFINE_MSGF("string", RMF_F_STRING, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_STRING); - -struct req_msg_field RMF_LLOGD_BODY = - DEFINE_MSGF("llogd_body", 0, - sizeof(struct llogd_body), lustre_swab_llogd_body, NULL); -EXPORT_SYMBOL(RMF_LLOGD_BODY); - -struct req_msg_field RMF_LLOG_LOG_HDR = - DEFINE_MSGF("llog_log_hdr", 0, - sizeof(struct llog_log_hdr), lustre_swab_llog_hdr, NULL); -EXPORT_SYMBOL(RMF_LLOG_LOG_HDR); - -struct req_msg_field RMF_LLOGD_CONN_BODY = - DEFINE_MSGF("llogd_conn_body", 0, - sizeof(struct llogd_conn_body), - lustre_swab_llogd_conn_body, NULL); -EXPORT_SYMBOL(RMF_LLOGD_CONN_BODY); - -/* - * connection handle received in MDS_CONNECT request. - * - * No swabbing needed because struct lustre_handle contains only a 64-bit cookie - * that the client does not interpret at all. - */ -struct req_msg_field RMF_CONN = - DEFINE_MSGF("conn", 0, sizeof(struct lustre_handle), NULL, NULL); -EXPORT_SYMBOL(RMF_CONN); - -struct req_msg_field RMF_CONNECT_DATA = - DEFINE_MSGF("cdata", - RMF_F_NO_SIZE_CHECK /* we allow extra space for interop */, - sizeof(struct obd_connect_data), - lustre_swab_connect, NULL); -EXPORT_SYMBOL(RMF_CONNECT_DATA); - -struct req_msg_field RMF_DLM_REQ = - DEFINE_MSGF("dlm_req", RMF_F_NO_SIZE_CHECK /* ldlm_request_bufsize */, - sizeof(struct ldlm_request), - lustre_swab_ldlm_request, NULL); -EXPORT_SYMBOL(RMF_DLM_REQ); - -struct req_msg_field RMF_DLM_REP = - DEFINE_MSGF("dlm_rep", 0, - sizeof(struct ldlm_reply), lustre_swab_ldlm_reply, NULL); -EXPORT_SYMBOL(RMF_DLM_REP); - -struct req_msg_field RMF_LDLM_INTENT = - DEFINE_MSGF("ldlm_intent", 0, - sizeof(struct ldlm_intent), lustre_swab_ldlm_intent, NULL); -EXPORT_SYMBOL(RMF_LDLM_INTENT); - -struct req_msg_field RMF_DLM_LVB = - DEFINE_MSGF("dlm_lvb", 0, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_DLM_LVB); - -struct req_msg_field RMF_DLM_GL_DESC = - 
DEFINE_MSGF("dlm_gl_desc", 0, sizeof(union ldlm_gl_desc), - lustre_swab_gl_desc, NULL); -EXPORT_SYMBOL(RMF_DLM_GL_DESC); - -struct req_msg_field RMF_MDT_MD = - DEFINE_MSGF("mdt_md", RMF_F_NO_SIZE_CHECK, MIN_MD_SIZE, NULL, NULL); -EXPORT_SYMBOL(RMF_MDT_MD); - -struct req_msg_field RMF_REC_REINT = - DEFINE_MSGF("rec_reint", 0, sizeof(struct mdt_rec_reint), - lustre_swab_mdt_rec_reint, NULL); -EXPORT_SYMBOL(RMF_REC_REINT); - -/* FIXME: this length should be defined as a macro */ -struct req_msg_field RMF_EADATA = DEFINE_MSGF("eadata", 0, -1, - NULL, NULL); -EXPORT_SYMBOL(RMF_EADATA); - -struct req_msg_field RMF_EAVALS = DEFINE_MSGF("eavals", 0, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_EAVALS); - -struct req_msg_field RMF_ACL = DEFINE_MSGF("acl", 0, -1, NULL, NULL); -EXPORT_SYMBOL(RMF_ACL); - -/* FIXME: this should be made to use RMF_F_STRUCT_ARRAY */ -struct req_msg_field RMF_LOGCOOKIES = - DEFINE_MSGF("logcookies", RMF_F_NO_SIZE_CHECK /* multiple cookies */, - sizeof(struct llog_cookie), NULL, NULL); -EXPORT_SYMBOL(RMF_LOGCOOKIES); - -struct req_msg_field RMF_CAPA1 = - DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa), - lustre_swab_lustre_capa, NULL); -EXPORT_SYMBOL(RMF_CAPA1); - -struct req_msg_field RMF_CAPA2 = - DEFINE_MSGF("capa", 0, sizeof(struct lustre_capa), - lustre_swab_lustre_capa, NULL); -EXPORT_SYMBOL(RMF_CAPA2); - -struct req_msg_field RMF_LAYOUT_INTENT = - DEFINE_MSGF("layout_intent", 0, - sizeof(struct layout_intent), lustre_swab_layout_intent, - NULL); -EXPORT_SYMBOL(RMF_LAYOUT_INTENT); - -/* - * OST request field. 
- */ -struct req_msg_field RMF_OST_BODY = - DEFINE_MSGF("ost_body", 0, - sizeof(struct ost_body), lustre_swab_ost_body, dump_ost_body); -EXPORT_SYMBOL(RMF_OST_BODY); - -struct req_msg_field RMF_OBD_IOOBJ = - DEFINE_MSGF("obd_ioobj", RMF_F_STRUCT_ARRAY, - sizeof(struct obd_ioobj), lustre_swab_obd_ioobj, dump_ioo); -EXPORT_SYMBOL(RMF_OBD_IOOBJ); - -struct req_msg_field RMF_NIOBUF_REMOTE = - DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY, - sizeof(struct niobuf_remote), lustre_swab_niobuf_remote, - dump_rniobuf); -EXPORT_SYMBOL(RMF_NIOBUF_REMOTE); - -struct req_msg_field RMF_RCS = - DEFINE_MSGF("niobuf_remote", RMF_F_STRUCT_ARRAY, sizeof(__u32), - lustre_swab_generic_32s, dump_rcs); -EXPORT_SYMBOL(RMF_RCS); - -struct req_msg_field RMF_EAVALS_LENS = - DEFINE_MSGF("eavals_lens", RMF_F_STRUCT_ARRAY, sizeof(__u32), - lustre_swab_generic_32s, NULL); -EXPORT_SYMBOL(RMF_EAVALS_LENS); - -struct req_msg_field RMF_OBD_ID = - DEFINE_MSGF("u64", 0, - sizeof(u64), lustre_swab_ost_last_id, NULL); -EXPORT_SYMBOL(RMF_OBD_ID); - -struct req_msg_field RMF_FID = - DEFINE_MSGF("fid", 0, - sizeof(struct lu_fid), lustre_swab_lu_fid, NULL); -EXPORT_SYMBOL(RMF_FID); - -struct req_msg_field RMF_OST_ID = - DEFINE_MSGF("ost_id", 0, - sizeof(struct ost_id), lustre_swab_ost_id, NULL); -EXPORT_SYMBOL(RMF_OST_ID); - -struct req_msg_field RMF_FIEMAP_KEY = - DEFINE_MSGF("fiemap", 0, sizeof(struct ll_fiemap_info_key), - lustre_swab_fiemap, NULL); -EXPORT_SYMBOL(RMF_FIEMAP_KEY); - -struct req_msg_field RMF_FIEMAP_VAL = - DEFINE_MSGF("fiemap", 0, -1, lustre_swab_fiemap, NULL); -EXPORT_SYMBOL(RMF_FIEMAP_VAL); - -struct req_msg_field RMF_HSM_USER_STATE = - DEFINE_MSGF("hsm_user_state", 0, sizeof(struct hsm_user_state), - lustre_swab_hsm_user_state, NULL); -EXPORT_SYMBOL(RMF_HSM_USER_STATE); - -struct req_msg_field RMF_HSM_STATE_SET = - DEFINE_MSGF("hsm_state_set", 0, sizeof(struct hsm_state_set), - lustre_swab_hsm_state_set, NULL); -EXPORT_SYMBOL(RMF_HSM_STATE_SET); - -struct req_msg_field 
RMF_MDS_HSM_PROGRESS = - DEFINE_MSGF("hsm_progress", 0, sizeof(struct hsm_progress_kernel), - lustre_swab_hsm_progress_kernel, NULL); -EXPORT_SYMBOL(RMF_MDS_HSM_PROGRESS); - -struct req_msg_field RMF_MDS_HSM_CURRENT_ACTION = - DEFINE_MSGF("hsm_current_action", 0, sizeof(struct hsm_current_action), - lustre_swab_hsm_current_action, NULL); -EXPORT_SYMBOL(RMF_MDS_HSM_CURRENT_ACTION); - -struct req_msg_field RMF_MDS_HSM_USER_ITEM = - DEFINE_MSGF("hsm_user_item", RMF_F_STRUCT_ARRAY, - sizeof(struct hsm_user_item), lustre_swab_hsm_user_item, - NULL); -EXPORT_SYMBOL(RMF_MDS_HSM_USER_ITEM); - -struct req_msg_field RMF_MDS_HSM_ARCHIVE = - DEFINE_MSGF("hsm_archive", 0, - sizeof(__u32), lustre_swab_generic_32s, NULL); -EXPORT_SYMBOL(RMF_MDS_HSM_ARCHIVE); - -struct req_msg_field RMF_MDS_HSM_REQUEST = - DEFINE_MSGF("hsm_request", 0, sizeof(struct hsm_request), - lustre_swab_hsm_request, NULL); -EXPORT_SYMBOL(RMF_MDS_HSM_REQUEST); - -struct req_msg_field RMF_SWAP_LAYOUTS = - DEFINE_MSGF("swap_layouts", 0, sizeof(struct mdc_swap_layouts), - lustre_swab_swap_layouts, NULL); -EXPORT_SYMBOL(RMF_SWAP_LAYOUTS); -/* - * Request formats. 
- */ - -struct req_format { - const char *rf_name; - size_t rf_idx; - struct { - size_t nr; - const struct req_msg_field **d; - } rf_fields[RCL_NR]; -}; - -#define DEFINE_REQ_FMT(name, client, client_nr, server, server_nr) { \ - .rf_name = name, \ - .rf_fields = { \ - [RCL_CLIENT] = { \ - .nr = client_nr, \ - .d = client \ - }, \ - [RCL_SERVER] = { \ - .nr = server_nr, \ - .d = server \ - } \ - } \ -} - -#define DEFINE_REQ_FMT0(name, client, server) \ -DEFINE_REQ_FMT(name, client, ARRAY_SIZE(client), server, ARRAY_SIZE(server)) - -struct req_format RQF_OBD_PING = - DEFINE_REQ_FMT0("OBD_PING", empty, empty); -EXPORT_SYMBOL(RQF_OBD_PING); - -struct req_format RQF_OBD_SET_INFO = - DEFINE_REQ_FMT0("OBD_SET_INFO", obd_set_info_client, empty); -EXPORT_SYMBOL(RQF_OBD_SET_INFO); - -struct req_format RQF_SEC_CTX = - DEFINE_REQ_FMT0("SEC_CTX", empty, empty); -EXPORT_SYMBOL(RQF_SEC_CTX); - -struct req_format RQF_MGS_TARGET_REG = - DEFINE_REQ_FMT0("MGS_TARGET_REG", mgs_target_info_only, - mgs_target_info_only); -EXPORT_SYMBOL(RQF_MGS_TARGET_REG); - -struct req_format RQF_MGS_SET_INFO = - DEFINE_REQ_FMT0("MGS_SET_INFO", mgs_set_info, - mgs_set_info); -EXPORT_SYMBOL(RQF_MGS_SET_INFO); - -struct req_format RQF_MGS_CONFIG_READ = - DEFINE_REQ_FMT0("MGS_CONFIG_READ", mgs_config_read_client, - mgs_config_read_server); -EXPORT_SYMBOL(RQF_MGS_CONFIG_READ); - -struct req_format RQF_SEQ_QUERY = - DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server); -EXPORT_SYMBOL(RQF_SEQ_QUERY); - -struct req_format RQF_FLD_QUERY = - DEFINE_REQ_FMT0("FLD_QUERY", fld_query_client, fld_query_server); -EXPORT_SYMBOL(RQF_FLD_QUERY); - -/* - * The 'fld_read_server' uses 'RMF_GENERIC_DATA' to hold the 'FLD_QUERY' - * RPC reply that is composed of 'struct lu_seq_range_array'. But there - * is not registered swabber function for 'RMF_GENERIC_DATA'. So the RPC - * peers need to handle the RPC reply with fixed little-endian format. 
- * - * In theory, we can define new structure with some swabber registered to - * handle the 'FLD_QUERY' RPC reply result automatically. But from the - * implementation view, it is not easy to be done within current "struct - * req_msg_field" framework. Because the sequence range array in the RPC - * reply is not fixed length, instead, its length depends on 'lu_seq_range' - * count, that is unknown when prepare the RPC buffer. Generally, for such - * flexible length RPC usage, there will be a field in the RPC layout to - * indicate the data length. But for the 'FLD_READ' RPC, we have no way to - * do that unless we add new length filed that will broken the on-wire RPC - * protocol and cause interoperability trouble with old peer. - */ -struct req_format RQF_FLD_READ = - DEFINE_REQ_FMT0("FLD_READ", fld_read_client, fld_read_server); -EXPORT_SYMBOL(RQF_FLD_READ); - -struct req_format RQF_LOG_CANCEL = - DEFINE_REQ_FMT0("OBD_LOG_CANCEL", log_cancel_client, empty); -EXPORT_SYMBOL(RQF_LOG_CANCEL); - -struct req_format RQF_MDS_QUOTACTL = - DEFINE_REQ_FMT0("MDS_QUOTACTL", quotactl_only, quotactl_only); -EXPORT_SYMBOL(RQF_MDS_QUOTACTL); - -struct req_format RQF_OST_QUOTACTL = - DEFINE_REQ_FMT0("OST_QUOTACTL", quotactl_only, quotactl_only); -EXPORT_SYMBOL(RQF_OST_QUOTACTL); - -struct req_format RQF_MDS_GETSTATUS = - DEFINE_REQ_FMT0("MDS_GETSTATUS", mdt_body_only, mdt_body_capa); -EXPORT_SYMBOL(RQF_MDS_GETSTATUS); - -struct req_format RQF_MDS_STATFS = - DEFINE_REQ_FMT0("MDS_STATFS", empty, obd_statfs_server); -EXPORT_SYMBOL(RQF_MDS_STATFS); - -struct req_format RQF_MDS_SYNC = - DEFINE_REQ_FMT0("MDS_SYNC", mdt_body_capa, mdt_body_only); -EXPORT_SYMBOL(RQF_MDS_SYNC); - -struct req_format RQF_MDS_GETATTR = - DEFINE_REQ_FMT0("MDS_GETATTR", mdt_body_capa, mds_getattr_server); -EXPORT_SYMBOL(RQF_MDS_GETATTR); - -struct req_format RQF_MDS_GETXATTR = - DEFINE_REQ_FMT0("MDS_GETXATTR", - mds_getxattr_client, mds_getxattr_server); -EXPORT_SYMBOL(RQF_MDS_GETXATTR); - -struct req_format 
RQF_MDS_GETATTR_NAME = - DEFINE_REQ_FMT0("MDS_GETATTR_NAME", - mds_getattr_name_client, mds_getattr_server); -EXPORT_SYMBOL(RQF_MDS_GETATTR_NAME); - -struct req_format RQF_MDS_REINT = - DEFINE_REQ_FMT0("MDS_REINT", mds_reint_client, mdt_body_only); -EXPORT_SYMBOL(RQF_MDS_REINT); - -struct req_format RQF_MDS_REINT_CREATE = - DEFINE_REQ_FMT0("MDS_REINT_CREATE", - mds_reint_create_client, mdt_body_capa); -EXPORT_SYMBOL(RQF_MDS_REINT_CREATE); - -struct req_format RQF_MDS_REINT_CREATE_ACL = - DEFINE_REQ_FMT0("MDS_REINT_CREATE_ACL", - mds_reint_create_acl_client, mdt_body_capa); -EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_ACL); - -struct req_format RQF_MDS_REINT_CREATE_SLAVE = - DEFINE_REQ_FMT0("MDS_REINT_CREATE_EA", - mds_reint_create_slave_client, mdt_body_capa); -EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SLAVE); - -struct req_format RQF_MDS_REINT_CREATE_SYM = - DEFINE_REQ_FMT0("MDS_REINT_CREATE_SYM", - mds_reint_create_sym_client, mdt_body_capa); -EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_SYM); - -struct req_format RQF_MDS_REINT_OPEN = - DEFINE_REQ_FMT0("MDS_REINT_OPEN", - mds_reint_open_client, mds_reint_open_server); -EXPORT_SYMBOL(RQF_MDS_REINT_OPEN); - -struct req_format RQF_MDS_REINT_UNLINK = - DEFINE_REQ_FMT0("MDS_REINT_UNLINK", mds_reint_unlink_client, - mds_last_unlink_server); -EXPORT_SYMBOL(RQF_MDS_REINT_UNLINK); - -struct req_format RQF_MDS_REINT_LINK = - DEFINE_REQ_FMT0("MDS_REINT_LINK", - mds_reint_link_client, mdt_body_only); -EXPORT_SYMBOL(RQF_MDS_REINT_LINK); - -struct req_format RQF_MDS_REINT_RENAME = - DEFINE_REQ_FMT0("MDS_REINT_RENAME", mds_reint_rename_client, - mds_last_unlink_server); -EXPORT_SYMBOL(RQF_MDS_REINT_RENAME); - -struct req_format RQF_MDS_REINT_MIGRATE = - DEFINE_REQ_FMT0("MDS_REINT_MIGRATE", mds_reint_migrate_client, - mds_last_unlink_server); -EXPORT_SYMBOL(RQF_MDS_REINT_MIGRATE); - -struct req_format RQF_MDS_REINT_SETATTR = - DEFINE_REQ_FMT0("MDS_REINT_SETATTR", - mds_reint_setattr_client, mds_setattr_server); -EXPORT_SYMBOL(RQF_MDS_REINT_SETATTR); - 
-struct req_format RQF_MDS_REINT_SETXATTR = - DEFINE_REQ_FMT0("MDS_REINT_SETXATTR", - mds_reint_setxattr_client, mdt_body_only); -EXPORT_SYMBOL(RQF_MDS_REINT_SETXATTR); - -struct req_format RQF_MDS_CONNECT = - DEFINE_REQ_FMT0("MDS_CONNECT", - obd_connect_client, obd_connect_server); -EXPORT_SYMBOL(RQF_MDS_CONNECT); - -struct req_format RQF_MDS_DISCONNECT = - DEFINE_REQ_FMT0("MDS_DISCONNECT", empty, empty); -EXPORT_SYMBOL(RQF_MDS_DISCONNECT); - -struct req_format RQF_MDS_GET_INFO = - DEFINE_REQ_FMT0("MDS_GET_INFO", mds_getinfo_client, - mds_getinfo_server); -EXPORT_SYMBOL(RQF_MDS_GET_INFO); - -struct req_format RQF_LDLM_ENQUEUE = - DEFINE_REQ_FMT0("LDLM_ENQUEUE", - ldlm_enqueue_client, ldlm_enqueue_lvb_server); -EXPORT_SYMBOL(RQF_LDLM_ENQUEUE); - -struct req_format RQF_LDLM_ENQUEUE_LVB = - DEFINE_REQ_FMT0("LDLM_ENQUEUE_LVB", - ldlm_enqueue_client, ldlm_enqueue_lvb_server); -EXPORT_SYMBOL(RQF_LDLM_ENQUEUE_LVB); - -struct req_format RQF_LDLM_CONVERT = - DEFINE_REQ_FMT0("LDLM_CONVERT", - ldlm_enqueue_client, ldlm_enqueue_server); -EXPORT_SYMBOL(RQF_LDLM_CONVERT); - -struct req_format RQF_LDLM_CANCEL = - DEFINE_REQ_FMT0("LDLM_CANCEL", ldlm_enqueue_client, empty); -EXPORT_SYMBOL(RQF_LDLM_CANCEL); - -struct req_format RQF_LDLM_CALLBACK = - DEFINE_REQ_FMT0("LDLM_CALLBACK", ldlm_enqueue_client, empty); -EXPORT_SYMBOL(RQF_LDLM_CALLBACK); - -struct req_format RQF_LDLM_CP_CALLBACK = - DEFINE_REQ_FMT0("LDLM_CP_CALLBACK", ldlm_cp_callback_client, empty); -EXPORT_SYMBOL(RQF_LDLM_CP_CALLBACK); - -struct req_format RQF_LDLM_BL_CALLBACK = - DEFINE_REQ_FMT0("LDLM_BL_CALLBACK", ldlm_enqueue_client, empty); -EXPORT_SYMBOL(RQF_LDLM_BL_CALLBACK); - -struct req_format RQF_LDLM_GL_CALLBACK = - DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_enqueue_client, - ldlm_gl_callback_server); -EXPORT_SYMBOL(RQF_LDLM_GL_CALLBACK); - -struct req_format RQF_LDLM_GL_DESC_CALLBACK = - DEFINE_REQ_FMT0("LDLM_GL_CALLBACK", ldlm_gl_callback_desc_client, - ldlm_gl_callback_server); 
-EXPORT_SYMBOL(RQF_LDLM_GL_DESC_CALLBACK); - -struct req_format RQF_LDLM_INTENT_BASIC = - DEFINE_REQ_FMT0("LDLM_INTENT_BASIC", - ldlm_intent_basic_client, ldlm_enqueue_lvb_server); -EXPORT_SYMBOL(RQF_LDLM_INTENT_BASIC); - -struct req_format RQF_LDLM_INTENT = - DEFINE_REQ_FMT0("LDLM_INTENT", - ldlm_intent_client, ldlm_intent_server); -EXPORT_SYMBOL(RQF_LDLM_INTENT); - -struct req_format RQF_LDLM_INTENT_LAYOUT = - DEFINE_REQ_FMT0("LDLM_INTENT_LAYOUT ", - ldlm_intent_layout_client, ldlm_enqueue_lvb_server); -EXPORT_SYMBOL(RQF_LDLM_INTENT_LAYOUT); - -struct req_format RQF_LDLM_INTENT_GETATTR = - DEFINE_REQ_FMT0("LDLM_INTENT_GETATTR", - ldlm_intent_getattr_client, ldlm_intent_getattr_server); -EXPORT_SYMBOL(RQF_LDLM_INTENT_GETATTR); - -struct req_format RQF_LDLM_INTENT_OPEN = - DEFINE_REQ_FMT0("LDLM_INTENT_OPEN", - ldlm_intent_open_client, ldlm_intent_open_server); -EXPORT_SYMBOL(RQF_LDLM_INTENT_OPEN); - -struct req_format RQF_LDLM_INTENT_CREATE = - DEFINE_REQ_FMT0("LDLM_INTENT_CREATE", - ldlm_intent_create_client, ldlm_intent_getattr_server); -EXPORT_SYMBOL(RQF_LDLM_INTENT_CREATE); - -struct req_format RQF_LDLM_INTENT_UNLINK = - DEFINE_REQ_FMT0("LDLM_INTENT_UNLINK", - ldlm_intent_unlink_client, ldlm_intent_server); -EXPORT_SYMBOL(RQF_LDLM_INTENT_UNLINK); - -struct req_format RQF_LDLM_INTENT_GETXATTR = - DEFINE_REQ_FMT0("LDLM_INTENT_GETXATTR", - ldlm_intent_getxattr_client, - ldlm_intent_getxattr_server); -EXPORT_SYMBOL(RQF_LDLM_INTENT_GETXATTR); - -struct req_format RQF_MDS_CLOSE = - DEFINE_REQ_FMT0("MDS_CLOSE", - mdt_close_client, mds_last_unlink_server); -EXPORT_SYMBOL(RQF_MDS_CLOSE); - -struct req_format RQF_MDS_INTENT_CLOSE = - DEFINE_REQ_FMT0("MDS_CLOSE", - mdt_intent_close_client, mds_last_unlink_server); -EXPORT_SYMBOL(RQF_MDS_INTENT_CLOSE); - -struct req_format RQF_MDS_READPAGE = - DEFINE_REQ_FMT0("MDS_READPAGE", - mdt_body_capa, mdt_body_only); -EXPORT_SYMBOL(RQF_MDS_READPAGE); - -struct req_format RQF_MDS_HSM_ACTION = - DEFINE_REQ_FMT0("MDS_HSM_ACTION", 
mdt_body_capa, mdt_hsm_action_server); -EXPORT_SYMBOL(RQF_MDS_HSM_ACTION); - -struct req_format RQF_MDS_HSM_PROGRESS = - DEFINE_REQ_FMT0("MDS_HSM_PROGRESS", mdt_hsm_progress, empty); -EXPORT_SYMBOL(RQF_MDS_HSM_PROGRESS); - -struct req_format RQF_MDS_HSM_CT_REGISTER = - DEFINE_REQ_FMT0("MDS_HSM_CT_REGISTER", mdt_hsm_ct_register, empty); -EXPORT_SYMBOL(RQF_MDS_HSM_CT_REGISTER); - -struct req_format RQF_MDS_HSM_CT_UNREGISTER = - DEFINE_REQ_FMT0("MDS_HSM_CT_UNREGISTER", mdt_hsm_ct_unregister, empty); -EXPORT_SYMBOL(RQF_MDS_HSM_CT_UNREGISTER); - -struct req_format RQF_MDS_HSM_STATE_GET = - DEFINE_REQ_FMT0("MDS_HSM_STATE_GET", - mdt_body_capa, mdt_hsm_state_get_server); -EXPORT_SYMBOL(RQF_MDS_HSM_STATE_GET); - -struct req_format RQF_MDS_HSM_STATE_SET = - DEFINE_REQ_FMT0("MDS_HSM_STATE_SET", mdt_hsm_state_set, empty); -EXPORT_SYMBOL(RQF_MDS_HSM_STATE_SET); - -struct req_format RQF_MDS_HSM_REQUEST = - DEFINE_REQ_FMT0("MDS_HSM_REQUEST", mdt_hsm_request, empty); -EXPORT_SYMBOL(RQF_MDS_HSM_REQUEST); - -struct req_format RQF_MDS_SWAP_LAYOUTS = - DEFINE_REQ_FMT0("MDS_SWAP_LAYOUTS", - mdt_swap_layouts, empty); -EXPORT_SYMBOL(RQF_MDS_SWAP_LAYOUTS); - -/* This is for split */ -struct req_format RQF_MDS_WRITEPAGE = - DEFINE_REQ_FMT0("MDS_WRITEPAGE", - mdt_body_capa, mdt_body_only); -EXPORT_SYMBOL(RQF_MDS_WRITEPAGE); - -struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE = - DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_CREATE", - llog_origin_handle_create_client, llogd_body_only); -EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_CREATE); - -struct req_format RQF_LLOG_ORIGIN_HANDLE_DESTROY = - DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_DESTROY", - llogd_body_only, llogd_body_only); -EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_DESTROY); - -struct req_format RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK = - DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_NEXT_BLOCK", - llogd_body_only, llog_origin_handle_next_block_server); -EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK); - -struct req_format RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK = - 
DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_PREV_BLOCK", - llogd_body_only, llog_origin_handle_next_block_server); -EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK); - -struct req_format RQF_LLOG_ORIGIN_HANDLE_READ_HEADER = - DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_READ_HEADER", - llogd_body_only, llog_log_hdr_only); -EXPORT_SYMBOL(RQF_LLOG_ORIGIN_HANDLE_READ_HEADER); - -struct req_format RQF_LLOG_ORIGIN_CONNECT = - DEFINE_REQ_FMT0("LLOG_ORIGIN_CONNECT", llogd_conn_body_only, empty); -EXPORT_SYMBOL(RQF_LLOG_ORIGIN_CONNECT); - -struct req_format RQF_CONNECT = - DEFINE_REQ_FMT0("CONNECT", obd_connect_client, obd_connect_server); -EXPORT_SYMBOL(RQF_CONNECT); - -struct req_format RQF_OST_CONNECT = - DEFINE_REQ_FMT0("OST_CONNECT", - obd_connect_client, obd_connect_server); -EXPORT_SYMBOL(RQF_OST_CONNECT); - -struct req_format RQF_OST_DISCONNECT = - DEFINE_REQ_FMT0("OST_DISCONNECT", empty, empty); -EXPORT_SYMBOL(RQF_OST_DISCONNECT); - -struct req_format RQF_OST_GETATTR = - DEFINE_REQ_FMT0("OST_GETATTR", ost_body_capa, ost_body_only); -EXPORT_SYMBOL(RQF_OST_GETATTR); - -struct req_format RQF_OST_SETATTR = - DEFINE_REQ_FMT0("OST_SETATTR", ost_body_capa, ost_body_only); -EXPORT_SYMBOL(RQF_OST_SETATTR); - -struct req_format RQF_OST_CREATE = - DEFINE_REQ_FMT0("OST_CREATE", ost_body_only, ost_body_only); -EXPORT_SYMBOL(RQF_OST_CREATE); - -struct req_format RQF_OST_PUNCH = - DEFINE_REQ_FMT0("OST_PUNCH", ost_body_capa, ost_body_only); -EXPORT_SYMBOL(RQF_OST_PUNCH); - -struct req_format RQF_OST_SYNC = - DEFINE_REQ_FMT0("OST_SYNC", ost_body_capa, ost_body_only); -EXPORT_SYMBOL(RQF_OST_SYNC); - -struct req_format RQF_OST_DESTROY = - DEFINE_REQ_FMT0("OST_DESTROY", ost_destroy_client, ost_body_only); -EXPORT_SYMBOL(RQF_OST_DESTROY); - -struct req_format RQF_OST_BRW_READ = - DEFINE_REQ_FMT0("OST_BRW_READ", ost_brw_client, ost_brw_read_server); -EXPORT_SYMBOL(RQF_OST_BRW_READ); - -struct req_format RQF_OST_BRW_WRITE = - DEFINE_REQ_FMT0("OST_BRW_WRITE", ost_brw_client, ost_brw_write_server); 
-EXPORT_SYMBOL(RQF_OST_BRW_WRITE); - -struct req_format RQF_OST_STATFS = - DEFINE_REQ_FMT0("OST_STATFS", empty, obd_statfs_server); -EXPORT_SYMBOL(RQF_OST_STATFS); - -struct req_format RQF_OST_SET_GRANT_INFO = - DEFINE_REQ_FMT0("OST_SET_GRANT_INFO", ost_grant_shrink_client, - ost_body_only); -EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO); - -struct req_format RQF_OST_GET_INFO = - DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client, - ost_get_info_generic_server); -EXPORT_SYMBOL(RQF_OST_GET_INFO); - -struct req_format RQF_OST_GET_INFO_LAST_ID = - DEFINE_REQ_FMT0("OST_GET_INFO_LAST_ID", ost_get_info_generic_client, - ost_get_last_id_server); -EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_ID); - -struct req_format RQF_OST_GET_INFO_LAST_FID = - DEFINE_REQ_FMT0("OST_GET_INFO_LAST_FID", ost_get_last_fid_client, - ost_get_last_fid_server); -EXPORT_SYMBOL(RQF_OST_GET_INFO_LAST_FID); - -struct req_format RQF_OST_SET_INFO_LAST_FID = - DEFINE_REQ_FMT0("OST_SET_INFO_LAST_FID", obd_set_info_client, - empty); -EXPORT_SYMBOL(RQF_OST_SET_INFO_LAST_FID); - -struct req_format RQF_OST_GET_INFO_FIEMAP = - DEFINE_REQ_FMT0("OST_GET_INFO_FIEMAP", ost_get_fiemap_client, - ost_get_fiemap_server); -EXPORT_SYMBOL(RQF_OST_GET_INFO_FIEMAP); - -/* Convenience macro */ -#define FMT_FIELD(fmt, i, j) ((fmt)->rf_fields[(i)].d[(j)]) - -/** - * Initializes the capsule abstraction by computing and setting the \a rf_idx - * field of RQFs and the \a rmf_offset field of RMFs. 
- */ -int req_layout_init(void) -{ - size_t i; - size_t j; - size_t k; - struct req_format *rf = NULL; - - for (i = 0; i < ARRAY_SIZE(req_formats); ++i) { - rf = req_formats[i]; - rf->rf_idx = i; - for (j = 0; j < RCL_NR; ++j) { - LASSERT(rf->rf_fields[j].nr <= REQ_MAX_FIELD_NR); - for (k = 0; k < rf->rf_fields[j].nr; ++k) { - struct req_msg_field *field; - - field = (typeof(field))rf->rf_fields[j].d[k]; - LASSERT(!(field->rmf_flags & RMF_F_STRUCT_ARRAY) - || field->rmf_size > 0); - LASSERT(field->rmf_offset[i][j] == 0); - /* - * k + 1 to detect unused format/field - * combinations. - */ - field->rmf_offset[i][j] = k + 1; - } - } - } - return 0; -} -EXPORT_SYMBOL(req_layout_init); - -void req_layout_fini(void) -{ -} -EXPORT_SYMBOL(req_layout_fini); - -/** - * Initializes the expected sizes of each RMF in a \a pill (\a rc_area) to -1. - * - * Actual/expected field sizes are set elsewhere in functions in this file: - * req_capsule_init(), req_capsule_server_pack(), req_capsule_set_size() and - * req_capsule_msg_size(). The \a rc_area information is used by. - * ptlrpc_request_set_replen(). - */ -static void req_capsule_init_area(struct req_capsule *pill) -{ - size_t i; - - for (i = 0; i < ARRAY_SIZE(pill->rc_area[RCL_CLIENT]); i++) { - pill->rc_area[RCL_CLIENT][i] = -1; - pill->rc_area[RCL_SERVER][i] = -1; - } -} - -/** - * Initialize a pill. - * - * The \a location indicates whether the caller is executing on the client side - * (RCL_CLIENT) or server side (RCL_SERVER).. - */ -void req_capsule_init(struct req_capsule *pill, - struct ptlrpc_request *req, - enum req_location location) -{ - LASSERT(location == RCL_SERVER || location == RCL_CLIENT); - - /* - * Today all capsules are embedded in ptlrpc_request structs, - * but just in case that ever isn't the case, we don't reach - * into req unless req != NULL and pill is the one embedded in - * the req. 
- * - * The req->rq_pill_init flag makes it safe to initialize a pill - * twice, which might happen in the OST paths as a result of the - * high-priority RPC queue getting peeked at before ost_handle() - * handles an OST RPC. - */ - if (req && pill == &req->rq_pill && req->rq_pill_init) - return; - - memset(pill, 0, sizeof(*pill)); - pill->rc_req = req; - pill->rc_loc = location; - req_capsule_init_area(pill); - - if (req && pill == &req->rq_pill) - req->rq_pill_init = 1; -} -EXPORT_SYMBOL(req_capsule_init); - -void req_capsule_fini(struct req_capsule *pill) -{ -} -EXPORT_SYMBOL(req_capsule_fini); - -static int __req_format_is_sane(const struct req_format *fmt) -{ - return fmt->rf_idx < ARRAY_SIZE(req_formats) && - req_formats[fmt->rf_idx] == fmt; -} - -static struct lustre_msg *__req_msg(const struct req_capsule *pill, - enum req_location loc) -{ - struct ptlrpc_request *req; - - req = pill->rc_req; - return loc == RCL_CLIENT ? req->rq_reqmsg : req->rq_repmsg; -} - -/** - * Set the format (\a fmt) of a \a pill; format changes are not allowed here - * (see req_capsule_extend()). - */ -void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt) -{ - LASSERT(!pill->rc_fmt || pill->rc_fmt == fmt); - LASSERT(__req_format_is_sane(fmt)); - - pill->rc_fmt = fmt; -} -EXPORT_SYMBOL(req_capsule_set); - -/** - * Fills in any parts of the \a rc_area of a \a pill that haven't been filled in - * yet. - - * \a rc_area is an array of REQ_MAX_FIELD_NR elements, used to store sizes of - * variable-sized fields. The field sizes come from the declared \a rmf_size - * field of a \a pill's \a rc_fmt's RMF's. 
- */ -size_t req_capsule_filled_sizes(struct req_capsule *pill, - enum req_location loc) -{ - const struct req_format *fmt = pill->rc_fmt; - size_t i; - - for (i = 0; i < fmt->rf_fields[loc].nr; ++i) { - if (pill->rc_area[loc][i] == -1) { - pill->rc_area[loc][i] = - fmt->rf_fields[loc].d[i]->rmf_size; - if (pill->rc_area[loc][i] == -1) { - /* - * Skip the following fields. - * - * If this LASSERT() trips then you're missing a - * call to req_capsule_set_size(). - */ - LASSERT(loc != RCL_SERVER); - break; - } - } - } - return i; -} -EXPORT_SYMBOL(req_capsule_filled_sizes); - -/** - * Capsule equivalent of lustre_pack_request() and lustre_pack_reply(). - * - * This function uses the \a pill's \a rc_area as filled in by - * req_capsule_set_size() or req_capsule_filled_sizes() (the latter is called by - * this function). - */ -int req_capsule_server_pack(struct req_capsule *pill) -{ - const struct req_format *fmt; - int count; - int rc; - - LASSERT(pill->rc_loc == RCL_SERVER); - fmt = pill->rc_fmt; - LASSERT(fmt); - - count = req_capsule_filled_sizes(pill, RCL_SERVER); - rc = lustre_pack_reply(pill->rc_req, count, - pill->rc_area[RCL_SERVER], NULL); - if (rc != 0) { - DEBUG_REQ(D_ERROR, pill->rc_req, - "Cannot pack %d fields in format `%s': ", - count, fmt->rf_name); - } - return rc; -} -EXPORT_SYMBOL(req_capsule_server_pack); - -/** - * Returns the PTLRPC request or reply (\a loc) buffer offset of a \a pill - * corresponding to the given RMF (\a field). - */ -static u32 __req_capsule_offset(const struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc) -{ - u32 offset; - - offset = field->rmf_offset[pill->rc_fmt->rf_idx][loc]; - LASSERTF(offset > 0, "%s:%s, off=%d, loc=%d\n", pill->rc_fmt->rf_name, - field->rmf_name, offset, loc); - offset--; - - LASSERT(offset < REQ_MAX_FIELD_NR); - return offset; -} - -/** - * Helper for __req_capsule_get(); swabs value / array of values and/or dumps - * them if desired. 
- */ -static -void -swabber_dumper_helper(struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc, - int offset, - void *value, int len, int dump, void (*swabber)(void *)) -{ - void *p; - int i; - int n; - int do_swab; - int inout = loc == RCL_CLIENT; - - swabber = swabber ?: field->rmf_swabber; - - if (ptlrpc_buf_need_swab(pill->rc_req, inout, offset) && - swabber && value) - do_swab = 1; - else - do_swab = 0; - - if (!field->rmf_dumper) - dump = 0; - - if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY)) { - if (dump) { - CDEBUG(D_RPCTRACE, "Dump of %sfield %s follows\n", - do_swab ? "unswabbed " : "", field->rmf_name); - field->rmf_dumper(value); - } - if (!do_swab) - return; - swabber(value); - ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset); - if (dump && field->rmf_dumper) { - CDEBUG(D_RPCTRACE, "Dump of swabbed field %s follows\n", - field->rmf_name); - field->rmf_dumper(value); - } - - return; - } - - /* - * We're swabbing an array; swabber() swabs a single array element, so - * swab every element. - */ - LASSERT((len % field->rmf_size) == 0); - for (p = value, i = 0, n = len / field->rmf_size; - i < n; - i++, p += field->rmf_size) { - if (dump) { - CDEBUG(D_RPCTRACE, "Dump of %sarray field %s, element %d follows\n", - do_swab ? "unswabbed " : "", field->rmf_name, i); - field->rmf_dumper(p); - } - if (!do_swab) - continue; - swabber(p); - if (dump) { - CDEBUG(D_RPCTRACE, "Dump of swabbed array field %s, element %d follows\n", - field->rmf_name, i); - field->rmf_dumper(value); - } - } - if (do_swab) - ptlrpc_buf_set_swabbed(pill->rc_req, inout, offset); -} - -/** - * Returns the pointer to a PTLRPC request or reply (\a loc) buffer of a \a pill - * corresponding to the given RMF (\a field). - * - * The buffer will be swabbed using the given \a swabber. If \a swabber == NULL - * then the \a rmf_swabber from the RMF will be used. 
Soon there will be no - * calls to __req_capsule_get() with a non-NULL \a swabber; \a swabber will then - * be removed. Fields with the \a RMF_F_STRUCT_ARRAY flag set will have each - * element of the array swabbed. - */ -static void *__req_capsule_get(struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc, - void (*swabber)(void *), - int dump) -{ - const struct req_format *fmt; - struct lustre_msg *msg; - void *value; - u32 len; - u32 offset; - - void *(*getter)(struct lustre_msg *m, u32 n, u32 minlen); - - static const char *rcl_names[RCL_NR] = { - [RCL_CLIENT] = "client", - [RCL_SERVER] = "server" - }; - - fmt = pill->rc_fmt; - LASSERT(fmt); - LASSERT(fmt != LP_POISON); - LASSERT(__req_format_is_sane(fmt)); - - offset = __req_capsule_offset(pill, field, loc); - - msg = __req_msg(pill, loc); - LASSERT(msg); - - getter = (field->rmf_flags & RMF_F_STRING) ? - (typeof(getter))lustre_msg_string : lustre_msg_buf; - - if (field->rmf_flags & (RMF_F_STRUCT_ARRAY | RMF_F_NO_SIZE_CHECK)) { - /* - * We've already asserted that field->rmf_size > 0 in - * req_layout_init(). - */ - len = lustre_msg_buflen(msg, offset); - if (!(field->rmf_flags & RMF_F_NO_SIZE_CHECK) && - (len % field->rmf_size)) { - CERROR("%s: array field size mismatch %d modulo %u != 0 (%d)\n", - field->rmf_name, len, field->rmf_size, loc); - return NULL; - } - } else if (pill->rc_area[loc][offset] != -1) { - len = pill->rc_area[loc][offset]; - } else { - len = max_t(typeof(field->rmf_size), field->rmf_size, 0); - } - value = getter(msg, offset, len); - - if (!value) { - DEBUG_REQ(D_ERROR, pill->rc_req, - "Wrong buffer for field `%s' (%u of %u) in format `%s': %u vs. 
%u (%s)\n", - field->rmf_name, offset, lustre_msg_bufcount(msg), - fmt->rf_name, lustre_msg_buflen(msg, offset), len, - rcl_names[loc]); - } else { - swabber_dumper_helper(pill, field, loc, offset, value, len, - dump, swabber); - } - - return value; -} - -/** - * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC request - * buffer corresponding to the given RMF (\a field) of a \a pill. - */ -void *req_capsule_client_get(struct req_capsule *pill, - const struct req_msg_field *field) -{ - return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0); -} -EXPORT_SYMBOL(req_capsule_client_get); - -/** - * Same as req_capsule_client_get(), but with a \a swabber argument. - * - * Currently unused; will be removed when req_capsule_server_swab_get() is - * unused too. - */ -void *req_capsule_client_swab_get(struct req_capsule *pill, - const struct req_msg_field *field, - void *swabber) -{ - return __req_capsule_get(pill, field, RCL_CLIENT, swabber, 0); -} -EXPORT_SYMBOL(req_capsule_client_swab_get); - -/** - * Utility that combines req_capsule_set_size() and req_capsule_client_get(). - * - * First the \a pill's request \a field's size is set (\a rc_area) using - * req_capsule_set_size() with the given \a len. Then the actual buffer is - * returned. - */ -void *req_capsule_client_sized_get(struct req_capsule *pill, - const struct req_msg_field *field, - u32 len) -{ - req_capsule_set_size(pill, field, RCL_CLIENT, len); - return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0); -} -EXPORT_SYMBOL(req_capsule_client_sized_get); - -/** - * Trivial wrapper around __req_capsule_get(), that returns the PTLRPC reply - * buffer corresponding to the given RMF (\a field) of a \a pill. - */ -void *req_capsule_server_get(struct req_capsule *pill, - const struct req_msg_field *field) -{ - return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0); -} -EXPORT_SYMBOL(req_capsule_server_get); - -/** - * Same as req_capsule_server_get(), but with a \a swabber argument. 
- * - * Ideally all swabbing should be done pursuant to RMF definitions, with no - * swabbing done outside this capsule abstraction. - */ -void *req_capsule_server_swab_get(struct req_capsule *pill, - const struct req_msg_field *field, - void *swabber) -{ - return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0); -} -EXPORT_SYMBOL(req_capsule_server_swab_get); - -/** - * Utility that combines req_capsule_set_size() and req_capsule_server_get(). - * - * First the \a pill's request \a field's size is set (\a rc_area) using - * req_capsule_set_size() with the given \a len. Then the actual buffer is - * returned. - */ -void *req_capsule_server_sized_get(struct req_capsule *pill, - const struct req_msg_field *field, - u32 len) -{ - req_capsule_set_size(pill, field, RCL_SERVER, len); - return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0); -} -EXPORT_SYMBOL(req_capsule_server_sized_get); - -void *req_capsule_server_sized_swab_get(struct req_capsule *pill, - const struct req_msg_field *field, - u32 len, void *swabber) -{ - req_capsule_set_size(pill, field, RCL_SERVER, len); - return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0); -} -EXPORT_SYMBOL(req_capsule_server_sized_swab_get); - -/** - * Set the size of the PTLRPC request/reply (\a loc) buffer for the given \a - * field of the given \a pill. - * - * This function must be used when constructing variable sized fields of a - * request or reply. 
- */ -void req_capsule_set_size(struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc, u32 size) -{ - LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); - - if ((size != (u32)field->rmf_size) && - (field->rmf_size != -1) && - !(field->rmf_flags & RMF_F_NO_SIZE_CHECK) && - (size > 0)) { - u32 rmf_size = (u32)field->rmf_size; - - if ((field->rmf_flags & RMF_F_STRUCT_ARRAY) && - (size % rmf_size != 0)) { - CERROR("%s: array field size mismatch %u %% %u != 0 (%d)\n", - field->rmf_name, size, rmf_size, loc); - LBUG(); - } else if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY) && - size < rmf_size) { - CERROR("%s: field size mismatch %u != %u (%d)\n", - field->rmf_name, size, rmf_size, loc); - LBUG(); - } - } - - pill->rc_area[loc][__req_capsule_offset(pill, field, loc)] = size; -} -EXPORT_SYMBOL(req_capsule_set_size); - -/** - * Return the actual PTLRPC buffer length of a request or reply (\a loc) - * for the given \a pill's given \a field. - * - * NB: this function doesn't correspond with req_capsule_set_size(), which - * actually sets the size in pill.rc_area[loc][offset], but this function - * returns the message buflen[offset], maybe we should use another name. - */ -u32 req_capsule_get_size(const struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc) -{ - LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); - - return lustre_msg_buflen(__req_msg(pill, loc), - __req_capsule_offset(pill, field, loc)); -} -EXPORT_SYMBOL(req_capsule_get_size); - -/** - * Wrapper around lustre_msg_size() that returns the PTLRPC size needed for the - * given \a pill's request or reply (\a loc) given the field size recorded in - * the \a pill's rc_area. - * - * See also req_capsule_set_size(). 
- */ -u32 req_capsule_msg_size(struct req_capsule *pill, enum req_location loc) -{ - return lustre_msg_size(pill->rc_req->rq_import->imp_msg_magic, - pill->rc_fmt->rf_fields[loc].nr, - pill->rc_area[loc]); -} - -/** - * While req_capsule_msg_size() computes the size of a PTLRPC request or reply - * (\a loc) given a \a pill's \a rc_area, this function computes the size of a - * PTLRPC request or reply given only an RQF (\a fmt). - * - * This function should not be used for formats which contain variable size - * fields. - */ -u32 req_capsule_fmt_size(__u32 magic, const struct req_format *fmt, - enum req_location loc) -{ - size_t i = 0; - u32 size; - - /* - * This function should probably LASSERT() that fmt has no fields with - * RMF_F_STRUCT_ARRAY in rmf_flags, since we can't know here how many - * elements in the array there will ultimately be, but then, we could - * assume that there will be at least one element, and that's just what - * we do. - */ - size = lustre_msg_hdr_size(magic, fmt->rf_fields[loc].nr); - if (!size) - return size; - - for (; i < fmt->rf_fields[loc].nr; ++i) - if (fmt->rf_fields[loc].d[i]->rmf_size != -1) - size += cfs_size_round(fmt->rf_fields[loc].d[i]-> - rmf_size); - return size; -} - -/** - * Changes the format of an RPC. - * - * The pill must already have been initialized, which means that it already has - * a request format. The new format \a fmt must be an extension of the pill's - * old format. Specifically: the new format must have as many request and reply - * fields as the old one, and all fields shared by the old and new format must - * be at least as large in the new format. - * - * The new format's fields may be of different "type" than the old format, but - * only for fields that are "opaque" blobs: fields which have a) have no - * \a rmf_swabber, b) \a rmf_flags == 0 or RMF_F_NO_SIZE_CHECK, and c) \a - * rmf_size == -1 or \a rmf_flags == RMF_F_NO_SIZE_CHECK. 
For example, - * OBD_SET_INFO has a key field and an opaque value field that gets interpreted - * according to the key field. When the value, according to the key, contains a - * structure (or array thereof) to be swabbed, the format should be changed to - * one where the value field has \a rmf_size/rmf_flags/rmf_swabber set - * accordingly. - */ -void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt) -{ - int i; - size_t j; - - const struct req_format *old; - - LASSERT(pill->rc_fmt); - LASSERT(__req_format_is_sane(fmt)); - - old = pill->rc_fmt; - /* - * Sanity checking... - */ - for (i = 0; i < RCL_NR; ++i) { - LASSERT(fmt->rf_fields[i].nr >= old->rf_fields[i].nr); - for (j = 0; j < old->rf_fields[i].nr - 1; ++j) { - const struct req_msg_field *ofield = FMT_FIELD(old, i, j); - - /* "opaque" fields can be transmogrified */ - if (!ofield->rmf_swabber && - (ofield->rmf_flags & ~RMF_F_NO_SIZE_CHECK) == 0 && - (ofield->rmf_size == -1 || - ofield->rmf_flags == RMF_F_NO_SIZE_CHECK)) - continue; - LASSERT(FMT_FIELD(fmt, i, j) == FMT_FIELD(old, i, j)); - } - /* - * Last field in old format can be shorter than in new. - */ - LASSERT(FMT_FIELD(fmt, i, j)->rmf_size >= - FMT_FIELD(old, i, j)->rmf_size); - } - - pill->rc_fmt = fmt; -} -EXPORT_SYMBOL(req_capsule_extend); - -/** - * This function returns a non-zero value if the given \a field is present in - * the format (\a rc_fmt) of \a pill's PTLRPC request or reply (\a loc), else it - * returns 0. - */ -int req_capsule_has_field(const struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc) -{ - LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); - - return field->rmf_offset[pill->rc_fmt->rf_idx][loc]; -} -EXPORT_SYMBOL(req_capsule_has_field); - -/** - * Returns a non-zero value if the given \a field is present in the given \a - * pill's PTLRPC request or reply (\a loc), else it returns 0. 
- */ -static int req_capsule_field_present(const struct req_capsule *pill, - const struct req_msg_field *field, - enum req_location loc) -{ - u32 offset; - - LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT); - LASSERT(req_capsule_has_field(pill, field, loc)); - - offset = __req_capsule_offset(pill, field, loc); - return lustre_msg_bufcount(__req_msg(pill, loc)) > offset; -} - -/** - * This function shrinks the size of the _buffer_ of the \a pill's PTLRPC - * request or reply (\a loc). - * - * This is not the opposite of req_capsule_extend(). - */ -void req_capsule_shrink(struct req_capsule *pill, - const struct req_msg_field *field, - u32 newlen, enum req_location loc) -{ - const struct req_format *fmt; - struct lustre_msg *msg; - u32 len; - int offset; - - fmt = pill->rc_fmt; - LASSERT(fmt); - LASSERT(__req_format_is_sane(fmt)); - LASSERT(req_capsule_has_field(pill, field, loc)); - LASSERT(req_capsule_field_present(pill, field, loc)); - - offset = __req_capsule_offset(pill, field, loc); - - msg = __req_msg(pill, loc); - len = lustre_msg_buflen(msg, offset); - LASSERTF(newlen <= len, "%s:%s, oldlen=%u, newlen=%u\n", - fmt->rf_name, field->rmf_name, len, newlen); - - if (loc == RCL_CLIENT) - pill->rc_req->rq_reqlen = lustre_shrink_msg(msg, offset, newlen, - 1); - else - pill->rc_req->rq_replen = lustre_shrink_msg(msg, offset, newlen, - 1); -} -EXPORT_SYMBOL(req_capsule_shrink); diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c b/drivers/staging/lustre/lustre/ptlrpc/llog_client.c deleted file mode 100644 index 946d538121de..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/llog_client.c +++ /dev/null @@ -1,338 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/llog_client.c - * - * remote api for llog - client side - * - * Author: Andreas Dilger - */ - -#define DEBUG_SUBSYSTEM S_LOG - -#include -#include -#include -#include - -#define LLOG_CLIENT_ENTRY(ctxt, imp) do { \ - mutex_lock(&ctxt->loc_mutex); \ - if (ctxt->loc_imp) { \ - imp = class_import_get(ctxt->loc_imp); \ - } else { \ - CERROR("ctxt->loc_imp == NULL for context idx %d." \ - "Unable to complete MDS/OSS recovery," \ - "but I'll try again next time. Not fatal.\n", \ - ctxt->loc_idx); \ - imp = NULL; \ - mutex_unlock(&ctxt->loc_mutex); \ - return (-EINVAL); \ - } \ - mutex_unlock(&ctxt->loc_mutex); \ -} while (0) - -#define LLOG_CLIENT_EXIT(ctxt, imp) do { \ - mutex_lock(&ctxt->loc_mutex); \ - if (ctxt->loc_imp != imp) \ - CWARN("loc_imp has changed from %p to %p\n", \ - ctxt->loc_imp, imp); \ - class_import_put(imp); \ - mutex_unlock(&ctxt->loc_mutex); \ -} while (0) - -/* This is a callback from the llog_* functions. - * Assumes caller has already pushed us into the kernel context. 
- */ -static int llog_client_open(const struct lu_env *env, - struct llog_handle *lgh, struct llog_logid *logid, - char *name, enum llog_open_param open_param) -{ - struct obd_import *imp; - struct llogd_body *body; - struct llog_ctxt *ctxt = lgh->lgh_ctxt; - struct ptlrpc_request *req = NULL; - int rc; - - LLOG_CLIENT_ENTRY(ctxt, imp); - - /* client cannot create llog */ - LASSERTF(open_param != LLOG_OPEN_NEW, "%#x\n", open_param); - LASSERT(lgh); - - req = ptlrpc_request_alloc(imp, &RQF_LLOG_ORIGIN_HANDLE_CREATE); - if (!req) { - rc = -ENOMEM; - goto out; - } - - if (name) - req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, - strlen(name) + 1); - - rc = ptlrpc_request_pack(req, LUSTRE_LOG_VERSION, - LLOG_ORIGIN_HANDLE_CREATE); - if (rc) { - ptlrpc_request_free(req); - req = NULL; - goto out; - } - ptlrpc_request_set_replen(req); - - body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); - if (logid) - body->lgd_logid = *logid; - body->lgd_ctxt_idx = ctxt->loc_idx - 1; - - if (name) { - char *tmp; - - tmp = req_capsule_client_sized_get(&req->rq_pill, &RMF_NAME, - strlen(name) + 1); - LASSERT(tmp); - strcpy(tmp, name); - } - - rc = ptlrpc_queue_wait(req); - if (rc) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY); - if (!body) { - rc = -EFAULT; - goto out; - } - - lgh->lgh_id = body->lgd_logid; - lgh->lgh_ctxt = ctxt; -out: - LLOG_CLIENT_EXIT(ctxt, imp); - ptlrpc_req_finished(req); - return rc; -} - -static int llog_client_next_block(const struct lu_env *env, - struct llog_handle *loghandle, - int *cur_idx, int next_idx, - __u64 *cur_offset, void *buf, int len) -{ - struct obd_import *imp; - struct ptlrpc_request *req = NULL; - struct llogd_body *body; - void *ptr; - int rc; - - LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp); - req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK, - LUSTRE_LOG_VERSION, - LLOG_ORIGIN_HANDLE_NEXT_BLOCK); - if (!req) { - rc = -ENOMEM; - goto err_exit; - } - - body = 
req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); - body->lgd_logid = loghandle->lgh_id; - body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1; - body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags; - body->lgd_index = next_idx; - body->lgd_saved_index = *cur_idx; - body->lgd_len = len; - body->lgd_cur_offset = *cur_offset; - - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len); - ptlrpc_request_set_replen(req); - rc = ptlrpc_queue_wait(req); - if (rc) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY); - if (!body) { - rc = -EFAULT; - goto out; - } - - /* The log records are swabbed as they are processed */ - ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA); - if (!ptr) { - rc = -EFAULT; - goto out; - } - - *cur_idx = body->lgd_saved_index; - *cur_offset = body->lgd_cur_offset; - - memcpy(buf, ptr, len); -out: - ptlrpc_req_finished(req); -err_exit: - LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp); - return rc; -} - -static int llog_client_prev_block(const struct lu_env *env, - struct llog_handle *loghandle, - int prev_idx, void *buf, int len) -{ - struct obd_import *imp; - struct ptlrpc_request *req = NULL; - struct llogd_body *body; - void *ptr; - int rc; - - LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp); - req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK, - LUSTRE_LOG_VERSION, - LLOG_ORIGIN_HANDLE_PREV_BLOCK); - if (!req) { - rc = -ENOMEM; - goto err_exit; - } - - body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); - body->lgd_logid = loghandle->lgh_id; - body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1; - body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags; - body->lgd_index = prev_idx; - body->lgd_len = len; - - req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_SERVER, len); - ptlrpc_request_set_replen(req); - - rc = ptlrpc_queue_wait(req); - if (rc) - goto out; - - body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY); - if (!body) { - rc = -EFAULT; - goto 
out; - } - - ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA); - if (!ptr) { - rc = -EFAULT; - goto out; - } - - memcpy(buf, ptr, len); -out: - ptlrpc_req_finished(req); -err_exit: - LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp); - return rc; -} - -static int llog_client_read_header(const struct lu_env *env, - struct llog_handle *handle) -{ - struct obd_import *imp; - struct ptlrpc_request *req = NULL; - struct llogd_body *body; - struct llog_log_hdr *hdr; - struct llog_rec_hdr *llh_hdr; - int rc; - - LLOG_CLIENT_ENTRY(handle->lgh_ctxt, imp); - req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER, - LUSTRE_LOG_VERSION, - LLOG_ORIGIN_HANDLE_READ_HEADER); - if (!req) { - rc = -ENOMEM; - goto err_exit; - } - - body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY); - body->lgd_logid = handle->lgh_id; - body->lgd_ctxt_idx = handle->lgh_ctxt->loc_idx - 1; - body->lgd_llh_flags = handle->lgh_hdr->llh_flags; - - ptlrpc_request_set_replen(req); - rc = ptlrpc_queue_wait(req); - if (rc) - goto out; - - hdr = req_capsule_server_get(&req->rq_pill, &RMF_LLOG_LOG_HDR); - if (!hdr) { - rc = -EFAULT; - goto out; - } - - if (handle->lgh_hdr_size < hdr->llh_hdr.lrh_len) { - rc = -EFAULT; - goto out; - } - - memcpy(handle->lgh_hdr, hdr, hdr->llh_hdr.lrh_len); - handle->lgh_last_idx = LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_index; - - /* sanity checks */ - llh_hdr = &handle->lgh_hdr->llh_hdr; - if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) { - CERROR("bad log header magic: %#x (expecting %#x)\n", - llh_hdr->lrh_type, LLOG_HDR_MAGIC); - rc = -EIO; - } else if (llh_hdr->lrh_len != - LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_len || - (llh_hdr->lrh_len & (llh_hdr->lrh_len - 1)) || - llh_hdr->lrh_len < LLOG_MIN_CHUNK_SIZE || - llh_hdr->lrh_len > handle->lgh_hdr_size) { - CERROR("incorrectly sized log header: %#x (expecting %#x) (power of two > 8192)\n", - llh_hdr->lrh_len, - LLOG_HDR_TAIL(handle->lgh_hdr)->lrt_len); - CERROR("you may need to re-run lconf --write_conf.\n"); 
- rc = -EIO; - } -out: - ptlrpc_req_finished(req); -err_exit: - LLOG_CLIENT_EXIT(handle->lgh_ctxt, imp); - return rc; -} - -static int llog_client_close(const struct lu_env *env, - struct llog_handle *handle) -{ - /* this doesn't call LLOG_ORIGIN_HANDLE_CLOSE because - * the servers all close the file at the end of every - * other LLOG_ RPC. - */ - return 0; -} - -struct llog_operations llog_client_ops = { - .lop_next_block = llog_client_next_block, - .lop_prev_block = llog_client_prev_block, - .lop_read_header = llog_client_read_header, - .lop_open = llog_client_open, - .lop_close = llog_client_close, -}; -EXPORT_SYMBOL(llog_client_ops); diff --git a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c b/drivers/staging/lustre/lustre/ptlrpc/llog_net.c deleted file mode 100644 index b871d9e40a9e..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/llog_net.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/llog_net.c - * - * OST<->MDS recovery logging infrastructure. - * - * Invariants in implementation: - * - we do not share logs among different OST<->MDS connections, so that - * if an OST or MDS fails it need only look at log(s) relevant to itself - * - * Author: Andreas Dilger - */ - -#define DEBUG_SUBSYSTEM S_LOG - -#include -#include -#include - -int llog_initiator_connect(struct llog_ctxt *ctxt) -{ - struct obd_import *new_imp; - - LASSERT(ctxt); - new_imp = ctxt->loc_obd->u.cli.cl_import; - LASSERTF(!ctxt->loc_imp || ctxt->loc_imp == new_imp, - "%p - %p\n", ctxt->loc_imp, new_imp); - mutex_lock(&ctxt->loc_mutex); - if (ctxt->loc_imp != new_imp) { - if (ctxt->loc_imp) - class_import_put(ctxt->loc_imp); - ctxt->loc_imp = class_import_get(new_imp); - } - mutex_unlock(&ctxt->loc_mutex); - return 0; -} -EXPORT_SYMBOL(llog_initiator_connect); diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c deleted file mode 100644 index 0b638837f88b..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c +++ /dev/null @@ -1,1316 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include -#include -#include -#include -#include -#include "ptlrpc_internal.h" - -static struct ll_rpc_opcode { - __u32 opcode; - const char *opname; -} ll_rpc_opcode_table[LUSTRE_MAX_OPCODES] = { - { OST_REPLY, "ost_reply" }, - { OST_GETATTR, "ost_getattr" }, - { OST_SETATTR, "ost_setattr" }, - { OST_READ, "ost_read" }, - { OST_WRITE, "ost_write" }, - { OST_CREATE, "ost_create" }, - { OST_DESTROY, "ost_destroy" }, - { OST_GET_INFO, "ost_get_info" }, - { OST_CONNECT, "ost_connect" }, - { OST_DISCONNECT, "ost_disconnect" }, - { OST_PUNCH, "ost_punch" }, - { OST_OPEN, "ost_open" }, - { OST_CLOSE, "ost_close" }, - { OST_STATFS, "ost_statfs" }, - { 14, NULL }, /* formerly OST_SAN_READ */ - { 15, NULL }, /* formerly OST_SAN_WRITE */ - { OST_SYNC, "ost_sync" }, - { OST_SET_INFO, "ost_set_info" }, - { OST_QUOTACHECK, "ost_quotacheck" }, - { OST_QUOTACTL, "ost_quotactl" }, - { OST_QUOTA_ADJUST_QUNIT, "ost_quota_adjust_qunit" }, - { MDS_GETATTR, "mds_getattr" }, - { MDS_GETATTR_NAME, "mds_getattr_lock" }, - { MDS_CLOSE, "mds_close" }, - { MDS_REINT, "mds_reint" }, - { MDS_READPAGE, "mds_readpage" }, - { MDS_CONNECT, "mds_connect" }, - { MDS_DISCONNECT, "mds_disconnect" }, - { MDS_GETSTATUS, "mds_getstatus" }, - { MDS_STATFS, "mds_statfs" }, - { MDS_PIN, "mds_pin" }, - { MDS_UNPIN, "mds_unpin" }, - { MDS_SYNC, "mds_sync" }, - { MDS_DONE_WRITING, "mds_done_writing" }, - { MDS_SET_INFO, "mds_set_info" }, - { MDS_QUOTACHECK, "mds_quotacheck" }, - 
{ MDS_QUOTACTL, "mds_quotactl" }, - { MDS_GETXATTR, "mds_getxattr" }, - { MDS_SETXATTR, "mds_setxattr" }, - { MDS_WRITEPAGE, "mds_writepage" }, - { MDS_IS_SUBDIR, "mds_is_subdir" }, - { MDS_GET_INFO, "mds_get_info" }, - { MDS_HSM_STATE_GET, "mds_hsm_state_get" }, - { MDS_HSM_STATE_SET, "mds_hsm_state_set" }, - { MDS_HSM_ACTION, "mds_hsm_action" }, - { MDS_HSM_PROGRESS, "mds_hsm_progress" }, - { MDS_HSM_REQUEST, "mds_hsm_request" }, - { MDS_HSM_CT_REGISTER, "mds_hsm_ct_register" }, - { MDS_HSM_CT_UNREGISTER, "mds_hsm_ct_unregister" }, - { MDS_SWAP_LAYOUTS, "mds_swap_layouts" }, - { LDLM_ENQUEUE, "ldlm_enqueue" }, - { LDLM_CONVERT, "ldlm_convert" }, - { LDLM_CANCEL, "ldlm_cancel" }, - { LDLM_BL_CALLBACK, "ldlm_bl_callback" }, - { LDLM_CP_CALLBACK, "ldlm_cp_callback" }, - { LDLM_GL_CALLBACK, "ldlm_gl_callback" }, - { LDLM_SET_INFO, "ldlm_set_info" }, - { MGS_CONNECT, "mgs_connect" }, - { MGS_DISCONNECT, "mgs_disconnect" }, - { MGS_EXCEPTION, "mgs_exception" }, - { MGS_TARGET_REG, "mgs_target_reg" }, - { MGS_TARGET_DEL, "mgs_target_del" }, - { MGS_SET_INFO, "mgs_set_info" }, - { MGS_CONFIG_READ, "mgs_config_read" }, - { OBD_PING, "obd_ping" }, - { OBD_LOG_CANCEL, "llog_cancel" }, - { OBD_QC_CALLBACK, "obd_quota_callback" }, - { OBD_IDX_READ, "dt_index_read" }, - { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_open" }, - { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" }, - { LLOG_ORIGIN_HANDLE_READ_HEADER, "llog_origin_handle_read_header" }, - { LLOG_ORIGIN_HANDLE_WRITE_REC, "llog_origin_handle_write_rec" }, - { LLOG_ORIGIN_HANDLE_CLOSE, "llog_origin_handle_close" }, - { LLOG_ORIGIN_CONNECT, "llog_origin_connect" }, - { LLOG_CATINFO, "llog_catinfo" }, - { LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" }, - { LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" }, - { QUOTA_DQACQ, "quota_acquire" }, - { QUOTA_DQREL, "quota_release" }, - { SEQ_QUERY, "seq_query" }, - { SEC_CTX_INIT, "sec_ctx_init" }, - { SEC_CTX_INIT_CONT, 
"sec_ctx_init_cont" }, - { SEC_CTX_FINI, "sec_ctx_fini" }, - { FLD_QUERY, "fld_query" }, - { FLD_READ, "fld_read" }, -}; - -static struct ll_eopcode { - __u32 opcode; - const char *opname; -} ll_eopcode_table[EXTRA_LAST_OPC] = { - { LDLM_GLIMPSE_ENQUEUE, "ldlm_glimpse_enqueue" }, - { LDLM_PLAIN_ENQUEUE, "ldlm_plain_enqueue" }, - { LDLM_EXTENT_ENQUEUE, "ldlm_extent_enqueue" }, - { LDLM_FLOCK_ENQUEUE, "ldlm_flock_enqueue" }, - { LDLM_IBITS_ENQUEUE, "ldlm_ibits_enqueue" }, - { MDS_REINT_SETATTR, "mds_reint_setattr" }, - { MDS_REINT_CREATE, "mds_reint_create" }, - { MDS_REINT_LINK, "mds_reint_link" }, - { MDS_REINT_UNLINK, "mds_reint_unlink" }, - { MDS_REINT_RENAME, "mds_reint_rename" }, - { MDS_REINT_OPEN, "mds_reint_open" }, - { MDS_REINT_SETXATTR, "mds_reint_setxattr" }, - { BRW_READ_BYTES, "read_bytes" }, - { BRW_WRITE_BYTES, "write_bytes" }, -}; - -const char *ll_opcode2str(__u32 opcode) -{ - /* When one of the assertions below fail, chances are that: - * 1) A new opcode was added in include/lustre/lustre_idl.h, - * but is missing from the table above. - * or 2) The opcode space was renumbered or rearranged, - * and the opcode_offset() function in - * ptlrpc_internal.h needs to be modified. 
- */ - __u32 offset = opcode_offset(opcode); - - LASSERTF(offset < LUSTRE_MAX_OPCODES, - "offset %u >= LUSTRE_MAX_OPCODES %u\n", - offset, LUSTRE_MAX_OPCODES); - LASSERTF(ll_rpc_opcode_table[offset].opcode == opcode, - "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n", - offset, ll_rpc_opcode_table[offset].opcode, opcode); - return ll_rpc_opcode_table[offset].opname; -} - -static const char *ll_eopcode2str(__u32 opcode) -{ - LASSERT(ll_eopcode_table[opcode].opcode == opcode); - return ll_eopcode_table[opcode].opname; -} - -static void -ptlrpc_ldebugfs_register(struct dentry *root, char *dir, - char *name, - struct dentry **debugfs_root_ret, - struct lprocfs_stats **stats_ret) -{ - struct dentry *svc_debugfs_entry; - struct lprocfs_stats *svc_stats; - int i; - unsigned int svc_counter_config = LPROCFS_CNTR_AVGMINMAX | - LPROCFS_CNTR_STDDEV; - - LASSERT(!*debugfs_root_ret); - LASSERT(!*stats_ret); - - svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES + LUSTRE_MAX_OPCODES, - 0); - if (!svc_stats) - return; - - if (dir) - svc_debugfs_entry = debugfs_create_dir(dir, root); - else - svc_debugfs_entry = root; - - lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR, - svc_counter_config, "req_waittime", "usec"); - lprocfs_counter_init(svc_stats, PTLRPC_REQQDEPTH_CNTR, - svc_counter_config, "req_qdepth", "reqs"); - lprocfs_counter_init(svc_stats, PTLRPC_REQACTIVE_CNTR, - svc_counter_config, "req_active", "reqs"); - lprocfs_counter_init(svc_stats, PTLRPC_TIMEOUT, - svc_counter_config, "req_timeout", "sec"); - lprocfs_counter_init(svc_stats, PTLRPC_REQBUF_AVAIL_CNTR, - svc_counter_config, "reqbuf_avail", "bufs"); - for (i = 0; i < EXTRA_LAST_OPC; i++) { - char *units; - - switch (i) { - case BRW_WRITE_BYTES: - case BRW_READ_BYTES: - units = "bytes"; - break; - default: - units = "reqs"; - break; - } - lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i, - svc_counter_config, - ll_eopcode2str(i), units); - } - for (i = 0; i < LUSTRE_MAX_OPCODES; i++) { - __u32 opcode = 
ll_rpc_opcode_table[i].opcode; - - lprocfs_counter_init(svc_stats, - EXTRA_MAX_OPCODES + i, svc_counter_config, - ll_opcode2str(opcode), "usec"); - } - - debugfs_create_file("stats", 0644, svc_debugfs_entry, svc_stats, - &lprocfs_stats_seq_fops); - if (dir) - *debugfs_root_ret = svc_debugfs_entry; - *stats_ret = svc_stats; -} - -static int -ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file *m, void *v) -{ - struct ptlrpc_service *svc = m->private; - struct ptlrpc_service_part *svcpt; - int total = 0; - int i; - - ptlrpc_service_for_each_part(svcpt, i, svc) - total += svcpt->scp_hist_nrqbds; - - seq_printf(m, "%d\n", total); - return 0; -} - -LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len); - -static int -ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file *m, void *n) -{ - struct ptlrpc_service *svc = m->private; - struct ptlrpc_service_part *svcpt; - int total = 0; - int i; - - ptlrpc_service_for_each_part(svcpt, i, svc) - total += svc->srv_hist_nrqbds_cpt_max; - - seq_printf(m, "%d\n", total); - return 0; -} - -static ssize_t -ptlrpc_lprocfs_req_history_max_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private; - int bufpages; - int val; - int rc; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc < 0) - return rc; - - if (val < 0) - return -ERANGE; - - /* This sanity check is more of an insanity check; we can still - * hose a kernel by allowing the request history to grow too - * far. 
- */ - bufpages = (svc->srv_buf_size + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (val > totalram_pages / (2 * bufpages)) - return -ERANGE; - - spin_lock(&svc->srv_lock); - - if (val == 0) - svc->srv_hist_nrqbds_cpt_max = 0; - else - svc->srv_hist_nrqbds_cpt_max = max(1, (val / svc->srv_ncpts)); - - spin_unlock(&svc->srv_lock); - - return count; -} - -LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max); - -static ssize_t threads_min_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service, - srv_kobj); - - return sprintf(buf, "%d\n", svc->srv_nthrs_cpt_init * svc->srv_ncpts); -} - -static ssize_t threads_min_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service, - srv_kobj); - unsigned long val; - int rc = kstrtoul(buffer, 10, &val); - - if (rc < 0) - return rc; - - if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT) - return -ERANGE; - - spin_lock(&svc->srv_lock); - if (val > svc->srv_nthrs_cpt_limit * svc->srv_ncpts) { - spin_unlock(&svc->srv_lock); - return -ERANGE; - } - - svc->srv_nthrs_cpt_init = val / svc->srv_ncpts; - - spin_unlock(&svc->srv_lock); - - return count; -} -LUSTRE_RW_ATTR(threads_min); - -static ssize_t threads_started_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service, - srv_kobj); - struct ptlrpc_service_part *svcpt; - int total = 0; - int i; - - ptlrpc_service_for_each_part(svcpt, i, svc) - total += svcpt->scp_nthrs_running; - - return sprintf(buf, "%d\n", total); -} -LUSTRE_RO_ATTR(threads_started); - -static ssize_t threads_max_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service, - srv_kobj); - - return sprintf(buf, "%d\n", svc->srv_nthrs_cpt_limit * svc->srv_ncpts); -} - -static ssize_t 
threads_max_store(struct kobject *kobj, struct attribute *attr, - const char *buffer, size_t count) -{ - struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service, - srv_kobj); - unsigned long val; - int rc = kstrtoul(buffer, 10, &val); - - if (rc < 0) - return rc; - - if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT) - return -ERANGE; - - spin_lock(&svc->srv_lock); - if (val < svc->srv_nthrs_cpt_init * svc->srv_ncpts) { - spin_unlock(&svc->srv_lock); - return -ERANGE; - } - - svc->srv_nthrs_cpt_limit = val / svc->srv_ncpts; - - spin_unlock(&svc->srv_lock); - - return count; -} -LUSTRE_RW_ATTR(threads_max); - -/** - * \addtogoup nrs - * @{ - */ - -/** - * Translates \e ptlrpc_nrs_pol_state values to human-readable strings. - * - * \param[in] state The policy state - */ -static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state) -{ - switch (state) { - default: - LBUG(); - case NRS_POL_STATE_INVALID: - return "invalid"; - case NRS_POL_STATE_STOPPED: - return "stopped"; - case NRS_POL_STATE_STOPPING: - return "stopping"; - case NRS_POL_STATE_STARTING: - return "starting"; - case NRS_POL_STATE_STARTED: - return "started"; - } -} - -/** - * Obtains status information for \a policy. - * - * Information is copied in \a info. - * - * \param[in] policy The policy - * \param[out] info Holds returned status information - */ -static void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_pol_info *info) -{ - assert_spin_locked(&policy->pol_nrs->nrs_lock); - - memcpy(info->pi_name, policy->pol_desc->pd_name, NRS_POL_NAME_MAX); - - info->pi_fallback = !!(policy->pol_flags & PTLRPC_NRS_FL_FALLBACK); - info->pi_state = policy->pol_state; - /** - * XXX: These are accessed without holding - * ptlrpc_service_part::scp_req_lock. - */ - info->pi_req_queued = policy->pol_req_queued; - info->pi_req_started = policy->pol_req_started; -} - -/** - * Reads and prints policy status information for all policies of a PTLRPC - * service. 
- */ -static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file *m, void *n) -{ - struct ptlrpc_service *svc = m->private; - struct ptlrpc_service_part *svcpt; - struct ptlrpc_nrs *nrs; - struct ptlrpc_nrs_policy *policy; - struct ptlrpc_nrs_pol_info *infos; - struct ptlrpc_nrs_pol_info tmp; - unsigned int num_pols; - unsigned int pol_idx = 0; - bool hp = false; - int i; - int rc = 0; - - /** - * Serialize NRS core lprocfs operations with policy registration/ - * unregistration. - */ - mutex_lock(&nrs_core.nrs_mutex); - - /** - * Use the first service partition's regular NRS head in order to obtain - * the number of policies registered with NRS heads of this service. All - * service partitions will have the same number of policies. - */ - nrs = nrs_svcpt2nrs(svc->srv_parts[0], false); - - spin_lock(&nrs->nrs_lock); - num_pols = svc->srv_parts[0]->scp_nrs_reg.nrs_num_pols; - spin_unlock(&nrs->nrs_lock); - - infos = kcalloc(num_pols, sizeof(*infos), GFP_NOFS); - if (!infos) { - rc = -ENOMEM; - goto unlock; - } -again: - - ptlrpc_service_for_each_part(svcpt, i, svc) { - nrs = nrs_svcpt2nrs(svcpt, hp); - spin_lock(&nrs->nrs_lock); - - pol_idx = 0; - - list_for_each_entry(policy, &nrs->nrs_policy_list, pol_list) { - LASSERT(pol_idx < num_pols); - - nrs_policy_get_info_locked(policy, &tmp); - /** - * Copy values when handling the first service - * partition. - */ - if (i == 0) { - memcpy(infos[pol_idx].pi_name, tmp.pi_name, - NRS_POL_NAME_MAX); - memcpy(&infos[pol_idx].pi_state, &tmp.pi_state, - sizeof(tmp.pi_state)); - infos[pol_idx].pi_fallback = tmp.pi_fallback; - /** - * For the rest of the service partitions - * sanity-check the values we get. - */ - } else { - LASSERT(strncmp(infos[pol_idx].pi_name, - tmp.pi_name, - NRS_POL_NAME_MAX) == 0); - /** - * Not asserting ptlrpc_nrs_pol_info::pi_state, - * because it may be different between - * instances of the same policy in different - * service partitions. 
- */ - LASSERT(infos[pol_idx].pi_fallback == - tmp.pi_fallback); - } - - infos[pol_idx].pi_req_queued += tmp.pi_req_queued; - infos[pol_idx].pi_req_started += tmp.pi_req_started; - - pol_idx++; - } - spin_unlock(&nrs->nrs_lock); - } - - /** - * Policy status information output is in YAML format. - * For example: - * - * regular_requests: - * - name: fifo - * state: started - * fallback: yes - * queued: 0 - * active: 0 - * - * - name: crrn - * state: started - * fallback: no - * queued: 2015 - * active: 384 - * - * high_priority_requests: - * - name: fifo - * state: started - * fallback: yes - * queued: 0 - * active: 2 - * - * - name: crrn - * state: stopped - * fallback: no - * queued: 0 - * active: 0 - */ - seq_printf(m, "%s\n", - !hp ? "\nregular_requests:" : "high_priority_requests:"); - - for (pol_idx = 0; pol_idx < num_pols; pol_idx++) { - seq_printf(m, " - name: %s\n" - " state: %s\n" - " fallback: %s\n" - " queued: %-20d\n" - " active: %-20d\n\n", - infos[pol_idx].pi_name, - nrs_state2str(infos[pol_idx].pi_state), - infos[pol_idx].pi_fallback ? "yes" : "no", - (int)infos[pol_idx].pi_req_queued, - (int)infos[pol_idx].pi_req_started); - } - - if (!hp && nrs_svc_has_hp(svc)) { - memset(infos, 0, num_pols * sizeof(*infos)); - - /** - * Redo the processing for the service's HP NRS heads' policies. - */ - hp = true; - goto again; - } - - kfree(infos); -unlock: - mutex_unlock(&nrs_core.nrs_mutex); - - return rc; -} - -/** - * The longest valid command string is the maximum policy name size, plus the - * length of the " reg" substring - */ -#define LPROCFS_NRS_WR_MAX_CMD (NRS_POL_NAME_MAX + sizeof(" reg") - 1) - -/** - * Starts and stops a given policy on a PTLRPC service. - * - * Commands consist of the policy name, followed by an optional [reg|hp] token; - * if the optional token is omitted, the operation is performed on both the - * regular and high-priority (if the service has one) NRS head. 
- */ -static ssize_t ptlrpc_lprocfs_nrs_seq_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *off) -{ - struct ptlrpc_service *svc = ((struct seq_file *)file->private_data)->private; - enum ptlrpc_nrs_queue_type queue = PTLRPC_NRS_QUEUE_BOTH; - char *cmd; - char *cmd_copy = NULL; - char *token; - int rc = 0; - - if (count >= LPROCFS_NRS_WR_MAX_CMD) - return -EINVAL; - - cmd = kzalloc(LPROCFS_NRS_WR_MAX_CMD, GFP_NOFS); - if (!cmd) - return -ENOMEM; - /** - * strsep() modifies its argument, so keep a copy - */ - cmd_copy = cmd; - - if (copy_from_user(cmd, buffer, count)) { - rc = -EFAULT; - goto out; - } - - cmd[count] = '\0'; - - token = strsep(&cmd, " "); - - if (strlen(token) > NRS_POL_NAME_MAX - 1) { - rc = -EINVAL; - goto out; - } - - /** - * No [reg|hp] token has been specified - */ - if (!cmd) - goto default_queue; - - /** - * The second token is either NULL, or an optional [reg|hp] string - */ - if (strcmp(cmd, "reg") == 0) { - queue = PTLRPC_NRS_QUEUE_REG; - } else if (strcmp(cmd, "hp") == 0) { - queue = PTLRPC_NRS_QUEUE_HP; - } else { - rc = -EINVAL; - goto out; - } - -default_queue: - - if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) { - rc = -ENODEV; - goto out; - } else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) { - queue = PTLRPC_NRS_QUEUE_REG; - } - - /** - * Serialize NRS core lprocfs operations with policy registration/ - * unregistration. - */ - mutex_lock(&nrs_core.nrs_mutex); - - rc = ptlrpc_nrs_policy_control(svc, queue, token, PTLRPC_NRS_CTL_START, - false, NULL); - - mutex_unlock(&nrs_core.nrs_mutex); -out: - kfree(cmd_copy); - - return rc < 0 ? 
rc : count; -} - -LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs); - -/** @} nrs */ - -struct ptlrpc_srh_iterator { - int srhi_idx; - __u64 srhi_seq; - struct ptlrpc_request *srhi_req; -}; - -static int -ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part *svcpt, - struct ptlrpc_srh_iterator *srhi, - __u64 seq) -{ - struct list_head *e; - struct ptlrpc_request *req; - - if (srhi->srhi_req && srhi->srhi_seq > svcpt->scp_hist_seq_culled && - srhi->srhi_seq <= seq) { - /* If srhi_req was set previously, hasn't been culled and - * we're searching for a seq on or after it (i.e. more - * recent), search from it onwards. - * Since the service history is LRU (i.e. culled reqs will - * be near the head), we shouldn't have to do long - * re-scans - */ - LASSERTF(srhi->srhi_seq == srhi->srhi_req->rq_history_seq, - "%s:%d: seek seq %llu, request seq %llu\n", - svcpt->scp_service->srv_name, svcpt->scp_cpt, - srhi->srhi_seq, srhi->srhi_req->rq_history_seq); - LASSERTF(!list_empty(&svcpt->scp_hist_reqs), - "%s:%d: seek offset %llu, request seq %llu, last culled %llu\n", - svcpt->scp_service->srv_name, svcpt->scp_cpt, - seq, srhi->srhi_seq, svcpt->scp_hist_seq_culled); - e = &srhi->srhi_req->rq_history_list; - } else { - /* search from start */ - e = svcpt->scp_hist_reqs.next; - } - - while (e != &svcpt->scp_hist_reqs) { - req = list_entry(e, struct ptlrpc_request, rq_history_list); - - if (req->rq_history_seq >= seq) { - srhi->srhi_seq = req->rq_history_seq; - srhi->srhi_req = req; - return 0; - } - e = e->next; - } - - return -ENOENT; -} - -/* - * ptlrpc history sequence is used as "position" of seq_file, in some case, - * seq_read() will increase "position" to indicate reading the next - * element, however, low bits of history sequence are reserved for CPT id - * (check the details from comments before ptlrpc_req_add_history), which - * means seq_read() might change CPT id of history sequence and never - * finish reading of requests on a CPT. 
To make it work, we have to shift - * CPT id to high bits and timestamp to low bits, so seq_read() will only - * increase timestamp which can correctly indicate the next position. - */ - -/* convert seq_file pos to cpt */ -#define PTLRPC_REQ_POS2CPT(svc, pos) \ - ((svc)->srv_cpt_bits == 0 ? 0 : \ - (__u64)(pos) >> (64 - (svc)->srv_cpt_bits)) - -/* make up seq_file pos from cpt */ -#define PTLRPC_REQ_CPT2POS(svc, cpt) \ - ((svc)->srv_cpt_bits == 0 ? 0 : \ - (cpt) << (64 - (svc)->srv_cpt_bits)) - -/* convert sequence to position */ -#define PTLRPC_REQ_SEQ2POS(svc, seq) \ - ((svc)->srv_cpt_bits == 0 ? (seq) : \ - ((seq) >> (svc)->srv_cpt_bits) | \ - ((seq) << (64 - (svc)->srv_cpt_bits))) - -/* convert position to sequence */ -#define PTLRPC_REQ_POS2SEQ(svc, pos) \ - ((svc)->srv_cpt_bits == 0 ? (pos) : \ - ((__u64)(pos) << (svc)->srv_cpt_bits) | \ - ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits))) - -static void * -ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos) -{ - struct ptlrpc_service *svc = s->private; - struct ptlrpc_service_part *svcpt; - struct ptlrpc_srh_iterator *srhi; - unsigned int cpt; - int rc; - int i; - - if (sizeof(loff_t) != sizeof(__u64)) { /* can't support */ - CWARN("Failed to read request history because size of loff_t %d can't match size of u64\n", - (int)sizeof(loff_t)); - return NULL; - } - - srhi = kzalloc(sizeof(*srhi), GFP_NOFS); - if (!srhi) - return NULL; - - srhi->srhi_seq = 0; - srhi->srhi_req = NULL; - - cpt = PTLRPC_REQ_POS2CPT(svc, *pos); - - ptlrpc_service_for_each_part(svcpt, i, svc) { - if (i < cpt) /* skip */ - continue; - if (i > cpt) /* make up the lowest position for this CPT */ - *pos = PTLRPC_REQ_CPT2POS(svc, i); - - spin_lock(&svcpt->scp_lock); - rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, - PTLRPC_REQ_POS2SEQ(svc, *pos)); - spin_unlock(&svcpt->scp_lock); - if (rc == 0) { - *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq); - srhi->srhi_idx = i; - return srhi; - } - } - - kfree(srhi); - return NULL; 
-} - -static void -ptlrpc_lprocfs_svc_req_history_stop(struct seq_file *s, void *iter) -{ - struct ptlrpc_srh_iterator *srhi = iter; - - kfree(srhi); -} - -static void * -ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s, - void *iter, loff_t *pos) -{ - struct ptlrpc_service *svc = s->private; - struct ptlrpc_srh_iterator *srhi = iter; - struct ptlrpc_service_part *svcpt; - __u64 seq; - int rc; - int i; - - for (i = srhi->srhi_idx; i < svc->srv_ncpts; i++) { - svcpt = svc->srv_parts[i]; - - if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */ - srhi->srhi_req = NULL; - seq = 0; - srhi->srhi_seq = 0; - } else { /* the next sequence */ - seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits); - } - - spin_lock(&svcpt->scp_lock); - rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, seq); - spin_unlock(&svcpt->scp_lock); - if (rc == 0) { - *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq); - srhi->srhi_idx = i; - return srhi; - } - } - - kfree(srhi); - return NULL; -} - -static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter) -{ - struct ptlrpc_service *svc = s->private; - struct ptlrpc_srh_iterator *srhi = iter; - struct ptlrpc_service_part *svcpt; - struct ptlrpc_request *req; - int rc; - - LASSERT(srhi->srhi_idx < svc->srv_ncpts); - - svcpt = svc->srv_parts[srhi->srhi_idx]; - - spin_lock(&svcpt->scp_lock); - - rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, srhi->srhi_seq); - - if (rc == 0) { - struct timespec64 arrival, sent, arrivaldiff; - char nidstr[LNET_NIDSTR_SIZE]; - - req = srhi->srhi_req; - - libcfs_nid2str_r(req->rq_self, nidstr, sizeof(nidstr)); - arrival.tv_sec = req->rq_arrival_time.tv_sec; - arrival.tv_nsec = req->rq_arrival_time.tv_nsec; - sent.tv_sec = req->rq_sent; - sent.tv_nsec = 0; - arrivaldiff = timespec64_sub(sent, arrival); - - /* Print common req fields. - * CAVEAT EMPTOR: we're racing with the service handler - * here. 
The request could contain any old crap, so you - * must be just as careful as the service's request - * parser. Currently I only print stuff here I know is OK - * to look at coz it was set up in request_in_callback()!!! - */ - seq_printf(s, "%lld:%s:%s:x%llu:%d:%s:%lld.%06lld:%lld.%06llds(%+lld.0s) ", - req->rq_history_seq, nidstr, - libcfs_id2str(req->rq_peer), req->rq_xid, - req->rq_reqlen, ptlrpc_rqphase2str(req), - (s64)req->rq_arrival_time.tv_sec, - (s64)req->rq_arrival_time.tv_nsec / NSEC_PER_USEC, - (s64)arrivaldiff.tv_sec, - (s64)(arrivaldiff.tv_nsec / NSEC_PER_USEC), - (s64)(req->rq_sent - req->rq_deadline)); - if (!svc->srv_ops.so_req_printer) - seq_putc(s, '\n'); - else - svc->srv_ops.so_req_printer(s, srhi->srhi_req); - } - - spin_unlock(&svcpt->scp_lock); - return rc; -} - -static int -ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file) -{ - static const struct seq_operations sops = { - .start = ptlrpc_lprocfs_svc_req_history_start, - .stop = ptlrpc_lprocfs_svc_req_history_stop, - .next = ptlrpc_lprocfs_svc_req_history_next, - .show = ptlrpc_lprocfs_svc_req_history_show, - }; - struct seq_file *seqf; - int rc; - - rc = seq_open(file, &sops); - if (rc) - return rc; - - seqf = file->private_data; - seqf->private = inode->i_private; - return 0; -} - -/* See also lprocfs_rd_timeouts */ -static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file *m, void *n) -{ - struct ptlrpc_service *svc = m->private; - struct ptlrpc_service_part *svcpt; - struct dhms ts; - time64_t worstt; - unsigned int cur; - unsigned int worst; - int i; - - if (AT_OFF) { - seq_printf(m, "adaptive timeouts off, using obd_timeout %u\n", - obd_timeout); - return 0; - } - - ptlrpc_service_for_each_part(svcpt, i, svc) { - cur = at_get(&svcpt->scp_at_estimate); - worst = svcpt->scp_at_estimate.at_worst_ever; - worstt = svcpt->scp_at_estimate.at_worst_time; - s2dhms(&ts, ktime_get_real_seconds() - worstt); - - seq_printf(m, "%10s : cur %3u worst %3u (at %lld, " - 
DHMS_FMT " ago) ", "service", - cur, worst, (s64)worstt, DHMS_VARS(&ts)); - - lprocfs_at_hist_helper(m, &svcpt->scp_at_estimate); - } - - return 0; -} - -LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts); - -static ssize_t high_priority_ratio_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service, - srv_kobj); - return sprintf(buf, "%d\n", svc->srv_hpreq_ratio); -} - -static ssize_t high_priority_ratio_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) -{ - struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service, - srv_kobj); - int rc; - int val; - - rc = kstrtoint(buffer, 10, &val); - if (rc < 0) - return rc; - - if (val < 0) - return -ERANGE; - - spin_lock(&svc->srv_lock); - svc->srv_hpreq_ratio = val; - spin_unlock(&svc->srv_lock); - - return count; -} -LUSTRE_RW_ATTR(high_priority_ratio); - -static struct attribute *ptlrpc_svc_attrs[] = { - &lustre_attr_threads_min.attr, - &lustre_attr_threads_started.attr, - &lustre_attr_threads_max.attr, - &lustre_attr_high_priority_ratio.attr, - NULL, -}; - -static void ptlrpc_sysfs_svc_release(struct kobject *kobj) -{ - struct ptlrpc_service *svc = container_of(kobj, struct ptlrpc_service, - srv_kobj); - - complete(&svc->srv_kobj_unregister); -} - -static struct kobj_type ptlrpc_svc_ktype = { - .default_attrs = ptlrpc_svc_attrs, - .sysfs_ops = &lustre_sysfs_ops, - .release = ptlrpc_sysfs_svc_release, -}; - -void ptlrpc_sysfs_unregister_service(struct ptlrpc_service *svc) -{ - /* Let's see if we had a chance at initialization first */ - if (svc->srv_kobj.kset) { - kobject_put(&svc->srv_kobj); - wait_for_completion(&svc->srv_kobj_unregister); - } -} - -int ptlrpc_sysfs_register_service(struct kset *parent, - struct ptlrpc_service *svc) -{ - int rc; - - svc->srv_kobj.kset = parent; - init_completion(&svc->srv_kobj_unregister); - rc = kobject_init_and_add(&svc->srv_kobj, &ptlrpc_svc_ktype, NULL, 
- "%s", svc->srv_name); - - return rc; -} - -void ptlrpc_ldebugfs_register_service(struct dentry *entry, - struct ptlrpc_service *svc) -{ - struct lprocfs_vars lproc_vars[] = { - {.name = "req_buffer_history_len", - .fops = &ptlrpc_lprocfs_req_history_len_fops, - .data = svc}, - {.name = "req_buffer_history_max", - .fops = &ptlrpc_lprocfs_req_history_max_fops, - .data = svc}, - {.name = "timeouts", - .fops = &ptlrpc_lprocfs_timeouts_fops, - .data = svc}, - {.name = "nrs_policies", - .fops = &ptlrpc_lprocfs_nrs_fops, - .data = svc}, - {NULL} - }; - static const struct file_operations req_history_fops = { - .owner = THIS_MODULE, - .open = ptlrpc_lprocfs_svc_req_history_open, - .read = seq_read, - .llseek = seq_lseek, - .release = lprocfs_seq_release, - }; - - ptlrpc_ldebugfs_register(entry, svc->srv_name, - "stats", &svc->srv_debugfs_entry, - &svc->srv_stats); - - if (IS_ERR_OR_NULL(svc->srv_debugfs_entry)) - return; - - ldebugfs_add_vars(svc->srv_debugfs_entry, lproc_vars, NULL); - - debugfs_create_file("req_history", 0400, svc->srv_debugfs_entry, svc, - &req_history_fops); -} - -void ptlrpc_lprocfs_register_obd(struct obd_device *obddev) -{ - ptlrpc_ldebugfs_register(obddev->obd_debugfs_entry, NULL, "stats", - &obddev->obd_svc_debugfs_entry, - &obddev->obd_svc_stats); -} -EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd); - -void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount) -{ - struct lprocfs_stats *svc_stats; - __u32 op = lustre_msg_get_opc(req->rq_reqmsg); - int opc = opcode_offset(op); - - svc_stats = req->rq_import->imp_obd->obd_svc_stats; - if (!svc_stats || opc <= 0) - return; - LASSERT(opc < LUSTRE_MAX_OPCODES); - if (!(op == LDLM_ENQUEUE || op == MDS_REINT)) - lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, amount); -} - -void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes) -{ - struct lprocfs_stats *svc_stats; - int idx; - - if (!req->rq_import) - return; - svc_stats = req->rq_import->imp_obd->obd_svc_stats; - if 
(!svc_stats) - return; - idx = lustre_msg_get_opc(req->rq_reqmsg); - switch (idx) { - case OST_READ: - idx = BRW_READ_BYTES + PTLRPC_LAST_CNTR; - break; - case OST_WRITE: - idx = BRW_WRITE_BYTES + PTLRPC_LAST_CNTR; - break; - default: - LASSERTF(0, "unsupported opcode %u\n", idx); - break; - } - - lprocfs_counter_add(svc_stats, idx, bytes); -} -EXPORT_SYMBOL(ptlrpc_lprocfs_brw); - -void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) -{ - debugfs_remove_recursive(svc->srv_debugfs_entry); - - if (svc->srv_stats) - lprocfs_free_stats(&svc->srv_stats); -} - -void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) -{ - debugfs_remove_recursive(obd->obd_svc_debugfs_entry); - - if (obd->obd_svc_stats) - lprocfs_free_stats(&obd->obd_svc_stats); -} -EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd); - -#undef BUFLEN - -int lprocfs_wr_ping(struct file *file, const char __user *buffer, - size_t count, loff_t *off) -{ - struct obd_device *obd = ((struct seq_file *)file->private_data)->private; - struct ptlrpc_request *req; - int rc; - - rc = lprocfs_climp_check(obd); - if (rc) - return rc; - - req = ptlrpc_prep_ping(obd->u.cli.cl_import); - up_read(&obd->u.cli.cl_sem); - if (!req) - return -ENOMEM; - - req->rq_send_state = LUSTRE_IMP_FULL; - - rc = ptlrpc_queue_wait(req); - - ptlrpc_req_finished(req); - if (rc >= 0) - return count; - return rc; -} -EXPORT_SYMBOL(lprocfs_wr_ping); - -/* Write the connection UUID to this file to attempt to connect to that node. - * The connection UUID is a node's primary NID. For example, - * "echo connection=192.168.0.1@tcp0::instance > .../import". 
- */ -int lprocfs_wr_import(struct file *file, const char __user *buffer, - size_t count, loff_t *off) -{ - struct obd_device *obd = ((struct seq_file *)file->private_data)->private; - struct obd_import *imp = obd->u.cli.cl_import; - char *kbuf = NULL; - char *uuid; - char *ptr; - int do_reconn = 1; - const char prefix[] = "connection="; - const int prefix_len = sizeof(prefix) - 1; - - if (count > PAGE_SIZE - 1 || count <= prefix_len) - return -EINVAL; - - kbuf = kzalloc(count + 1, GFP_NOFS); - if (!kbuf) - return -ENOMEM; - - if (copy_from_user(kbuf, buffer, count)) { - count = -EFAULT; - goto out; - } - - kbuf[count] = 0; - - /* only support connection=uuid::instance now */ - if (strncmp(prefix, kbuf, prefix_len) != 0) { - count = -EINVAL; - goto out; - } - - uuid = kbuf + prefix_len; - ptr = strstr(uuid, "::"); - if (ptr) { - __u32 inst; - char *endptr; - - *ptr = 0; - do_reconn = 0; - ptr += strlen("::"); - inst = simple_strtoul(ptr, &endptr, 10); - if (*endptr) { - CERROR("config: wrong instance # %s\n", ptr); - } else if (inst != imp->imp_connect_data.ocd_instance) { - CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted target(%u/%u), reconnecting...\n", - imp->imp_obd->obd_name, - imp->imp_connect_data.ocd_instance, inst); - do_reconn = 1; - } else { - CDEBUG(D_INFO, "IR: %s has already been connecting to new target(%u)\n", - imp->imp_obd->obd_name, inst); - } - } - - if (do_reconn) - ptlrpc_recover_import(imp, uuid, 1); - -out: - kfree(kbuf); - return count; -} -EXPORT_SYMBOL(lprocfs_wr_import); - -int lprocfs_rd_pinger_recov(struct seq_file *m, void *n) -{ - struct obd_device *obd = m->private; - struct obd_import *imp = obd->u.cli.cl_import; - int rc; - - rc = lprocfs_climp_check(obd); - if (rc) - return rc; - - seq_printf(m, "%d\n", !imp->imp_no_pinger_recover); - up_read(&obd->u.cli.cl_sem); - - return 0; -} -EXPORT_SYMBOL(lprocfs_rd_pinger_recov); - -int lprocfs_wr_pinger_recov(struct file *file, const char __user *buffer, - size_t count, loff_t *off) 
-{ - struct obd_device *obd = ((struct seq_file *)file->private_data)->private; - struct client_obd *cli = &obd->u.cli; - struct obd_import *imp = cli->cl_import; - int rc, val; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc < 0) - return rc; - - if (val != 0 && val != 1) - return -ERANGE; - - rc = lprocfs_climp_check(obd); - if (rc) - return rc; - - spin_lock(&imp->imp_lock); - imp->imp_no_pinger_recover = !val; - spin_unlock(&imp->imp_lock); - up_read(&obd->u.cli.cl_sem); - - return count; -} -EXPORT_SYMBOL(lprocfs_wr_pinger_recov); diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c deleted file mode 100644 index 2897afb8806c..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c +++ /dev/null @@ -1,771 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_RPC -#include -#include -#include -#include -#include -#include -#include "ptlrpc_internal.h" - -/** - * Helper function. Sends \a len bytes from \a base at offset \a offset - * over \a conn connection to portal \a portal. - * Returns 0 on success or error code. - */ -static int ptl_send_buf(struct lnet_handle_md *mdh, void *base, int len, - enum lnet_ack_req ack, struct ptlrpc_cb_id *cbid, - struct ptlrpc_connection *conn, int portal, __u64 xid, - unsigned int offset) -{ - int rc; - struct lnet_md md; - - LASSERT(portal != 0); - CDEBUG(D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer)); - md.start = base; - md.length = len; - md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1; - md.options = PTLRPC_MD_OPTIONS; - md.user_ptr = cbid; - md.eq_handle = ptlrpc_eq_h; - - if (unlikely(ack == LNET_ACK_REQ && - OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK, - OBD_FAIL_ONCE))) { - /* don't ask for the ack to simulate failing client */ - ack = LNET_NOACK_REQ; - } - - rc = LNetMDBind(md, LNET_UNLINK, mdh); - if (unlikely(rc != 0)) { - CERROR("LNetMDBind failed: %d\n", rc); - LASSERT(rc == -ENOMEM); - return -ENOMEM; - } - - CDEBUG(D_NET, "Sending %d bytes to portal %d, xid %lld, offset %u\n", - len, portal, xid, offset); - - rc = LNetPut(conn->c_self, *mdh, ack, - conn->c_peer, portal, xid, offset, 0); - if (unlikely(rc != 0)) { - int rc2; - /* We're going to get an UNLINK event when I unlink below, - * which will complete just like any other failed send, so - * I fall through and return success here! - */ - CERROR("LNetPut(%s, %d, %lld) failed: %d\n", - libcfs_id2str(conn->c_peer), portal, xid, rc); - rc2 = LNetMDUnlink(*mdh); - LASSERTF(rc2 == 0, "rc2 = %d\n", rc2); - } - - return 0; -} - -static void mdunlink_iterate_helper(struct lnet_handle_md *bd_mds, int count) -{ - int i; - - for (i = 0; i < count; i++) - LNetMDUnlink(bd_mds[i]); -} - -/** - * Register bulk at the sender for later transfer. - * Returns 0 on success or error code. 
- */ -static int ptlrpc_register_bulk(struct ptlrpc_request *req) -{ - struct ptlrpc_bulk_desc *desc = req->rq_bulk; - struct lnet_process_id peer; - int rc = 0; - int rc2; - int posted_md; - int total_md; - u64 mbits; - struct lnet_handle_me me_h; - struct lnet_md md; - - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET)) - return 0; - - /* NB no locking required until desc is on the network */ - LASSERT(desc->bd_nob > 0); - LASSERT(desc->bd_md_count == 0); - LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT); - LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); - LASSERT(desc->bd_req); - LASSERT(ptlrpc_is_bulk_op_passive(desc->bd_type)); - - /* cleanup the state of the bulk for it will be reused */ - if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY) - desc->bd_nob_transferred = 0; - else - LASSERT(desc->bd_nob_transferred == 0); - - desc->bd_failure = 0; - - peer = desc->bd_import->imp_connection->c_peer; - - LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback); - LASSERT(desc->bd_cbid.cbid_arg == desc); - - total_md = DIV_ROUND_UP(desc->bd_iov_count, LNET_MAX_IOV); - /* rq_mbits is matchbits of the final bulk */ - mbits = req->rq_mbits - total_md + 1; - - LASSERTF(mbits == (req->rq_mbits & PTLRPC_BULK_OPS_MASK), - "first mbits = x%llu, last mbits = x%llu\n", - mbits, req->rq_mbits); - LASSERTF(!(desc->bd_registered && - req->rq_send_state != LUSTRE_IMP_REPLAY) || - mbits != desc->bd_last_mbits, - "registered: %d rq_mbits: %llu bd_last_mbits: %llu\n", - desc->bd_registered, mbits, desc->bd_last_mbits); - - desc->bd_registered = 1; - desc->bd_last_mbits = mbits; - desc->bd_md_count = total_md; - md.user_ptr = &desc->bd_cbid; - md.eq_handle = ptlrpc_eq_h; - md.threshold = 1; /* PUT or GET */ - - for (posted_md = 0; posted_md < total_md; posted_md++, mbits++) { - md.options = PTLRPC_MD_OPTIONS | - (ptlrpc_is_bulk_op_get(desc->bd_type) ? 
- LNET_MD_OP_GET : LNET_MD_OP_PUT); - ptlrpc_fill_bulk_md(&md, desc, posted_md); - - rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0, - LNET_UNLINK, LNET_INS_AFTER, &me_h); - if (rc != 0) { - CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n", - desc->bd_import->imp_obd->obd_name, mbits, - posted_md, rc); - break; - } - - /* About to let the network at it... */ - rc = LNetMDAttach(me_h, md, LNET_UNLINK, - &desc->bd_mds[posted_md]); - if (rc != 0) { - CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n", - desc->bd_import->imp_obd->obd_name, mbits, - posted_md, rc); - rc2 = LNetMEUnlink(me_h); - LASSERT(rc2 == 0); - break; - } - } - - if (rc != 0) { - LASSERT(rc == -ENOMEM); - spin_lock(&desc->bd_lock); - desc->bd_md_count -= total_md - posted_md; - spin_unlock(&desc->bd_lock); - LASSERT(desc->bd_md_count >= 0); - mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); - req->rq_status = -ENOMEM; - return -ENOMEM; - } - - spin_lock(&desc->bd_lock); - /* Holler if peer manages to touch buffers before he knows the mbits */ - if (desc->bd_md_count != total_md) - CWARN("%s: Peer %s touched %d buffers while I registered\n", - desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer), - total_md - desc->bd_md_count); - spin_unlock(&desc->bd_lock); - - CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, mbits x%#llx-%#llx, portal %u\n", - desc->bd_md_count, - ptlrpc_is_bulk_op_get(desc->bd_type) ? "get-source" : "put-sink", - desc->bd_iov_count, desc->bd_nob, - desc->bd_last_mbits, req->rq_mbits, desc->bd_portal); - - return 0; -} - -/** - * Disconnect a bulk desc from the network. Idempotent. Not - * thread-safe (i.e. only interlocks with completion callback). - * Returns 1 on success or 0 if network unregistration failed for whatever - * reason. 
- */ -int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) -{ - struct ptlrpc_bulk_desc *desc = req->rq_bulk; - wait_queue_head_t *wq; - int rc; - - LASSERT(!in_interrupt()); /* might sleep */ - - /* Let's setup deadline for reply unlink. */ - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && - async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0) - req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK; - - if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ - return 1; /* never registered */ - - LASSERT(desc->bd_req == req); /* bd_req NULL until registered */ - - /* the unlink ensures the callback happens ASAP and is the last - * one. If it fails, it must be because completion just happened, - * but we must still wait_event() in this case to give liblustre - * a chance to run client_bulk_callback() - */ - mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw); - - if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ - return 1; /* never registered */ - - /* Move to "Unregistering" phase as bulk was not unlinked yet. */ - ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK); - - /* Do not wait for unlink to finish. 
*/ - if (async) - return 0; - - if (req->rq_set) - wq = &req->rq_set->set_waitq; - else - wq = &req->rq_reply_waitq; - - for (;;) { - /* Network access will complete in finite time but the HUGE - * timeout lets us CWARN for visibility of sluggish LNDs - */ - int cnt = 0; - while (cnt < LONG_UNLINK && - (rc = wait_event_idle_timeout(*wq, - !ptlrpc_client_bulk_active(req), - HZ)) == 0) - cnt += 1; - if (rc > 0) { - ptlrpc_rqphase_move(req, req->rq_next_phase); - return 1; - } - - DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p", - desc); - } - return 0; -} - -static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags) -{ - struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; - struct ptlrpc_service *svc = svcpt->scp_service; - int service_time = max_t(int, ktime_get_real_seconds() - - req->rq_arrival_time.tv_sec, 1); - - if (!(flags & PTLRPC_REPLY_EARLY) && - (req->rq_type != PTL_RPC_MSG_ERR) && req->rq_reqmsg && - !(lustre_msg_get_flags(req->rq_reqmsg) & - (MSG_RESENT | MSG_REPLAY | - MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) { - /* early replies, errors and recovery requests don't count - * toward our service time estimate - */ - int oldse = at_measured(&svcpt->scp_at_estimate, service_time); - - if (oldse != 0) { - DEBUG_REQ(D_ADAPTTO, req, - "svc %s changed estimate from %d to %d", - svc->srv_name, oldse, - at_get(&svcpt->scp_at_estimate)); - } - } - /* Report actual service time for client latency calc */ - lustre_msg_set_service_time(req->rq_repmsg, service_time); - /* Report service time estimate for future client reqs, but report 0 - * (to be ignored by client) if it's a error reply during recovery. 
- * (bz15815) - */ - if (req->rq_type == PTL_RPC_MSG_ERR && !req->rq_export) - lustre_msg_set_timeout(req->rq_repmsg, 0); - else - lustre_msg_set_timeout(req->rq_repmsg, - at_get(&svcpt->scp_at_estimate)); - - if (req->rq_reqmsg && - !(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) { - CDEBUG(D_ADAPTTO, "No early reply support: flags=%#x req_flags=%#x magic=%x/%x len=%d\n", - flags, lustre_msg_get_flags(req->rq_reqmsg), - lustre_msg_get_magic(req->rq_reqmsg), - lustre_msg_get_magic(req->rq_repmsg), req->rq_replen); - } -} - -/** - * Send request reply from request \a req reply buffer. - * \a flags defines reply types - * Returns 0 on success or error code - */ -int ptlrpc_send_reply(struct ptlrpc_request *req, int flags) -{ - struct ptlrpc_reply_state *rs = req->rq_reply_state; - struct ptlrpc_connection *conn; - int rc; - - /* We must already have a reply buffer (only ptlrpc_error() may be - * called without one). The reply generated by sptlrpc layer (e.g. - * error notify, etc.) might have NULL rq->reqmsg; Otherwise we must - * have a request buffer which is either the actual (swabbed) incoming - * request, or a saved copy if this is a req saved in - * target_queue_final_reply(). 
- */ - LASSERT(req->rq_no_reply == 0); - LASSERT(req->rq_reqbuf); - LASSERT(rs); - LASSERT((flags & PTLRPC_REPLY_MAYBE_DIFFICULT) || !rs->rs_difficult); - LASSERT(req->rq_repmsg); - LASSERT(req->rq_repmsg == rs->rs_msg); - LASSERT(rs->rs_cb_id.cbid_fn == reply_out_callback); - LASSERT(rs->rs_cb_id.cbid_arg == rs); - - /* There may be no rq_export during failover */ - - if (unlikely(req->rq_export && req->rq_export->exp_obd && - req->rq_export->exp_obd->obd_fail)) { - /* Failed obd's only send ENODEV */ - req->rq_type = PTL_RPC_MSG_ERR; - req->rq_status = -ENODEV; - CDEBUG(D_HA, "sending ENODEV from failed obd %d\n", - req->rq_export->exp_obd->obd_minor); - } - - /* In order to keep interoperability with the client (< 2.3) which - * doesn't have pb_jobid in ptlrpc_body, We have to shrink the - * ptlrpc_body in reply buffer to ptlrpc_body_v2, otherwise, the - * reply buffer on client will be overflow. - * - * XXX Remove this whenever we drop the interoperability with - * such client. - */ - req->rq_replen = lustre_shrink_msg(req->rq_repmsg, 0, - sizeof(struct ptlrpc_body_v2), 1); - - if (req->rq_type != PTL_RPC_MSG_ERR) - req->rq_type = PTL_RPC_MSG_REPLY; - - lustre_msg_set_type(req->rq_repmsg, req->rq_type); - lustre_msg_set_status(req->rq_repmsg, - ptlrpc_status_hton(req->rq_status)); - lustre_msg_set_opc(req->rq_repmsg, - req->rq_reqmsg ? 
- lustre_msg_get_opc(req->rq_reqmsg) : 0); - - target_pack_pool_reply(req); - - ptlrpc_at_set_reply(req, flags); - - if (!req->rq_export || !req->rq_export->exp_connection) - conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL); - else - conn = ptlrpc_connection_addref(req->rq_export->exp_connection); - - if (unlikely(!conn)) { - CERROR("not replying on NULL connection\n"); /* bug 9635 */ - return -ENOTCONN; - } - ptlrpc_rs_addref(rs); /* +1 ref for the network */ - - rc = sptlrpc_svc_wrap_reply(req); - if (unlikely(rc)) - goto out; - - req->rq_sent = ktime_get_real_seconds(); - - rc = ptl_send_buf(&rs->rs_md_h, rs->rs_repbuf, rs->rs_repdata_len, - (rs->rs_difficult && !rs->rs_no_ack) ? - LNET_ACK_REQ : LNET_NOACK_REQ, - &rs->rs_cb_id, conn, - ptlrpc_req2svc(req)->srv_rep_portal, - req->rq_xid, req->rq_reply_off); -out: - if (unlikely(rc != 0)) - ptlrpc_req_drop_rs(req); - ptlrpc_connection_put(conn); - return rc; -} - -int ptlrpc_reply(struct ptlrpc_request *req) -{ - if (req->rq_no_reply) - return 0; - return ptlrpc_send_reply(req, 0); -} - -/** - * For request \a req send an error reply back. Create empty - * reply buffers if necessary. - */ -int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult) -{ - int rc; - - if (req->rq_no_reply) - return 0; - - if (!req->rq_repmsg) { - rc = lustre_pack_reply(req, 1, NULL, NULL); - if (rc) - return rc; - } - - if (req->rq_status != -ENOSPC && req->rq_status != -EACCES && - req->rq_status != -EPERM && req->rq_status != -ENOENT && - req->rq_status != -EINPROGRESS && req->rq_status != -EDQUOT) - req->rq_type = PTL_RPC_MSG_ERR; - - rc = ptlrpc_send_reply(req, may_be_difficult); - return rc; -} - -int ptlrpc_error(struct ptlrpc_request *req) -{ - return ptlrpc_send_error(req, 0); -} - -/** - * Send request \a request. - * if \a noreply is set, don't expect any reply back and don't set up - * reply buffers. - * Returns 0 on success or error code. 
- */ -int ptl_send_rpc(struct ptlrpc_request *request, int noreply) -{ - int rc; - int rc2; - unsigned int mpflag = 0; - struct ptlrpc_connection *connection; - struct lnet_handle_me reply_me_h; - struct lnet_md reply_md; - struct obd_import *imp = request->rq_import; - struct obd_device *obd = imp->imp_obd; - - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC)) - return 0; - - LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST); - LASSERT(request->rq_wait_ctx == 0); - - /* If this is a re-transmit, we're required to have disengaged - * cleanly from the previous attempt - */ - LASSERT(!request->rq_receiving_reply); - LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) && - (imp->imp_state == LUSTRE_IMP_FULL))); - - if (unlikely(obd && obd->obd_fail)) { - CDEBUG(D_HA, "muting rpc for failed imp obd %s\n", - obd->obd_name); - /* this prevents us from waiting in ptlrpc_queue_wait */ - spin_lock(&request->rq_lock); - request->rq_err = 1; - spin_unlock(&request->rq_lock); - request->rq_status = -ENODEV; - return -ENODEV; - } - - connection = imp->imp_connection; - - lustre_msg_set_handle(request->rq_reqmsg, - &imp->imp_remote_handle); - lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST); - lustre_msg_set_conn_cnt(request->rq_reqmsg, imp->imp_conn_cnt); - lustre_msghdr_set_flags(request->rq_reqmsg, imp->imp_msghdr_flags); - - /* - * If it's the first time to resend the request for EINPROGRESS, - * we need to allocate a new XID (see after_reply()), it's different - * from the resend for reply timeout. 
- */ - if (request->rq_nr_resend && list_empty(&request->rq_unreplied_list)) { - __u64 min_xid = 0; - /* - * resend for EINPROGRESS, allocate new xid to avoid reply - * reconstruction - */ - spin_lock(&imp->imp_lock); - ptlrpc_assign_next_xid_nolock(request); - min_xid = ptlrpc_known_replied_xid(imp); - spin_unlock(&imp->imp_lock); - - lustre_msg_set_last_xid(request->rq_reqmsg, min_xid); - DEBUG_REQ(D_RPCTRACE, request, "Allocating new xid for resend on EINPROGRESS"); - } - - if (request->rq_bulk) { - ptlrpc_set_bulk_mbits(request); - lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits); - } - - if (list_empty(&request->rq_unreplied_list) || - request->rq_xid <= imp->imp_known_replied_xid) { - DEBUG_REQ(D_ERROR, request, - "xid: %llu, replied: %llu, list_empty:%d\n", - request->rq_xid, imp->imp_known_replied_xid, - list_empty(&request->rq_unreplied_list)); - LBUG(); - } - - /** - * For enabled AT all request should have AT_SUPPORT in the - * FULL import state when OBD_CONNECT_AT is set - */ - LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL || - (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) || - !(imp->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_AT)); - - if (request->rq_resend) - lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT); - - if (request->rq_memalloc) - mpflag = memalloc_noreclaim_save(); - - rc = sptlrpc_cli_wrap_request(request); - if (rc) { - /* - * set rq_sent so that this request is treated - * as a delayed send in the upper layers - */ - if (rc == -ENOMEM) - request->rq_sent = ktime_get_seconds(); - goto out; - } - - /* bulk register should be done after wrap_request() */ - if (request->rq_bulk) { - rc = ptlrpc_register_bulk(request); - if (rc != 0) - goto out; - } - - if (!noreply) { - LASSERT(request->rq_replen != 0); - if (!request->rq_repbuf) { - LASSERT(!request->rq_repdata); - LASSERT(!request->rq_repmsg); - rc = sptlrpc_cli_alloc_repbuf(request, - request->rq_replen); - if (rc) { - /* this prevents us from looping in - * 
ptlrpc_queue_wait - */ - spin_lock(&request->rq_lock); - request->rq_err = 1; - spin_unlock(&request->rq_lock); - request->rq_status = rc; - goto cleanup_bulk; - } - } else { - request->rq_repdata = NULL; - request->rq_repmsg = NULL; - } - - rc = LNetMEAttach(request->rq_reply_portal,/*XXX FIXME bug 249*/ - connection->c_peer, request->rq_xid, 0, - LNET_UNLINK, LNET_INS_AFTER, &reply_me_h); - if (rc != 0) { - CERROR("LNetMEAttach failed: %d\n", rc); - LASSERT(rc == -ENOMEM); - rc = -ENOMEM; - goto cleanup_bulk; - } - } - - spin_lock(&request->rq_lock); - /* We are responsible for unlinking the reply buffer */ - request->rq_reply_unlinked = noreply; - request->rq_receiving_reply = !noreply; - /* Clear any flags that may be present from previous sends. */ - request->rq_req_unlinked = 0; - request->rq_replied = 0; - request->rq_err = 0; - request->rq_timedout = 0; - request->rq_net_err = 0; - request->rq_resend = 0; - request->rq_restart = 0; - request->rq_reply_truncated = 0; - spin_unlock(&request->rq_lock); - - if (!noreply) { - reply_md.start = request->rq_repbuf; - reply_md.length = request->rq_repbuf_len; - /* Allow multiple early replies */ - reply_md.threshold = LNET_MD_THRESH_INF; - /* Manage remote for early replies */ - reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | - LNET_MD_MANAGE_REMOTE | - LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */ - reply_md.user_ptr = &request->rq_reply_cbid; - reply_md.eq_handle = ptlrpc_eq_h; - - /* We must see the unlink callback to set rq_reply_unlinked, - * so we can't auto-unlink - */ - rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN, - &request->rq_reply_md_h); - if (rc != 0) { - CERROR("LNetMDAttach failed: %d\n", rc); - LASSERT(rc == -ENOMEM); - spin_lock(&request->rq_lock); - /* ...but the MD attach didn't succeed... 
*/ - request->rq_receiving_reply = 0; - spin_unlock(&request->rq_lock); - rc = -ENOMEM; - goto cleanup_me; - } - - CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid %llu, portal %u\n", - request->rq_repbuf_len, request->rq_xid, - request->rq_reply_portal); - } - - /* add references on request for request_out_callback */ - ptlrpc_request_addref(request); - if (obd && obd->obd_svc_stats) - lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR, - atomic_read(&imp->imp_inflight)); - - OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5); - - ktime_get_real_ts64(&request->rq_sent_tv); - request->rq_sent = ktime_get_real_seconds(); - /* We give the server rq_timeout secs to process the req, and - * add the network latency for our local timeout. - */ - request->rq_deadline = request->rq_sent + request->rq_timeout + - ptlrpc_at_get_net_latency(request); - - ptlrpc_pinger_sending_on_import(imp); - - DEBUG_REQ(D_INFO, request, "send flg=%x", - lustre_msg_get_flags(request->rq_reqmsg)); - rc = ptl_send_buf(&request->rq_req_md_h, - request->rq_reqbuf, request->rq_reqdata_len, - LNET_NOACK_REQ, &request->rq_req_cbid, - connection, - request->rq_request_portal, - request->rq_xid, 0); - if (likely(rc == 0)) - goto out; - - request->rq_req_unlinked = 1; - ptlrpc_req_finished(request); - if (noreply) - goto out; - - cleanup_me: - /* MEUnlink is safe; the PUT didn't even get off the ground, and - * nobody apart from the PUT's target has the right nid+XID to - * access the reply buffer. - */ - rc2 = LNetMEUnlink(reply_me_h); - LASSERT(rc2 == 0); - /* UNLINKED callback called synchronously */ - LASSERT(!request->rq_receiving_reply); - - cleanup_bulk: - /* We do sync unlink here as there was no real transfer here so - * the chance to have long unlink to sluggish net is smaller here. 
- */ - ptlrpc_unregister_bulk(request, 0); - out: - if (request->rq_memalloc) - memalloc_noreclaim_restore(mpflag); - return rc; -} -EXPORT_SYMBOL(ptl_send_rpc); - -/** - * Register request buffer descriptor for request receiving. - */ -int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd) -{ - struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service; - static struct lnet_process_id match_id = {LNET_NID_ANY, LNET_PID_ANY}; - int rc; - struct lnet_md md; - struct lnet_handle_me me_h; - - CDEBUG(D_NET, "LNetMEAttach: portal %d\n", - service->srv_req_portal); - - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_RQBD)) - return -ENOMEM; - - /* NB: CPT affinity service should use new LNet flag LNET_INS_LOCAL, - * which means buffer can only be attached on local CPT, and LND - * threads can find it by grabbing a local lock - */ - rc = LNetMEAttach(service->srv_req_portal, - match_id, 0, ~0, LNET_UNLINK, - rqbd->rqbd_svcpt->scp_cpt >= 0 ? - LNET_INS_LOCAL : LNET_INS_AFTER, &me_h); - if (rc != 0) { - CERROR("LNetMEAttach failed: %d\n", rc); - return -ENOMEM; - } - - LASSERT(rqbd->rqbd_refcount == 0); - rqbd->rqbd_refcount = 1; - - md.start = rqbd->rqbd_buffer; - md.length = service->srv_buf_size; - md.max_size = service->srv_max_req_size; - md.threshold = LNET_MD_THRESH_INF; - md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE; - md.user_ptr = &rqbd->rqbd_cbid; - md.eq_handle = ptlrpc_eq_h; - - rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h); - if (rc == 0) - return 0; - - CERROR("LNetMDAttach failed: %d;\n", rc); - LASSERT(rc == -ENOMEM); - rc = LNetMEUnlink(me_h); - LASSERT(rc == 0); - rqbd->rqbd_refcount = 0; - - return -ENOMEM; -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs.c b/drivers/staging/lustre/lustre/ptlrpc/nrs.c deleted file mode 100644 index e09b86529c5d..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/nrs.c +++ /dev/null @@ -1,1613 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO 
NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. A copy is - * included in the COPYING file that accompanied this code. - - * GPL HEADER END - */ -/* - * Copyright (c) 2011 Intel Corporation - * - * Copyright 2012 Xyratex Technology Limited - */ -/* - * lustre/ptlrpc/nrs.c - * - * Network Request Scheduler (NRS) - * - * Allows to reorder the handling of RPCs at servers. - * - * Author: Liang Zhen - * Author: Nikitas Angelinas - */ -/** - * \addtogoup nrs - * @{ - */ - -#define DEBUG_SUBSYSTEM S_RPC -#include -#include -#include -#include -#include -#include -#include "ptlrpc_internal.h" - -/** - * NRS core object. - */ -struct nrs_core nrs_core; - -static int nrs_policy_init(struct ptlrpc_nrs_policy *policy) -{ - return policy->pol_desc->pd_ops->op_policy_init ? - policy->pol_desc->pd_ops->op_policy_init(policy) : 0; -} - -static void nrs_policy_fini(struct ptlrpc_nrs_policy *policy) -{ - LASSERT(policy->pol_ref == 0); - LASSERT(policy->pol_req_queued == 0); - - if (policy->pol_desc->pd_ops->op_policy_fini) - policy->pol_desc->pd_ops->op_policy_fini(policy); -} - -static int nrs_policy_ctl_locked(struct ptlrpc_nrs_policy *policy, - enum ptlrpc_nrs_ctl opc, void *arg) -{ - /** - * The policy may be stopped, but the lprocfs files and - * ptlrpc_nrs_policy instances remain present until unregistration time. - * Do not perform the ctl operation if the policy is stopped, as - * policy->pol_private will be NULL in such a case. 
- */ - if (policy->pol_state == NRS_POL_STATE_STOPPED) - return -ENODEV; - - return policy->pol_desc->pd_ops->op_policy_ctl ? - policy->pol_desc->pd_ops->op_policy_ctl(policy, opc, arg) : - -ENOSYS; -} - -static void nrs_policy_stop0(struct ptlrpc_nrs_policy *policy) -{ - if (policy->pol_desc->pd_ops->op_policy_stop) - policy->pol_desc->pd_ops->op_policy_stop(policy); - - LASSERT(list_empty(&policy->pol_list_queued)); - LASSERT(policy->pol_req_queued == 0 && - policy->pol_req_started == 0); - - policy->pol_private = NULL; - - policy->pol_state = NRS_POL_STATE_STOPPED; - - if (atomic_dec_and_test(&policy->pol_desc->pd_refs)) - module_put(policy->pol_desc->pd_owner); -} - -static int nrs_policy_stop_locked(struct ptlrpc_nrs_policy *policy) -{ - struct ptlrpc_nrs *nrs = policy->pol_nrs; - - if (nrs->nrs_policy_fallback == policy && !nrs->nrs_stopping) - return -EPERM; - - if (policy->pol_state == NRS_POL_STATE_STARTING) - return -EAGAIN; - - /* In progress or already stopped */ - if (policy->pol_state != NRS_POL_STATE_STARTED) - return 0; - - policy->pol_state = NRS_POL_STATE_STOPPING; - - /* Immediately make it invisible */ - if (nrs->nrs_policy_primary == policy) { - nrs->nrs_policy_primary = NULL; - - } else { - LASSERT(nrs->nrs_policy_fallback == policy); - nrs->nrs_policy_fallback = NULL; - } - - /* I have the only refcount */ - if (policy->pol_ref == 1) - nrs_policy_stop0(policy); - - return 0; -} - -/** - * Transitions the \a nrs NRS head's primary policy to - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING and if the policy has no - * pending usage references, to ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED. 
- * - * \param[in] nrs the NRS head to carry out this operation on - */ -static void nrs_policy_stop_primary(struct ptlrpc_nrs *nrs) -{ - struct ptlrpc_nrs_policy *tmp = nrs->nrs_policy_primary; - - if (!tmp) - return; - - nrs->nrs_policy_primary = NULL; - - LASSERT(tmp->pol_state == NRS_POL_STATE_STARTED); - tmp->pol_state = NRS_POL_STATE_STOPPING; - - if (tmp->pol_ref == 0) - nrs_policy_stop0(tmp); -} - -/** - * Transitions a policy across the ptlrpc_nrs_pol_state range of values, in - * response to an lprocfs command to start a policy. - * - * If a primary policy different to the current one is specified, this function - * will transition the new policy to the - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTING and then to - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED, and will then transition - * the old primary policy (if there is one) to - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding - * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. - * - * If the fallback policy is specified, this is taken to indicate an instruction - * to stop the current primary policy, without substituting it with another - * primary policy, so the primary policy (if any) is transitioned to - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING, and if there are no outstanding - * references on the policy to ptlrpc_nrs_pol_stae::NRS_POL_STATE_STOPPED. In - * this case, the fallback policy is only left active in the NRS head. - */ -static int nrs_policy_start_locked(struct ptlrpc_nrs_policy *policy) -{ - struct ptlrpc_nrs *nrs = policy->pol_nrs; - int rc = 0; - - /** - * Don't allow multiple starting which is too complex, and has no real - * benefit. 
- */ - if (nrs->nrs_policy_starting) - return -EAGAIN; - - LASSERT(policy->pol_state != NRS_POL_STATE_STARTING); - - if (policy->pol_state == NRS_POL_STATE_STOPPING) - return -EAGAIN; - - if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) { - /** - * This is for cases in which the user sets the policy to the - * fallback policy (currently fifo for all services); i.e. the - * user is resetting the policy to the default; so we stop the - * primary policy, if any. - */ - if (policy == nrs->nrs_policy_fallback) { - nrs_policy_stop_primary(nrs); - return 0; - } - - /** - * If we reach here, we must be setting up the fallback policy - * at service startup time, and only a single policy with the - * nrs_policy_flags::PTLRPC_NRS_FL_FALLBACK flag set can - * register with NRS core. - */ - LASSERT(!nrs->nrs_policy_fallback); - } else { - /** - * Shouldn't start primary policy if w/o fallback policy. - */ - if (!nrs->nrs_policy_fallback) - return -EPERM; - - if (policy->pol_state == NRS_POL_STATE_STARTED) - return 0; - } - - /** - * Increase the module usage count for policies registering from other - * modules. 
- */ - if (atomic_inc_return(&policy->pol_desc->pd_refs) == 1 && - !try_module_get(policy->pol_desc->pd_owner)) { - atomic_dec(&policy->pol_desc->pd_refs); - CERROR("NRS: cannot get module for policy %s; is it alive?\n", - policy->pol_desc->pd_name); - return -ENODEV; - } - - /** - * Serialize policy starting across the NRS head - */ - nrs->nrs_policy_starting = 1; - - policy->pol_state = NRS_POL_STATE_STARTING; - - if (policy->pol_desc->pd_ops->op_policy_start) { - spin_unlock(&nrs->nrs_lock); - - rc = policy->pol_desc->pd_ops->op_policy_start(policy); - - spin_lock(&nrs->nrs_lock); - if (rc != 0) { - if (atomic_dec_and_test(&policy->pol_desc->pd_refs)) - module_put(policy->pol_desc->pd_owner); - - policy->pol_state = NRS_POL_STATE_STOPPED; - goto out; - } - } - - policy->pol_state = NRS_POL_STATE_STARTED; - - if (policy->pol_flags & PTLRPC_NRS_FL_FALLBACK) { - /** - * This path is only used at PTLRPC service setup time. - */ - nrs->nrs_policy_fallback = policy; - } else { - /* - * Try to stop the current primary policy if there is one. - */ - nrs_policy_stop_primary(nrs); - - /** - * And set the newly-started policy as the primary one. - */ - nrs->nrs_policy_primary = policy; - } - -out: - nrs->nrs_policy_starting = 0; - - return rc; -} - -/** - * Increases the policy's usage reference count. - */ -static inline void nrs_policy_get_locked(struct ptlrpc_nrs_policy *policy) -{ - policy->pol_ref++; -} - -/** - * Decreases the policy's usage reference count, and stops the policy in case it - * was already stopping and have no more outstanding usage references (which - * indicates it has no more queued or started requests, and can be safely - * stopped). 
- */ -static void nrs_policy_put_locked(struct ptlrpc_nrs_policy *policy) -{ - LASSERT(policy->pol_ref > 0); - - policy->pol_ref--; - if (unlikely(policy->pol_ref == 0 && - policy->pol_state == NRS_POL_STATE_STOPPING)) - nrs_policy_stop0(policy); -} - -static void nrs_policy_put(struct ptlrpc_nrs_policy *policy) -{ - spin_lock(&policy->pol_nrs->nrs_lock); - nrs_policy_put_locked(policy); - spin_unlock(&policy->pol_nrs->nrs_lock); -} - -/** - * Find and return a policy by name. - */ -static struct ptlrpc_nrs_policy *nrs_policy_find_locked(struct ptlrpc_nrs *nrs, - char *name) -{ - struct ptlrpc_nrs_policy *tmp; - - list_for_each_entry(tmp, &nrs->nrs_policy_list, pol_list) { - if (strncmp(tmp->pol_desc->pd_name, name, - NRS_POL_NAME_MAX) == 0) { - nrs_policy_get_locked(tmp); - return tmp; - } - } - return NULL; -} - -/** - * Release references for the resource hierarchy moving upwards towards the - * policy instance resource. - */ -static void nrs_resource_put(struct ptlrpc_nrs_resource *res) -{ - struct ptlrpc_nrs_policy *policy = res->res_policy; - - if (policy->pol_desc->pd_ops->op_res_put) { - struct ptlrpc_nrs_resource *parent; - - for (; res; res = parent) { - parent = res->res_parent; - policy->pol_desc->pd_ops->op_res_put(policy, res); - } - } -} - -/** - * Obtains references for each resource in the resource hierarchy for request - * \a nrq if it is to be handled by \a policy. - * - * \param[in] policy the policy - * \param[in] nrq the request - * \param[in] moving_req denotes whether this is a call to the function by - * ldlm_lock_reorder_req(), in order to move \a nrq to - * the high-priority NRS head; we should not sleep when - * set. 
- * - * \retval NULL resource hierarchy references not obtained - * \retval valid-pointer the bottom level of the resource hierarchy - * - * \see ptlrpc_nrs_pol_ops::op_res_get() - */ -static -struct ptlrpc_nrs_resource *nrs_resource_get(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq, - bool moving_req) -{ - /** - * Set to NULL to traverse the resource hierarchy from the top. - */ - struct ptlrpc_nrs_resource *res = NULL; - struct ptlrpc_nrs_resource *tmp = NULL; - int rc; - - while (1) { - rc = policy->pol_desc->pd_ops->op_res_get(policy, nrq, res, - &tmp, moving_req); - if (rc < 0) { - if (res) - nrs_resource_put(res); - return NULL; - } - - tmp->res_parent = res; - tmp->res_policy = policy; - res = tmp; - tmp = NULL; - /** - * Return once we have obtained a reference to the bottom level - * of the resource hierarchy. - */ - if (rc > 0) - return res; - } -} - -/** - * Obtains resources for the resource hierarchies and policy references for - * the fallback and current primary policy (if any), that will later be used - * to handle request \a nrq. - * - * \param[in] nrs the NRS head instance that will be handling request \a nrq. - * \param[in] nrq the request that is being handled. - * \param[out] resp the array where references to the resource hierarchy are - * stored. - * \param[in] moving_req is set when obtaining resources while moving a - * request from a policy on the regular NRS head to a - * policy on the HP NRS head (via - * ldlm_lock_reorder_req()). It signifies that - * allocations to get resources should be atomic; for - * a full explanation, see comment in - * ptlrpc_nrs_pol_ops::op_res_get(). - */ -static void nrs_resource_get_safe(struct ptlrpc_nrs *nrs, - struct ptlrpc_nrs_request *nrq, - struct ptlrpc_nrs_resource **resp, - bool moving_req) -{ - struct ptlrpc_nrs_policy *primary = NULL; - struct ptlrpc_nrs_policy *fallback = NULL; - - memset(resp, 0, sizeof(resp[0]) * NRS_RES_MAX); - - /** - * Obtain policy references. 
- */ - spin_lock(&nrs->nrs_lock); - - fallback = nrs->nrs_policy_fallback; - nrs_policy_get_locked(fallback); - - primary = nrs->nrs_policy_primary; - if (primary) - nrs_policy_get_locked(primary); - - spin_unlock(&nrs->nrs_lock); - - /** - * Obtain resource hierarchy references. - */ - resp[NRS_RES_FALLBACK] = nrs_resource_get(fallback, nrq, moving_req); - LASSERT(resp[NRS_RES_FALLBACK]); - - if (primary) { - resp[NRS_RES_PRIMARY] = nrs_resource_get(primary, nrq, - moving_req); - /** - * A primary policy may exist which may not wish to serve a - * particular request for different reasons; release the - * reference on the policy as it will not be used for this - * request. - */ - if (!resp[NRS_RES_PRIMARY]) - nrs_policy_put(primary); - } -} - -/** - * Releases references to resource hierarchies and policies, because they are no - * longer required; used when request handling has been completed, or the - * request is moving to the high priority NRS head. - * - * \param resp the resource hierarchy that is being released - * - * \see ptlrpc_nrs_req_finalize() - */ -static void nrs_resource_put_safe(struct ptlrpc_nrs_resource **resp) -{ - struct ptlrpc_nrs_policy *pols[NRS_RES_MAX]; - int i; - - for (i = 0; i < NRS_RES_MAX; i++) { - if (resp[i]) { - pols[i] = resp[i]->res_policy; - nrs_resource_put(resp[i]); - resp[i] = NULL; - } else { - pols[i] = NULL; - } - } - - for (i = 0; i < NRS_RES_MAX; i++) { - if (pols[i]) - nrs_policy_put(pols[i]); - } -} - -/** - * Obtains an NRS request from \a policy for handling or examination; the - * request should be removed in the 'handling' case. - * - * Calling into this function implies we already know the policy has a request - * waiting to be handled. - * - * \param[in] policy the policy from which a request - * \param[in] peek when set, signifies that we just want to examine the - * request, and not handle it, so the request is not removed - * from the policy. 
- * \param[in] force when set, it will force a policy to return a request if it - * has one pending - * - * \retval the NRS request to be handled - */ -static inline -struct ptlrpc_nrs_request *nrs_request_get(struct ptlrpc_nrs_policy *policy, - bool peek, bool force) -{ - struct ptlrpc_nrs_request *nrq; - - LASSERT(policy->pol_req_queued > 0); - - nrq = policy->pol_desc->pd_ops->op_req_get(policy, peek, force); - - LASSERT(ergo(nrq, nrs_request_policy(nrq) == policy)); - - return nrq; -} - -/** - * Enqueues request \a nrq for later handling, via one one the policies for - * which resources where earlier obtained via nrs_resource_get_safe(). The - * function attempts to enqueue the request first on the primary policy - * (if any), since this is the preferred choice. - * - * \param nrq the request being enqueued - * - * \see nrs_resource_get_safe() - */ -static inline void nrs_request_enqueue(struct ptlrpc_nrs_request *nrq) -{ - struct ptlrpc_nrs_policy *policy; - int rc; - int i; - - /** - * Try in descending order, because the primary policy (if any) is - * the preferred choice. - */ - for (i = NRS_RES_MAX - 1; i >= 0; i--) { - if (!nrq->nr_res_ptrs[i]) - continue; - - nrq->nr_res_idx = i; - policy = nrq->nr_res_ptrs[i]->res_policy; - - rc = policy->pol_desc->pd_ops->op_req_enqueue(policy, nrq); - if (rc == 0) { - policy->pol_nrs->nrs_req_queued++; - policy->pol_req_queued++; - return; - } - } - /** - * Should never get here, as at least the primary policy's - * ptlrpc_nrs_pol_ops::op_req_enqueue() implementation should always - * succeed. - */ - LBUG(); -} - -/** - * Called when a request has been handled - * - * \param[in] nrs the request that has been handled; can be used for - * job/resource control. 
- * - * \see ptlrpc_nrs_req_stop_nolock() - */ -static inline void nrs_request_stop(struct ptlrpc_nrs_request *nrq) -{ - struct ptlrpc_nrs_policy *policy = nrs_request_policy(nrq); - - if (policy->pol_desc->pd_ops->op_req_stop) - policy->pol_desc->pd_ops->op_req_stop(policy, nrq); - - LASSERT(policy->pol_nrs->nrs_req_started > 0); - LASSERT(policy->pol_req_started > 0); - - policy->pol_nrs->nrs_req_started--; - policy->pol_req_started--; -} - -/** - * Handler for operations that can be carried out on policies. - * - * Handles opcodes that are common to all policy types within NRS core, and - * passes any unknown opcodes to the policy-specific control function. - * - * \param[in] nrs the NRS head this policy belongs to. - * \param[in] name the human-readable policy name; should be the same as - * ptlrpc_nrs_pol_desc::pd_name. - * \param[in] opc the opcode of the operation being carried out. - * \param[in,out] arg can be used to pass information in and out between when - * carrying an operation; usually data that is private to - * the policy at some level, or generic policy status - * information. - * - * \retval -ve error condition - * \retval 0 operation was carried out successfully - */ -static int nrs_policy_ctl(struct ptlrpc_nrs *nrs, char *name, - enum ptlrpc_nrs_ctl opc, void *arg) -{ - struct ptlrpc_nrs_policy *policy; - int rc = 0; - - spin_lock(&nrs->nrs_lock); - - policy = nrs_policy_find_locked(nrs, name); - if (!policy) { - rc = -ENOENT; - goto out; - } - - if (policy->pol_state != NRS_POL_STATE_STARTED && - policy->pol_state != NRS_POL_STATE_STOPPED) { - rc = -EAGAIN; - goto out; - } - - switch (opc) { - /** - * Unknown opcode, pass it down to the policy-specific control - * function for handling. 
- */ - default: - rc = nrs_policy_ctl_locked(policy, opc, arg); - break; - - /** - * Start \e policy - */ - case PTLRPC_NRS_CTL_START: - rc = nrs_policy_start_locked(policy); - break; - } -out: - if (policy) - nrs_policy_put_locked(policy); - - spin_unlock(&nrs->nrs_lock); - - return rc; -} - -/** - * Unregisters a policy by name. - * - * \param[in] nrs the NRS head this policy belongs to. - * \param[in] name the human-readable policy name; should be the same as - * ptlrpc_nrs_pol_desc::pd_name - * - * \retval -ve error - * \retval 0 success - */ -static int nrs_policy_unregister(struct ptlrpc_nrs *nrs, char *name) -{ - struct ptlrpc_nrs_policy *policy = NULL; - - spin_lock(&nrs->nrs_lock); - - policy = nrs_policy_find_locked(nrs, name); - if (!policy) { - spin_unlock(&nrs->nrs_lock); - - CERROR("Can't find NRS policy %s\n", name); - return -ENOENT; - } - - if (policy->pol_ref > 1) { - CERROR("Policy %s is busy with %d references\n", name, - (int)policy->pol_ref); - nrs_policy_put_locked(policy); - - spin_unlock(&nrs->nrs_lock); - return -EBUSY; - } - - LASSERT(policy->pol_req_queued == 0); - LASSERT(policy->pol_req_started == 0); - - if (policy->pol_state != NRS_POL_STATE_STOPPED) { - nrs_policy_stop_locked(policy); - LASSERT(policy->pol_state == NRS_POL_STATE_STOPPED); - } - - list_del(&policy->pol_list); - nrs->nrs_num_pols--; - - nrs_policy_put_locked(policy); - - spin_unlock(&nrs->nrs_lock); - - nrs_policy_fini(policy); - - LASSERT(!policy->pol_private); - kfree(policy); - - return 0; -} - -/** - * Register a policy from \policy descriptor \a desc with NRS head \a nrs. - * - * \param[in] nrs the NRS head on which the policy will be registered. - * \param[in] desc the policy descriptor from which the information will be - * obtained to register the policy. 
- * - * \retval -ve error - * \retval 0 success - */ -static int nrs_policy_register(struct ptlrpc_nrs *nrs, - struct ptlrpc_nrs_pol_desc *desc) -{ - struct ptlrpc_nrs_policy *policy; - struct ptlrpc_nrs_policy *tmp; - struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt; - int rc; - - LASSERT(desc->pd_ops->op_res_get); - LASSERT(desc->pd_ops->op_req_get); - LASSERT(desc->pd_ops->op_req_enqueue); - LASSERT(desc->pd_ops->op_req_dequeue); - LASSERT(desc->pd_compat); - - policy = kzalloc_node(sizeof(*policy), GFP_NOFS, - cfs_cpt_spread_node(svcpt->scp_service->srv_cptable, - svcpt->scp_cpt)); - if (!policy) - return -ENOMEM; - - policy->pol_nrs = nrs; - policy->pol_desc = desc; - policy->pol_state = NRS_POL_STATE_STOPPED; - policy->pol_flags = desc->pd_flags; - - INIT_LIST_HEAD(&policy->pol_list); - INIT_LIST_HEAD(&policy->pol_list_queued); - - rc = nrs_policy_init(policy); - if (rc != 0) { - kfree(policy); - return rc; - } - - spin_lock(&nrs->nrs_lock); - - tmp = nrs_policy_find_locked(nrs, policy->pol_desc->pd_name); - if (tmp) { - CERROR("NRS policy %s has been registered, can't register it for %s\n", - policy->pol_desc->pd_name, - svcpt->scp_service->srv_name); - nrs_policy_put_locked(tmp); - - spin_unlock(&nrs->nrs_lock); - nrs_policy_fini(policy); - kfree(policy); - - return -EEXIST; - } - - list_add_tail(&policy->pol_list, &nrs->nrs_policy_list); - nrs->nrs_num_pols++; - - if (policy->pol_flags & PTLRPC_NRS_FL_REG_START) - rc = nrs_policy_start_locked(policy); - - spin_unlock(&nrs->nrs_lock); - - if (rc != 0) - (void)nrs_policy_unregister(nrs, policy->pol_desc->pd_name); - - return rc; -} - -/** - * Enqueue request \a req using one of the policies its resources are referring - * to. - * - * \param[in] req the request to enqueue. 
- */ -static void ptlrpc_nrs_req_add_nolock(struct ptlrpc_request *req) -{ - struct ptlrpc_nrs_policy *policy; - - LASSERT(req->rq_nrq.nr_initialized); - LASSERT(!req->rq_nrq.nr_enqueued); - - nrs_request_enqueue(&req->rq_nrq); - req->rq_nrq.nr_enqueued = 1; - - policy = nrs_request_policy(&req->rq_nrq); - /** - * Add the policy to the NRS head's list of policies with enqueued - * requests, if it has not been added there. - */ - if (unlikely(list_empty(&policy->pol_list_queued))) - list_add_tail(&policy->pol_list_queued, - &policy->pol_nrs->nrs_policy_queued); -} - -/** - * Enqueue a request on the high priority NRS head. - * - * \param req the request to enqueue. - */ -static void ptlrpc_nrs_hpreq_add_nolock(struct ptlrpc_request *req) -{ - int opc = lustre_msg_get_opc(req->rq_reqmsg); - - spin_lock(&req->rq_lock); - req->rq_hp = 1; - ptlrpc_nrs_req_add_nolock(req); - if (opc != OBD_PING) - DEBUG_REQ(D_NET, req, "high priority req"); - spin_unlock(&req->rq_lock); -} - -/** - * Returns a boolean predicate indicating whether the policy described by - * \a desc is adequate for use with service \a svc. - * - * \param[in] svc the service - * \param[in] desc the policy descriptor - * - * \retval false the policy is not compatible with the service - * \retval true the policy is compatible with the service - */ -static inline bool nrs_policy_compatible(const struct ptlrpc_service *svc, - const struct ptlrpc_nrs_pol_desc *desc) -{ - return desc->pd_compat(svc, desc); -} - -/** - * Registers all compatible policies in nrs_core.nrs_policies, for NRS head - * \a nrs. 
- * - * \param[in] nrs the NRS head - * - * \retval -ve error - * \retval 0 success - * - * \pre mutex_is_locked(&nrs_core.nrs_mutex) - * - * \see ptlrpc_service_nrs_setup() - */ -static int nrs_register_policies_locked(struct ptlrpc_nrs *nrs) -{ - struct ptlrpc_nrs_pol_desc *desc; - /* for convenience */ - struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt; - struct ptlrpc_service *svc = svcpt->scp_service; - int rc = -EINVAL; - - LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); - - list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) { - if (nrs_policy_compatible(svc, desc)) { - rc = nrs_policy_register(nrs, desc); - if (rc != 0) { - CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n", - desc->pd_name, svcpt->scp_cpt, - svc->srv_name, rc); - /** - * Fail registration if any of the policies' - * registration fails. - */ - break; - } - } - } - - return rc; -} - -/** - * Initializes NRS head \a nrs of service partition \a svcpt, and registers all - * compatible policies in NRS core, with the NRS head. 
- * - * \param[in] nrs the NRS head - * \param[in] svcpt the PTLRPC service partition to setup - * - * \retval -ve error - * \retval 0 success - * - * \pre mutex_is_locked(&nrs_core.nrs_mutex) - */ -static int nrs_svcpt_setup_locked0(struct ptlrpc_nrs *nrs, - struct ptlrpc_service_part *svcpt) -{ - enum ptlrpc_nrs_queue_type queue; - - LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); - - if (nrs == &svcpt->scp_nrs_reg) - queue = PTLRPC_NRS_QUEUE_REG; - else if (nrs == svcpt->scp_nrs_hp) - queue = PTLRPC_NRS_QUEUE_HP; - else - LBUG(); - - nrs->nrs_svcpt = svcpt; - nrs->nrs_queue_type = queue; - spin_lock_init(&nrs->nrs_lock); - INIT_LIST_HEAD(&nrs->nrs_policy_list); - INIT_LIST_HEAD(&nrs->nrs_policy_queued); - - return nrs_register_policies_locked(nrs); -} - -/** - * Allocates a regular and optionally a high-priority NRS head (if the service - * handles high-priority RPCs), and then registers all available compatible - * policies on those NRS heads. - * - * \param[in,out] svcpt the PTLRPC service partition to setup - * - * \pre mutex_is_locked(&nrs_core.nrs_mutex) - */ -static int nrs_svcpt_setup_locked(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_nrs *nrs; - int rc; - - LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); - - /** - * Initialize the regular NRS head. - */ - nrs = nrs_svcpt2nrs(svcpt, false); - rc = nrs_svcpt_setup_locked0(nrs, svcpt); - if (rc < 0) - goto out; - - /** - * Optionally allocate a high-priority NRS head. - */ - if (!svcpt->scp_service->srv_ops.so_hpreq_handler) - goto out; - - svcpt->scp_nrs_hp = - kzalloc_node(sizeof(*svcpt->scp_nrs_hp), GFP_NOFS, - cfs_cpt_spread_node(svcpt->scp_service->srv_cptable, - svcpt->scp_cpt)); - if (!svcpt->scp_nrs_hp) { - rc = -ENOMEM; - goto out; - } - - nrs = nrs_svcpt2nrs(svcpt, true); - rc = nrs_svcpt_setup_locked0(nrs, svcpt); - -out: - return rc; -} - -/** - * Unregisters all policies on all available NRS heads in a service partition; - * called at PTLRPC service unregistration time. 
- * - * \param[in] svcpt the PTLRPC service partition - * - * \pre mutex_is_locked(&nrs_core.nrs_mutex) - */ -static void nrs_svcpt_cleanup_locked(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_nrs *nrs; - struct ptlrpc_nrs_policy *policy; - struct ptlrpc_nrs_policy *tmp; - int rc; - bool hp = false; - - LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); - -again: - /* scp_nrs_hp could be NULL due to short of memory. */ - nrs = hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg; - /* check the nrs_svcpt to see if nrs is initialized. */ - if (!nrs || !nrs->nrs_svcpt) - return; - nrs->nrs_stopping = 1; - - list_for_each_entry_safe(policy, tmp, &nrs->nrs_policy_list, pol_list) { - rc = nrs_policy_unregister(nrs, policy->pol_desc->pd_name); - LASSERT(rc == 0); - } - - /** - * If the service partition has an HP NRS head, clean that up as well. - */ - if (!hp && nrs_svcpt_has_hp(svcpt)) { - hp = true; - goto again; - } - - if (hp) - kfree(nrs); -} - -/** - * Returns the descriptor for a policy as identified by by \a name. - * - * \param[in] name the policy name - * - * \retval the policy descriptor - * \retval NULL - */ -static struct ptlrpc_nrs_pol_desc *nrs_policy_find_desc_locked(const char *name) -{ - struct ptlrpc_nrs_pol_desc *tmp; - - list_for_each_entry(tmp, &nrs_core.nrs_policies, pd_list) { - if (strncmp(tmp->pd_name, name, NRS_POL_NAME_MAX) == 0) - return tmp; - } - return NULL; -} - -/** - * Removes the policy from all supported NRS heads of all partitions of all - * PTLRPC services. 
- * - * \param[in] desc the policy descriptor to unregister - * - * \retval -ve error - * \retval 0 successfully unregistered policy on all supported NRS heads - * - * \pre mutex_is_locked(&nrs_core.nrs_mutex) - * \pre mutex_is_locked(&ptlrpc_all_services_mutex) - */ -static int nrs_policy_unregister_locked(struct ptlrpc_nrs_pol_desc *desc) -{ - struct ptlrpc_nrs *nrs; - struct ptlrpc_service *svc; - struct ptlrpc_service_part *svcpt; - int i; - int rc = 0; - - LASSERT(mutex_is_locked(&nrs_core.nrs_mutex)); - LASSERT(mutex_is_locked(&ptlrpc_all_services_mutex)); - - list_for_each_entry(svc, &ptlrpc_all_services, srv_list) { - if (!nrs_policy_compatible(svc, desc) || - unlikely(svc->srv_is_stopping)) - continue; - - ptlrpc_service_for_each_part(svcpt, i, svc) { - bool hp = false; - -again: - nrs = nrs_svcpt2nrs(svcpt, hp); - rc = nrs_policy_unregister(nrs, desc->pd_name); - /** - * Ignore -ENOENT as the policy may not have registered - * successfully on all service partitions. - */ - if (rc == -ENOENT) { - rc = 0; - } else if (rc != 0) { - CERROR("Failed to unregister NRS policy %s for partition %d of service %s: %d\n", - desc->pd_name, svcpt->scp_cpt, - svcpt->scp_service->srv_name, rc); - return rc; - } - - if (!hp && nrs_svc_has_hp(svc)) { - hp = true; - goto again; - } - } - - if (desc->pd_ops->op_lprocfs_fini) - desc->pd_ops->op_lprocfs_fini(svc); - } - - return rc; -} - -/** - * Registers a new policy with NRS core. - * - * The function will only succeed if policy registration with all compatible - * service partitions (if any) is successful. - * - * N.B. This function should be called either at ptlrpc module initialization - * time when registering a policy that ships with NRS core, or in a - * module's init() function for policies registering from other modules. 
- * - * \param[in] conf configuration information for the new policy to register - * - * \retval -ve error - * \retval 0 success - */ -static int ptlrpc_nrs_policy_register(struct ptlrpc_nrs_pol_conf *conf) -{ - struct ptlrpc_service *svc; - struct ptlrpc_nrs_pol_desc *desc; - size_t len; - int rc = 0; - - LASSERT(conf->nc_ops); - LASSERT(conf->nc_compat); - LASSERT(ergo(conf->nc_compat == nrs_policy_compat_one, - conf->nc_compat_svc_name)); - LASSERT(ergo((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0, - conf->nc_owner)); - - conf->nc_name[NRS_POL_NAME_MAX - 1] = '\0'; - - /** - * External policies are not allowed to start immediately upon - * registration, as there is a relatively higher chance that their - * registration might fail. In such a case, some policy instances may - * already have requests queued wen unregistration needs to happen as - * part o cleanup; since there is currently no way to drain requests - * from a policy unless the service is unregistering, we just disallow - * this. - */ - if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) && - (conf->nc_flags & (PTLRPC_NRS_FL_FALLBACK | - PTLRPC_NRS_FL_REG_START))) { - CERROR("NRS: failing to register policy %s. 
Please check policy flags; external policies cannot act as fallback policies, or be started immediately upon registration without interaction with lprocfs\n", - conf->nc_name); - return -EINVAL; - } - - mutex_lock(&nrs_core.nrs_mutex); - - if (nrs_policy_find_desc_locked(conf->nc_name)) { - CERROR("NRS: failing to register policy %s which has already been registered with NRS core!\n", - conf->nc_name); - rc = -EEXIST; - goto fail; - } - - desc = kzalloc(sizeof(*desc), GFP_NOFS); - if (!desc) { - rc = -ENOMEM; - goto fail; - } - - len = strlcpy(desc->pd_name, conf->nc_name, sizeof(desc->pd_name)); - if (len >= sizeof(desc->pd_name)) { - kfree(desc); - rc = -E2BIG; - goto fail; - } - desc->pd_ops = conf->nc_ops; - desc->pd_compat = conf->nc_compat; - desc->pd_compat_svc_name = conf->nc_compat_svc_name; - if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) != 0) - desc->pd_owner = conf->nc_owner; - desc->pd_flags = conf->nc_flags; - atomic_set(&desc->pd_refs, 0); - - /** - * For policies that are held in the same module as NRS (currently - * ptlrpc), do not register the policy with all compatible services, - * as the services will not have started at this point, since we are - * calling from ptlrpc module initialization code. In such cases each - * service will register all compatible policies later, via - * ptlrpc_service_nrs_setup(). 
- */ - if ((conf->nc_flags & PTLRPC_NRS_FL_REG_EXTERN) == 0) - goto internal; - - /** - * Register the new policy on all compatible services - */ - mutex_lock(&ptlrpc_all_services_mutex); - - list_for_each_entry(svc, &ptlrpc_all_services, srv_list) { - struct ptlrpc_service_part *svcpt; - int i; - int rc2; - - if (!nrs_policy_compatible(svc, desc) || - unlikely(svc->srv_is_stopping)) - continue; - - ptlrpc_service_for_each_part(svcpt, i, svc) { - struct ptlrpc_nrs *nrs; - bool hp = false; -again: - nrs = nrs_svcpt2nrs(svcpt, hp); - rc = nrs_policy_register(nrs, desc); - if (rc != 0) { - CERROR("Failed to register NRS policy %s for partition %d of service %s: %d\n", - desc->pd_name, svcpt->scp_cpt, - svcpt->scp_service->srv_name, rc); - - rc2 = nrs_policy_unregister_locked(desc); - /** - * Should not fail at this point - */ - LASSERT(rc2 == 0); - mutex_unlock(&ptlrpc_all_services_mutex); - kfree(desc); - goto fail; - } - - if (!hp && nrs_svc_has_hp(svc)) { - hp = true; - goto again; - } - } - - /** - * No need to take a reference to other modules here, as we - * will be calling from the module's init() function. - */ - if (desc->pd_ops->op_lprocfs_init) { - rc = desc->pd_ops->op_lprocfs_init(svc); - if (rc != 0) { - rc2 = nrs_policy_unregister_locked(desc); - /** - * Should not fail at this point - */ - LASSERT(rc2 == 0); - mutex_unlock(&ptlrpc_all_services_mutex); - kfree(desc); - goto fail; - } - } - } - - mutex_unlock(&ptlrpc_all_services_mutex); -internal: - list_add_tail(&desc->pd_list, &nrs_core.nrs_policies); -fail: - mutex_unlock(&nrs_core.nrs_mutex); - - return rc; -} - -/** - * Setup NRS heads on all service partitions of service \a svc, and register - * all compatible policies on those NRS heads. - * - * To be called from within ptl - * \param[in] svc the service to setup - * - * \retval -ve error, the calling logic should eventually call - * ptlrpc_service_nrs_cleanup() to undo any work performed - * by this function. 
- * - * \see ptlrpc_register_service() - * \see ptlrpc_service_nrs_cleanup() - */ -int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc) -{ - struct ptlrpc_service_part *svcpt; - const struct ptlrpc_nrs_pol_desc *desc; - int i; - int rc = 0; - - mutex_lock(&nrs_core.nrs_mutex); - - /** - * Initialize NRS heads on all service CPTs. - */ - ptlrpc_service_for_each_part(svcpt, i, svc) { - rc = nrs_svcpt_setup_locked(svcpt); - if (rc != 0) - goto failed; - } - - /** - * Set up lprocfs interfaces for all supported policies for the - * service. - */ - list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) { - if (!nrs_policy_compatible(svc, desc)) - continue; - - if (desc->pd_ops->op_lprocfs_init) { - rc = desc->pd_ops->op_lprocfs_init(svc); - if (rc != 0) - goto failed; - } - } - -failed: - - mutex_unlock(&nrs_core.nrs_mutex); - - return rc; -} - -/** - * Unregisters all policies on all service partitions of service \a svc. - * - * \param[in] svc the PTLRPC service to unregister - */ -void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc) -{ - struct ptlrpc_service_part *svcpt; - const struct ptlrpc_nrs_pol_desc *desc; - int i; - - mutex_lock(&nrs_core.nrs_mutex); - - /** - * Clean up NRS heads on all service partitions - */ - ptlrpc_service_for_each_part(svcpt, i, svc) - nrs_svcpt_cleanup_locked(svcpt); - - /** - * Clean up lprocfs interfaces for all supported policies for the - * service. - */ - list_for_each_entry(desc, &nrs_core.nrs_policies, pd_list) { - if (!nrs_policy_compatible(svc, desc)) - continue; - - if (desc->pd_ops->op_lprocfs_fini) - desc->pd_ops->op_lprocfs_fini(svc); - } - - mutex_unlock(&nrs_core.nrs_mutex); -} - -/** - * Obtains NRS head resources for request \a req. - * - * These could be either on the regular or HP NRS head of \a svcpt; resources - * taken on the regular head can later be swapped for HP head resources by - * ldlm_lock_reorder_req(). 
- * - * \param[in] svcpt the service partition - * \param[in] req the request - * \param[in] hp which NRS head of \a svcpt to use - */ -void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt, - struct ptlrpc_request *req, bool hp) -{ - struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp); - - memset(&req->rq_nrq, 0, sizeof(req->rq_nrq)); - nrs_resource_get_safe(nrs, &req->rq_nrq, req->rq_nrq.nr_res_ptrs, - false); - - /** - * It is fine to access \e nr_initialized without locking as there is - * no contention at this early stage. - */ - req->rq_nrq.nr_initialized = 1; -} - -/** - * Releases resources for a request; is called after the request has been - * handled. - * - * \param[in] req the request - * - * \see ptlrpc_server_finish_request() - */ -void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req) -{ - if (req->rq_nrq.nr_initialized) { - nrs_resource_put_safe(req->rq_nrq.nr_res_ptrs); - /* no protection on bit nr_initialized because no - * contention at this late stage - */ - req->rq_nrq.nr_finalized = 1; - } -} - -void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req) -{ - if (req->rq_nrq.nr_started) - nrs_request_stop(&req->rq_nrq); -} - -/** - * Enqueues request \a req on either the regular or high-priority NRS head - * of service partition \a svcpt. - * - * \param[in] svcpt the service partition - * \param[in] req the request to be enqueued - * \param[in] hp whether to enqueue the request on the regular or - * high-priority NRS head. 
- */ -void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt, - struct ptlrpc_request *req, bool hp) -{ - spin_lock(&svcpt->scp_req_lock); - - if (hp) - ptlrpc_nrs_hpreq_add_nolock(req); - else - ptlrpc_nrs_req_add_nolock(req); - - spin_unlock(&svcpt->scp_req_lock); -} - -static void nrs_request_removed(struct ptlrpc_nrs_policy *policy) -{ - LASSERT(policy->pol_nrs->nrs_req_queued > 0); - LASSERT(policy->pol_req_queued > 0); - - policy->pol_nrs->nrs_req_queued--; - policy->pol_req_queued--; - - /** - * If the policy has no more requests queued, remove it from - * ptlrpc_nrs::nrs_policy_queued. - */ - if (unlikely(policy->pol_req_queued == 0)) { - list_del_init(&policy->pol_list_queued); - - /** - * If there are other policies with queued requests, move the - * current policy to the end so that we can round robin over - * all policies and drain the requests. - */ - } else if (policy->pol_req_queued != policy->pol_nrs->nrs_req_queued) { - LASSERT(policy->pol_req_queued < - policy->pol_nrs->nrs_req_queued); - - list_move_tail(&policy->pol_list_queued, - &policy->pol_nrs->nrs_policy_queued); - } -} - -/** - * Obtains a request for handling from an NRS head of service partition - * \a svcpt. - * - * \param[in] svcpt the service partition - * \param[in] hp whether to obtain a request from the regular or - * high-priority NRS head. - * \param[in] peek when set, signifies that we just want to examine the - * request, and not handle it, so the request is not removed - * from the policy. 
- * \param[in] force when set, it will force a policy to return a request if it - * has one pending - * - * \retval the request to be handled - * \retval NULL the head has no requests to serve - */ -struct ptlrpc_request * -ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp, - bool peek, bool force) -{ - struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp); - struct ptlrpc_nrs_policy *policy; - struct ptlrpc_nrs_request *nrq; - - /** - * Always try to drain requests from all NRS polices even if they are - * inactive, because the user can change policy status at runtime. - */ - list_for_each_entry(policy, &nrs->nrs_policy_queued, pol_list_queued) { - nrq = nrs_request_get(policy, peek, force); - if (nrq) { - if (likely(!peek)) { - nrq->nr_started = 1; - - policy->pol_req_started++; - policy->pol_nrs->nrs_req_started++; - - nrs_request_removed(policy); - } - - return container_of(nrq, struct ptlrpc_request, rq_nrq); - } - } - - return NULL; -} - -/** - * Returns whether there are any requests currently enqueued on any of the - * policies of service partition's \a svcpt NRS head specified by \a hp. Should - * be called while holding ptlrpc_service_part::scp_req_lock to get a reliable - * result. - * - * \param[in] svcpt the service partition to enquire. - * \param[in] hp whether the regular or high-priority NRS head is to be - * enquired. - * - * \retval false the indicated NRS head has no enqueued requests. - * \retval true the indicated NRS head has some enqueued requests. - */ -bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp) -{ - struct ptlrpc_nrs *nrs = nrs_svcpt2nrs(svcpt, hp); - - return nrs->nrs_req_queued > 0; -}; - -/** - * Carries out a control operation \a opc on the policy identified by the - * human-readable \a name, on either all partitions, or only on the first - * partition of service \a svc. - * - * \param[in] svc the service the policy belongs to. 
- * \param[in] queue whether to carry out the command on the policy which - * belongs to the regular, high-priority, or both NRS - * heads of service partitions of \a svc. - * \param[in] name the policy to act upon, by human-readable name - * \param[in] opc the opcode of the operation to carry out - * \param[in] single when set, the operation will only be carried out on the - * NRS heads of the first service partition of \a svc. - * This is useful for some policies which e.g. share - * identical values on the same parameters of different - * service partitions; when reading these parameters via - * lprocfs, these policies may just want to obtain and - * print out the values from the first service partition. - * Storing these values centrally elsewhere then could be - * another solution for this. - * \param[in,out] arg can be used as a generic in/out buffer between control - * operations and the user environment. - * - *\retval -ve error condition - *\retval 0 operation was carried out successfully - */ -int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc, - enum ptlrpc_nrs_queue_type queue, char *name, - enum ptlrpc_nrs_ctl opc, bool single, void *arg) -{ - struct ptlrpc_service_part *svcpt; - int i; - int rc = 0; - - LASSERT(opc != PTLRPC_NRS_CTL_INVALID); - - if ((queue & PTLRPC_NRS_QUEUE_BOTH) == 0) - return -EINVAL; - - ptlrpc_service_for_each_part(svcpt, i, svc) { - if ((queue & PTLRPC_NRS_QUEUE_REG) != 0) { - rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, false), name, - opc, arg); - if (rc != 0 || (queue == PTLRPC_NRS_QUEUE_REG && - single)) - goto out; - } - - if ((queue & PTLRPC_NRS_QUEUE_HP) != 0) { - /** - * XXX: We could optionally check for - * nrs_svc_has_hp(svc) here, and return an error if it - * is false. Right now we rely on the policies' lprocfs - * handlers that call the present function to make this - * check; if they fail to do so, they might hit the - * assertion inside nrs_svcpt2nrs() below. 
- */ - rc = nrs_policy_ctl(nrs_svcpt2nrs(svcpt, true), name, - opc, arg); - if (rc != 0 || single) - goto out; - } - } -out: - return rc; -} - -/** - * Adds all policies that ship with the ptlrpc module, to NRS core's list of - * policies \e nrs_core.nrs_policies. - * - * \retval 0 all policies have been registered successfully - * \retval -ve error - */ -int ptlrpc_nrs_init(void) -{ - int rc; - - mutex_init(&nrs_core.nrs_mutex); - INIT_LIST_HEAD(&nrs_core.nrs_policies); - - rc = ptlrpc_nrs_policy_register(&nrs_conf_fifo); - if (rc != 0) - goto fail; - - return rc; -fail: - /** - * Since no PTLRPC services have been started at this point, all we need - * to do for cleanup is to free the descriptors. - */ - ptlrpc_nrs_fini(); - - return rc; -} - -/** - * Removes all policy descriptors from nrs_core::nrs_policies, and frees the - * policy descriptors. - * - * Since all PTLRPC services are stopped at this point, there are no more - * instances of any policies, because each service will have stopped its policy - * instances in ptlrpc_service_nrs_cleanup(), so we just need to free the - * descriptors here. - */ -void ptlrpc_nrs_fini(void) -{ - struct ptlrpc_nrs_pol_desc *desc; - struct ptlrpc_nrs_pol_desc *tmp; - - list_for_each_entry_safe(desc, tmp, &nrs_core.nrs_policies, pd_list) { - list_del_init(&desc->pd_list); - kfree(desc); - } -} - -/** @} nrs */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c b/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c deleted file mode 100644 index ff630d94dd26..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/nrs_fifo.c +++ /dev/null @@ -1,270 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License version 2 for more details. A copy is - * included in the COPYING file that accompanied this code. - - * GPL HEADER END - */ -/* - * Copyright (c) 2011 Intel Corporation - * - * Copyright 2012 Xyratex Technology Limited - */ -/* - * lustre/ptlrpc/nrs_fifo.c - * - * Network Request Scheduler (NRS) FIFO policy - * - * Handles RPCs in a FIFO manner, as received from the network. This policy is - * a logical wrapper around previous, non-NRS functionality. It is used as the - * default and fallback policy for all types of RPCs on all PTLRPC service - * partitions, for both regular and high-priority NRS heads. Default here means - * the policy is the one enabled at PTLRPC service partition startup time, and - * fallback means the policy is used to handle RPCs that are not handled - * successfully or are not handled at all by any primary policy that may be - * enabled on a given NRS head. - * - * Author: Liang Zhen - * Author: Nikitas Angelinas - */ -/** - * \addtogoup nrs - * @{ - */ - -#define DEBUG_SUBSYSTEM S_RPC -#include -#include -#include -#include -#include "ptlrpc_internal.h" - -/** - * \name fifo - * - * The FIFO policy is a logical wrapper around previous, non-NRS functionality. - * It schedules RPCs in the same order as they are queued from LNet. - * - * @{ - */ - -#define NRS_POL_NAME_FIFO "fifo" - -/** - * Is called before the policy transitions into - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED; allocates and initializes a - * policy-specific private data structure. 
- * - * \param[in] policy The policy to start - * - * \retval -ENOMEM OOM error - * \retval 0 success - * - * \see nrs_policy_register() - * \see nrs_policy_ctl() - */ -static int nrs_fifo_start(struct ptlrpc_nrs_policy *policy) -{ - struct nrs_fifo_head *head; - - head = kzalloc_node(sizeof(*head), GFP_NOFS, - cfs_cpt_spread_node(nrs_pol2cptab(policy), - nrs_pol2cptid(policy))); - if (!head) - return -ENOMEM; - - INIT_LIST_HEAD(&head->fh_list); - policy->pol_private = head; - return 0; -} - -/** - * Is called before the policy transitions into - * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED; deallocates the policy-specific - * private data structure. - * - * \param[in] policy The policy to stop - * - * \see nrs_policy_stop0() - */ -static void nrs_fifo_stop(struct ptlrpc_nrs_policy *policy) -{ - struct nrs_fifo_head *head = policy->pol_private; - - LASSERT(head); - LASSERT(list_empty(&head->fh_list)); - - kfree(head); -} - -/** - * Is called for obtaining a FIFO policy resource. - * - * \param[in] policy The policy on which the request is being asked for - * \param[in] nrq The request for which resources are being taken - * \param[in] parent Parent resource, unused in this policy - * \param[out] resp Resources references are placed in this array - * \param[in] moving_req Signifies limited caller context; unused in this - * policy - * - * \retval 1 The FIFO policy only has a one-level resource hierarchy, as since - * it implements a simple scheduling algorithm in which request - * priority is determined on the request arrival order, it does not - * need to maintain a set of resources that would otherwise be used - * to calculate a request's priority. 
- * - * \see nrs_resource_get_safe() - */ -static int nrs_fifo_res_get(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq, - const struct ptlrpc_nrs_resource *parent, - struct ptlrpc_nrs_resource **resp, bool moving_req) -{ - /** - * Just return the resource embedded inside nrs_fifo_head, and end this - * resource hierarchy reference request. - */ - *resp = &((struct nrs_fifo_head *)policy->pol_private)->fh_res; - return 1; -} - -/** - * Called when getting a request from the FIFO policy for handling, or just - * peeking; removes the request from the policy when it is to be handled. - * - * \param[in] policy The policy - * \param[in] peek When set, signifies that we just want to examine the - * request, and not handle it, so the request is not removed - * from the policy. - * \param[in] force Force the policy to return a request; unused in this - * policy - * - * \retval The request to be handled; this is the next request in the FIFO - * queue - * - * \see ptlrpc_nrs_req_get_nolock() - * \see nrs_request_get() - */ -static -struct ptlrpc_nrs_request *nrs_fifo_req_get(struct ptlrpc_nrs_policy *policy, - bool peek, bool force) -{ - struct nrs_fifo_head *head = policy->pol_private; - struct ptlrpc_nrs_request *nrq; - - nrq = unlikely(list_empty(&head->fh_list)) ? 
NULL : - list_entry(head->fh_list.next, struct ptlrpc_nrs_request, - nr_u.fifo.fr_list); - - if (likely(!peek && nrq)) { - struct ptlrpc_request *req = container_of(nrq, - struct ptlrpc_request, - rq_nrq); - - list_del_init(&nrq->nr_u.fifo.fr_list); - - CDEBUG(D_RPCTRACE, "NRS start %s request from %s, seq: %llu\n", - policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer), - nrq->nr_u.fifo.fr_sequence); - } - - return nrq; -} - -/** - * Adds request \a nrq to \a policy's list of queued requests - * - * \param[in] policy The policy - * \param[in] nrq The request to add - * - * \retval 0 success; nrs_request_enqueue() assumes this function will always - * succeed - */ -static int nrs_fifo_req_add(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq) -{ - struct nrs_fifo_head *head; - - head = container_of(nrs_request_resource(nrq), struct nrs_fifo_head, - fh_res); - /** - * Only used for debugging - */ - nrq->nr_u.fifo.fr_sequence = head->fh_sequence++; - list_add_tail(&nrq->nr_u.fifo.fr_list, &head->fh_list); - - return 0; -} - -/** - * Removes request \a nrq from \a policy's list of queued requests. - * - * \param[in] policy The policy - * \param[in] nrq The request to remove - */ -static void nrs_fifo_req_del(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq) -{ - LASSERT(!list_empty(&nrq->nr_u.fifo.fr_list)); - list_del_init(&nrq->nr_u.fifo.fr_list); -} - -/** - * Prints a debug statement right before the request \a nrq stops being - * handled. 
- * - * \param[in] policy The policy handling the request - * \param[in] nrq The request being handled - * - * \see ptlrpc_server_finish_request() - * \see ptlrpc_nrs_req_stop_nolock() - */ -static void nrs_fifo_req_stop(struct ptlrpc_nrs_policy *policy, - struct ptlrpc_nrs_request *nrq) -{ - struct ptlrpc_request *req = container_of(nrq, struct ptlrpc_request, - rq_nrq); - - CDEBUG(D_RPCTRACE, "NRS stop %s request from %s, seq: %llu\n", - policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer), - nrq->nr_u.fifo.fr_sequence); -} - -/** - * FIFO policy operations - */ -static const struct ptlrpc_nrs_pol_ops nrs_fifo_ops = { - .op_policy_start = nrs_fifo_start, - .op_policy_stop = nrs_fifo_stop, - .op_res_get = nrs_fifo_res_get, - .op_req_get = nrs_fifo_req_get, - .op_req_enqueue = nrs_fifo_req_add, - .op_req_dequeue = nrs_fifo_req_del, - .op_req_stop = nrs_fifo_req_stop, -}; - -/** - * FIFO policy configuration - */ -struct ptlrpc_nrs_pol_conf nrs_conf_fifo = { - .nc_name = NRS_POL_NAME_FIFO, - .nc_ops = &nrs_fifo_ops, - .nc_compat = nrs_policy_compat_all, - .nc_flags = PTLRPC_NRS_FL_FALLBACK | - PTLRPC_NRS_FL_REG_START -}; - -/** @} fifo */ - -/** @} nrs */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c deleted file mode 100644 index 6ac9bb570663..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c +++ /dev/null @@ -1,2311 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/pack_generic.c - * - * (Un)packing of OST requests - * - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include - -#include -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -static inline u32 lustre_msg_hdr_size_v2(u32 count) -{ - return cfs_size_round(offsetof(struct lustre_msg_v2, - lm_buflens[count])); -} - -u32 lustre_msg_hdr_size(__u32 magic, u32 count) -{ - switch (magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_msg_hdr_size_v2(count); - default: - LASSERTF(0, "incorrect message magic: %08x\n", magic); - return 0; - } -} - -void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout, - u32 index) -{ - if (inout) - lustre_set_req_swabbed(req, index); - else - lustre_set_rep_swabbed(req, index); -} - -int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout, - u32 index) -{ - if (inout) - return (ptlrpc_req_need_swab(req) && - !lustre_req_swabbed(req, index)); - else - return (ptlrpc_rep_need_swab(req) && - !lustre_rep_swabbed(req, index)); -} - -/* early reply size */ -u32 lustre_msg_early_size(void) -{ - static u32 size; - - if (!size) { - /* Always reply old ptlrpc_body_v2 to keep interoperability - * with the old client (< 2.3) which doesn't have pb_jobid - * in the ptlrpc_body. 
- * - * XXX Remove this whenever we drop interoperability with such - * client. - */ - __u32 pblen = sizeof(struct ptlrpc_body_v2); - - size = lustre_msg_size(LUSTRE_MSG_MAGIC_V2, 1, &pblen); - } - return size; -} -EXPORT_SYMBOL(lustre_msg_early_size); - -u32 lustre_msg_size_v2(int count, __u32 *lengths) -{ - u32 size; - int i; - - size = lustre_msg_hdr_size_v2(count); - for (i = 0; i < count; i++) - size += cfs_size_round(lengths[i]); - - return size; -} -EXPORT_SYMBOL(lustre_msg_size_v2); - -/* This returns the size of the buffer that is required to hold a lustre_msg - * with the given sub-buffer lengths. - * NOTE: this should only be used for NEW requests, and should always be - * in the form of a v2 request. If this is a connection to a v1 - * target then the first buffer will be stripped because the ptlrpc - * data is part of the lustre_msg_v1 header. b=14043 - */ -u32 lustre_msg_size(__u32 magic, int count, __u32 *lens) -{ - __u32 size[] = { sizeof(struct ptlrpc_body) }; - - if (!lens) { - LASSERT(count == 1); - lens = size; - } - - LASSERT(count > 0); - LASSERT(lens[MSG_PTLRPC_BODY_OFF] >= sizeof(struct ptlrpc_body_v2)); - - switch (magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_msg_size_v2(count, lens); - default: - LASSERTF(0, "incorrect message magic: %08x\n", magic); - return 0; - } -} - -/* This is used to determine the size of a buffer that was already packed - * and will correctly handle the different message formats. 
- */ -u32 lustre_packed_msg_size(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} - -void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens, - char **bufs) -{ - char *ptr; - int i; - - msg->lm_bufcount = count; - /* XXX: lm_secflvr uninitialized here */ - msg->lm_magic = LUSTRE_MSG_MAGIC_V2; - - for (i = 0; i < count; i++) - msg->lm_buflens[i] = lens[i]; - - if (!bufs) - return; - - ptr = (char *)msg + lustre_msg_hdr_size_v2(count); - for (i = 0; i < count; i++) { - char *tmp = bufs[i]; - - if (tmp) - memcpy(ptr, tmp, lens[i]); - ptr += cfs_size_round(lens[i]); - } -} -EXPORT_SYMBOL(lustre_init_msg_v2); - -static int lustre_pack_request_v2(struct ptlrpc_request *req, - int count, __u32 *lens, char **bufs) -{ - int reqlen, rc; - - reqlen = lustre_msg_size_v2(count, lens); - - rc = sptlrpc_cli_alloc_reqbuf(req, reqlen); - if (rc) - return rc; - - req->rq_reqlen = reqlen; - - lustre_init_msg_v2(req->rq_reqmsg, count, lens, bufs); - lustre_msg_add_version(req->rq_reqmsg, PTLRPC_MSG_VERSION); - return 0; -} - -int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count, - __u32 *lens, char **bufs) -{ - __u32 size[] = { sizeof(struct ptlrpc_body) }; - - if (!lens) { - LASSERT(count == 1); - lens = size; - } - - LASSERT(count > 0); - LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body)); - - /* only use new format, we don't need to be compatible with 1.4 */ - return lustre_pack_request_v2(req, count, lens, bufs); -} - -#if RS_DEBUG -LIST_HEAD(ptlrpc_rs_debug_lru); -spinlock_t ptlrpc_rs_debug_lock; - -#define PTLRPC_RS_DEBUG_LRU_ADD(rs) \ -do { \ - spin_lock(&ptlrpc_rs_debug_lock); \ - list_add_tail(&(rs)->rs_debug_list, &ptlrpc_rs_debug_lru); \ - spin_unlock(&ptlrpc_rs_debug_lock); \ -} while (0) - -#define PTLRPC_RS_DEBUG_LRU_DEL(rs) \ -do { \ 
- spin_lock(&ptlrpc_rs_debug_lock); \ - list_del(&(rs)->rs_debug_list); \ - spin_unlock(&ptlrpc_rs_debug_lock); \ -} while (0) -#else -# define PTLRPC_RS_DEBUG_LRU_ADD(rs) do {} while (0) -# define PTLRPC_RS_DEBUG_LRU_DEL(rs) do {} while (0) -#endif - -struct ptlrpc_reply_state * -lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_reply_state *rs = NULL; - - spin_lock(&svcpt->scp_rep_lock); - - /* See if we have anything in a pool, and wait if nothing */ - while (list_empty(&svcpt->scp_rep_idle)) { - int rc; - - spin_unlock(&svcpt->scp_rep_lock); - /* If we cannot get anything for some long time, we better - * bail out instead of waiting infinitely - */ - rc = wait_event_idle_timeout(svcpt->scp_rep_waitq, - !list_empty(&svcpt->scp_rep_idle), - 10 * HZ); - if (rc == 0) - goto out; - spin_lock(&svcpt->scp_rep_lock); - } - - rs = list_entry(svcpt->scp_rep_idle.next, - struct ptlrpc_reply_state, rs_list); - list_del(&rs->rs_list); - - spin_unlock(&svcpt->scp_rep_lock); - - memset(rs, 0, svcpt->scp_service->srv_max_reply_size); - rs->rs_size = svcpt->scp_service->srv_max_reply_size; - rs->rs_svcpt = svcpt; - rs->rs_prealloc = 1; -out: - return rs; -} - -void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs) -{ - struct ptlrpc_service_part *svcpt = rs->rs_svcpt; - - spin_lock(&svcpt->scp_rep_lock); - list_add(&rs->rs_list, &svcpt->scp_rep_idle); - spin_unlock(&svcpt->scp_rep_lock); - wake_up(&svcpt->scp_rep_waitq); -} - -int lustre_pack_reply_v2(struct ptlrpc_request *req, int count, - __u32 *lens, char **bufs, int flags) -{ - struct ptlrpc_reply_state *rs; - int msg_len, rc; - - LASSERT(!req->rq_reply_state); - - if ((flags & LPRFL_EARLY_REPLY) == 0) { - spin_lock(&req->rq_lock); - req->rq_packed_final = 1; - spin_unlock(&req->rq_lock); - } - - msg_len = lustre_msg_size_v2(count, lens); - rc = sptlrpc_svc_alloc_rs(req, msg_len); - if (rc) - return rc; - - rs = req->rq_reply_state; - atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */ 
- rs->rs_cb_id.cbid_fn = reply_out_callback; - rs->rs_cb_id.cbid_arg = rs; - rs->rs_svcpt = req->rq_rqbd->rqbd_svcpt; - INIT_LIST_HEAD(&rs->rs_exp_list); - INIT_LIST_HEAD(&rs->rs_obd_list); - INIT_LIST_HEAD(&rs->rs_list); - spin_lock_init(&rs->rs_lock); - - req->rq_replen = msg_len; - req->rq_reply_state = rs; - req->rq_repmsg = rs->rs_msg; - - lustre_init_msg_v2(rs->rs_msg, count, lens, bufs); - lustre_msg_add_version(rs->rs_msg, PTLRPC_MSG_VERSION); - - PTLRPC_RS_DEBUG_LRU_ADD(rs); - - return 0; -} -EXPORT_SYMBOL(lustre_pack_reply_v2); - -int lustre_pack_reply_flags(struct ptlrpc_request *req, int count, __u32 *lens, - char **bufs, int flags) -{ - int rc = 0; - __u32 size[] = { sizeof(struct ptlrpc_body) }; - - if (!lens) { - LASSERT(count == 1); - lens = size; - } - - LASSERT(count > 0); - LASSERT(lens[MSG_PTLRPC_BODY_OFF] == sizeof(struct ptlrpc_body)); - - switch (req->rq_reqmsg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - rc = lustre_pack_reply_v2(req, count, lens, bufs, flags); - break; - default: - LASSERTF(0, "incorrect message magic: %08x\n", - req->rq_reqmsg->lm_magic); - rc = -EINVAL; - } - if (rc != 0) - CERROR("lustre_pack_reply failed: rc=%d size=%d\n", rc, - lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens)); - return rc; -} - -int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens, - char **bufs) -{ - return lustre_pack_reply_flags(req, count, lens, bufs, 0); -} -EXPORT_SYMBOL(lustre_pack_reply); - -void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size) -{ - u32 i, offset, buflen, bufcount; - - bufcount = m->lm_bufcount; - if (unlikely(n >= bufcount)) { - CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n", - m, n, bufcount); - return NULL; - } - - buflen = m->lm_buflens[n]; - if (unlikely(buflen < min_size)) { - CERROR("msg %p buffer[%d] size %d too small (required %d, opc=%d)\n", - m, n, buflen, min_size, - n == MSG_PTLRPC_BODY_OFF ? 
-1 : lustre_msg_get_opc(m)); - return NULL; - } - - offset = lustre_msg_hdr_size_v2(bufcount); - for (i = 0; i < n; i++) - offset += cfs_size_round(m->lm_buflens[i]); - - return (char *)m + offset; -} - -void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 min_size) -{ - switch (m->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_msg_buf_v2(m, n, min_size); - default: - LASSERTF(0, "incorrect message magic: %08x (msg:%p)\n", - m->lm_magic, m); - return NULL; - } -} -EXPORT_SYMBOL(lustre_msg_buf); - -static int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, u32 segment, - unsigned int newlen, int move_data) -{ - char *tail = NULL, *newpos; - int tail_len = 0, n; - - LASSERT(msg); - LASSERT(msg->lm_bufcount > segment); - LASSERT(msg->lm_buflens[segment] >= newlen); - - if (msg->lm_buflens[segment] == newlen) - goto out; - - if (move_data && msg->lm_bufcount > segment + 1) { - tail = lustre_msg_buf_v2(msg, segment + 1, 0); - for (n = segment + 1; n < msg->lm_bufcount; n++) - tail_len += cfs_size_round(msg->lm_buflens[n]); - } - - msg->lm_buflens[segment] = newlen; - - if (tail && tail_len) { - newpos = lustre_msg_buf_v2(msg, segment + 1, 0); - LASSERT(newpos <= tail); - if (newpos != tail) - memmove(newpos, tail, tail_len); - } -out: - return lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); -} - -/* - * for @msg, shrink @segment to size @newlen. if @move_data is non-zero, - * we also move data forward from @segment + 1. - * - * if @newlen == 0, we remove the segment completely, but we still keep the - * totally bufcount the same to save possible data moving. this will leave a - * unused segment with size 0 at the tail, but that's ok. - * - * return new msg size after shrinking. - * - * CAUTION: - * + if any buffers higher than @segment has been filled in, must call shrink - * with non-zero @move_data. - * + caller should NOT keep pointers to msg buffers which higher than @segment - * after call shrink. 
- */ -int lustre_shrink_msg(struct lustre_msg *msg, int segment, - unsigned int newlen, int move_data) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_shrink_msg_v2(msg, segment, newlen, move_data); - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } - return 0; -} -EXPORT_SYMBOL(lustre_shrink_msg); - -void lustre_free_reply_state(struct ptlrpc_reply_state *rs) -{ - PTLRPC_RS_DEBUG_LRU_DEL(rs); - - LASSERT(atomic_read(&rs->rs_refcount) == 0); - LASSERT(!rs->rs_difficult || rs->rs_handled); - LASSERT(!rs->rs_on_net); - LASSERT(!rs->rs_scheduled); - LASSERT(!rs->rs_export); - LASSERT(rs->rs_nlocks == 0); - LASSERT(list_empty(&rs->rs_exp_list)); - LASSERT(list_empty(&rs->rs_obd_list)); - - sptlrpc_svc_free_rs(rs); -} - -static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len) -{ - int swabbed, required_len, i; - - /* Now we know the sender speaks my language. */ - required_len = lustre_msg_hdr_size_v2(0); - if (len < required_len) { - /* can't even look inside the message */ - CERROR("message length %d too small for lustre_msg\n", len); - return -EINVAL; - } - - swabbed = (m->lm_magic == LUSTRE_MSG_MAGIC_V2_SWABBED); - - if (swabbed) { - __swab32s(&m->lm_magic); - __swab32s(&m->lm_bufcount); - __swab32s(&m->lm_secflvr); - __swab32s(&m->lm_repsize); - __swab32s(&m->lm_cksum); - __swab32s(&m->lm_flags); - BUILD_BUG_ON(offsetof(typeof(*m), lm_padding_2) == 0); - BUILD_BUG_ON(offsetof(typeof(*m), lm_padding_3) == 0); - } - - required_len = lustre_msg_hdr_size_v2(m->lm_bufcount); - if (len < required_len) { - /* didn't receive all the buffer lengths */ - CERROR("message length %d too small for %d buflens\n", - len, m->lm_bufcount); - return -EINVAL; - } - - for (i = 0; i < m->lm_bufcount; i++) { - if (swabbed) - __swab32s(&m->lm_buflens[i]); - required_len += cfs_size_round(m->lm_buflens[i]); - } - - if (len < required_len) { - CERROR("len: %d, required_len %d\n", len, required_len); - CERROR("bufcount: %d\n", 
m->lm_bufcount); - for (i = 0; i < m->lm_bufcount; i++) - CERROR("buffer %d length %d\n", i, m->lm_buflens[i]); - return -EINVAL; - } - - return swabbed; -} - -int __lustre_unpack_msg(struct lustre_msg *m, int len) -{ - int required_len, rc; - - /* We can provide a slightly better error log, if we check the - * message magic and version first. In the future, struct - * lustre_msg may grow, and we'd like to log a version mismatch, - * rather than a short message. - * - */ - required_len = offsetof(struct lustre_msg, lm_magic) + - sizeof(m->lm_magic); - if (len < required_len) { - /* can't even look inside the message */ - CERROR("message length %d too small for magic/version check\n", - len); - return -EINVAL; - } - - rc = lustre_unpack_msg_v2(m, len); - - return rc; -} -EXPORT_SYMBOL(__lustre_unpack_msg); - -int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len) -{ - int rc; - - rc = __lustre_unpack_msg(req->rq_reqmsg, len); - if (rc == 1) { - lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF); - rc = 0; - } - return rc; -} - -int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len) -{ - int rc; - - rc = __lustre_unpack_msg(req->rq_repmsg, len); - if (rc == 1) { - lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF); - rc = 0; - } - return rc; -} - -static inline int lustre_unpack_ptlrpc_body_v2(struct ptlrpc_request *req, - const int inout, int offset) -{ - struct ptlrpc_body *pb; - struct lustre_msg_v2 *m = inout ? 
req->rq_reqmsg : req->rq_repmsg; - - pb = lustre_msg_buf_v2(m, offset, sizeof(struct ptlrpc_body_v2)); - if (!pb) { - CERROR("error unpacking ptlrpc body\n"); - return -EFAULT; - } - if (ptlrpc_buf_need_swab(req, inout, offset)) { - lustre_swab_ptlrpc_body(pb); - ptlrpc_buf_set_swabbed(req, inout, offset); - } - - if ((pb->pb_version & ~LUSTRE_VERSION_MASK) != PTLRPC_MSG_VERSION) { - CERROR("wrong lustre_msg version %08x\n", pb->pb_version); - return -EINVAL; - } - - if (!inout) - pb->pb_status = ptlrpc_status_ntoh(pb->pb_status); - - return 0; -} - -int lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset) -{ - switch (req->rq_reqmsg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_unpack_ptlrpc_body_v2(req, 1, offset); - default: - CERROR("bad lustre msg magic: %08x\n", - req->rq_reqmsg->lm_magic); - return -EINVAL; - } -} - -int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset) -{ - switch (req->rq_repmsg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_unpack_ptlrpc_body_v2(req, 0, offset); - default: - CERROR("bad lustre msg magic: %08x\n", - req->rq_repmsg->lm_magic); - return -EINVAL; - } -} - -static inline u32 lustre_msg_buflen_v2(struct lustre_msg_v2 *m, u32 n) -{ - if (n >= m->lm_bufcount) - return 0; - - return m->lm_buflens[n]; -} - -/** - * lustre_msg_buflen - return the length of buffer \a n in message \a m - * \param m lustre_msg (request or reply) to look at - * \param n message index (base 0) - * - * returns zero for non-existent message indices - */ -u32 lustre_msg_buflen(struct lustre_msg *m, u32 n) -{ - switch (m->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_msg_buflen_v2(m, n); - default: - CERROR("incorrect message magic: %08x\n", m->lm_magic); - return 0; - } -} -EXPORT_SYMBOL(lustre_msg_buflen); - -/* NB return the bufcount for lustre_msg_v2 format, so if message is packed - * in V1 format, the result is one bigger. (add struct ptlrpc_body). 
- */ -u32 lustre_msg_bufcount(struct lustre_msg *m) -{ - switch (m->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return m->lm_bufcount; - default: - CERROR("incorrect message magic: %08x\n", m->lm_magic); - return 0; - } -} - -char *lustre_msg_string(struct lustre_msg *m, u32 index, u32 max_len) -{ - /* max_len == 0 means the string should fill the buffer */ - char *str; - u32 slen, blen; - - switch (m->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - str = lustre_msg_buf_v2(m, index, 0); - blen = lustre_msg_buflen_v2(m, index); - break; - default: - LASSERTF(0, "incorrect message magic: %08x\n", m->lm_magic); - } - - if (!str) { - CERROR("can't unpack string in msg %p buffer[%d]\n", m, index); - return NULL; - } - - slen = strnlen(str, blen); - - if (slen == blen) { /* not NULL terminated */ - CERROR("can't unpack non-NULL terminated string in msg %p buffer[%d] len %d\n", - m, index, blen); - return NULL; - } - - if (max_len == 0) { - if (slen != blen - 1) { - CERROR("can't unpack short string in msg %p buffer[%d] len %d: strlen %d\n", - m, index, blen, slen); - return NULL; - } - } else if (slen > max_len) { - CERROR("can't unpack oversized string in msg %p buffer[%d] len %d strlen %d: max %d expected\n", - m, index, blen, slen, max_len); - return NULL; - } - - return str; -} - -/* Wrap up the normal fixed length cases */ -static inline void *__lustre_swab_buf(struct lustre_msg *msg, u32 index, - u32 min_size, void *swabber) -{ - void *ptr = NULL; - - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - ptr = lustre_msg_buf_v2(msg, index, min_size); - break; - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - } - - if (ptr && swabber) - ((void (*)(void *))swabber)(ptr); - - return ptr; -} - -static inline struct ptlrpc_body *lustre_msg_ptlrpc_body(struct lustre_msg *msg) -{ - return lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF, - sizeof(struct ptlrpc_body_v2)); -} - -__u32 lustre_msghdr_get_flags(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - 
case LUSTRE_MSG_MAGIC_V2: - /* already in host endian */ - return msg->lm_flags; - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} -EXPORT_SYMBOL(lustre_msghdr_get_flags); - -void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - msg->lm_flags = flags; - return; - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -__u32 lustre_msg_get_flags(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (pb) - return pb->pb_flags; - - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - } - /* fall through */ - default: - /* flags might be printed in debug code while message - * uninitialized - */ - return 0; - } -} -EXPORT_SYMBOL(lustre_msg_get_flags); - -void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_flags |= flags; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} -EXPORT_SYMBOL(lustre_msg_add_flags); - -void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_flags = flags; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_flags &= ~(flags & MSG_GEN_FLAG_MASK); - return; - } - default: - LASSERTF(0, "incorrect message 
magic: %08x\n", msg->lm_magic); - } -} -EXPORT_SYMBOL(lustre_msg_clear_flags); - -__u32 lustre_msg_get_op_flags(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (pb) - return pb->pb_op_flags; - - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - } - /* fall through */ - default: - return 0; - } -} - -void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_op_flags |= flags; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} -EXPORT_SYMBOL(lustre_msg_add_op_flags); - -struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return NULL; - } - return &pb->pb_handle; - } - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return NULL; - } -} - -__u32 lustre_msg_get_type(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return PTL_RPC_MSG_ERR; - } - return pb->pb_type; - } - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return PTL_RPC_MSG_ERR; - } -} -EXPORT_SYMBOL(lustre_msg_get_type); - -void lustre_msg_add_version(struct lustre_msg *msg, u32 version) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_version |= version; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); 
- } -} - -__u32 lustre_msg_get_opc(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return 0; - } - return pb->pb_opc; - } - default: - CERROR("incorrect message magic: %08x (msg:%p)\n", - msg->lm_magic, msg); - return 0; - } -} -EXPORT_SYMBOL(lustre_msg_get_opc); - -__u16 lustre_msg_get_tag(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return 0; - } - return pb->pb_tag; - } - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} -EXPORT_SYMBOL(lustre_msg_get_tag); - -__u64 lustre_msg_get_last_committed(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return 0; - } - return pb->pb_last_committed; - } - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} -EXPORT_SYMBOL(lustre_msg_get_last_committed); - -__u64 *lustre_msg_get_versions(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return NULL; - } - return pb->pb_pre_versions; - } - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return NULL; - } -} -EXPORT_SYMBOL(lustre_msg_get_versions); - -__u64 lustre_msg_get_transno(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return 0; - } - return pb->pb_transno; - } - default: - 
CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} -EXPORT_SYMBOL(lustre_msg_get_transno); - -int lustre_msg_get_status(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (pb) - return pb->pb_status; - - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - } - /* fall through */ - default: - /* status might be printed in debug code while message - * uninitialized - */ - return -EINVAL; - } -} -EXPORT_SYMBOL(lustre_msg_get_status); - -__u64 lustre_msg_get_slv(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return -EINVAL; - } - return pb->pb_slv; - } - default: - CERROR("invalid msg magic %08x\n", msg->lm_magic); - return -EINVAL; - } -} - -void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return; - } - pb->pb_slv = slv; - return; - } - default: - CERROR("invalid msg magic %x\n", msg->lm_magic); - return; - } -} - -__u32 lustre_msg_get_limit(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return -EINVAL; - } - return pb->pb_limit; - } - default: - CERROR("invalid msg magic %x\n", msg->lm_magic); - return -EINVAL; - } -} - -void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return; - } - pb->pb_limit = limit; - return; - } - default: - 
CERROR("invalid msg magic %08x\n", msg->lm_magic); - return; - } -} - -__u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return 0; - } - return pb->pb_conn_cnt; - } - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} -EXPORT_SYMBOL(lustre_msg_get_conn_cnt); - -__u32 lustre_msg_get_magic(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return msg->lm_magic; - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} - -__u32 lustre_msg_get_timeout(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return 0; - } - return pb->pb_timeout; - } - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return -EPROTO; - } -} - -__u32 lustre_msg_get_service_time(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - if (!pb) { - CERROR("invalid msg %p: no ptlrpc body!\n", msg); - return 0; - } - return pb->pb_service_time; - } - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} - -__u32 lustre_msg_get_cksum(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return msg->lm_cksum; - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} - -__u32 lustre_msg_calc_cksum(struct lustre_msg *msg) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - __u32 crc; - unsigned int hsize = 4; - - cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, (unsigned char *)pb, - 
lustre_msg_buflen(msg, - MSG_PTLRPC_BODY_OFF), - NULL, 0, (unsigned char *)&crc, &hsize); - return crc; - } - default: - CERROR("incorrect message magic: %08x\n", msg->lm_magic); - return 0; - } -} - -void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_handle = *handle; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -void lustre_msg_set_type(struct lustre_msg *msg, __u32 type) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_type = type; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_opc = opc; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -void lustre_msg_set_last_xid(struct lustre_msg *msg, u64 last_xid) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_last_xid = last_xid; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -void lustre_msg_set_tag(struct lustre_msg *msg, __u16 tag) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_tag = tag; - return; - } - default: - LASSERTF(0, "incorrect message magic: 
%08x\n", msg->lm_magic); - } -} -EXPORT_SYMBOL(lustre_msg_set_tag); - -void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_pre_versions[0] = versions[0]; - pb->pb_pre_versions[1] = versions[1]; - pb->pb_pre_versions[2] = versions[2]; - pb->pb_pre_versions[3] = versions[3]; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} -EXPORT_SYMBOL(lustre_msg_set_versions); - -void lustre_msg_set_transno(struct lustre_msg *msg, __u64 transno) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_transno = transno; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} -EXPORT_SYMBOL(lustre_msg_set_transno); - -void lustre_msg_set_status(struct lustre_msg *msg, __u32 status) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_status = status; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} -EXPORT_SYMBOL(lustre_msg_set_status); - -void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_conn_cnt = conn_cnt; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - 
LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_timeout = timeout; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -void lustre_msg_set_service_time(struct lustre_msg *msg, __u32 service_time) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_service_time = service_time; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - __u32 opc = lustre_msg_get_opc(msg); - struct ptlrpc_body *pb; - - /* Don't set jobid for ldlm ast RPCs, they've been shrunk. - * See the comment in ptlrpc_request_pack(). - */ - if (!opc || opc == LDLM_BL_CALLBACK || - opc == LDLM_CP_CALLBACK || opc == LDLM_GL_CALLBACK) - return; - - pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF, - sizeof(struct ptlrpc_body)); - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - - if (jobid) - memcpy(pb->pb_jobid, jobid, LUSTRE_JOBID_SIZE); - else if (pb->pb_jobid[0] == '\0') - lustre_get_jobid(pb->pb_jobid); - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} -EXPORT_SYMBOL(lustre_msg_set_jobid); - -void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - msg->lm_cksum = cksum; - return; - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} - -void lustre_msg_set_mbits(struct lustre_msg *msg, __u64 mbits) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = lustre_msg_ptlrpc_body(msg); - - LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg); - pb->pb_mbits = mbits; - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} 
- -void ptlrpc_request_set_replen(struct ptlrpc_request *req) -{ - int count = req_capsule_filled_sizes(&req->rq_pill, RCL_SERVER); - - req->rq_replen = lustre_msg_size(req->rq_reqmsg->lm_magic, count, - req->rq_pill.rc_area[RCL_SERVER]); - if (req->rq_reqmsg->lm_magic == LUSTRE_MSG_MAGIC_V2) - req->rq_reqmsg->lm_repsize = req->rq_replen; -} -EXPORT_SYMBOL(ptlrpc_request_set_replen); - -/** - * Send a remote set_info_async. - * - * This may go from client to server or server to client. - */ -int do_set_info_async(struct obd_import *imp, - int opcode, int version, - u32 keylen, void *key, - u32 vallen, void *val, - struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *req; - char *tmp; - int rc; - - req = ptlrpc_request_alloc(imp, &RQF_OBD_SET_INFO); - if (!req) - return -ENOMEM; - - req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY, - RCL_CLIENT, keylen); - req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL, - RCL_CLIENT, vallen); - rc = ptlrpc_request_pack(req, version, opcode); - if (rc) { - ptlrpc_request_free(req); - return rc; - } - - tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY); - memcpy(tmp, key, keylen); - tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL); - memcpy(tmp, val, vallen); - - ptlrpc_request_set_replen(req); - - if (set) { - ptlrpc_set_add_req(set, req); - ptlrpc_check_set(NULL, set); - } else { - rc = ptlrpc_queue_wait(req); - ptlrpc_req_finished(req); - } - - return rc; -} -EXPORT_SYMBOL(do_set_info_async); - -/* byte flipping routines for all wire types declared in - * lustre_idl.h implemented here. 
- */ -void lustre_swab_ptlrpc_body(struct ptlrpc_body *b) -{ - __swab32s(&b->pb_type); - __swab32s(&b->pb_version); - __swab32s(&b->pb_opc); - __swab32s(&b->pb_status); - __swab64s(&b->pb_last_xid); - __swab16s(&b->pb_tag); - __swab64s(&b->pb_last_committed); - __swab64s(&b->pb_transno); - __swab32s(&b->pb_flags); - __swab32s(&b->pb_op_flags); - __swab32s(&b->pb_conn_cnt); - __swab32s(&b->pb_timeout); - __swab32s(&b->pb_service_time); - __swab32s(&b->pb_limit); - __swab64s(&b->pb_slv); - __swab64s(&b->pb_pre_versions[0]); - __swab64s(&b->pb_pre_versions[1]); - __swab64s(&b->pb_pre_versions[2]); - __swab64s(&b->pb_pre_versions[3]); - __swab64s(&b->pb_mbits); - BUILD_BUG_ON(offsetof(typeof(*b), pb_padding0) == 0); - BUILD_BUG_ON(offsetof(typeof(*b), pb_padding1) == 0); - BUILD_BUG_ON(offsetof(typeof(*b), pb_padding64_0) == 0); - BUILD_BUG_ON(offsetof(typeof(*b), pb_padding64_1) == 0); - BUILD_BUG_ON(offsetof(typeof(*b), pb_padding64_2) == 0); - /* While we need to maintain compatibility between - * clients and servers without ptlrpc_body_v2 (< 2.3) - * do not swab any fields beyond pb_jobid, as we are - * using this swab function for both ptlrpc_body - * and ptlrpc_body_v2. - */ - BUILD_BUG_ON(offsetof(typeof(*b), pb_jobid) == 0); -} - -void lustre_swab_connect(struct obd_connect_data *ocd) -{ - __swab64s(&ocd->ocd_connect_flags); - __swab32s(&ocd->ocd_version); - __swab32s(&ocd->ocd_grant); - __swab64s(&ocd->ocd_ibits_known); - __swab32s(&ocd->ocd_index); - __swab32s(&ocd->ocd_brw_size); - /* ocd_blocksize and ocd_inodespace don't need to be swabbed because - * they are 8-byte values - */ - __swab16s(&ocd->ocd_grant_extent); - __swab32s(&ocd->ocd_unused); - __swab64s(&ocd->ocd_transno); - __swab32s(&ocd->ocd_group); - __swab32s(&ocd->ocd_cksum_types); - __swab32s(&ocd->ocd_instance); - /* Fields after ocd_cksum_types are only accessible by the receiver - * if the corresponding flag in ocd_connect_flags is set. 
Accessing - * any field after ocd_maxbytes on the receiver without a valid flag - * may result in out-of-bound memory access and kernel oops. - */ - if (ocd->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE) - __swab32s(&ocd->ocd_max_easize); - if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES) - __swab64s(&ocd->ocd_maxbytes); - if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) - __swab16s(&ocd->ocd_maxmodrpcs); - BUILD_BUG_ON(!offsetof(typeof(*ocd), padding0)); - BUILD_BUG_ON(offsetof(typeof(*ocd), padding1) == 0); - if (ocd->ocd_connect_flags & OBD_CONNECT_FLAGS2) - __swab64s(&ocd->ocd_connect_flags2); - BUILD_BUG_ON(offsetof(typeof(*ocd), padding3) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), padding4) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), padding5) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), padding6) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), padding7) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), padding8) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), padding9) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), paddingA) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), paddingB) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), paddingC) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), paddingD) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), paddingE) == 0); - BUILD_BUG_ON(offsetof(typeof(*ocd), paddingF) == 0); -} - -static void lustre_swab_obdo(struct obdo *o) -{ - __swab64s(&o->o_valid); - lustre_swab_ost_id(&o->o_oi); - __swab64s(&o->o_parent_seq); - __swab64s(&o->o_size); - __swab64s(&o->o_mtime); - __swab64s(&o->o_atime); - __swab64s(&o->o_ctime); - __swab64s(&o->o_blocks); - __swab64s(&o->o_grant); - __swab32s(&o->o_blksize); - __swab32s(&o->o_mode); - __swab32s(&o->o_uid); - __swab32s(&o->o_gid); - __swab32s(&o->o_flags); - __swab32s(&o->o_nlink); - __swab32s(&o->o_parent_oid); - __swab32s(&o->o_misc); - __swab64s(&o->o_ioepoch); - __swab32s(&o->o_stripe_idx); - __swab32s(&o->o_parent_ver); - /* o_handle is opaque */ - /* o_lcookie is swabbed elsewhere */ - 
__swab32s(&o->o_uid_h); - __swab32s(&o->o_gid_h); - __swab64s(&o->o_data_version); - BUILD_BUG_ON(offsetof(typeof(*o), o_padding_4) == 0); - BUILD_BUG_ON(offsetof(typeof(*o), o_padding_5) == 0); - BUILD_BUG_ON(offsetof(typeof(*o), o_padding_6) == 0); -} - -void lustre_swab_obd_statfs(struct obd_statfs *os) -{ - __swab64s(&os->os_type); - __swab64s(&os->os_blocks); - __swab64s(&os->os_bfree); - __swab64s(&os->os_bavail); - __swab64s(&os->os_files); - __swab64s(&os->os_ffree); - /* no need to swab os_fsid */ - __swab32s(&os->os_bsize); - __swab32s(&os->os_namelen); - __swab64s(&os->os_maxbytes); - __swab32s(&os->os_state); - BUILD_BUG_ON(offsetof(typeof(*os), os_fprecreated) == 0); - BUILD_BUG_ON(offsetof(typeof(*os), os_spare2) == 0); - BUILD_BUG_ON(offsetof(typeof(*os), os_spare3) == 0); - BUILD_BUG_ON(offsetof(typeof(*os), os_spare4) == 0); - BUILD_BUG_ON(offsetof(typeof(*os), os_spare5) == 0); - BUILD_BUG_ON(offsetof(typeof(*os), os_spare6) == 0); - BUILD_BUG_ON(offsetof(typeof(*os), os_spare7) == 0); - BUILD_BUG_ON(offsetof(typeof(*os), os_spare8) == 0); - BUILD_BUG_ON(offsetof(typeof(*os), os_spare9) == 0); -} - -void lustre_swab_obd_ioobj(struct obd_ioobj *ioo) -{ - lustre_swab_ost_id(&ioo->ioo_oid); - __swab32s(&ioo->ioo_max_brw); - __swab32s(&ioo->ioo_bufcnt); -} - -void lustre_swab_niobuf_remote(struct niobuf_remote *nbr) -{ - __swab64s(&nbr->rnb_offset); - __swab32s(&nbr->rnb_len); - __swab32s(&nbr->rnb_flags); -} - -void lustre_swab_ost_body(struct ost_body *b) -{ - lustre_swab_obdo(&b->oa); -} - -void lustre_swab_ost_last_id(u64 *id) -{ - __swab64s(id); -} - -void lustre_swab_generic_32s(__u32 *val) -{ - __swab32s(val); -} - -void lustre_swab_gl_desc(union ldlm_gl_desc *desc) -{ - lustre_swab_lu_fid(&desc->lquota_desc.gl_id.qid_fid); - __swab64s(&desc->lquota_desc.gl_flags); - __swab64s(&desc->lquota_desc.gl_ver); - __swab64s(&desc->lquota_desc.gl_hardlimit); - __swab64s(&desc->lquota_desc.gl_softlimit); - __swab64s(&desc->lquota_desc.gl_time); - 
BUILD_BUG_ON(offsetof(typeof(desc->lquota_desc), gl_pad2) == 0); -} - -void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb) -{ - __swab64s(&lvb->lvb_size); - __swab64s(&lvb->lvb_mtime); - __swab64s(&lvb->lvb_atime); - __swab64s(&lvb->lvb_ctime); - __swab64s(&lvb->lvb_blocks); -} -EXPORT_SYMBOL(lustre_swab_ost_lvb_v1); - -void lustre_swab_ost_lvb(struct ost_lvb *lvb) -{ - __swab64s(&lvb->lvb_size); - __swab64s(&lvb->lvb_mtime); - __swab64s(&lvb->lvb_atime); - __swab64s(&lvb->lvb_ctime); - __swab64s(&lvb->lvb_blocks); - __swab32s(&lvb->lvb_mtime_ns); - __swab32s(&lvb->lvb_atime_ns); - __swab32s(&lvb->lvb_ctime_ns); - __swab32s(&lvb->lvb_padding); -} -EXPORT_SYMBOL(lustre_swab_ost_lvb); - -void lustre_swab_lquota_lvb(struct lquota_lvb *lvb) -{ - __swab64s(&lvb->lvb_flags); - __swab64s(&lvb->lvb_id_may_rel); - __swab64s(&lvb->lvb_id_rel); - __swab64s(&lvb->lvb_id_qunit); - __swab64s(&lvb->lvb_pad1); -} -EXPORT_SYMBOL(lustre_swab_lquota_lvb); - -void lustre_swab_mdt_body(struct mdt_body *b) -{ - lustre_swab_lu_fid(&b->mbo_fid1); - lustre_swab_lu_fid(&b->mbo_fid2); - /* handle is opaque */ - __swab64s(&b->mbo_valid); - __swab64s(&b->mbo_size); - __swab64s(&b->mbo_mtime); - __swab64s(&b->mbo_atime); - __swab64s(&b->mbo_ctime); - __swab64s(&b->mbo_blocks); - __swab64s(&b->mbo_ioepoch); - __swab64s(&b->mbo_t_state); - __swab32s(&b->mbo_fsuid); - __swab32s(&b->mbo_fsgid); - __swab32s(&b->mbo_capability); - __swab32s(&b->mbo_mode); - __swab32s(&b->mbo_uid); - __swab32s(&b->mbo_gid); - __swab32s(&b->mbo_flags); - __swab32s(&b->mbo_rdev); - __swab32s(&b->mbo_nlink); - BUILD_BUG_ON(offsetof(typeof(*b), mbo_unused2) == 0); - __swab32s(&b->mbo_suppgid); - __swab32s(&b->mbo_eadatasize); - __swab32s(&b->mbo_aclsize); - __swab32s(&b->mbo_max_mdsize); - BUILD_BUG_ON(!offsetof(typeof(*b), mbo_unused3)); - __swab32s(&b->mbo_uid_h); - __swab32s(&b->mbo_gid_h); - BUILD_BUG_ON(offsetof(typeof(*b), mbo_padding_5) == 0); -} - -void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b) -{ - /* handle 
is opaque */ - /* mio_handle is opaque */ - BUILD_BUG_ON(!offsetof(typeof(*b), mio_unused1)); - BUILD_BUG_ON(!offsetof(typeof(*b), mio_unused2)); - BUILD_BUG_ON(!offsetof(typeof(*b), mio_padding)); -} - -void lustre_swab_mgs_target_info(struct mgs_target_info *mti) -{ - int i; - - __swab32s(&mti->mti_lustre_ver); - __swab32s(&mti->mti_stripe_index); - __swab32s(&mti->mti_config_ver); - __swab32s(&mti->mti_flags); - __swab32s(&mti->mti_instance); - __swab32s(&mti->mti_nid_count); - BUILD_BUG_ON(sizeof(lnet_nid_t) != sizeof(__u64)); - for (i = 0; i < MTI_NIDS_MAX; i++) - __swab64s(&mti->mti_nids[i]); -} - -void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry) -{ - __u8 i; - - __swab64s(&entry->mne_version); - __swab32s(&entry->mne_instance); - __swab32s(&entry->mne_index); - __swab32s(&entry->mne_length); - - /* mne_nid_(count|type) must be one byte size because we're gonna - * access it w/o swapping. */ - BUILD_BUG_ON(sizeof(entry->mne_nid_count) != sizeof(__u8)); - BUILD_BUG_ON(sizeof(entry->mne_nid_type) != sizeof(__u8)); - - /* remove this assertion if ipv6 is supported. 
*/ - LASSERT(entry->mne_nid_type == 0); - for (i = 0; i < entry->mne_nid_count; i++) { - BUILD_BUG_ON(sizeof(lnet_nid_t) != sizeof(__u64)); - __swab64s(&entry->u.nids[i]); - } -} -EXPORT_SYMBOL(lustre_swab_mgs_nidtbl_entry); - -void lustre_swab_mgs_config_body(struct mgs_config_body *body) -{ - __swab64s(&body->mcb_offset); - __swab32s(&body->mcb_units); - __swab16s(&body->mcb_type); -} - -void lustre_swab_mgs_config_res(struct mgs_config_res *body) -{ - __swab64s(&body->mcr_offset); - __swab64s(&body->mcr_size); -} - -static void lustre_swab_obd_dqinfo(struct obd_dqinfo *i) -{ - __swab64s(&i->dqi_bgrace); - __swab64s(&i->dqi_igrace); - __swab32s(&i->dqi_flags); - __swab32s(&i->dqi_valid); -} - -static void lustre_swab_obd_dqblk(struct obd_dqblk *b) -{ - __swab64s(&b->dqb_ihardlimit); - __swab64s(&b->dqb_isoftlimit); - __swab64s(&b->dqb_curinodes); - __swab64s(&b->dqb_bhardlimit); - __swab64s(&b->dqb_bsoftlimit); - __swab64s(&b->dqb_curspace); - __swab64s(&b->dqb_btime); - __swab64s(&b->dqb_itime); - __swab32s(&b->dqb_valid); - BUILD_BUG_ON(offsetof(typeof(*b), dqb_padding) == 0); -} - -void lustre_swab_obd_quotactl(struct obd_quotactl *q) -{ - __swab32s(&q->qc_cmd); - __swab32s(&q->qc_type); - __swab32s(&q->qc_id); - __swab32s(&q->qc_stat); - lustre_swab_obd_dqinfo(&q->qc_dqinfo); - lustre_swab_obd_dqblk(&q->qc_dqblk); -} - -void lustre_swab_fid2path(struct getinfo_fid2path *gf) -{ - lustre_swab_lu_fid(&gf->gf_fid); - __swab64s(&gf->gf_recno); - __swab32s(&gf->gf_linkno); - __swab32s(&gf->gf_pathlen); -} -EXPORT_SYMBOL(lustre_swab_fid2path); - -static void lustre_swab_fiemap_extent(struct fiemap_extent *fm_extent) -{ - __swab64s(&fm_extent->fe_logical); - __swab64s(&fm_extent->fe_physical); - __swab64s(&fm_extent->fe_length); - __swab32s(&fm_extent->fe_flags); - __swab32s(&fm_extent->fe_device); -} - -void lustre_swab_fiemap(struct fiemap *fiemap) -{ - __u32 i; - - __swab64s(&fiemap->fm_start); - __swab64s(&fiemap->fm_length); - __swab32s(&fiemap->fm_flags); - 
__swab32s(&fiemap->fm_mapped_extents); - __swab32s(&fiemap->fm_extent_count); - __swab32s(&fiemap->fm_reserved); - - for (i = 0; i < fiemap->fm_mapped_extents; i++) - lustre_swab_fiemap_extent(&fiemap->fm_extents[i]); -} - -void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr) -{ - __swab32s(&rr->rr_opcode); - __swab32s(&rr->rr_cap); - __swab32s(&rr->rr_fsuid); - /* rr_fsuid_h is unused */ - __swab32s(&rr->rr_fsgid); - /* rr_fsgid_h is unused */ - __swab32s(&rr->rr_suppgid1); - /* rr_suppgid1_h is unused */ - __swab32s(&rr->rr_suppgid2); - /* rr_suppgid2_h is unused */ - lustre_swab_lu_fid(&rr->rr_fid1); - lustre_swab_lu_fid(&rr->rr_fid2); - __swab64s(&rr->rr_mtime); - __swab64s(&rr->rr_atime); - __swab64s(&rr->rr_ctime); - __swab64s(&rr->rr_size); - __swab64s(&rr->rr_blocks); - __swab32s(&rr->rr_bias); - __swab32s(&rr->rr_mode); - __swab32s(&rr->rr_flags); - __swab32s(&rr->rr_flags_h); - __swab32s(&rr->rr_umask); - - BUILD_BUG_ON(offsetof(typeof(*rr), rr_padding_4) == 0); -}; - -void lustre_swab_lov_desc(struct lov_desc *ld) -{ - __swab32s(&ld->ld_tgt_count); - __swab32s(&ld->ld_active_tgt_count); - __swab32s(&ld->ld_default_stripe_count); - __swab32s(&ld->ld_pattern); - __swab64s(&ld->ld_default_stripe_size); - __swab64s(&ld->ld_default_stripe_offset); - __swab32s(&ld->ld_qos_maxage); - /* uuid endian insensitive */ -} -EXPORT_SYMBOL(lustre_swab_lov_desc); - -/* This structure is always in little-endian */ -static void lustre_swab_lmv_mds_md_v1(struct lmv_mds_md_v1 *lmm1) -{ - int i; - - __swab32s(&lmm1->lmv_magic); - __swab32s(&lmm1->lmv_stripe_count); - __swab32s(&lmm1->lmv_master_mdt_index); - __swab32s(&lmm1->lmv_hash_type); - __swab32s(&lmm1->lmv_layout_version); - for (i = 0; i < lmm1->lmv_stripe_count; i++) - lustre_swab_lu_fid(&lmm1->lmv_stripe_fids[i]); -} - -void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm) -{ - switch (lmm->lmv_magic) { - case LMV_MAGIC_V1: - lustre_swab_lmv_mds_md_v1(&lmm->lmv_md_v1); - break; - default: - break; - } -} 
-EXPORT_SYMBOL(lustre_swab_lmv_mds_md); - -void lustre_swab_lmv_user_md(struct lmv_user_md *lum) -{ - __swab32s(&lum->lum_magic); - __swab32s(&lum->lum_stripe_count); - __swab32s(&lum->lum_stripe_offset); - __swab32s(&lum->lum_hash_type); - __swab32s(&lum->lum_type); - BUILD_BUG_ON(!offsetof(typeof(*lum), lum_padding1)); -} -EXPORT_SYMBOL(lustre_swab_lmv_user_md); - -static void lustre_swab_lmm_oi(struct ost_id *oi) -{ - __swab64s(&oi->oi.oi_id); - __swab64s(&oi->oi.oi_seq); -} - -static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum) -{ - __swab32s(&lum->lmm_magic); - __swab32s(&lum->lmm_pattern); - lustre_swab_lmm_oi(&lum->lmm_oi); - __swab32s(&lum->lmm_stripe_size); - __swab16s(&lum->lmm_stripe_count); - __swab16s(&lum->lmm_stripe_offset); -} - -void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum) -{ - CDEBUG(D_IOCTL, "swabbing lov_user_md v1\n"); - lustre_swab_lov_user_md_common(lum); -} -EXPORT_SYMBOL(lustre_swab_lov_user_md_v1); - -void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum) -{ - CDEBUG(D_IOCTL, "swabbing lov_user_md v3\n"); - lustre_swab_lov_user_md_common((struct lov_user_md_v1 *)lum); - /* lmm_pool_name nothing to do with char */ -} -EXPORT_SYMBOL(lustre_swab_lov_user_md_v3); - -void lustre_swab_lov_mds_md(struct lov_mds_md *lmm) -{ - CDEBUG(D_IOCTL, "swabbing lov_mds_md\n"); - __swab32s(&lmm->lmm_magic); - __swab32s(&lmm->lmm_pattern); - lustre_swab_lmm_oi(&lmm->lmm_oi); - __swab32s(&lmm->lmm_stripe_size); - __swab16s(&lmm->lmm_stripe_count); - __swab16s(&lmm->lmm_layout_gen); -} -EXPORT_SYMBOL(lustre_swab_lov_mds_md); - -void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod, - int stripe_count) -{ - int i; - - for (i = 0; i < stripe_count; i++) { - lustre_swab_ost_id(&lod[i].l_ost_oi); - __swab32s(&lod[i].l_ost_gen); - __swab32s(&lod[i].l_ost_idx); - } -} -EXPORT_SYMBOL(lustre_swab_lov_user_md_objects); - -static void lustre_swab_ldlm_res_id(struct ldlm_res_id *id) -{ - int i; - - for (i = 0; i < 
RES_NAME_SIZE; i++) - __swab64s(&id->name[i]); -} - -static void lustre_swab_ldlm_policy_data(union ldlm_wire_policy_data *d) -{ - /* the lock data is a union and the first two fields are always an - * extent so it's ok to process an LDLM_EXTENT and LDLM_FLOCK lock - * data the same way. - */ - __swab64s(&d->l_extent.start); - __swab64s(&d->l_extent.end); - __swab64s(&d->l_extent.gid); - __swab64s(&d->l_flock.lfw_owner); - __swab32s(&d->l_flock.lfw_pid); -} - -void lustre_swab_ldlm_intent(struct ldlm_intent *i) -{ - __swab64s(&i->opc); -} - -static void lustre_swab_ldlm_resource_desc(struct ldlm_resource_desc *r) -{ - __swab32s(&r->lr_type); - BUILD_BUG_ON(offsetof(typeof(*r), lr_padding) == 0); - lustre_swab_ldlm_res_id(&r->lr_name); -} - -static void lustre_swab_ldlm_lock_desc(struct ldlm_lock_desc *l) -{ - lustre_swab_ldlm_resource_desc(&l->l_resource); - __swab32s(&l->l_req_mode); - __swab32s(&l->l_granted_mode); - lustre_swab_ldlm_policy_data(&l->l_policy_data); -} - -void lustre_swab_ldlm_request(struct ldlm_request *rq) -{ - __swab32s(&rq->lock_flags); - lustre_swab_ldlm_lock_desc(&rq->lock_desc); - __swab32s(&rq->lock_count); - /* lock_handle[] opaque */ -} - -void lustre_swab_ldlm_reply(struct ldlm_reply *r) -{ - __swab32s(&r->lock_flags); - BUILD_BUG_ON(offsetof(typeof(*r), lock_padding) == 0); - lustre_swab_ldlm_lock_desc(&r->lock_desc); - /* lock_handle opaque */ - __swab64s(&r->lock_policy_res1); - __swab64s(&r->lock_policy_res2); -} - -/* Dump functions */ -void dump_ioo(struct obd_ioobj *ioo) -{ - CDEBUG(D_RPCTRACE, - "obd_ioobj: ioo_oid=" DOSTID ", ioo_max_brw=%#x, ioo_bufct=%d\n", - POSTID(&ioo->ioo_oid), ioo->ioo_max_brw, - ioo->ioo_bufcnt); -} - -void dump_rniobuf(struct niobuf_remote *nb) -{ - CDEBUG(D_RPCTRACE, "niobuf_remote: offset=%llu, len=%d, flags=%x\n", - nb->rnb_offset, nb->rnb_len, nb->rnb_flags); -} - -static void dump_obdo(struct obdo *oa) -{ - __u32 valid = oa->o_valid; - - CDEBUG(D_RPCTRACE, "obdo: o_valid = %08x\n", valid); - if 
(valid & OBD_MD_FLID) - CDEBUG(D_RPCTRACE, "obdo: id = " DOSTID "\n", POSTID(&oa->o_oi)); - if (valid & OBD_MD_FLFID) - CDEBUG(D_RPCTRACE, "obdo: o_parent_seq = %#llx\n", - oa->o_parent_seq); - if (valid & OBD_MD_FLSIZE) - CDEBUG(D_RPCTRACE, "obdo: o_size = %lld\n", oa->o_size); - if (valid & OBD_MD_FLMTIME) - CDEBUG(D_RPCTRACE, "obdo: o_mtime = %lld\n", oa->o_mtime); - if (valid & OBD_MD_FLATIME) - CDEBUG(D_RPCTRACE, "obdo: o_atime = %lld\n", oa->o_atime); - if (valid & OBD_MD_FLCTIME) - CDEBUG(D_RPCTRACE, "obdo: o_ctime = %lld\n", oa->o_ctime); - if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - CDEBUG(D_RPCTRACE, "obdo: o_blocks = %lld\n", oa->o_blocks); - if (valid & OBD_MD_FLGRANT) - CDEBUG(D_RPCTRACE, "obdo: o_grant = %lld\n", oa->o_grant); - if (valid & OBD_MD_FLBLKSZ) - CDEBUG(D_RPCTRACE, "obdo: o_blksize = %d\n", oa->o_blksize); - if (valid & (OBD_MD_FLTYPE | OBD_MD_FLMODE)) - CDEBUG(D_RPCTRACE, "obdo: o_mode = %o\n", - oa->o_mode & ((valid & OBD_MD_FLTYPE ? S_IFMT : 0) | - (valid & OBD_MD_FLMODE ? 
~S_IFMT : 0))); - if (valid & OBD_MD_FLUID) - CDEBUG(D_RPCTRACE, "obdo: o_uid = %u\n", oa->o_uid); - if (valid & OBD_MD_FLUID) - CDEBUG(D_RPCTRACE, "obdo: o_uid_h = %u\n", oa->o_uid_h); - if (valid & OBD_MD_FLGID) - CDEBUG(D_RPCTRACE, "obdo: o_gid = %u\n", oa->o_gid); - if (valid & OBD_MD_FLGID) - CDEBUG(D_RPCTRACE, "obdo: o_gid_h = %u\n", oa->o_gid_h); - if (valid & OBD_MD_FLFLAGS) - CDEBUG(D_RPCTRACE, "obdo: o_flags = %x\n", oa->o_flags); - if (valid & OBD_MD_FLNLINK) - CDEBUG(D_RPCTRACE, "obdo: o_nlink = %u\n", oa->o_nlink); - else if (valid & OBD_MD_FLCKSUM) - CDEBUG(D_RPCTRACE, "obdo: o_checksum (o_nlink) = %u\n", - oa->o_nlink); - if (valid & OBD_MD_FLGENER) - CDEBUG(D_RPCTRACE, "obdo: o_parent_oid = %x\n", - oa->o_parent_oid); - if (valid & OBD_MD_FLEPOCH) - CDEBUG(D_RPCTRACE, "obdo: o_ioepoch = %lld\n", - oa->o_ioepoch); - if (valid & OBD_MD_FLFID) { - CDEBUG(D_RPCTRACE, "obdo: o_stripe_idx = %u\n", - oa->o_stripe_idx); - CDEBUG(D_RPCTRACE, "obdo: o_parent_ver = %x\n", - oa->o_parent_ver); - } - if (valid & OBD_MD_FLHANDLE) - CDEBUG(D_RPCTRACE, "obdo: o_handle = %lld\n", - oa->o_handle.cookie); -} - -void dump_ost_body(struct ost_body *ob) -{ - dump_obdo(&ob->oa); -} - -void dump_rcs(__u32 *rc) -{ - CDEBUG(D_RPCTRACE, "rmf_rcs: %d\n", *rc); -} - -static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req) -{ - LASSERT(req->rq_reqmsg); - - switch (req->rq_reqmsg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_req_swabbed(req, MSG_PTLRPC_BODY_OFF); - default: - CERROR("bad lustre msg magic: %#08X\n", - req->rq_reqmsg->lm_magic); - } - return 0; -} - -static inline int rep_ptlrpc_body_swabbed(struct ptlrpc_request *req) -{ - LASSERT(req->rq_repmsg); - - switch (req->rq_repmsg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: - return lustre_rep_swabbed(req, MSG_PTLRPC_BODY_OFF); - default: - /* uninitialized yet */ - return 0; - } -} - -void _debug_req(struct ptlrpc_request *req, - struct libcfs_debug_msg_data *msgdata, - const char *fmt, ...) 
-{ - int req_ok = req->rq_reqmsg != NULL; - int rep_ok = req->rq_repmsg != NULL; - lnet_nid_t nid = LNET_NID_ANY; - va_list args; - - if (ptlrpc_req_need_swab(req)) { - req_ok = req_ok && req_ptlrpc_body_swabbed(req); - rep_ok = rep_ok && rep_ptlrpc_body_swabbed(req); - } - - if (req->rq_import && req->rq_import->imp_connection) - nid = req->rq_import->imp_connection->c_peer.nid; - else if (req->rq_export && req->rq_export->exp_connection) - nid = req->rq_export->exp_connection->c_peer.nid; - - va_start(args, fmt); - libcfs_debug_vmsg2(msgdata, fmt, args, - " req@%p x%llu/t%lld(%lld) o%d->%s@%s:%d/%d lens %d/%d e %d to %lld dl %lld ref %d fl " REQ_FLAGS_FMT "/%x/%x rc %d/%d\n", - req, req->rq_xid, req->rq_transno, - req_ok ? lustre_msg_get_transno(req->rq_reqmsg) : 0, - req_ok ? lustre_msg_get_opc(req->rq_reqmsg) : -1, - req->rq_import ? - req->rq_import->imp_obd->obd_name : - req->rq_export ? - req->rq_export->exp_client_uuid.uuid : - "", - libcfs_nid2str(nid), - req->rq_request_portal, req->rq_reply_portal, - req->rq_reqlen, req->rq_replen, - req->rq_early_count, (s64)req->rq_timedout, - (s64)req->rq_deadline, - atomic_read(&req->rq_refcount), - DEBUG_REQ_FLAGS(req), - req_ok ? lustre_msg_get_flags(req->rq_reqmsg) : -1, - rep_ok ? lustre_msg_get_flags(req->rq_repmsg) : -1, - req->rq_status, - rep_ok ? 
lustre_msg_get_status(req->rq_repmsg) : -1); - va_end(args); -} -EXPORT_SYMBOL(_debug_req); - -void lustre_swab_lustre_capa(struct lustre_capa *c) -{ - lustre_swab_lu_fid(&c->lc_fid); - __swab64s(&c->lc_opc); - __swab64s(&c->lc_uid); - __swab64s(&c->lc_gid); - __swab32s(&c->lc_flags); - __swab32s(&c->lc_keyid); - __swab32s(&c->lc_timeout); - __swab32s(&c->lc_expiry); -} - -void lustre_swab_hsm_user_state(struct hsm_user_state *state) -{ - __swab32s(&state->hus_states); - __swab32s(&state->hus_archive_id); -} - -void lustre_swab_hsm_state_set(struct hsm_state_set *hss) -{ - __swab32s(&hss->hss_valid); - __swab64s(&hss->hss_setmask); - __swab64s(&hss->hss_clearmask); - __swab32s(&hss->hss_archive_id); -} -EXPORT_SYMBOL(lustre_swab_hsm_state_set); - -static void lustre_swab_hsm_extent(struct hsm_extent *extent) -{ - __swab64s(&extent->offset); - __swab64s(&extent->length); -} - -void lustre_swab_hsm_current_action(struct hsm_current_action *action) -{ - __swab32s(&action->hca_state); - __swab32s(&action->hca_action); - lustre_swab_hsm_extent(&action->hca_location); -} - -void lustre_swab_hsm_user_item(struct hsm_user_item *hui) -{ - lustre_swab_lu_fid(&hui->hui_fid); - lustre_swab_hsm_extent(&hui->hui_extent); -} - -void lustre_swab_layout_intent(struct layout_intent *li) -{ - __swab32s(&li->li_opc); - __swab32s(&li->li_flags); - __swab64s(&li->li_start); - __swab64s(&li->li_end); -} - -void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk) -{ - lustre_swab_lu_fid(&hpk->hpk_fid); - __swab64s(&hpk->hpk_cookie); - __swab64s(&hpk->hpk_extent.offset); - __swab64s(&hpk->hpk_extent.length); - __swab16s(&hpk->hpk_flags); - __swab16s(&hpk->hpk_errval); -} - -void lustre_swab_hsm_request(struct hsm_request *hr) -{ - __swab32s(&hr->hr_action); - __swab32s(&hr->hr_archive_id); - __swab64s(&hr->hr_flags); - __swab32s(&hr->hr_itemcount); - __swab32s(&hr->hr_data_len); -} - -void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl) -{ - __swab64s(&msl->msl_flags); 
-} -EXPORT_SYMBOL(lustre_swab_swap_layouts); - -void lustre_swab_close_data(struct close_data *cd) -{ - lustre_swab_lu_fid(&cd->cd_fid); - __swab64s(&cd->cd_data_version); -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c deleted file mode 100644 index 2466868afb9c..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/pers.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2014, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -void ptlrpc_fill_bulk_md(struct lnet_md *md, struct ptlrpc_bulk_desc *desc, - int mdidx) -{ - int offset = mdidx * LNET_MAX_IOV; - - BUILD_BUG_ON(PTLRPC_MAX_BRW_PAGES >= LI_POISON); - - LASSERT(mdidx < desc->bd_md_max_brw); - LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); - LASSERT(!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | - LNET_MD_PHYS))); - - md->length = max(0, desc->bd_iov_count - mdidx * LNET_MAX_IOV); - md->length = min_t(unsigned int, LNET_MAX_IOV, md->length); - - if (ptlrpc_is_bulk_desc_kiov(desc->bd_type)) { - md->options |= LNET_MD_KIOV; - if (GET_ENC_KIOV(desc)) - md->start = &BD_GET_ENC_KIOV(desc, offset); - else - md->start = &BD_GET_KIOV(desc, offset); - } else { - md->options |= LNET_MD_IOVEC; - if (GET_ENC_KVEC(desc)) - md->start = &BD_GET_ENC_KVEC(desc, offset); - else - md->start = &BD_GET_KVEC(desc, offset); - } -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c deleted file mode 100644 index b3297b5ce395..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c +++ /dev/null @@ -1,474 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/pinger.c - * - * Portal-RPC reconnection and replay operations, for use in recovery. - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -#include -#include "ptlrpc_internal.h" - -struct mutex pinger_mutex; -static LIST_HEAD(pinger_imports); -static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list); - -struct ptlrpc_request * -ptlrpc_prep_ping(struct obd_import *imp) -{ - struct ptlrpc_request *req; - - req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, - LUSTRE_OBD_VERSION, OBD_PING); - if (req) { - ptlrpc_request_set_replen(req); - req->rq_no_resend = 1; - req->rq_no_delay = 1; - } - return req; -} - -int ptlrpc_obd_ping(struct obd_device *obd) -{ - int rc; - struct ptlrpc_request *req; - - req = ptlrpc_prep_ping(obd->u.cli.cl_import); - if (!req) - return -ENOMEM; - - req->rq_send_state = LUSTRE_IMP_FULL; - - rc = ptlrpc_queue_wait(req); - - ptlrpc_req_finished(req); - - return rc; -} -EXPORT_SYMBOL(ptlrpc_obd_ping); - -static int ptlrpc_ping(struct obd_import *imp) -{ - struct ptlrpc_request *req; - - req = ptlrpc_prep_ping(imp); - if (!req) { - CERROR("OOM trying to ping %s->%s\n", - imp->imp_obd->obd_uuid.uuid, - obd2cli_tgt(imp->imp_obd)); - return -ENOMEM; - } - - DEBUG_REQ(D_INFO, req, "pinging %s->%s", - imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); - ptlrpcd_add_req(req); - - return 0; -} - -static void ptlrpc_update_next_ping(struct obd_import *imp, int soon) -{ - int time = soon ? 
PING_INTERVAL_SHORT : PING_INTERVAL; - - if (imp->imp_state == LUSTRE_IMP_DISCON) { - int dtime = max_t(int, CONNECTION_SWITCH_MIN, - AT_OFF ? 0 : - at_get(&imp->imp_at.iat_net_latency)); - time = min(time, dtime); - } - imp->imp_next_ping = jiffies + time * HZ; -} - -static inline int imp_is_deactive(struct obd_import *imp) -{ - return (imp->imp_deactive || - OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE)); -} - -static inline int ptlrpc_next_reconnect(struct obd_import *imp) -{ - if (imp->imp_server_timeout) - return jiffies + obd_timeout / 2 * HZ; - else - return jiffies + obd_timeout * HZ; -} - -static long pinger_check_timeout(unsigned long time) -{ - struct timeout_item *item; - unsigned long timeout = PING_INTERVAL; - - /* The timeout list is a increase order sorted list */ - mutex_lock(&pinger_mutex); - list_for_each_entry(item, &timeout_list, ti_chain) { - int ti_timeout = item->ti_timeout; - - if (timeout > ti_timeout) - timeout = ti_timeout; - break; - } - mutex_unlock(&pinger_mutex); - - return time + timeout * HZ - jiffies; -} - -static bool ir_up; - -void ptlrpc_pinger_ir_up(void) -{ - CDEBUG(D_HA, "IR up\n"); - ir_up = true; -} -EXPORT_SYMBOL(ptlrpc_pinger_ir_up); - -void ptlrpc_pinger_ir_down(void) -{ - CDEBUG(D_HA, "IR down\n"); - ir_up = false; -} -EXPORT_SYMBOL(ptlrpc_pinger_ir_down); - -static void ptlrpc_pinger_process_import(struct obd_import *imp, - unsigned long this_ping) -{ - int level; - int force; - int force_next; - int suppress; - - spin_lock(&imp->imp_lock); - - level = imp->imp_state; - force = imp->imp_force_verify; - force_next = imp->imp_force_next_verify; - /* - * This will be used below only if the import is "FULL". 
- */ - suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS); - - imp->imp_force_verify = 0; - - if (time_after_eq(imp->imp_next_ping - 5, this_ping) && - !force) { - spin_unlock(&imp->imp_lock); - return; - } - - imp->imp_force_next_verify = 0; - - spin_unlock(&imp->imp_lock); - - CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n", - imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd), - ptlrpc_import_state_name(level), level, force, force_next, - imp->imp_deactive, imp->imp_pingable, suppress); - - if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) { - /* wait for a while before trying recovery again */ - imp->imp_next_ping = ptlrpc_next_reconnect(imp); - if (!imp->imp_no_pinger_recover) - ptlrpc_initiate_recovery(imp); - } else if (level != LUSTRE_IMP_FULL || - imp->imp_obd->obd_no_recov || - imp_is_deactive(imp)) { - CDEBUG(D_HA, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n", - imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd), - ptlrpc_import_state_name(level)); - if (force) { - spin_lock(&imp->imp_lock); - imp->imp_force_verify = 1; - spin_unlock(&imp->imp_lock); - } - } else if ((imp->imp_pingable && !suppress) || force_next || force) { - ptlrpc_ping(imp); - } -} - -static struct workqueue_struct *pinger_wq; -static void ptlrpc_pinger_main(struct work_struct *ws); -static DECLARE_DELAYED_WORK(ping_work, ptlrpc_pinger_main); - -static void ptlrpc_pinger_main(struct work_struct *ws) -{ - unsigned long this_ping = jiffies; - long time_to_next_wake; - struct timeout_item *item; - struct obd_import *imp; - - do { - mutex_lock(&pinger_mutex); - list_for_each_entry(item, &timeout_list, ti_chain) { - item->ti_cb(item, item->ti_cb_data); - } - list_for_each_entry(imp, &pinger_imports, imp_pinger_chain) { - ptlrpc_pinger_process_import(imp, this_ping); - /* obd_timeout might have changed */ - if (imp->imp_pingable && imp->imp_next_ping && - 
time_after(imp->imp_next_ping, - this_ping + PING_INTERVAL * HZ)) - ptlrpc_update_next_ping(imp, 0); - } - mutex_unlock(&pinger_mutex); - - /* Wait until the next ping time, or until we're stopped. */ - time_to_next_wake = pinger_check_timeout(this_ping); - /* The ping sent by ptlrpc_send_rpc may get sent out - * say .01 second after this. - * ptlrpc_pinger_sending_on_import will then set the - * next ping time to next_ping + .01 sec, which means - * we will SKIP the next ping at next_ping, and the - * ping will get sent 2 timeouts from now! Beware. - */ - CDEBUG(D_INFO, "next wakeup in %ld (%ld)\n", - time_to_next_wake, - this_ping + PING_INTERVAL * HZ); - } while (time_to_next_wake <= 0); - - queue_delayed_work(pinger_wq, &ping_work, - round_jiffies_up_relative(time_to_next_wake)); -} - -int ptlrpc_start_pinger(void) -{ - if (pinger_wq) - return -EALREADY; - - pinger_wq = alloc_workqueue("ptlrpc_pinger", WQ_MEM_RECLAIM, 1); - if (!pinger_wq) { - CERROR("cannot start pinger workqueue\n"); - return -ENOMEM; - } - - queue_delayed_work(pinger_wq, &ping_work, 0); - return 0; -} - -static int ptlrpc_pinger_remove_timeouts(void); - -int ptlrpc_stop_pinger(void) -{ - int rc = 0; - - if (!pinger_wq) - return -EALREADY; - - ptlrpc_pinger_remove_timeouts(); - cancel_delayed_work_sync(&ping_work); - destroy_workqueue(pinger_wq); - pinger_wq = NULL; - - return rc; -} - -void ptlrpc_pinger_sending_on_import(struct obd_import *imp) -{ - ptlrpc_update_next_ping(imp, 0); -} - -void ptlrpc_pinger_commit_expected(struct obd_import *imp) -{ - ptlrpc_update_next_ping(imp, 1); - assert_spin_locked(&imp->imp_lock); - /* - * Avoid reading stale imp_connect_data. When not sure if pings are - * expected or not on next connection, we assume they are not and force - * one anyway to guarantee the chance of updating - * imp_peer_committed_transno. 
- */ - if (imp->imp_state != LUSTRE_IMP_FULL || - OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS)) - imp->imp_force_next_verify = 1; -} - -int ptlrpc_pinger_add_import(struct obd_import *imp) -{ - if (!list_empty(&imp->imp_pinger_chain)) - return -EALREADY; - - mutex_lock(&pinger_mutex); - CDEBUG(D_HA, "adding pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); - /* if we add to pinger we want recovery on this import */ - imp->imp_obd->obd_no_recov = 0; - ptlrpc_update_next_ping(imp, 0); - /* XXX sort, blah blah */ - list_add_tail(&imp->imp_pinger_chain, &pinger_imports); - class_import_get(imp); - - ptlrpc_pinger_wake_up(); - mutex_unlock(&pinger_mutex); - - return 0; -} -EXPORT_SYMBOL(ptlrpc_pinger_add_import); - -int ptlrpc_pinger_del_import(struct obd_import *imp) -{ - if (list_empty(&imp->imp_pinger_chain)) - return -ENOENT; - - mutex_lock(&pinger_mutex); - list_del_init(&imp->imp_pinger_chain); - CDEBUG(D_HA, "removing pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); - /* if we remove from pinger we don't want recovery on this import */ - imp->imp_obd->obd_no_recov = 1; - class_import_put(imp); - mutex_unlock(&pinger_mutex); - return 0; -} -EXPORT_SYMBOL(ptlrpc_pinger_del_import); - -/** - * Register a timeout callback to the pinger list, and the callback will - * be called when timeout happens. - */ -static struct timeout_item *ptlrpc_new_timeout(int time, - enum timeout_event event, - timeout_cb_t cb, void *data) -{ - struct timeout_item *ti; - - ti = kzalloc(sizeof(*ti), GFP_NOFS); - if (!ti) - return NULL; - - INIT_LIST_HEAD(&ti->ti_obd_list); - INIT_LIST_HEAD(&ti->ti_chain); - ti->ti_timeout = time; - ti->ti_event = event; - ti->ti_cb = cb; - ti->ti_cb_data = data; - - return ti; -} - -/** - * Register timeout event on the pinger thread. - * Note: the timeout list is an sorted list with increased timeout value. 
- */ -static struct timeout_item* -ptlrpc_pinger_register_timeout(int time, enum timeout_event event, - timeout_cb_t cb, void *data) -{ - struct timeout_item *item, *tmp; - - LASSERT(mutex_is_locked(&pinger_mutex)); - - list_for_each_entry(item, &timeout_list, ti_chain) - if (item->ti_event == event) - goto out; - - item = ptlrpc_new_timeout(time, event, cb, data); - if (item) { - list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) { - if (tmp->ti_timeout < time) { - list_add(&item->ti_chain, &tmp->ti_chain); - goto out; - } - } - list_add(&item->ti_chain, &timeout_list); - } -out: - return item; -} - -/* Add a client_obd to the timeout event list, when timeout(@time) - * happens, the callback(@cb) will be called. - */ -int ptlrpc_add_timeout_client(int time, enum timeout_event event, - timeout_cb_t cb, void *data, - struct list_head *obd_list) -{ - struct timeout_item *ti; - - mutex_lock(&pinger_mutex); - ti = ptlrpc_pinger_register_timeout(time, event, cb, data); - if (!ti) { - mutex_unlock(&pinger_mutex); - return -EINVAL; - } - list_add(obd_list, &ti->ti_obd_list); - mutex_unlock(&pinger_mutex); - return 0; -} -EXPORT_SYMBOL(ptlrpc_add_timeout_client); - -int ptlrpc_del_timeout_client(struct list_head *obd_list, - enum timeout_event event) -{ - struct timeout_item *ti = NULL, *item; - - if (list_empty(obd_list)) - return 0; - mutex_lock(&pinger_mutex); - list_del_init(obd_list); - /** - * If there are no obd attached to the timeout event - * list, remove this timeout event from the pinger - */ - list_for_each_entry(item, &timeout_list, ti_chain) { - if (item->ti_event == event) { - ti = item; - break; - } - } - if (list_empty(&ti->ti_obd_list)) { - list_del(&ti->ti_chain); - kfree(ti); - } - mutex_unlock(&pinger_mutex); - return 0; -} -EXPORT_SYMBOL(ptlrpc_del_timeout_client); - -static int ptlrpc_pinger_remove_timeouts(void) -{ - struct timeout_item *item, *tmp; - - mutex_lock(&pinger_mutex); - list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) 
{ - LASSERT(list_empty(&item->ti_obd_list)); - list_del(&item->ti_chain); - kfree(item); - } - mutex_unlock(&pinger_mutex); - return 0; -} - -void ptlrpc_pinger_wake_up(void) -{ - mod_delayed_work(pinger_wq, &ping_work, 0); -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h deleted file mode 100644 index 134b74234519..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h +++ /dev/null @@ -1,371 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -/* Intramodule declarations for ptlrpc. 
*/ - -#ifndef PTLRPC_INTERNAL_H -#define PTLRPC_INTERNAL_H - -#include "../ldlm/ldlm_internal.h" - -struct ldlm_namespace; -struct obd_import; -struct ldlm_res_id; -struct ptlrpc_request_set; -extern int test_req_buffer_pressure; -extern struct mutex ptlrpc_all_services_mutex; -extern struct list_head ptlrpc_all_services; - -extern struct mutex ptlrpcd_mutex; -extern struct mutex pinger_mutex; - -int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait); -/* ptlrpcd.c */ -int ptlrpcd_start(struct ptlrpcd_ctl *pc); - -/* client.c */ -void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req, - unsigned int service_time); -struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned int nfrags, - unsigned int max_brw, - enum ptlrpc_bulk_op_type type, - unsigned int portal, - const struct ptlrpc_bulk_frag_ops *ops); -int ptlrpc_request_cache_init(void); -void ptlrpc_request_cache_fini(void); -struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags); -void ptlrpc_request_cache_free(struct ptlrpc_request *req); -void ptlrpc_init_xid(void); -void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc, - struct ptlrpc_request *req); -void ptlrpc_expired_set(struct ptlrpc_request_set *set); -int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set); -void ptlrpc_resend_req(struct ptlrpc_request *request); -void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req); -void ptlrpc_assign_next_xid_nolock(struct ptlrpc_request *req); -__u64 ptlrpc_known_replied_xid(struct obd_import *imp); -void ptlrpc_add_unreplied(struct ptlrpc_request *req); - -/* events.c */ -int ptlrpc_init_portals(void); -void ptlrpc_exit_portals(void); - -void ptlrpc_request_handle_notconn(struct ptlrpc_request *req); -void lustre_assert_wire_constants(void); -int ptlrpc_import_in_recovery(struct obd_import *imp); -int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt); -int ptlrpc_replay_next(struct obd_import *imp, int *inflight); -void ptlrpc_initiate_recovery(struct obd_import *imp); - -int 
lustre_unpack_req_ptlrpc_body(struct ptlrpc_request *req, int offset); -int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset); - -int ptlrpc_sysfs_register_service(struct kset *parent, - struct ptlrpc_service *svc); -void ptlrpc_sysfs_unregister_service(struct ptlrpc_service *svc); - -void ptlrpc_ldebugfs_register_service(struct dentry *debugfs_entry, - struct ptlrpc_service *svc); -void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc); -void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount); - -/* NRS */ - -/** - * NRS core object. - * - * Holds NRS core fields. - */ -struct nrs_core { - /** - * Protects nrs_core::nrs_policies, serializes external policy - * registration/unregistration, and NRS core lprocfs operations. - */ - struct mutex nrs_mutex; - /** - * List of all policy descriptors registered with NRS core; protected - * by nrs_core::nrs_mutex. - */ - struct list_head nrs_policies; - -}; - -extern struct nrs_core nrs_core; - -int ptlrpc_service_nrs_setup(struct ptlrpc_service *svc); -void ptlrpc_service_nrs_cleanup(struct ptlrpc_service *svc); - -void ptlrpc_nrs_req_initialize(struct ptlrpc_service_part *svcpt, - struct ptlrpc_request *req, bool hp); -void ptlrpc_nrs_req_finalize(struct ptlrpc_request *req); -void ptlrpc_nrs_req_stop_nolock(struct ptlrpc_request *req); -void ptlrpc_nrs_req_add(struct ptlrpc_service_part *svcpt, - struct ptlrpc_request *req, bool hp); - -struct ptlrpc_request * -ptlrpc_nrs_req_get_nolock0(struct ptlrpc_service_part *svcpt, bool hp, - bool peek, bool force); - -static inline struct ptlrpc_request * -ptlrpc_nrs_req_get_nolock(struct ptlrpc_service_part *svcpt, bool hp, - bool force) -{ - return ptlrpc_nrs_req_get_nolock0(svcpt, hp, false, force); -} - -bool ptlrpc_nrs_req_pending_nolock(struct ptlrpc_service_part *svcpt, bool hp); - -int ptlrpc_nrs_policy_control(const struct ptlrpc_service *svc, - enum ptlrpc_nrs_queue_type queue, char *name, - enum ptlrpc_nrs_ctl opc, bool 
single, void *arg); - -int ptlrpc_nrs_init(void); -void ptlrpc_nrs_fini(void); - -static inline bool nrs_svcpt_has_hp(const struct ptlrpc_service_part *svcpt) -{ - return svcpt->scp_nrs_hp != NULL; -} - -static inline bool nrs_svc_has_hp(const struct ptlrpc_service *svc) -{ - /** - * If the first service partition has an HP NRS head, all service - * partitions will. - */ - return nrs_svcpt_has_hp(svc->srv_parts[0]); -} - -static inline -struct ptlrpc_nrs *nrs_svcpt2nrs(struct ptlrpc_service_part *svcpt, bool hp) -{ - LASSERT(ergo(hp, nrs_svcpt_has_hp(svcpt))); - return hp ? svcpt->scp_nrs_hp : &svcpt->scp_nrs_reg; -} - -static inline int nrs_pol2cptid(const struct ptlrpc_nrs_policy *policy) -{ - return policy->pol_nrs->nrs_svcpt->scp_cpt; -} - -static inline -struct ptlrpc_service *nrs_pol2svc(struct ptlrpc_nrs_policy *policy) -{ - return policy->pol_nrs->nrs_svcpt->scp_service; -} - -static inline -struct ptlrpc_service_part *nrs_pol2svcpt(struct ptlrpc_nrs_policy *policy) -{ - return policy->pol_nrs->nrs_svcpt; -} - -static inline -struct cfs_cpt_table *nrs_pol2cptab(struct ptlrpc_nrs_policy *policy) -{ - return nrs_pol2svc(policy)->srv_cptable; -} - -static inline struct ptlrpc_nrs_resource * -nrs_request_resource(struct ptlrpc_nrs_request *nrq) -{ - LASSERT(nrq->nr_initialized); - LASSERT(!nrq->nr_finalized); - - return nrq->nr_res_ptrs[nrq->nr_res_idx]; -} - -static inline -struct ptlrpc_nrs_policy *nrs_request_policy(struct ptlrpc_nrs_request *nrq) -{ - return nrs_request_resource(nrq)->res_policy; -} - -#define NRS_LPROCFS_QUANTUM_NAME_REG "reg_quantum:" -#define NRS_LPROCFS_QUANTUM_NAME_HP "hp_quantum:" - -/** - * the maximum size of nrs_crrn_client::cc_quantum and nrs_orr_data::od_quantum. - */ -#define LPROCFS_NRS_QUANTUM_MAX 65535 - -/** - * Max valid command string is the size of the labels, plus "65535" twice, plus - * a separating space character. 
- */ -#define LPROCFS_NRS_WR_QUANTUM_MAX_CMD \ - sizeof(NRS_LPROCFS_QUANTUM_NAME_REG __stringify(LPROCFS_NRS_QUANTUM_MAX) " " \ - NRS_LPROCFS_QUANTUM_NAME_HP __stringify(LPROCFS_NRS_QUANTUM_MAX)) - -/* ptlrpc/nrs_fifo.c */ -extern struct ptlrpc_nrs_pol_conf nrs_conf_fifo; - -/* recovd_thread.c */ - -int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink); - -/* pers.c */ -void ptlrpc_fill_bulk_md(struct lnet_md *md, struct ptlrpc_bulk_desc *desc, - int mdcnt); - -/* pack_generic.c */ -struct ptlrpc_reply_state * -lustre_get_emerg_rs(struct ptlrpc_service_part *svcpt); -void lustre_put_emerg_rs(struct ptlrpc_reply_state *rs); - -/* pinger.c */ -int ptlrpc_start_pinger(void); -int ptlrpc_stop_pinger(void); -void ptlrpc_pinger_sending_on_import(struct obd_import *imp); -void ptlrpc_pinger_commit_expected(struct obd_import *imp); -void ptlrpc_pinger_wake_up(void); - -/* sec_null.c */ -int sptlrpc_null_init(void); -void sptlrpc_null_fini(void); - -/* sec_plain.c */ -int sptlrpc_plain_init(void); -void sptlrpc_plain_fini(void); - -/* sec_bulk.c */ -int sptlrpc_enc_pool_init(void); -void sptlrpc_enc_pool_fini(void); -int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v); - -/* sec_lproc.c */ -void sptlrpc_lproc_init(void); -void sptlrpc_lproc_fini(void); - -/* sec_gc.c */ -int sptlrpc_gc_init(void); -void sptlrpc_gc_fini(void); - -/* sec_config.c */ -void sptlrpc_conf_choose_flavor(enum lustre_sec_part from, - enum lustre_sec_part to, - struct obd_uuid *target, - lnet_nid_t nid, - struct sptlrpc_flavor *sf); -int sptlrpc_conf_init(void); -void sptlrpc_conf_fini(void); - -/* sec.c */ -int sptlrpc_init(void); -void sptlrpc_fini(void); - -static inline bool ptlrpc_recoverable_error(int rc) -{ - return (rc == -ENOTCONN || rc == -ENODEV); -} - -static inline int tgt_mod_init(void) -{ - return 0; -} - -static inline void tgt_mod_exit(void) -{ - return; -} - -static inline void ptlrpc_reqset_put(struct ptlrpc_request_set *set) -{ - if 
(atomic_dec_and_test(&set->set_refcount)) - kfree(set); -} - -/** initialise ptlrpc common fields */ -static inline void ptlrpc_req_comm_init(struct ptlrpc_request *req) -{ - spin_lock_init(&req->rq_lock); - atomic_set(&req->rq_refcount, 1); - INIT_LIST_HEAD(&req->rq_list); - INIT_LIST_HEAD(&req->rq_replay_list); -} - -/** initialise client side ptlrpc request */ -static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req) -{ - struct ptlrpc_cli_req *cr = &req->rq_cli; - - ptlrpc_req_comm_init(req); - - req->rq_receiving_reply = 0; - req->rq_req_unlinked = 1; - req->rq_reply_unlinked = 1; - - req->rq_receiving_reply = 0; - req->rq_req_unlinked = 1; - req->rq_reply_unlinked = 1; - - INIT_LIST_HEAD(&cr->cr_set_chain); - INIT_LIST_HEAD(&cr->cr_ctx_chain); - INIT_LIST_HEAD(&cr->cr_unreplied_list); - init_waitqueue_head(&cr->cr_reply_waitq); - init_waitqueue_head(&cr->cr_set_waitq); -} - -/** initialise server side ptlrpc request */ -static inline void ptlrpc_srv_req_init(struct ptlrpc_request *req) -{ - struct ptlrpc_srv_req *sr = &req->rq_srv; - - ptlrpc_req_comm_init(req); - req->rq_srv_req = 1; - INIT_LIST_HEAD(&sr->sr_exp_list); - INIT_LIST_HEAD(&sr->sr_timed_list); - INIT_LIST_HEAD(&sr->sr_hist_list); -} - -static inline bool ptlrpc_req_is_connect(struct ptlrpc_request *req) -{ - if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CONNECT || - lustre_msg_get_opc(req->rq_reqmsg) == OST_CONNECT || - lustre_msg_get_opc(req->rq_reqmsg) == MGS_CONNECT) - return true; - else - return false; -} - -static inline bool ptlrpc_req_is_disconnect(struct ptlrpc_request *req) -{ - if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_DISCONNECT || - lustre_msg_get_opc(req->rq_reqmsg) == OST_DISCONNECT || - lustre_msg_get_opc(req->rq_reqmsg) == MGS_DISCONNECT) - return true; - else - return false; -} - -#endif /* PTLRPC_INTERNAL_H */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c deleted file mode 100644 index 
5c32b657b3b5..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_module.c +++ /dev/null @@ -1,186 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -extern spinlock_t ptlrpc_last_xid_lock; -#if RS_DEBUG -extern spinlock_t ptlrpc_rs_debug_lock; -#endif - -DEFINE_MUTEX(ptlrpc_startup); -static int ptlrpc_active = 0; - -int ptlrpc_inc_ref(void) -{ - int rc = 0; - - mutex_lock(&ptlrpc_startup); - if (ptlrpc_active++ == 0) { - ptlrpc_put_connection_superhack = ptlrpc_connection_put; - - rc = ptlrpc_init_portals(); - if (!rc) { - rc= ptlrpc_start_pinger(); - if (rc) - ptlrpc_exit_portals(); - } - if (rc) - ptlrpc_active--; - } - mutex_unlock(&ptlrpc_startup); - return rc; -} -EXPORT_SYMBOL(ptlrpc_inc_ref); - -void ptlrpc_dec_ref(void) -{ - mutex_lock(&ptlrpc_startup); - if (--ptlrpc_active == 0) { - ptlrpc_stop_pinger(); - ptlrpc_exit_portals(); - } - mutex_unlock(&ptlrpc_startup); -} -EXPORT_SYMBOL(ptlrpc_dec_ref); - -static int __init ptlrpc_init(void) -{ - int rc, cleanup_phase = 0; - - lustre_assert_wire_constants(); -#if RS_DEBUG - spin_lock_init(&ptlrpc_rs_debug_lock); -#endif - mutex_init(&ptlrpc_all_services_mutex); - mutex_init(&pinger_mutex); - mutex_init(&ptlrpcd_mutex); - ptlrpc_init_xid(); - - rc = libcfs_setup(); - if (rc) - return rc; - - rc = req_layout_init(); - if (rc) - return rc; - - rc = ptlrpc_hr_init(); - if (rc) - return rc; - - cleanup_phase = 1; - rc = ptlrpc_request_cache_init(); - if (rc) - goto cleanup; - - cleanup_phase = 3; - - rc = ptlrpc_connection_init(); - if (rc) - goto cleanup; - - cleanup_phase = 5; - rc = ldlm_init(); - if (rc) - goto cleanup; - - cleanup_phase = 6; - rc = sptlrpc_init(); - if (rc) - goto cleanup; - - cleanup_phase = 7; - rc = ptlrpc_nrs_init(); - if (rc) - goto cleanup; - - cleanup_phase = 8; - rc = tgt_mod_init(); - if (rc) - goto cleanup; - return 0; - -cleanup: - switch (cleanup_phase) { - case 8: - ptlrpc_nrs_fini(); - /* Fall through */ - case 7: - sptlrpc_fini(); - /* Fall through */ - case 6: - ldlm_exit(); - /* Fall through */ - case 5: - 
ptlrpc_connection_fini(); - /* Fall through */ - case 3: - ptlrpc_request_cache_fini(); - /* Fall through */ - case 1: - ptlrpc_hr_fini(); - req_layout_fini(); - /* Fall through */ - default: - ; - } - - return rc; -} - -static void __exit ptlrpc_exit(void) -{ - tgt_mod_exit(); - ptlrpc_nrs_fini(); - sptlrpc_fini(); - ldlm_exit(); - ptlrpc_request_cache_fini(); - ptlrpc_hr_fini(); - ptlrpc_connection_fini(); -} - -MODULE_AUTHOR("OpenSFS, Inc. "); -MODULE_DESCRIPTION("Lustre Request Processor and Lock Management"); -MODULE_VERSION(LUSTRE_VERSION_STRING); -MODULE_LICENSE("GPL"); - -module_init(ptlrpc_init); -module_exit(ptlrpc_exit); diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c deleted file mode 100644 index 531005411edf..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c +++ /dev/null @@ -1,914 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/ptlrpcd.c - */ - -/** \defgroup ptlrpcd PortalRPC daemon - * - * ptlrpcd is a special thread with its own set where other user might add - * requests when they don't want to wait for their completion. - * PtlRPCD will take care of sending such requests and then processing their - * replies and calling completion callbacks as necessary. - * The callbacks are called directly from ptlrpcd context. - * It is important to never significantly block (esp. on RPCs!) within such - * completion handler or a deadlock might occur where ptlrpcd enters some - * callback that attempts to send another RPC and wait for it to return, - * during which time ptlrpcd is completely blocked, so e.g. if import - * fails, recovery cannot progress because connection requests are also - * sent by ptlrpcd. - * - * @{ - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -#include -#include -#include -#include -#include -#include -#include /* for obd_zombie */ -#include /* for OBD_FAIL_CHECK */ -#include /* cl_env_{get,put}() */ -#include - -#include "ptlrpc_internal.h" - -/* One of these per CPT. */ -struct ptlrpcd { - int pd_size; - int pd_index; - int pd_cpt; - int pd_cursor; - int pd_nthreads; - int pd_groupsize; - struct ptlrpcd_ctl pd_threads[0]; -}; - -/* - * max_ptlrpcds is obsolete, but retained to ensure that the kernel - * module will load on a system where it has been tuned. - * A value other than 0 implies it was tuned, in which case the value - * is used to derive a setting for ptlrpcd_per_cpt_max. - */ -static int max_ptlrpcds; -module_param(max_ptlrpcds, int, 0644); -MODULE_PARM_DESC(max_ptlrpcds, - "Max ptlrpcd thread count to be started (obsolete)."); - -/* - * ptlrpcd_bind_policy is obsolete, but retained to ensure that - * the kernel module will load on a system where it has been tuned. 
- * A value other than 0 implies it was tuned, in which case the value - * is used to derive a setting for ptlrpcd_partner_group_size. - */ -static int ptlrpcd_bind_policy; -module_param(ptlrpcd_bind_policy, int, 0644); -MODULE_PARM_DESC(ptlrpcd_bind_policy, - "Ptlrpcd threads binding mode (obsolete)."); - -/* - * ptlrpcd_per_cpt_max: The maximum number of ptlrpcd threads to run - * in a CPT. - */ -static int ptlrpcd_per_cpt_max; -module_param(ptlrpcd_per_cpt_max, int, 0644); -MODULE_PARM_DESC(ptlrpcd_per_cpt_max, - "Max ptlrpcd thread count to be started per CPT."); - -/* - * ptlrpcd_partner_group_size: The desired number of threads in each - * ptlrpcd partner thread group. Default is 2, corresponding to the - * old PDB_POLICY_PAIR. A negative value makes all ptlrpcd threads in - * a CPT partners of each other. - */ -static int ptlrpcd_partner_group_size; -module_param(ptlrpcd_partner_group_size, int, 0644); -MODULE_PARM_DESC(ptlrpcd_partner_group_size, - "Number of ptlrpcd threads in a partner group."); - -/* - * ptlrpcd_cpts: A CPT string describing the CPU partitions that - * ptlrpcd threads should run on. Used to make ptlrpcd threads run on - * a subset of all CPTs. - * - * ptlrpcd_cpts=2 - * ptlrpcd_cpts=[2] - * run ptlrpcd threads only on CPT 2. - * - * ptlrpcd_cpts=0-3 - * ptlrpcd_cpts=[0-3] - * run ptlrpcd threads on CPTs 0, 1, 2, and 3. - * - * ptlrpcd_cpts=[0-3,5,7] - * run ptlrpcd threads on CPTS 0, 1, 2, 3, 5, and 7. - */ -static char *ptlrpcd_cpts; -module_param(ptlrpcd_cpts, charp, 0644); -MODULE_PARM_DESC(ptlrpcd_cpts, - "CPU partitions ptlrpcd threads should run in"); - -/* ptlrpcds_cpt_idx maps cpt numbers to an index in the ptlrpcds array. */ -static int *ptlrpcds_cpt_idx; - -/* ptlrpcds_num is the number of entries in the ptlrpcds array. */ -static int ptlrpcds_num; -static struct ptlrpcd **ptlrpcds; - -/* - * In addition to the regular thread pool above, there is a single - * global recovery thread. 
Recovery isn't critical for performance, - * and doesn't block, but must always be able to proceed, and it is - * possible that all normal ptlrpcd threads are blocked. Hence the - * need for a dedicated thread. - */ -static struct ptlrpcd_ctl ptlrpcd_rcv; - -struct mutex ptlrpcd_mutex; -static int ptlrpcd_users; - -void ptlrpcd_wake(struct ptlrpc_request *req) -{ - struct ptlrpc_request_set *set = req->rq_set; - - wake_up(&set->set_waitq); -} -EXPORT_SYMBOL(ptlrpcd_wake); - -static struct ptlrpcd_ctl * -ptlrpcd_select_pc(struct ptlrpc_request *req) -{ - struct ptlrpcd *pd; - int cpt; - int idx; - - if (req && req->rq_send_state != LUSTRE_IMP_FULL) - return &ptlrpcd_rcv; - - cpt = cfs_cpt_current(cfs_cpt_tab, 1); - if (!ptlrpcds_cpt_idx) - idx = cpt; - else - idx = ptlrpcds_cpt_idx[cpt]; - pd = ptlrpcds[idx]; - - /* We do not care whether it is strict load balance. */ - idx = pd->pd_cursor; - if (++idx == pd->pd_nthreads) - idx = 0; - pd->pd_cursor = idx; - - return &pd->pd_threads[idx]; -} - -/** - * Return transferred RPCs count. - */ -static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des, - struct ptlrpc_request_set *src) -{ - struct ptlrpc_request *req, *tmp; - int rc = 0; - - spin_lock(&src->set_new_req_lock); - if (likely(!list_empty(&src->set_new_requests))) { - list_for_each_entry_safe(req, tmp, &src->set_new_requests, rq_set_chain) - req->rq_set = des; - - list_splice_init(&src->set_new_requests, &des->set_requests); - rc = atomic_read(&src->set_new_count); - atomic_add(rc, &des->set_remaining); - atomic_set(&src->set_new_count, 0); - } - spin_unlock(&src->set_new_req_lock); - return rc; -} - -/** - * Requests that are added to the ptlrpcd queue are sent via - * ptlrpcd_check->ptlrpc_check_set(). 
- */ -void ptlrpcd_add_req(struct ptlrpc_request *req) -{ - struct ptlrpcd_ctl *pc; - - if (req->rq_reqmsg) - lustre_msg_set_jobid(req->rq_reqmsg, NULL); - - spin_lock(&req->rq_lock); - if (req->rq_invalid_rqset) { - req->rq_invalid_rqset = 0; - spin_unlock(&req->rq_lock); - if (wait_event_idle_timeout(req->rq_set_waitq, - !req->rq_set, - 5 * HZ) == 0) - wait_event_idle(req->rq_set_waitq, - !req->rq_set); - } else if (req->rq_set) { - /* If we have a valid "rq_set", just reuse it to avoid double - * linked. - */ - LASSERT(req->rq_phase == RQ_PHASE_NEW); - LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY); - - /* ptlrpc_check_set will decrease the count */ - atomic_inc(&req->rq_set->set_remaining); - spin_unlock(&req->rq_lock); - wake_up(&req->rq_set->set_waitq); - return; - } else { - spin_unlock(&req->rq_lock); - } - - pc = ptlrpcd_select_pc(req); - - DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]", - req, pc->pc_name, pc->pc_index); - - ptlrpc_set_add_new_req(pc, req); -} -EXPORT_SYMBOL(ptlrpcd_add_req); - -static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set) -{ - atomic_inc(&set->set_refcount); -} - -/** - * Check if there is more work to do on ptlrpcd set. - * Returns 1 if yes. - */ -static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc) -{ - struct ptlrpc_request *req, *tmp; - struct ptlrpc_request_set *set = pc->pc_set; - int rc = 0; - int rc2; - - if (atomic_read(&set->set_new_count)) { - spin_lock(&set->set_new_req_lock); - if (likely(!list_empty(&set->set_new_requests))) { - list_splice_init(&set->set_new_requests, - &set->set_requests); - atomic_add(atomic_read(&set->set_new_count), - &set->set_remaining); - atomic_set(&set->set_new_count, 0); - /* - * Need to calculate its timeout. - */ - rc = 1; - } - spin_unlock(&set->set_new_req_lock); - } - - /* We should call lu_env_refill() before handling new requests to make - * sure that env key the requests depending on really exists. 
- */ - rc2 = lu_env_refill(env); - if (rc2 != 0) { - /* - * XXX This is very awkward situation, because - * execution can neither continue (request - * interpreters assume that env is set up), nor repeat - * the loop (as this potentially results in a tight - * loop of -ENOMEM's). - * - * Fortunately, refill only ever does something when - * new modules are loaded, i.e., early during boot up. - */ - CERROR("Failure to refill session: %d\n", rc2); - return rc; - } - - if (atomic_read(&set->set_remaining)) - rc |= ptlrpc_check_set(env, set); - - /* NB: ptlrpc_check_set has already moved completed request at the - * head of seq::set_requests - */ - list_for_each_entry_safe(req, tmp, &set->set_requests, rq_set_chain) { - if (req->rq_phase != RQ_PHASE_COMPLETE) - break; - - list_del_init(&req->rq_set_chain); - req->rq_set = NULL; - ptlrpc_req_finished(req); - } - - if (rc == 0) { - /* - * If new requests have been added, make sure to wake up. - */ - rc = atomic_read(&set->set_new_count); - - /* If we have nothing to do, check whether we can take some - * work from our partner threads. - */ - if (rc == 0 && pc->pc_npartners > 0) { - struct ptlrpcd_ctl *partner; - struct ptlrpc_request_set *ps; - int first = pc->pc_cursor; - - do { - partner = pc->pc_partners[pc->pc_cursor++]; - if (pc->pc_cursor >= pc->pc_npartners) - pc->pc_cursor = 0; - if (!partner) - continue; - - spin_lock(&partner->pc_lock); - ps = partner->pc_set; - if (!ps) { - spin_unlock(&partner->pc_lock); - continue; - } - - ptlrpc_reqset_get(ps); - spin_unlock(&partner->pc_lock); - - if (atomic_read(&ps->set_new_count)) { - rc = ptlrpcd_steal_rqset(set, ps); - if (rc > 0) - CDEBUG(D_RPCTRACE, "transfer %d async RPCs [%d->%d]\n", - rc, partner->pc_index, - pc->pc_index); - } - ptlrpc_reqset_put(ps); - } while (rc == 0 && pc->pc_cursor != first); - } - } - - return rc; -} - -/** - * Main ptlrpcd thread. 
- * ptlrpc's code paths like to execute in process context, so we have this - * thread which spins on a set which contains the rpcs and sends them. - * - */ -static int ptlrpcd(void *arg) -{ - struct ptlrpcd_ctl *pc = arg; - struct ptlrpc_request_set *set; - struct lu_context ses = { 0 }; - struct lu_env env = { .le_ses = &ses }; - int rc = 0; - int exit = 0; - - unshare_fs_struct(); - if (cfs_cpt_bind(cfs_cpt_tab, pc->pc_cpt) != 0) - CWARN("Failed to bind %s on CPT %d\n", pc->pc_name, pc->pc_cpt); - - /* - * Allocate the request set after the thread has been bound - * above. This is safe because no requests will be queued - * until all ptlrpcd threads have confirmed that they have - * successfully started. - */ - set = ptlrpc_prep_set(); - if (!set) { - rc = -ENOMEM; - goto failed; - } - spin_lock(&pc->pc_lock); - pc->pc_set = set; - spin_unlock(&pc->pc_lock); - /* - * XXX So far only "client" ptlrpcd uses an environment. In - * the future, ptlrpcd thread (or a thread-set) has to given - * an argument, describing its "scope". - */ - rc = lu_context_init(&env.le_ctx, - LCT_CL_THREAD | LCT_REMEMBER | LCT_NOREF); - if (rc == 0) { - rc = lu_context_init(env.le_ses, - LCT_SESSION | LCT_REMEMBER | LCT_NOREF); - if (rc != 0) - lu_context_fini(&env.le_ctx); - } - - if (rc != 0) - goto failed; - - complete(&pc->pc_starting); - - /* - * This mainloop strongly resembles ptlrpc_set_wait() except that our - * set never completes. ptlrpcd_check() calls ptlrpc_check_set() when - * there are requests in the set. New requests come in on the set's - * new_req_list and ptlrpcd_check() moves them into the set. - */ - do { - int timeout; - - timeout = ptlrpc_set_next_timeout(set); - - lu_context_enter(&env.le_ctx); - lu_context_enter(env.le_ses); - if (wait_event_idle_timeout(set->set_waitq, - ptlrpcd_check(&env, pc), - (timeout ? 
timeout : 1) * HZ) == 0) - ptlrpc_expired_set(set); - - lu_context_exit(&env.le_ctx); - lu_context_exit(env.le_ses); - - /* - * Abort inflight rpcs for forced stop case. - */ - if (test_bit(LIOD_STOP, &pc->pc_flags)) { - if (test_bit(LIOD_FORCE, &pc->pc_flags)) - ptlrpc_abort_set(set); - exit++; - } - - /* - * Let's make one more loop to make sure that ptlrpcd_check() - * copied all raced new rpcs into the set so we can kill them. - */ - } while (exit < 2); - - /* - * Wait for inflight requests to drain. - */ - if (!list_empty(&set->set_requests)) - ptlrpc_set_wait(set); - lu_context_fini(&env.le_ctx); - lu_context_fini(env.le_ses); - - complete(&pc->pc_finishing); - - return 0; -failed: - pc->pc_error = rc; - complete(&pc->pc_starting); - return rc; -} - -static void ptlrpcd_ctl_init(struct ptlrpcd_ctl *pc, int index, int cpt) -{ - pc->pc_index = index; - pc->pc_cpt = cpt; - init_completion(&pc->pc_starting); - init_completion(&pc->pc_finishing); - spin_lock_init(&pc->pc_lock); - - if (index < 0) { - /* Recovery thread. */ - snprintf(pc->pc_name, sizeof(pc->pc_name), "ptlrpcd_rcv"); - } else { - /* Regular thread. */ - snprintf(pc->pc_name, sizeof(pc->pc_name), - "ptlrpcd_%02d_%02d", cpt, index); - } -} - -/* XXX: We want multiple CPU cores to share the async RPC load. So we - * start many ptlrpcd threads. We also want to reduce the ptlrpcd - * overhead caused by data transfer cross-CPU cores. So we bind - * all ptlrpcd threads to a CPT, in the expectation that CPTs - * will be defined in a way that matches these boundaries. Within - * a CPT a ptlrpcd thread can be scheduled on any available core. - * - * Each ptlrpcd thread has its own request queue. This can cause - * response delay if the thread is already busy. To help with - * this we define partner threads: these are other threads bound - * to the same CPT which will check for work in each other's - * request queues if they have no work to do. 
- * - * The desired number of partner threads can be tuned by setting - * ptlrpcd_partner_group_size. The default is to create pairs of - * partner threads. - */ -static int ptlrpcd_partners(struct ptlrpcd *pd, int index) -{ - struct ptlrpcd_ctl *pc; - struct ptlrpcd_ctl **ppc; - int first; - int i; - int rc = 0; - int size; - - LASSERT(index >= 0 && index < pd->pd_nthreads); - pc = &pd->pd_threads[index]; - pc->pc_npartners = pd->pd_groupsize - 1; - - if (pc->pc_npartners <= 0) - goto out; - - size = sizeof(struct ptlrpcd_ctl *) * pc->pc_npartners; - pc->pc_partners = kzalloc_node(size, GFP_NOFS, - cfs_cpt_spread_node(cfs_cpt_tab, - pc->pc_cpt)); - if (!pc->pc_partners) { - pc->pc_npartners = 0; - rc = -ENOMEM; - goto out; - } - - first = index - index % pd->pd_groupsize; - ppc = pc->pc_partners; - for (i = first; i < first + pd->pd_groupsize; i++) { - if (i != index) - *ppc++ = &pd->pd_threads[i]; - } -out: - return rc; -} - -int ptlrpcd_start(struct ptlrpcd_ctl *pc) -{ - struct task_struct *task; - int rc = 0; - - /* - * Do not allow start second thread for one pc. 
- */ - if (test_and_set_bit(LIOD_START, &pc->pc_flags)) { - CWARN("Starting second thread (%s) for same pc %p\n", - pc->pc_name, pc); - return 0; - } - - task = kthread_run(ptlrpcd, pc, "%s", pc->pc_name); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - goto out_set; - } - - wait_for_completion(&pc->pc_starting); - rc = pc->pc_error; - if (rc != 0) - goto out_set; - - return 0; - -out_set: - if (pc->pc_set) { - struct ptlrpc_request_set *set = pc->pc_set; - - spin_lock(&pc->pc_lock); - pc->pc_set = NULL; - spin_unlock(&pc->pc_lock); - ptlrpc_set_destroy(set); - } - clear_bit(LIOD_START, &pc->pc_flags); - return rc; -} - -void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force) -{ - if (!test_bit(LIOD_START, &pc->pc_flags)) { - CWARN("Thread for pc %p was not started\n", pc); - return; - } - - set_bit(LIOD_STOP, &pc->pc_flags); - if (force) - set_bit(LIOD_FORCE, &pc->pc_flags); - wake_up(&pc->pc_set->set_waitq); -} - -void ptlrpcd_free(struct ptlrpcd_ctl *pc) -{ - struct ptlrpc_request_set *set = pc->pc_set; - - if (!test_bit(LIOD_START, &pc->pc_flags)) { - CWARN("Thread for pc %p was not started\n", pc); - goto out; - } - - wait_for_completion(&pc->pc_finishing); - - spin_lock(&pc->pc_lock); - pc->pc_set = NULL; - spin_unlock(&pc->pc_lock); - ptlrpc_set_destroy(set); - - clear_bit(LIOD_START, &pc->pc_flags); - clear_bit(LIOD_STOP, &pc->pc_flags); - clear_bit(LIOD_FORCE, &pc->pc_flags); - -out: - if (pc->pc_npartners > 0) { - LASSERT(pc->pc_partners); - - kfree(pc->pc_partners); - pc->pc_partners = NULL; - } - pc->pc_npartners = 0; - pc->pc_error = 0; -} - -static void ptlrpcd_fini(void) -{ - int i; - int j; - - if (ptlrpcds) { - for (i = 0; i < ptlrpcds_num; i++) { - if (!ptlrpcds[i]) - break; - for (j = 0; j < ptlrpcds[i]->pd_nthreads; j++) - ptlrpcd_stop(&ptlrpcds[i]->pd_threads[j], 0); - for (j = 0; j < ptlrpcds[i]->pd_nthreads; j++) - ptlrpcd_free(&ptlrpcds[i]->pd_threads[j]); - kfree(ptlrpcds[i]); - ptlrpcds[i] = NULL; - } - kfree(ptlrpcds); - } - ptlrpcds_num = 0; 
- - ptlrpcd_stop(&ptlrpcd_rcv, 0); - ptlrpcd_free(&ptlrpcd_rcv); - - kfree(ptlrpcds_cpt_idx); - ptlrpcds_cpt_idx = NULL; -} - -static int ptlrpcd_init(void) -{ - int nthreads; - int groupsize; - int size; - int i; - int j; - int rc = 0; - struct cfs_cpt_table *cptable; - __u32 *cpts = NULL; - int ncpts; - int cpt; - struct ptlrpcd *pd; - - /* - * Determine the CPTs that ptlrpcd threads will run on. - */ - cptable = cfs_cpt_tab; - ncpts = cfs_cpt_number(cptable); - if (ptlrpcd_cpts) { - struct cfs_expr_list *el; - - size = ncpts * sizeof(ptlrpcds_cpt_idx[0]); - ptlrpcds_cpt_idx = kzalloc(size, GFP_KERNEL); - if (!ptlrpcds_cpt_idx) { - rc = -ENOMEM; - goto out; - } - - rc = cfs_expr_list_parse(ptlrpcd_cpts, - strlen(ptlrpcd_cpts), - 0, ncpts - 1, &el); - - if (rc != 0) { - CERROR("ptlrpcd_cpts: invalid CPT pattern string: %s", - ptlrpcd_cpts); - rc = -EINVAL; - goto out; - } - - rc = cfs_expr_list_values(el, ncpts, &cpts); - cfs_expr_list_free(el); - if (rc <= 0) { - CERROR("ptlrpcd_cpts: failed to parse CPT array %s: %d\n", - ptlrpcd_cpts, rc); - if (rc == 0) - rc = -EINVAL; - goto out; - } - - /* - * Create the cpt-to-index map. When there is no match - * in the cpt table, pick a cpt at random. This could - * be changed to take the topology of the system into - * account. - */ - for (cpt = 0; cpt < ncpts; cpt++) { - for (i = 0; i < rc; i++) - if (cpts[i] == cpt) - break; - if (i >= rc) - i = cpt % rc; - ptlrpcds_cpt_idx[cpt] = i; - } - - cfs_expr_list_values_free(cpts, rc); - ncpts = rc; - } - ptlrpcds_num = ncpts; - - size = ncpts * sizeof(ptlrpcds[0]); - ptlrpcds = kzalloc(size, GFP_KERNEL); - if (!ptlrpcds) { - rc = -ENOMEM; - goto out; - } - - /* - * The max_ptlrpcds parameter is obsolete, but do something - * sane if it has been tuned, and complain if - * ptlrpcd_per_cpt_max has also been tuned. 
- */ - if (max_ptlrpcds != 0) { - CWARN("max_ptlrpcds is obsolete.\n"); - if (ptlrpcd_per_cpt_max == 0) { - ptlrpcd_per_cpt_max = max_ptlrpcds / ncpts; - /* Round up if there is a remainder. */ - if (max_ptlrpcds % ncpts != 0) - ptlrpcd_per_cpt_max++; - CWARN("Setting ptlrpcd_per_cpt_max = %d\n", - ptlrpcd_per_cpt_max); - } else { - CWARN("ptlrpd_per_cpt_max is also set!\n"); - } - } - - /* - * The ptlrpcd_bind_policy parameter is obsolete, but do - * something sane if it has been tuned, and complain if - * ptlrpcd_partner_group_size is also tuned. - */ - if (ptlrpcd_bind_policy != 0) { - CWARN("ptlrpcd_bind_policy is obsolete.\n"); - if (ptlrpcd_partner_group_size == 0) { - switch (ptlrpcd_bind_policy) { - case 1: /* PDB_POLICY_NONE */ - case 2: /* PDB_POLICY_FULL */ - ptlrpcd_partner_group_size = 1; - break; - case 3: /* PDB_POLICY_PAIR */ - ptlrpcd_partner_group_size = 2; - break; - case 4: /* PDB_POLICY_NEIGHBOR */ -#ifdef CONFIG_NUMA - ptlrpcd_partner_group_size = -1; /* CPT */ -#else - ptlrpcd_partner_group_size = 3; /* Triplets */ -#endif - break; - default: /* Illegal value, use the default. */ - ptlrpcd_partner_group_size = 2; - break; - } - CWARN("Setting ptlrpcd_partner_group_size = %d\n", - ptlrpcd_partner_group_size); - } else { - CWARN("ptlrpcd_partner_group_size is also set!\n"); - } - } - - if (ptlrpcd_partner_group_size == 0) - ptlrpcd_partner_group_size = 2; - else if (ptlrpcd_partner_group_size < 0) - ptlrpcd_partner_group_size = -1; - else if (ptlrpcd_per_cpt_max > 0 && - ptlrpcd_partner_group_size > ptlrpcd_per_cpt_max) - ptlrpcd_partner_group_size = ptlrpcd_per_cpt_max; - - /* - * Start the recovery thread first. 
- */ - set_bit(LIOD_RECOVERY, &ptlrpcd_rcv.pc_flags); - ptlrpcd_ctl_init(&ptlrpcd_rcv, -1, CFS_CPT_ANY); - rc = ptlrpcd_start(&ptlrpcd_rcv); - if (rc < 0) - goto out; - - for (i = 0; i < ncpts; i++) { - if (!cpts) - cpt = i; - else - cpt = cpts[i]; - - nthreads = cfs_cpt_weight(cptable, cpt); - if (ptlrpcd_per_cpt_max > 0 && ptlrpcd_per_cpt_max < nthreads) - nthreads = ptlrpcd_per_cpt_max; - if (nthreads < 2) - nthreads = 2; - - if (ptlrpcd_partner_group_size <= 0) { - groupsize = nthreads; - } else if (nthreads <= ptlrpcd_partner_group_size) { - groupsize = nthreads; - } else { - groupsize = ptlrpcd_partner_group_size; - if (nthreads % groupsize != 0) - nthreads += groupsize - (nthreads % groupsize); - } - - size = offsetof(struct ptlrpcd, pd_threads[nthreads]); - pd = kzalloc_node(size, GFP_NOFS, - cfs_cpt_spread_node(cfs_cpt_tab, cpt)); - if (!pd) { - rc = -ENOMEM; - goto out; - } - pd->pd_size = size; - pd->pd_index = i; - pd->pd_cpt = cpt; - pd->pd_cursor = 0; - pd->pd_nthreads = nthreads; - pd->pd_groupsize = groupsize; - ptlrpcds[i] = pd; - - /* - * The ptlrpcd threads in a partner group can access - * each other's struct ptlrpcd_ctl, so these must be - * initialized before any thread is started. - */ - for (j = 0; j < nthreads; j++) { - ptlrpcd_ctl_init(&pd->pd_threads[j], j, cpt); - rc = ptlrpcd_partners(pd, j); - if (rc < 0) - goto out; - } - - /* XXX: We start nthreads ptlrpc daemons. - * Each of them can process any non-recovery - * async RPC to improve overall async RPC - * efficiency. - * - * But there are some issues with async I/O RPCs - * and async non-I/O RPCs processed in the same - * set under some cases. The ptlrpcd may be - * blocked by some async I/O RPC(s), then will - * cause other async non-I/O RPC(s) can not be - * processed in time. - * - * Maybe we should distinguish blocked async RPCs - * from non-blocked async RPCs, and process them - * in different ptlrpcd sets to avoid unnecessary - * dependency. 
But how to distribute async RPCs - * load among all the ptlrpc daemons becomes - * another trouble. - */ - for (j = 0; j < nthreads; j++) { - rc = ptlrpcd_start(&pd->pd_threads[j]); - if (rc < 0) - goto out; - } - } -out: - if (rc != 0) - ptlrpcd_fini(); - - return rc; -} - -int ptlrpcd_addref(void) -{ - int rc = 0; - - mutex_lock(&ptlrpcd_mutex); - if (++ptlrpcd_users == 1) { - rc = ptlrpcd_init(); - if (rc < 0) - ptlrpcd_users--; - } - mutex_unlock(&ptlrpcd_mutex); - return rc; -} -EXPORT_SYMBOL(ptlrpcd_addref); - -void ptlrpcd_decref(void) -{ - mutex_lock(&ptlrpcd_mutex); - if (--ptlrpcd_users == 0) - ptlrpcd_fini(); - mutex_unlock(&ptlrpcd_mutex); -} -EXPORT_SYMBOL(ptlrpcd_decref); -/** @} ptlrpcd */ diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c deleted file mode 100644 index 2ea0a7ff87dd..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/recover.c +++ /dev/null @@ -1,374 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/recover.c - * - * Author: Mike Shaver - */ - -#define DEBUG_SUBSYSTEM S_RPC -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -/** - * Start recovery on disconnected import. - * This is done by just attempting a connect - */ -void ptlrpc_initiate_recovery(struct obd_import *imp) -{ - CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd)); - ptlrpc_connect_import(imp); -} - -/** - * Identify what request from replay list needs to be replayed next - * (based on what we have already replayed) and send it to server. - */ -int ptlrpc_replay_next(struct obd_import *imp, int *inflight) -{ - int rc = 0; - struct ptlrpc_request *req = NULL, *pos; - __u64 last_transno; - - *inflight = 0; - - /* It might have committed some after we last spoke, so make sure we - * get rid of them now. 
- */ - spin_lock(&imp->imp_lock); - imp->imp_last_transno_checked = 0; - ptlrpc_free_committed(imp); - last_transno = imp->imp_last_replay_transno; - - CDEBUG(D_HA, "import %p from %s committed %llu last %llu\n", - imp, obd2cli_tgt(imp->imp_obd), - imp->imp_peer_committed_transno, last_transno); - - /* Replay all the committed open requests on committed_list first */ - if (!list_empty(&imp->imp_committed_list)) { - req = list_last_entry(&imp->imp_committed_list, - struct ptlrpc_request, rq_replay_list); - - /* The last request on committed_list hasn't been replayed */ - if (req->rq_transno > last_transno) { - if (!imp->imp_resend_replay || - imp->imp_replay_cursor == &imp->imp_committed_list) - imp->imp_replay_cursor = imp->imp_replay_cursor->next; - - while (imp->imp_replay_cursor != - &imp->imp_committed_list) { - req = list_entry(imp->imp_replay_cursor, - struct ptlrpc_request, - rq_replay_list); - if (req->rq_transno > last_transno) - break; - - req = NULL; - LASSERT(!list_empty(imp->imp_replay_cursor)); - imp->imp_replay_cursor = - imp->imp_replay_cursor->next; - } - } else { - /* All requests on committed_list have been replayed */ - imp->imp_replay_cursor = &imp->imp_committed_list; - req = NULL; - } - } - - /* All the requests in committed list have been replayed, let's replay - * the imp_replay_list - */ - if (!req) { - struct ptlrpc_request *tmp; - list_for_each_entry_safe(tmp, pos, &imp->imp_replay_list, - rq_replay_list) { - if (tmp->rq_transno > last_transno) { - req = tmp; - break; - } - } - } - - /* If need to resend the last sent transno (because a reconnect - * has occurred), then stop on the matching req and send it again. - * If, however, the last sent transno has been committed then we - * continue replay from the next request. - */ - if (req && imp->imp_resend_replay) - lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT); - - /* The resend replay request may have been removed from the - * unreplied list. 
- */ - if (req && imp->imp_resend_replay && - list_empty(&req->rq_unreplied_list)) { - ptlrpc_add_unreplied(req); - imp->imp_known_replied_xid = ptlrpc_known_replied_xid(imp); - } - - imp->imp_resend_replay = 0; - spin_unlock(&imp->imp_lock); - - if (req) { - /* The request should have been added back in unreplied list - * by ptlrpc_prepare_replay(). - */ - LASSERT(!list_empty(&req->rq_unreplied_list)); - - rc = ptlrpc_replay_req(req); - if (rc) { - CERROR("recovery replay error %d for req %llu\n", - rc, req->rq_xid); - return rc; - } - *inflight = 1; - } - return rc; -} - -/** - * Schedule resending of request on sending_list. This is done after - * we completed replaying of requests and locks. - */ -int ptlrpc_resend(struct obd_import *imp) -{ - struct ptlrpc_request *req, *next; - - /* As long as we're in recovery, nothing should be added to the sending - * list, so we don't need to hold the lock during this iteration and - * resend process. - */ - /* Well... what if lctl recover is called twice at the same time? - */ - spin_lock(&imp->imp_lock); - if (imp->imp_state != LUSTRE_IMP_RECOVER) { - spin_unlock(&imp->imp_lock); - return -1; - } - - list_for_each_entry_safe(req, next, &imp->imp_sending_list, rq_list) { - LASSERTF((long)req > PAGE_SIZE && req != LP_POISON, - "req %p bad\n", req); - LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req); - - /* - * If the request is allowed to be sent during replay and it - * is not timeout yet, then it does not need to be resent. 
- */ - if (!ptlrpc_no_resend(req) && - (req->rq_timedout || !req->rq_allow_replay)) - ptlrpc_resend_req(req); - } - spin_unlock(&imp->imp_lock); - - OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT, 2); - return 0; -} - -/** - * Go through all requests in delayed list and wake their threads - * for resending - */ -void ptlrpc_wake_delayed(struct obd_import *imp) -{ - struct ptlrpc_request *req, *pos; - - spin_lock(&imp->imp_lock); - list_for_each_entry_safe(req, pos, &imp->imp_delayed_list, rq_list) { - DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set); - ptlrpc_client_wake_req(req); - } - spin_unlock(&imp->imp_lock); -} - -void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req) -{ - struct obd_import *imp = failed_req->rq_import; - - CDEBUG(D_HA, "import %s of %s@%s abruptly disconnected: reconnecting\n", - imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid); - - if (ptlrpc_set_import_discon(imp, - lustre_msg_get_conn_cnt(failed_req->rq_reqmsg))) { - if (!imp->imp_replayable) { - CDEBUG(D_HA, "import %s@%s for %s not replayable, auto-deactivating\n", - obd2cli_tgt(imp->imp_obd), - imp->imp_connection->c_remote_uuid.uuid, - imp->imp_obd->obd_name); - ptlrpc_deactivate_import(imp); - } - /* to control recovery via lctl {disable|enable}_recovery */ - if (imp->imp_deactive == 0) - ptlrpc_connect_import(imp); - } - - /* Wait for recovery to complete and resend. If evicted, then - * this request will be errored out later. - */ - spin_lock(&failed_req->rq_lock); - if (!failed_req->rq_no_resend) - failed_req->rq_resend = 1; - spin_unlock(&failed_req->rq_lock); -} - -/** - * Administratively active/deactive a client. 
- * This should only be called by the ioctl interface, currently - * - the lctl deactivate and activate commands - * - echo 0/1 >> /sys/fs/lustre/osc/XXX/active - * - client umount -f (ll_umount_begin) - */ -int ptlrpc_set_import_active(struct obd_import *imp, int active) -{ - struct obd_device *obd = imp->imp_obd; - int rc = 0; - - LASSERT(obd); - - /* When deactivating, mark import invalid, and abort in-flight - * requests. - */ - if (!active) { - LCONSOLE_WARN("setting import %s INACTIVE by administrator request\n", - obd2cli_tgt(imp->imp_obd)); - - /* set before invalidate to avoid messages about imp_inval - * set without imp_deactive in ptlrpc_import_delay_req - */ - spin_lock(&imp->imp_lock); - imp->imp_deactive = 1; - spin_unlock(&imp->imp_lock); - - obd_import_event(imp->imp_obd, imp, IMP_EVENT_DEACTIVATE); - - ptlrpc_invalidate_import(imp); - } - - /* When activating, mark import valid, and attempt recovery */ - if (active) { - CDEBUG(D_HA, "setting import %s VALID\n", - obd2cli_tgt(imp->imp_obd)); - - spin_lock(&imp->imp_lock); - imp->imp_deactive = 0; - spin_unlock(&imp->imp_lock); - obd_import_event(imp->imp_obd, imp, IMP_EVENT_ACTIVATE); - - rc = ptlrpc_recover_import(imp, NULL, 0); - } - - return rc; -} -EXPORT_SYMBOL(ptlrpc_set_import_active); - -/* Attempt to reconnect an import */ -int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async) -{ - int rc = 0; - - spin_lock(&imp->imp_lock); - if (imp->imp_state == LUSTRE_IMP_NEW || imp->imp_deactive || - atomic_read(&imp->imp_inval_count)) - rc = -EINVAL; - spin_unlock(&imp->imp_lock); - if (rc) - goto out; - - /* force import to be disconnected. 
*/ - ptlrpc_set_import_discon(imp, 0); - - if (new_uuid) { - struct obd_uuid uuid; - - /* intruct import to use new uuid */ - obd_str2uuid(&uuid, new_uuid); - rc = import_set_conn_priority(imp, &uuid); - if (rc) - goto out; - } - - /* Check if reconnect is already in progress */ - spin_lock(&imp->imp_lock); - if (imp->imp_state != LUSTRE_IMP_DISCON) { - imp->imp_force_verify = 1; - rc = -EALREADY; - } - spin_unlock(&imp->imp_lock); - if (rc) - goto out; - - rc = ptlrpc_connect_import(imp); - if (rc) - goto out; - - if (!async) { - CDEBUG(D_HA, "%s: recovery started, waiting %u seconds\n", - obd2cli_tgt(imp->imp_obd), obd_timeout); - - rc = wait_event_idle_timeout(imp->imp_recovery_waitq, - !ptlrpc_import_in_recovery(imp), - obd_timeout * HZ); - CDEBUG(D_HA, "%s: recovery finished\n", - obd2cli_tgt(imp->imp_obd)); - rc = rc ? 0 : -ETIMEDOUT; - } - -out: - return rc; -} -EXPORT_SYMBOL(ptlrpc_recover_import); - -int ptlrpc_import_in_recovery(struct obd_import *imp) -{ - int in_recovery = 1; - - spin_lock(&imp->imp_lock); - if (imp->imp_state == LUSTRE_IMP_FULL || - imp->imp_state == LUSTRE_IMP_CLOSED || - imp->imp_state == LUSTRE_IMP_DISCON || - imp->imp_obd->obd_no_recov) - in_recovery = 0; - spin_unlock(&imp->imp_lock); - - return in_recovery; -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c deleted file mode 100644 index e193f3346e6f..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ /dev/null @@ -1,2379 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/sec.c - * - * Author: Eric Mei - */ - -#define DEBUG_SUBSYSTEM S_SEC - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -/*********************************************** - * policy registers * - ***********************************************/ - -static rwlock_t policy_lock; -static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = { - NULL, -}; - -int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy) -{ - __u16 number = policy->sp_policy; - - LASSERT(policy->sp_name); - LASSERT(policy->sp_cops); - LASSERT(policy->sp_sops); - - if (number >= SPTLRPC_POLICY_MAX) - return -EINVAL; - - write_lock(&policy_lock); - if (unlikely(policies[number])) { - write_unlock(&policy_lock); - return -EALREADY; - } - policies[number] = policy; - write_unlock(&policy_lock); - - CDEBUG(D_SEC, "%s: registered\n", policy->sp_name); - return 0; -} -EXPORT_SYMBOL(sptlrpc_register_policy); - -int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy) -{ - __u16 number = policy->sp_policy; - - LASSERT(number < SPTLRPC_POLICY_MAX); - - 
write_lock(&policy_lock); - if (unlikely(!policies[number])) { - write_unlock(&policy_lock); - CERROR("%s: already unregistered\n", policy->sp_name); - return -EINVAL; - } - - LASSERT(policies[number] == policy); - policies[number] = NULL; - write_unlock(&policy_lock); - - CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name); - return 0; -} -EXPORT_SYMBOL(sptlrpc_unregister_policy); - -static -struct ptlrpc_sec_policy *sptlrpc_wireflavor2policy(__u32 flavor) -{ - static DEFINE_MUTEX(load_mutex); - static atomic_t loaded = ATOMIC_INIT(0); - struct ptlrpc_sec_policy *policy; - __u16 number = SPTLRPC_FLVR_POLICY(flavor); - __u16 flag = 0; - - if (number >= SPTLRPC_POLICY_MAX) - return NULL; - - while (1) { - read_lock(&policy_lock); - policy = policies[number]; - if (policy && !try_module_get(policy->sp_owner)) - policy = NULL; - if (!policy) - flag = atomic_read(&loaded); - read_unlock(&policy_lock); - - if (policy || flag != 0 || - number != SPTLRPC_POLICY_GSS) - break; - - /* try to load gss module, once */ - mutex_lock(&load_mutex); - if (atomic_read(&loaded) == 0) { - if (request_module("ptlrpc_gss") == 0) - CDEBUG(D_SEC, - "module ptlrpc_gss loaded on demand\n"); - else - CERROR("Unable to load module ptlrpc_gss\n"); - - atomic_set(&loaded, 1); - } - mutex_unlock(&load_mutex); - } - - return policy; -} - -__u32 sptlrpc_name2flavor_base(const char *name) -{ - if (!strcmp(name, "null")) - return SPTLRPC_FLVR_NULL; - if (!strcmp(name, "plain")) - return SPTLRPC_FLVR_PLAIN; - if (!strcmp(name, "krb5n")) - return SPTLRPC_FLVR_KRB5N; - if (!strcmp(name, "krb5a")) - return SPTLRPC_FLVR_KRB5A; - if (!strcmp(name, "krb5i")) - return SPTLRPC_FLVR_KRB5I; - if (!strcmp(name, "krb5p")) - return SPTLRPC_FLVR_KRB5P; - - return SPTLRPC_FLVR_INVALID; -} -EXPORT_SYMBOL(sptlrpc_name2flavor_base); - -const char *sptlrpc_flavor2name_base(__u32 flvr) -{ - __u32 base = SPTLRPC_FLVR_BASE(flvr); - - if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) - return "null"; - else if (base == 
SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN)) - return "plain"; - else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N)) - return "krb5n"; - else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A)) - return "krb5a"; - else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I)) - return "krb5i"; - else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P)) - return "krb5p"; - - CERROR("invalid wire flavor 0x%x\n", flvr); - return "invalid"; -} -EXPORT_SYMBOL(sptlrpc_flavor2name_base); - -char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf, - char *buf, int bufsize) -{ - if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) - snprintf(buf, bufsize, "hash:%s", - sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg)); - else - snprintf(buf, bufsize, "%s", - sptlrpc_flavor2name_base(sf->sf_rpc)); - - buf[bufsize - 1] = '\0'; - return buf; -} -EXPORT_SYMBOL(sptlrpc_flavor2name_bulk); - -char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize) -{ - strlcpy(buf, sptlrpc_flavor2name_base(sf->sf_rpc), bufsize); - - /* - * currently we don't support customized bulk specification for - * flavors other than plain - */ - if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) { - char bspec[16]; - - bspec[0] = '-'; - sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1); - strlcat(buf, bspec, bufsize); - } - - return buf; -} -EXPORT_SYMBOL(sptlrpc_flavor2name); - -static char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize) -{ - buf[0] = '\0'; - - if (flags & PTLRPC_SEC_FL_REVERSE) - strlcat(buf, "reverse,", bufsize); - if (flags & PTLRPC_SEC_FL_ROOTONLY) - strlcat(buf, "rootonly,", bufsize); - if (flags & PTLRPC_SEC_FL_UDESC) - strlcat(buf, "udesc,", bufsize); - if (flags & PTLRPC_SEC_FL_BULK) - strlcat(buf, "bulk,", bufsize); - if (buf[0] == '\0') - strlcat(buf, "-,", bufsize); - - return buf; -} - -/************************************************** - * client context APIs * - **************************************************/ - -static -struct 
ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec) -{ - struct vfs_cred vcred; - int create = 1, remove_dead = 1; - - LASSERT(sec); - LASSERT(sec->ps_policy->sp_cops->lookup_ctx); - - if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE | - PTLRPC_SEC_FL_ROOTONLY)) { - vcred.vc_uid = 0; - vcred.vc_gid = 0; - if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) { - create = 0; - remove_dead = 0; - } - } else { - vcred.vc_uid = from_kuid(&init_user_ns, current_uid()); - vcred.vc_gid = from_kgid(&init_user_ns, current_gid()); - } - - return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred, - create, remove_dead); -} - -struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx) -{ - atomic_inc(&ctx->cc_refcount); - return ctx; -} -EXPORT_SYMBOL(sptlrpc_cli_ctx_get); - -void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync) -{ - struct ptlrpc_sec *sec = ctx->cc_sec; - - LASSERT(sec); - LASSERT_ATOMIC_POS(&ctx->cc_refcount); - - if (!atomic_dec_and_test(&ctx->cc_refcount)) - return; - - sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync); -} -EXPORT_SYMBOL(sptlrpc_cli_ctx_put); - -static int import_sec_check_expire(struct obd_import *imp) -{ - int adapt = 0; - - spin_lock(&imp->imp_lock); - if (imp->imp_sec_expire && - imp->imp_sec_expire < ktime_get_real_seconds()) { - adapt = 1; - imp->imp_sec_expire = 0; - } - spin_unlock(&imp->imp_lock); - - if (!adapt) - return 0; - - CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n"); - return sptlrpc_import_sec_adapt(imp, NULL, NULL); -} - -/** - * Get and validate the client side ptlrpc security facilities from - * \a imp. There is a race condition on client reconnect when the import is - * being destroyed while there are outstanding client bound requests. In - * this case do not output any error messages if import secuity is not - * found. 
- * - * \param[in] imp obd import associated with client - * \param[out] sec client side ptlrpc security - * - * \retval 0 if security retrieved successfully - * \retval -ve errno if there was a problem - */ -static int import_sec_validate_get(struct obd_import *imp, - struct ptlrpc_sec **sec) -{ - int rc; - - if (unlikely(imp->imp_sec_expire)) { - rc = import_sec_check_expire(imp); - if (rc) - return rc; - } - - *sec = sptlrpc_import_sec_ref(imp); - if (!*sec) { - CERROR("import %p (%s) with no sec\n", - imp, ptlrpc_import_state_name(imp->imp_state)); - return -EACCES; - } - - if (unlikely((*sec)->ps_dying)) { - CERROR("attempt to use dying sec %p\n", sec); - sptlrpc_sec_put(*sec); - return -EACCES; - } - - return 0; -} - -/** - * Given a \a req, find or allocate a appropriate context for it. - * \pre req->rq_cli_ctx == NULL. - * - * \retval 0 succeed, and req->rq_cli_ctx is set. - * \retval -ev error number, and req->rq_cli_ctx == NULL. - */ -int sptlrpc_req_get_ctx(struct ptlrpc_request *req) -{ - struct obd_import *imp = req->rq_import; - struct ptlrpc_sec *sec; - int rc; - - LASSERT(!req->rq_cli_ctx); - LASSERT(imp); - - rc = import_sec_validate_get(imp, &sec); - if (rc) - return rc; - - req->rq_cli_ctx = get_my_ctx(sec); - - sptlrpc_sec_put(sec); - - if (!req->rq_cli_ctx) { - CERROR("req %p: fail to get context\n", req); - return -ECONNREFUSED; - } - - return 0; -} - -/** - * Drop the context for \a req. - * \pre req->rq_cli_ctx != NULL. - * \post req->rq_cli_ctx == NULL. - * - * If \a sync == 0, this function should return quickly without sleep; - * otherwise it might trigger and wait for the whole process of sending - * an context-destroying rpc to server. - */ -void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync) -{ - LASSERT(req); - LASSERT(req->rq_cli_ctx); - - /* request might be asked to release earlier while still - * in the context waiting list. 
- */ - if (!list_empty(&req->rq_ctx_chain)) { - spin_lock(&req->rq_cli_ctx->cc_lock); - list_del_init(&req->rq_ctx_chain); - spin_unlock(&req->rq_cli_ctx->cc_lock); - } - - sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync); - req->rq_cli_ctx = NULL; -} - -static -int sptlrpc_req_ctx_switch(struct ptlrpc_request *req, - struct ptlrpc_cli_ctx *oldctx, - struct ptlrpc_cli_ctx *newctx) -{ - struct sptlrpc_flavor old_flvr; - char *reqmsg = NULL; /* to workaround old gcc */ - int reqmsg_size; - int rc = 0; - - LASSERT(req->rq_reqmsg); - LASSERT(req->rq_reqlen); - LASSERT(req->rq_replen); - - CDEBUG(D_SEC, "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), switch sec %p(%s) -> %p(%s)\n", - req, - oldctx, oldctx->cc_vcred.vc_uid, sec2target_str(oldctx->cc_sec), - newctx, newctx->cc_vcred.vc_uid, sec2target_str(newctx->cc_sec), - oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name, - newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name); - - /* save flavor */ - old_flvr = req->rq_flvr; - - /* save request message */ - reqmsg_size = req->rq_reqlen; - if (reqmsg_size != 0) { - reqmsg = kvzalloc(reqmsg_size, GFP_NOFS); - if (!reqmsg) - return -ENOMEM; - memcpy(reqmsg, req->rq_reqmsg, reqmsg_size); - } - - /* release old req/rep buf */ - req->rq_cli_ctx = oldctx; - sptlrpc_cli_free_reqbuf(req); - sptlrpc_cli_free_repbuf(req); - req->rq_cli_ctx = newctx; - - /* recalculate the flavor */ - sptlrpc_req_set_flavor(req, 0); - - /* alloc new request buffer - * we don't need to alloc reply buffer here, leave it to the - * rest procedure of ptlrpc - */ - if (reqmsg_size != 0) { - rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size); - if (!rc) { - LASSERT(req->rq_reqmsg); - memcpy(req->rq_reqmsg, reqmsg, reqmsg_size); - } else { - CWARN("failed to alloc reqbuf: %d\n", rc); - req->rq_flvr = old_flvr; - } - - kvfree(reqmsg); - } - return rc; -} - -/** - * If current context of \a req is dead somehow, e.g. we just switched flavor - * thus marked original contexts dead, we'll find a new context for it. 
if - * no switch is needed, \a req will end up with the same context. - * - * \note a request must have a context, to keep other parts of code happy. - * In any case of failure during the switching, we must restore the old one. - */ -static int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req) -{ - struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx; - struct ptlrpc_cli_ctx *newctx; - int rc; - - LASSERT(oldctx); - - sptlrpc_cli_ctx_get(oldctx); - sptlrpc_req_put_ctx(req, 0); - - rc = sptlrpc_req_get_ctx(req); - if (unlikely(rc)) { - LASSERT(!req->rq_cli_ctx); - - /* restore old ctx */ - req->rq_cli_ctx = oldctx; - return rc; - } - - newctx = req->rq_cli_ctx; - LASSERT(newctx); - - if (unlikely(newctx == oldctx && - test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) { - /* - * still get the old dead ctx, usually means system too busy - */ - CDEBUG(D_SEC, - "ctx (%p, fl %lx) doesn't switch, relax a little bit\n", - newctx, newctx->cc_flags); - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC)); - } else if (unlikely(!test_bit(PTLRPC_CTX_UPTODATE_BIT, &newctx->cc_flags))) { - /* - * new ctx not up to date yet - */ - CDEBUG(D_SEC, - "ctx (%p, fl %lx) doesn't switch, not up to date yet\n", - newctx, newctx->cc_flags); - } else { - /* - * it's possible newctx == oldctx if we're switching - * subflavor with the same sec. 
- */ - rc = sptlrpc_req_ctx_switch(req, oldctx, newctx); - if (rc) { - /* restore old ctx */ - sptlrpc_req_put_ctx(req, 0); - req->rq_cli_ctx = oldctx; - return rc; - } - - LASSERT(req->rq_cli_ctx == newctx); - } - - sptlrpc_cli_ctx_put(oldctx, 1); - return 0; -} - -static -int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx) -{ - if (cli_ctx_is_refreshed(ctx)) - return 1; - return 0; -} - -static -int ctx_refresh_timeout(struct ptlrpc_request *req) -{ - int rc; - - /* conn_cnt is needed in expire_one_request */ - lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt); - - rc = ptlrpc_expire_one_request(req, 1); - /* if we started recovery, we should mark this ctx dead; otherwise - * in case of lgssd died nobody would retire this ctx, following - * connecting will still find the same ctx thus cause deadlock. - * there's an assumption that expire time of the request should be - * later than the context refresh expire time. - */ - if (rc == 0) - req->rq_cli_ctx->cc_ops->force_die(req->rq_cli_ctx, 0); - return rc; -} - -static -void ctx_refresh_interrupt(struct ptlrpc_request *req) -{ - spin_lock(&req->rq_lock); - req->rq_intr = 1; - spin_unlock(&req->rq_lock); -} - -static -void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx) -{ - spin_lock(&ctx->cc_lock); - if (!list_empty(&req->rq_ctx_chain)) - list_del_init(&req->rq_ctx_chain); - spin_unlock(&ctx->cc_lock); -} - -/** - * To refresh the context of \req, if it's not up-to-date. - * \param timeout - * - < 0: don't wait - * - = 0: wait until success or fatal error occur - * - > 0: timeout value (in seconds) - * - * The status of the context could be subject to be changed by other threads - * at any time. We allow this race, but once we return with 0, the caller will - * suppose it's uptodated and keep using it until the owning rpc is done. - * - * \retval 0 only if the context is uptodated. - * \retval -ev error number. 
- */ -int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout) -{ - struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - struct ptlrpc_sec *sec; - int rc; - - LASSERT(ctx); - - if (req->rq_ctx_init || req->rq_ctx_fini) - return 0; - - /* - * during the process a request's context might change type even - * (e.g. from gss ctx to null ctx), so each loop we need to re-check - * everything - */ -again: - rc = import_sec_validate_get(req->rq_import, &sec); - if (rc) - return rc; - - if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) { - CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n", - req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc); - req_off_ctx_list(req, ctx); - sptlrpc_req_replace_dead_ctx(req); - ctx = req->rq_cli_ctx; - } - sptlrpc_sec_put(sec); - - if (cli_ctx_is_eternal(ctx)) - return 0; - - if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) { - LASSERT(ctx->cc_ops->refresh); - ctx->cc_ops->refresh(ctx); - } - LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0); - - LASSERT(ctx->cc_ops->validate); - if (ctx->cc_ops->validate(ctx) == 0) { - req_off_ctx_list(req, ctx); - return 0; - } - - if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) { - spin_lock(&req->rq_lock); - req->rq_err = 1; - spin_unlock(&req->rq_lock); - req_off_ctx_list(req, ctx); - return -EPERM; - } - - /* - * There's a subtle issue for resending RPCs, suppose following - * situation: - * 1. the request was sent to server. - * 2. recovery was kicked start, after finished the request was - * marked as resent. - * 3. resend the request. - * 4. old reply from server received, we accept and verify the reply. - * this has to be success, otherwise the error will be aware - * by application. - * 5. new reply from server received, dropped by LNet. - * - * Note the xid of old & new request is the same. We can't simply - * change xid for the resent request because the server replies on - * it for reply reconstruction. 
- * - * Commonly the original context should be uptodate because we - * have a expiry nice time; server will keep its context because - * we at least hold a ref of old context which prevent context - * destroying RPC being sent. So server still can accept the request - * and finish the RPC. But if that's not the case: - * 1. If server side context has been trimmed, a NO_CONTEXT will - * be returned, gss_cli_ctx_verify/unseal will switch to new - * context by force. - * 2. Current context never be refreshed, then we are fine: we - * never really send request with old context before. - */ - if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) && - unlikely(req->rq_reqmsg) && - lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { - req_off_ctx_list(req, ctx); - return 0; - } - - if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) { - req_off_ctx_list(req, ctx); - /* - * don't switch ctx if import was deactivated - */ - if (req->rq_import->imp_deactive) { - spin_lock(&req->rq_lock); - req->rq_err = 1; - spin_unlock(&req->rq_lock); - return -EINTR; - } - - rc = sptlrpc_req_replace_dead_ctx(req); - if (rc) { - LASSERT(ctx == req->rq_cli_ctx); - CERROR("req %p: failed to replace dead ctx %p: %d\n", - req, ctx, rc); - spin_lock(&req->rq_lock); - req->rq_err = 1; - spin_unlock(&req->rq_lock); - return rc; - } - - ctx = req->rq_cli_ctx; - goto again; - } - - /* - * Now we're sure this context is during upcall, add myself into - * waiting list - */ - spin_lock(&ctx->cc_lock); - if (list_empty(&req->rq_ctx_chain)) - list_add(&req->rq_ctx_chain, &ctx->cc_req_list); - spin_unlock(&ctx->cc_lock); - - if (timeout < 0) - return -EWOULDBLOCK; - - /* Clear any flags that may be present from previous sends */ - LASSERT(req->rq_receiving_reply == 0); - spin_lock(&req->rq_lock); - req->rq_err = 0; - req->rq_timedout = 0; - req->rq_resend = 0; - req->rq_restart = 0; - spin_unlock(&req->rq_lock); - - rc = wait_event_idle_timeout(req->rq_reply_waitq, - ctx_check_refresh(ctx), - 
timeout * HZ); - if (rc == 0 && ctx_refresh_timeout(req) == 0) { - /* Keep waiting, but enable some signals */ - rc = l_wait_event_abortable(req->rq_reply_waitq, - ctx_check_refresh(ctx)); - if (rc == 0) - rc = 1; - } - - if (rc > 0) - /* condition is true */ - rc = 0; - else if (rc == 0) - /* Timed out */ - rc = -ETIMEDOUT; - else { - /* Aborted by signal */ - rc = -EINTR; - ctx_refresh_interrupt(req); - } - - /* - * following cases could lead us here: - * - successfully refreshed; - * - interrupted; - * - timedout, and we don't want recover from the failure; - * - timedout, and waked up upon recovery finished; - * - someone else mark this ctx dead by force; - * - someone invalidate the req and call ptlrpc_client_wake_req(), - * e.g. ptlrpc_abort_inflight(); - */ - if (!cli_ctx_is_refreshed(ctx)) { - /* timed out or interrupted */ - req_off_ctx_list(req, ctx); - - LASSERT(rc != 0); - return rc; - } - - goto again; -} - -/** - * Initialize flavor settings for \a req, according to \a opcode. 
- * - * \note this could be called in two situations: - * - new request from ptlrpc_pre_req(), with proper @opcode - * - old request which changed ctx in the middle, with @opcode == 0 - */ -void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode) -{ - struct ptlrpc_sec *sec; - - LASSERT(req->rq_import); - LASSERT(req->rq_cli_ctx); - LASSERT(req->rq_cli_ctx->cc_sec); - LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0); - - /* special security flags according to opcode */ - switch (opcode) { - case OST_READ: - case MDS_READPAGE: - case MGS_CONFIG_READ: - case OBD_IDX_READ: - req->rq_bulk_read = 1; - break; - case OST_WRITE: - case MDS_WRITEPAGE: - req->rq_bulk_write = 1; - break; - case SEC_CTX_INIT: - req->rq_ctx_init = 1; - break; - case SEC_CTX_FINI: - req->rq_ctx_fini = 1; - break; - case 0: - /* init/fini rpc won't be resend, so can't be here */ - LASSERT(req->rq_ctx_init == 0); - LASSERT(req->rq_ctx_fini == 0); - - /* cleanup flags, which should be recalculated */ - req->rq_pack_udesc = 0; - req->rq_pack_bulk = 0; - break; - } - - sec = req->rq_cli_ctx->cc_sec; - - spin_lock(&sec->ps_lock); - req->rq_flvr = sec->ps_flvr; - spin_unlock(&sec->ps_lock); - - /* force SVC_NULL for context initiation rpc, SVC_INTG for context - * destruction rpc - */ - if (unlikely(req->rq_ctx_init)) - flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL); - else if (unlikely(req->rq_ctx_fini)) - flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG); - - /* user descriptor flag, null security can't do it anyway */ - if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) && - (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL)) - req->rq_pack_udesc = 1; - - /* bulk security flag */ - if ((req->rq_bulk_read || req->rq_bulk_write) && - sptlrpc_flavor_has_bulk(&req->rq_flvr)) - req->rq_pack_bulk = 1; -} - -void sptlrpc_request_out_callback(struct ptlrpc_request *req) -{ - if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV) - return; - - LASSERT(req->rq_clrbuf); - if 
(req->rq_pool || !req->rq_reqbuf) - return; - - kvfree(req->rq_reqbuf); - req->rq_reqbuf = NULL; - req->rq_reqbuf_len = 0; -} - -/** - * Given an import \a imp, check whether current user has a valid context - * or not. We may create a new context and try to refresh it, and try - * repeatedly try in case of non-fatal errors. Return 0 means success. - */ -int sptlrpc_import_check_ctx(struct obd_import *imp) -{ - struct ptlrpc_sec *sec; - struct ptlrpc_cli_ctx *ctx; - struct ptlrpc_request *req = NULL; - int rc; - - might_sleep(); - - sec = sptlrpc_import_sec_ref(imp); - ctx = get_my_ctx(sec); - sptlrpc_sec_put(sec); - - if (!ctx) - return -ENOMEM; - - if (cli_ctx_is_eternal(ctx) || - ctx->cc_ops->validate(ctx) == 0) { - sptlrpc_cli_ctx_put(ctx, 1); - return 0; - } - - if (cli_ctx_is_error(ctx)) { - sptlrpc_cli_ctx_put(ctx, 1); - return -EACCES; - } - - req = ptlrpc_request_cache_alloc(GFP_NOFS); - if (!req) - return -ENOMEM; - - ptlrpc_cli_req_init(req); - atomic_set(&req->rq_refcount, 10000); - - req->rq_import = imp; - req->rq_flvr = sec->ps_flvr; - req->rq_cli_ctx = ctx; - - rc = sptlrpc_req_refresh_ctx(req, 0); - LASSERT(list_empty(&req->rq_ctx_chain)); - sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1); - ptlrpc_request_cache_free(req); - - return rc; -} - -/** - * Used by ptlrpc client, to perform the pre-defined security transformation - * upon the request message of \a req. After this function called, - * req->rq_reqmsg is still accessible as clear text. - */ -int sptlrpc_cli_wrap_request(struct ptlrpc_request *req) -{ - struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - int rc = 0; - - LASSERT(ctx); - LASSERT(ctx->cc_sec); - LASSERT(req->rq_reqbuf || req->rq_clrbuf); - - /* we wrap bulk request here because now we can be sure - * the context is uptodate. 
- */ - if (req->rq_bulk) { - rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk); - if (rc) - return rc; - } - - switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) { - case SPTLRPC_SVC_NULL: - case SPTLRPC_SVC_AUTH: - case SPTLRPC_SVC_INTG: - LASSERT(ctx->cc_ops->sign); - rc = ctx->cc_ops->sign(ctx, req); - break; - case SPTLRPC_SVC_PRIV: - LASSERT(ctx->cc_ops->seal); - rc = ctx->cc_ops->seal(ctx, req); - break; - default: - LBUG(); - } - - if (rc == 0) { - LASSERT(req->rq_reqdata_len); - LASSERT(req->rq_reqdata_len % 8 == 0); - LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len); - } - - return rc; -} - -static int do_cli_unwrap_reply(struct ptlrpc_request *req) -{ - struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - int rc; - - LASSERT(ctx); - LASSERT(ctx->cc_sec); - LASSERT(req->rq_repbuf); - LASSERT(req->rq_repdata); - LASSERT(!req->rq_repmsg); - - req->rq_rep_swab_mask = 0; - - rc = __lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len); - switch (rc) { - case 1: - lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF); - case 0: - break; - default: - CERROR("failed unpack reply: x%llu\n", req->rq_xid); - return -EPROTO; - } - - if (req->rq_repdata_len < sizeof(struct lustre_msg)) { - CERROR("replied data length %d too small\n", - req->rq_repdata_len); - return -EPROTO; - } - - if (SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr) != - SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) { - CERROR("reply policy %u doesn't match request policy %u\n", - SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr), - SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)); - return -EPROTO; - } - - switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) { - case SPTLRPC_SVC_NULL: - case SPTLRPC_SVC_AUTH: - case SPTLRPC_SVC_INTG: - LASSERT(ctx->cc_ops->verify); - rc = ctx->cc_ops->verify(ctx, req); - break; - case SPTLRPC_SVC_PRIV: - LASSERT(ctx->cc_ops->unseal); - rc = ctx->cc_ops->unseal(ctx, req); - break; - default: - LBUG(); - } - LASSERT(rc || req->rq_repmsg || req->rq_resend); - - if 
(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL && - !req->rq_ctx_init) - req->rq_rep_swab_mask = 0; - return rc; -} - -/** - * Used by ptlrpc client, to perform security transformation upon the reply - * message of \a req. After return successfully, req->rq_repmsg points to - * the reply message in clear text. - * - * \pre the reply buffer should have been un-posted from LNet, so nothing is - * going to change. - */ -int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req) -{ - LASSERT(req->rq_repbuf); - LASSERT(!req->rq_repdata); - LASSERT(!req->rq_repmsg); - LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len); - - if (req->rq_reply_off == 0 && - (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) { - CERROR("real reply with offset 0\n"); - return -EPROTO; - } - - if (req->rq_reply_off % 8 != 0) { - CERROR("reply at odd offset %u\n", req->rq_reply_off); - return -EPROTO; - } - - req->rq_repdata = (struct lustre_msg *) - (req->rq_repbuf + req->rq_reply_off); - req->rq_repdata_len = req->rq_nob_received; - - return do_cli_unwrap_reply(req); -} - -/** - * Used by ptlrpc client, to perform security transformation upon the early - * reply message of \a req. We expect the rq_reply_off is 0, and - * rq_nob_received is the early reply size. - * - * Because the receive buffer might be still posted, the reply data might be - * changed at any time, no matter we're holding rq_lock or not. For this reason - * we allocate a separate ptlrpc_request and reply buffer for early reply - * processing. - * - * \retval 0 success, \a req_ret is filled with a duplicated ptlrpc_request. - * Later the caller must call sptlrpc_cli_finish_early_reply() on the returned - * \a *req_ret to release it. - * \retval -ev error number, and \a req_ret will not be set. 
- */ -int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req, - struct ptlrpc_request **req_ret) -{ - struct ptlrpc_request *early_req; - char *early_buf; - int early_bufsz, early_size; - int rc; - - early_req = ptlrpc_request_cache_alloc(GFP_NOFS); - if (!early_req) - return -ENOMEM; - - ptlrpc_cli_req_init(early_req); - - early_size = req->rq_nob_received; - early_bufsz = size_roundup_power2(early_size); - early_buf = kvzalloc(early_bufsz, GFP_NOFS); - if (!early_buf) { - rc = -ENOMEM; - goto err_req; - } - - /* sanity checkings and copy data out, do it inside spinlock */ - spin_lock(&req->rq_lock); - - if (req->rq_replied) { - spin_unlock(&req->rq_lock); - rc = -EALREADY; - goto err_buf; - } - - LASSERT(req->rq_repbuf); - LASSERT(!req->rq_repdata); - LASSERT(!req->rq_repmsg); - - if (req->rq_reply_off != 0) { - CERROR("early reply with offset %u\n", req->rq_reply_off); - spin_unlock(&req->rq_lock); - rc = -EPROTO; - goto err_buf; - } - - if (req->rq_nob_received != early_size) { - /* even another early arrived the size should be the same */ - CERROR("data size has changed from %u to %u\n", - early_size, req->rq_nob_received); - spin_unlock(&req->rq_lock); - rc = -EINVAL; - goto err_buf; - } - - if (req->rq_nob_received < sizeof(struct lustre_msg)) { - CERROR("early reply length %d too small\n", - req->rq_nob_received); - spin_unlock(&req->rq_lock); - rc = -EALREADY; - goto err_buf; - } - - memcpy(early_buf, req->rq_repbuf, early_size); - spin_unlock(&req->rq_lock); - - early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx); - early_req->rq_flvr = req->rq_flvr; - early_req->rq_repbuf = early_buf; - early_req->rq_repbuf_len = early_bufsz; - early_req->rq_repdata = (struct lustre_msg *)early_buf; - early_req->rq_repdata_len = early_size; - early_req->rq_early = 1; - early_req->rq_reqmsg = req->rq_reqmsg; - - rc = do_cli_unwrap_reply(early_req); - if (rc) { - DEBUG_REQ(D_ADAPTTO, early_req, - "error %d unwrap early reply", rc); - goto err_ctx; - } - - 
LASSERT(early_req->rq_repmsg); - *req_ret = early_req; - return 0; - -err_ctx: - sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1); -err_buf: - kvfree(early_buf); -err_req: - ptlrpc_request_cache_free(early_req); - return rc; -} - -/** - * Used by ptlrpc client, to release a processed early reply \a early_req. - * - * \pre \a early_req was obtained from calling sptlrpc_cli_unwrap_early_reply(). - */ -void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req) -{ - LASSERT(early_req->rq_repbuf); - LASSERT(early_req->rq_repdata); - LASSERT(early_req->rq_repmsg); - - sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1); - kvfree(early_req->rq_repbuf); - ptlrpc_request_cache_free(early_req); -} - -/************************************************** - * sec ID * - **************************************************/ - -/* - * "fixed" sec (e.g. null) use sec_id < 0 - */ -static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1); - -int sptlrpc_get_next_secid(void) -{ - return atomic_inc_return(&sptlrpc_sec_id); -} -EXPORT_SYMBOL(sptlrpc_get_next_secid); - -/************************************************** - * client side high-level security APIs * - **************************************************/ - -static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid, - int grace, int force) -{ - struct ptlrpc_sec_policy *policy = sec->ps_policy; - - LASSERT(policy->sp_cops); - LASSERT(policy->sp_cops->flush_ctx_cache); - - return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force); -} - -static void sec_cop_destroy_sec(struct ptlrpc_sec *sec) -{ - struct ptlrpc_sec_policy *policy = sec->ps_policy; - - LASSERT_ATOMIC_ZERO(&sec->ps_refcount); - LASSERT_ATOMIC_ZERO(&sec->ps_nctx); - LASSERT(policy->sp_cops->destroy_sec); - - CDEBUG(D_SEC, "%s@%p: being destroyed\n", sec->ps_policy->sp_name, sec); - - policy->sp_cops->destroy_sec(sec); - sptlrpc_policy_put(policy); -} - -static void sptlrpc_sec_kill(struct ptlrpc_sec *sec) -{ - LASSERT_ATOMIC_POS(&sec->ps_refcount); - - if 
(sec->ps_policy->sp_cops->kill_sec) { - sec->ps_policy->sp_cops->kill_sec(sec); - - sec_cop_flush_ctx_cache(sec, -1, 1, 1); - } -} - -static struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec) -{ - if (sec) - atomic_inc(&sec->ps_refcount); - - return sec; -} - -void sptlrpc_sec_put(struct ptlrpc_sec *sec) -{ - if (sec) { - LASSERT_ATOMIC_POS(&sec->ps_refcount); - - if (atomic_dec_and_test(&sec->ps_refcount)) { - sptlrpc_gc_del_sec(sec); - sec_cop_destroy_sec(sec); - } - } -} -EXPORT_SYMBOL(sptlrpc_sec_put); - -/* - * policy module is responsible for taking reference of import - */ -static -struct ptlrpc_sec *sptlrpc_sec_create(struct obd_import *imp, - struct ptlrpc_svc_ctx *svc_ctx, - struct sptlrpc_flavor *sf, - enum lustre_sec_part sp) -{ - struct ptlrpc_sec_policy *policy; - struct ptlrpc_sec *sec; - char str[32]; - - if (svc_ctx) { - LASSERT(imp->imp_dlm_fake == 1); - - CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n", - imp->imp_obd->obd_type->typ_name, - imp->imp_obd->obd_name, - sptlrpc_flavor2name(sf, str, sizeof(str))); - - policy = sptlrpc_policy_get(svc_ctx->sc_policy); - sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY; - } else { - LASSERT(imp->imp_dlm_fake == 0); - - CDEBUG(D_SEC, "%s %s: select security flavor %s\n", - imp->imp_obd->obd_type->typ_name, - imp->imp_obd->obd_name, - sptlrpc_flavor2name(sf, str, sizeof(str))); - - policy = sptlrpc_wireflavor2policy(sf->sf_rpc); - if (!policy) { - CERROR("invalid flavor 0x%x\n", sf->sf_rpc); - return NULL; - } - } - - sec = policy->sp_cops->create_sec(imp, svc_ctx, sf); - if (sec) { - atomic_inc(&sec->ps_refcount); - - sec->ps_part = sp; - - if (sec->ps_gc_interval && policy->sp_cops->gc_ctx) - sptlrpc_gc_add_sec(sec); - } else { - sptlrpc_policy_put(policy); - } - - return sec; -} - -struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp) -{ - struct ptlrpc_sec *sec; - - spin_lock(&imp->imp_lock); - sec = sptlrpc_sec_get(imp->imp_sec); - spin_unlock(&imp->imp_lock); - 
- return sec; -} -EXPORT_SYMBOL(sptlrpc_import_sec_ref); - -static void sptlrpc_import_sec_install(struct obd_import *imp, - struct ptlrpc_sec *sec) -{ - struct ptlrpc_sec *old_sec; - - LASSERT_ATOMIC_POS(&sec->ps_refcount); - - spin_lock(&imp->imp_lock); - old_sec = imp->imp_sec; - imp->imp_sec = sec; - spin_unlock(&imp->imp_lock); - - if (old_sec) { - sptlrpc_sec_kill(old_sec); - - /* balance the ref taken by this import */ - sptlrpc_sec_put(old_sec); - } -} - -static inline -int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2) -{ - return (memcmp(sf1, sf2, sizeof(*sf1)) == 0); -} - -static inline -void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src) -{ - *dst = *src; -} - -static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp, - struct ptlrpc_sec *sec, - struct sptlrpc_flavor *sf) -{ - char str1[32], str2[32]; - - if (sec->ps_flvr.sf_flags != sf->sf_flags) - CDEBUG(D_SEC, "changing sec flags: %s -> %s\n", - sptlrpc_secflags2str(sec->ps_flvr.sf_flags, - str1, sizeof(str1)), - sptlrpc_secflags2str(sf->sf_flags, - str2, sizeof(str2))); - - spin_lock(&sec->ps_lock); - flavor_copy(&sec->ps_flvr, sf); - spin_unlock(&sec->ps_lock); -} - -/** - * To get an appropriate ptlrpc_sec for the \a imp, according to the current - * configuration. Upon called, imp->imp_sec may or may not be NULL. - * - * - regular import: \a svc_ctx should be NULL and \a flvr is ignored; - * - reverse import: \a svc_ctx and \a flvr are obtained from incoming request. 
- */ -int sptlrpc_import_sec_adapt(struct obd_import *imp, - struct ptlrpc_svc_ctx *svc_ctx, - struct sptlrpc_flavor *flvr) -{ - struct ptlrpc_connection *conn; - struct sptlrpc_flavor sf; - struct ptlrpc_sec *sec, *newsec; - enum lustre_sec_part sp; - char str[24]; - int rc = 0; - - might_sleep(); - - if (!imp) - return 0; - - conn = imp->imp_connection; - - if (!svc_ctx) { - struct client_obd *cliobd = &imp->imp_obd->u.cli; - /* - * normal import, determine flavor from rule set, except - * for mgc the flavor is predetermined. - */ - if (cliobd->cl_sp_me == LUSTRE_SP_MGC) - sf = cliobd->cl_flvr_mgc; - else - sptlrpc_conf_choose_flavor(cliobd->cl_sp_me, - cliobd->cl_sp_to, - &cliobd->cl_target_uuid, - conn->c_self, &sf); - - sp = imp->imp_obd->u.cli.cl_sp_me; - } else { - /* reverse import, determine flavor from incoming request */ - sf = *flvr; - - if (sf.sf_rpc != SPTLRPC_FLVR_NULL) - sf.sf_flags = PTLRPC_SEC_FL_REVERSE | - PTLRPC_SEC_FL_ROOTONLY; - - sp = sptlrpc_target_sec_part(imp->imp_obd); - } - - sec = sptlrpc_import_sec_ref(imp); - if (sec) { - char str2[24]; - - if (flavor_equal(&sf, &sec->ps_flvr)) - goto out; - - CDEBUG(D_SEC, "import %s->%s: changing flavor %s -> %s\n", - imp->imp_obd->obd_name, - obd_uuid2str(&conn->c_remote_uuid), - sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)), - sptlrpc_flavor2name(&sf, str2, sizeof(str2))); - - if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) == - SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) && - SPTLRPC_FLVR_MECH(sf.sf_rpc) == - SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) { - sptlrpc_import_sec_adapt_inplace(imp, sec, &sf); - goto out; - } - } else if (SPTLRPC_FLVR_BASE(sf.sf_rpc) != - SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) { - CDEBUG(D_SEC, "import %s->%s netid %x: select flavor %s\n", - imp->imp_obd->obd_name, - obd_uuid2str(&conn->c_remote_uuid), - LNET_NIDNET(conn->c_self), - sptlrpc_flavor2name(&sf, str, sizeof(str))); - } - - mutex_lock(&imp->imp_sec_mutex); - - newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp); - if 
(newsec) { - sptlrpc_import_sec_install(imp, newsec); - } else { - CERROR("import %s->%s: failed to create new sec\n", - imp->imp_obd->obd_name, - obd_uuid2str(&conn->c_remote_uuid)); - rc = -EPERM; - } - - mutex_unlock(&imp->imp_sec_mutex); -out: - sptlrpc_sec_put(sec); - return rc; -} - -void sptlrpc_import_sec_put(struct obd_import *imp) -{ - if (imp->imp_sec) { - sptlrpc_sec_kill(imp->imp_sec); - - sptlrpc_sec_put(imp->imp_sec); - imp->imp_sec = NULL; - } -} - -static void import_flush_ctx_common(struct obd_import *imp, - uid_t uid, int grace, int force) -{ - struct ptlrpc_sec *sec; - - if (!imp) - return; - - sec = sptlrpc_import_sec_ref(imp); - if (!sec) - return; - - sec_cop_flush_ctx_cache(sec, uid, grace, force); - sptlrpc_sec_put(sec); -} - -void sptlrpc_import_flush_my_ctx(struct obd_import *imp) -{ - import_flush_ctx_common(imp, from_kuid(&init_user_ns, current_uid()), - 1, 1); -} -EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx); - -void sptlrpc_import_flush_all_ctx(struct obd_import *imp) -{ - import_flush_ctx_common(imp, -1, 1, 1); -} -EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx); - -/** - * Used by ptlrpc client to allocate request buffer of \a req. Upon return - * successfully, req->rq_reqmsg points to a buffer with size \a msgsize. - */ -int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize) -{ - struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - struct ptlrpc_sec_policy *policy; - int rc; - - LASSERT(ctx); - LASSERT(ctx->cc_sec); - LASSERT(ctx->cc_sec->ps_policy); - LASSERT(!req->rq_reqmsg); - LASSERT_ATOMIC_POS(&ctx->cc_refcount); - - policy = ctx->cc_sec->ps_policy; - rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize); - if (!rc) { - LASSERT(req->rq_reqmsg); - LASSERT(req->rq_reqbuf || req->rq_clrbuf); - - /* zeroing preallocated buffer */ - if (req->rq_pool) - memset(req->rq_reqmsg, 0, msgsize); - } - - return rc; -} - -/** - * Used by ptlrpc client to free request buffer of \a req. 
After this - * req->rq_reqmsg is set to NULL and should not be accessed anymore. - */ -void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req) -{ - struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - struct ptlrpc_sec_policy *policy; - - LASSERT(ctx); - LASSERT(ctx->cc_sec); - LASSERT(ctx->cc_sec->ps_policy); - LASSERT_ATOMIC_POS(&ctx->cc_refcount); - - if (!req->rq_reqbuf && !req->rq_clrbuf) - return; - - policy = ctx->cc_sec->ps_policy; - policy->sp_cops->free_reqbuf(ctx->cc_sec, req); - req->rq_reqmsg = NULL; -} - -/* - * NOTE caller must guarantee the buffer size is enough for the enlargement - */ -void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg, - int segment, int newsize) -{ - void *src, *dst; - int oldsize, oldmsg_size, movesize; - - LASSERT(segment < msg->lm_bufcount); - LASSERT(msg->lm_buflens[segment] <= newsize); - - if (msg->lm_buflens[segment] == newsize) - return; - - /* nothing to do if we are enlarging the last segment */ - if (segment == msg->lm_bufcount - 1) { - msg->lm_buflens[segment] = newsize; - return; - } - - oldsize = msg->lm_buflens[segment]; - - src = lustre_msg_buf(msg, segment + 1, 0); - msg->lm_buflens[segment] = newsize; - dst = lustre_msg_buf(msg, segment + 1, 0); - msg->lm_buflens[segment] = oldsize; - - /* move from segment + 1 to end segment */ - LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2); - oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); - movesize = oldmsg_size - ((unsigned long)src - (unsigned long)msg); - LASSERT(movesize >= 0); - - if (movesize) - memmove(dst, src, movesize); - - /* note we don't clear the ares where old data live, not secret */ - - /* finally set new segment size */ - msg->lm_buflens[segment] = newsize; -} -EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace); - -/** - * Used by ptlrpc client to enlarge the \a segment of request message pointed - * by req->rq_reqmsg to size \a newsize, all previously filled-in data will be - * preserved after the enlargement. 
this must be called after original request - * buffer being allocated. - * - * \note after this be called, rq_reqmsg and rq_reqlen might have been changed, - * so caller should refresh its local pointers if needed. - */ -int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req, - int segment, int newsize) -{ - struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - struct ptlrpc_sec_cops *cops; - struct lustre_msg *msg = req->rq_reqmsg; - - LASSERT(ctx); - LASSERT(msg); - LASSERT(msg->lm_bufcount > segment); - LASSERT(msg->lm_buflens[segment] <= newsize); - - if (msg->lm_buflens[segment] == newsize) - return 0; - - cops = ctx->cc_sec->ps_policy->sp_cops; - LASSERT(cops->enlarge_reqbuf); - return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize); -} -EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf); - -/** - * Used by ptlrpc client to allocate reply buffer of \a req. - * - * \note After this, req->rq_repmsg is still not accessible. - */ -int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize) -{ - struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - struct ptlrpc_sec_policy *policy; - - LASSERT(ctx); - LASSERT(ctx->cc_sec); - LASSERT(ctx->cc_sec->ps_policy); - - if (req->rq_repbuf) - return 0; - - policy = ctx->cc_sec->ps_policy; - return policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize); -} - -/** - * Used by ptlrpc client to free reply buffer of \a req. After this - * req->rq_repmsg is set to NULL and should not be accessed anymore. 
- */ -void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req) -{ - struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx; - struct ptlrpc_sec_policy *policy; - - LASSERT(ctx); - LASSERT(ctx->cc_sec); - LASSERT(ctx->cc_sec->ps_policy); - LASSERT_ATOMIC_POS(&ctx->cc_refcount); - - if (!req->rq_repbuf) - return; - LASSERT(req->rq_repbuf_len); - - policy = ctx->cc_sec->ps_policy; - policy->sp_cops->free_repbuf(ctx->cc_sec, req); - req->rq_repmsg = NULL; -} - -static int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp, - struct ptlrpc_svc_ctx *ctx) -{ - struct ptlrpc_sec_policy *policy = ctx->sc_policy; - - if (!policy->sp_sops->install_rctx) - return 0; - return policy->sp_sops->install_rctx(imp, ctx); -} - -/**************************************** - * server side security * - ****************************************/ - -static int flavor_allowed(struct sptlrpc_flavor *exp, - struct ptlrpc_request *req) -{ - struct sptlrpc_flavor *flvr = &req->rq_flvr; - - if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc) - return 1; - - if ((req->rq_ctx_init || req->rq_ctx_fini) && - SPTLRPC_FLVR_POLICY(exp->sf_rpc) == - SPTLRPC_FLVR_POLICY(flvr->sf_rpc) && - SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc)) - return 1; - - return 0; -} - -#define EXP_FLVR_UPDATE_EXPIRE (OBD_TIMEOUT_DEFAULT + 10) - -/** - * Given an export \a exp, check whether the flavor of incoming \a req - * is allowed by the export \a exp. Main logic is about taking care of - * changing configurations. Return 0 means success. - */ -int sptlrpc_target_export_check(struct obd_export *exp, - struct ptlrpc_request *req) -{ - struct sptlrpc_flavor flavor; - - if (!exp) - return 0; - - /* client side export has no imp_reverse, skip - * FIXME maybe we should check flavor this as well??? 
- */ - if (!exp->exp_imp_reverse) - return 0; - - /* don't care about ctx fini rpc */ - if (req->rq_ctx_fini) - return 0; - - spin_lock(&exp->exp_lock); - - /* if flavor just changed (exp->exp_flvr_changed != 0), we wait for - * the first req with the new flavor, then treat it as current flavor, - * adapt reverse sec according to it. - * note the first rpc with new flavor might not be with root ctx, in - * which case delay the sec_adapt by leaving exp_flvr_adapt == 1. - */ - if (unlikely(exp->exp_flvr_changed) && - flavor_allowed(&exp->exp_flvr_old[1], req)) { - /* make the new flavor as "current", and old ones as - * about-to-expire - */ - CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp, - exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc); - flavor = exp->exp_flvr_old[1]; - exp->exp_flvr_old[1] = exp->exp_flvr_old[0]; - exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0]; - exp->exp_flvr_old[0] = exp->exp_flvr; - exp->exp_flvr_expire[0] = ktime_get_real_seconds() + - EXP_FLVR_UPDATE_EXPIRE; - exp->exp_flvr = flavor; - - /* flavor change finished */ - exp->exp_flvr_changed = 0; - LASSERT(exp->exp_flvr_adapt == 1); - - /* if it's gss, we only interested in root ctx init */ - if (req->rq_auth_gss && - !(req->rq_ctx_init && - (req->rq_auth_usr_root || req->rq_auth_usr_mdt || - req->rq_auth_usr_ost))) { - spin_unlock(&exp->exp_lock); - CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d:%d)\n", - req->rq_auth_gss, req->rq_ctx_init, - req->rq_auth_usr_root, req->rq_auth_usr_mdt, - req->rq_auth_usr_ost); - return 0; - } - - exp->exp_flvr_adapt = 0; - spin_unlock(&exp->exp_lock); - - return sptlrpc_import_sec_adapt(exp->exp_imp_reverse, - req->rq_svc_ctx, &flavor); - } - - /* if it equals to the current flavor, we accept it, but need to - * dealing with reverse sec/ctx - */ - if (likely(flavor_allowed(&exp->exp_flvr, req))) { - /* most cases should return here, we only interested in - * gss root ctx init - */ - if (!req->rq_auth_gss || !req->rq_ctx_init || - 
(!req->rq_auth_usr_root && !req->rq_auth_usr_mdt && - !req->rq_auth_usr_ost)) { - spin_unlock(&exp->exp_lock); - return 0; - } - - /* if flavor just changed, we should not proceed, just leave - * it and current flavor will be discovered and replaced - * shortly, and let _this_ rpc pass through - */ - if (exp->exp_flvr_changed) { - LASSERT(exp->exp_flvr_adapt); - spin_unlock(&exp->exp_lock); - return 0; - } - - if (exp->exp_flvr_adapt) { - exp->exp_flvr_adapt = 0; - CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n", - exp, exp->exp_flvr.sf_rpc, - exp->exp_flvr_old[0].sf_rpc, - exp->exp_flvr_old[1].sf_rpc); - flavor = exp->exp_flvr; - spin_unlock(&exp->exp_lock); - - return sptlrpc_import_sec_adapt(exp->exp_imp_reverse, - req->rq_svc_ctx, - &flavor); - } else { - CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, install rvs ctx\n", - exp, exp->exp_flvr.sf_rpc, - exp->exp_flvr_old[0].sf_rpc, - exp->exp_flvr_old[1].sf_rpc); - spin_unlock(&exp->exp_lock); - - return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse, - req->rq_svc_ctx); - } - } - - if (exp->exp_flvr_expire[0]) { - if (exp->exp_flvr_expire[0] >= ktime_get_real_seconds()) { - if (flavor_allowed(&exp->exp_flvr_old[0], req)) { - CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the middle one (%lld)\n", exp, - exp->exp_flvr.sf_rpc, - exp->exp_flvr_old[0].sf_rpc, - exp->exp_flvr_old[1].sf_rpc, - (s64)(exp->exp_flvr_expire[0] - - ktime_get_real_seconds())); - spin_unlock(&exp->exp_lock); - return 0; - } - } else { - CDEBUG(D_SEC, "mark middle expired\n"); - exp->exp_flvr_expire[0] = 0; - } - CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp, - exp->exp_flvr.sf_rpc, - exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc, - req->rq_flvr.sf_rpc); - } - - /* now it doesn't match the current flavor, the only chance we can - * accept it is match the old flavors which is not expired. 
- */ - if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) { - if (exp->exp_flvr_expire[1] >= ktime_get_real_seconds()) { - if (flavor_allowed(&exp->exp_flvr_old[1], req)) { - CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the oldest one (%lld)\n", - exp, - exp->exp_flvr.sf_rpc, - exp->exp_flvr_old[0].sf_rpc, - exp->exp_flvr_old[1].sf_rpc, - (s64)(exp->exp_flvr_expire[1] - - ktime_get_real_seconds())); - spin_unlock(&exp->exp_lock); - return 0; - } - } else { - CDEBUG(D_SEC, "mark oldest expired\n"); - exp->exp_flvr_expire[1] = 0; - } - CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match found\n", - exp, exp->exp_flvr.sf_rpc, - exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc, - req->rq_flvr.sf_rpc); - } else { - CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n", - exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc, - exp->exp_flvr_old[1].sf_rpc); - } - - spin_unlock(&exp->exp_lock); - - CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u|%u) with unauthorized flavor %x, expect %x|%x(%+lld)|%x(%+lld)\n", - exp, exp->exp_obd->obd_name, - req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini, - req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_auth_usr_ost, - req->rq_flvr.sf_rpc, - exp->exp_flvr.sf_rpc, - exp->exp_flvr_old[0].sf_rpc, - exp->exp_flvr_expire[0] ? - (s64)(exp->exp_flvr_expire[0] - ktime_get_real_seconds()) : 0, - exp->exp_flvr_old[1].sf_rpc, - exp->exp_flvr_expire[1] ? 
- (s64)(exp->exp_flvr_expire[1] - ktime_get_real_seconds()) : 0); - return -EACCES; -} -EXPORT_SYMBOL(sptlrpc_target_export_check); - -static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc) -{ - /* peer's claim is unreliable unless gss is being used */ - if (!req->rq_auth_gss || svc_rc == SECSVC_DROP) - return svc_rc; - - switch (req->rq_sp_from) { - case LUSTRE_SP_CLI: - if (req->rq_auth_usr_mdt || req->rq_auth_usr_ost) { - DEBUG_REQ(D_ERROR, req, "faked source CLI"); - svc_rc = SECSVC_DROP; - } - break; - case LUSTRE_SP_MDT: - if (!req->rq_auth_usr_mdt) { - DEBUG_REQ(D_ERROR, req, "faked source MDT"); - svc_rc = SECSVC_DROP; - } - break; - case LUSTRE_SP_OST: - if (!req->rq_auth_usr_ost) { - DEBUG_REQ(D_ERROR, req, "faked source OST"); - svc_rc = SECSVC_DROP; - } - break; - case LUSTRE_SP_MGS: - case LUSTRE_SP_MGC: - if (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt && - !req->rq_auth_usr_ost) { - DEBUG_REQ(D_ERROR, req, "faked source MGC/MGS"); - svc_rc = SECSVC_DROP; - } - break; - case LUSTRE_SP_ANY: - default: - DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from); - svc_rc = SECSVC_DROP; - } - - return svc_rc; -} - -/** - * Used by ptlrpc server, to perform transformation upon request message of - * incoming \a req. This must be the first thing to do with a incoming - * request in ptlrpc layer. - * - * \retval SECSVC_OK success, and req->rq_reqmsg point to request message in - * clear text, size is req->rq_reqlen; also req->rq_svc_ctx is set. - * \retval SECSVC_COMPLETE success, the request has been fully processed, and - * reply message has been prepared. - * \retval SECSVC_DROP failed, this request should be dropped. 
- */ -int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req) -{ - struct ptlrpc_sec_policy *policy; - struct lustre_msg *msg = req->rq_reqbuf; - int rc; - - LASSERT(msg); - LASSERT(!req->rq_reqmsg); - LASSERT(!req->rq_repmsg); - LASSERT(!req->rq_svc_ctx); - - req->rq_req_swab_mask = 0; - - rc = __lustre_unpack_msg(msg, req->rq_reqdata_len); - switch (rc) { - case 1: - lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF); - case 0: - break; - default: - CERROR("error unpacking request from %s x%llu\n", - libcfs_id2str(req->rq_peer), req->rq_xid); - return SECSVC_DROP; - } - - req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr); - req->rq_sp_from = LUSTRE_SP_ANY; - req->rq_auth_uid = -1; - req->rq_auth_mapped_uid = -1; - - policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc); - if (!policy) { - CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc); - return SECSVC_DROP; - } - - LASSERT(policy->sp_sops->accept); - rc = policy->sp_sops->accept(req); - sptlrpc_policy_put(policy); - LASSERT(req->rq_reqmsg || rc != SECSVC_OK); - LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP); - - /* - * if it's not null flavor (which means embedded packing msg), - * reset the swab mask for the coming inner msg unpacking. - */ - if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) - req->rq_req_swab_mask = 0; - - /* sanity check for the request source */ - rc = sptlrpc_svc_check_from(req, rc); - return rc; -} - -/** - * Used by ptlrpc server, to allocate reply buffer for \a req. If succeed, - * req->rq_reply_state is set, and req->rq_reply_state->rs_msg point to - * a buffer of \a msglen size. 
- */ -int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen) -{ - struct ptlrpc_sec_policy *policy; - struct ptlrpc_reply_state *rs; - int rc; - - LASSERT(req->rq_svc_ctx); - LASSERT(req->rq_svc_ctx->sc_policy); - - policy = req->rq_svc_ctx->sc_policy; - LASSERT(policy->sp_sops->alloc_rs); - - rc = policy->sp_sops->alloc_rs(req, msglen); - if (unlikely(rc == -ENOMEM)) { - struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; - - if (svcpt->scp_service->srv_max_reply_size < - msglen + sizeof(struct ptlrpc_reply_state)) { - /* Just return failure if the size is too big */ - CERROR("size of message is too big (%zd), %d allowed\n", - msglen + sizeof(struct ptlrpc_reply_state), - svcpt->scp_service->srv_max_reply_size); - return -ENOMEM; - } - - /* failed alloc, try emergency pool */ - rs = lustre_get_emerg_rs(svcpt); - if (!rs) - return -ENOMEM; - - req->rq_reply_state = rs; - rc = policy->sp_sops->alloc_rs(req, msglen); - if (rc) { - lustre_put_emerg_rs(rs); - req->rq_reply_state = NULL; - } - } - - LASSERT(rc != 0 || - (req->rq_reply_state && req->rq_reply_state->rs_msg)); - - return rc; -} - -/** - * Used by ptlrpc server, to perform transformation upon reply message. - * - * \post req->rq_reply_off is set to appropriate server-controlled reply offset. - * \post req->rq_repmsg and req->rq_reply_state->rs_msg becomes inaccessible. - */ -int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req) -{ - struct ptlrpc_sec_policy *policy; - int rc; - - LASSERT(req->rq_svc_ctx); - LASSERT(req->rq_svc_ctx->sc_policy); - - policy = req->rq_svc_ctx->sc_policy; - LASSERT(policy->sp_sops->authorize); - - rc = policy->sp_sops->authorize(req); - LASSERT(rc || req->rq_reply_state->rs_repdata_len); - - return rc; -} - -/** - * Used by ptlrpc server, to free reply_state. 
- */ -void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs) -{ - struct ptlrpc_sec_policy *policy; - unsigned int prealloc; - - LASSERT(rs->rs_svc_ctx); - LASSERT(rs->rs_svc_ctx->sc_policy); - - policy = rs->rs_svc_ctx->sc_policy; - LASSERT(policy->sp_sops->free_rs); - - prealloc = rs->rs_prealloc; - policy->sp_sops->free_rs(rs); - - if (prealloc) - lustre_put_emerg_rs(rs); -} - -void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req) -{ - struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; - - if (ctx) - atomic_inc(&ctx->sc_refcount); -} - -void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req) -{ - struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx; - - if (!ctx) - return; - - LASSERT_ATOMIC_POS(&ctx->sc_refcount); - if (atomic_dec_and_test(&ctx->sc_refcount)) { - if (ctx->sc_policy->sp_sops->free_ctx) - ctx->sc_policy->sp_sops->free_ctx(ctx); - } - req->rq_svc_ctx = NULL; -} - -/**************************************** - * bulk security * - ****************************************/ - -/** - * Perform transformation upon bulk data pointed by \a desc. This is called - * before transforming the request message. - */ -int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc) -{ - struct ptlrpc_cli_ctx *ctx; - - LASSERT(req->rq_bulk_read || req->rq_bulk_write); - - if (!req->rq_pack_bulk) - return 0; - - ctx = req->rq_cli_ctx; - if (ctx->cc_ops->wrap_bulk) - return ctx->cc_ops->wrap_bulk(ctx, req, desc); - return 0; -} -EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk); - -/** - * This is called after unwrap the reply message. - * return nob of actual plain text size received, or error code. 
- */ -int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc, - int nob) -{ - struct ptlrpc_cli_ctx *ctx; - int rc; - - LASSERT(req->rq_bulk_read && !req->rq_bulk_write); - - if (!req->rq_pack_bulk) - return desc->bd_nob_transferred; - - ctx = req->rq_cli_ctx; - if (ctx->cc_ops->unwrap_bulk) { - rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc); - if (rc < 0) - return rc; - } - return desc->bd_nob_transferred; -} -EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read); - -/** - * This is called after unwrap the reply message. - * return 0 for success or error code. - */ -int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc) -{ - struct ptlrpc_cli_ctx *ctx; - int rc; - - LASSERT(!req->rq_bulk_read && req->rq_bulk_write); - - if (!req->rq_pack_bulk) - return 0; - - ctx = req->rq_cli_ctx; - if (ctx->cc_ops->unwrap_bulk) { - rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc); - if (rc < 0) - return rc; - } - - /* - * if everything is going right, nob should equals to nob_transferred. - * in case of privacy mode, nob_transferred needs to be adjusted. 
- */ - if (desc->bd_nob != desc->bd_nob_transferred) { - CERROR("nob %d doesn't match transferred nob %d\n", - desc->bd_nob, desc->bd_nob_transferred); - return -EPROTO; - } - - return 0; -} -EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write); - -/**************************************** - * user descriptor helpers * - ****************************************/ - -int sptlrpc_current_user_desc_size(void) -{ - int ngroups; - - ngroups = current_ngroups; - - if (ngroups > LUSTRE_MAX_GROUPS) - ngroups = LUSTRE_MAX_GROUPS; - return sptlrpc_user_desc_size(ngroups); -} -EXPORT_SYMBOL(sptlrpc_current_user_desc_size); - -int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset) -{ - struct ptlrpc_user_desc *pud; - - pud = lustre_msg_buf(msg, offset, 0); - - if (!pud) - return -EINVAL; - - pud->pud_uid = from_kuid(&init_user_ns, current_uid()); - pud->pud_gid = from_kgid(&init_user_ns, current_gid()); - pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid()); - pud->pud_fsgid = from_kgid(&init_user_ns, current_fsgid()); - pud->pud_cap = current_cap().cap[0]; - pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4; - - task_lock(current); - if (pud->pud_ngroups > current_ngroups) - pud->pud_ngroups = current_ngroups; - memcpy(pud->pud_groups, current_cred()->group_info->gid, - pud->pud_ngroups * sizeof(__u32)); - task_unlock(current); - - return 0; -} -EXPORT_SYMBOL(sptlrpc_pack_user_desc); - -int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset, int swabbed) -{ - struct ptlrpc_user_desc *pud; - int i; - - pud = lustre_msg_buf(msg, offset, sizeof(*pud)); - if (!pud) - return -EINVAL; - - if (swabbed) { - __swab32s(&pud->pud_uid); - __swab32s(&pud->pud_gid); - __swab32s(&pud->pud_fsuid); - __swab32s(&pud->pud_fsgid); - __swab32s(&pud->pud_cap); - __swab32s(&pud->pud_ngroups); - } - - if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) { - CERROR("%u groups is too large\n", pud->pud_ngroups); - return -EINVAL; - } - - if (sizeof(*pud) + pud->pud_ngroups * 
sizeof(__u32) > - msg->lm_buflens[offset]) { - CERROR("%u groups are claimed but bufsize only %u\n", - pud->pud_ngroups, msg->lm_buflens[offset]); - return -EINVAL; - } - - if (swabbed) { - for (i = 0; i < pud->pud_ngroups; i++) - __swab32s(&pud->pud_groups[i]); - } - - return 0; -} -EXPORT_SYMBOL(sptlrpc_unpack_user_desc); - -/**************************************** - * misc helpers * - ****************************************/ - -const char *sec2target_str(struct ptlrpc_sec *sec) -{ - if (!sec || !sec->ps_import || !sec->ps_import->imp_obd) - return "*"; - if (sec_is_reverse(sec)) - return "c"; - return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid); -} -EXPORT_SYMBOL(sec2target_str); - -/* - * return true if the bulk data is protected - */ -bool sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr) -{ - switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) { - case SPTLRPC_BULK_SVC_INTG: - case SPTLRPC_BULK_SVC_PRIV: - return true; - default: - return false; - } -} -EXPORT_SYMBOL(sptlrpc_flavor_has_bulk); - -/**************************************** - * crypto API helper/alloc blkciper * - ****************************************/ - -/**************************************** - * initialize/finalize * - ****************************************/ - -int sptlrpc_init(void) -{ - int rc; - - rwlock_init(&policy_lock); - - rc = sptlrpc_gc_init(); - if (rc) - goto out; - - rc = sptlrpc_conf_init(); - if (rc) - goto out_gc; - - rc = sptlrpc_enc_pool_init(); - if (rc) - goto out_conf; - - rc = sptlrpc_null_init(); - if (rc) - goto out_pool; - - rc = sptlrpc_plain_init(); - if (rc) - goto out_null; - - sptlrpc_lproc_init(); - - return 0; - -out_null: - sptlrpc_null_fini(); -out_pool: - sptlrpc_enc_pool_fini(); -out_conf: - sptlrpc_conf_fini(); -out_gc: - sptlrpc_gc_fini(); -out: - return rc; -} - -void sptlrpc_fini(void) -{ - sptlrpc_lproc_fini(); - sptlrpc_plain_fini(); - sptlrpc_null_fini(); - sptlrpc_enc_pool_fini(); - sptlrpc_conf_fini(); - sptlrpc_gc_fini(); -} 
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c deleted file mode 100644 index 625b9520d78f..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c +++ /dev/null @@ -1,572 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/ptlrpc/sec_bulk.c - * - * Author: Eric Mei - */ - -#define DEBUG_SUBSYSTEM S_SEC - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -/**************************************** - * bulk encryption page pools * - ****************************************/ - -#define POINTERS_PER_PAGE (PAGE_SIZE / sizeof(void *)) -#define PAGES_PER_POOL (POINTERS_PER_PAGE) - -#define IDLE_IDX_MAX (100) -#define IDLE_IDX_WEIGHT (3) - -#define CACHE_QUIESCENT_PERIOD (20) - -static struct ptlrpc_enc_page_pool { - /* - * constants - */ - unsigned long epp_max_pages; /* maximum pages can hold, const */ - unsigned int epp_max_pools; /* number of pools, const */ - - /* - * wait queue in case of not enough free pages. - */ - wait_queue_head_t epp_waitq; /* waiting threads */ - unsigned int epp_waitqlen; /* wait queue length */ - unsigned long epp_pages_short; /* # of pages wanted of in-q users */ - unsigned int epp_growing:1; /* during adding pages */ - - /* - * indicating how idle the pools are, from 0 to MAX_IDLE_IDX - * this is counted based on each time when getting pages from - * the pools, not based on time. which means in case that system - * is idled for a while but the idle_idx might still be low if no - * activities happened in the pools. 
- */ - unsigned long epp_idle_idx; - - /* last shrink time due to mem tight */ - time64_t epp_last_shrink; - time64_t epp_last_access; - - /* - * in-pool pages bookkeeping - */ - spinlock_t epp_lock; /* protect following fields */ - unsigned long epp_total_pages; /* total pages in pools */ - unsigned long epp_free_pages; /* current pages available */ - - /* - * statistics - */ - unsigned long epp_st_max_pages; /* # of pages ever reached */ - unsigned int epp_st_grows; /* # of grows */ - unsigned int epp_st_grow_fails; /* # of add pages failures */ - unsigned int epp_st_shrinks; /* # of shrinks */ - unsigned long epp_st_access; /* # of access */ - unsigned long epp_st_missings; /* # of cache missing */ - unsigned long epp_st_lowfree; /* lowest free pages reached */ - unsigned int epp_st_max_wqlen; /* highest waitqueue length */ - unsigned long epp_st_max_wait; /* in jiffies */ - unsigned long epp_st_outofmem; /* # of out of mem requests */ - /* - * pointers to pools - */ - struct page ***epp_pools; -} page_pools; - -/* - * /sys/kernel/debug/lustre/sptlrpc/encrypt_page_pools - */ -int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) -{ - spin_lock(&page_pools.epp_lock); - - seq_printf(m, - "physical pages: %lu\n" - "pages per pool: %lu\n" - "max pages: %lu\n" - "max pools: %u\n" - "total pages: %lu\n" - "total free: %lu\n" - "idle index: %lu/100\n" - "last shrink: %lds\n" - "last access: %lds\n" - "max pages reached: %lu\n" - "grows: %u\n" - "grows failure: %u\n" - "shrinks: %u\n" - "cache access: %lu\n" - "cache missing: %lu\n" - "low free mark: %lu\n" - "max waitqueue depth: %u\n" - "max wait time: %ld/%lu\n" - "out of mem: %lu\n", - totalram_pages, - PAGES_PER_POOL, - page_pools.epp_max_pages, - page_pools.epp_max_pools, - page_pools.epp_total_pages, - page_pools.epp_free_pages, - page_pools.epp_idle_idx, - (long)(ktime_get_seconds() - page_pools.epp_last_shrink), - (long)(ktime_get_seconds() - page_pools.epp_last_access), - page_pools.epp_st_max_pages, 
- page_pools.epp_st_grows, - page_pools.epp_st_grow_fails, - page_pools.epp_st_shrinks, - page_pools.epp_st_access, - page_pools.epp_st_missings, - page_pools.epp_st_lowfree, - page_pools.epp_st_max_wqlen, - page_pools.epp_st_max_wait, - msecs_to_jiffies(MSEC_PER_SEC), - page_pools.epp_st_outofmem); - - spin_unlock(&page_pools.epp_lock); - - return 0; -} - -static void enc_pools_release_free_pages(long npages) -{ - int p_idx, g_idx; - int p_idx_max1, p_idx_max2; - - LASSERT(npages > 0); - LASSERT(npages <= page_pools.epp_free_pages); - LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages); - - /* max pool index before the release */ - p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL; - - page_pools.epp_free_pages -= npages; - page_pools.epp_total_pages -= npages; - - /* max pool index after the release */ - p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 : - ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL); - - p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; - g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; - LASSERT(page_pools.epp_pools[p_idx]); - - while (npages--) { - LASSERT(page_pools.epp_pools[p_idx]); - LASSERT(page_pools.epp_pools[p_idx][g_idx]); - - __free_page(page_pools.epp_pools[p_idx][g_idx]); - page_pools.epp_pools[p_idx][g_idx] = NULL; - - if (++g_idx == PAGES_PER_POOL) { - p_idx++; - g_idx = 0; - } - } - - /* free unused pools */ - while (p_idx_max1 < p_idx_max2) { - LASSERT(page_pools.epp_pools[p_idx_max2]); - kfree(page_pools.epp_pools[p_idx_max2]); - page_pools.epp_pools[p_idx_max2] = NULL; - p_idx_max2--; - } -} - -/* - * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. - */ -static unsigned long enc_pools_shrink_count(struct shrinker *s, - struct shrink_control *sc) -{ - /* - * if no pool access for a long time, we consider it's fully idle. - * a little race here is fine. 
- */ - if (unlikely(ktime_get_seconds() - page_pools.epp_last_access > - CACHE_QUIESCENT_PERIOD)) { - spin_lock(&page_pools.epp_lock); - page_pools.epp_idle_idx = IDLE_IDX_MAX; - spin_unlock(&page_pools.epp_lock); - } - - LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); - return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * - (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; -} - -/* - * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. - */ -static unsigned long enc_pools_shrink_scan(struct shrinker *s, - struct shrink_control *sc) -{ - spin_lock(&page_pools.epp_lock); - sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, - page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES); - if (sc->nr_to_scan > 0) { - enc_pools_release_free_pages(sc->nr_to_scan); - CDEBUG(D_SEC, "released %ld pages, %ld left\n", - (long)sc->nr_to_scan, page_pools.epp_free_pages); - - page_pools.epp_st_shrinks++; - page_pools.epp_last_shrink = ktime_get_seconds(); - } - spin_unlock(&page_pools.epp_lock); - - /* - * if no pool access for a long time, we consider it's fully idle. - * a little race here is fine. - */ - if (unlikely(ktime_get_seconds() - page_pools.epp_last_access > - CACHE_QUIESCENT_PERIOD)) { - spin_lock(&page_pools.epp_lock); - page_pools.epp_idle_idx = IDLE_IDX_MAX; - spin_unlock(&page_pools.epp_lock); - } - - LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); - return sc->nr_to_scan; -} - -static inline -int npages_to_npools(unsigned long npages) -{ - return (int)DIV_ROUND_UP(npages, PAGES_PER_POOL); -} - -/* - * return how many pages cleaned up. 
- */ -static unsigned long enc_pools_cleanup(struct page ***pools, int npools) -{ - unsigned long cleaned = 0; - int i, j; - - for (i = 0; i < npools; i++) { - if (pools[i]) { - for (j = 0; j < PAGES_PER_POOL; j++) { - if (pools[i][j]) { - __free_page(pools[i][j]); - cleaned++; - } - } - kfree(pools[i]); - pools[i] = NULL; - } - } - - return cleaned; -} - -static inline void enc_pools_wakeup(void) -{ - assert_spin_locked(&page_pools.epp_lock); - - if (unlikely(page_pools.epp_waitqlen)) { - LASSERT(waitqueue_active(&page_pools.epp_waitq)); - wake_up_all(&page_pools.epp_waitq); - } -} - -/* - * Export the number of free pages in the pool - */ -int get_free_pages_in_pool(void) -{ - return page_pools.epp_free_pages; -} - -/* - * Let outside world know if enc_pool full capacity is reached - */ -int pool_is_at_full_capacity(void) -{ - return (page_pools.epp_total_pages == page_pools.epp_max_pages); -} - -void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) -{ - int p_idx, g_idx; - int i; - - LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type)); - - if (!GET_ENC_KIOV(desc)) - return; - - LASSERT(desc->bd_iov_count > 0); - - spin_lock(&page_pools.epp_lock); - - p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; - g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; - - LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <= - page_pools.epp_total_pages); - LASSERT(page_pools.epp_pools[p_idx]); - - for (i = 0; i < desc->bd_iov_count; i++) { - LASSERT(BD_GET_ENC_KIOV(desc, i).bv_page); - LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]); - LASSERT(!page_pools.epp_pools[p_idx][g_idx]); - - page_pools.epp_pools[p_idx][g_idx] = - BD_GET_ENC_KIOV(desc, i).bv_page; - - if (++g_idx == PAGES_PER_POOL) { - p_idx++; - g_idx = 0; - } - } - - page_pools.epp_free_pages += desc->bd_iov_count; - - enc_pools_wakeup(); - - spin_unlock(&page_pools.epp_lock); - - kfree(GET_ENC_KIOV(desc)); - GET_ENC_KIOV(desc) = NULL; -} - -static inline void enc_pools_alloc(void) -{ - 
LASSERT(page_pools.epp_max_pools); - page_pools.epp_pools = - kvzalloc(page_pools.epp_max_pools * - sizeof(*page_pools.epp_pools), - GFP_KERNEL); -} - -static inline void enc_pools_free(void) -{ - LASSERT(page_pools.epp_max_pools); - LASSERT(page_pools.epp_pools); - - kvfree(page_pools.epp_pools); -} - -static struct shrinker pools_shrinker = { - .count_objects = enc_pools_shrink_count, - .scan_objects = enc_pools_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; - -int sptlrpc_enc_pool_init(void) -{ - int rc; - - /* - * maximum capacity is 1/8 of total physical memory. - * is the 1/8 a good number? - */ - page_pools.epp_max_pages = totalram_pages / 8; - page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages); - - init_waitqueue_head(&page_pools.epp_waitq); - page_pools.epp_waitqlen = 0; - page_pools.epp_pages_short = 0; - - page_pools.epp_growing = 0; - - page_pools.epp_idle_idx = 0; - page_pools.epp_last_shrink = ktime_get_seconds(); - page_pools.epp_last_access = ktime_get_seconds(); - - spin_lock_init(&page_pools.epp_lock); - page_pools.epp_total_pages = 0; - page_pools.epp_free_pages = 0; - - page_pools.epp_st_max_pages = 0; - page_pools.epp_st_grows = 0; - page_pools.epp_st_grow_fails = 0; - page_pools.epp_st_shrinks = 0; - page_pools.epp_st_access = 0; - page_pools.epp_st_missings = 0; - page_pools.epp_st_lowfree = 0; - page_pools.epp_st_max_wqlen = 0; - page_pools.epp_st_max_wait = 0; - page_pools.epp_st_outofmem = 0; - - enc_pools_alloc(); - if (!page_pools.epp_pools) - return -ENOMEM; - - rc = register_shrinker(&pools_shrinker); - if (rc) - enc_pools_free(); - - return rc; -} - -void sptlrpc_enc_pool_fini(void) -{ - unsigned long cleaned, npools; - - LASSERT(page_pools.epp_pools); - LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); - - unregister_shrinker(&pools_shrinker); - - npools = npages_to_npools(page_pools.epp_total_pages); - cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); - LASSERT(cleaned == 
page_pools.epp_total_pages); - - enc_pools_free(); - - if (page_pools.epp_st_access > 0) { - CDEBUG(D_SEC, - "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%ld, out of mem %lu\n", - page_pools.epp_st_max_pages, page_pools.epp_st_grows, - page_pools.epp_st_grow_fails, - page_pools.epp_st_shrinks, page_pools.epp_st_access, - page_pools.epp_st_missings, page_pools.epp_st_max_wqlen, - page_pools.epp_st_max_wait, - msecs_to_jiffies(MSEC_PER_SEC), - page_pools.epp_st_outofmem); - } -} - -static int cfs_hash_alg_id[] = { - [BULK_HASH_ALG_NULL] = CFS_HASH_ALG_NULL, - [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32, - [BULK_HASH_ALG_CRC32] = CFS_HASH_ALG_CRC32, - [BULK_HASH_ALG_MD5] = CFS_HASH_ALG_MD5, - [BULK_HASH_ALG_SHA1] = CFS_HASH_ALG_SHA1, - [BULK_HASH_ALG_SHA256] = CFS_HASH_ALG_SHA256, - [BULK_HASH_ALG_SHA384] = CFS_HASH_ALG_SHA384, - [BULK_HASH_ALG_SHA512] = CFS_HASH_ALG_SHA512, -}; - -const char *sptlrpc_get_hash_name(__u8 hash_alg) -{ - return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]); -} - -__u8 sptlrpc_get_hash_alg(const char *algname) -{ - return cfs_crypto_hash_alg(algname); -} - -int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed) -{ - struct ptlrpc_bulk_sec_desc *bsd; - int size = msg->lm_buflens[offset]; - - bsd = lustre_msg_buf(msg, offset, sizeof(*bsd)); - if (!bsd) { - CERROR("Invalid bulk sec desc: size %d\n", size); - return -EINVAL; - } - - if (swabbed) - __swab32s(&bsd->bsd_nob); - - if (unlikely(bsd->bsd_version != 0)) { - CERROR("Unexpected version %u\n", bsd->bsd_version); - return -EPROTO; - } - - if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) { - CERROR("Invalid type %u\n", bsd->bsd_type); - return -EPROTO; - } - - /* FIXME more sanity check here */ - - if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL && - bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG && - bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) { - CERROR("Invalid svc %u\n", bsd->bsd_svc); - return -EPROTO; - } - - 
return 0; -} -EXPORT_SYMBOL(bulk_sec_desc_unpack); - -int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, - void *buf, int buflen) -{ - struct ahash_request *hdesc; - int hashsize; - unsigned int bufsize; - int i, err; - - LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX); - LASSERT(buflen >= 4); - - hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0); - if (IS_ERR(hdesc)) { - CERROR("Unable to initialize checksum hash %s\n", - cfs_crypto_hash_name(cfs_hash_alg_id[alg])); - return PTR_ERR(hdesc); - } - - hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]); - - for (i = 0; i < desc->bd_iov_count; i++) { - cfs_crypto_hash_update_page(hdesc, - BD_GET_KIOV(desc, i).bv_page, - BD_GET_KIOV(desc, i).bv_offset & - ~PAGE_MASK, - BD_GET_KIOV(desc, i).bv_len); - } - - if (hashsize > buflen) { - unsigned char hashbuf[CFS_CRYPTO_HASH_DIGESTSIZE_MAX]; - - bufsize = sizeof(hashbuf); - LASSERTF(bufsize >= hashsize, "bufsize = %u < hashsize %u\n", - bufsize, hashsize); - err = cfs_crypto_hash_final(hdesc, hashbuf, &bufsize); - memcpy(buf, hashbuf, buflen); - } else { - bufsize = buflen; - err = cfs_crypto_hash_final(hdesc, buf, &bufsize); - } - - return err; -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c deleted file mode 100644 index 2389f9a8f534..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c +++ /dev/null @@ -1,850 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_SEC - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd) -{ - const char *type = obd->obd_type->typ_name; - - if (!strcmp(type, LUSTRE_MDT_NAME)) - return LUSTRE_SP_MDT; - if (!strcmp(type, LUSTRE_OST_NAME)) - return LUSTRE_SP_OST; - if (!strcmp(type, LUSTRE_MGS_NAME)) - return LUSTRE_SP_MGS; - - CERROR("unknown target %p(%s)\n", obd, type); - return LUSTRE_SP_ANY; -} - -/**************************************** - * user supplied flavor string parsing * - ****************************************/ - -/* - * format: [-] - */ -int sptlrpc_parse_flavor(const char *str, struct sptlrpc_flavor *flvr) -{ - char buf[32]; - char *bulk, *alg; - - memset(flvr, 0, sizeof(*flvr)); - - if (!str || str[0] == '\0') { - flvr->sf_rpc = SPTLRPC_FLVR_INVALID; - return 0; - } - - strlcpy(buf, str, sizeof(buf)); - - bulk = strchr(buf, '-'); - if (bulk) - *bulk++ = '\0'; - - flvr->sf_rpc = sptlrpc_name2flavor_base(buf); - if (flvr->sf_rpc == SPTLRPC_FLVR_INVALID) - goto err_out; - - /* - * currently only base flavor "plain" can have bulk specification. 
- */ - if (flvr->sf_rpc == SPTLRPC_FLVR_PLAIN) { - flvr->u_bulk.hash.hash_alg = BULK_HASH_ALG_ADLER32; - if (bulk) { - /* - * format: plain-hash: - */ - alg = strchr(bulk, ':'); - if (!alg) - goto err_out; - *alg++ = '\0'; - - if (strcmp(bulk, "hash")) - goto err_out; - - flvr->u_bulk.hash.hash_alg = sptlrpc_get_hash_alg(alg); - if (flvr->u_bulk.hash.hash_alg >= BULK_HASH_ALG_MAX) - goto err_out; - } - - if (flvr->u_bulk.hash.hash_alg == BULK_HASH_ALG_NULL) - flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_NULL); - else - flvr_set_bulk_svc(&flvr->sf_rpc, SPTLRPC_BULK_SVC_INTG); - } else { - if (bulk) - goto err_out; - } - - flvr->sf_flags = 0; - return 0; - -err_out: - CERROR("invalid flavor string: %s\n", str); - return -EINVAL; -} -EXPORT_SYMBOL(sptlrpc_parse_flavor); - -/**************************************** - * configure rules * - ****************************************/ - -static void get_default_flavor(struct sptlrpc_flavor *sf) -{ - memset(sf, 0, sizeof(*sf)); - - sf->sf_rpc = SPTLRPC_FLVR_NULL; - sf->sf_flags = 0; -} - -static void sptlrpc_rule_init(struct sptlrpc_rule *rule) -{ - rule->sr_netid = LNET_NIDNET(LNET_NID_ANY); - rule->sr_from = LUSTRE_SP_ANY; - rule->sr_to = LUSTRE_SP_ANY; - rule->sr_padding = 0; - - get_default_flavor(&rule->sr_flvr); -} - -/* - * format: network[.direction]=flavor - */ -static int sptlrpc_parse_rule(char *param, struct sptlrpc_rule *rule) -{ - char *flavor, *dir; - int rc; - - sptlrpc_rule_init(rule); - - flavor = strchr(param, '='); - if (!flavor) { - CERROR("invalid param, no '='\n"); - return -EINVAL; - } - *flavor++ = '\0'; - - dir = strchr(param, '.'); - if (dir) - *dir++ = '\0'; - - /* 1.1 network */ - if (strcmp(param, "default")) { - rule->sr_netid = libcfs_str2net(param); - if (rule->sr_netid == LNET_NIDNET(LNET_NID_ANY)) { - CERROR("invalid network name: %s\n", param); - return -EINVAL; - } - } - - /* 1.2 direction */ - if (dir) { - if (!strcmp(dir, "mdt2ost")) { - rule->sr_from = LUSTRE_SP_MDT; - rule->sr_to 
= LUSTRE_SP_OST; - } else if (!strcmp(dir, "mdt2mdt")) { - rule->sr_from = LUSTRE_SP_MDT; - rule->sr_to = LUSTRE_SP_MDT; - } else if (!strcmp(dir, "cli2ost")) { - rule->sr_from = LUSTRE_SP_CLI; - rule->sr_to = LUSTRE_SP_OST; - } else if (!strcmp(dir, "cli2mdt")) { - rule->sr_from = LUSTRE_SP_CLI; - rule->sr_to = LUSTRE_SP_MDT; - } else { - CERROR("invalid rule dir segment: %s\n", dir); - return -EINVAL; - } - } - - /* 2.1 flavor */ - rc = sptlrpc_parse_flavor(flavor, &rule->sr_flvr); - if (rc) - return -EINVAL; - - return 0; -} - -static void sptlrpc_rule_set_free(struct sptlrpc_rule_set *rset) -{ - LASSERT(rset->srs_nslot || - (rset->srs_nrule == 0 && !rset->srs_rules)); - - if (rset->srs_nslot) { - kfree(rset->srs_rules); - sptlrpc_rule_set_init(rset); - } -} - -/* - * return 0 if the rule set could accommodate one more rule. - */ -static int sptlrpc_rule_set_expand(struct sptlrpc_rule_set *rset) -{ - struct sptlrpc_rule *rules; - int nslot; - - might_sleep(); - - if (rset->srs_nrule < rset->srs_nslot) - return 0; - - nslot = rset->srs_nslot + 8; - - /* better use realloc() if available */ - rules = kcalloc(nslot, sizeof(*rset->srs_rules), GFP_NOFS); - if (!rules) - return -ENOMEM; - - if (rset->srs_nrule) { - LASSERT(rset->srs_nslot && rset->srs_rules); - memcpy(rules, rset->srs_rules, - rset->srs_nrule * sizeof(*rset->srs_rules)); - - kfree(rset->srs_rules); - } - - rset->srs_rules = rules; - rset->srs_nslot = nslot; - return 0; -} - -static inline int rule_spec_dir(struct sptlrpc_rule *rule) -{ - return (rule->sr_from != LUSTRE_SP_ANY || - rule->sr_to != LUSTRE_SP_ANY); -} - -static inline int rule_spec_net(struct sptlrpc_rule *rule) -{ - return (rule->sr_netid != LNET_NIDNET(LNET_NID_ANY)); -} - -static inline int rule_match_dir(struct sptlrpc_rule *r1, - struct sptlrpc_rule *r2) -{ - return (r1->sr_from == r2->sr_from && r1->sr_to == r2->sr_to); -} - -static inline int rule_match_net(struct sptlrpc_rule *r1, - struct sptlrpc_rule *r2) -{ - return 
(r1->sr_netid == r2->sr_netid); -} - -/* - * merge @rule into @rset. - * the @rset slots might be expanded. - */ -static int sptlrpc_rule_set_merge(struct sptlrpc_rule_set *rset, - struct sptlrpc_rule *rule) -{ - struct sptlrpc_rule *p = rset->srs_rules; - int spec_dir, spec_net; - int rc, n, match = 0; - - might_sleep(); - - spec_net = rule_spec_net(rule); - spec_dir = rule_spec_dir(rule); - - for (n = 0; n < rset->srs_nrule; n++) { - p = &rset->srs_rules[n]; - - /* test network match, if failed: - * - spec rule: skip rules which is also spec rule match, until - * we hit a wild rule, which means no more chance - * - wild rule: skip until reach the one which is also wild - * and matches - */ - if (!rule_match_net(p, rule)) { - if (spec_net) { - if (rule_spec_net(p)) - continue; - else - break; - } else { - continue; - } - } - - /* test dir match, same logic as net matching */ - if (!rule_match_dir(p, rule)) { - if (spec_dir) { - if (rule_spec_dir(p)) - continue; - else - break; - } else { - continue; - } - } - - /* find a match */ - match = 1; - break; - } - - if (match) { - LASSERT(n >= 0 && n < rset->srs_nrule); - - if (rule->sr_flvr.sf_rpc == SPTLRPC_FLVR_INVALID) { - /* remove this rule */ - if (n < rset->srs_nrule - 1) - memmove(&rset->srs_rules[n], - &rset->srs_rules[n + 1], - (rset->srs_nrule - n - 1) * - sizeof(*rule)); - rset->srs_nrule--; - } else { - /* override the rule */ - memcpy(&rset->srs_rules[n], rule, sizeof(*rule)); - } - } else { - LASSERT(n >= 0 && n <= rset->srs_nrule); - - if (rule->sr_flvr.sf_rpc != SPTLRPC_FLVR_INVALID) { - rc = sptlrpc_rule_set_expand(rset); - if (rc) - return rc; - - if (n < rset->srs_nrule) - memmove(&rset->srs_rules[n + 1], - &rset->srs_rules[n], - (rset->srs_nrule - n) * sizeof(*rule)); - memcpy(&rset->srs_rules[n], rule, sizeof(*rule)); - rset->srs_nrule++; - } else { - CDEBUG(D_CONFIG, "ignore the unmatched deletion\n"); - } - } - - return 0; -} - -/** - * given from/to/nid, determine a matching flavor in ruleset. 
- * return 1 if a match found, otherwise return 0. - */ -static int sptlrpc_rule_set_choose(struct sptlrpc_rule_set *rset, - enum lustre_sec_part from, - enum lustre_sec_part to, - lnet_nid_t nid, - struct sptlrpc_flavor *sf) -{ - struct sptlrpc_rule *r; - int n; - - for (n = 0; n < rset->srs_nrule; n++) { - r = &rset->srs_rules[n]; - - if (LNET_NIDNET(nid) != LNET_NIDNET(LNET_NID_ANY) && - r->sr_netid != LNET_NIDNET(LNET_NID_ANY) && - LNET_NIDNET(nid) != r->sr_netid) - continue; - - if (from != LUSTRE_SP_ANY && r->sr_from != LUSTRE_SP_ANY && - from != r->sr_from) - continue; - - if (to != LUSTRE_SP_ANY && r->sr_to != LUSTRE_SP_ANY && - to != r->sr_to) - continue; - - *sf = r->sr_flvr; - return 1; - } - - return 0; -} - -/********************************** - * sptlrpc configuration support * - **********************************/ - -struct sptlrpc_conf_tgt { - struct list_head sct_list; - char sct_name[MAX_OBD_NAME]; - struct sptlrpc_rule_set sct_rset; -}; - -struct sptlrpc_conf { - struct list_head sc_list; - char sc_fsname[MTI_NAME_MAXLEN]; - unsigned int sc_modified; /* modified during updating */ - unsigned int sc_updated:1, /* updated copy from MGS */ - sc_local:1; /* local copy from target */ - struct sptlrpc_rule_set sc_rset; /* fs general rules */ - struct list_head sc_tgts; /* target-specific rules */ -}; - -static struct mutex sptlrpc_conf_lock; -static LIST_HEAD(sptlrpc_confs); - -static inline int is_hex(char c) -{ - return ((c >= '0' && c <= '9') || - (c >= 'a' && c <= 'f')); -} - -static void target2fsname(const char *tgt, char *fsname, int buflen) -{ - const char *ptr; - int len; - - ptr = strrchr(tgt, '-'); - if (ptr) { - if ((strncmp(ptr, "-MDT", 4) != 0 && - strncmp(ptr, "-OST", 4) != 0) || - !is_hex(ptr[4]) || !is_hex(ptr[5]) || - !is_hex(ptr[6]) || !is_hex(ptr[7])) - ptr = NULL; - } - - /* if we didn't find the pattern, treat the whole string as fsname */ - if (!ptr) - len = strlen(tgt); - else - len = ptr - tgt; - - len = min(len, buflen - 1); - 
memcpy(fsname, tgt, len); - fsname[len] = '\0'; -} - -static void sptlrpc_conf_free_rsets(struct sptlrpc_conf *conf) -{ - struct sptlrpc_conf_tgt *conf_tgt, *conf_tgt_next; - - sptlrpc_rule_set_free(&conf->sc_rset); - - list_for_each_entry_safe(conf_tgt, conf_tgt_next, - &conf->sc_tgts, sct_list) { - sptlrpc_rule_set_free(&conf_tgt->sct_rset); - list_del(&conf_tgt->sct_list); - kfree(conf_tgt); - } - LASSERT(list_empty(&conf->sc_tgts)); - - conf->sc_updated = 0; - conf->sc_local = 0; -} - -static void sptlrpc_conf_free(struct sptlrpc_conf *conf) -{ - CDEBUG(D_SEC, "free sptlrpc conf %s\n", conf->sc_fsname); - - sptlrpc_conf_free_rsets(conf); - list_del(&conf->sc_list); - kfree(conf); -} - -static -struct sptlrpc_conf_tgt *sptlrpc_conf_get_tgt(struct sptlrpc_conf *conf, - const char *name, - int create) -{ - struct sptlrpc_conf_tgt *conf_tgt; - - list_for_each_entry(conf_tgt, &conf->sc_tgts, sct_list) { - if (strcmp(conf_tgt->sct_name, name) == 0) - return conf_tgt; - } - - if (!create) - return NULL; - - conf_tgt = kzalloc(sizeof(*conf_tgt), GFP_NOFS); - if (conf_tgt) { - strlcpy(conf_tgt->sct_name, name, sizeof(conf_tgt->sct_name)); - sptlrpc_rule_set_init(&conf_tgt->sct_rset); - list_add(&conf_tgt->sct_list, &conf->sc_tgts); - } - - return conf_tgt; -} - -static -struct sptlrpc_conf *sptlrpc_conf_get(const char *fsname, - int create) -{ - struct sptlrpc_conf *conf; - size_t len; - - list_for_each_entry(conf, &sptlrpc_confs, sc_list) { - if (strcmp(conf->sc_fsname, fsname) == 0) - return conf; - } - - if (!create) - return NULL; - - conf = kzalloc(sizeof(*conf), GFP_NOFS); - if (!conf) - return NULL; - - len = strlcpy(conf->sc_fsname, fsname, sizeof(conf->sc_fsname)); - if (len >= sizeof(conf->sc_fsname)) { - kfree(conf); - return NULL; - } - sptlrpc_rule_set_init(&conf->sc_rset); - INIT_LIST_HEAD(&conf->sc_tgts); - list_add(&conf->sc_list, &sptlrpc_confs); - - CDEBUG(D_SEC, "create sptlrpc conf %s\n", conf->sc_fsname); - return conf; -} - -/** - * caller must 
hold conf_lock already. - */ -static int sptlrpc_conf_merge_rule(struct sptlrpc_conf *conf, - const char *target, - struct sptlrpc_rule *rule) -{ - struct sptlrpc_conf_tgt *conf_tgt; - struct sptlrpc_rule_set *rule_set; - - /* fsname == target means general rules for the whole fs */ - if (strcmp(conf->sc_fsname, target) == 0) { - rule_set = &conf->sc_rset; - } else { - conf_tgt = sptlrpc_conf_get_tgt(conf, target, 1); - if (conf_tgt) { - rule_set = &conf_tgt->sct_rset; - } else { - CERROR("out of memory, can't merge rule!\n"); - return -ENOMEM; - } - } - - return sptlrpc_rule_set_merge(rule_set, rule); -} - -/** - * process one LCFG_SPTLRPC_CONF record. if \a conf is NULL, we - * find one through the target name in the record inside conf_lock; - * otherwise means caller already hold conf_lock. - */ -static int __sptlrpc_process_config(struct lustre_cfg *lcfg, - struct sptlrpc_conf *conf) -{ - char *target, *param; - char fsname[MTI_NAME_MAXLEN]; - struct sptlrpc_rule rule; - int rc; - - target = lustre_cfg_string(lcfg, 1); - if (!target) { - CERROR("missing target name\n"); - return -EINVAL; - } - - param = lustre_cfg_string(lcfg, 2); - if (!param) { - CERROR("missing parameter\n"); - return -EINVAL; - } - - CDEBUG(D_SEC, "processing rule: %s.%s\n", target, param); - - /* parse rule to make sure the format is correct */ - if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) { - CERROR("Invalid sptlrpc parameter: %s\n", param); - return -EINVAL; - } - param += sizeof(PARAM_SRPC_FLVR) - 1; - - rc = sptlrpc_parse_rule(param, &rule); - if (rc) - return -EINVAL; - - if (!conf) { - target2fsname(target, fsname, sizeof(fsname)); - - mutex_lock(&sptlrpc_conf_lock); - conf = sptlrpc_conf_get(fsname, 0); - if (!conf) { - CERROR("can't find conf\n"); - rc = -ENOMEM; - } else { - rc = sptlrpc_conf_merge_rule(conf, target, &rule); - } - mutex_unlock(&sptlrpc_conf_lock); - } else { - LASSERT(mutex_is_locked(&sptlrpc_conf_lock)); - rc = 
sptlrpc_conf_merge_rule(conf, target, &rule); - } - - if (rc == 0) - conf->sc_modified++; - - return rc; -} - -int sptlrpc_process_config(struct lustre_cfg *lcfg) -{ - return __sptlrpc_process_config(lcfg, NULL); -} -EXPORT_SYMBOL(sptlrpc_process_config); - -static int logname2fsname(const char *logname, char *buf, int buflen) -{ - char *ptr; - int len; - - ptr = strrchr(logname, '-'); - if (!ptr || strcmp(ptr, "-sptlrpc")) { - CERROR("%s is not a sptlrpc config log\n", logname); - return -EINVAL; - } - - len = min((int)(ptr - logname), buflen - 1); - - memcpy(buf, logname, len); - buf[len] = '\0'; - return 0; -} - -void sptlrpc_conf_log_update_begin(const char *logname) -{ - struct sptlrpc_conf *conf; - char fsname[16]; - - if (logname2fsname(logname, fsname, sizeof(fsname))) - return; - - mutex_lock(&sptlrpc_conf_lock); - - conf = sptlrpc_conf_get(fsname, 0); - if (conf) { - if (conf->sc_local) { - LASSERT(conf->sc_updated == 0); - sptlrpc_conf_free_rsets(conf); - } - conf->sc_modified = 0; - } - - mutex_unlock(&sptlrpc_conf_lock); -} -EXPORT_SYMBOL(sptlrpc_conf_log_update_begin); - -/** - * mark a config log has been updated - */ -void sptlrpc_conf_log_update_end(const char *logname) -{ - struct sptlrpc_conf *conf; - char fsname[16]; - - if (logname2fsname(logname, fsname, sizeof(fsname))) - return; - - mutex_lock(&sptlrpc_conf_lock); - - conf = sptlrpc_conf_get(fsname, 0); - if (conf) { - /* - * if original state is not updated, make sure the - * modified counter > 0 to enforce updating local copy. 
- */ - if (conf->sc_updated == 0) - conf->sc_modified++; - - conf->sc_updated = 1; - } - - mutex_unlock(&sptlrpc_conf_lock); -} -EXPORT_SYMBOL(sptlrpc_conf_log_update_end); - -void sptlrpc_conf_log_start(const char *logname) -{ - char fsname[16]; - - if (logname2fsname(logname, fsname, sizeof(fsname))) - return; - - mutex_lock(&sptlrpc_conf_lock); - sptlrpc_conf_get(fsname, 1); - mutex_unlock(&sptlrpc_conf_lock); -} -EXPORT_SYMBOL(sptlrpc_conf_log_start); - -void sptlrpc_conf_log_stop(const char *logname) -{ - struct sptlrpc_conf *conf; - char fsname[16]; - - if (logname2fsname(logname, fsname, sizeof(fsname))) - return; - - mutex_lock(&sptlrpc_conf_lock); - conf = sptlrpc_conf_get(fsname, 0); - if (conf) - sptlrpc_conf_free(conf); - mutex_unlock(&sptlrpc_conf_lock); -} -EXPORT_SYMBOL(sptlrpc_conf_log_stop); - -static inline void flavor_set_flags(struct sptlrpc_flavor *sf, - enum lustre_sec_part from, - enum lustre_sec_part to, - unsigned int fl_udesc) -{ - /* - * null flavor doesn't need to set any flavor, and in fact - * we'd better not do that because everybody share a single sec. 
- */ - if (sf->sf_rpc == SPTLRPC_FLVR_NULL) - return; - - if (from == LUSTRE_SP_MDT) { - /* MDT->MDT; MDT->OST */ - sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY; - } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_OST) { - /* CLI->OST */ - sf->sf_flags |= PTLRPC_SEC_FL_ROOTONLY | PTLRPC_SEC_FL_BULK; - } else if (from == LUSTRE_SP_CLI && to == LUSTRE_SP_MDT) { - /* CLI->MDT */ - if (fl_udesc && sf->sf_rpc != SPTLRPC_FLVR_NULL) - sf->sf_flags |= PTLRPC_SEC_FL_UDESC; - } -} - -void sptlrpc_conf_choose_flavor(enum lustre_sec_part from, - enum lustre_sec_part to, - struct obd_uuid *target, - lnet_nid_t nid, - struct sptlrpc_flavor *sf) -{ - struct sptlrpc_conf *conf; - struct sptlrpc_conf_tgt *conf_tgt; - char name[MTI_NAME_MAXLEN]; - int len, rc = 0; - - target2fsname(target->uuid, name, sizeof(name)); - - mutex_lock(&sptlrpc_conf_lock); - - conf = sptlrpc_conf_get(name, 0); - if (!conf) - goto out; - - /* convert uuid name (supposed end with _UUID) to target name */ - len = strlen(target->uuid); - LASSERT(len > 5); - memcpy(name, target->uuid, len - 5); - name[len - 5] = '\0'; - - conf_tgt = sptlrpc_conf_get_tgt(conf, name, 0); - if (conf_tgt) { - rc = sptlrpc_rule_set_choose(&conf_tgt->sct_rset, - from, to, nid, sf); - if (rc) - goto out; - } - - rc = sptlrpc_rule_set_choose(&conf->sc_rset, from, to, nid, sf); -out: - mutex_unlock(&sptlrpc_conf_lock); - - if (rc == 0) - get_default_flavor(sf); - - flavor_set_flags(sf, from, to, 1); -} - -#define SEC_ADAPT_DELAY (10) - -/** - * called by client devices, notify the sptlrpc config has changed and - * do import_sec_adapt later. 
- */ -void sptlrpc_conf_client_adapt(struct obd_device *obd) -{ - struct obd_import *imp; - - LASSERT(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 || - strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0); - CDEBUG(D_SEC, "obd %s\n", obd->u.cli.cl_target_uuid.uuid); - - /* serialize with connect/disconnect import */ - down_read_nested(&obd->u.cli.cl_sem, OBD_CLI_SEM_MDCOSC); - - imp = obd->u.cli.cl_import; - if (imp) { - spin_lock(&imp->imp_lock); - if (imp->imp_sec) - imp->imp_sec_expire = ktime_get_real_seconds() + - SEC_ADAPT_DELAY; - spin_unlock(&imp->imp_lock); - } - - up_read(&obd->u.cli.cl_sem); -} -EXPORT_SYMBOL(sptlrpc_conf_client_adapt); - -int sptlrpc_conf_init(void) -{ - mutex_init(&sptlrpc_conf_lock); - return 0; -} - -void sptlrpc_conf_fini(void) -{ - struct sptlrpc_conf *conf, *conf_next; - - mutex_lock(&sptlrpc_conf_lock); - list_for_each_entry_safe(conf, conf_next, &sptlrpc_confs, sc_list) { - sptlrpc_conf_free(conf); - } - LASSERT(list_empty(&sptlrpc_confs)); - mutex_unlock(&sptlrpc_conf_lock); -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c deleted file mode 100644 index 2c8bad7b7877..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/sec_gc.c - * - * Author: Eric Mei - */ - -#define DEBUG_SUBSYSTEM S_SEC - -#include - -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -#define SEC_GC_INTERVAL (30 * 60) - -static struct mutex sec_gc_mutex; -static LIST_HEAD(sec_gc_list); -static spinlock_t sec_gc_list_lock; - -static LIST_HEAD(sec_gc_ctx_list); -static spinlock_t sec_gc_ctx_list_lock; - -static atomic_t sec_gc_wait_del = ATOMIC_INIT(0); - -void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec) -{ - LASSERT(sec->ps_policy->sp_cops->gc_ctx); - LASSERT(sec->ps_gc_interval > 0); - LASSERT(list_empty(&sec->ps_gc_list)); - - sec->ps_gc_next = ktime_get_real_seconds() + sec->ps_gc_interval; - - spin_lock(&sec_gc_list_lock); - list_add_tail(&sec->ps_gc_list, &sec_gc_list); - spin_unlock(&sec_gc_list_lock); - - CDEBUG(D_SEC, "added sec %p(%s)\n", sec, sec->ps_policy->sp_name); -} - -void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec) -{ - if (list_empty(&sec->ps_gc_list)) - return; - - might_sleep(); - - /* signal before list_del to make iteration in gc thread safe */ - atomic_inc(&sec_gc_wait_del); - - spin_lock(&sec_gc_list_lock); - list_del_init(&sec->ps_gc_list); - spin_unlock(&sec_gc_list_lock); - - /* barrier */ - mutex_lock(&sec_gc_mutex); - mutex_unlock(&sec_gc_mutex); - - atomic_dec(&sec_gc_wait_del); - - CDEBUG(D_SEC, "del sec %p(%s)\n", sec, sec->ps_policy->sp_name); -} - -static void sec_process_ctx_list(void) -{ - struct ptlrpc_cli_ctx *ctx; - - 
spin_lock(&sec_gc_ctx_list_lock); - - while (!list_empty(&sec_gc_ctx_list)) { - ctx = list_entry(sec_gc_ctx_list.next, - struct ptlrpc_cli_ctx, cc_gc_chain); - list_del_init(&ctx->cc_gc_chain); - spin_unlock(&sec_gc_ctx_list_lock); - - LASSERT(ctx->cc_sec); - LASSERT(atomic_read(&ctx->cc_refcount) == 1); - CDEBUG(D_SEC, "gc pick up ctx %p(%u->%s)\n", - ctx, ctx->cc_vcred.vc_uid, sec2target_str(ctx->cc_sec)); - sptlrpc_cli_ctx_put(ctx, 1); - - spin_lock(&sec_gc_ctx_list_lock); - } - - spin_unlock(&sec_gc_ctx_list_lock); -} - -static void sec_do_gc(struct ptlrpc_sec *sec) -{ - LASSERT(sec->ps_policy->sp_cops->gc_ctx); - - if (unlikely(sec->ps_gc_next == 0)) { - CDEBUG(D_SEC, "sec %p(%s) has 0 gc time\n", - sec, sec->ps_policy->sp_name); - return; - } - - CDEBUG(D_SEC, "check on sec %p(%s)\n", sec, sec->ps_policy->sp_name); - - if (sec->ps_gc_next > ktime_get_real_seconds()) - return; - - sec->ps_policy->sp_cops->gc_ctx(sec); - sec->ps_gc_next = ktime_get_real_seconds() + sec->ps_gc_interval; -} - -static void sec_gc_main(struct work_struct *ws); -static DECLARE_DELAYED_WORK(sec_gc_work, sec_gc_main); - -static void sec_gc_main(struct work_struct *ws) -{ - struct ptlrpc_sec *sec; - - sec_process_ctx_list(); -again: - /* go through sec list do gc. - * FIXME here we iterate through the whole list each time which - * is not optimal. we perhaps want to use balanced binary tree - * to trace each sec as order of expiry time. - * another issue here is we wakeup as fixed interval instead of - * according to each sec's expiry time - */ - mutex_lock(&sec_gc_mutex); - list_for_each_entry(sec, &sec_gc_list, ps_gc_list) { - /* if someone is waiting to be deleted, let it - * proceed as soon as possible. 
- */ - if (atomic_read(&sec_gc_wait_del)) { - CDEBUG(D_SEC, "deletion pending, start over\n"); - mutex_unlock(&sec_gc_mutex); - goto again; - } - - sec_do_gc(sec); - } - mutex_unlock(&sec_gc_mutex); - - /* check ctx list again before sleep */ - sec_process_ctx_list(); - schedule_delayed_work(&sec_gc_work, SEC_GC_INTERVAL * HZ); -} - -int sptlrpc_gc_init(void) -{ - mutex_init(&sec_gc_mutex); - spin_lock_init(&sec_gc_list_lock); - spin_lock_init(&sec_gc_ctx_list_lock); - - schedule_delayed_work(&sec_gc_work, 0); - return 0; -} - -void sptlrpc_gc_fini(void) -{ - cancel_delayed_work_sync(&sec_gc_work); -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c deleted file mode 100644 index 2bb75ebd5d98..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_lproc.c +++ /dev/null @@ -1,170 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. 
- * - * lustre/ptlrpc/sec_lproc.c - * - * Author: Eric Mei - */ - -#define DEBUG_SUBSYSTEM S_SEC - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -static char *sec_flags2str(unsigned long flags, char *buf, int bufsize) -{ - buf[0] = '\0'; - - if (flags & PTLRPC_SEC_FL_REVERSE) - strlcat(buf, "reverse,", bufsize); - if (flags & PTLRPC_SEC_FL_ROOTONLY) - strlcat(buf, "rootonly,", bufsize); - if (flags & PTLRPC_SEC_FL_UDESC) - strlcat(buf, "udesc,", bufsize); - if (flags & PTLRPC_SEC_FL_BULK) - strlcat(buf, "bulk,", bufsize); - if (buf[0] == '\0') - strlcat(buf, "-,", bufsize); - - return buf; -} - -static int sptlrpc_info_lprocfs_seq_show(struct seq_file *seq, void *v) -{ - struct obd_device *dev = seq->private; - struct client_obd *cli = &dev->u.cli; - struct ptlrpc_sec *sec = NULL; - char str[32]; - - LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 || - strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 || - strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0); - - if (cli->cl_import) - sec = sptlrpc_import_sec_ref(cli->cl_import); - if (!sec) - goto out; - - sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str)); - - seq_printf(seq, "rpc flavor: %s\n", - sptlrpc_flavor2name_base(sec->ps_flvr.sf_rpc)); - seq_printf(seq, "bulk flavor: %s\n", - sptlrpc_flavor2name_bulk(&sec->ps_flvr, str, sizeof(str))); - seq_printf(seq, "flags: %s\n", - sec_flags2str(sec->ps_flvr.sf_flags, str, sizeof(str))); - seq_printf(seq, "id: %d\n", sec->ps_id); - seq_printf(seq, "refcount: %d\n", - atomic_read(&sec->ps_refcount)); - seq_printf(seq, "nctx: %d\n", atomic_read(&sec->ps_nctx)); - seq_printf(seq, "gc internal %ld\n", sec->ps_gc_interval); - seq_printf(seq, "gc next %lld\n", - sec->ps_gc_interval ? 
- (s64)(sec->ps_gc_next - ktime_get_real_seconds()) : 0ll); - - sptlrpc_sec_put(sec); -out: - return 0; -} - -LPROC_SEQ_FOPS_RO(sptlrpc_info_lprocfs); - -static int sptlrpc_ctxs_lprocfs_seq_show(struct seq_file *seq, void *v) -{ - struct obd_device *dev = seq->private; - struct client_obd *cli = &dev->u.cli; - struct ptlrpc_sec *sec = NULL; - - LASSERT(strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) == 0 || - strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) == 0 || - strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) == 0); - - if (cli->cl_import) - sec = sptlrpc_import_sec_ref(cli->cl_import); - if (!sec) - goto out; - - if (sec->ps_policy->sp_cops->display) - sec->ps_policy->sp_cops->display(sec, seq); - - sptlrpc_sec_put(sec); -out: - return 0; -} - -LPROC_SEQ_FOPS_RO(sptlrpc_ctxs_lprocfs); - -int sptlrpc_lprocfs_cliobd_attach(struct obd_device *dev) -{ - if (strcmp(dev->obd_type->typ_name, LUSTRE_OSC_NAME) != 0 && - strcmp(dev->obd_type->typ_name, LUSTRE_MDC_NAME) != 0 && - strcmp(dev->obd_type->typ_name, LUSTRE_MGC_NAME) != 0) { - CERROR("can't register lproc for obd type %s\n", - dev->obd_type->typ_name); - return -EINVAL; - } - - debugfs_create_file("srpc_info", 0444, dev->obd_debugfs_entry, dev, - &sptlrpc_info_lprocfs_fops); - debugfs_create_file("srpc_contexts", 0444, dev->obd_debugfs_entry, dev, - &sptlrpc_ctxs_lprocfs_fops); - - return 0; -} -EXPORT_SYMBOL(sptlrpc_lprocfs_cliobd_attach); - -LPROC_SEQ_FOPS_RO(sptlrpc_proc_enc_pool); -static struct lprocfs_vars sptlrpc_lprocfs_vars[] = { - { "encrypt_page_pools", &sptlrpc_proc_enc_pool_fops }, - { NULL } -}; - -static struct dentry *sptlrpc_debugfs_dir; - -void sptlrpc_lproc_init(void) -{ - sptlrpc_debugfs_dir = debugfs_create_dir("sptlrpc", debugfs_lustre_root); - ldebugfs_add_vars(sptlrpc_debugfs_dir, sptlrpc_lprocfs_vars, NULL); -} - -void sptlrpc_lproc_fini(void) -{ - debugfs_remove_recursive(sptlrpc_debugfs_dir); -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c 
b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c deleted file mode 100644 index ecc387d1b9b4..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c +++ /dev/null @@ -1,459 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/sec_null.c - * - * Author: Eric Mei - */ - -#define DEBUG_SUBSYSTEM S_SEC - -#include -#include -#include -#include -#include - -#include "ptlrpc_internal.h" - -static struct ptlrpc_sec_policy null_policy; -static struct ptlrpc_sec null_sec; -static struct ptlrpc_cli_ctx null_cli_ctx; -static struct ptlrpc_svc_ctx null_svc_ctx; - -/* - * we can temporarily use the topmost 8-bits of lm_secflvr to identify - * the source sec part. 
- */ -static inline -void null_encode_sec_part(struct lustre_msg *msg, enum lustre_sec_part sp) -{ - msg->lm_secflvr |= (((__u32)sp) & 0xFF) << 24; -} - -static inline -enum lustre_sec_part null_decode_sec_part(struct lustre_msg *msg) -{ - return (msg->lm_secflvr >> 24) & 0xFF; -} - -static int null_ctx_refresh(struct ptlrpc_cli_ctx *ctx) -{ - /* should never reach here */ - LBUG(); - return 0; -} - -static -int null_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) -{ - req->rq_reqbuf->lm_secflvr = SPTLRPC_FLVR_NULL; - - if (!req->rq_import->imp_dlm_fake) { - struct obd_device *obd = req->rq_import->imp_obd; - - null_encode_sec_part(req->rq_reqbuf, - obd->u.cli.cl_sp_me); - } - req->rq_reqdata_len = req->rq_reqlen; - return 0; -} - -static -int null_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) -{ - __u32 cksums, cksumc; - - LASSERT(req->rq_repdata); - - req->rq_repmsg = req->rq_repdata; - req->rq_replen = req->rq_repdata_len; - - if (req->rq_early) { - cksums = lustre_msg_get_cksum(req->rq_repdata); - cksumc = lustre_msg_calc_cksum(req->rq_repmsg); - if (cksumc != cksums) { - CDEBUG(D_SEC, - "early reply checksum mismatch: %08x != %08x\n", - cksumc, cksums); - return -EINVAL; - } - } - - return 0; -} - -static -struct ptlrpc_sec *null_create_sec(struct obd_import *imp, - struct ptlrpc_svc_ctx *svc_ctx, - struct sptlrpc_flavor *sf) -{ - LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_NULL); - - /* general layer has take a module reference for us, because we never - * really destroy the sec, simply release the reference here. 
- */ - sptlrpc_policy_put(&null_policy); - return &null_sec; -} - -static -void null_destroy_sec(struct ptlrpc_sec *sec) -{ - LASSERT(sec == &null_sec); -} - -static -struct ptlrpc_cli_ctx *null_lookup_ctx(struct ptlrpc_sec *sec, - struct vfs_cred *vcred, - int create, int remove_dead) -{ - atomic_inc(&null_cli_ctx.cc_refcount); - return &null_cli_ctx; -} - -static -int null_flush_ctx_cache(struct ptlrpc_sec *sec, - uid_t uid, - int grace, int force) -{ - return 0; -} - -static -int null_alloc_reqbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req, - int msgsize) -{ - if (!req->rq_reqbuf) { - int alloc_size = size_roundup_power2(msgsize); - - LASSERT(!req->rq_pool); - req->rq_reqbuf = kvzalloc(alloc_size, GFP_NOFS); - if (!req->rq_reqbuf) - return -ENOMEM; - - req->rq_reqbuf_len = alloc_size; - } else { - LASSERT(req->rq_pool); - LASSERT(req->rq_reqbuf_len >= msgsize); - memset(req->rq_reqbuf, 0, msgsize); - } - - req->rq_reqmsg = req->rq_reqbuf; - return 0; -} - -static -void null_free_reqbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req) -{ - if (!req->rq_pool) { - LASSERTF(req->rq_reqmsg == req->rq_reqbuf, - "req %p: reqmsg %p is not reqbuf %p in null sec\n", - req, req->rq_reqmsg, req->rq_reqbuf); - LASSERTF(req->rq_reqbuf_len >= req->rq_reqlen, - "req %p: reqlen %d should smaller than buflen %d\n", - req, req->rq_reqlen, req->rq_reqbuf_len); - - kvfree(req->rq_reqbuf); - req->rq_reqbuf = NULL; - req->rq_reqbuf_len = 0; - } -} - -static -int null_alloc_repbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req, - int msgsize) -{ - /* add space for early replied */ - msgsize += lustre_msg_early_size(); - - msgsize = size_roundup_power2(msgsize); - - req->rq_repbuf = kvzalloc(msgsize, GFP_NOFS); - if (!req->rq_repbuf) - return -ENOMEM; - - req->rq_repbuf_len = msgsize; - return 0; -} - -static -void null_free_repbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req) -{ - LASSERT(req->rq_repbuf); - - kvfree(req->rq_repbuf); - req->rq_repbuf = NULL; 
- req->rq_repbuf_len = 0; -} - -static -int null_enlarge_reqbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req, - int segment, int newsize) -{ - struct lustre_msg *newbuf; - struct lustre_msg *oldbuf = req->rq_reqmsg; - int oldsize, newmsg_size, alloc_size; - - LASSERT(req->rq_reqbuf); - LASSERT(req->rq_reqbuf == req->rq_reqmsg); - LASSERT(req->rq_reqbuf_len >= req->rq_reqlen); - LASSERT(req->rq_reqlen == lustre_packed_msg_size(oldbuf)); - - /* compute new message size */ - oldsize = req->rq_reqbuf->lm_buflens[segment]; - req->rq_reqbuf->lm_buflens[segment] = newsize; - newmsg_size = lustre_packed_msg_size(oldbuf); - req->rq_reqbuf->lm_buflens[segment] = oldsize; - - /* request from pool should always have enough buffer */ - LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newmsg_size); - - if (req->rq_reqbuf_len < newmsg_size) { - alloc_size = size_roundup_power2(newmsg_size); - - newbuf = kvzalloc(alloc_size, GFP_NOFS); - if (!newbuf) - return -ENOMEM; - - /* Must lock this, so that otherwise unprotected change of - * rq_reqmsg is not racing with parallel processing of - * imp_replay_list traversing threads. 
See LU-3333 - * This is a bandaid at best, we really need to deal with this - * in request enlarging code before unpacking that's already - * there - */ - if (req->rq_import) - spin_lock(&req->rq_import->imp_lock); - memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen); - - kvfree(req->rq_reqbuf); - req->rq_reqbuf = newbuf; - req->rq_reqmsg = newbuf; - req->rq_reqbuf_len = alloc_size; - - if (req->rq_import) - spin_unlock(&req->rq_import->imp_lock); - } - - _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize); - req->rq_reqlen = newmsg_size; - - return 0; -} - -static struct ptlrpc_svc_ctx null_svc_ctx = { - .sc_refcount = ATOMIC_INIT(1), - .sc_policy = &null_policy, -}; - -static -int null_accept(struct ptlrpc_request *req) -{ - LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) == - SPTLRPC_POLICY_NULL); - - if (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL) { - CERROR("Invalid rpc flavor 0x%x\n", req->rq_flvr.sf_rpc); - return SECSVC_DROP; - } - - req->rq_sp_from = null_decode_sec_part(req->rq_reqbuf); - - req->rq_reqmsg = req->rq_reqbuf; - req->rq_reqlen = req->rq_reqdata_len; - - req->rq_svc_ctx = &null_svc_ctx; - atomic_inc(&req->rq_svc_ctx->sc_refcount); - - return SECSVC_OK; -} - -static -int null_alloc_rs(struct ptlrpc_request *req, int msgsize) -{ - struct ptlrpc_reply_state *rs; - int rs_size = sizeof(*rs) + msgsize; - - LASSERT(msgsize % 8 == 0); - - rs = req->rq_reply_state; - - if (rs) { - /* pre-allocated */ - LASSERT(rs->rs_size >= rs_size); - } else { - rs = kvzalloc(rs_size, GFP_NOFS); - if (!rs) - return -ENOMEM; - - rs->rs_size = rs_size; - } - - rs->rs_svc_ctx = req->rq_svc_ctx; - atomic_inc(&req->rq_svc_ctx->sc_refcount); - - rs->rs_repbuf = (struct lustre_msg *)(rs + 1); - rs->rs_repbuf_len = rs_size - sizeof(*rs); - rs->rs_msg = rs->rs_repbuf; - - req->rq_reply_state = rs; - return 0; -} - -static -void null_free_rs(struct ptlrpc_reply_state *rs) -{ - LASSERT_ATOMIC_GT(&rs->rs_svc_ctx->sc_refcount, 1); - 
atomic_dec(&rs->rs_svc_ctx->sc_refcount); - - if (!rs->rs_prealloc) - kvfree(rs); -} - -static -int null_authorize(struct ptlrpc_request *req) -{ - struct ptlrpc_reply_state *rs = req->rq_reply_state; - - LASSERT(rs); - - rs->rs_repbuf->lm_secflvr = SPTLRPC_FLVR_NULL; - rs->rs_repdata_len = req->rq_replen; - - if (likely(req->rq_packed_final)) { - if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) - req->rq_reply_off = lustre_msg_early_size(); - else - req->rq_reply_off = 0; - } else { - __u32 cksum; - - cksum = lustre_msg_calc_cksum(rs->rs_repbuf); - lustre_msg_set_cksum(rs->rs_repbuf, cksum); - req->rq_reply_off = 0; - } - - return 0; -} - -static struct ptlrpc_ctx_ops null_ctx_ops = { - .refresh = null_ctx_refresh, - .sign = null_ctx_sign, - .verify = null_ctx_verify, -}; - -static struct ptlrpc_sec_cops null_sec_cops = { - .create_sec = null_create_sec, - .destroy_sec = null_destroy_sec, - .lookup_ctx = null_lookup_ctx, - .flush_ctx_cache = null_flush_ctx_cache, - .alloc_reqbuf = null_alloc_reqbuf, - .alloc_repbuf = null_alloc_repbuf, - .free_reqbuf = null_free_reqbuf, - .free_repbuf = null_free_repbuf, - .enlarge_reqbuf = null_enlarge_reqbuf, -}; - -static struct ptlrpc_sec_sops null_sec_sops = { - .accept = null_accept, - .alloc_rs = null_alloc_rs, - .authorize = null_authorize, - .free_rs = null_free_rs, -}; - -static struct ptlrpc_sec_policy null_policy = { - .sp_owner = THIS_MODULE, - .sp_name = "sec.null", - .sp_policy = SPTLRPC_POLICY_NULL, - .sp_cops = &null_sec_cops, - .sp_sops = &null_sec_sops, -}; - -static void null_init_internal(void) -{ - static HLIST_HEAD(__list); - - null_sec.ps_policy = &null_policy; - atomic_set(&null_sec.ps_refcount, 1); /* always busy */ - null_sec.ps_id = -1; - null_sec.ps_import = NULL; - null_sec.ps_flvr.sf_rpc = SPTLRPC_FLVR_NULL; - null_sec.ps_flvr.sf_flags = 0; - null_sec.ps_part = LUSTRE_SP_ANY; - null_sec.ps_dying = 0; - spin_lock_init(&null_sec.ps_lock); - atomic_set(&null_sec.ps_nctx, 1); /* for 
"null_cli_ctx" */ - INIT_LIST_HEAD(&null_sec.ps_gc_list); - null_sec.ps_gc_interval = 0; - null_sec.ps_gc_next = 0; - - hlist_add_head(&null_cli_ctx.cc_cache, &__list); - atomic_set(&null_cli_ctx.cc_refcount, 1); /* for hash */ - null_cli_ctx.cc_sec = &null_sec; - null_cli_ctx.cc_ops = &null_ctx_ops; - null_cli_ctx.cc_expire = 0; - null_cli_ctx.cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_ETERNAL | - PTLRPC_CTX_UPTODATE; - null_cli_ctx.cc_vcred.vc_uid = 0; - spin_lock_init(&null_cli_ctx.cc_lock); - INIT_LIST_HEAD(&null_cli_ctx.cc_req_list); - INIT_LIST_HEAD(&null_cli_ctx.cc_gc_chain); -} - -int sptlrpc_null_init(void) -{ - int rc; - - null_init_internal(); - - rc = sptlrpc_register_policy(&null_policy); - if (rc) - CERROR("failed to register %s: %d\n", null_policy.sp_name, rc); - - return rc; -} - -void sptlrpc_null_fini(void) -{ - int rc; - - rc = sptlrpc_unregister_policy(&null_policy); - if (rc) - CERROR("failed to unregister %s: %d\n", - null_policy.sp_name, rc); -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c deleted file mode 100644 index ec3d9af76b17..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c +++ /dev/null @@ -1,1023 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lustre/ptlrpc/sec_plain.c - * - * Author: Eric Mei - */ - -#define DEBUG_SUBSYSTEM S_SEC - -#include -#include -#include -#include -#include -#include "ptlrpc_internal.h" - -struct plain_sec { - struct ptlrpc_sec pls_base; - rwlock_t pls_lock; - struct ptlrpc_cli_ctx *pls_ctx; -}; - -static inline struct plain_sec *sec2plsec(struct ptlrpc_sec *sec) -{ - return container_of(sec, struct plain_sec, pls_base); -} - -static struct ptlrpc_sec_policy plain_policy; -static struct ptlrpc_ctx_ops plain_ctx_ops; -static struct ptlrpc_svc_ctx plain_svc_ctx; - -static unsigned int plain_at_offset; - -/* - * for simplicity, plain policy rpc use fixed layout. 
- */ -#define PLAIN_PACK_SEGMENTS (4) - -#define PLAIN_PACK_HDR_OFF (0) -#define PLAIN_PACK_MSG_OFF (1) -#define PLAIN_PACK_USER_OFF (2) -#define PLAIN_PACK_BULK_OFF (3) - -#define PLAIN_FL_USER (0x01) -#define PLAIN_FL_BULK (0x02) - -struct plain_header { - __u8 ph_ver; /* 0 */ - __u8 ph_flags; - __u8 ph_sp; /* source */ - __u8 ph_bulk_hash_alg; /* complete flavor desc */ - __u8 ph_pad[4]; -}; - -struct plain_bulk_token { - __u8 pbt_hash[8]; -}; - -#define PLAIN_BSD_SIZE \ - (sizeof(struct ptlrpc_bulk_sec_desc) + sizeof(struct plain_bulk_token)) - -/**************************************** - * bulk checksum helpers * - ****************************************/ - -static int plain_unpack_bsd(struct lustre_msg *msg, int swabbed) -{ - struct ptlrpc_bulk_sec_desc *bsd; - - if (bulk_sec_desc_unpack(msg, PLAIN_PACK_BULK_OFF, swabbed)) - return -EPROTO; - - bsd = lustre_msg_buf(msg, PLAIN_PACK_BULK_OFF, PLAIN_BSD_SIZE); - if (!bsd) { - CERROR("bulk sec desc has short size %d\n", - lustre_msg_buflen(msg, PLAIN_PACK_BULK_OFF)); - return -EPROTO; - } - - if (bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL && - bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG) { - CERROR("invalid bulk svc %u\n", bsd->bsd_svc); - return -EPROTO; - } - - return 0; -} - -static int plain_generate_bulk_csum(struct ptlrpc_bulk_desc *desc, - __u8 hash_alg, - struct plain_bulk_token *token) -{ - if (hash_alg == BULK_HASH_ALG_NULL) - return 0; - - memset(token->pbt_hash, 0, sizeof(token->pbt_hash)); - return sptlrpc_get_bulk_checksum(desc, hash_alg, token->pbt_hash, - sizeof(token->pbt_hash)); -} - -static int plain_verify_bulk_csum(struct ptlrpc_bulk_desc *desc, - __u8 hash_alg, - struct plain_bulk_token *tokenr) -{ - struct plain_bulk_token tokenv; - int rc; - - if (hash_alg == BULK_HASH_ALG_NULL) - return 0; - - memset(&tokenv.pbt_hash, 0, sizeof(tokenv.pbt_hash)); - rc = sptlrpc_get_bulk_checksum(desc, hash_alg, tokenv.pbt_hash, - sizeof(tokenv.pbt_hash)); - if (rc) - return rc; - - if (memcmp(tokenr->pbt_hash, 
tokenv.pbt_hash, sizeof(tokenr->pbt_hash))) - return -EACCES; - return 0; -} - -static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc) -{ - char *ptr; - unsigned int off, i; - - LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type)); - - for (i = 0; i < desc->bd_iov_count; i++) { - if (!BD_GET_KIOV(desc, i).bv_len) - continue; - - ptr = kmap(BD_GET_KIOV(desc, i).bv_page); - off = BD_GET_KIOV(desc, i).bv_offset & ~PAGE_MASK; - ptr[off] ^= 0x1; - kunmap(BD_GET_KIOV(desc, i).bv_page); - return; - } -} - -/**************************************** - * cli_ctx apis * - ****************************************/ - -static -int plain_ctx_refresh(struct ptlrpc_cli_ctx *ctx) -{ - /* should never reach here */ - LBUG(); - return 0; -} - -static -int plain_ctx_validate(struct ptlrpc_cli_ctx *ctx) -{ - return 0; -} - -static -int plain_ctx_sign(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) -{ - struct lustre_msg *msg = req->rq_reqbuf; - struct plain_header *phdr; - - msg->lm_secflvr = req->rq_flvr.sf_rpc; - - phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0); - phdr->ph_ver = 0; - phdr->ph_flags = 0; - phdr->ph_sp = ctx->cc_sec->ps_part; - phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg; - - if (req->rq_pack_udesc) - phdr->ph_flags |= PLAIN_FL_USER; - if (req->rq_pack_bulk) - phdr->ph_flags |= PLAIN_FL_BULK; - - req->rq_reqdata_len = lustre_msg_size_v2(msg->lm_bufcount, - msg->lm_buflens); - return 0; -} - -static -int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req) -{ - struct lustre_msg *msg = req->rq_repdata; - struct plain_header *phdr; - __u32 cksum; - int swabbed; - - if (msg->lm_bufcount != PLAIN_PACK_SEGMENTS) { - CERROR("unexpected reply buf count %u\n", msg->lm_bufcount); - return -EPROTO; - } - - swabbed = ptlrpc_rep_need_swab(req); - - phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr)); - if (!phdr) { - CERROR("missing plain header\n"); - return -EPROTO; - } - - if (phdr->ph_ver != 0) { - CERROR("Invalid header 
version\n"); - return -EPROTO; - } - - /* expect no user desc in reply */ - if (phdr->ph_flags & PLAIN_FL_USER) { - CERROR("Unexpected udesc flag in reply\n"); - return -EPROTO; - } - - if (phdr->ph_bulk_hash_alg != req->rq_flvr.u_bulk.hash.hash_alg) { - CERROR("reply bulk flavor %u != %u\n", phdr->ph_bulk_hash_alg, - req->rq_flvr.u_bulk.hash.hash_alg); - return -EPROTO; - } - - if (unlikely(req->rq_early)) { - unsigned int hsize = 4; - - cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, - lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, - 0), - lustre_msg_buflen(msg, - PLAIN_PACK_MSG_OFF), - NULL, 0, (unsigned char *)&cksum, - &hsize); - if (cksum != msg->lm_cksum) { - CDEBUG(D_SEC, - "early reply checksum mismatch: %08x != %08x\n", - cpu_to_le32(cksum), msg->lm_cksum); - return -EINVAL; - } - } else { - /* whether we sent with bulk or not, we expect the same - * in reply, except for early reply - */ - if (!req->rq_early && - !equi(req->rq_pack_bulk == 1, - phdr->ph_flags & PLAIN_FL_BULK)) { - CERROR("%s bulk checksum in reply\n", - req->rq_pack_bulk ? 
"Missing" : "Unexpected"); - return -EPROTO; - } - - if (phdr->ph_flags & PLAIN_FL_BULK) { - if (plain_unpack_bsd(msg, swabbed)) - return -EPROTO; - } - } - - req->rq_repmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0); - req->rq_replen = lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF); - return 0; -} - -static -int plain_cli_wrap_bulk(struct ptlrpc_cli_ctx *ctx, - struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc) -{ - struct ptlrpc_bulk_sec_desc *bsd; - struct plain_bulk_token *token; - int rc; - - LASSERT(req->rq_pack_bulk); - LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS); - - bsd = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0); - token = (struct plain_bulk_token *)bsd->bsd_data; - - bsd->bsd_version = 0; - bsd->bsd_flags = 0; - bsd->bsd_type = SPTLRPC_BULK_DEFAULT; - bsd->bsd_svc = SPTLRPC_FLVR_BULK_SVC(req->rq_flvr.sf_rpc); - - if (bsd->bsd_svc == SPTLRPC_BULK_SVC_NULL) - return 0; - - if (req->rq_bulk_read) - return 0; - - rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg, - token); - if (rc) { - CERROR("bulk write: failed to compute checksum: %d\n", rc); - } else { - /* - * for sending we only compute the wrong checksum instead - * of corrupting the data so it is still correct on a redo - */ - if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND) && - req->rq_flvr.u_bulk.hash.hash_alg != BULK_HASH_ALG_NULL) - token->pbt_hash[0] ^= 0x1; - } - - return rc; -} - -static -int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx, - struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc) -{ - struct ptlrpc_bulk_sec_desc *bsdv; - struct plain_bulk_token *tokenv; - int rc; - int i, nob; - - LASSERT(req->rq_pack_bulk); - LASSERT(req->rq_reqbuf->lm_bufcount == PLAIN_PACK_SEGMENTS); - LASSERT(req->rq_repdata->lm_bufcount == PLAIN_PACK_SEGMENTS); - - bsdv = lustre_msg_buf(req->rq_repdata, PLAIN_PACK_BULK_OFF, 0); - tokenv = (struct plain_bulk_token *)bsdv->bsd_data; - - if (req->rq_bulk_write) { - if (bsdv->bsd_flags & 
BSD_FL_ERR) - return -EIO; - return 0; - } - - /* fix the actual data size */ - for (i = 0, nob = 0; i < desc->bd_iov_count; i++) { - struct bio_vec bv_desc = BD_GET_KIOV(desc, i); - - if (bv_desc.bv_len + nob > desc->bd_nob_transferred) - bv_desc.bv_len = desc->bd_nob_transferred - nob; - nob += bv_desc.bv_len; - } - - rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg, - tokenv); - if (rc) - CERROR("bulk read: client verify failed: %d\n", rc); - - return rc; -} - -/**************************************** - * sec apis * - ****************************************/ - -static -struct ptlrpc_cli_ctx *plain_sec_install_ctx(struct plain_sec *plsec) -{ - struct ptlrpc_cli_ctx *ctx, *ctx_new; - - ctx_new = kzalloc(sizeof(*ctx_new), GFP_NOFS); - - write_lock(&plsec->pls_lock); - - ctx = plsec->pls_ctx; - if (ctx) { - atomic_inc(&ctx->cc_refcount); - - kfree(ctx_new); - } else if (ctx_new) { - ctx = ctx_new; - - atomic_set(&ctx->cc_refcount, 1); /* for cache */ - ctx->cc_sec = &plsec->pls_base; - ctx->cc_ops = &plain_ctx_ops; - ctx->cc_expire = 0; - ctx->cc_flags = PTLRPC_CTX_CACHED | PTLRPC_CTX_UPTODATE; - ctx->cc_vcred.vc_uid = 0; - spin_lock_init(&ctx->cc_lock); - INIT_LIST_HEAD(&ctx->cc_req_list); - INIT_LIST_HEAD(&ctx->cc_gc_chain); - - plsec->pls_ctx = ctx; - atomic_inc(&plsec->pls_base.ps_nctx); - atomic_inc(&plsec->pls_base.ps_refcount); - - atomic_inc(&ctx->cc_refcount); /* for caller */ - } - - write_unlock(&plsec->pls_lock); - - return ctx; -} - -static -void plain_destroy_sec(struct ptlrpc_sec *sec) -{ - struct plain_sec *plsec = sec2plsec(sec); - - LASSERT(sec->ps_policy == &plain_policy); - LASSERT(sec->ps_import); - LASSERT(atomic_read(&sec->ps_refcount) == 0); - LASSERT(atomic_read(&sec->ps_nctx) == 0); - LASSERT(!plsec->pls_ctx); - - class_import_put(sec->ps_import); - - kfree(plsec); -} - -static -void plain_kill_sec(struct ptlrpc_sec *sec) -{ - sec->ps_dying = 1; -} - -static -struct ptlrpc_sec *plain_create_sec(struct obd_import *imp, - 
struct ptlrpc_svc_ctx *svc_ctx, - struct sptlrpc_flavor *sf) -{ - struct plain_sec *plsec; - struct ptlrpc_sec *sec; - struct ptlrpc_cli_ctx *ctx; - - LASSERT(SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN); - - plsec = kzalloc(sizeof(*plsec), GFP_NOFS); - if (!plsec) - return NULL; - - /* - * initialize plain_sec - */ - rwlock_init(&plsec->pls_lock); - plsec->pls_ctx = NULL; - - sec = &plsec->pls_base; - sec->ps_policy = &plain_policy; - atomic_set(&sec->ps_refcount, 0); - atomic_set(&sec->ps_nctx, 0); - sec->ps_id = sptlrpc_get_next_secid(); - sec->ps_import = class_import_get(imp); - sec->ps_flvr = *sf; - spin_lock_init(&sec->ps_lock); - INIT_LIST_HEAD(&sec->ps_gc_list); - sec->ps_gc_interval = 0; - sec->ps_gc_next = 0; - - /* install ctx immediately if this is a reverse sec */ - if (svc_ctx) { - ctx = plain_sec_install_ctx(plsec); - if (!ctx) { - plain_destroy_sec(sec); - return NULL; - } - sptlrpc_cli_ctx_put(ctx, 1); - } - - return sec; -} - -static -struct ptlrpc_cli_ctx *plain_lookup_ctx(struct ptlrpc_sec *sec, - struct vfs_cred *vcred, - int create, int remove_dead) -{ - struct plain_sec *plsec = sec2plsec(sec); - struct ptlrpc_cli_ctx *ctx; - - read_lock(&plsec->pls_lock); - ctx = plsec->pls_ctx; - if (ctx) - atomic_inc(&ctx->cc_refcount); - read_unlock(&plsec->pls_lock); - - if (unlikely(!ctx)) - ctx = plain_sec_install_ctx(plsec); - - return ctx; -} - -static -void plain_release_ctx(struct ptlrpc_sec *sec, - struct ptlrpc_cli_ctx *ctx, int sync) -{ - LASSERT(atomic_read(&sec->ps_refcount) > 0); - LASSERT(atomic_read(&sec->ps_nctx) > 0); - LASSERT(atomic_read(&ctx->cc_refcount) == 0); - LASSERT(ctx->cc_sec == sec); - - kfree(ctx); - - atomic_dec(&sec->ps_nctx); - sptlrpc_sec_put(sec); -} - -static -int plain_flush_ctx_cache(struct ptlrpc_sec *sec, - uid_t uid, int grace, int force) -{ - struct plain_sec *plsec = sec2plsec(sec); - struct ptlrpc_cli_ctx *ctx; - - /* do nothing unless caller want to flush for 'all' */ - if (uid != -1) - return 0; - 
- write_lock(&plsec->pls_lock); - ctx = plsec->pls_ctx; - plsec->pls_ctx = NULL; - write_unlock(&plsec->pls_lock); - - if (ctx) - sptlrpc_cli_ctx_put(ctx, 1); - return 0; -} - -static -int plain_alloc_reqbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req, - int msgsize) -{ - __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, }; - int alloc_len; - - buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header); - buflens[PLAIN_PACK_MSG_OFF] = msgsize; - - if (req->rq_pack_udesc) - buflens[PLAIN_PACK_USER_OFF] = sptlrpc_current_user_desc_size(); - - if (req->rq_pack_bulk) { - LASSERT(req->rq_bulk_read || req->rq_bulk_write); - buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE; - } - - alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens); - - if (!req->rq_reqbuf) { - LASSERT(!req->rq_pool); - - alloc_len = size_roundup_power2(alloc_len); - req->rq_reqbuf = kvzalloc(alloc_len, GFP_NOFS); - if (!req->rq_reqbuf) - return -ENOMEM; - - req->rq_reqbuf_len = alloc_len; - } else { - LASSERT(req->rq_pool); - LASSERT(req->rq_reqbuf_len >= alloc_len); - memset(req->rq_reqbuf, 0, alloc_len); - } - - lustre_init_msg_v2(req->rq_reqbuf, PLAIN_PACK_SEGMENTS, buflens, NULL); - req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0); - - if (req->rq_pack_udesc) { - int rc = sptlrpc_pack_user_desc(req->rq_reqbuf, - PLAIN_PACK_USER_OFF); - if (rc < 0) - return rc; - } - - return 0; -} - -static -void plain_free_reqbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req) -{ - if (!req->rq_pool) { - kvfree(req->rq_reqbuf); - req->rq_reqbuf = NULL; - req->rq_reqbuf_len = 0; - } -} - -static -int plain_alloc_repbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req, - int msgsize) -{ - __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, }; - int alloc_len; - - buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header); - buflens[PLAIN_PACK_MSG_OFF] = msgsize; - - if (req->rq_pack_bulk) { - LASSERT(req->rq_bulk_read || req->rq_bulk_write); - buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE; 
- } - - alloc_len = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens); - - /* add space for early reply */ - alloc_len += plain_at_offset; - - alloc_len = size_roundup_power2(alloc_len); - - req->rq_repbuf = kvzalloc(alloc_len, GFP_NOFS); - if (!req->rq_repbuf) - return -ENOMEM; - - req->rq_repbuf_len = alloc_len; - return 0; -} - -static -void plain_free_repbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req) -{ - kvfree(req->rq_repbuf); - req->rq_repbuf = NULL; - req->rq_repbuf_len = 0; -} - -static -int plain_enlarge_reqbuf(struct ptlrpc_sec *sec, - struct ptlrpc_request *req, - int segment, int newsize) -{ - struct lustre_msg *newbuf; - int oldsize; - int newmsg_size, newbuf_size; - - LASSERT(req->rq_reqbuf); - LASSERT(req->rq_reqbuf_len >= req->rq_reqlen); - LASSERT(lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0) == - req->rq_reqmsg); - - /* compute new embedded msg size. */ - oldsize = req->rq_reqmsg->lm_buflens[segment]; - req->rq_reqmsg->lm_buflens[segment] = newsize; - newmsg_size = lustre_msg_size_v2(req->rq_reqmsg->lm_bufcount, - req->rq_reqmsg->lm_buflens); - req->rq_reqmsg->lm_buflens[segment] = oldsize; - - /* compute new wrapper msg size. */ - oldsize = req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF]; - req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = newmsg_size; - newbuf_size = lustre_msg_size_v2(req->rq_reqbuf->lm_bufcount, - req->rq_reqbuf->lm_buflens); - req->rq_reqbuf->lm_buflens[PLAIN_PACK_MSG_OFF] = oldsize; - - /* request from pool should always have enough buffer */ - LASSERT(!req->rq_pool || req->rq_reqbuf_len >= newbuf_size); - - if (req->rq_reqbuf_len < newbuf_size) { - newbuf_size = size_roundup_power2(newbuf_size); - - newbuf = kvzalloc(newbuf_size, GFP_NOFS); - if (!newbuf) - return -ENOMEM; - - /* Must lock this, so that otherwise unprotected change of - * rq_reqmsg is not racing with parallel processing of - * imp_replay_list traversing threads. 
See LU-3333 - * This is a bandaid at best, we really need to deal with this - * in request enlarging code before unpacking that's already - * there - */ - if (req->rq_import) - spin_lock(&req->rq_import->imp_lock); - - memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len); - - kvfree(req->rq_reqbuf); - req->rq_reqbuf = newbuf; - req->rq_reqbuf_len = newbuf_size; - req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, - PLAIN_PACK_MSG_OFF, 0); - - if (req->rq_import) - spin_unlock(&req->rq_import->imp_lock); - } - - _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, - newmsg_size); - _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize); - - req->rq_reqlen = newmsg_size; - return 0; -} - -/**************************************** - * service apis * - ****************************************/ - -static struct ptlrpc_svc_ctx plain_svc_ctx = { - .sc_refcount = ATOMIC_INIT(1), - .sc_policy = &plain_policy, -}; - -static -int plain_accept(struct ptlrpc_request *req) -{ - struct lustre_msg *msg = req->rq_reqbuf; - struct plain_header *phdr; - int swabbed; - - LASSERT(SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) == - SPTLRPC_POLICY_PLAIN); - - if (SPTLRPC_FLVR_BASE(req->rq_flvr.sf_rpc) != - SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN) || - SPTLRPC_FLVR_BULK_TYPE(req->rq_flvr.sf_rpc) != - SPTLRPC_FLVR_BULK_TYPE(SPTLRPC_FLVR_PLAIN)) { - CERROR("Invalid rpc flavor %x\n", req->rq_flvr.sf_rpc); - return SECSVC_DROP; - } - - if (msg->lm_bufcount < PLAIN_PACK_SEGMENTS) { - CERROR("unexpected request buf count %u\n", msg->lm_bufcount); - return SECSVC_DROP; - } - - swabbed = ptlrpc_req_need_swab(req); - - phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, sizeof(*phdr)); - if (!phdr) { - CERROR("missing plain header\n"); - return -EPROTO; - } - - if (phdr->ph_ver != 0) { - CERROR("Invalid header version\n"); - return -EPROTO; - } - - if (phdr->ph_bulk_hash_alg >= BULK_HASH_ALG_MAX) { - CERROR("invalid hash algorithm: %u\n", phdr->ph_bulk_hash_alg); - return -EPROTO; - } - - 
req->rq_sp_from = phdr->ph_sp; - req->rq_flvr.u_bulk.hash.hash_alg = phdr->ph_bulk_hash_alg; - - if (phdr->ph_flags & PLAIN_FL_USER) { - if (sptlrpc_unpack_user_desc(msg, PLAIN_PACK_USER_OFF, - swabbed)) { - CERROR("Mal-formed user descriptor\n"); - return SECSVC_DROP; - } - - req->rq_pack_udesc = 1; - req->rq_user_desc = lustre_msg_buf(msg, PLAIN_PACK_USER_OFF, 0); - } - - if (phdr->ph_flags & PLAIN_FL_BULK) { - if (plain_unpack_bsd(msg, swabbed)) - return SECSVC_DROP; - - req->rq_pack_bulk = 1; - } - - req->rq_reqmsg = lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0); - req->rq_reqlen = msg->lm_buflens[PLAIN_PACK_MSG_OFF]; - - req->rq_svc_ctx = &plain_svc_ctx; - atomic_inc(&req->rq_svc_ctx->sc_refcount); - - return SECSVC_OK; -} - -static -int plain_alloc_rs(struct ptlrpc_request *req, int msgsize) -{ - struct ptlrpc_reply_state *rs; - __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, }; - int rs_size = sizeof(*rs); - - LASSERT(msgsize % 8 == 0); - - buflens[PLAIN_PACK_HDR_OFF] = sizeof(struct plain_header); - buflens[PLAIN_PACK_MSG_OFF] = msgsize; - - if (req->rq_pack_bulk && (req->rq_bulk_read || req->rq_bulk_write)) - buflens[PLAIN_PACK_BULK_OFF] = PLAIN_BSD_SIZE; - - rs_size += lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens); - - rs = req->rq_reply_state; - - if (rs) { - /* pre-allocated */ - LASSERT(rs->rs_size >= rs_size); - } else { - rs = kvzalloc(rs_size, GFP_NOFS); - if (!rs) - return -ENOMEM; - - rs->rs_size = rs_size; - } - - rs->rs_svc_ctx = req->rq_svc_ctx; - atomic_inc(&req->rq_svc_ctx->sc_refcount); - rs->rs_repbuf = (struct lustre_msg *)(rs + 1); - rs->rs_repbuf_len = rs_size - sizeof(*rs); - - lustre_init_msg_v2(rs->rs_repbuf, PLAIN_PACK_SEGMENTS, buflens, NULL); - rs->rs_msg = lustre_msg_buf_v2(rs->rs_repbuf, PLAIN_PACK_MSG_OFF, 0); - - req->rq_reply_state = rs; - return 0; -} - -static -void plain_free_rs(struct ptlrpc_reply_state *rs) -{ - LASSERT(atomic_read(&rs->rs_svc_ctx->sc_refcount) > 1); - atomic_dec(&rs->rs_svc_ctx->sc_refcount); - - if 
(!rs->rs_prealloc) - kvfree(rs); -} - -static -int plain_authorize(struct ptlrpc_request *req) -{ - struct ptlrpc_reply_state *rs = req->rq_reply_state; - struct lustre_msg_v2 *msg = rs->rs_repbuf; - struct plain_header *phdr; - int len; - - LASSERT(rs); - LASSERT(msg); - - if (req->rq_replen != msg->lm_buflens[PLAIN_PACK_MSG_OFF]) - len = lustre_shrink_msg(msg, PLAIN_PACK_MSG_OFF, - req->rq_replen, 1); - else - len = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens); - - msg->lm_secflvr = req->rq_flvr.sf_rpc; - - phdr = lustre_msg_buf(msg, PLAIN_PACK_HDR_OFF, 0); - phdr->ph_ver = 0; - phdr->ph_flags = 0; - phdr->ph_bulk_hash_alg = req->rq_flvr.u_bulk.hash.hash_alg; - - if (req->rq_pack_bulk) - phdr->ph_flags |= PLAIN_FL_BULK; - - rs->rs_repdata_len = len; - - if (likely(req->rq_packed_final)) { - if (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) - req->rq_reply_off = plain_at_offset; - else - req->rq_reply_off = 0; - } else { - unsigned int hsize = 4; - - cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, - lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, - 0), - lustre_msg_buflen(msg, - PLAIN_PACK_MSG_OFF), - NULL, 0, (unsigned char *)&msg->lm_cksum, - &hsize); - req->rq_reply_off = 0; - } - - return 0; -} - -static -int plain_svc_unwrap_bulk(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc) -{ - struct ptlrpc_reply_state *rs = req->rq_reply_state; - struct ptlrpc_bulk_sec_desc *bsdr, *bsdv; - struct plain_bulk_token *tokenr; - int rc; - - LASSERT(req->rq_bulk_write); - LASSERT(req->rq_pack_bulk); - - bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0); - tokenr = (struct plain_bulk_token *)bsdr->bsd_data; - bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0); - - bsdv->bsd_version = 0; - bsdv->bsd_type = SPTLRPC_BULK_DEFAULT; - bsdv->bsd_svc = bsdr->bsd_svc; - bsdv->bsd_flags = 0; - - if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL) - return 0; - - rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg, - tokenr); - if 
(rc) { - bsdv->bsd_flags |= BSD_FL_ERR; - CERROR("bulk write: server verify failed: %d\n", rc); - } - - return rc; -} - -static -int plain_svc_wrap_bulk(struct ptlrpc_request *req, - struct ptlrpc_bulk_desc *desc) -{ - struct ptlrpc_reply_state *rs = req->rq_reply_state; - struct ptlrpc_bulk_sec_desc *bsdr, *bsdv; - struct plain_bulk_token *tokenv; - int rc; - - LASSERT(req->rq_bulk_read); - LASSERT(req->rq_pack_bulk); - - bsdr = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_BULK_OFF, 0); - bsdv = lustre_msg_buf(rs->rs_repbuf, PLAIN_PACK_BULK_OFF, 0); - tokenv = (struct plain_bulk_token *)bsdv->bsd_data; - - bsdv->bsd_version = 0; - bsdv->bsd_type = SPTLRPC_BULK_DEFAULT; - bsdv->bsd_svc = bsdr->bsd_svc; - bsdv->bsd_flags = 0; - - if (bsdr->bsd_svc == SPTLRPC_BULK_SVC_NULL) - return 0; - - rc = plain_generate_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg, - tokenv); - if (rc) { - CERROR("bulk read: server failed to compute checksum: %d\n", - rc); - } else { - if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) - corrupt_bulk_data(desc); - } - - return rc; -} - -static struct ptlrpc_ctx_ops plain_ctx_ops = { - .refresh = plain_ctx_refresh, - .validate = plain_ctx_validate, - .sign = plain_ctx_sign, - .verify = plain_ctx_verify, - .wrap_bulk = plain_cli_wrap_bulk, - .unwrap_bulk = plain_cli_unwrap_bulk, -}; - -static struct ptlrpc_sec_cops plain_sec_cops = { - .create_sec = plain_create_sec, - .destroy_sec = plain_destroy_sec, - .kill_sec = plain_kill_sec, - .lookup_ctx = plain_lookup_ctx, - .release_ctx = plain_release_ctx, - .flush_ctx_cache = plain_flush_ctx_cache, - .alloc_reqbuf = plain_alloc_reqbuf, - .free_reqbuf = plain_free_reqbuf, - .alloc_repbuf = plain_alloc_repbuf, - .free_repbuf = plain_free_repbuf, - .enlarge_reqbuf = plain_enlarge_reqbuf, -}; - -static struct ptlrpc_sec_sops plain_sec_sops = { - .accept = plain_accept, - .alloc_rs = plain_alloc_rs, - .authorize = plain_authorize, - .free_rs = plain_free_rs, - .unwrap_bulk = plain_svc_unwrap_bulk, - 
.wrap_bulk = plain_svc_wrap_bulk, -}; - -static struct ptlrpc_sec_policy plain_policy = { - .sp_owner = THIS_MODULE, - .sp_name = "plain", - .sp_policy = SPTLRPC_POLICY_PLAIN, - .sp_cops = &plain_sec_cops, - .sp_sops = &plain_sec_sops, -}; - -int sptlrpc_plain_init(void) -{ - __u32 buflens[PLAIN_PACK_SEGMENTS] = { 0, }; - int rc; - - buflens[PLAIN_PACK_MSG_OFF] = lustre_msg_early_size(); - plain_at_offset = lustre_msg_size_v2(PLAIN_PACK_SEGMENTS, buflens); - - rc = sptlrpc_register_policy(&plain_policy); - if (rc) - CERROR("failed to register: %d\n", rc); - - return rc; -} - -void sptlrpc_plain_fini(void) -{ - int rc; - - rc = sptlrpc_unregister_policy(&plain_policy); - if (rc) - CERROR("cannot unregister: %d\n", rc); -} diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c deleted file mode 100644 index 3fd8c746f460..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/service.c +++ /dev/null @@ -1,2807 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. 
- * - * Copyright (c) 2010, 2015, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -#include -#include -#include -#include -#include -#include "ptlrpc_internal.h" -#include -#include - -/* The following are visible and mutable through /sys/module/ptlrpc */ -int test_req_buffer_pressure; -module_param(test_req_buffer_pressure, int, 0444); -MODULE_PARM_DESC(test_req_buffer_pressure, "set non-zero to put pressure on request buffer pools"); -module_param(at_min, int, 0644); -MODULE_PARM_DESC(at_min, "Adaptive timeout minimum (sec)"); -module_param(at_max, int, 0644); -MODULE_PARM_DESC(at_max, "Adaptive timeout maximum (sec)"); -module_param(at_history, int, 0644); -MODULE_PARM_DESC(at_history, - "Adaptive timeouts remember the slowest event that took place within this period (sec)"); -module_param(at_early_margin, int, 0644); -MODULE_PARM_DESC(at_early_margin, "How soon before an RPC deadline to send an early reply"); -module_param(at_extra, int, 0644); -MODULE_PARM_DESC(at_extra, "How much extra time to give with each early reply"); - -/* forward ref */ -static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt); -static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req); -static void ptlrpc_at_remove_timed(struct ptlrpc_request *req); - -/** Holds a list of all PTLRPC services */ -LIST_HEAD(ptlrpc_all_services); -/** Used to protect the \e ptlrpc_all_services list */ -struct mutex ptlrpc_all_services_mutex; - -static struct ptlrpc_request_buffer_desc * -ptlrpc_alloc_rqbd(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_service *svc = svcpt->scp_service; - struct ptlrpc_request_buffer_desc *rqbd; - - rqbd = kzalloc_node(sizeof(*rqbd), GFP_NOFS, - cfs_cpt_spread_node(svc->srv_cptable, - svcpt->scp_cpt)); - if (!rqbd) - return NULL; - - rqbd->rqbd_svcpt = svcpt; - rqbd->rqbd_refcount = 0; - 
rqbd->rqbd_cbid.cbid_fn = request_in_callback; - rqbd->rqbd_cbid.cbid_arg = rqbd; - INIT_LIST_HEAD(&rqbd->rqbd_reqs); - rqbd->rqbd_buffer = kvzalloc_node(svc->srv_buf_size, GFP_KERNEL, - cfs_cpt_spread_node(svc->srv_cptable, - svcpt->scp_cpt)); - - if (!rqbd->rqbd_buffer) { - kfree(rqbd); - return NULL; - } - - spin_lock(&svcpt->scp_lock); - list_add(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle); - svcpt->scp_nrqbds_total++; - spin_unlock(&svcpt->scp_lock); - - return rqbd; -} - -static void -ptlrpc_free_rqbd(struct ptlrpc_request_buffer_desc *rqbd) -{ - struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; - - LASSERT(rqbd->rqbd_refcount == 0); - LASSERT(list_empty(&rqbd->rqbd_reqs)); - - spin_lock(&svcpt->scp_lock); - list_del(&rqbd->rqbd_list); - svcpt->scp_nrqbds_total--; - spin_unlock(&svcpt->scp_lock); - - kvfree(rqbd->rqbd_buffer); - kfree(rqbd); -} - -static int -ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post) -{ - struct ptlrpc_service *svc = svcpt->scp_service; - struct ptlrpc_request_buffer_desc *rqbd; - int rc = 0; - int i; - - if (svcpt->scp_rqbd_allocating) - goto try_post; - - spin_lock(&svcpt->scp_lock); - /* check again with lock */ - if (svcpt->scp_rqbd_allocating) { - /* NB: we might allow more than one thread in the future */ - LASSERT(svcpt->scp_rqbd_allocating == 1); - spin_unlock(&svcpt->scp_lock); - goto try_post; - } - - svcpt->scp_rqbd_allocating++; - spin_unlock(&svcpt->scp_lock); - - for (i = 0; i < svc->srv_nbuf_per_group; i++) { - /* NB: another thread might have recycled enough rqbds, we - * need to make sure it wouldn't over-allocate, see LU-1212. 
- */ - if (svcpt->scp_nrqbds_posted >= svc->srv_nbuf_per_group) - break; - - rqbd = ptlrpc_alloc_rqbd(svcpt); - - if (!rqbd) { - CERROR("%s: Can't allocate request buffer\n", - svc->srv_name); - rc = -ENOMEM; - break; - } - } - - spin_lock(&svcpt->scp_lock); - - LASSERT(svcpt->scp_rqbd_allocating == 1); - svcpt->scp_rqbd_allocating--; - - spin_unlock(&svcpt->scp_lock); - - CDEBUG(D_RPCTRACE, - "%s: allocate %d new %d-byte reqbufs (%d/%d left), rc = %d\n", - svc->srv_name, i, svc->srv_buf_size, svcpt->scp_nrqbds_posted, - svcpt->scp_nrqbds_total, rc); - - try_post: - if (post && rc == 0) - rc = ptlrpc_server_post_idle_rqbds(svcpt); - - return rc; -} - -struct ptlrpc_hr_partition; - -struct ptlrpc_hr_thread { - int hrt_id; /* thread ID */ - spinlock_t hrt_lock; - wait_queue_head_t hrt_waitq; - struct list_head hrt_queue; /* RS queue */ - struct ptlrpc_hr_partition *hrt_partition; -}; - -struct ptlrpc_hr_partition { - /* # of started threads */ - atomic_t hrp_nstarted; - /* # of stopped threads */ - atomic_t hrp_nstopped; - /* cpu partition id */ - int hrp_cpt; - /* round-robin rotor for choosing thread */ - int hrp_rotor; - /* total number of threads on this partition */ - int hrp_nthrs; - /* threads table */ - struct ptlrpc_hr_thread *hrp_thrs; -}; - -#define HRT_RUNNING 0 -#define HRT_STOPPING 1 - -struct ptlrpc_hr_service { - /* CPU partition table, it's just cfs_cpt_tab for now */ - struct cfs_cpt_table *hr_cpt_table; - /** controller sleep waitq */ - wait_queue_head_t hr_waitq; - unsigned int hr_stopping; - /** roundrobin rotor for non-affinity service */ - unsigned int hr_rotor; - /* partition data */ - struct ptlrpc_hr_partition **hr_partitions; -}; - -/** reply handling service. */ -static struct ptlrpc_hr_service ptlrpc_hr; - -/** - * Choose an hr thread to dispatch requests to. 
- */ -static struct ptlrpc_hr_thread * -ptlrpc_hr_select(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_hr_partition *hrp; - unsigned int rotor; - - if (svcpt->scp_cpt >= 0 && - svcpt->scp_service->srv_cptable == ptlrpc_hr.hr_cpt_table) { - /* directly match partition */ - hrp = ptlrpc_hr.hr_partitions[svcpt->scp_cpt]; - - } else { - rotor = ptlrpc_hr.hr_rotor++; - rotor %= cfs_cpt_number(ptlrpc_hr.hr_cpt_table); - - hrp = ptlrpc_hr.hr_partitions[rotor]; - } - - rotor = hrp->hrp_rotor++; - return &hrp->hrp_thrs[rotor % hrp->hrp_nthrs]; -} - -/** - * Put reply state into a queue for processing because we received - * ACK from the client - */ -void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs) -{ - struct ptlrpc_hr_thread *hrt; - - LASSERT(list_empty(&rs->rs_list)); - - hrt = ptlrpc_hr_select(rs->rs_svcpt); - - spin_lock(&hrt->hrt_lock); - list_add_tail(&rs->rs_list, &hrt->hrt_queue); - spin_unlock(&hrt->hrt_lock); - - wake_up(&hrt->hrt_waitq); -} - -void -ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs) -{ - assert_spin_locked(&rs->rs_svcpt->scp_rep_lock); - assert_spin_locked(&rs->rs_lock); - LASSERT(rs->rs_difficult); - rs->rs_scheduled_ever = 1; /* flag any notification attempt */ - - if (rs->rs_scheduled) { /* being set up or already notified */ - return; - } - - rs->rs_scheduled = 1; - list_del_init(&rs->rs_list); - ptlrpc_dispatch_difficult_reply(rs); -} -EXPORT_SYMBOL(ptlrpc_schedule_difficult_reply); - -static int -ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_request_buffer_desc *rqbd; - int rc; - int posted = 0; - - for (;;) { - spin_lock(&svcpt->scp_lock); - - if (list_empty(&svcpt->scp_rqbd_idle)) { - spin_unlock(&svcpt->scp_lock); - return posted; - } - - rqbd = list_entry(svcpt->scp_rqbd_idle.next, - struct ptlrpc_request_buffer_desc, - rqbd_list); - list_del(&rqbd->rqbd_list); - - /* assume we will post successfully */ - svcpt->scp_nrqbds_posted++; - list_add(&rqbd->rqbd_list, 
&svcpt->scp_rqbd_posted); - - spin_unlock(&svcpt->scp_lock); - - rc = ptlrpc_register_rqbd(rqbd); - if (rc != 0) - break; - - posted = 1; - } - - spin_lock(&svcpt->scp_lock); - - svcpt->scp_nrqbds_posted--; - list_del(&rqbd->rqbd_list); - list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle); - - /* Don't complain if no request buffers are posted right now; LNET - * won't drop requests because we set the portal lazy! - */ - - spin_unlock(&svcpt->scp_lock); - - return -1; -} - -static void ptlrpc_at_timer(struct timer_list *t) -{ - struct ptlrpc_service_part *svcpt; - - svcpt = from_timer(svcpt, t, scp_at_timer); - - svcpt->scp_at_check = 1; - svcpt->scp_at_checktime = jiffies; - wake_up(&svcpt->scp_waitq); -} - -static void -ptlrpc_server_nthreads_check(struct ptlrpc_service *svc, - struct ptlrpc_service_conf *conf) -{ - struct ptlrpc_service_thr_conf *tc = &conf->psc_thr; - unsigned int init; - unsigned int total; - unsigned int nthrs; - int weight; - - /* - * Common code for estimating & validating threads number. - * CPT affinity service could have percpt thread-pool instead - * of a global thread-pool, which means user might not always - * get the threads number they give it in conf::tc_nthrs_user - * even they did set. It's because we need to validate threads - * number for each CPT to guarantee each pool will have enough - * threads to keep the service healthy. 
- */ - init = PTLRPC_NTHRS_INIT + (svc->srv_ops.so_hpreq_handler != NULL); - init = max_t(int, init, tc->tc_nthrs_init); - - /* NB: please see comments in lustre_lnet.h for definition - * details of these members - */ - LASSERT(tc->tc_nthrs_max != 0); - - if (tc->tc_nthrs_user != 0) { - /* In case there is a reason to test a service with many - * threads, we give a less strict check here, it can - * be up to 8 * nthrs_max - */ - total = min(tc->tc_nthrs_max * 8, tc->tc_nthrs_user); - nthrs = total / svc->srv_ncpts; - init = max(init, nthrs); - goto out; - } - - total = tc->tc_nthrs_max; - if (tc->tc_nthrs_base == 0) { - /* don't care about base threads number per partition, - * this is most for non-affinity service - */ - nthrs = total / svc->srv_ncpts; - goto out; - } - - nthrs = tc->tc_nthrs_base; - if (svc->srv_ncpts == 1) { - int i; - - /* NB: Increase the base number if it's single partition - * and total number of cores/HTs is larger or equal to 4. - * result will always < 2 * nthrs_base - */ - weight = cfs_cpt_weight(svc->srv_cptable, CFS_CPT_ANY); - for (i = 1; (weight >> (i + 1)) != 0 && /* >= 4 cores/HTs */ - (tc->tc_nthrs_base >> i) != 0; i++) - nthrs += tc->tc_nthrs_base >> i; - } - - if (tc->tc_thr_factor != 0) { - int factor = tc->tc_thr_factor; - const int fade = 4; - - /* - * User wants to increase number of threads with for - * each CPU core/HT, most likely the factor is larger then - * one thread/core because service threads are supposed to - * be blocked by lock or wait for IO. - */ - /* - * Amdahl's law says that adding processors wouldn't give - * a linear increasing of parallelism, so it's nonsense to - * have too many threads no matter how many cores/HTs - * there are. 
- */ - /* weight is # of HTs */ - if (cpumask_weight(topology_sibling_cpumask(0)) > 1) { - /* depress thread factor for hyper-thread */ - factor = factor - (factor >> 1) + (factor >> 3); - } - - weight = cfs_cpt_weight(svc->srv_cptable, 0); - LASSERT(weight > 0); - - for (; factor > 0 && weight > 0; factor--, weight -= fade) - nthrs += min(weight, fade) * factor; - } - - if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) { - nthrs = max(tc->tc_nthrs_base, - tc->tc_nthrs_max / svc->srv_ncpts); - } - out: - nthrs = max(nthrs, tc->tc_nthrs_init); - svc->srv_nthrs_cpt_limit = nthrs; - svc->srv_nthrs_cpt_init = init; - - if (nthrs * svc->srv_ncpts > tc->tc_nthrs_max) { - CDEBUG(D_OTHER, "%s: This service may have more threads (%d) than the given soft limit (%d)\n", - svc->srv_name, nthrs * svc->srv_ncpts, - tc->tc_nthrs_max); - } -} - -/** - * Initialize percpt data for a service - */ -static int -ptlrpc_service_part_init(struct ptlrpc_service *svc, - struct ptlrpc_service_part *svcpt, int cpt) -{ - struct ptlrpc_at_array *array; - int size; - int index; - int rc; - - svcpt->scp_cpt = cpt; - INIT_LIST_HEAD(&svcpt->scp_threads); - - /* rqbd and incoming request queue */ - spin_lock_init(&svcpt->scp_lock); - INIT_LIST_HEAD(&svcpt->scp_rqbd_idle); - INIT_LIST_HEAD(&svcpt->scp_rqbd_posted); - INIT_LIST_HEAD(&svcpt->scp_req_incoming); - init_waitqueue_head(&svcpt->scp_waitq); - /* history request & rqbd list */ - INIT_LIST_HEAD(&svcpt->scp_hist_reqs); - INIT_LIST_HEAD(&svcpt->scp_hist_rqbds); - - /* active requests and hp requests */ - spin_lock_init(&svcpt->scp_req_lock); - - /* reply states */ - spin_lock_init(&svcpt->scp_rep_lock); - INIT_LIST_HEAD(&svcpt->scp_rep_active); - INIT_LIST_HEAD(&svcpt->scp_rep_idle); - init_waitqueue_head(&svcpt->scp_rep_waitq); - atomic_set(&svcpt->scp_nreps_difficult, 0); - - /* adaptive timeout */ - spin_lock_init(&svcpt->scp_at_lock); - array = &svcpt->scp_at_array; - - size = at_est2timeout(at_max); - array->paa_size = size; - 
array->paa_count = 0; - array->paa_deadline = -1; - - /* allocate memory for scp_at_array (ptlrpc_at_array) */ - array->paa_reqs_array = - kzalloc_node(sizeof(struct list_head) * size, GFP_NOFS, - cfs_cpt_spread_node(svc->srv_cptable, cpt)); - if (!array->paa_reqs_array) - return -ENOMEM; - - for (index = 0; index < size; index++) - INIT_LIST_HEAD(&array->paa_reqs_array[index]); - - array->paa_reqs_count = - kzalloc_node(sizeof(__u32) * size, GFP_NOFS, - cfs_cpt_spread_node(svc->srv_cptable, cpt)); - if (!array->paa_reqs_count) - goto free_reqs_array; - - timer_setup(&svcpt->scp_at_timer, ptlrpc_at_timer, 0); - - /* At SOW, service time should be quick; 10s seems generous. If client - * timeout is less than this, we'll be sending an early reply. - */ - at_init(&svcpt->scp_at_estimate, 10, 0); - - /* assign this before call ptlrpc_grow_req_bufs */ - svcpt->scp_service = svc; - /* Now allocate the request buffers, but don't post them now */ - rc = ptlrpc_grow_req_bufs(svcpt, 0); - /* We shouldn't be under memory pressure at startup, so - * fail if we can't allocate all our buffers at this time. - */ - if (rc != 0) - goto free_reqs_count; - - return 0; - -free_reqs_count: - kfree(array->paa_reqs_count); - array->paa_reqs_count = NULL; -free_reqs_array: - kfree(array->paa_reqs_array); - array->paa_reqs_array = NULL; - - return -ENOMEM; -} - -/** - * Initialize service on a given portal. - * This includes starting serving threads , allocating and posting rqbds and - * so on. 
- */ -struct ptlrpc_service * -ptlrpc_register_service(struct ptlrpc_service_conf *conf, - struct kset *parent, - struct dentry *debugfs_entry) -{ - struct ptlrpc_service_cpt_conf *cconf = &conf->psc_cpt; - struct ptlrpc_service *service; - struct ptlrpc_service_part *svcpt; - struct cfs_cpt_table *cptable; - __u32 *cpts = NULL; - int ncpts; - int cpt; - int rc; - int i; - - LASSERT(conf->psc_buf.bc_nbufs > 0); - LASSERT(conf->psc_buf.bc_buf_size >= - conf->psc_buf.bc_req_max_size + SPTLRPC_MAX_PAYLOAD); - LASSERT(conf->psc_thr.tc_ctx_tags != 0); - - cptable = cconf->cc_cptable; - if (!cptable) - cptable = cfs_cpt_tab; - - if (!conf->psc_thr.tc_cpu_affinity) { - ncpts = 1; - } else { - ncpts = cfs_cpt_number(cptable); - if (cconf->cc_pattern) { - struct cfs_expr_list *el; - - rc = cfs_expr_list_parse(cconf->cc_pattern, - strlen(cconf->cc_pattern), - 0, ncpts - 1, &el); - if (rc != 0) { - CERROR("%s: invalid CPT pattern string: %s", - conf->psc_name, cconf->cc_pattern); - return ERR_PTR(-EINVAL); - } - - rc = cfs_expr_list_values(el, ncpts, &cpts); - cfs_expr_list_free(el); - if (rc <= 0) { - CERROR("%s: failed to parse CPT array %s: %d\n", - conf->psc_name, cconf->cc_pattern, rc); - kfree(cpts); - return ERR_PTR(rc < 0 ? rc : -EINVAL); - } - ncpts = rc; - } - } - - service = kzalloc(offsetof(struct ptlrpc_service, srv_parts[ncpts]), - GFP_NOFS); - if (!service) { - kfree(cpts); - return ERR_PTR(-ENOMEM); - } - - service->srv_cptable = cptable; - service->srv_cpts = cpts; - service->srv_ncpts = ncpts; - - service->srv_cpt_bits = 0; /* it's zero already, easy to read... 
*/ - while ((1 << service->srv_cpt_bits) < cfs_cpt_number(cptable)) - service->srv_cpt_bits++; - - /* public members */ - spin_lock_init(&service->srv_lock); - service->srv_name = conf->psc_name; - service->srv_watchdog_factor = conf->psc_watchdog_factor; - INIT_LIST_HEAD(&service->srv_list); /* for safety of cleanup */ - - /* buffer configuration */ - service->srv_nbuf_per_group = test_req_buffer_pressure ? - 1 : conf->psc_buf.bc_nbufs; - service->srv_max_req_size = conf->psc_buf.bc_req_max_size + - SPTLRPC_MAX_PAYLOAD; - service->srv_buf_size = conf->psc_buf.bc_buf_size; - service->srv_rep_portal = conf->psc_buf.bc_rep_portal; - service->srv_req_portal = conf->psc_buf.bc_req_portal; - - /* Increase max reply size to next power of two */ - service->srv_max_reply_size = 1; - while (service->srv_max_reply_size < - conf->psc_buf.bc_rep_max_size + SPTLRPC_MAX_PAYLOAD) - service->srv_max_reply_size <<= 1; - - service->srv_thread_name = conf->psc_thr.tc_thr_name; - service->srv_ctx_tags = conf->psc_thr.tc_ctx_tags; - service->srv_hpreq_ratio = PTLRPC_SVC_HP_RATIO; - service->srv_ops = conf->psc_ops; - - for (i = 0; i < ncpts; i++) { - if (!conf->psc_thr.tc_cpu_affinity) - cpt = CFS_CPT_ANY; - else - cpt = cpts ? 
cpts[i] : i; - - svcpt = kzalloc_node(sizeof(*svcpt), GFP_NOFS, - cfs_cpt_spread_node(cptable, cpt)); - if (!svcpt) { - rc = -ENOMEM; - goto failed; - } - - service->srv_parts[i] = svcpt; - rc = ptlrpc_service_part_init(service, svcpt, cpt); - if (rc != 0) - goto failed; - } - - ptlrpc_server_nthreads_check(service, conf); - - rc = LNetSetLazyPortal(service->srv_req_portal); - LASSERT(rc == 0); - - mutex_lock(&ptlrpc_all_services_mutex); - list_add(&service->srv_list, &ptlrpc_all_services); - mutex_unlock(&ptlrpc_all_services_mutex); - - if (parent) { - rc = ptlrpc_sysfs_register_service(parent, service); - if (rc) - goto failed; - } - - if (!IS_ERR_OR_NULL(debugfs_entry)) - ptlrpc_ldebugfs_register_service(debugfs_entry, service); - - rc = ptlrpc_service_nrs_setup(service); - if (rc != 0) - goto failed; - - CDEBUG(D_NET, "%s: Started, listening on portal %d\n", - service->srv_name, service->srv_req_portal); - - rc = ptlrpc_start_threads(service); - if (rc != 0) { - CERROR("Failed to start threads for service %s: %d\n", - service->srv_name, rc); - goto failed; - } - - return service; -failed: - ptlrpc_unregister_service(service); - return ERR_PTR(rc); -} -EXPORT_SYMBOL(ptlrpc_register_service); - -/** - * to actually free the request, must be called without holding svc_lock. - * note it's caller's responsibility to unlink req->rq_list. - */ -static void ptlrpc_server_free_request(struct ptlrpc_request *req) -{ - LASSERT(atomic_read(&req->rq_refcount) == 0); - LASSERT(list_empty(&req->rq_timed_list)); - - /* DEBUG_REQ() assumes the reply state of a request with a valid - * ref will not be destroyed until that reference is dropped. - */ - ptlrpc_req_drop_rs(req); - - sptlrpc_svc_ctx_decref(req); - - if (req != &req->rq_rqbd->rqbd_req) { - /* NB request buffers use an embedded - * req if the incoming req unlinked the - * MD; this isn't one of them! - */ - ptlrpc_request_cache_free(req); - } -} - -/** - * drop a reference count of the request. 
if it reaches 0, we either - * put it into history list, or free it immediately. - */ -static void ptlrpc_server_drop_request(struct ptlrpc_request *req) -{ - struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd; - struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; - struct ptlrpc_service *svc = svcpt->scp_service; - int refcount; - - if (!atomic_dec_and_test(&req->rq_refcount)) - return; - - if (req->rq_at_linked) { - spin_lock(&svcpt->scp_at_lock); - /* recheck with lock, in case it's unlinked by - * ptlrpc_at_check_timed() - */ - if (likely(req->rq_at_linked)) - ptlrpc_at_remove_timed(req); - spin_unlock(&svcpt->scp_at_lock); - } - - LASSERT(list_empty(&req->rq_timed_list)); - - /* finalize request */ - if (req->rq_export) { - class_export_put(req->rq_export); - req->rq_export = NULL; - } - - spin_lock(&svcpt->scp_lock); - - list_add(&req->rq_list, &rqbd->rqbd_reqs); - - refcount = --(rqbd->rqbd_refcount); - if (refcount == 0) { - /* request buffer is now idle: add to history */ - list_del(&rqbd->rqbd_list); - - list_add_tail(&rqbd->rqbd_list, &svcpt->scp_hist_rqbds); - svcpt->scp_hist_nrqbds++; - - /* cull some history? 
- * I expect only about 1 or 2 rqbds need to be recycled here - */ - while (svcpt->scp_hist_nrqbds > svc->srv_hist_nrqbds_cpt_max) { - rqbd = list_entry(svcpt->scp_hist_rqbds.next, - struct ptlrpc_request_buffer_desc, - rqbd_list); - - list_del(&rqbd->rqbd_list); - svcpt->scp_hist_nrqbds--; - - /* remove rqbd's reqs from svc's req history while - * I've got the service lock - */ - list_for_each_entry(req, &rqbd->rqbd_reqs, rq_list) { - /* Track the highest culled req seq */ - if (req->rq_history_seq > - svcpt->scp_hist_seq_culled) { - svcpt->scp_hist_seq_culled = - req->rq_history_seq; - } - list_del(&req->rq_history_list); - } - - spin_unlock(&svcpt->scp_lock); - - while ((req = list_first_entry_or_null( - &rqbd->rqbd_reqs, - struct ptlrpc_request, rq_list))) { - list_del(&req->rq_list); - ptlrpc_server_free_request(req); - } - - spin_lock(&svcpt->scp_lock); - /* - * now all reqs including the embedded req has been - * disposed, schedule request buffer for re-use. - */ - LASSERT(atomic_read(&rqbd->rqbd_req.rq_refcount) == - 0); - list_add_tail(&rqbd->rqbd_list, &svcpt->scp_rqbd_idle); - } - - spin_unlock(&svcpt->scp_lock); - } else if (req->rq_reply_state && req->rq_reply_state->rs_prealloc) { - /* If we are low on memory, we are not interested in history */ - list_del(&req->rq_list); - list_del_init(&req->rq_history_list); - - /* Track the highest culled req seq */ - if (req->rq_history_seq > svcpt->scp_hist_seq_culled) - svcpt->scp_hist_seq_culled = req->rq_history_seq; - - spin_unlock(&svcpt->scp_lock); - - ptlrpc_server_free_request(req); - } else { - spin_unlock(&svcpt->scp_lock); - } -} - -/** - * to finish a request: stop sending more early replies, and release - * the request. 
- */ -static void ptlrpc_server_finish_request(struct ptlrpc_service_part *svcpt, - struct ptlrpc_request *req) -{ - ptlrpc_server_hpreq_fini(req); - - if (req->rq_session.lc_thread) { - lu_context_exit(&req->rq_session); - lu_context_fini(&req->rq_session); - } - - ptlrpc_server_drop_request(req); -} - -/** - * to finish a active request: stop sending more early replies, and release - * the request. should be called after we finished handling the request. - */ -static void ptlrpc_server_finish_active_request( - struct ptlrpc_service_part *svcpt, - struct ptlrpc_request *req) -{ - spin_lock(&svcpt->scp_req_lock); - ptlrpc_nrs_req_stop_nolock(req); - svcpt->scp_nreqs_active--; - if (req->rq_hp) - svcpt->scp_nhreqs_active--; - spin_unlock(&svcpt->scp_req_lock); - - ptlrpc_nrs_req_finalize(req); - - if (req->rq_export) - class_export_rpc_dec(req->rq_export); - - ptlrpc_server_finish_request(svcpt, req); -} - -/** - * Sanity check request \a req. - * Return 0 if all is ok, error code otherwise. - */ -static int ptlrpc_check_req(struct ptlrpc_request *req) -{ - struct obd_device *obd = req->rq_export->exp_obd; - int rc = 0; - - if (unlikely(lustre_msg_get_conn_cnt(req->rq_reqmsg) < - req->rq_export->exp_conn_cnt)) { - DEBUG_REQ(D_RPCTRACE, req, - "DROPPING req from old connection %d < %d", - lustre_msg_get_conn_cnt(req->rq_reqmsg), - req->rq_export->exp_conn_cnt); - return -EEXIST; - } - if (unlikely(!obd || obd->obd_fail)) { - /* - * Failing over, don't handle any more reqs, send - * error response instead. - */ - CDEBUG(D_RPCTRACE, "Dropping req %p for failed obd %s\n", - req, obd ? 
obd->obd_name : "unknown"); - rc = -ENODEV; - } else if (lustre_msg_get_flags(req->rq_reqmsg) & - (MSG_REPLAY | MSG_REQ_REPLAY_DONE)) { - DEBUG_REQ(D_ERROR, req, "Invalid replay without recovery"); - class_fail_export(req->rq_export); - rc = -ENODEV; - } else if (lustre_msg_get_transno(req->rq_reqmsg) != 0) { - DEBUG_REQ(D_ERROR, req, - "Invalid req with transno %llu without recovery", - lustre_msg_get_transno(req->rq_reqmsg)); - class_fail_export(req->rq_export); - rc = -ENODEV; - } - - if (unlikely(rc < 0)) { - req->rq_status = rc; - ptlrpc_error(req); - } - return rc; -} - -static void ptlrpc_at_set_timer(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_at_array *array = &svcpt->scp_at_array; - __s32 next; - - if (array->paa_count == 0) { - del_timer(&svcpt->scp_at_timer); - return; - } - - /* Set timer for closest deadline */ - next = (__s32)(array->paa_deadline - ktime_get_real_seconds() - - at_early_margin); - if (next <= 0) { - ptlrpc_at_timer(&svcpt->scp_at_timer); - } else { - mod_timer(&svcpt->scp_at_timer, jiffies + next * HZ); - CDEBUG(D_INFO, "armed %s at %+ds\n", - svcpt->scp_service->srv_name, next); - } -} - -/* Add rpc to early reply check list */ -static int ptlrpc_at_add_timed(struct ptlrpc_request *req) -{ - struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; - struct ptlrpc_at_array *array = &svcpt->scp_at_array; - struct ptlrpc_request *rq = NULL; - __u32 index; - - if (AT_OFF) - return 0; - - if (req->rq_no_reply) - return 0; - - if ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT) == 0) - return -ENOSYS; - - spin_lock(&svcpt->scp_at_lock); - LASSERT(list_empty(&req->rq_timed_list)); - - div_u64_rem(req->rq_deadline, array->paa_size, &index); - if (array->paa_reqs_count[index] > 0) { - /* latest rpcs will have the latest deadlines in the list, - * so search backward. 
- */ - list_for_each_entry_reverse(rq, &array->paa_reqs_array[index], - rq_timed_list) { - if (req->rq_deadline >= rq->rq_deadline) { - list_add(&req->rq_timed_list, - &rq->rq_timed_list); - break; - } - } - } - - /* Add the request at the head of the list */ - if (list_empty(&req->rq_timed_list)) - list_add(&req->rq_timed_list, &array->paa_reqs_array[index]); - - spin_lock(&req->rq_lock); - req->rq_at_linked = 1; - spin_unlock(&req->rq_lock); - req->rq_at_index = index; - array->paa_reqs_count[index]++; - array->paa_count++; - if (array->paa_count == 1 || array->paa_deadline > req->rq_deadline) { - array->paa_deadline = req->rq_deadline; - ptlrpc_at_set_timer(svcpt); - } - spin_unlock(&svcpt->scp_at_lock); - - return 0; -} - -static void -ptlrpc_at_remove_timed(struct ptlrpc_request *req) -{ - struct ptlrpc_at_array *array; - - array = &req->rq_rqbd->rqbd_svcpt->scp_at_array; - - /* NB: must call with hold svcpt::scp_at_lock */ - LASSERT(!list_empty(&req->rq_timed_list)); - list_del_init(&req->rq_timed_list); - - spin_lock(&req->rq_lock); - req->rq_at_linked = 0; - spin_unlock(&req->rq_lock); - - array->paa_reqs_count[req->rq_at_index]--; - array->paa_count--; -} - -/* - * Attempt to extend the request deadline by sending an early reply to the - * client. - */ -static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) -{ - struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; - struct ptlrpc_request *reqcopy; - struct lustre_msg *reqmsg; - long olddl = req->rq_deadline - ktime_get_real_seconds(); - time64_t newdl; - int rc; - - /* deadline is when the client expects us to reply, margin is the - * difference between clients' and servers' expectations - */ - DEBUG_REQ(D_ADAPTTO, req, - "%ssending early reply (deadline %+lds, margin %+lds) for %d+%d", - AT_OFF ? 
"AT off - not " : "", - olddl, olddl - at_get(&svcpt->scp_at_estimate), - at_get(&svcpt->scp_at_estimate), at_extra); - - if (AT_OFF) - return 0; - - if (olddl < 0) { - DEBUG_REQ(D_WARNING, req, "Already past deadline (%+lds), not sending early reply. Consider increasing at_early_margin (%d)?", - olddl, at_early_margin); - - /* Return an error so we're not re-added to the timed list. */ - return -ETIMEDOUT; - } - - if (!(lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) { - DEBUG_REQ(D_INFO, req, "Wanted to ask client for more time, but no AT support"); - return -ENOSYS; - } - - /* - * We want to extend the request deadline by at_extra seconds, - * so we set our service estimate to reflect how much time has - * passed since this request arrived plus an additional - * at_extra seconds. The client will calculate the new deadline - * based on this service estimate (plus some additional time to - * account for network latency). See ptlrpc_at_recv_early_reply - */ - at_measured(&svcpt->scp_at_estimate, at_extra + - ktime_get_real_seconds() - req->rq_arrival_time.tv_sec); - newdl = req->rq_arrival_time.tv_sec + at_get(&svcpt->scp_at_estimate); - - /* Check to see if we've actually increased the deadline - - * we may be past adaptive_max - */ - if (req->rq_deadline >= newdl) { - DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%lld), not sending early reply\n", - olddl, newdl - ktime_get_real_seconds()); - return -ETIMEDOUT; - } - - reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS); - if (!reqcopy) - return -ENOMEM; - reqmsg = kvzalloc(req->rq_reqlen, GFP_NOFS); - if (!reqmsg) { - rc = -ENOMEM; - goto out_free; - } - - *reqcopy = *req; - reqcopy->rq_reply_state = NULL; - reqcopy->rq_rep_swab_mask = 0; - reqcopy->rq_pack_bulk = 0; - reqcopy->rq_pack_udesc = 0; - reqcopy->rq_packed_final = 0; - sptlrpc_svc_ctx_addref(reqcopy); - /* We only need the reqmsg for the magic */ - reqcopy->rq_reqmsg = reqmsg; - memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); - - 
LASSERT(atomic_read(&req->rq_refcount)); - /** if it is last refcount then early reply isn't needed */ - if (atomic_read(&req->rq_refcount) == 1) { - DEBUG_REQ(D_ADAPTTO, reqcopy, "Normal reply already sent out, abort sending early reply\n"); - rc = -EINVAL; - goto out; - } - - /* Connection ref */ - reqcopy->rq_export = class_conn2export( - lustre_msg_get_handle(reqcopy->rq_reqmsg)); - if (!reqcopy->rq_export) { - rc = -ENODEV; - goto out; - } - - /* RPC ref */ - class_export_rpc_inc(reqcopy->rq_export); - if (reqcopy->rq_export->exp_obd && - reqcopy->rq_export->exp_obd->obd_fail) { - rc = -ENODEV; - goto out_put; - } - - rc = lustre_pack_reply_flags(reqcopy, 1, NULL, NULL, LPRFL_EARLY_REPLY); - if (rc) - goto out_put; - - rc = ptlrpc_send_reply(reqcopy, PTLRPC_REPLY_EARLY); - - if (!rc) { - /* Adjust our own deadline to what we told the client */ - req->rq_deadline = newdl; - req->rq_early_count++; /* number sent, server side */ - } else { - DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc); - } - - /* Free the (early) reply state from lustre_pack_reply. 
- * (ptlrpc_send_reply takes it's own rs ref, so this is safe here) - */ - ptlrpc_req_drop_rs(reqcopy); - -out_put: - class_export_rpc_dec(reqcopy->rq_export); - class_export_put(reqcopy->rq_export); -out: - sptlrpc_svc_ctx_decref(reqcopy); - kvfree(reqmsg); -out_free: - ptlrpc_request_cache_free(reqcopy); - return rc; -} - -/* Send early replies to everybody expiring within at_early_margin - * asking for at_extra time - */ -static void ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_at_array *array = &svcpt->scp_at_array; - struct ptlrpc_request *rq, *n; - struct list_head work_list; - __u32 index, count; - time64_t deadline; - time64_t now = ktime_get_real_seconds(); - long delay; - int first, counter = 0; - - spin_lock(&svcpt->scp_at_lock); - if (svcpt->scp_at_check == 0) { - spin_unlock(&svcpt->scp_at_lock); - return; - } - delay = jiffies - svcpt->scp_at_checktime; - svcpt->scp_at_check = 0; - - if (array->paa_count == 0) { - spin_unlock(&svcpt->scp_at_lock); - return; - } - - /* The timer went off, but maybe the nearest rpc already completed. */ - first = array->paa_deadline - now; - if (first > at_early_margin) { - /* We've still got plenty of time. Reset the timer. */ - ptlrpc_at_set_timer(svcpt); - spin_unlock(&svcpt->scp_at_lock); - return; - } - - /* We're close to a timeout, and we don't know how much longer the - * server will take. Send early replies to everyone expiring soon. 
- */ - INIT_LIST_HEAD(&work_list); - deadline = -1; - div_u64_rem(array->paa_deadline, array->paa_size, &index); - count = array->paa_count; - while (count > 0) { - count -= array->paa_reqs_count[index]; - list_for_each_entry_safe(rq, n, &array->paa_reqs_array[index], - rq_timed_list) { - if (rq->rq_deadline > now + at_early_margin) { - /* update the earliest deadline */ - if (deadline == -1 || - rq->rq_deadline < deadline) - deadline = rq->rq_deadline; - break; - } - - ptlrpc_at_remove_timed(rq); - /** - * ptlrpc_server_drop_request() may drop - * refcount to 0 already. Let's check this and - * don't add entry to work_list - */ - if (likely(atomic_inc_not_zero(&rq->rq_refcount))) - list_add(&rq->rq_timed_list, &work_list); - counter++; - } - - if (++index >= array->paa_size) - index = 0; - } - array->paa_deadline = deadline; - /* we have a new earliest deadline, restart the timer */ - ptlrpc_at_set_timer(svcpt); - - spin_unlock(&svcpt->scp_at_lock); - - CDEBUG(D_ADAPTTO, "timeout in %+ds, asking for %d secs on %d early replies\n", - first, at_extra, counter); - if (first < 0) { - /* We're already past request deadlines before we even get a - * chance to send early replies - */ - LCONSOLE_WARN("%s: This server is not able to keep up with request traffic (cpu-bound).\n", - svcpt->scp_service->srv_name); - CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=%ld(jiff)\n", - counter, svcpt->scp_nreqs_incoming, - svcpt->scp_nreqs_active, - at_get(&svcpt->scp_at_estimate), delay); - } - - /* we took additional refcount so entries can't be deleted from list, no - * locking is needed - */ - while (!list_empty(&work_list)) { - rq = list_entry(work_list.next, struct ptlrpc_request, - rq_timed_list); - list_del_init(&rq->rq_timed_list); - - if (ptlrpc_at_send_early_reply(rq) == 0) - ptlrpc_at_add_timed(rq); - - ptlrpc_server_drop_request(rq); - } -} - -/** - * Put the request to the export list if the request may become - * a high priority one. 
- */ -static int ptlrpc_server_hpreq_init(struct ptlrpc_service_part *svcpt, - struct ptlrpc_request *req) -{ - int rc = 0; - - if (svcpt->scp_service->srv_ops.so_hpreq_handler) { - rc = svcpt->scp_service->srv_ops.so_hpreq_handler(req); - if (rc < 0) - return rc; - LASSERT(rc == 0); - } - if (req->rq_export && req->rq_ops) { - /* Perform request specific check. We should do this check - * before the request is added into exp_hp_rpcs list otherwise - * it may hit swab race at LU-1044. - */ - if (req->rq_ops->hpreq_check) { - rc = req->rq_ops->hpreq_check(req); - if (rc == -ESTALE) { - req->rq_status = rc; - ptlrpc_error(req); - } - /** can only return error, - * 0 for normal request, - * or 1 for high priority request - */ - LASSERT(rc <= 1); - } - - spin_lock_bh(&req->rq_export->exp_rpc_lock); - list_add(&req->rq_exp_list, &req->rq_export->exp_hp_rpcs); - spin_unlock_bh(&req->rq_export->exp_rpc_lock); - } - - ptlrpc_nrs_req_initialize(svcpt, req, rc); - - return rc; -} - -/** Remove the request from the export list. */ -static void ptlrpc_server_hpreq_fini(struct ptlrpc_request *req) -{ - if (req->rq_export && req->rq_ops) { - /* refresh lock timeout again so that client has more - * room to send lock cancel RPC. 
- */ - if (req->rq_ops->hpreq_fini) - req->rq_ops->hpreq_fini(req); - - spin_lock_bh(&req->rq_export->exp_rpc_lock); - list_del_init(&req->rq_exp_list); - spin_unlock_bh(&req->rq_export->exp_rpc_lock); - } -} - -static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt, - struct ptlrpc_request *req) -{ - int rc; - - rc = ptlrpc_server_hpreq_init(svcpt, req); - if (rc < 0) - return rc; - - ptlrpc_nrs_req_add(svcpt, req, !!rc); - - return 0; -} - -/** - * Allow to handle high priority request - * User can call it w/o any lock but need to hold - * ptlrpc_service_part::scp_req_lock to get reliable result - */ -static bool ptlrpc_server_allow_high(struct ptlrpc_service_part *svcpt, - bool force) -{ - int running = svcpt->scp_nthrs_running; - - if (!nrs_svcpt_has_hp(svcpt)) - return false; - - if (force) - return true; - - if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL && - CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) { - /* leave just 1 thread for normal RPCs */ - running = PTLRPC_NTHRS_INIT; - if (svcpt->scp_service->srv_ops.so_hpreq_handler) - running += 1; - } - - if (svcpt->scp_nreqs_active >= running - 1) - return false; - - if (svcpt->scp_nhreqs_active == 0) - return true; - - return !ptlrpc_nrs_req_pending_nolock(svcpt, false) || - svcpt->scp_hreq_count < svcpt->scp_service->srv_hpreq_ratio; -} - -static bool ptlrpc_server_high_pending(struct ptlrpc_service_part *svcpt, - bool force) -{ - return ptlrpc_server_allow_high(svcpt, force) && - ptlrpc_nrs_req_pending_nolock(svcpt, true); -} - -/** - * Only allow normal priority requests on a service that has a high-priority - * queue if forced (i.e. cleanup), if there are other high priority requests - * already being processed (i.e. those threads can service more high-priority - * requests), or if there are enough idle threads that a later thread can do - * a high priority request. 
- * User can call it w/o any lock but need to hold - * ptlrpc_service_part::scp_req_lock to get reliable result - */ -static bool ptlrpc_server_allow_normal(struct ptlrpc_service_part *svcpt, - bool force) -{ - int running = svcpt->scp_nthrs_running; - - if (unlikely(svcpt->scp_service->srv_req_portal == MDS_REQUEST_PORTAL && - CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CANCEL_RESEND))) { - /* leave just 1 thread for normal RPCs */ - running = PTLRPC_NTHRS_INIT; - if (svcpt->scp_service->srv_ops.so_hpreq_handler) - running += 1; - } - - if (force || - svcpt->scp_nreqs_active < running - 2) - return true; - - if (svcpt->scp_nreqs_active >= running - 1) - return false; - - return svcpt->scp_nhreqs_active > 0 || !nrs_svcpt_has_hp(svcpt); -} - -static bool ptlrpc_server_normal_pending(struct ptlrpc_service_part *svcpt, - bool force) -{ - return ptlrpc_server_allow_normal(svcpt, force) && - ptlrpc_nrs_req_pending_nolock(svcpt, false); -} - -/** - * Returns true if there are requests available in incoming - * request queue for processing and it is allowed to fetch them. - * User can call it w/o any lock but need to hold ptlrpc_service::scp_req_lock - * to get reliable result - * \see ptlrpc_server_allow_normal - * \see ptlrpc_server_allow high - */ -static inline bool -ptlrpc_server_request_pending(struct ptlrpc_service_part *svcpt, bool force) -{ - return ptlrpc_server_high_pending(svcpt, force) || - ptlrpc_server_normal_pending(svcpt, force); -} - -/** - * Fetch a request for processing from queue of unprocessed requests. - * Favors high-priority requests. - * Returns a pointer to fetched request. 
- */ -static struct ptlrpc_request * -ptlrpc_server_request_get(struct ptlrpc_service_part *svcpt, bool force) -{ - struct ptlrpc_request *req = NULL; - - spin_lock(&svcpt->scp_req_lock); - - if (ptlrpc_server_high_pending(svcpt, force)) { - req = ptlrpc_nrs_req_get_nolock(svcpt, true, force); - if (req) { - svcpt->scp_hreq_count++; - goto got_request; - } - } - - if (ptlrpc_server_normal_pending(svcpt, force)) { - req = ptlrpc_nrs_req_get_nolock(svcpt, false, force); - if (req) { - svcpt->scp_hreq_count = 0; - goto got_request; - } - } - - spin_unlock(&svcpt->scp_req_lock); - return NULL; - -got_request: - svcpt->scp_nreqs_active++; - if (req->rq_hp) - svcpt->scp_nhreqs_active++; - - spin_unlock(&svcpt->scp_req_lock); - - if (likely(req->rq_export)) - class_export_rpc_inc(req->rq_export); - - return req; -} - -/** - * Handle freshly incoming reqs, add to timed early reply list, - * pass on to regular request queue. - * All incoming requests pass through here before getting into - * ptlrpc_server_handle_req later on. 
- */ -static int -ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt, - struct ptlrpc_thread *thread) -{ - struct ptlrpc_service *svc = svcpt->scp_service; - struct ptlrpc_request *req; - __u32 deadline; - int rc; - - spin_lock(&svcpt->scp_lock); - if (list_empty(&svcpt->scp_req_incoming)) { - spin_unlock(&svcpt->scp_lock); - return 0; - } - - req = list_entry(svcpt->scp_req_incoming.next, - struct ptlrpc_request, rq_list); - list_del_init(&req->rq_list); - svcpt->scp_nreqs_incoming--; - /* Consider this still a "queued" request as far as stats are - * concerned - */ - spin_unlock(&svcpt->scp_lock); - - /* go through security check/transform */ - rc = sptlrpc_svc_unwrap_request(req); - switch (rc) { - case SECSVC_OK: - break; - case SECSVC_COMPLETE: - target_send_reply(req, 0, OBD_FAIL_MDS_ALL_REPLY_NET); - goto err_req; - case SECSVC_DROP: - goto err_req; - default: - LBUG(); - } - - /* - * for null-flavored rpc, msg has been unpacked by sptlrpc, although - * redo it wouldn't be harmful. 
- */ - if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) { - rc = ptlrpc_unpack_req_msg(req, req->rq_reqlen); - if (rc != 0) { - CERROR("error unpacking request: ptl %d from %s x%llu\n", - svc->srv_req_portal, libcfs_id2str(req->rq_peer), - req->rq_xid); - goto err_req; - } - } - - rc = lustre_unpack_req_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF); - if (rc) { - CERROR("error unpacking ptlrpc body: ptl %d from %s x%llu\n", - svc->srv_req_portal, libcfs_id2str(req->rq_peer), - req->rq_xid); - goto err_req; - } - - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) && - lustre_msg_get_opc(req->rq_reqmsg) == cfs_fail_val) { - CERROR("drop incoming rpc opc %u, x%llu\n", - cfs_fail_val, req->rq_xid); - goto err_req; - } - - rc = -EINVAL; - if (lustre_msg_get_type(req->rq_reqmsg) != PTL_RPC_MSG_REQUEST) { - CERROR("wrong packet type received (type=%u) from %s\n", - lustre_msg_get_type(req->rq_reqmsg), - libcfs_id2str(req->rq_peer)); - goto err_req; - } - - switch (lustre_msg_get_opc(req->rq_reqmsg)) { - case MDS_WRITEPAGE: - case OST_WRITE: - req->rq_bulk_write = 1; - break; - case MDS_READPAGE: - case OST_READ: - case MGS_CONFIG_READ: - req->rq_bulk_read = 1; - break; - } - - CDEBUG(D_RPCTRACE, "got req x%llu\n", req->rq_xid); - - req->rq_export = class_conn2export( - lustre_msg_get_handle(req->rq_reqmsg)); - if (req->rq_export) { - rc = ptlrpc_check_req(req); - if (rc == 0) { - rc = sptlrpc_target_export_check(req->rq_export, req); - if (rc) - DEBUG_REQ(D_ERROR, req, "DROPPING req with illegal security flavor,"); - } - - if (rc) - goto err_req; - } - - /* req_in handling should/must be fast */ - if (ktime_get_real_seconds() - req->rq_arrival_time.tv_sec > 5) - DEBUG_REQ(D_WARNING, req, "Slow req_in handling %llds", - (s64)(ktime_get_real_seconds() - - req->rq_arrival_time.tv_sec)); - - /* Set rpc server deadline and add it to the timed list */ - deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) & - MSGHDR_AT_SUPPORT) ? 
- /* The max time the client expects us to take */ - lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout; - req->rq_deadline = req->rq_arrival_time.tv_sec + deadline; - if (unlikely(deadline == 0)) { - DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout"); - goto err_req; - } - - req->rq_svc_thread = thread; - if (thread) { - /* initialize request session, it is needed for request - * processing by target - */ - rc = lu_context_init(&req->rq_session, - LCT_SERVER_SESSION | LCT_NOREF); - if (rc) { - CERROR("%s: failure to initialize session: rc = %d\n", - thread->t_name, rc); - goto err_req; - } - req->rq_session.lc_thread = thread; - lu_context_enter(&req->rq_session); - req->rq_svc_thread->t_env->le_ses = &req->rq_session; - } - - ptlrpc_at_add_timed(req); - - /* Move it over to the request processing queue */ - rc = ptlrpc_server_request_add(svcpt, req); - if (rc) - goto err_req; - - wake_up(&svcpt->scp_waitq); - return 1; - -err_req: - ptlrpc_server_finish_request(svcpt, req); - - return 1; -} - -/** - * Main incoming request handling logic. - * Calls handler function from service to do actual processing. 
- */ -static int -ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, - struct ptlrpc_thread *thread) -{ - struct ptlrpc_service *svc = svcpt->scp_service; - struct ptlrpc_request *request; - struct timespec64 work_start; - struct timespec64 work_end; - struct timespec64 timediff; - struct timespec64 arrived; - unsigned long timediff_usecs; - unsigned long arrived_usecs; - int fail_opc = 0; - - request = ptlrpc_server_request_get(svcpt, false); - if (!request) - return 0; - - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT)) - fail_opc = OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT; - else if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT)) - fail_opc = OBD_FAIL_PTLRPC_HPREQ_TIMEOUT; - - if (unlikely(fail_opc)) { - if (request->rq_export && request->rq_ops) - OBD_FAIL_TIMEOUT(fail_opc, 4); - } - - ptlrpc_rqphase_move(request, RQ_PHASE_INTERPRET); - - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DUMP_LOG)) - libcfs_debug_dumplog(); - - ktime_get_real_ts64(&work_start); - timediff = timespec64_sub(work_start, request->rq_arrival_time); - timediff_usecs = timediff.tv_sec * USEC_PER_SEC + - timediff.tv_nsec / NSEC_PER_USEC; - if (likely(svc->srv_stats)) { - lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR, - timediff_usecs); - lprocfs_counter_add(svc->srv_stats, PTLRPC_REQQDEPTH_CNTR, - svcpt->scp_nreqs_incoming); - lprocfs_counter_add(svc->srv_stats, PTLRPC_REQACTIVE_CNTR, - svcpt->scp_nreqs_active); - lprocfs_counter_add(svc->srv_stats, PTLRPC_TIMEOUT, - at_get(&svcpt->scp_at_estimate)); - } - - if (likely(request->rq_export)) { - if (unlikely(ptlrpc_check_req(request))) - goto put_conn; - } - - /* Discard requests queued for longer than the deadline. - * The deadline is increased if we send an early reply. 
- */ - if (ktime_get_real_seconds() > request->rq_deadline) { - DEBUG_REQ(D_ERROR, request, "Dropping timed-out request from %s: deadline %lld:%llds ago\n", - libcfs_id2str(request->rq_peer), - request->rq_deadline - - request->rq_arrival_time.tv_sec, - ktime_get_real_seconds() - request->rq_deadline); - goto put_conn; - } - - CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d\n", - current->comm, - (request->rq_export ? - (char *)request->rq_export->exp_client_uuid.uuid : "0"), - (request->rq_export ? - atomic_read(&request->rq_export->exp_refcount) : -99), - lustre_msg_get_status(request->rq_reqmsg), request->rq_xid, - libcfs_id2str(request->rq_peer), - lustre_msg_get_opc(request->rq_reqmsg)); - - if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING) - CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val); - - CDEBUG(D_NET, "got req %llu\n", request->rq_xid); - - /* re-assign request and sesson thread to the current one */ - request->rq_svc_thread = thread; - if (thread) { - LASSERT(request->rq_session.lc_thread); - request->rq_session.lc_thread = thread; - request->rq_session.lc_cookie = 0x55; - thread->t_env->le_ses = &request->rq_session; - } - svc->srv_ops.so_req_handler(request); - - ptlrpc_rqphase_move(request, RQ_PHASE_COMPLETE); - -put_conn: - if (unlikely(ktime_get_real_seconds() > request->rq_deadline)) { - DEBUG_REQ(D_WARNING, request, - "Request took longer than estimated (%lld:%llds); " - "client may timeout.", - (s64)request->rq_deadline - - request->rq_arrival_time.tv_sec, - (s64)ktime_get_real_seconds() - request->rq_deadline); - } - - ktime_get_real_ts64(&work_end); - timediff = timespec64_sub(work_end, work_start); - timediff_usecs = timediff.tv_sec * USEC_PER_SEC + - timediff.tv_nsec / NSEC_PER_USEC; - arrived = timespec64_sub(work_end, request->rq_arrival_time); - arrived_usecs = arrived.tv_sec * USEC_PER_SEC + - arrived.tv_nsec / NSEC_PER_USEC; - CDEBUG(D_RPCTRACE, "Handled RPC 
pname:cluuid+ref:pid:xid:nid:opc %s:%s+%d:%d:x%llu:%s:%d Request processed in %ldus (%ldus total) trans %llu rc %d/%d\n", - current->comm, - (request->rq_export ? - (char *)request->rq_export->exp_client_uuid.uuid : "0"), - (request->rq_export ? - atomic_read(&request->rq_export->exp_refcount) : -99), - lustre_msg_get_status(request->rq_reqmsg), - request->rq_xid, - libcfs_id2str(request->rq_peer), - lustre_msg_get_opc(request->rq_reqmsg), - timediff_usecs, - arrived_usecs, - (request->rq_repmsg ? - lustre_msg_get_transno(request->rq_repmsg) : - request->rq_transno), - request->rq_status, - (request->rq_repmsg ? - lustre_msg_get_status(request->rq_repmsg) : -999)); - if (likely(svc->srv_stats && request->rq_reqmsg)) { - __u32 op = lustre_msg_get_opc(request->rq_reqmsg); - int opc = opcode_offset(op); - - if (opc > 0 && !(op == LDLM_ENQUEUE || op == MDS_REINT)) { - LASSERT(opc < LUSTRE_MAX_OPCODES); - lprocfs_counter_add(svc->srv_stats, - opc + EXTRA_MAX_OPCODES, - timediff_usecs); - } - } - if (unlikely(request->rq_early_count)) { - DEBUG_REQ(D_ADAPTTO, request, - "sent %d early replies before finishing in %llds", - request->rq_early_count, - (s64)work_end.tv_sec - - request->rq_arrival_time.tv_sec); - } - - ptlrpc_server_finish_active_request(svcpt, request); - - return 1; -} - -/** - * An internal function to process a single reply state object. 
- */ -static int -ptlrpc_handle_rs(struct ptlrpc_reply_state *rs) -{ - struct ptlrpc_service_part *svcpt = rs->rs_svcpt; - struct ptlrpc_service *svc = svcpt->scp_service; - struct obd_export *exp; - int nlocks; - int been_handled; - - exp = rs->rs_export; - - LASSERT(rs->rs_difficult); - LASSERT(rs->rs_scheduled); - LASSERT(list_empty(&rs->rs_list)); - - spin_lock(&exp->exp_lock); - /* Noop if removed already */ - list_del_init(&rs->rs_exp_list); - spin_unlock(&exp->exp_lock); - - /* The disk commit callback holds exp_uncommitted_replies_lock while it - * iterates over newly committed replies, removing them from - * exp_uncommitted_replies. It then drops this lock and schedules the - * replies it found for handling here. - * - * We can avoid contention for exp_uncommitted_replies_lock between the - * HRT threads and further commit callbacks by checking rs_committed - * which is set in the commit callback while it holds both - * rs_lock and exp_uncommitted_reples. - * - * If we see rs_committed clear, the commit callback _may_ not have - * handled this reply yet and we race with it to grab - * exp_uncommitted_replies_lock before removing the reply from - * exp_uncommitted_replies. Note that if we lose the race and the - * reply has already been removed, list_del_init() is a noop. - * - * If we see rs_committed set, we know the commit callback is handling, - * or has handled this reply since store reordering might allow us to - * see rs_committed set out of sequence. But since this is done - * holding rs_lock, we can be sure it has all completed once we hold - * rs_lock, which we do right next. - */ - if (!rs->rs_committed) { - spin_lock(&exp->exp_uncommitted_replies_lock); - list_del_init(&rs->rs_obd_list); - spin_unlock(&exp->exp_uncommitted_replies_lock); - } - - spin_lock(&rs->rs_lock); - - been_handled = rs->rs_handled; - rs->rs_handled = 1; - - nlocks = rs->rs_nlocks; /* atomic "steal", but */ - rs->rs_nlocks = 0; /* locks still on rs_locks! 
*/ - - if (nlocks == 0 && !been_handled) { - /* If we see this, we should already have seen the warning - * in mds_steal_ack_locks() - */ - CDEBUG(D_HA, "All locks stolen from rs %p x%lld.t%lld o%d NID %s\n", - rs, - rs->rs_xid, rs->rs_transno, rs->rs_opc, - libcfs_nid2str(exp->exp_connection->c_peer.nid)); - } - - if ((!been_handled && rs->rs_on_net) || nlocks > 0) { - spin_unlock(&rs->rs_lock); - - if (!been_handled && rs->rs_on_net) { - LNetMDUnlink(rs->rs_md_h); - /* Ignore return code; we're racing with completion */ - } - - while (nlocks-- > 0) - ldlm_lock_decref(&rs->rs_locks[nlocks], - rs->rs_modes[nlocks]); - - spin_lock(&rs->rs_lock); - } - - rs->rs_scheduled = 0; - - if (!rs->rs_on_net) { - /* Off the net */ - spin_unlock(&rs->rs_lock); - - class_export_put(exp); - rs->rs_export = NULL; - ptlrpc_rs_decref(rs); - if (atomic_dec_and_test(&svcpt->scp_nreps_difficult) && - svc->srv_is_stopping) - wake_up_all(&svcpt->scp_waitq); - return 1; - } - - /* still on the net; callback will schedule */ - spin_unlock(&rs->rs_lock); - return 1; -} - -static void -ptlrpc_check_rqbd_pool(struct ptlrpc_service_part *svcpt) -{ - int avail = svcpt->scp_nrqbds_posted; - int low_water = test_req_buffer_pressure ? 0 : - svcpt->scp_service->srv_nbuf_per_group / 2; - - /* NB I'm not locking; just looking. */ - - /* CAVEAT EMPTOR: We might be allocating buffers here because we've - * allowed the request history to grow out of control. We could put a - * sanity check on that here and cull some history if we need the - * space. 
- */ - - if (avail <= low_water) - ptlrpc_grow_req_bufs(svcpt, 1); - - if (svcpt->scp_service->srv_stats) { - lprocfs_counter_add(svcpt->scp_service->srv_stats, - PTLRPC_REQBUF_AVAIL_CNTR, avail); - } -} - -static inline int -ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt) -{ - return svcpt->scp_nreqs_active < - svcpt->scp_nthrs_running - 1 - - (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL); -} - -/** - * allowed to create more threads - * user can call it w/o any lock but need to hold - * ptlrpc_service_part::scp_lock to get reliable result - */ -static inline int -ptlrpc_threads_increasable(struct ptlrpc_service_part *svcpt) -{ - return svcpt->scp_nthrs_running + - svcpt->scp_nthrs_starting < - svcpt->scp_service->srv_nthrs_cpt_limit; -} - -/** - * too many requests and allowed to create more threads - */ -static inline int -ptlrpc_threads_need_create(struct ptlrpc_service_part *svcpt) -{ - return !ptlrpc_threads_enough(svcpt) && - ptlrpc_threads_increasable(svcpt); -} - -static inline int -ptlrpc_thread_stopping(struct ptlrpc_thread *thread) -{ - return thread_is_stopping(thread) || - thread->t_svcpt->scp_service->srv_is_stopping; -} - -static inline int -ptlrpc_rqbd_pending(struct ptlrpc_service_part *svcpt) -{ - return !list_empty(&svcpt->scp_rqbd_idle) && - svcpt->scp_rqbd_timeout == 0; -} - -static inline int -ptlrpc_at_check(struct ptlrpc_service_part *svcpt) -{ - return svcpt->scp_at_check; -} - -/** - * requests wait on preprocessing - * user can call it w/o any lock but need to hold - * ptlrpc_service_part::scp_lock to get reliable result - */ -static inline int -ptlrpc_server_request_incoming(struct ptlrpc_service_part *svcpt) -{ - return !list_empty(&svcpt->scp_req_incoming); -} - -/* We perfer lifo queuing, but kernel doesn't provide that yet. 
*/ -#ifndef wait_event_idle_exclusive_lifo -#define wait_event_idle_exclusive_lifo wait_event_idle_exclusive -#define wait_event_idle_exclusive_lifo_timeout wait_event_idle_exclusive_timeout -#endif - -static __attribute__((__noinline__)) int -ptlrpc_wait_event(struct ptlrpc_service_part *svcpt, - struct ptlrpc_thread *thread) -{ - /* Don't exit while there are replies to be handled */ - - /* XXX: Add this back when libcfs watchdog is merged upstream - lc_watchdog_disable(thread->t_watchdog); - */ - - cond_resched(); - - if (svcpt->scp_rqbd_timeout == 0) - wait_event_idle_exclusive_lifo( - svcpt->scp_waitq, - ptlrpc_thread_stopping(thread) || - ptlrpc_server_request_incoming(svcpt) || - ptlrpc_server_request_pending(svcpt, - false) || - ptlrpc_rqbd_pending(svcpt) || - ptlrpc_at_check(svcpt)); - else if (0 == wait_event_idle_exclusive_lifo_timeout( - svcpt->scp_waitq, - ptlrpc_thread_stopping(thread) || - ptlrpc_server_request_incoming(svcpt) || - ptlrpc_server_request_pending(svcpt, - false) || - ptlrpc_rqbd_pending(svcpt) || - ptlrpc_at_check(svcpt), - svcpt->scp_rqbd_timeout)) - svcpt->scp_rqbd_timeout = 0; - - if (ptlrpc_thread_stopping(thread)) - return -EINTR; - - /* - lc_watchdog_touch(thread->t_watchdog, - ptlrpc_server_get_timeout(svcpt)); - */ - return 0; -} - -/** - * Main thread body for service threads. - * Waits in a loop waiting for new requests to process to appear. - * Every time an incoming requests is added to its queue, a waitq - * is woken up and one of the threads will handle it. 
- */ -static int ptlrpc_main(void *arg) -{ - struct ptlrpc_thread *thread = arg; - struct ptlrpc_service_part *svcpt = thread->t_svcpt; - struct ptlrpc_service *svc = svcpt->scp_service; - struct ptlrpc_reply_state *rs; - struct group_info *ginfo = NULL; - struct lu_env *env; - int counter = 0, rc = 0; - - thread->t_pid = current->pid; - unshare_fs_struct(); - - /* NB: we will call cfs_cpt_bind() for all threads, because we - * might want to run lustre server only on a subset of system CPUs, - * in that case ->scp_cpt is CFS_CPT_ANY - */ - rc = cfs_cpt_bind(svc->srv_cptable, svcpt->scp_cpt); - if (rc != 0) { - CWARN("%s: failed to bind %s on CPT %d\n", - svc->srv_name, thread->t_name, svcpt->scp_cpt); - } - - ginfo = groups_alloc(0); - if (!ginfo) { - rc = -ENOMEM; - goto out; - } - - set_current_groups(ginfo); - put_group_info(ginfo); - - if (svc->srv_ops.so_thr_init) { - rc = svc->srv_ops.so_thr_init(thread); - if (rc) - goto out; - } - - env = kzalloc(sizeof(*env), GFP_KERNEL); - if (!env) { - rc = -ENOMEM; - goto out_srv_fini; - } - - rc = lu_context_init(&env->le_ctx, - svc->srv_ctx_tags | LCT_REMEMBER | LCT_NOREF); - if (rc) - goto out_srv_fini; - - thread->t_env = env; - env->le_ctx.lc_thread = thread; - env->le_ctx.lc_cookie = 0x6; - - while (!list_empty(&svcpt->scp_rqbd_idle)) { - rc = ptlrpc_server_post_idle_rqbds(svcpt); - if (rc >= 0) - continue; - - CERROR("Failed to post rqbd for %s on CPT %d: %d\n", - svc->srv_name, svcpt->scp_cpt, rc); - goto out_srv_fini; - } - - /* Alloc reply state structure for this one */ - rs = kvzalloc(svc->srv_max_reply_size, GFP_KERNEL); - if (!rs) { - rc = -ENOMEM; - goto out_srv_fini; - } - - spin_lock(&svcpt->scp_lock); - - LASSERT(thread_is_starting(thread)); - thread_clear_flags(thread, SVC_STARTING); - - LASSERT(svcpt->scp_nthrs_starting == 1); - svcpt->scp_nthrs_starting--; - - /* SVC_STOPPING may already be set here if someone else is trying - * to stop the service while this new thread has been dynamically - * 
forked. We still set SVC_RUNNING to let our creator know that - * we are now running, however we will exit as soon as possible - */ - thread_add_flags(thread, SVC_RUNNING); - svcpt->scp_nthrs_running++; - spin_unlock(&svcpt->scp_lock); - - /* wake up our creator in case he's still waiting. */ - wake_up(&thread->t_ctl_waitq); - - /* - thread->t_watchdog = lc_watchdog_add(ptlrpc_server_get_timeout(svcpt), - NULL, NULL); - */ - - spin_lock(&svcpt->scp_rep_lock); - list_add(&rs->rs_list, &svcpt->scp_rep_idle); - wake_up(&svcpt->scp_rep_waitq); - spin_unlock(&svcpt->scp_rep_lock); - - CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id, - svcpt->scp_nthrs_running); - - /* XXX maintain a list of all managed devices: insert here */ - while (!ptlrpc_thread_stopping(thread)) { - if (ptlrpc_wait_event(svcpt, thread)) - break; - - ptlrpc_check_rqbd_pool(svcpt); - - if (ptlrpc_threads_need_create(svcpt)) { - /* Ignore return code - we tried... */ - ptlrpc_start_thread(svcpt, 0); - } - - /* Process all incoming reqs before handling any */ - if (ptlrpc_server_request_incoming(svcpt)) { - lu_context_enter(&env->le_ctx); - env->le_ses = NULL; - ptlrpc_server_handle_req_in(svcpt, thread); - lu_context_exit(&env->le_ctx); - - /* but limit ourselves in case of flood */ - if (counter++ < 100) - continue; - counter = 0; - } - - if (ptlrpc_at_check(svcpt)) - ptlrpc_at_check_timed(svcpt); - - if (ptlrpc_server_request_pending(svcpt, false)) { - lu_context_enter(&env->le_ctx); - ptlrpc_server_handle_request(svcpt, thread); - lu_context_exit(&env->le_ctx); - } - - if (ptlrpc_rqbd_pending(svcpt) && - ptlrpc_server_post_idle_rqbds(svcpt) < 0) { - /* I just failed to repost request buffers. 
- * Wait for a timeout (unless something else - * happens) before I try again - */ - svcpt->scp_rqbd_timeout = HZ / 10; - CDEBUG(D_RPCTRACE, "Posted buffers: %d\n", - svcpt->scp_nrqbds_posted); - } - } - - /* - lc_watchdog_delete(thread->t_watchdog); - thread->t_watchdog = NULL; - */ - -out_srv_fini: - /* - * deconstruct service specific state created by ptlrpc_start_thread() - */ - if (svc->srv_ops.so_thr_done) - svc->srv_ops.so_thr_done(thread); - - if (env) { - lu_context_fini(&env->le_ctx); - kfree(env); - } -out: - CDEBUG(D_RPCTRACE, "service thread [ %p : %u ] %d exiting: rc %d\n", - thread, thread->t_pid, thread->t_id, rc); - - spin_lock(&svcpt->scp_lock); - if (thread_test_and_clear_flags(thread, SVC_STARTING)) - svcpt->scp_nthrs_starting--; - - if (thread_test_and_clear_flags(thread, SVC_RUNNING)) { - /* must know immediately */ - svcpt->scp_nthrs_running--; - } - - thread->t_id = rc; - thread_add_flags(thread, SVC_STOPPED); - - wake_up(&thread->t_ctl_waitq); - spin_unlock(&svcpt->scp_lock); - - return rc; -} - -static int hrt_dont_sleep(struct ptlrpc_hr_thread *hrt, - struct list_head *replies) -{ - int result; - - spin_lock(&hrt->hrt_lock); - - list_splice_init(&hrt->hrt_queue, replies); - result = ptlrpc_hr.hr_stopping || !list_empty(replies); - - spin_unlock(&hrt->hrt_lock); - return result; -} - -/** - * Main body of "handle reply" function. 
- * It processes acked reply states - */ -static int ptlrpc_hr_main(void *arg) -{ - struct ptlrpc_hr_thread *hrt = arg; - struct ptlrpc_hr_partition *hrp = hrt->hrt_partition; - LIST_HEAD(replies); - char threadname[20]; - int rc; - - snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d", - hrp->hrp_cpt, hrt->hrt_id); - unshare_fs_struct(); - - rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt); - if (rc != 0) { - CWARN("Failed to bind %s on CPT %d of CPT table %p: rc = %d\n", - threadname, hrp->hrp_cpt, ptlrpc_hr.hr_cpt_table, rc); - } - - atomic_inc(&hrp->hrp_nstarted); - wake_up(&ptlrpc_hr.hr_waitq); - - while (!ptlrpc_hr.hr_stopping) { - wait_event_idle(hrt->hrt_waitq, hrt_dont_sleep(hrt, &replies)); - - while (!list_empty(&replies)) { - struct ptlrpc_reply_state *rs; - - rs = list_entry(replies.prev, struct ptlrpc_reply_state, - rs_list); - list_del_init(&rs->rs_list); - ptlrpc_handle_rs(rs); - } - } - - atomic_inc(&hrp->hrp_nstopped); - wake_up(&ptlrpc_hr.hr_waitq); - - return 0; -} - -static void ptlrpc_stop_hr_threads(void) -{ - struct ptlrpc_hr_partition *hrp; - int i; - int j; - - ptlrpc_hr.hr_stopping = 1; - - cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { - if (!hrp->hrp_thrs) - continue; /* uninitialized */ - for (j = 0; j < hrp->hrp_nthrs; j++) - wake_up_all(&hrp->hrp_thrs[j].hrt_waitq); - } - - cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { - if (!hrp->hrp_thrs) - continue; /* uninitialized */ - wait_event(ptlrpc_hr.hr_waitq, - atomic_read(&hrp->hrp_nstopped) == - atomic_read(&hrp->hrp_nstarted)); - } -} - -static int ptlrpc_start_hr_threads(void) -{ - struct ptlrpc_hr_partition *hrp; - int i; - int j; - - cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { - int rc = 0; - - for (j = 0; j < hrp->hrp_nthrs; j++) { - struct ptlrpc_hr_thread *hrt = &hrp->hrp_thrs[j]; - struct task_struct *task; - - task = kthread_run(ptlrpc_hr_main, - &hrp->hrp_thrs[j], - "ptlrpc_hr%02d_%03d", - hrp->hrp_cpt, hrt->hrt_id); - if 
(IS_ERR(task)) { - rc = PTR_ERR(task); - break; - } - } - wait_event(ptlrpc_hr.hr_waitq, - atomic_read(&hrp->hrp_nstarted) == j); - - if (rc < 0) { - CERROR("cannot start reply handler thread %d:%d: rc = %d\n", - i, j, rc); - ptlrpc_stop_hr_threads(); - return rc; - } - } - return 0; -} - -static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt) -{ - struct ptlrpc_thread *thread; - LIST_HEAD(zombie); - - CDEBUG(D_INFO, "Stopping threads for service %s\n", - svcpt->scp_service->srv_name); - - spin_lock(&svcpt->scp_lock); - /* let the thread know that we would like it to stop asap */ - list_for_each_entry(thread, &svcpt->scp_threads, t_link) { - CDEBUG(D_INFO, "Stopping thread %s #%u\n", - svcpt->scp_service->srv_thread_name, thread->t_id); - thread_add_flags(thread, SVC_STOPPING); - } - - wake_up_all(&svcpt->scp_waitq); - - while (!list_empty(&svcpt->scp_threads)) { - thread = list_entry(svcpt->scp_threads.next, - struct ptlrpc_thread, t_link); - if (thread_is_stopped(thread)) { - list_del(&thread->t_link); - list_add(&thread->t_link, &zombie); - continue; - } - spin_unlock(&svcpt->scp_lock); - - CDEBUG(D_INFO, "waiting for stopping-thread %s #%u\n", - svcpt->scp_service->srv_thread_name, thread->t_id); - wait_event_idle(thread->t_ctl_waitq, - thread_is_stopped(thread)); - - spin_lock(&svcpt->scp_lock); - } - - spin_unlock(&svcpt->scp_lock); - - while (!list_empty(&zombie)) { - thread = list_entry(zombie.next, - struct ptlrpc_thread, t_link); - list_del(&thread->t_link); - kfree(thread); - } -} - -/** - * Stops all threads of a particular service \a svc - */ -static void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) -{ - struct ptlrpc_service_part *svcpt; - int i; - - ptlrpc_service_for_each_part(svcpt, i, svc) { - if (svcpt->scp_service) - ptlrpc_svcpt_stop_threads(svcpt); - } -} - -int ptlrpc_start_threads(struct ptlrpc_service *svc) -{ - int rc = 0; - int i; - int j; - - /* We require 2 threads min, see note in ptlrpc_server_handle_request 
*/ - LASSERT(svc->srv_nthrs_cpt_init >= PTLRPC_NTHRS_INIT); - - for (i = 0; i < svc->srv_ncpts; i++) { - for (j = 0; j < svc->srv_nthrs_cpt_init; j++) { - rc = ptlrpc_start_thread(svc->srv_parts[i], 1); - if (rc == 0) - continue; - - if (rc != -EMFILE) - goto failed; - /* We have enough threads, don't start more. b=15759 */ - break; - } - } - - return 0; - failed: - CERROR("cannot start %s thread #%d_%d: rc %d\n", - svc->srv_thread_name, i, j, rc); - ptlrpc_stop_all_threads(svc); - return rc; -} - -int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait) -{ - struct ptlrpc_thread *thread; - struct ptlrpc_service *svc; - struct task_struct *task; - int rc; - - svc = svcpt->scp_service; - - CDEBUG(D_RPCTRACE, "%s[%d] started %d min %d max %d\n", - svc->srv_name, svcpt->scp_cpt, svcpt->scp_nthrs_running, - svc->srv_nthrs_cpt_init, svc->srv_nthrs_cpt_limit); - - again: - if (unlikely(svc->srv_is_stopping)) - return -ESRCH; - - if (!ptlrpc_threads_increasable(svcpt) || - (OBD_FAIL_CHECK(OBD_FAIL_TGT_TOOMANY_THREADS) && - svcpt->scp_nthrs_running == svc->srv_nthrs_cpt_init - 1)) - return -EMFILE; - - thread = kzalloc_node(sizeof(*thread), GFP_NOFS, - cfs_cpt_spread_node(svc->srv_cptable, - svcpt->scp_cpt)); - if (!thread) - return -ENOMEM; - init_waitqueue_head(&thread->t_ctl_waitq); - - spin_lock(&svcpt->scp_lock); - if (!ptlrpc_threads_increasable(svcpt)) { - spin_unlock(&svcpt->scp_lock); - kfree(thread); - return -EMFILE; - } - - if (svcpt->scp_nthrs_starting != 0) { - /* serialize starting because some modules (obdfilter) - * might require unique and contiguous t_id - */ - LASSERT(svcpt->scp_nthrs_starting == 1); - spin_unlock(&svcpt->scp_lock); - kfree(thread); - if (wait) { - CDEBUG(D_INFO, "Waiting for creating thread %s #%d\n", - svc->srv_thread_name, svcpt->scp_thr_nextid); - schedule(); - goto again; - } - - CDEBUG(D_INFO, "Creating thread %s #%d race, retry later\n", - svc->srv_thread_name, svcpt->scp_thr_nextid); - return -EAGAIN; - } - - 
svcpt->scp_nthrs_starting++; - thread->t_id = svcpt->scp_thr_nextid++; - thread_add_flags(thread, SVC_STARTING); - thread->t_svcpt = svcpt; - - list_add(&thread->t_link, &svcpt->scp_threads); - spin_unlock(&svcpt->scp_lock); - - if (svcpt->scp_cpt >= 0) { - snprintf(thread->t_name, sizeof(thread->t_name), "%s%02d_%03d", - svc->srv_thread_name, svcpt->scp_cpt, thread->t_id); - } else { - snprintf(thread->t_name, sizeof(thread->t_name), "%s_%04d", - svc->srv_thread_name, thread->t_id); - } - - CDEBUG(D_RPCTRACE, "starting thread '%s'\n", thread->t_name); - task = kthread_run(ptlrpc_main, thread, "%s", thread->t_name); - if (IS_ERR(task)) { - rc = PTR_ERR(task); - CERROR("cannot start thread '%s': rc = %d\n", - thread->t_name, rc); - spin_lock(&svcpt->scp_lock); - --svcpt->scp_nthrs_starting; - if (thread_is_stopping(thread)) { - /* this ptlrpc_thread is being handled - * by ptlrpc_svcpt_stop_threads now - */ - thread_add_flags(thread, SVC_STOPPED); - wake_up(&thread->t_ctl_waitq); - spin_unlock(&svcpt->scp_lock); - } else { - list_del(&thread->t_link); - spin_unlock(&svcpt->scp_lock); - kfree(thread); - } - return rc; - } - - if (!wait) - return 0; - - wait_event_idle(thread->t_ctl_waitq, - thread_is_running(thread) || thread_is_stopped(thread)); - - rc = thread_is_stopped(thread) ? 
thread->t_id : 0; - return rc; -} - -int ptlrpc_hr_init(void) -{ - struct ptlrpc_hr_partition *hrp; - struct ptlrpc_hr_thread *hrt; - int rc; - int i; - int j; - int weight; - - memset(&ptlrpc_hr, 0, sizeof(ptlrpc_hr)); - ptlrpc_hr.hr_cpt_table = cfs_cpt_tab; - - ptlrpc_hr.hr_partitions = cfs_percpt_alloc(ptlrpc_hr.hr_cpt_table, - sizeof(*hrp)); - if (!ptlrpc_hr.hr_partitions) - return -ENOMEM; - - init_waitqueue_head(&ptlrpc_hr.hr_waitq); - - weight = cpumask_weight(topology_sibling_cpumask(0)); - - cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { - hrp->hrp_cpt = i; - - atomic_set(&hrp->hrp_nstarted, 0); - atomic_set(&hrp->hrp_nstopped, 0); - - hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i); - hrp->hrp_nthrs /= weight; - if (hrp->hrp_nthrs == 0) - hrp->hrp_nthrs = 1; - - hrp->hrp_thrs = - kzalloc_node(hrp->hrp_nthrs * sizeof(*hrt), GFP_NOFS, - cfs_cpt_spread_node(ptlrpc_hr.hr_cpt_table, - i)); - if (!hrp->hrp_thrs) { - rc = -ENOMEM; - goto out; - } - - for (j = 0; j < hrp->hrp_nthrs; j++) { - hrt = &hrp->hrp_thrs[j]; - - hrt->hrt_id = j; - hrt->hrt_partition = hrp; - init_waitqueue_head(&hrt->hrt_waitq); - spin_lock_init(&hrt->hrt_lock); - INIT_LIST_HEAD(&hrt->hrt_queue); - } - } - - rc = ptlrpc_start_hr_threads(); -out: - if (rc != 0) - ptlrpc_hr_fini(); - return rc; -} - -void ptlrpc_hr_fini(void) -{ - struct ptlrpc_hr_partition *hrp; - int i; - - if (!ptlrpc_hr.hr_partitions) - return; - - ptlrpc_stop_hr_threads(); - - cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { - kfree(hrp->hrp_thrs); - } - - cfs_percpt_free(ptlrpc_hr.hr_partitions); - ptlrpc_hr.hr_partitions = NULL; -} - -/** - * Wait until all already scheduled replies are processed. 
- */ -static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt) -{ - while (1) { - int rc; - - rc = wait_event_idle_timeout( - svcpt->scp_waitq, - atomic_read(&svcpt->scp_nreps_difficult) == 0, - 10 * HZ); - if (rc > 0) - break; - CWARN("Unexpectedly long timeout %s %p\n", - svcpt->scp_service->srv_name, svcpt->scp_service); - } -} - -static void -ptlrpc_service_del_atimer(struct ptlrpc_service *svc) -{ - struct ptlrpc_service_part *svcpt; - int i; - - /* early disarm AT timer... */ - ptlrpc_service_for_each_part(svcpt, i, svc) { - if (svcpt->scp_service) - del_timer(&svcpt->scp_at_timer); - } -} - -static void -ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc) -{ - struct ptlrpc_service_part *svcpt; - struct ptlrpc_request_buffer_desc *rqbd; - int cnt; - int rc; - int i; - - /* All history will be culled when the next request buffer is - * freed in ptlrpc_service_purge_all() - */ - svc->srv_hist_nrqbds_cpt_max = 0; - - rc = LNetClearLazyPortal(svc->srv_req_portal); - LASSERT(rc == 0); - - ptlrpc_service_for_each_part(svcpt, i, svc) { - if (!svcpt->scp_service) - break; - - /* Unlink all the request buffers. 
This forces a 'final' - * event with its 'unlink' flag set for each posted rqbd - */ - list_for_each_entry(rqbd, &svcpt->scp_rqbd_posted, - rqbd_list) { - rc = LNetMDUnlink(rqbd->rqbd_md_h); - LASSERT(rc == 0 || rc == -ENOENT); - } - } - - ptlrpc_service_for_each_part(svcpt, i, svc) { - if (!svcpt->scp_service) - break; - - /* Wait for the network to release any buffers - * it's currently filling - */ - spin_lock(&svcpt->scp_lock); - while (svcpt->scp_nrqbds_posted != 0) { - spin_unlock(&svcpt->scp_lock); - /* Network access will complete in finite time but - * the HUGE timeout lets us CWARN for visibility - * of sluggish LNDs - */ - cnt = 0; - while (cnt < LONG_UNLINK && - (rc = wait_event_idle_timeout(svcpt->scp_waitq, - svcpt->scp_nrqbds_posted == 0, - HZ)) == 0) - cnt++; - if (rc == 0) { - CWARN("Service %s waiting for request buffers\n", - svcpt->scp_service->srv_name); - } - spin_lock(&svcpt->scp_lock); - } - spin_unlock(&svcpt->scp_lock); - } -} - -static void -ptlrpc_service_purge_all(struct ptlrpc_service *svc) -{ - struct ptlrpc_service_part *svcpt; - struct ptlrpc_request_buffer_desc *rqbd; - struct ptlrpc_request *req; - struct ptlrpc_reply_state *rs; - int i; - - ptlrpc_service_for_each_part(svcpt, i, svc) { - if (!svcpt->scp_service) - break; - - spin_lock(&svcpt->scp_rep_lock); - while (!list_empty(&svcpt->scp_rep_active)) { - rs = list_entry(svcpt->scp_rep_active.next, - struct ptlrpc_reply_state, rs_list); - spin_lock(&rs->rs_lock); - ptlrpc_schedule_difficult_reply(rs); - spin_unlock(&rs->rs_lock); - } - spin_unlock(&svcpt->scp_rep_lock); - - /* purge the request queue. 
NB No new replies (rqbds - * all unlinked) and no service threads, so I'm the only - * thread noodling the request queue now - */ - while (!list_empty(&svcpt->scp_req_incoming)) { - req = list_entry(svcpt->scp_req_incoming.next, - struct ptlrpc_request, rq_list); - - list_del(&req->rq_list); - svcpt->scp_nreqs_incoming--; - ptlrpc_server_finish_request(svcpt, req); - } - - while (ptlrpc_server_request_pending(svcpt, true)) { - req = ptlrpc_server_request_get(svcpt, true); - ptlrpc_server_finish_active_request(svcpt, req); - } - - LASSERT(list_empty(&svcpt->scp_rqbd_posted)); - LASSERT(svcpt->scp_nreqs_incoming == 0); - LASSERT(svcpt->scp_nreqs_active == 0); - /* history should have been culled by - * ptlrpc_server_finish_request - */ - LASSERT(svcpt->scp_hist_nrqbds == 0); - - /* Now free all the request buffers since nothing - * references them any more... - */ - - while (!list_empty(&svcpt->scp_rqbd_idle)) { - rqbd = list_entry(svcpt->scp_rqbd_idle.next, - struct ptlrpc_request_buffer_desc, - rqbd_list); - ptlrpc_free_rqbd(rqbd); - } - ptlrpc_wait_replies(svcpt); - - while (!list_empty(&svcpt->scp_rep_idle)) { - rs = list_entry(svcpt->scp_rep_idle.next, - struct ptlrpc_reply_state, - rs_list); - list_del(&rs->rs_list); - kvfree(rs); - } - } -} - -static void -ptlrpc_service_free(struct ptlrpc_service *svc) -{ - struct ptlrpc_service_part *svcpt; - struct ptlrpc_at_array *array; - int i; - - ptlrpc_service_for_each_part(svcpt, i, svc) { - if (!svcpt->scp_service) - break; - - /* In case somebody rearmed this in the meantime */ - del_timer(&svcpt->scp_at_timer); - array = &svcpt->scp_at_array; - - kfree(array->paa_reqs_array); - array->paa_reqs_array = NULL; - kfree(array->paa_reqs_count); - array->paa_reqs_count = NULL; - } - - ptlrpc_service_for_each_part(svcpt, i, svc) - kfree(svcpt); - - if (svc->srv_cpts) - cfs_expr_list_values_free(svc->srv_cpts, svc->srv_ncpts); - - kfree(svc); -} - -int ptlrpc_unregister_service(struct ptlrpc_service *service) -{ - 
CDEBUG(D_NET, "%s: tearing down\n", service->srv_name); - - service->srv_is_stopping = 1; - - mutex_lock(&ptlrpc_all_services_mutex); - list_del_init(&service->srv_list); - mutex_unlock(&ptlrpc_all_services_mutex); - - ptlrpc_service_del_atimer(service); - ptlrpc_stop_all_threads(service); - - ptlrpc_service_unlink_rqbd(service); - ptlrpc_service_purge_all(service); - ptlrpc_service_nrs_cleanup(service); - - ptlrpc_lprocfs_unregister_service(service); - ptlrpc_sysfs_unregister_service(service); - - ptlrpc_service_free(service); - - return 0; -} -EXPORT_SYMBOL(ptlrpc_unregister_service); diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c deleted file mode 100644 index f9394c3e1ee2..000000000000 --- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c +++ /dev/null @@ -1,4210 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2011, 2015, Intel Corporation. 
- */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#include -#include - -#include -#include -#include -#include -#include "ptlrpc_internal.h" - -void lustre_assert_wire_constants(void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * (make -C lustre/utils newwiretest) - * running on Linux centos6-bis 2.6.32-358.0.1.el6-head - * #3 SMP Wed Apr 17 17:37:43 CEST 2013 - * with gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC) - */ - - /* Constants... */ - LASSERTF(PTL_RPC_MSG_REQUEST == 4711, "found %lld\n", - (long long)PTL_RPC_MSG_REQUEST); - LASSERTF(PTL_RPC_MSG_ERR == 4712, "found %lld\n", - (long long)PTL_RPC_MSG_ERR); - LASSERTF(PTL_RPC_MSG_REPLY == 4713, "found %lld\n", - (long long)PTL_RPC_MSG_REPLY); - LASSERTF(MDS_DIR_END_OFF == 0xfffffffffffffffeULL, "found 0x%.16llxULL\n", - MDS_DIR_END_OFF); - LASSERTF(DEAD_HANDLE_MAGIC == 0xdeadbeefcafebabeULL, "found 0x%.16llxULL\n", - DEAD_HANDLE_MAGIC); - BUILD_BUG_ON(MTI_NAME_MAXLEN != 64); - LASSERTF(OST_REPLY == 0, "found %lld\n", - (long long)OST_REPLY); - LASSERTF(OST_GETATTR == 1, "found %lld\n", - (long long)OST_GETATTR); - LASSERTF(OST_SETATTR == 2, "found %lld\n", - (long long)OST_SETATTR); - LASSERTF(OST_READ == 3, "found %lld\n", - (long long)OST_READ); - LASSERTF(OST_WRITE == 4, "found %lld\n", - (long long)OST_WRITE); - LASSERTF(OST_CREATE == 5, "found %lld\n", - (long long)OST_CREATE); - LASSERTF(OST_DESTROY == 6, "found %lld\n", - (long long)OST_DESTROY); - LASSERTF(OST_GET_INFO == 7, "found %lld\n", - (long long)OST_GET_INFO); - LASSERTF(OST_CONNECT == 8, "found %lld\n", - (long long)OST_CONNECT); - LASSERTF(OST_DISCONNECT == 9, "found %lld\n", - (long long)OST_DISCONNECT); - LASSERTF(OST_PUNCH == 10, "found %lld\n", - (long long)OST_PUNCH); - LASSERTF(OST_OPEN == 11, "found %lld\n", - (long long)OST_OPEN); - LASSERTF(OST_CLOSE == 12, "found %lld\n", - (long long)OST_CLOSE); - 
LASSERTF(OST_STATFS == 13, "found %lld\n", - (long long)OST_STATFS); - LASSERTF(OST_SYNC == 16, "found %lld\n", - (long long)OST_SYNC); - LASSERTF(OST_SET_INFO == 17, "found %lld\n", - (long long)OST_SET_INFO); - LASSERTF(OST_QUOTACHECK == 18, "found %lld\n", - (long long)OST_QUOTACHECK); - LASSERTF(OST_QUOTACTL == 19, "found %lld\n", - (long long)OST_QUOTACTL); - LASSERTF(OST_QUOTA_ADJUST_QUNIT == 20, "found %lld\n", - (long long)OST_QUOTA_ADJUST_QUNIT); - LASSERTF(OST_LAST_OPC == 21, "found %lld\n", - (long long)OST_LAST_OPC); - LASSERTF(OBD_OBJECT_EOF == 0xffffffffffffffffULL, "found 0x%.16llxULL\n", - OBD_OBJECT_EOF); - LASSERTF(OST_MIN_PRECREATE == 32, "found %lld\n", - (long long)OST_MIN_PRECREATE); - LASSERTF(OST_MAX_PRECREATE == 20000, "found %lld\n", - (long long)OST_MAX_PRECREATE); - LASSERTF(OST_LVB_ERR_INIT == 0xffbadbad80000000ULL, "found 0x%.16llxULL\n", - OST_LVB_ERR_INIT); - LASSERTF(OST_LVB_ERR_MASK == 0xffbadbad00000000ULL, "found 0x%.16llxULL\n", - OST_LVB_ERR_MASK); - LASSERTF(MDS_FIRST_OPC == 33, "found %lld\n", - (long long)MDS_FIRST_OPC); - LASSERTF(MDS_GETATTR == 33, "found %lld\n", - (long long)MDS_GETATTR); - LASSERTF(MDS_GETATTR_NAME == 34, "found %lld\n", - (long long)MDS_GETATTR_NAME); - LASSERTF(MDS_CLOSE == 35, "found %lld\n", - (long long)MDS_CLOSE); - LASSERTF(MDS_REINT == 36, "found %lld\n", - (long long)MDS_REINT); - LASSERTF(MDS_READPAGE == 37, "found %lld\n", - (long long)MDS_READPAGE); - LASSERTF(MDS_CONNECT == 38, "found %lld\n", - (long long)MDS_CONNECT); - LASSERTF(MDS_DISCONNECT == 39, "found %lld\n", - (long long)MDS_DISCONNECT); - LASSERTF(MDS_GETSTATUS == 40, "found %lld\n", - (long long)MDS_GETSTATUS); - LASSERTF(MDS_STATFS == 41, "found %lld\n", - (long long)MDS_STATFS); - LASSERTF(MDS_PIN == 42, "found %lld\n", - (long long)MDS_PIN); - LASSERTF(MDS_UNPIN == 43, "found %lld\n", - (long long)MDS_UNPIN); - LASSERTF(MDS_SYNC == 44, "found %lld\n", - (long long)MDS_SYNC); - LASSERTF(MDS_DONE_WRITING == 45, "found %lld\n", 
- (long long)MDS_DONE_WRITING); - LASSERTF(MDS_SET_INFO == 46, "found %lld\n", - (long long)MDS_SET_INFO); - LASSERTF(MDS_QUOTACHECK == 47, "found %lld\n", - (long long)MDS_QUOTACHECK); - LASSERTF(MDS_QUOTACTL == 48, "found %lld\n", - (long long)MDS_QUOTACTL); - LASSERTF(MDS_GETXATTR == 49, "found %lld\n", - (long long)MDS_GETXATTR); - LASSERTF(MDS_SETXATTR == 50, "found %lld\n", - (long long)MDS_SETXATTR); - LASSERTF(MDS_WRITEPAGE == 51, "found %lld\n", - (long long)MDS_WRITEPAGE); - LASSERTF(MDS_IS_SUBDIR == 52, "found %lld\n", - (long long)MDS_IS_SUBDIR); - LASSERTF(MDS_GET_INFO == 53, "found %lld\n", - (long long)MDS_GET_INFO); - LASSERTF(MDS_HSM_STATE_GET == 54, "found %lld\n", - (long long)MDS_HSM_STATE_GET); - LASSERTF(MDS_HSM_STATE_SET == 55, "found %lld\n", - (long long)MDS_HSM_STATE_SET); - LASSERTF(MDS_HSM_ACTION == 56, "found %lld\n", - (long long)MDS_HSM_ACTION); - LASSERTF(MDS_HSM_PROGRESS == 57, "found %lld\n", - (long long)MDS_HSM_PROGRESS); - LASSERTF(MDS_HSM_REQUEST == 58, "found %lld\n", - (long long)MDS_HSM_REQUEST); - LASSERTF(MDS_HSM_CT_REGISTER == 59, "found %lld\n", - (long long)MDS_HSM_CT_REGISTER); - LASSERTF(MDS_HSM_CT_UNREGISTER == 60, "found %lld\n", - (long long)MDS_HSM_CT_UNREGISTER); - LASSERTF(MDS_SWAP_LAYOUTS == 61, "found %lld\n", - (long long)MDS_SWAP_LAYOUTS); - LASSERTF(MDS_LAST_OPC == 62, "found %lld\n", - (long long)MDS_LAST_OPC); - LASSERTF(REINT_SETATTR == 1, "found %lld\n", - (long long)REINT_SETATTR); - LASSERTF(REINT_CREATE == 2, "found %lld\n", - (long long)REINT_CREATE); - LASSERTF(REINT_LINK == 3, "found %lld\n", - (long long)REINT_LINK); - LASSERTF(REINT_UNLINK == 4, "found %lld\n", - (long long)REINT_UNLINK); - LASSERTF(REINT_RENAME == 5, "found %lld\n", - (long long)REINT_RENAME); - LASSERTF(REINT_OPEN == 6, "found %lld\n", - (long long)REINT_OPEN); - LASSERTF(REINT_SETXATTR == 7, "found %lld\n", - (long long)REINT_SETXATTR); - LASSERTF(REINT_RMENTRY == 8, "found %lld\n", - (long long)REINT_RMENTRY); - 
LASSERTF(REINT_MIGRATE == 9, "found %lld\n", - (long long)REINT_MIGRATE); - LASSERTF(REINT_MAX == 10, "found %lld\n", - (long long)REINT_MAX); - LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)DISP_IT_EXECD); - LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned int)DISP_LOOKUP_EXECD); - LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned int)DISP_LOOKUP_NEG); - LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n", - (unsigned int)DISP_LOOKUP_POS); - LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n", - (unsigned int)DISP_OPEN_CREATE); - LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n", - (unsigned int)DISP_OPEN_OPEN); - LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n", - (unsigned int)DISP_ENQ_COMPLETE); - LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n", - (unsigned int)DISP_ENQ_OPEN_REF); - LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n", - (unsigned int)DISP_ENQ_CREATE_REF); - LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n", - (unsigned int)DISP_OPEN_LOCK); - LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n", - (long long)MDS_STATUS_CONN); - LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n", - (long long)MDS_STATUS_LOV); - LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_MODE); - LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_UID); - LASSERTF(MDS_ATTR_GID == 0x0000000000000004ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_GID); - LASSERTF(MDS_ATTR_SIZE == 0x0000000000000008ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_SIZE); - LASSERTF(MDS_ATTR_ATIME == 0x0000000000000010ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_ATIME); - LASSERTF(MDS_ATTR_MTIME == 0x0000000000000020ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_MTIME); - LASSERTF(MDS_ATTR_CTIME == 0x0000000000000040ULL, "found 0x%.16llxULL\n", - 
(long long)MDS_ATTR_CTIME); - LASSERTF(MDS_ATTR_ATIME_SET == 0x0000000000000080ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_ATIME_SET); - LASSERTF(MDS_ATTR_MTIME_SET == 0x0000000000000100ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_MTIME_SET); - LASSERTF(MDS_ATTR_FORCE == 0x0000000000000200ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_FORCE); - LASSERTF(MDS_ATTR_ATTR_FLAG == 0x0000000000000400ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_ATTR_FLAG); - LASSERTF(MDS_ATTR_KILL_SUID == 0x0000000000000800ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_KILL_SUID); - LASSERTF(MDS_ATTR_KILL_SGID == 0x0000000000001000ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_KILL_SGID); - LASSERTF(MDS_ATTR_CTIME_SET == 0x0000000000002000ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_CTIME_SET); - LASSERTF(MDS_ATTR_FROM_OPEN == 0x0000000000004000ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_FROM_OPEN); - LASSERTF(MDS_ATTR_BLOCKS == 0x0000000000008000ULL, "found 0x%.16llxULL\n", - (long long)MDS_ATTR_BLOCKS); - LASSERTF(FLD_QUERY == 900, "found %lld\n", - (long long)FLD_QUERY); - LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n", - (long long)FLD_FIRST_OPC); - LASSERTF(FLD_READ == 901, "found %lld\n", - (long long)FLD_READ); - LASSERTF(FLD_LAST_OPC == 902, "found %lld\n", - (long long)FLD_LAST_OPC); - LASSERTF(SEQ_QUERY == 700, "found %lld\n", - (long long)SEQ_QUERY); - LASSERTF(SEQ_FIRST_OPC == 700, "found %lld\n", - (long long)SEQ_FIRST_OPC); - LASSERTF(SEQ_LAST_OPC == 701, "found %lld\n", - (long long)SEQ_LAST_OPC); - LASSERTF(SEQ_ALLOC_SUPER == 0, "found %lld\n", - (long long)SEQ_ALLOC_SUPER); - LASSERTF(SEQ_ALLOC_META == 1, "found %lld\n", - (long long)SEQ_ALLOC_META); - LASSERTF(LDLM_ENQUEUE == 101, "found %lld\n", - (long long)LDLM_ENQUEUE); - LASSERTF(LDLM_CONVERT == 102, "found %lld\n", - (long long)LDLM_CONVERT); - LASSERTF(LDLM_CANCEL == 103, "found %lld\n", - (long long)LDLM_CANCEL); - LASSERTF(LDLM_BL_CALLBACK == 104, "found %lld\n", - (long 
long)LDLM_BL_CALLBACK); - LASSERTF(LDLM_CP_CALLBACK == 105, "found %lld\n", - (long long)LDLM_CP_CALLBACK); - LASSERTF(LDLM_GL_CALLBACK == 106, "found %lld\n", - (long long)LDLM_GL_CALLBACK); - LASSERTF(LDLM_SET_INFO == 107, "found %lld\n", - (long long)LDLM_SET_INFO); - LASSERTF(LDLM_LAST_OPC == 108, "found %lld\n", - (long long)LDLM_LAST_OPC); - LASSERTF(LCK_MINMODE == 0, "found %lld\n", - (long long)LCK_MINMODE); - LASSERTF(LCK_EX == 1, "found %lld\n", - (long long)LCK_EX); - LASSERTF(LCK_PW == 2, "found %lld\n", - (long long)LCK_PW); - LASSERTF(LCK_PR == 4, "found %lld\n", - (long long)LCK_PR); - LASSERTF(LCK_CW == 8, "found %lld\n", - (long long)LCK_CW); - LASSERTF(LCK_CR == 16, "found %lld\n", - (long long)LCK_CR); - LASSERTF(LCK_NL == 32, "found %lld\n", - (long long)LCK_NL); - LASSERTF(LCK_GROUP == 64, "found %lld\n", - (long long)LCK_GROUP); - LASSERTF(LCK_COS == 128, "found %lld\n", - (long long)LCK_COS); - LASSERTF(LCK_MAXMODE == 129, "found %lld\n", - (long long)LCK_MAXMODE); - LASSERTF(LCK_MODE_NUM == 8, "found %lld\n", - (long long)LCK_MODE_NUM); - BUILD_BUG_ON(LDLM_PLAIN != 10); - BUILD_BUG_ON(LDLM_EXTENT != 11); - BUILD_BUG_ON(LDLM_FLOCK != 12); - BUILD_BUG_ON(LDLM_IBITS != 13); - BUILD_BUG_ON(LDLM_MAX_TYPE != 14); - BUILD_BUG_ON(LUSTRE_RES_ID_SEQ_OFF != 0); - BUILD_BUG_ON(LUSTRE_RES_ID_VER_OID_OFF != 1); - BUILD_BUG_ON(LUSTRE_RES_ID_QUOTA_SEQ_OFF != 2); - BUILD_BUG_ON(LUSTRE_RES_ID_QUOTA_VER_OID_OFF != 3); - BUILD_BUG_ON(LUSTRE_RES_ID_HSH_OFF != 3); - LASSERTF(OBD_PING == 400, "found %lld\n", - (long long)OBD_PING); - LASSERTF(OBD_LOG_CANCEL == 401, "found %lld\n", - (long long)OBD_LOG_CANCEL); - LASSERTF(OBD_QC_CALLBACK == 402, "found %lld\n", - (long long)OBD_QC_CALLBACK); - LASSERTF(OBD_IDX_READ == 403, "found %lld\n", - (long long)OBD_IDX_READ); - LASSERTF(OBD_LAST_OPC == 404, "found %lld\n", - (long long)OBD_LAST_OPC); - LASSERTF(QUOTA_DQACQ == 601, "found %lld\n", - (long long)QUOTA_DQACQ); - LASSERTF(QUOTA_DQREL == 602, "found %lld\n", - 
(long long)QUOTA_DQREL); - LASSERTF(QUOTA_LAST_OPC == 603, "found %lld\n", - (long long)QUOTA_LAST_OPC); - LASSERTF(MGS_CONNECT == 250, "found %lld\n", - (long long)MGS_CONNECT); - LASSERTF(MGS_DISCONNECT == 251, "found %lld\n", - (long long)MGS_DISCONNECT); - LASSERTF(MGS_EXCEPTION == 252, "found %lld\n", - (long long)MGS_EXCEPTION); - LASSERTF(MGS_TARGET_REG == 253, "found %lld\n", - (long long)MGS_TARGET_REG); - LASSERTF(MGS_TARGET_DEL == 254, "found %lld\n", - (long long)MGS_TARGET_DEL); - LASSERTF(MGS_SET_INFO == 255, "found %lld\n", - (long long)MGS_SET_INFO); - LASSERTF(MGS_LAST_OPC == 257, "found %lld\n", - (long long)MGS_LAST_OPC); - LASSERTF(SEC_CTX_INIT == 801, "found %lld\n", - (long long)SEC_CTX_INIT); - LASSERTF(SEC_CTX_INIT_CONT == 802, "found %lld\n", - (long long)SEC_CTX_INIT_CONT); - LASSERTF(SEC_CTX_FINI == 803, "found %lld\n", - (long long)SEC_CTX_FINI); - LASSERTF(SEC_LAST_OPC == 804, "found %lld\n", - (long long)SEC_LAST_OPC); - /* Sizes and Offsets */ - - /* Checks for struct obd_uuid */ - LASSERTF((int)sizeof(struct obd_uuid) == 40, "found %lld\n", - (long long)(int)sizeof(struct obd_uuid)); - - /* Checks for struct lu_seq_range */ - LASSERTF((int)sizeof(struct lu_seq_range) == 24, "found %lld\n", - (long long)(int)sizeof(struct lu_seq_range)); - LASSERTF((int)offsetof(struct lu_seq_range, lsr_start) == 0, "found %lld\n", - (long long)(int)offsetof(struct lu_seq_range, lsr_start)); - LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_start) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_start)); - LASSERTF((int)offsetof(struct lu_seq_range, lsr_end) == 8, "found %lld\n", - (long long)(int)offsetof(struct lu_seq_range, lsr_end)); - LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_end) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_end)); - LASSERTF((int)offsetof(struct lu_seq_range, lsr_index) == 16, "found %lld\n", - (long long)(int)offsetof(struct lu_seq_range, 
lsr_index)); - LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_index) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_index)); - LASSERTF((int)offsetof(struct lu_seq_range, lsr_flags) == 20, "found %lld\n", - (long long)(int)offsetof(struct lu_seq_range, lsr_flags)); - LASSERTF((int)sizeof(((struct lu_seq_range *)0)->lsr_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lu_seq_range *)0)->lsr_flags)); - LASSERTF(LU_SEQ_RANGE_MDT == 0, "found %lld\n", - (long long)LU_SEQ_RANGE_MDT); - LASSERTF(LU_SEQ_RANGE_OST == 1, "found %lld\n", - (long long)LU_SEQ_RANGE_OST); - - /* Checks for struct lustre_mdt_attrs */ - LASSERTF((int)sizeof(struct lustre_mdt_attrs) == 24, "found %lld\n", - (long long)(int)sizeof(struct lustre_mdt_attrs)); - LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_compat) == 0, "found %lld\n", - (long long)(int)offsetof(struct lustre_mdt_attrs, lma_compat)); - LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_compat)); - LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_incompat) == 4, "found %lld\n", - (long long)(int)offsetof(struct lustre_mdt_attrs, lma_incompat)); - LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_incompat)); - LASSERTF((int)offsetof(struct lustre_mdt_attrs, lma_self_fid) == 8, "found %lld\n", - (long long)(int)offsetof(struct lustre_mdt_attrs, lma_self_fid)); - LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid)); - LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)LMAI_RELEASED); - LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)LMAC_HSM); - LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned 
int)LMAC_NOT_IN_OI); - LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n", - (unsigned int)LMAC_FID_ON_OST); - - /* Checks for struct ost_id */ - LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n", - (long long)(int)sizeof(struct ost_id)); - LASSERTF((int)offsetof(struct ost_id, oi) == 0, "found %lld\n", - (long long)(int)offsetof(struct ost_id, oi)); - LASSERTF((int)sizeof(((struct ost_id *)0)->oi) == 16, "found %lld\n", - (long long)(int)sizeof(((struct ost_id *)0)->oi)); - LASSERTF(LUSTRE_FID_INIT_OID == 1, "found %lld\n", - (long long)LUSTRE_FID_INIT_OID); - LASSERTF(FID_SEQ_OST_MDT0 == 0, "found %lld\n", - (long long)FID_SEQ_OST_MDT0); - LASSERTF(FID_SEQ_LLOG == 1, "found %lld\n", - (long long)FID_SEQ_LLOG); - LASSERTF(FID_SEQ_ECHO == 2, "found %lld\n", - (long long)FID_SEQ_ECHO); - LASSERTF(FID_SEQ_OST_MDT1 == 3, "found %lld\n", - (long long)FID_SEQ_OST_MDT1); - LASSERTF(FID_SEQ_OST_MAX == 9, "found %lld\n", - (long long)FID_SEQ_OST_MAX); - LASSERTF(FID_SEQ_RSVD == 11, "found %lld\n", - (long long)FID_SEQ_RSVD); - LASSERTF(FID_SEQ_IGIF == 12, "found %lld\n", - (long long)FID_SEQ_IGIF); - LASSERTF(FID_SEQ_IGIF_MAX == 0x00000000ffffffffULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_IGIF_MAX); - LASSERTF(FID_SEQ_IDIF == 0x0000000100000000ULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_IDIF); - LASSERTF(FID_SEQ_IDIF_MAX == 0x00000001ffffffffULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_IDIF_MAX); - LASSERTF(FID_SEQ_START == 0x0000000200000000ULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_START); - LASSERTF(FID_SEQ_LOCAL_FILE == 0x0000000200000001ULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_LOCAL_FILE); - LASSERTF(FID_SEQ_DOT_LUSTRE == 0x0000000200000002ULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_DOT_LUSTRE); - LASSERTF(FID_SEQ_SPECIAL == 0x0000000200000004ULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_SPECIAL); - LASSERTF(FID_SEQ_QUOTA == 0x0000000200000005ULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_QUOTA); - 
LASSERTF(FID_SEQ_QUOTA_GLB == 0x0000000200000006ULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_QUOTA_GLB); - LASSERTF(FID_SEQ_ROOT == 0x0000000200000007ULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_ROOT); - LASSERTF(FID_SEQ_NORMAL == 0x0000000200000400ULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_NORMAL); - LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n", - (long long)FID_SEQ_LOV_DEFAULT); - LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)FID_OID_SPECIAL_BFL); - LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)FID_OID_DOT_LUSTRE); - LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned int)FID_OID_DOT_LUSTRE_OBF); - - /* Checks for struct lu_dirent */ - LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n", - (long long)(int)sizeof(struct lu_dirent)); - LASSERTF((int)offsetof(struct lu_dirent, lde_fid) == 0, "found %lld\n", - (long long)(int)offsetof(struct lu_dirent, lde_fid)); - LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirent *)0)->lde_fid)); - LASSERTF((int)offsetof(struct lu_dirent, lde_hash) == 16, "found %lld\n", - (long long)(int)offsetof(struct lu_dirent, lde_hash)); - LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_hash) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirent *)0)->lde_hash)); - LASSERTF((int)offsetof(struct lu_dirent, lde_reclen) == 24, "found %lld\n", - (long long)(int)offsetof(struct lu_dirent, lde_reclen)); - LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_reclen) == 2, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirent *)0)->lde_reclen)); - LASSERTF((int)offsetof(struct lu_dirent, lde_namelen) == 26, "found %lld\n", - (long long)(int)offsetof(struct lu_dirent, lde_namelen)); - LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_namelen) == 2, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirent 
*)0)->lde_namelen)); - LASSERTF((int)offsetof(struct lu_dirent, lde_attrs) == 28, "found %lld\n", - (long long)(int)offsetof(struct lu_dirent, lde_attrs)); - LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_attrs) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirent *)0)->lde_attrs)); - LASSERTF((int)offsetof(struct lu_dirent, lde_name[0]) == 32, "found %lld\n", - (long long)(int)offsetof(struct lu_dirent, lde_name[0])); - LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0])); - LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)LUDA_FID); - LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned int)LUDA_TYPE); - LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned int)LUDA_64BITHASH); - - /* Checks for struct luda_type */ - LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n", - (long long)(int)sizeof(struct luda_type)); - LASSERTF((int)offsetof(struct luda_type, lt_type) == 0, "found %lld\n", - (long long)(int)offsetof(struct luda_type, lt_type)); - LASSERTF((int)sizeof(((struct luda_type *)0)->lt_type) == 2, "found %lld\n", - (long long)(int)sizeof(((struct luda_type *)0)->lt_type)); - - /* Checks for struct lu_dirpage */ - LASSERTF((int)sizeof(struct lu_dirpage) == 24, "found %lld\n", - (long long)(int)sizeof(struct lu_dirpage)); - LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_start) == 0, "found %lld\n", - (long long)(int)offsetof(struct lu_dirpage, ldp_hash_start)); - LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_hash_start)); - LASSERTF((int)offsetof(struct lu_dirpage, ldp_hash_end) == 8, "found %lld\n", - (long long)(int)offsetof(struct lu_dirpage, ldp_hash_end)); - LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_hash_end) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirpage 
*)0)->ldp_hash_end)); - LASSERTF((int)offsetof(struct lu_dirpage, ldp_flags) == 16, "found %lld\n", - (long long)(int)offsetof(struct lu_dirpage, ldp_flags)); - LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_flags)); - LASSERTF((int)offsetof(struct lu_dirpage, ldp_pad0) == 20, "found %lld\n", - (long long)(int)offsetof(struct lu_dirpage, ldp_pad0)); - LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_pad0) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_pad0)); - LASSERTF((int)offsetof(struct lu_dirpage, ldp_entries[0]) == 24, "found %lld\n", - (long long)(int)offsetof(struct lu_dirpage, ldp_entries[0])); - LASSERTF((int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0]) == 32, "found %lld\n", - (long long)(int)sizeof(((struct lu_dirpage *)0)->ldp_entries[0])); - LASSERTF(LDF_EMPTY == 1, "found %lld\n", - (long long)LDF_EMPTY); - LASSERTF(LDF_COLLIDE == 2, "found %lld\n", - (long long)LDF_COLLIDE); - LASSERTF(LU_PAGE_SIZE == 4096, "found %lld\n", - (long long)LU_PAGE_SIZE); - - /* Checks for struct lustre_handle */ - LASSERTF((int)sizeof(struct lustre_handle) == 8, "found %lld\n", - (long long)(int)sizeof(struct lustre_handle)); - LASSERTF((int)offsetof(struct lustre_handle, cookie) == 0, "found %lld\n", - (long long)(int)offsetof(struct lustre_handle, cookie)); - LASSERTF((int)sizeof(((struct lustre_handle *)0)->cookie) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lustre_handle *)0)->cookie)); - - /* Checks for struct lustre_msg_v2 */ - LASSERTF((int)sizeof(struct lustre_msg_v2) == 32, "found %lld\n", - (long long)(int)sizeof(struct lustre_msg_v2)); - LASSERTF((int)offsetof(struct lustre_msg_v2, lm_bufcount) == 0, "found %lld\n", - (long long)(int)offsetof(struct lustre_msg_v2, lm_bufcount)); - LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_bufcount) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_msg_v2 
*)0)->lm_bufcount)); - LASSERTF((int)offsetof(struct lustre_msg_v2, lm_secflvr) == 4, "found %lld\n", - (long long)(int)offsetof(struct lustre_msg_v2, lm_secflvr)); - LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_secflvr)); - LASSERTF((int)offsetof(struct lustre_msg_v2, lm_magic) == 8, "found %lld\n", - (long long)(int)offsetof(struct lustre_msg_v2, lm_magic)); - LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_magic)); - LASSERTF((int)offsetof(struct lustre_msg_v2, lm_repsize) == 12, "found %lld\n", - (long long)(int)offsetof(struct lustre_msg_v2, lm_repsize)); - LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_repsize)); - LASSERTF((int)offsetof(struct lustre_msg_v2, lm_cksum) == 16, "found %lld\n", - (long long)(int)offsetof(struct lustre_msg_v2, lm_cksum)); - LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_cksum)); - LASSERTF((int)offsetof(struct lustre_msg_v2, lm_flags) == 20, "found %lld\n", - (long long)(int)offsetof(struct lustre_msg_v2, lm_flags)); - LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_flags)); - LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_2) == 24, "found %lld\n", - (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_2)); - LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_2)); - LASSERTF((int)offsetof(struct lustre_msg_v2, lm_padding_3) == 28, "found %lld\n", - (long long)(int)offsetof(struct lustre_msg_v2, lm_padding_3)); - LASSERTF((int)sizeof(((struct lustre_msg_v2 
*)0)->lm_padding_3) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_padding_3)); - LASSERTF((int)offsetof(struct lustre_msg_v2, lm_buflens[0]) == 32, "found %lld\n", - (long long)(int)offsetof(struct lustre_msg_v2, lm_buflens[0])); - LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0])); - LASSERTF(LUSTRE_MSG_MAGIC_V2 == 0x0BD00BD3, "found 0x%.8x\n", - LUSTRE_MSG_MAGIC_V2); - LASSERTF(LUSTRE_MSG_MAGIC_V2_SWABBED == 0xD30BD00B, "found 0x%.8x\n", - LUSTRE_MSG_MAGIC_V2_SWABBED); - - /* Checks for struct ptlrpc_body */ - LASSERTF((int)sizeof(struct ptlrpc_body_v3) == 184, "found %lld\n", - (long long)(int)sizeof(struct ptlrpc_body_v3)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == 0, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_handle)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == 8, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_type)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == 12, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_version)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == 16, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_opc)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc)); - LASSERTF((int)offsetof(struct 
ptlrpc_body_v3, pb_status) == 20, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_status)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == 24, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_xid)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_tag) == 32, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_tag)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag) == 2, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding0) == 34, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding0)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0) == 2, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding1) == 36, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding1)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == 40, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_last_committed)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == 48, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_transno)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 
*)0)->pb_transno) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == 56, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_flags)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == 60, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_op_flags)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == 64, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == 68, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_timeout)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == 72, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_service_time)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == 76, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_limit)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == 
80, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_slv)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv)); - BUILD_BUG_ON(PTLRPC_NUM_VERSIONS != 4); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == 88, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_pre_versions)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == 32, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_mbits) == 120, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_mbits)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_0) == 128, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_0)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_1) == 136, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_1)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_2) == 144, "found %lld\n", - (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding64_2)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2)); - BUILD_BUG_ON(LUSTRE_JOBID_SIZE != 32); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n", - (long 
long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid) == 32, "found %lld\n", - (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_handle) == (int)offsetof(struct ptlrpc_body_v2, pb_handle), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_handle), (int)offsetof(struct ptlrpc_body_v2, pb_handle)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_handle), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_handle)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_type) == (int)offsetof(struct ptlrpc_body_v2, pb_type), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_type), (int)offsetof(struct ptlrpc_body_v2, pb_type)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_type), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_type)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_version) == (int)offsetof(struct ptlrpc_body_v2, pb_version), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_version), (int)offsetof(struct ptlrpc_body_v2, pb_version)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_version), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_version)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_opc) == (int)offsetof(struct ptlrpc_body_v2, pb_opc), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_opc), (int)offsetof(struct ptlrpc_body_v2, pb_opc)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_opc) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 
*)0)->pb_opc), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_opc)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_status) == (int)offsetof(struct ptlrpc_body_v2, pb_status), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_status), (int)offsetof(struct ptlrpc_body_v2, pb_status)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_status), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_status)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_xid) == (int)offsetof(struct ptlrpc_body_v2, pb_last_xid), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_last_xid), (int)offsetof(struct ptlrpc_body_v2, pb_last_xid)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_xid), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_xid)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_tag) == (int)offsetof(struct ptlrpc_body_v2, pb_tag), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_tag), (int)offsetof(struct ptlrpc_body_v2, pb_tag)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_tag), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_tag), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_tag)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding0) == (int)offsetof(struct ptlrpc_body_v2, pb_padding0), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_padding0), (int)offsetof(struct ptlrpc_body_v2, pb_padding0)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding0), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding0), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding0)); - LASSERTF((int)offsetof(struct 
ptlrpc_body_v3, pb_padding1) == (int)offsetof(struct ptlrpc_body_v2, pb_padding1), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_padding1), (int)offsetof(struct ptlrpc_body_v2, pb_padding1)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding1), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding1), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding1)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_last_committed) == (int)offsetof(struct ptlrpc_body_v2, pb_last_committed), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_last_committed), (int)offsetof(struct ptlrpc_body_v2, pb_last_committed)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_last_committed), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_last_committed)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_transno) == (int)offsetof(struct ptlrpc_body_v2, pb_transno), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_transno), (int)offsetof(struct ptlrpc_body_v2, pb_transno)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_transno), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_transno)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_flags) == (int)offsetof(struct ptlrpc_body_v2, pb_flags), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_flags), (int)offsetof(struct ptlrpc_body_v2, pb_flags)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_flags)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_op_flags) == 
(int)offsetof(struct ptlrpc_body_v2, pb_op_flags), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_op_flags), (int)offsetof(struct ptlrpc_body_v2, pb_op_flags)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_op_flags), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_op_flags)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt) == (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_conn_cnt), (int)offsetof(struct ptlrpc_body_v2, pb_conn_cnt)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_conn_cnt), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_conn_cnt)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_timeout) == (int)offsetof(struct ptlrpc_body_v2, pb_timeout), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_timeout), (int)offsetof(struct ptlrpc_body_v2, pb_timeout)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_timeout), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_timeout)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_service_time) == (int)offsetof(struct ptlrpc_body_v2, pb_service_time), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_service_time), (int)offsetof(struct ptlrpc_body_v2, pb_service_time)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_service_time), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_service_time)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_limit) == (int)offsetof(struct 
ptlrpc_body_v2, pb_limit), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_limit), (int)offsetof(struct ptlrpc_body_v2, pb_limit)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_limit), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_limit)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_slv) == (int)offsetof(struct ptlrpc_body_v2, pb_slv), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_slv), (int)offsetof(struct ptlrpc_body_v2, pb_slv)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_slv), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_slv)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_pre_versions) == (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_pre_versions), (int)offsetof(struct ptlrpc_body_v2, pb_pre_versions)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_pre_versions), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_pre_versions)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_mbits) == (int)offsetof(struct ptlrpc_body_v2, pb_mbits), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_mbits), (int)offsetof(struct ptlrpc_body_v2, pb_mbits)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_mbits), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_mbits), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_mbits)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_0) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_0), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, 
pb_padding64_0), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_0)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_0), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_0), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_0)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_1) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_1), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_padding64_1), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_1)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_1), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_1), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_1)); - LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_padding64_2) == (int)offsetof(struct ptlrpc_body_v2, pb_padding64_2), "%d != %d\n", - (int)offsetof(struct ptlrpc_body_v3, pb_padding64_2), (int)offsetof(struct ptlrpc_body_v2, pb_padding64_2)); - LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2) == (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_2), "%d != %d\n", - (int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding64_2), (int)sizeof(((struct ptlrpc_body_v2 *)0)->pb_padding64_2)); - LASSERTF(MSG_PTLRPC_BODY_OFF == 0, "found %lld\n", - (long long)MSG_PTLRPC_BODY_OFF); - LASSERTF(REQ_REC_OFF == 1, "found %lld\n", - (long long)REQ_REC_OFF); - LASSERTF(REPLY_REC_OFF == 1, "found %lld\n", - (long long)REPLY_REC_OFF); - LASSERTF(DLM_LOCKREQ_OFF == 1, "found %lld\n", - (long long)DLM_LOCKREQ_OFF); - LASSERTF(DLM_REQ_REC_OFF == 2, "found %lld\n", - (long long)DLM_REQ_REC_OFF); - LASSERTF(DLM_INTENT_IT_OFF == 2, "found %lld\n", - (long long)DLM_INTENT_IT_OFF); - LASSERTF(DLM_INTENT_REC_OFF == 3, "found %lld\n", - (long long)DLM_INTENT_REC_OFF); - LASSERTF(DLM_LOCKREPLY_OFF == 1, "found %lld\n", - (long 
long)DLM_LOCKREPLY_OFF); - LASSERTF(DLM_REPLY_REC_OFF == 2, "found %lld\n", - (long long)DLM_REPLY_REC_OFF); - LASSERTF(MSG_PTLRPC_HEADER_OFF == 31, "found %lld\n", - (long long)MSG_PTLRPC_HEADER_OFF); - LASSERTF(PTLRPC_MSG_VERSION == 0x00000003, "found 0x%.8x\n", - PTLRPC_MSG_VERSION); - LASSERTF(LUSTRE_VERSION_MASK == 0xffff0000, "found 0x%.8x\n", - LUSTRE_VERSION_MASK); - LASSERTF(LUSTRE_OBD_VERSION == 0x00010000, "found 0x%.8x\n", - LUSTRE_OBD_VERSION); - LASSERTF(LUSTRE_MDS_VERSION == 0x00020000, "found 0x%.8x\n", - LUSTRE_MDS_VERSION); - LASSERTF(LUSTRE_OST_VERSION == 0x00030000, "found 0x%.8x\n", - LUSTRE_OST_VERSION); - LASSERTF(LUSTRE_DLM_VERSION == 0x00040000, "found 0x%.8x\n", - LUSTRE_DLM_VERSION); - LASSERTF(LUSTRE_LOG_VERSION == 0x00050000, "found 0x%.8x\n", - LUSTRE_LOG_VERSION); - LASSERTF(LUSTRE_MGS_VERSION == 0x00060000, "found 0x%.8x\n", - LUSTRE_MGS_VERSION); - LASSERTF(MSGHDR_AT_SUPPORT == 1, "found %lld\n", - (long long)MSGHDR_AT_SUPPORT); - LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n", - (long long)MSGHDR_CKSUM_INCOMPAT18); - LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n", - (unsigned int)MSG_OP_FLAG_MASK); - LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n", - (long long)MSG_OP_FLAG_SHIFT); - LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n", - (unsigned int)MSG_GEN_FLAG_MASK); - LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)MSG_LAST_REPLAY); - LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned int)MSG_RESENT); - LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned int)MSG_REPLAY); - LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n", - (unsigned int)MSG_DELAY_REPLAY); - LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n", - (unsigned int)MSG_VERSION_REPLAY); - LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n", - (unsigned int)MSG_REQ_REPLAY_DONE); - LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 
0x%.8xUL\n", - (unsigned int)MSG_LOCK_REPLAY_DONE); - LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)MSG_CONNECT_RECOVERING); - LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned int)MSG_CONNECT_RECONNECT); - LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned int)MSG_CONNECT_REPLAYABLE); - LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n", - (unsigned int)MSG_CONNECT_LIBCLIENT); - LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n", - (unsigned int)MSG_CONNECT_INITIAL); - LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n", - (unsigned int)MSG_CONNECT_ASYNC); - LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n", - (unsigned int)MSG_CONNECT_NEXT_VER); - LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n", - (unsigned int)MSG_CONNECT_TRANSNO); - - /* Checks for struct obd_connect_data */ - LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n", - (long long)(int)sizeof(struct obd_connect_data)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags) == 0, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_version) == 8, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_version)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_version) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_version)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant) == 12, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_grant)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant) == 4, "found %lld\n", - (long 
long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_index) == 16, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_index)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_index) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_index)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_brw_size) == 20, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_brw_size)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_brw_size)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_ibits_known) == 24, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_ibits_known)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_blocksize) == 32, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_blocksize)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize) == 1, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_blocksize)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_inodespace) == 33, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_inodespace)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace) == 1, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_inodespace)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant_extent) == 34, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_grant_extent)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant_extent) == 2, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data 
*)0)->ocd_grant_extent)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_unused) == 36, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_unused)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_unused) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_unused)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_transno) == 40, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_transno)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_transno) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_transno)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_group) == 48, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_group)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_group) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_group)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_cksum_types) == 52, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_cksum_types)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_cksum_types)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_max_easize) == 56, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_max_easize)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_instance)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) 
== 64, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxmodrpcs) == 72, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_maxmodrpcs)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxmodrpcs) == 2, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_maxmodrpcs)); - LASSERTF((int)offsetof(struct obd_connect_data, padding0) == 74, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding0)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding0) == 2, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding0)); - LASSERTF((int)offsetof(struct obd_connect_data, padding1) == 76, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding1)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding1)); - LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags2) == 80, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags2)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags2)); - LASSERTF((int)offsetof(struct obd_connect_data, padding3) == 88, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding3)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding3) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding3)); - LASSERTF((int)offsetof(struct obd_connect_data, padding4) == 96, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding4)); - 
LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding4) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding4)); - LASSERTF((int)offsetof(struct obd_connect_data, padding5) == 104, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding5)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding5) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding5)); - LASSERTF((int)offsetof(struct obd_connect_data, padding6) == 112, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding6)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding6) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding6)); - LASSERTF((int)offsetof(struct obd_connect_data, padding7) == 120, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding7)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding7) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding7)); - LASSERTF((int)offsetof(struct obd_connect_data, padding8) == 128, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding8)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding8) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding8)); - LASSERTF((int)offsetof(struct obd_connect_data, padding9) == 136, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, padding9)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding9) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->padding9)); - LASSERTF((int)offsetof(struct obd_connect_data, paddingA) == 144, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, paddingA)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingA) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingA)); - 
LASSERTF((int)offsetof(struct obd_connect_data, paddingB) == 152, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, paddingB)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingB) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingB)); - LASSERTF((int)offsetof(struct obd_connect_data, paddingC) == 160, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, paddingC)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingC) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingC)); - LASSERTF((int)offsetof(struct obd_connect_data, paddingD) == 168, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, paddingD)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingD) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingD)); - LASSERTF((int)offsetof(struct obd_connect_data, paddingE) == 176, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, paddingE)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingE) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingE)); - LASSERTF((int)offsetof(struct obd_connect_data, paddingF) == 184, "found %lld\n", - (long long)(int)offsetof(struct obd_connect_data, paddingF)); - LASSERTF((int)sizeof(((struct obd_connect_data *)0)->paddingF) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_connect_data *)0)->paddingF)); - LASSERTF(OBD_CONNECT_RDONLY == 0x1ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_RDONLY); - LASSERTF(OBD_CONNECT_INDEX == 0x2ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_INDEX); - LASSERTF(OBD_CONNECT_MDS == 0x4ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_MDS); - LASSERTF(OBD_CONNECT_GRANT == 0x8ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_GRANT); - LASSERTF(OBD_CONNECT_SRVLOCK == 0x10ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_SRVLOCK); - LASSERTF(OBD_CONNECT_VERSION == 0x20ULL, 
"found 0x%.16llxULL\n", - OBD_CONNECT_VERSION); - LASSERTF(OBD_CONNECT_REQPORTAL == 0x40ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_REQPORTAL); - LASSERTF(OBD_CONNECT_ACL == 0x80ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_ACL); - LASSERTF(OBD_CONNECT_XATTR == 0x100ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_XATTR); - LASSERTF(OBD_CONNECT_LARGE_ACL == 0x200ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_LARGE_ACL); - LASSERTF(OBD_CONNECT_TRUNCLOCK == 0x400ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_TRUNCLOCK); - LASSERTF(OBD_CONNECT_TRANSNO == 0x800ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_TRANSNO); - LASSERTF(OBD_CONNECT_IBITS == 0x1000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_IBITS); - LASSERTF(OBD_CONNECT_JOIN == 0x2000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_JOIN); - LASSERTF(OBD_CONNECT_ATTRFID == 0x4000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_ATTRFID); - LASSERTF(OBD_CONNECT_NODEVOH == 0x8000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_NODEVOH); - LASSERTF(OBD_CONNECT_RMT_CLIENT == 0x10000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_RMT_CLIENT); - LASSERTF(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_RMT_CLIENT_FORCE); - LASSERTF(OBD_CONNECT_BRW_SIZE == 0x40000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_BRW_SIZE); - LASSERTF(OBD_CONNECT_QUOTA64 == 0x80000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_QUOTA64); - LASSERTF(OBD_CONNECT_MDS_CAPA == 0x100000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_MDS_CAPA); - LASSERTF(OBD_CONNECT_OSS_CAPA == 0x200000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_OSS_CAPA); - LASSERTF(OBD_CONNECT_CANCELSET == 0x400000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_CANCELSET); - LASSERTF(OBD_CONNECT_SOM == 0x800000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_SOM); - LASSERTF(OBD_CONNECT_AT == 0x1000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_AT); - LASSERTF(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_LRU_RESIZE); - LASSERTF(OBD_CONNECT_MDS_MDS == 0x4000000ULL, "found 
0x%.16llxULL\n", - OBD_CONNECT_MDS_MDS); - LASSERTF(OBD_CONNECT_REAL == 0x8000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_REAL); - LASSERTF(OBD_CONNECT_CHANGE_QS == 0x10000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_CHANGE_QS); - LASSERTF(OBD_CONNECT_CKSUM == 0x20000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_CKSUM); - LASSERTF(OBD_CONNECT_FID == 0x40000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_FID); - LASSERTF(OBD_CONNECT_VBR == 0x80000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_VBR); - LASSERTF(OBD_CONNECT_LOV_V3 == 0x100000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_LOV_V3); - LASSERTF(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_GRANT_SHRINK); - LASSERTF(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_SKIP_ORPHAN); - LASSERTF(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_MAX_EASIZE); - LASSERTF(OBD_CONNECT_FULL20 == 0x1000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_FULL20); - LASSERTF(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_LAYOUTLOCK); - LASSERTF(OBD_CONNECT_64BITHASH == 0x4000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_64BITHASH); - LASSERTF(OBD_CONNECT_MAXBYTES == 0x8000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_MAXBYTES); - LASSERTF(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_IMP_RECOV); - LASSERTF(OBD_CONNECT_JOBSTATS == 0x20000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_JOBSTATS); - LASSERTF(OBD_CONNECT_UMASK == 0x40000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_UMASK); - LASSERTF(OBD_CONNECT_EINPROGRESS == 0x80000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_EINPROGRESS); - LASSERTF(OBD_CONNECT_GRANT_PARAM == 0x100000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_GRANT_PARAM); - LASSERTF(OBD_CONNECT_FLOCK_OWNER == 0x200000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_FLOCK_OWNER); - LASSERTF(OBD_CONNECT_LVB_TYPE == 
0x400000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_LVB_TYPE); - LASSERTF(OBD_CONNECT_NANOSEC_TIME == 0x800000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_NANOSEC_TIME); - LASSERTF(OBD_CONNECT_LIGHTWEIGHT == 0x1000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_LIGHTWEIGHT); - LASSERTF(OBD_CONNECT_SHORTIO == 0x2000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_SHORTIO); - LASSERTF(OBD_CONNECT_PINGLESS == 0x4000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_PINGLESS); - LASSERTF(OBD_CONNECT_FLOCK_DEAD == 0x8000000000000ULL, - "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD); - LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL, - "found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID); - LASSERTF(OBD_CONNECT_LFSCK == 0x40000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_LFSCK); - LASSERTF(OBD_CONNECT_UNLINK_CLOSE == 0x100000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_UNLINK_CLOSE); - LASSERTF(OBD_CONNECT_MULTIMODRPCS == 0x200000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_MULTIMODRPCS); - LASSERTF(OBD_CONNECT_DIR_STRIPE == 0x400000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_DIR_STRIPE); - LASSERTF(OBD_CONNECT_SUBTREE == 0x800000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_SUBTREE); - LASSERTF(OBD_CONNECT_LOCK_AHEAD == 0x1000000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_LOCK_AHEAD); - LASSERTF(OBD_CONNECT_OBDOPACK == 0x4000000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_OBDOPACK); - LASSERTF(OBD_CONNECT_FLAGS2 == 0x8000000000000000ULL, "found 0x%.16llxULL\n", - OBD_CONNECT_FLAGS2); - LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)OBD_CKSUM_CRC32); - LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned int)OBD_CKSUM_ADLER); - LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n", - (unsigned int)OBD_CKSUM_CRC32C); - - /* Checks for struct obdo */ - LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n", - (long long)(int)sizeof(struct 
obdo)); - LASSERTF((int)offsetof(struct obdo, o_valid) == 0, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_valid)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_valid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_valid)); - LASSERTF((int)offsetof(struct obdo, o_oi) == 8, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_oi)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_oi) == 16, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_oi)); - LASSERTF((int)offsetof(struct obdo, o_parent_seq) == 24, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_parent_seq)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_seq) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_parent_seq)); - LASSERTF((int)offsetof(struct obdo, o_size) == 32, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_size)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_size) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_size)); - LASSERTF((int)offsetof(struct obdo, o_mtime) == 40, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_mtime)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_mtime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_mtime)); - LASSERTF((int)offsetof(struct obdo, o_atime) == 48, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_atime)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_atime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_atime)); - LASSERTF((int)offsetof(struct obdo, o_ctime) == 56, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_ctime)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_ctime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_ctime)); - LASSERTF((int)offsetof(struct obdo, o_blocks) == 64, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_blocks)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_blocks) == 8, "found %lld\n", - (long long)(int)sizeof(((struct 
obdo *)0)->o_blocks)); - LASSERTF((int)offsetof(struct obdo, o_grant) == 72, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_grant)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_grant) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_grant)); - LASSERTF((int)offsetof(struct obdo, o_blksize) == 80, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_blksize)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_blksize) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_blksize)); - LASSERTF((int)offsetof(struct obdo, o_mode) == 84, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_mode)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_mode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_mode)); - LASSERTF((int)offsetof(struct obdo, o_uid) == 88, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_uid)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_uid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_uid)); - LASSERTF((int)offsetof(struct obdo, o_gid) == 92, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_gid)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_gid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_gid)); - LASSERTF((int)offsetof(struct obdo, o_flags) == 96, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_flags)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_flags)); - LASSERTF((int)offsetof(struct obdo, o_nlink) == 100, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_nlink)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_nlink) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_nlink)); - LASSERTF((int)offsetof(struct obdo, o_parent_oid) == 104, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_parent_oid)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_oid) == 4, "found %lld\n", - (long 
long)(int)sizeof(((struct obdo *)0)->o_parent_oid)); - LASSERTF((int)offsetof(struct obdo, o_misc) == 108, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_misc)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_misc) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_misc)); - LASSERTF((int)offsetof(struct obdo, o_ioepoch) == 112, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_ioepoch)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_ioepoch) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_ioepoch)); - LASSERTF((int)offsetof(struct obdo, o_stripe_idx) == 120, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_stripe_idx)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_stripe_idx) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_stripe_idx)); - LASSERTF((int)offsetof(struct obdo, o_parent_ver) == 124, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_parent_ver)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_parent_ver) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_parent_ver)); - LASSERTF((int)offsetof(struct obdo, o_handle) == 128, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_handle)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_handle) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_handle)); - LASSERTF((int)offsetof(struct obdo, o_lcookie) == 136, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_lcookie)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_lcookie) == 32, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_lcookie)); - LASSERTF((int)offsetof(struct obdo, o_uid_h) == 168, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_uid_h)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_uid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_uid_h)); - LASSERTF((int)offsetof(struct obdo, o_gid_h) == 172, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_gid_h)); - 
LASSERTF((int)sizeof(((struct obdo *)0)->o_gid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_gid_h)); - LASSERTF((int)offsetof(struct obdo, o_data_version) == 176, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_data_version)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_data_version) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_data_version)); - LASSERTF((int)offsetof(struct obdo, o_padding_4) == 184, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_padding_4)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_4) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_padding_4)); - LASSERTF((int)offsetof(struct obdo, o_padding_5) == 192, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_padding_5)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_5) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_padding_5)); - LASSERTF((int)offsetof(struct obdo, o_padding_6) == 200, "found %lld\n", - (long long)(int)offsetof(struct obdo, o_padding_6)); - LASSERTF((int)sizeof(((struct obdo *)0)->o_padding_6) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obdo *)0)->o_padding_6)); - LASSERTF(OBD_MD_FLID == (0x00000001ULL), "found 0x%.16llxULL\n", - OBD_MD_FLID); - LASSERTF(OBD_MD_FLATIME == (0x00000002ULL), "found 0x%.16llxULL\n", - OBD_MD_FLATIME); - LASSERTF(OBD_MD_FLMTIME == (0x00000004ULL), "found 0x%.16llxULL\n", - OBD_MD_FLMTIME); - LASSERTF(OBD_MD_FLCTIME == (0x00000008ULL), "found 0x%.16llxULL\n", - OBD_MD_FLCTIME); - LASSERTF(OBD_MD_FLSIZE == (0x00000010ULL), "found 0x%.16llxULL\n", - OBD_MD_FLSIZE); - LASSERTF(OBD_MD_FLBLOCKS == (0x00000020ULL), "found 0x%.16llxULL\n", - OBD_MD_FLBLOCKS); - LASSERTF(OBD_MD_FLBLKSZ == (0x00000040ULL), "found 0x%.16llxULL\n", - OBD_MD_FLBLKSZ); - LASSERTF(OBD_MD_FLMODE == (0x00000080ULL), "found 0x%.16llxULL\n", - OBD_MD_FLMODE); - LASSERTF(OBD_MD_FLTYPE == (0x00000100ULL), "found 0x%.16llxULL\n", - 
OBD_MD_FLTYPE); - LASSERTF(OBD_MD_FLUID == (0x00000200ULL), "found 0x%.16llxULL\n", - OBD_MD_FLUID); - LASSERTF(OBD_MD_FLGID == (0x00000400ULL), "found 0x%.16llxULL\n", - OBD_MD_FLGID); - LASSERTF(OBD_MD_FLFLAGS == (0x00000800ULL), "found 0x%.16llxULL\n", - OBD_MD_FLFLAGS); - LASSERTF(OBD_MD_FLNLINK == (0x00002000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLNLINK); - LASSERTF(OBD_MD_FLGENER == (0x00004000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLGENER); - LASSERTF(OBD_MD_FLRDEV == (0x00010000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLRDEV); - LASSERTF(OBD_MD_FLEASIZE == (0x00020000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLEASIZE); - LASSERTF(OBD_MD_LINKNAME == (0x00040000ULL), "found 0x%.16llxULL\n", - OBD_MD_LINKNAME); - LASSERTF(OBD_MD_FLHANDLE == (0x00080000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLHANDLE); - LASSERTF(OBD_MD_FLCKSUM == (0x00100000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLCKSUM); - LASSERTF(OBD_MD_FLQOS == (0x00200000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLQOS); - LASSERTF(OBD_MD_FLGROUP == (0x01000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLGROUP); - LASSERTF(OBD_MD_FLFID == (0x02000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLFID); - LASSERTF(OBD_MD_FLEPOCH == (0x04000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLEPOCH); - LASSERTF(OBD_MD_FLGRANT == (0x08000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLGRANT); - LASSERTF(OBD_MD_FLDIREA == (0x10000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLDIREA); - LASSERTF(OBD_MD_FLUSRQUOTA == (0x20000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLUSRQUOTA); - LASSERTF(OBD_MD_FLGRPQUOTA == (0x40000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLGRPQUOTA); - LASSERTF(OBD_MD_FLMODEASIZE == (0x80000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLMODEASIZE); - LASSERTF(OBD_MD_MDS == (0x0000000100000000ULL), "found 0x%.16llxULL\n", - OBD_MD_MDS); - LASSERTF(OBD_MD_REINT == (0x0000000200000000ULL), "found 0x%.16llxULL\n", - OBD_MD_REINT); - LASSERTF(OBD_MD_MEA == (0x0000000400000000ULL), "found 0x%.16llxULL\n", - OBD_MD_MEA); - 
LASSERTF(OBD_MD_TSTATE == (0x0000000800000000ULL), - "found 0x%.16llxULL\n", OBD_MD_TSTATE); - LASSERTF(OBD_MD_FLXATTR == (0x0000001000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLXATTR); - LASSERTF(OBD_MD_FLXATTRLS == (0x0000002000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLXATTRLS); - LASSERTF(OBD_MD_FLXATTRRM == (0x0000004000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLXATTRRM); - LASSERTF(OBD_MD_FLACL == (0x0000008000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLACL); - LASSERTF(OBD_MD_FLMDSCAPA == (0x0000020000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLMDSCAPA); - LASSERTF(OBD_MD_FLOSSCAPA == (0x0000040000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLOSSCAPA); - LASSERTF(OBD_MD_FLCKSPLIT == (0x0000080000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLCKSPLIT); - LASSERTF(OBD_MD_FLCROSSREF == (0x0000100000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLCROSSREF); - LASSERTF(OBD_MD_FLGETATTRLOCK == (0x0000200000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLGETATTRLOCK); - LASSERTF(OBD_MD_FLDATAVERSION == (0x0010000000000000ULL), "found 0x%.16llxULL\n", - OBD_MD_FLDATAVERSION); - BUILD_BUG_ON(OBD_FL_INLINEDATA != 0x00000001); - BUILD_BUG_ON(OBD_FL_OBDMDEXISTS != 0x00000002); - BUILD_BUG_ON(OBD_FL_DELORPHAN != 0x00000004); - BUILD_BUG_ON(OBD_FL_NORPC != 0x00000008); - BUILD_BUG_ON(OBD_FL_IDONLY != 0x00000010); - BUILD_BUG_ON(OBD_FL_RECREATE_OBJS != 0x00000020); - BUILD_BUG_ON(OBD_FL_DEBUG_CHECK != 0x00000040); - BUILD_BUG_ON(OBD_FL_NO_USRQUOTA != 0x00000100); - BUILD_BUG_ON(OBD_FL_NO_GRPQUOTA != 0x00000200); - BUILD_BUG_ON(OBD_FL_CREATE_CROW != 0x00000400); - BUILD_BUG_ON(OBD_FL_SRVLOCK != 0x00000800); - BUILD_BUG_ON(OBD_FL_CKSUM_CRC32 != 0x00001000); - BUILD_BUG_ON(OBD_FL_CKSUM_ADLER != 0x00002000); - BUILD_BUG_ON(OBD_FL_CKSUM_CRC32C != 0x00004000); - BUILD_BUG_ON(OBD_FL_CKSUM_RSVD2 != 0x00008000); - BUILD_BUG_ON(OBD_FL_CKSUM_RSVD3 != 0x00010000); - BUILD_BUG_ON(OBD_FL_SHRINK_GRANT != 0x00020000); - BUILD_BUG_ON(OBD_FL_MMAP != 0x00040000); - 
BUILD_BUG_ON(OBD_FL_RECOV_RESEND != 0x00080000); - BUILD_BUG_ON(OBD_FL_NOSPC_BLK != 0x00100000); - BUILD_BUG_ON(OBD_FL_LOCAL_MASK != 0xf0000000); - - /* Checks for struct lov_ost_data_v1 */ - LASSERTF((int)sizeof(struct lov_ost_data_v1) == 24, "found %lld\n", - (long long)(int)sizeof(struct lov_ost_data_v1)); - LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_oi) == 0, "found %lld\n", - (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_oi)); - LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi) == 16, "found %lld\n", - (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_oi)); - LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_gen) == 16, "found %lld\n", - (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_gen)); - LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_gen)); - LASSERTF((int)offsetof(struct lov_ost_data_v1, l_ost_idx) == 20, "found %lld\n", - (long long)(int)offsetof(struct lov_ost_data_v1, l_ost_idx)); - LASSERTF((int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_ost_data_v1 *)0)->l_ost_idx)); - - /* Checks for struct lov_mds_md_v1 */ - LASSERTF((int)sizeof(struct lov_mds_md_v1) == 32, "found %lld\n", - (long long)(int)sizeof(struct lov_mds_md_v1)); - LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_magic) == 0, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v1, lmm_magic)); - LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_magic)); - LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_pattern) == 4, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v1, lmm_pattern)); - LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_pattern)); - 
LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_oi) == 8, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v1, lmm_oi)); - LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi) == 16, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_oi)); - LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_size) == 24, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_size)); - LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_size)); - LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_stripe_count) == 28, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v1, lmm_stripe_count)); - LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count) == 2, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_stripe_count)); - LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_layout_gen) == 30, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v1, lmm_layout_gen)); - LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen) == 2, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_layout_gen)); - LASSERTF((int)offsetof(struct lov_mds_md_v1, lmm_objects[0]) == 32, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v1, lmm_objects[0])); - LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]) == 24, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0])); - BUILD_BUG_ON(LOV_MAGIC_V1 != (0x0BD10000 | 0x0BD0)); - - /* Checks for struct lov_mds_md_v3 */ - LASSERTF((int)sizeof(struct lov_mds_md_v3) == 48, "found %lld\n", - (long long)(int)sizeof(struct lov_mds_md_v3)); - LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_magic) == 0, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v3, lmm_magic)); - LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic) == 4, "found %lld\n", - 
(long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_magic)); - LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pattern) == 4, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pattern)); - LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pattern)); - LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_oi) == 8, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v3, lmm_oi)); - LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi) == 16, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_oi)); - LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_size) == 24, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_size)); - LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_size)); - LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_stripe_count) == 28, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v3, lmm_stripe_count)); - LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count) == 2, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_stripe_count)); - LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_layout_gen) == 30, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v3, lmm_layout_gen)); - LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen) == 2, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen)); - BUILD_BUG_ON(LOV_MAXPOOLNAME != 15); - LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]) == 48, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16])); - LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]) == 1, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16])); - 
LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_objects[0]) == 48, "found %lld\n", - (long long)(int)offsetof(struct lov_mds_md_v3, lmm_objects[0])); - LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]) == 24, "found %lld\n", - (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0])); - BUILD_BUG_ON(LOV_MAGIC_V3 != (0x0BD30000 | 0x0BD0)); - LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)LOV_PATTERN_RAID0); - LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned int)LOV_PATTERN_RAID1); - LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n", - (unsigned int)LOV_PATTERN_FIRST); - LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n", - (unsigned int)LOV_PATTERN_CMOBD); - - /* Checks for struct lmv_mds_md_v1 */ - LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n", - (long long)(int)sizeof(struct lmv_mds_md_v1)); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_magic) == 0, "found %lld\n", - (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_magic)); - LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic)); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count) == 4, "found %lld\n", - (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count)); - LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count)); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index) == 8, "found %lld\n", - (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index)); - LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index)); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_hash_type) == 12, "found %lld\n", - (long 
long)(int)offsetof(struct lmv_mds_md_v1, lmv_hash_type)); - LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type)); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_layout_version) == 16, "found %lld\n", - (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_layout_version)); - LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version)); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding1) == 20, "found %lld\n", - (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding1)); - LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1)); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding2) == 24, "found %lld\n", - (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding2)); - LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2)); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding3) == 32, "found %lld\n", - (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding3)); - LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3)); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16]) == 56, "found %lld\n", - (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16])); - LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16]) == 1, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16])); - LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0]) == 56, "found %lld\n", - (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0])); - LASSERTF((int)sizeof(((struct 
lmv_mds_md_v1 *)0)->lmv_stripe_fids[0]) == 16, "found %lld\n", - (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_fids[0])); - BUILD_BUG_ON(LMV_MAGIC_V1 != 0x0CD20CD0); - BUILD_BUG_ON(LMV_MAGIC_STRIPE != 0x0CD40CD0); - BUILD_BUG_ON(LMV_HASH_TYPE_MASK != 0x0000ffff); - BUILD_BUG_ON(LMV_HASH_FLAG_MIGRATION != 0x80000000); - BUILD_BUG_ON(LMV_HASH_FLAG_DEAD != 0x40000000); - - /* Checks for struct obd_statfs */ - LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n", - (long long)(int)sizeof(struct obd_statfs)); - LASSERTF((int)offsetof(struct obd_statfs, os_type) == 0, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_type)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_type) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_type)); - LASSERTF((int)offsetof(struct obd_statfs, os_blocks) == 8, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_blocks)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_blocks) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_blocks)); - LASSERTF((int)offsetof(struct obd_statfs, os_bfree) == 16, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_bfree)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bfree) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_bfree)); - LASSERTF((int)offsetof(struct obd_statfs, os_bavail) == 24, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_bavail)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bavail) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_bavail)); - LASSERTF((int)offsetof(struct obd_statfs, os_ffree) == 40, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_ffree)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_ffree) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_ffree)); - LASSERTF((int)offsetof(struct obd_statfs, os_fsid) == 48, 
"found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_fsid)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fsid) == 40, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_fsid)); - LASSERTF((int)offsetof(struct obd_statfs, os_bsize) == 88, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_bsize)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_bsize) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_bsize)); - LASSERTF((int)offsetof(struct obd_statfs, os_namelen) == 92, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_namelen)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_namelen) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_namelen)); - LASSERTF((int)offsetof(struct obd_statfs, os_state) == 104, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_state)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_state) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_state)); - LASSERTF((int)offsetof(struct obd_statfs, os_fprecreated) == 108, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_fprecreated)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_fprecreated) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_fprecreated)); - LASSERTF((int)offsetof(struct obd_statfs, os_spare2) == 112, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_spare2)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare2)); - LASSERTF((int)offsetof(struct obd_statfs, os_spare3) == 116, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_spare3)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare3) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare3)); - LASSERTF((int)offsetof(struct obd_statfs, os_spare4) == 120, 
"found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_spare4)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare4) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare4)); - LASSERTF((int)offsetof(struct obd_statfs, os_spare5) == 124, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_spare5)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare5) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare5)); - LASSERTF((int)offsetof(struct obd_statfs, os_spare6) == 128, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_spare6)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare6) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare6)); - LASSERTF((int)offsetof(struct obd_statfs, os_spare7) == 132, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_spare7)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare7) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare7)); - LASSERTF((int)offsetof(struct obd_statfs, os_spare8) == 136, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_spare8)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare8) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare8)); - LASSERTF((int)offsetof(struct obd_statfs, os_spare9) == 140, "found %lld\n", - (long long)(int)offsetof(struct obd_statfs, os_spare9)); - LASSERTF((int)sizeof(((struct obd_statfs *)0)->os_spare9) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_statfs *)0)->os_spare9)); - - /* Checks for struct obd_ioobj */ - LASSERTF((int)sizeof(struct obd_ioobj) == 24, "found %lld\n", - (long long)(int)sizeof(struct obd_ioobj)); - LASSERTF((int)offsetof(struct obd_ioobj, ioo_oid) == 0, "found %lld\n", - (long long)(int)offsetof(struct obd_ioobj, ioo_oid)); - LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_oid) == 16, "found %lld\n", - 
(long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_oid)); - LASSERTF((int)offsetof(struct obd_ioobj, ioo_max_brw) == 16, "found %lld\n", - (long long)(int)offsetof(struct obd_ioobj, ioo_max_brw)); - LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_max_brw)); - LASSERTF((int)offsetof(struct obd_ioobj, ioo_bufcnt) == 20, "found %lld\n", - (long long)(int)offsetof(struct obd_ioobj, ioo_bufcnt)); - LASSERTF((int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_ioobj *)0)->ioo_bufcnt)); - LASSERTF(IOOBJ_MAX_BRW_BITS == 16, "found %lld\n", - (long long)IOOBJ_MAX_BRW_BITS); - - /* Checks for union lquota_id */ - LASSERTF((int)sizeof(union lquota_id) == 16, "found %lld\n", - (long long)(int)sizeof(union lquota_id)); - - /* Checks for struct obd_quotactl */ - LASSERTF((int)sizeof(struct obd_quotactl) == 112, "found %lld\n", - (long long)(int)sizeof(struct obd_quotactl)); - LASSERTF((int)offsetof(struct obd_quotactl, qc_cmd) == 0, "found %lld\n", - (long long)(int)offsetof(struct obd_quotactl, qc_cmd)); - LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_cmd) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_cmd)); - LASSERTF((int)offsetof(struct obd_quotactl, qc_type) == 4, "found %lld\n", - (long long)(int)offsetof(struct obd_quotactl, qc_type)); - LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_type) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_type)); - LASSERTF((int)offsetof(struct obd_quotactl, qc_id) == 8, "found %lld\n", - (long long)(int)offsetof(struct obd_quotactl, qc_id)); - LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_id) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_id)); - LASSERTF((int)offsetof(struct obd_quotactl, qc_stat) == 12, "found %lld\n", - (long long)(int)offsetof(struct obd_quotactl, qc_stat)); - 
LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_stat) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_stat)); - LASSERTF((int)offsetof(struct obd_quotactl, qc_dqinfo) == 16, "found %lld\n", - (long long)(int)offsetof(struct obd_quotactl, qc_dqinfo)); - LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo) == 24, "found %lld\n", - (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqinfo)); - LASSERTF((int)offsetof(struct obd_quotactl, qc_dqblk) == 40, "found %lld\n", - (long long)(int)offsetof(struct obd_quotactl, qc_dqblk)); - LASSERTF((int)sizeof(((struct obd_quotactl *)0)->qc_dqblk) == 72, "found %lld\n", - (long long)(int)sizeof(((struct obd_quotactl *)0)->qc_dqblk)); - - /* Checks for struct obd_dqinfo */ - LASSERTF((int)sizeof(struct obd_dqinfo) == 24, "found %lld\n", - (long long)(int)sizeof(struct obd_dqinfo)); - LASSERTF((int)offsetof(struct obd_dqinfo, dqi_bgrace) == 0, "found %lld\n", - (long long)(int)offsetof(struct obd_dqinfo, dqi_bgrace)); - LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_bgrace)); - LASSERTF((int)offsetof(struct obd_dqinfo, dqi_igrace) == 8, "found %lld\n", - (long long)(int)offsetof(struct obd_dqinfo, dqi_igrace)); - LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_igrace)); - LASSERTF((int)offsetof(struct obd_dqinfo, dqi_flags) == 16, "found %lld\n", - (long long)(int)offsetof(struct obd_dqinfo, dqi_flags)); - LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqinfo *)0)->dqi_flags)); - LASSERTF((int)offsetof(struct obd_dqinfo, dqi_valid) == 20, "found %lld\n", - (long long)(int)offsetof(struct obd_dqinfo, dqi_valid)); - LASSERTF((int)sizeof(((struct obd_dqinfo *)0)->dqi_valid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqinfo 
*)0)->dqi_valid)); - - /* Checks for struct obd_dqblk */ - LASSERTF((int)sizeof(struct obd_dqblk) == 72, "found %lld\n", - (long long)(int)sizeof(struct obd_dqblk)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_bhardlimit) == 0, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_bhardlimit)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bhardlimit)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_bsoftlimit) == 8, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_bsoftlimit)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_bsoftlimit)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_curspace) == 16, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_curspace)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curspace) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curspace)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_ihardlimit) == 24, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_ihardlimit)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_ihardlimit)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_isoftlimit) == 32, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_isoftlimit)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_isoftlimit)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_curinodes) == 40, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_curinodes)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_curinodes)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_btime) 
== 48, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_btime)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_btime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_btime)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_itime) == 56, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_itime)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_itime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_itime)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_valid) == 64, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_valid)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_valid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_valid)); - LASSERTF((int)offsetof(struct obd_dqblk, dqb_padding) == 68, "found %lld\n", - (long long)(int)offsetof(struct obd_dqblk, dqb_padding)); - LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_padding)); - LASSERTF(Q_QUOTACHECK == 0x800100, "found 0x%.8x\n", - Q_QUOTACHECK); - LASSERTF(Q_INITQUOTA == 0x800101, "found 0x%.8x\n", - Q_INITQUOTA); - LASSERTF(Q_GETOINFO == 0x800102, "found 0x%.8x\n", - Q_GETOINFO); - LASSERTF(Q_GETOQUOTA == 0x800103, "found 0x%.8x\n", - Q_GETOQUOTA); - LASSERTF(Q_FINVALIDATE == 0x800104, "found 0x%.8x\n", - Q_FINVALIDATE); - - /* Checks for struct niobuf_remote */ - LASSERTF((int)sizeof(struct niobuf_remote) == 16, "found %lld\n", - (long long)(int)sizeof(struct niobuf_remote)); - LASSERTF((int)offsetof(struct niobuf_remote, rnb_offset) == 0, "found %lld\n", - (long long)(int)offsetof(struct niobuf_remote, rnb_offset)); - LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_offset) == 8, "found %lld\n", - (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_offset)); - LASSERTF((int)offsetof(struct niobuf_remote, rnb_len) == 8, "found %lld\n", - (long long)(int)offsetof(struct 
niobuf_remote, rnb_len)); - LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_len) == 4, "found %lld\n", - (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_len)); - LASSERTF((int)offsetof(struct niobuf_remote, rnb_flags) == 12, "found %lld\n", - (long long)(int)offsetof(struct niobuf_remote, rnb_flags)); - LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_flags)); - LASSERTF(OBD_BRW_READ == 0x01, "found 0x%.8x\n", - OBD_BRW_READ); - LASSERTF(OBD_BRW_WRITE == 0x02, "found 0x%.8x\n", - OBD_BRW_WRITE); - LASSERTF(OBD_BRW_SYNC == 0x08, "found 0x%.8x\n", - OBD_BRW_SYNC); - LASSERTF(OBD_BRW_CHECK == 0x10, "found 0x%.8x\n", - OBD_BRW_CHECK); - LASSERTF(OBD_BRW_FROM_GRANT == 0x20, "found 0x%.8x\n", - OBD_BRW_FROM_GRANT); - LASSERTF(OBD_BRW_GRANTED == 0x40, "found 0x%.8x\n", - OBD_BRW_GRANTED); - LASSERTF(OBD_BRW_NOCACHE == 0x80, "found 0x%.8x\n", - OBD_BRW_NOCACHE); - LASSERTF(OBD_BRW_NOQUOTA == 0x100, "found 0x%.8x\n", - OBD_BRW_NOQUOTA); - LASSERTF(OBD_BRW_SRVLOCK == 0x200, "found 0x%.8x\n", - OBD_BRW_SRVLOCK); - LASSERTF(OBD_BRW_ASYNC == 0x400, "found 0x%.8x\n", - OBD_BRW_ASYNC); - LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n", - OBD_BRW_MEMALLOC); - LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n", - OBD_BRW_OVER_USRQUOTA); - LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n", - OBD_BRW_OVER_GRPQUOTA); - LASSERTF(OBD_BRW_SOFT_SYNC == 0x4000, "found 0x%.8x\n", - OBD_BRW_SOFT_SYNC); - - /* Checks for struct ost_body */ - LASSERTF((int)sizeof(struct ost_body) == 208, "found %lld\n", - (long long)(int)sizeof(struct ost_body)); - LASSERTF((int)offsetof(struct ost_body, oa) == 0, "found %lld\n", - (long long)(int)offsetof(struct ost_body, oa)); - LASSERTF((int)sizeof(((struct ost_body *)0)->oa) == 208, "found %lld\n", - (long long)(int)sizeof(((struct ost_body *)0)->oa)); - - /* Checks for struct ll_fid */ - LASSERTF((int)sizeof(struct ll_fid) == 16, 
"found %lld\n", - (long long)(int)sizeof(struct ll_fid)); - LASSERTF((int)offsetof(struct ll_fid, id) == 0, "found %lld\n", - (long long)(int)offsetof(struct ll_fid, id)); - LASSERTF((int)sizeof(((struct ll_fid *)0)->id) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ll_fid *)0)->id)); - LASSERTF((int)offsetof(struct ll_fid, generation) == 8, "found %lld\n", - (long long)(int)offsetof(struct ll_fid, generation)); - LASSERTF((int)sizeof(((struct ll_fid *)0)->generation) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ll_fid *)0)->generation)); - LASSERTF((int)offsetof(struct ll_fid, f_type) == 12, "found %lld\n", - (long long)(int)offsetof(struct ll_fid, f_type)); - LASSERTF((int)sizeof(((struct ll_fid *)0)->f_type) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ll_fid *)0)->f_type)); - - /* Checks for struct mdt_body */ - LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n", - (long long)(int)sizeof(struct mdt_body)); - LASSERTF((int)offsetof(struct mdt_body, mbo_fid1) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_fid1)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid1) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid1)); - LASSERTF((int)offsetof(struct mdt_body, mbo_fid2) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_fid2)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid2) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid2)); - LASSERTF((int)offsetof(struct mdt_body, mbo_handle) == 32, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_handle)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_handle) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_handle)); - LASSERTF((int)offsetof(struct mdt_body, mbo_valid) == 40, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_valid)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_valid) == 8, "found 
%lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_valid)); - LASSERTF((int)offsetof(struct mdt_body, mbo_size) == 48, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_size)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_size) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_size)); - LASSERTF((int)offsetof(struct mdt_body, mbo_mtime) == 56, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_mtime)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mtime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mtime)); - LASSERTF((int)offsetof(struct mdt_body, mbo_atime) == 64, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_atime)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_atime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_atime)); - LASSERTF((int)offsetof(struct mdt_body, mbo_ctime) == 72, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_ctime)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_ctime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_ctime)); - LASSERTF((int)offsetof(struct mdt_body, mbo_blocks) == 80, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_blocks)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_blocks) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_blocks)); - LASSERTF((int)offsetof(struct mdt_body, mbo_t_state) == 96, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_t_state)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_t_state) == 8, - "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_t_state)); - LASSERTF((int)offsetof(struct mdt_body, mbo_fsuid) == 104, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_fsuid)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsuid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body 
*)0)->mbo_fsuid)); - LASSERTF((int)offsetof(struct mdt_body, mbo_fsgid) == 108, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_fsgid)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fsgid)); - LASSERTF((int)offsetof(struct mdt_body, mbo_capability) == 112, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_capability)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_capability) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_capability)); - LASSERTF((int)offsetof(struct mdt_body, mbo_mode) == 116, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_mode)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mode)); - LASSERTF((int)offsetof(struct mdt_body, mbo_uid) == 120, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_uid)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid)); - LASSERTF((int)offsetof(struct mdt_body, mbo_gid) == 124, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_gid)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid)); - LASSERTF((int)offsetof(struct mdt_body, mbo_flags) == 128, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_flags)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_flags)); - LASSERTF((int)offsetof(struct mdt_body, mbo_rdev) == 132, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_rdev)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_rdev) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_rdev)); - LASSERTF((int)offsetof(struct mdt_body, mbo_nlink) 
== 136, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_nlink)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_nlink) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_nlink)); - LASSERTF((int)offsetof(struct mdt_body, mbo_unused2) == 140, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_unused2)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_unused2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_unused2)); - LASSERTF((int)offsetof(struct mdt_body, mbo_suppgid) == 144, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_suppgid)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_suppgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_suppgid)); - LASSERTF((int)offsetof(struct mdt_body, mbo_eadatasize) == 148, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_eadatasize)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_eadatasize) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_eadatasize)); - LASSERTF((int)offsetof(struct mdt_body, mbo_aclsize) == 152, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_aclsize)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_aclsize) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_aclsize)); - LASSERTF((int)offsetof(struct mdt_body, mbo_max_mdsize) == 156, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_max_mdsize)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize)); - LASSERTF((int)offsetof(struct mdt_body, mbo_unused3) == 160, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_unused3)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_unused3) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_unused3)); - LASSERTF((int)offsetof(struct mdt_body, 
mbo_uid_h) == 164, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_uid_h)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid_h)); - LASSERTF((int)offsetof(struct mdt_body, mbo_gid_h) == 168, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_gid_h)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid_h)); - LASSERTF((int)offsetof(struct mdt_body, mbo_padding_5) == 172, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_padding_5)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_5) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_5)); - LASSERTF((int)offsetof(struct mdt_body, mbo_padding_6) == 176, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_padding_6)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_6) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_6)); - LASSERTF((int)offsetof(struct mdt_body, mbo_padding_7) == 184, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_padding_7)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_7) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_7)); - LASSERTF((int)offsetof(struct mdt_body, mbo_padding_8) == 192, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_padding_8)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_8) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_8)); - LASSERTF((int)offsetof(struct mdt_body, mbo_padding_9) == 200, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_padding_9)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_9) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_9)); - 
LASSERTF((int)offsetof(struct mdt_body, mbo_padding_10) == 208, "found %lld\n", - (long long)(int)offsetof(struct mdt_body, mbo_padding_10)); - LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_10) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_10)); - LASSERTF(MDS_FMODE_CLOSED == 000000000000UL, "found 0%.11oUL\n", - MDS_FMODE_CLOSED); - LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n", - MDS_FMODE_EXEC); - LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n", - MDS_OPEN_CREATED); - LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n", - MDS_OPEN_CROSS); - LASSERTF(MDS_OPEN_CREAT == 000000000100UL, "found 0%.11oUL\n", - MDS_OPEN_CREAT); - LASSERTF(MDS_OPEN_EXCL == 000000000200UL, "found 0%.11oUL\n", - MDS_OPEN_EXCL); - LASSERTF(MDS_OPEN_TRUNC == 000000001000UL, "found 0%.11oUL\n", - MDS_OPEN_TRUNC); - LASSERTF(MDS_OPEN_APPEND == 000000002000UL, "found 0%.11oUL\n", - MDS_OPEN_APPEND); - LASSERTF(MDS_OPEN_SYNC == 000000010000UL, "found 0%.11oUL\n", - MDS_OPEN_SYNC); - LASSERTF(MDS_OPEN_DIRECTORY == 000000200000UL, "found 0%.11oUL\n", - MDS_OPEN_DIRECTORY); - LASSERTF(MDS_OPEN_BY_FID == 000040000000UL, "found 0%.11oUL\n", - MDS_OPEN_BY_FID); - LASSERTF(MDS_OPEN_DELAY_CREATE == 000100000000UL, "found 0%.11oUL\n", - MDS_OPEN_DELAY_CREATE); - LASSERTF(MDS_OPEN_OWNEROVERRIDE == 000200000000UL, "found 0%.11oUL\n", - MDS_OPEN_OWNEROVERRIDE); - LASSERTF(MDS_OPEN_JOIN_FILE == 000400000000UL, "found 0%.11oUL\n", - MDS_OPEN_JOIN_FILE); - LASSERTF(MDS_OPEN_LOCK == 004000000000UL, "found 0%.11oUL\n", - MDS_OPEN_LOCK); - LASSERTF(MDS_OPEN_HAS_EA == 010000000000UL, "found 0%.11oUL\n", - MDS_OPEN_HAS_EA); - LASSERTF(MDS_OPEN_HAS_OBJS == 020000000000UL, "found 0%.11oUL\n", - MDS_OPEN_HAS_OBJS); - LASSERTF(MDS_OPEN_NORESTORE == 00000000000100000000000ULL, "found 0%.22lloULL\n", - (long long)MDS_OPEN_NORESTORE); - LASSERTF(MDS_OPEN_NEWSTRIPE == 00000000000200000000000ULL, "found 0%.22lloULL\n", - (long 
long)MDS_OPEN_NEWSTRIPE); - LASSERTF(MDS_OPEN_VOLATILE == 00000000000400000000000ULL, "found 0%.22lloULL\n", - (long long)MDS_OPEN_VOLATILE); - LASSERTF(LUSTRE_SYNC_FL == 0x00000008, "found 0x%.8x\n", - LUSTRE_SYNC_FL); - LASSERTF(LUSTRE_IMMUTABLE_FL == 0x00000010, "found 0x%.8x\n", - LUSTRE_IMMUTABLE_FL); - LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n", - LUSTRE_APPEND_FL); - LASSERTF(LUSTRE_NODUMP_FL == 0x00000040, "found 0x%.8x\n", - LUSTRE_NODUMP_FL); - LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n", - LUSTRE_NOATIME_FL); - LASSERTF(LUSTRE_INDEX_FL == 0x00001000, "found 0x%.8x\n", - LUSTRE_INDEX_FL); - LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n", - LUSTRE_DIRSYNC_FL); - LASSERTF(LUSTRE_TOPDIR_FL == 0x00020000, "found 0x%.8x\n", - LUSTRE_TOPDIR_FL); - LASSERTF(LUSTRE_DIRECTIO_FL == 0x00100000, "found 0x%.8x\n", - LUSTRE_DIRECTIO_FL); - LASSERTF(LUSTRE_INLINE_DATA_FL == 0x10000000, "found 0x%.8x\n", - LUSTRE_INLINE_DATA_FL); - LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n", - MDS_INODELOCK_LOOKUP); - LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n", - MDS_INODELOCK_UPDATE); - LASSERTF(MDS_INODELOCK_OPEN == 0x000004, "found 0x%.8x\n", - MDS_INODELOCK_OPEN); - LASSERTF(MDS_INODELOCK_LAYOUT == 0x000008, "found 0x%.8x\n", - MDS_INODELOCK_LAYOUT); - - /* Checks for struct mdt_ioepoch */ - LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n", - (long long)(int)sizeof(struct mdt_ioepoch)); - LASSERTF((int)offsetof(struct mdt_ioepoch, mio_handle) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_ioepoch, mio_handle)); - LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_handle) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_handle)); - LASSERTF((int)offsetof(struct mdt_ioepoch, mio_unused1) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_ioepoch, mio_unused1)); - LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_unused1) == 8, "found %lld\n", - 
(long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_unused1)); - LASSERTF((int)offsetof(struct mdt_ioepoch, mio_unused2) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_ioepoch, mio_unused2)); - LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_unused2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_unused2)); - LASSERTF((int)offsetof(struct mdt_ioepoch, mio_padding) == 20, "found %lld\n", - (long long)(int)offsetof(struct mdt_ioepoch, mio_padding)); - LASSERTF((int)sizeof(((struct mdt_ioepoch *)0)->mio_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_ioepoch *)0)->mio_padding)); - - /* Checks for struct mdt_rec_setattr */ - LASSERTF((int)sizeof(struct mdt_rec_setattr) == 136, "found %lld\n", - (long long)(int)sizeof(struct mdt_rec_setattr)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_opcode) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_opcode)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_opcode)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_cap) == 4, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_cap)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_cap)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsuid_h) == 12, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsuid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsuid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr 
*)0)->sa_fsuid_h)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fsgid_h) == 20, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_fsgid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fsgid_h)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid) == 24, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_suppgid_h) == 28, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_suppgid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_suppgid_h)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1) == 32, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_1_h) == 36, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_1_h)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_1_h)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_fid) == 40, "found %lld\n", - (long long)(int)offsetof(struct 
mdt_rec_setattr, sa_fid)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_fid)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_valid) == 56, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_valid)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_valid)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_uid) == 64, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_uid)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_uid)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_gid) == 68, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_gid)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_gid)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_size) == 72, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_size)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_size) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_size)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_blocks) == 80, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_blocks)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_blocks)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mtime) == 88, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_mtime)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mtime)); - LASSERTF((int)offsetof(struct 
mdt_rec_setattr, sa_atime) == 96, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_atime)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_atime)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_ctime) == 104, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_ctime)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_ctime)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_attr_flags) == 112, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_attr_flags)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_attr_flags)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_mode) == 116, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_mode)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_mode)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_bias) == 120, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_bias)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_bias)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_3) == 124, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_3)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_3)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_4) == 128, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_4)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr 
*)0)->sa_padding_4) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_4)); - LASSERTF((int)offsetof(struct mdt_rec_setattr, sa_padding_5) == 132, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setattr, sa_padding_5)); - LASSERTF((int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setattr *)0)->sa_padding_5)); - - /* Checks for struct mdt_rec_create */ - LASSERTF((int)sizeof(struct mdt_rec_create) == 136, "found %lld\n", - (long long)(int)sizeof(struct mdt_rec_create)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_opcode) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_opcode)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_opcode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_opcode)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_cap) == 4, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_cap)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_cap) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_cap)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsuid_h) == 12, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_fsuid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsuid_h)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid) == 4, "found 
%lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_fsgid_h) == 20, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_fsgid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fsgid_h)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1) == 24, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid1_h) == 28, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid1_h)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid1_h)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2) == 32, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_suppgid2_h) == 36, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_suppgid2_h)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_suppgid2_h)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid1) == 40, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_fid1)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid1) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid1)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_fid2) == 56, "found %lld\n", - 
(long long)(int)offsetof(struct mdt_rec_create, cr_fid2)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_fid2) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_fid2)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_old_handle) == 72, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_old_handle)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_old_handle)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_time) == 80, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_time)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_time) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_time)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_rdev) == 88, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_rdev)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_rdev) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_rdev)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_ioepoch) == 96, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_ioepoch)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_ioepoch)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_1) == 104, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_padding_1)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_1)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_mode) == 112, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_mode)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_mode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create 
*)0)->cr_mode)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_bias) == 116, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_bias)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_bias) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_bias)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_l) == 120, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_flags_l)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_l)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_flags_h) == 124, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_flags_h)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_flags_h)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_umask) == 128, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_umask)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_umask) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_umask)); - LASSERTF((int)offsetof(struct mdt_rec_create, cr_padding_4) == 132, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_create, cr_padding_4)); - LASSERTF((int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_create *)0)->cr_padding_4)); - - /* Checks for struct mdt_rec_link */ - LASSERTF((int)sizeof(struct mdt_rec_link) == 136, "found %lld\n", - (long long)(int)sizeof(struct mdt_rec_link)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_opcode) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_opcode)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_opcode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_opcode)); - 
LASSERTF((int)offsetof(struct mdt_rec_link, lk_cap) == 4, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_cap)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_cap) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_cap)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsuid_h) == 12, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_fsuid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsuid_h)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_fsgid_h) == 20, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_fsgid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fsgid_h)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1) == 24, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid1_h) == 28, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid1_h)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h) == 4, "found %lld\n", - (long 
long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid1_h)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2) == 32, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_suppgid2_h) == 36, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_suppgid2_h)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_suppgid2_h)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid1) == 40, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_fid1)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid1) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid1)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_fid2) == 56, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_fid2)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_fid2) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_fid2)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_time) == 72, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_time)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_time) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_time)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_1) == 80, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_padding_1)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_1)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_2) == 88, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_padding_2)); - 
LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_2)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_3) == 96, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_padding_3)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_3)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_4) == 104, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_padding_4)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_4)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_bias) == 112, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_bias)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_bias) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_bias)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_5) == 116, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_padding_5)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_5)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_6) == 120, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_padding_6)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_6)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_7) == 124, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_padding_7)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_7)); - LASSERTF((int)offsetof(struct 
mdt_rec_link, lk_padding_8) == 128, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_padding_8)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_8)); - LASSERTF((int)offsetof(struct mdt_rec_link, lk_padding_9) == 132, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_link, lk_padding_9)); - LASSERTF((int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_link *)0)->lk_padding_9)); - - /* Checks for struct mdt_rec_unlink */ - LASSERTF((int)sizeof(struct mdt_rec_unlink) == 136, "found %lld\n", - (long long)(int)sizeof(struct mdt_rec_unlink)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_opcode) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_opcode)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_opcode)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_cap) == 4, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_cap)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_cap)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsuid_h) == 12, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsuid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsuid_h)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid) == 16, "found 
%lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fsgid_h) == 20, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_fsgid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fsgid_h)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1) == 24, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h) == 28, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid1_h)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid1_h)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2) == 32, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h) == 36, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_suppgid2_h)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_suppgid2_h)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid1) == 40, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid1)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1) == 16, "found %lld\n", - 
(long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid1)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_fid2) == 56, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_fid2)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_fid2)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_time) == 72, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_time)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_time) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_time)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_2) == 80, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_2)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_2)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_3) == 88, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_3)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_3)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_4) == 96, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_4)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_4)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_5) == 104, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_5)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_5)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_bias) == 112, "found %lld\n", - (long 
long)(int)offsetof(struct mdt_rec_unlink, ul_bias)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_bias)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_mode) == 116, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_mode)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_mode)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_6) == 120, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_6)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_6)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_7) == 124, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_7)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_7)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_8) == 128, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_8)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_8)); - LASSERTF((int)offsetof(struct mdt_rec_unlink, ul_padding_9) == 132, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_unlink, ul_padding_9)); - LASSERTF((int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_unlink *)0)->ul_padding_9)); - - /* Checks for struct mdt_rec_rename */ - LASSERTF((int)sizeof(struct mdt_rec_rename) == 136, "found %lld\n", - (long long)(int)sizeof(struct mdt_rec_rename)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_opcode) == 0, "found %lld\n", - (long 
long)(int)offsetof(struct mdt_rec_rename, rn_opcode)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_opcode)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_cap) == 4, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_cap)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_cap) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_cap)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsuid_h) == 12, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_fsuid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsuid_h)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fsgid_h) == 20, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_fsgid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fsgid_h)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1) == 24, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1)); - 
LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid1_h) == 28, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid1_h)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid1_h)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2) == 32, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_suppgid2_h) == 36, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_suppgid2_h)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_suppgid2_h)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid1) == 40, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_fid1)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid1)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_fid2) == 56, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_fid2)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_fid2)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_time) == 72, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_time)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_time) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_time)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_1) == 80, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_1)); - LASSERTF((int)sizeof(((struct 
mdt_rec_rename *)0)->rn_padding_1) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_1)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_2) == 88, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_2)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_2)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_3) == 96, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_3)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_3)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_4) == 104, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_4)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_4)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_bias) == 112, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_bias)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_bias) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_bias)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_mode) == 116, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_mode)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_mode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_mode)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_5) == 120, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_5)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_5)); - LASSERTF((int)offsetof(struct 
mdt_rec_rename, rn_padding_6) == 124, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_6)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_6)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_7) == 128, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_7)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_7)); - LASSERTF((int)offsetof(struct mdt_rec_rename, rn_padding_8) == 132, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_rename, rn_padding_8)); - LASSERTF((int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_rename *)0)->rn_padding_8)); - - /* Checks for struct mdt_rec_setxattr */ - LASSERTF((int)sizeof(struct mdt_rec_setxattr) == 136, "found %lld\n", - (long long)(int)sizeof(struct mdt_rec_setxattr)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_opcode) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_opcode)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_opcode)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_cap) == 4, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_cap)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_cap)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid)); - 
LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h) == 12, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsuid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsuid_h)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h) == 20, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fsgid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fsgid_h)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1) == 24, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h) == 28, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid1_h)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid1_h)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2) == 32, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_suppgid2)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_suppgid2_h) == 36, "found %lld\n", - (long long)(int)offsetof(struct 
mdt_rec_setxattr, sx_suppgid2_h)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_suppgid2_h)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_fid) == 40, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_fid)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_fid)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_1) == 56, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_1)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_1)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_2) == 64, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_2)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_2)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_3) == 68, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_3)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_3)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_valid) == 72, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_valid)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_valid)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_time) == 80, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_time)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time) == 8, "found %lld\n", - (long 
long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_time)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_5) == 88, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_5)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_5)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_6) == 96, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_6)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_6)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_7) == 104, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_7)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_7)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_size) == 112, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_size)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_size)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_flags) == 116, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_flags)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_flags)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_8) == 120, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_8)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_8)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, 
sx_padding_9) == 124, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_9)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_9)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_10) == 128, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_10)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_10)); - LASSERTF((int)offsetof(struct mdt_rec_setxattr, sx_padding_11) == 132, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_setxattr, sx_padding_11)); - LASSERTF((int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_setxattr *)0)->sx_padding_11)); - - /* Checks for struct mdt_rec_reint */ - LASSERTF((int)sizeof(struct mdt_rec_reint) == 136, "found %lld\n", - (long long)(int)sizeof(struct mdt_rec_reint)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_opcode) == 0, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_opcode)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_opcode)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_cap) == 4, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_cap)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_cap) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_cap)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsuid) == 8, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid)); - LASSERTF((int)offsetof(struct mdt_rec_reint, 
rr_fsuid_h) == 12, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_fsuid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsuid_h)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid) == 16, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fsgid_h) == 20, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_fsgid_h)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fsgid_h)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1) == 24, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid1_h) == 28, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid1_h)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid1_h)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2) == 32, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_suppgid2_h) == 36, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_suppgid2_h)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h) == 4, "found %lld\n", 
- (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_suppgid2_h)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid1) == 40, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_fid1)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid1)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_fid2) == 56, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_fid2)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2) == 16, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_fid2)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mtime) == 72, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_mtime)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mtime)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_atime) == 80, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_atime)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_atime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_atime)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_ctime) == 88, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_ctime)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_ctime)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_size) == 96, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_size)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_size) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_size)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_blocks) == 104, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_blocks)); - LASSERTF((int)sizeof(((struct mdt_rec_reint 
*)0)->rr_blocks) == 8, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_blocks)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_bias) == 112, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_bias)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_bias) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_bias)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_mode) == 116, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_mode)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_mode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_mode)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags) == 120, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_flags)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_flags_h) == 124, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_flags_h)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_flags_h)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_umask) == 128, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_umask)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_umask) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_umask)); - LASSERTF((int)offsetof(struct mdt_rec_reint, rr_padding_4) == 132, "found %lld\n", - (long long)(int)offsetof(struct mdt_rec_reint, rr_padding_4)); - LASSERTF((int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mdt_rec_reint *)0)->rr_padding_4)); - - /* Checks for struct lmv_desc */ - LASSERTF((int)sizeof(struct lmv_desc) == 88, "found %lld\n", - (long 
long)(int)sizeof(struct lmv_desc)); - LASSERTF((int)offsetof(struct lmv_desc, ld_tgt_count) == 0, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_tgt_count)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_tgt_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_tgt_count)); - LASSERTF((int)offsetof(struct lmv_desc, ld_active_tgt_count) == 4, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_active_tgt_count)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_active_tgt_count)); - LASSERTF((int)offsetof(struct lmv_desc, ld_default_stripe_count) == 8, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_default_stripe_count)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_stripe_count)); - LASSERTF((int)offsetof(struct lmv_desc, ld_pattern) == 12, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_pattern)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_pattern) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_pattern)); - LASSERTF((int)offsetof(struct lmv_desc, ld_default_hash_size) == 16, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_default_hash_size)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_default_hash_size)); - LASSERTF((int)offsetof(struct lmv_desc, ld_padding_1) == 24, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_padding_1)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_1) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_1)); - LASSERTF((int)offsetof(struct lmv_desc, ld_padding_2) == 32, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_padding_2)); - 
LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_2)); - LASSERTF((int)offsetof(struct lmv_desc, ld_qos_maxage) == 36, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_qos_maxage)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_qos_maxage)); - LASSERTF((int)offsetof(struct lmv_desc, ld_padding_3) == 40, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_padding_3)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_3) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_3)); - LASSERTF((int)offsetof(struct lmv_desc, ld_padding_4) == 44, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_padding_4)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_padding_4) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_padding_4)); - LASSERTF((int)offsetof(struct lmv_desc, ld_uuid) == 48, "found %lld\n", - (long long)(int)offsetof(struct lmv_desc, ld_uuid)); - LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_uuid) == 40, "found %lld\n", - (long long)(int)sizeof(((struct lmv_desc *)0)->ld_uuid)); - - /* Checks for struct lov_desc */ - LASSERTF((int)sizeof(struct lov_desc) == 88, "found %lld\n", - (long long)(int)sizeof(struct lov_desc)); - LASSERTF((int)offsetof(struct lov_desc, ld_tgt_count) == 0, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_tgt_count)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_tgt_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_tgt_count)); - LASSERTF((int)offsetof(struct lov_desc, ld_active_tgt_count) == 4, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_active_tgt_count)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_active_tgt_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc 
*)0)->ld_active_tgt_count)); - LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_count) == 8, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_default_stripe_count)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_count)); - LASSERTF((int)offsetof(struct lov_desc, ld_pattern) == 12, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_pattern)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_pattern) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_pattern)); - LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_size) == 16, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_default_stripe_size)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_size)); - LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_0) == 32, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_padding_0)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_0) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_0)); - LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_qos_maxage)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, "found %lld\n", - (long long)(int)offsetof(struct 
lov_desc, ld_padding_1)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_padding_2)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2)); - LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, "found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_uuid)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, "found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_uuid)); - BUILD_BUG_ON(LOV_DESC_MAGIC != 0xB0CCDE5C); - - /* Checks for struct ldlm_res_id */ - LASSERTF((int)sizeof(struct ldlm_res_id) == 32, "found %lld\n", - (long long)(int)sizeof(struct ldlm_res_id)); - BUILD_BUG_ON(RES_NAME_SIZE != 4); - LASSERTF((int)offsetof(struct ldlm_res_id, name[4]) == 32, "found %lld\n", - (long long)(int)offsetof(struct ldlm_res_id, name[4])); - LASSERTF((int)sizeof(((struct ldlm_res_id *)0)->name[4]) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_res_id *)0)->name[4])); - - /* Checks for struct ldlm_extent */ - LASSERTF((int)sizeof(struct ldlm_extent) == 24, "found %lld\n", - (long long)(int)sizeof(struct ldlm_extent)); - LASSERTF((int)offsetof(struct ldlm_extent, start) == 0, "found %lld\n", - (long long)(int)offsetof(struct ldlm_extent, start)); - LASSERTF((int)sizeof(((struct ldlm_extent *)0)->start) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_extent *)0)->start)); - LASSERTF((int)offsetof(struct ldlm_extent, end) == 8, "found %lld\n", - (long long)(int)offsetof(struct ldlm_extent, end)); - LASSERTF((int)sizeof(((struct ldlm_extent *)0)->end) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_extent *)0)->end)); - LASSERTF((int)offsetof(struct ldlm_extent, gid) == 16, "found 
%lld\n", - (long long)(int)offsetof(struct ldlm_extent, gid)); - LASSERTF((int)sizeof(((struct ldlm_extent *)0)->gid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_extent *)0)->gid)); - - /* Checks for struct ldlm_inodebits */ - LASSERTF((int)sizeof(struct ldlm_inodebits) == 8, "found %lld\n", - (long long)(int)sizeof(struct ldlm_inodebits)); - LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, "found %lld\n", - (long long)(int)offsetof(struct ldlm_inodebits, bits)); - LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->bits) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_inodebits *)0)->bits)); - - /* Checks for struct ldlm_flock_wire */ - LASSERTF((int)sizeof(struct ldlm_flock_wire) == 32, "found %lld\n", - (long long)(int)sizeof(struct ldlm_flock_wire)); - LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_start) == 0, "found %lld\n", - (long long)(int)offsetof(struct ldlm_flock_wire, lfw_start)); - LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_start)); - LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_end) == 8, "found %lld\n", - (long long)(int)offsetof(struct ldlm_flock_wire, lfw_end)); - LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_end)); - LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_owner) == 16, "found %lld\n", - (long long)(int)offsetof(struct ldlm_flock_wire, lfw_owner)); - LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_owner)); - LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_padding) == 24, "found %lld\n", - (long long)(int)offsetof(struct ldlm_flock_wire, lfw_padding)); - LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_flock_wire 
*)0)->lfw_padding)); - LASSERTF((int)offsetof(struct ldlm_flock_wire, lfw_pid) == 28, "found %lld\n", - (long long)(int)offsetof(struct ldlm_flock_wire, lfw_pid)); - LASSERTF((int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_flock_wire *)0)->lfw_pid)); - - /* Checks for struct ldlm_intent */ - LASSERTF((int)sizeof(struct ldlm_intent) == 8, "found %lld\n", - (long long)(int)sizeof(struct ldlm_intent)); - LASSERTF((int)offsetof(struct ldlm_intent, opc) == 0, "found %lld\n", - (long long)(int)offsetof(struct ldlm_intent, opc)); - LASSERTF((int)sizeof(((struct ldlm_intent *)0)->opc) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_intent *)0)->opc)); - - /* Checks for struct ldlm_resource_desc */ - LASSERTF((int)sizeof(struct ldlm_resource_desc) == 40, "found %lld\n", - (long long)(int)sizeof(struct ldlm_resource_desc)); - LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_type) == 0, "found %lld\n", - (long long)(int)offsetof(struct ldlm_resource_desc, lr_type)); - LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_type) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_type)); - LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_padding) == 4, "found %lld\n", - (long long)(int)offsetof(struct ldlm_resource_desc, lr_padding)); - LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_padding)); - LASSERTF((int)offsetof(struct ldlm_resource_desc, lr_name) == 8, "found %lld\n", - (long long)(int)offsetof(struct ldlm_resource_desc, lr_name)); - LASSERTF((int)sizeof(((struct ldlm_resource_desc *)0)->lr_name) == 32, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_resource_desc *)0)->lr_name)); - - /* Checks for struct ldlm_lock_desc */ - LASSERTF((int)sizeof(struct ldlm_lock_desc) == 80, "found %lld\n", - (long long)(int)sizeof(struct ldlm_lock_desc)); - 
LASSERTF((int)offsetof(struct ldlm_lock_desc, l_resource) == 0, "found %lld\n", - (long long)(int)offsetof(struct ldlm_lock_desc, l_resource)); - LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_resource) == 40, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_resource)); - LASSERTF((int)offsetof(struct ldlm_lock_desc, l_req_mode) == 40, "found %lld\n", - (long long)(int)offsetof(struct ldlm_lock_desc, l_req_mode)); - LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_req_mode)); - LASSERTF((int)offsetof(struct ldlm_lock_desc, l_granted_mode) == 44, "found %lld\n", - (long long)(int)offsetof(struct ldlm_lock_desc, l_granted_mode)); - LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_granted_mode)); - LASSERTF((int)offsetof(struct ldlm_lock_desc, l_policy_data) == 48, "found %lld\n", - (long long)(int)offsetof(struct ldlm_lock_desc, l_policy_data)); - LASSERTF((int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data) == 32, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_lock_desc *)0)->l_policy_data)); - - /* Checks for struct ldlm_request */ - LASSERTF((int)sizeof(struct ldlm_request) == 104, "found %lld\n", - (long long)(int)sizeof(struct ldlm_request)); - LASSERTF((int)offsetof(struct ldlm_request, lock_flags) == 0, "found %lld\n", - (long long)(int)offsetof(struct ldlm_request, lock_flags)); - LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_request *)0)->lock_flags)); - LASSERTF((int)offsetof(struct ldlm_request, lock_count) == 4, "found %lld\n", - (long long)(int)offsetof(struct ldlm_request, lock_count)); - LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_request *)0)->lock_count)); - 
LASSERTF((int)offsetof(struct ldlm_request, lock_desc) == 8, "found %lld\n", - (long long)(int)offsetof(struct ldlm_request, lock_desc)); - LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_desc) == 80, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_request *)0)->lock_desc)); - LASSERTF((int)offsetof(struct ldlm_request, lock_handle) == 88, "found %lld\n", - (long long)(int)offsetof(struct ldlm_request, lock_handle)); - LASSERTF((int)sizeof(((struct ldlm_request *)0)->lock_handle) == 16, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_request *)0)->lock_handle)); - - /* Checks for struct ldlm_reply */ - LASSERTF((int)sizeof(struct ldlm_reply) == 112, "found %lld\n", - (long long)(int)sizeof(struct ldlm_reply)); - LASSERTF((int)offsetof(struct ldlm_reply, lock_flags) == 0, "found %lld\n", - (long long)(int)offsetof(struct ldlm_reply, lock_flags)); - LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_flags)); - LASSERTF((int)offsetof(struct ldlm_reply, lock_padding) == 4, "found %lld\n", - (long long)(int)offsetof(struct ldlm_reply, lock_padding)); - LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_padding)); - LASSERTF((int)offsetof(struct ldlm_reply, lock_desc) == 8, "found %lld\n", - (long long)(int)offsetof(struct ldlm_reply, lock_desc)); - LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_desc) == 80, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_desc)); - LASSERTF((int)offsetof(struct ldlm_reply, lock_handle) == 88, "found %lld\n", - (long long)(int)offsetof(struct ldlm_reply, lock_handle)); - LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_handle) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_handle)); - LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res1) == 96, "found %lld\n", - (long 
long)(int)offsetof(struct ldlm_reply, lock_policy_res1)); - LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res1)); - LASSERTF((int)offsetof(struct ldlm_reply, lock_policy_res2) == 104, "found %lld\n", - (long long)(int)offsetof(struct ldlm_reply, lock_policy_res2)); - LASSERTF((int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_reply *)0)->lock_policy_res2)); - - /* Checks for struct ost_lvb_v1 */ - LASSERTF((int)sizeof(struct ost_lvb_v1) == 40, "found %lld\n", - (long long)(int)sizeof(struct ost_lvb_v1)); - LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_size) == 0, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb_v1, lvb_size)); - LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_size)); - LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_mtime) == 8, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb_v1, lvb_mtime)); - LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_mtime)); - LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_atime) == 16, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb_v1, lvb_atime)); - LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_atime)); - LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_ctime) == 24, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb_v1, lvb_ctime)); - LASSERTF((int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_ctime)); - LASSERTF((int)offsetof(struct ost_lvb_v1, lvb_blocks) == 32, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb_v1, lvb_blocks)); - LASSERTF((int)sizeof(((struct ost_lvb_v1 
*)0)->lvb_blocks) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb_v1 *)0)->lvb_blocks)); - - /* Checks for struct ost_lvb */ - LASSERTF((int)sizeof(struct ost_lvb) == 56, "found %lld\n", - (long long)(int)sizeof(struct ost_lvb)); - LASSERTF((int)offsetof(struct ost_lvb, lvb_size) == 0, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb, lvb_size)); - LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_size) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_size)); - LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime) == 8, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb, lvb_mtime)); - LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime)); - LASSERTF((int)offsetof(struct ost_lvb, lvb_atime) == 16, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb, lvb_atime)); - LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime)); - LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime) == 24, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb, lvb_ctime)); - LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime)); - LASSERTF((int)offsetof(struct ost_lvb, lvb_blocks) == 32, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb, lvb_blocks)); - LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_blocks) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_blocks)); - LASSERTF((int)offsetof(struct ost_lvb, lvb_mtime_ns) == 40, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb, lvb_mtime_ns)); - LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_mtime_ns)); - LASSERTF((int)offsetof(struct ost_lvb, lvb_atime_ns) == 44, "found %lld\n", - (long long)(int)offsetof(struct 
ost_lvb, lvb_atime_ns)); - LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_atime_ns)); - LASSERTF((int)offsetof(struct ost_lvb, lvb_ctime_ns) == 48, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb, lvb_ctime_ns)); - LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_ctime_ns)); - LASSERTF((int)offsetof(struct ost_lvb, lvb_padding) == 52, "found %lld\n", - (long long)(int)offsetof(struct ost_lvb, lvb_padding)); - LASSERTF((int)sizeof(((struct ost_lvb *)0)->lvb_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct ost_lvb *)0)->lvb_padding)); - - /* Checks for struct lquota_lvb */ - LASSERTF((int)sizeof(struct lquota_lvb) == 40, "found %lld\n", - (long long)(int)sizeof(struct lquota_lvb)); - LASSERTF((int)offsetof(struct lquota_lvb, lvb_flags) == 0, "found %lld\n", - (long long)(int)offsetof(struct lquota_lvb, lvb_flags)); - LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_flags) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_flags)); - LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_may_rel) == 8, "found %lld\n", - (long long)(int)offsetof(struct lquota_lvb, lvb_id_may_rel)); - LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_may_rel)); - LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_rel) == 16, "found %lld\n", - (long long)(int)offsetof(struct lquota_lvb, lvb_id_rel)); - LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_rel)); - LASSERTF((int)offsetof(struct lquota_lvb, lvb_id_qunit) == 24, "found %lld\n", - (long long)(int)offsetof(struct lquota_lvb, lvb_id_qunit)); - LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit) == 8, "found %lld\n", - (long 
long)(int)sizeof(((struct lquota_lvb *)0)->lvb_id_qunit)); - LASSERTF((int)offsetof(struct lquota_lvb, lvb_pad1) == 32, "found %lld\n", - (long long)(int)offsetof(struct lquota_lvb, lvb_pad1)); - LASSERTF((int)sizeof(((struct lquota_lvb *)0)->lvb_pad1) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lquota_lvb *)0)->lvb_pad1)); - LASSERTF(LQUOTA_FL_EDQUOT == 1, "found %lld\n", - (long long)LQUOTA_FL_EDQUOT); - - /* Checks for struct ldlm_gl_lquota_desc */ - LASSERTF((int)sizeof(struct ldlm_gl_lquota_desc) == 64, "found %lld\n", - (long long)(int)sizeof(struct ldlm_gl_lquota_desc)); - LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_id) == 0, "found %lld\n", - (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_id)); - LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id) == 16, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_id)); - LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_flags) == 16, "found %lld\n", - (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_flags)); - LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_flags)); - LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_ver) == 24, "found %lld\n", - (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_ver)); - LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_ver)); - LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit) == 32, "found %lld\n", - (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_hardlimit)); - LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_hardlimit)); - LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_softlimit) == 40, "found %lld\n", - (long long)(int)offsetof(struct ldlm_gl_lquota_desc, 
gl_softlimit)); - LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_softlimit)); - LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_time) == 48, "found %lld\n", - (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_time)); - LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_time)); - LASSERTF((int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2) == 56, "found %lld\n", - (long long)(int)offsetof(struct ldlm_gl_lquota_desc, gl_pad2)); - LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2)); - - /* Checks for struct mgs_send_param */ - LASSERTF((int)sizeof(struct mgs_send_param) == 1024, "found %lld\n", - (long long)(int)sizeof(struct mgs_send_param)); - BUILD_BUG_ON(MGS_PARAM_MAXLEN != 1024); - LASSERTF((int)offsetof(struct mgs_send_param, mgs_param[1024]) == 1024, "found %lld\n", - (long long)(int)offsetof(struct mgs_send_param, mgs_param[1024])); - LASSERTF((int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024]) == 1, "found %lld\n", - (long long)(int)sizeof(((struct mgs_send_param *)0)->mgs_param[1024])); - - /* Checks for struct cfg_marker */ - LASSERTF((int)sizeof(struct cfg_marker) == 160, "found %lld\n", - (long long)(int)sizeof(struct cfg_marker)); - LASSERTF((int)offsetof(struct cfg_marker, cm_step) == 0, "found %lld\n", - (long long)(int)offsetof(struct cfg_marker, cm_step)); - LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_step) == 4, "found %lld\n", - (long long)(int)sizeof(((struct cfg_marker *)0)->cm_step)); - LASSERTF((int)offsetof(struct cfg_marker, cm_flags) == 4, "found %lld\n", - (long long)(int)offsetof(struct cfg_marker, cm_flags)); - LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_flags) == 4, "found %lld\n", - (long 
long)(int)sizeof(((struct cfg_marker *)0)->cm_flags)); - LASSERTF((int)offsetof(struct cfg_marker, cm_vers) == 8, "found %lld\n", - (long long)(int)offsetof(struct cfg_marker, cm_vers)); - LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_vers) == 4, "found %lld\n", - (long long)(int)sizeof(((struct cfg_marker *)0)->cm_vers)); - LASSERTF((int)offsetof(struct cfg_marker, cm_padding) == 12, "found %lld\n", - (long long)(int)offsetof(struct cfg_marker, cm_padding)); - LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct cfg_marker *)0)->cm_padding)); - LASSERTF((int)offsetof(struct cfg_marker, cm_createtime) == 16, "found %lld\n", - (long long)(int)offsetof(struct cfg_marker, cm_createtime)); - LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_createtime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct cfg_marker *)0)->cm_createtime)); - LASSERTF((int)offsetof(struct cfg_marker, cm_canceltime) == 24, "found %lld\n", - (long long)(int)offsetof(struct cfg_marker, cm_canceltime)); - LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_canceltime) == 8, "found %lld\n", - (long long)(int)sizeof(((struct cfg_marker *)0)->cm_canceltime)); - LASSERTF((int)offsetof(struct cfg_marker, cm_tgtname) == 32, "found %lld\n", - (long long)(int)offsetof(struct cfg_marker, cm_tgtname)); - LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_tgtname) == 64, "found %lld\n", - (long long)(int)sizeof(((struct cfg_marker *)0)->cm_tgtname)); - LASSERTF((int)offsetof(struct cfg_marker, cm_comment) == 96, "found %lld\n", - (long long)(int)offsetof(struct cfg_marker, cm_comment)); - LASSERTF((int)sizeof(((struct cfg_marker *)0)->cm_comment) == 64, "found %lld\n", - (long long)(int)sizeof(((struct cfg_marker *)0)->cm_comment)); - - /* Checks for struct llog_logid */ - LASSERTF((int)sizeof(struct llog_logid) == 20, "found %lld\n", - (long long)(int)sizeof(struct llog_logid)); - LASSERTF((int)offsetof(struct llog_logid, lgl_oi) == 0, 
"found %lld\n", - (long long)(int)offsetof(struct llog_logid, lgl_oi)); - LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_oi) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_logid *)0)->lgl_oi)); - LASSERTF((int)offsetof(struct llog_logid, lgl_ogen) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_logid, lgl_ogen)); - LASSERTF((int)sizeof(((struct llog_logid *)0)->lgl_ogen) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_logid *)0)->lgl_ogen)); - BUILD_BUG_ON(OST_SZ_REC != 274730752); - BUILD_BUG_ON(MDS_UNLINK_REC != 274801668); - BUILD_BUG_ON(MDS_UNLINK64_REC != 275325956); - BUILD_BUG_ON(MDS_SETATTR64_REC != 275325953); - BUILD_BUG_ON(OBD_CFG_REC != 274857984); - BUILD_BUG_ON(LLOG_GEN_REC != 274989056); - BUILD_BUG_ON(CHANGELOG_REC != 275120128); - BUILD_BUG_ON(CHANGELOG_USER_REC != 275185664); - BUILD_BUG_ON(LLOG_HDR_MAGIC != 275010873); - BUILD_BUG_ON(LLOG_LOGID_MAGIC != 275010875); - - /* Checks for struct llog_catid */ - LASSERTF((int)sizeof(struct llog_catid) == 32, "found %lld\n", - (long long)(int)sizeof(struct llog_catid)); - LASSERTF((int)offsetof(struct llog_catid, lci_logid) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_catid, lci_logid)); - LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_logid) == 20, "found %lld\n", - (long long)(int)sizeof(((struct llog_catid *)0)->lci_logid)); - LASSERTF((int)offsetof(struct llog_catid, lci_padding1) == 20, "found %lld\n", - (long long)(int)offsetof(struct llog_catid, lci_padding1)); - LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding1)); - LASSERTF((int)offsetof(struct llog_catid, lci_padding2) == 24, "found %lld\n", - (long long)(int)offsetof(struct llog_catid, lci_padding2)); - LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding2) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding2)); - LASSERTF((int)offsetof(struct 
llog_catid, lci_padding3) == 28, "found %lld\n", - (long long)(int)offsetof(struct llog_catid, lci_padding3)); - LASSERTF((int)sizeof(((struct llog_catid *)0)->lci_padding3) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_catid *)0)->lci_padding3)); - - /* Checks for struct llog_rec_hdr */ - LASSERTF((int)sizeof(struct llog_rec_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(struct llog_rec_hdr)); - LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_len) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_rec_hdr, lrh_len)); - LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_len) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_len)); - LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_index) == 4, "found %lld\n", - (long long)(int)offsetof(struct llog_rec_hdr, lrh_index)); - LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_index) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_index)); - LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_type) == 8, "found %lld\n", - (long long)(int)offsetof(struct llog_rec_hdr, lrh_type)); - LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_type) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_type)); - LASSERTF((int)offsetof(struct llog_rec_hdr, lrh_id) == 12, "found %lld\n", - (long long)(int)offsetof(struct llog_rec_hdr, lrh_id)); - LASSERTF((int)sizeof(((struct llog_rec_hdr *)0)->lrh_id) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_rec_hdr *)0)->lrh_id)); - - /* Checks for struct llog_rec_tail */ - LASSERTF((int)sizeof(struct llog_rec_tail) == 8, "found %lld\n", - (long long)(int)sizeof(struct llog_rec_tail)); - LASSERTF((int)offsetof(struct llog_rec_tail, lrt_len) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_rec_tail, lrt_len)); - LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_len) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_len)); 
- LASSERTF((int)offsetof(struct llog_rec_tail, lrt_index) == 4, "found %lld\n", - (long long)(int)offsetof(struct llog_rec_tail, lrt_index)); - LASSERTF((int)sizeof(((struct llog_rec_tail *)0)->lrt_index) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_rec_tail *)0)->lrt_index)); - - /* Checks for struct llog_logid_rec */ - LASSERTF((int)sizeof(struct llog_logid_rec) == 64, "found %lld\n", - (long long)(int)sizeof(struct llog_logid_rec)); - LASSERTF((int)offsetof(struct llog_logid_rec, lid_hdr) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_logid_rec, lid_hdr)); - LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_hdr)); - LASSERTF((int)offsetof(struct llog_logid_rec, lid_id) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_logid_rec, lid_id)); - LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_id) == 20, "found %lld\n", - (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_id)); - LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding1) == 36, "found %lld\n", - (long long)(int)offsetof(struct llog_logid_rec, lid_padding1)); - LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding1)); - LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding2) == 40, "found %lld\n", - (long long)(int)offsetof(struct llog_logid_rec, lid_padding2)); - LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding2)); - LASSERTF((int)offsetof(struct llog_logid_rec, lid_padding3) == 48, "found %lld\n", - (long long)(int)offsetof(struct llog_logid_rec, lid_padding3)); - LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_padding3) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_padding3)); - LASSERTF((int)offsetof(struct 
llog_logid_rec, lid_tail) == 56, "found %lld\n", - (long long)(int)offsetof(struct llog_logid_rec, lid_tail)); - LASSERTF((int)sizeof(((struct llog_logid_rec *)0)->lid_tail) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_logid_rec *)0)->lid_tail)); - - /* Checks for struct llog_unlink_rec */ - LASSERTF((int)sizeof(struct llog_unlink_rec) == 40, "found %lld\n", - (long long)(int)sizeof(struct llog_unlink_rec)); - LASSERTF((int)offsetof(struct llog_unlink_rec, lur_hdr) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink_rec, lur_hdr)); - LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_hdr)); - LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oid) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink_rec, lur_oid)); - LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oid)); - LASSERTF((int)offsetof(struct llog_unlink_rec, lur_oseq) == 24, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink_rec, lur_oseq)); - LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_oseq)); - LASSERTF((int)offsetof(struct llog_unlink_rec, lur_count) == 28, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink_rec, lur_count)); - LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_count)); - LASSERTF((int)offsetof(struct llog_unlink_rec, lur_tail) == 32, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink_rec, lur_tail)); - LASSERTF((int)sizeof(((struct llog_unlink_rec *)0)->lur_tail) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink_rec *)0)->lur_tail)); - /* Checks for struct llog_unlink64_rec */ - LASSERTF((int)sizeof(struct 
llog_unlink64_rec) == 64, "found %lld\n", - (long long)(int)sizeof(struct llog_unlink64_rec)); - LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_hdr) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink64_rec, lur_hdr)); - LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_hdr)); - LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_fid) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink64_rec, lur_fid)); - LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_fid)); - LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_count) == 32, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink64_rec, lur_count)); - LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_count)); - LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_tail) == 56, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink64_rec, lur_tail)); - LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_tail)); - LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding1) == 36, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding1)); - LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding1)); - LASSERTF((int)offsetof(struct llog_unlink64_rec, lur_padding2) == 40, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding2)); - LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding2)); - LASSERTF((int)offsetof(struct 
llog_unlink64_rec, lur_padding3) == 48, "found %lld\n", - (long long)(int)offsetof(struct llog_unlink64_rec, lur_padding3)); - LASSERTF((int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_unlink64_rec *)0)->lur_padding3)); - - /* Checks for struct llog_setattr64_rec */ - LASSERTF((int)sizeof(struct llog_setattr64_rec) == 64, "found %lld\n", - (long long)(int)sizeof(struct llog_setattr64_rec)); - LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_hdr) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_setattr64_rec, lsr_hdr)); - LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_hdr)); - LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_oi) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_setattr64_rec, lsr_oi)); - LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_oi)); - LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid) == 32, "found %lld\n", - (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid)); - LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid)); - LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_uid_h) == 36, "found %lld\n", - (long long)(int)offsetof(struct llog_setattr64_rec, lsr_uid_h)); - LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_uid_h)); - LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid) == 40, "found %lld\n", - (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid)); - LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_setattr64_rec 
*)0)->lsr_gid)); - LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_gid_h) == 44, "found %lld\n", - (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h)); - LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h)); - LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_valid) == 48, "found %lld\n", - (long long)(int)offsetof(struct llog_setattr64_rec, lsr_valid)); - LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid)); - LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n", - (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail)); - LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail)); - - /* Checks for struct llog_size_change_rec */ - LASSERTF((int)sizeof(struct llog_size_change_rec) == 64, "found %lld\n", - (long long)(int)sizeof(struct llog_size_change_rec)); - LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_hdr) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_size_change_rec, lsc_hdr)); - LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_hdr)); - LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_fid) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_size_change_rec, lsc_fid)); - LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_fid)); - LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_ioepoch) == 32, "found %lld\n", - (long long)(int)offsetof(struct llog_size_change_rec, lsc_ioepoch)); - LASSERTF((int)sizeof(((struct llog_size_change_rec 
*)0)->lsc_ioepoch) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_ioepoch)); - LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding1) == 36, "found %lld\n", - (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding1)); - LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding1)); - LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding2) == 40, "found %lld\n", - (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding2)); - LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding2)); - LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_padding3) == 48, "found %lld\n", - (long long)(int)offsetof(struct llog_size_change_rec, lsc_padding3)); - LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_padding3)); - LASSERTF((int)offsetof(struct llog_size_change_rec, lsc_tail) == 56, "found %lld\n", - (long long)(int)offsetof(struct llog_size_change_rec, lsc_tail)); - LASSERTF((int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_size_change_rec *)0)->lsc_tail)); - - /* Checks for struct changelog_rec */ - LASSERTF((int)sizeof(struct changelog_rec) == 64, "found %lld\n", - (long long)(int)sizeof(struct changelog_rec)); - LASSERTF((int)offsetof(struct changelog_rec, cr_namelen) == 0, "found %lld\n", - (long long)(int)offsetof(struct changelog_rec, cr_namelen)); - LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_namelen) == 2, "found %lld\n", - (long long)(int)sizeof(((struct changelog_rec *)0)->cr_namelen)); - LASSERTF((int)offsetof(struct changelog_rec, cr_flags) == 2, "found %lld\n", - (long 
long)(int)offsetof(struct changelog_rec, cr_flags)); - LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_flags) == 2, "found %lld\n", - (long long)(int)sizeof(((struct changelog_rec *)0)->cr_flags)); - LASSERTF((int)offsetof(struct changelog_rec, cr_type) == 4, "found %lld\n", - (long long)(int)offsetof(struct changelog_rec, cr_type)); - LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_type) == 4, "found %lld\n", - (long long)(int)sizeof(((struct changelog_rec *)0)->cr_type)); - LASSERTF((int)offsetof(struct changelog_rec, cr_index) == 8, "found %lld\n", - (long long)(int)offsetof(struct changelog_rec, cr_index)); - LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_index) == 8, "found %lld\n", - (long long)(int)sizeof(((struct changelog_rec *)0)->cr_index)); - LASSERTF((int)offsetof(struct changelog_rec, cr_prev) == 16, "found %lld\n", - (long long)(int)offsetof(struct changelog_rec, cr_prev)); - LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_prev) == 8, "found %lld\n", - (long long)(int)sizeof(((struct changelog_rec *)0)->cr_prev)); - LASSERTF((int)offsetof(struct changelog_rec, cr_time) == 24, "found %lld\n", - (long long)(int)offsetof(struct changelog_rec, cr_time)); - LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_time) == 8, "found %lld\n", - (long long)(int)sizeof(((struct changelog_rec *)0)->cr_time)); - LASSERTF((int)offsetof(struct changelog_rec, cr_tfid) == 32, "found %lld\n", - (long long)(int)offsetof(struct changelog_rec, cr_tfid)); - LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_tfid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct changelog_rec *)0)->cr_tfid)); - LASSERTF((int)offsetof(struct changelog_rec, cr_pfid) == 48, "found %lld\n", - (long long)(int)offsetof(struct changelog_rec, cr_pfid)); - LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_pfid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct changelog_rec *)0)->cr_pfid)); - - /* Checks for struct changelog_setinfo */ - 
LASSERTF((int)sizeof(struct changelog_setinfo) == 12, "found %lld\n", - (long long)(int)sizeof(struct changelog_setinfo)); - LASSERTF((int)offsetof(struct changelog_setinfo, cs_recno) == 0, "found %lld\n", - (long long)(int)offsetof(struct changelog_setinfo, cs_recno)); - LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_recno) == 8, "found %lld\n", - (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_recno)); - LASSERTF((int)offsetof(struct changelog_setinfo, cs_id) == 8, "found %lld\n", - (long long)(int)offsetof(struct changelog_setinfo, cs_id)); - LASSERTF((int)sizeof(((struct changelog_setinfo *)0)->cs_id) == 4, "found %lld\n", - (long long)(int)sizeof(((struct changelog_setinfo *)0)->cs_id)); - - /* Checks for struct llog_changelog_rec */ - LASSERTF((int)sizeof(struct llog_changelog_rec) == 88, "found %lld\n", - (long long)(int)sizeof(struct llog_changelog_rec)); - LASSERTF((int)offsetof(struct llog_changelog_rec, cr_hdr) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_changelog_rec, cr_hdr)); - LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_hdr)); - LASSERTF((int)offsetof(struct llog_changelog_rec, cr) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_changelog_rec, cr)); - LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr) == 64, "found %lld\n", - (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr)); - LASSERTF((int)offsetof(struct llog_changelog_rec, cr_do_not_use) == 80, "found %lld\n", - (long long)(int)offsetof(struct llog_changelog_rec, cr_do_not_use)); - LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use)); - - /* Checks for struct llog_changelog_user_rec */ - LASSERTF((int)sizeof(struct llog_changelog_user_rec) == 40, "found %lld\n", - (long long)(int)sizeof(struct 
llog_changelog_user_rec)); - LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_hdr) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_changelog_user_rec, cur_hdr)); - LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_hdr)); - LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_id) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id)); - LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id)); - LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_padding) == 20, "found %lld\n", - (long long)(int)offsetof(struct llog_changelog_user_rec, cur_padding)); - LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding)); - LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n", - (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec)); - LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec)); - LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_tail) == 32, "found %lld\n", - (long long)(int)offsetof(struct llog_changelog_user_rec, cur_tail)); - LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_tail)); - - /* Checks for struct llog_gen */ - LASSERTF((int)sizeof(struct llog_gen) == 16, "found %lld\n", - (long long)(int)sizeof(struct llog_gen)); - LASSERTF((int)offsetof(struct llog_gen, mnt_cnt) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_gen, mnt_cnt)); - LASSERTF((int)sizeof(((struct 
llog_gen *)0)->mnt_cnt) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_gen *)0)->mnt_cnt)); - LASSERTF((int)offsetof(struct llog_gen, conn_cnt) == 8, "found %lld\n", - (long long)(int)offsetof(struct llog_gen, conn_cnt)); - LASSERTF((int)sizeof(((struct llog_gen *)0)->conn_cnt) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_gen *)0)->conn_cnt)); - - /* Checks for struct llog_gen_rec */ - LASSERTF((int)sizeof(struct llog_gen_rec) == 64, "found %lld\n", - (long long)(int)sizeof(struct llog_gen_rec)); - LASSERTF((int)offsetof(struct llog_gen_rec, lgr_hdr) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_gen_rec, lgr_hdr)); - LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_hdr)); - LASSERTF((int)offsetof(struct llog_gen_rec, lgr_gen) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_gen_rec, lgr_gen)); - LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_gen) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_gen)); - LASSERTF((int)offsetof(struct llog_gen_rec, lgr_tail) == 56, "found %lld\n", - (long long)(int)offsetof(struct llog_gen_rec, lgr_tail)); - LASSERTF((int)sizeof(((struct llog_gen_rec *)0)->lgr_tail) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_gen_rec *)0)->lgr_tail)); - - /* Checks for struct llog_log_hdr */ - LASSERTF((int)sizeof(struct llog_log_hdr) == 8192, "found %lld\n", - (long long)(int)sizeof(struct llog_log_hdr)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_hdr) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_hdr)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_hdr) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_hdr)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_timestamp) == 16, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_timestamp)); - 
LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_timestamp)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_count) == 24, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_count)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_count)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap_offset) == 28, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap_offset)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap_offset)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_size) == 32, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_size)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_size) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_size)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_flags) == 36, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_flags)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_flags)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_cat_idx) == 40, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_cat_idx)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_cat_idx)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_tgtuuid) == 44, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_tgtuuid)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid) == 40, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tgtuuid)); - LASSERTF((int)offsetof(struct llog_log_hdr, 
llh_reserved) == 84, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_reserved)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_reserved) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_reserved)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_bitmap) == 88, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_bitmap)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap) == 8096, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_bitmap)); - LASSERTF((int)offsetof(struct llog_log_hdr, llh_tail) == 8184, "found %lld\n", - (long long)(int)offsetof(struct llog_log_hdr, llh_tail)); - LASSERTF((int)sizeof(((struct llog_log_hdr *)0)->llh_tail) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llog_log_hdr *)0)->llh_tail)); - - /* Checks for struct llog_cookie */ - LASSERTF((int)sizeof(struct llog_cookie) == 32, "found %lld\n", - (long long)(int)sizeof(struct llog_cookie)); - LASSERTF((int)offsetof(struct llog_cookie, lgc_lgl) == 0, "found %lld\n", - (long long)(int)offsetof(struct llog_cookie, lgc_lgl)); - LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_lgl) == 20, "found %lld\n", - (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_lgl)); - LASSERTF((int)offsetof(struct llog_cookie, lgc_subsys) == 20, "found %lld\n", - (long long)(int)offsetof(struct llog_cookie, lgc_subsys)); - LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_subsys) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_subsys)); - LASSERTF((int)offsetof(struct llog_cookie, lgc_index) == 24, "found %lld\n", - (long long)(int)offsetof(struct llog_cookie, lgc_index)); - LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_index) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_index)); - LASSERTF((int)offsetof(struct llog_cookie, lgc_padding) == 28, "found %lld\n", - (long long)(int)offsetof(struct llog_cookie, lgc_padding)); - 
LASSERTF((int)sizeof(((struct llog_cookie *)0)->lgc_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llog_cookie *)0)->lgc_padding)); - - /* Checks for struct llogd_body */ - LASSERTF((int)sizeof(struct llogd_body) == 48, "found %lld\n", - (long long)(int)sizeof(struct llogd_body)); - LASSERTF((int)offsetof(struct llogd_body, lgd_logid) == 0, "found %lld\n", - (long long)(int)offsetof(struct llogd_body, lgd_logid)); - LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_logid) == 20, "found %lld\n", - (long long)(int)sizeof(((struct llogd_body *)0)->lgd_logid)); - LASSERTF((int)offsetof(struct llogd_body, lgd_ctxt_idx) == 20, "found %lld\n", - (long long)(int)offsetof(struct llogd_body, lgd_ctxt_idx)); - LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llogd_body *)0)->lgd_ctxt_idx)); - LASSERTF((int)offsetof(struct llogd_body, lgd_llh_flags) == 24, "found %lld\n", - (long long)(int)offsetof(struct llogd_body, lgd_llh_flags)); - LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_llh_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llogd_body *)0)->lgd_llh_flags)); - LASSERTF((int)offsetof(struct llogd_body, lgd_index) == 28, "found %lld\n", - (long long)(int)offsetof(struct llogd_body, lgd_index)); - LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_index) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llogd_body *)0)->lgd_index)); - LASSERTF((int)offsetof(struct llogd_body, lgd_saved_index) == 32, "found %lld\n", - (long long)(int)offsetof(struct llogd_body, lgd_saved_index)); - LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_saved_index) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llogd_body *)0)->lgd_saved_index)); - LASSERTF((int)offsetof(struct llogd_body, lgd_len) == 36, "found %lld\n", - (long long)(int)offsetof(struct llogd_body, lgd_len)); - LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_len) == 4, "found %lld\n", - (long 
long)(int)sizeof(((struct llogd_body *)0)->lgd_len)); - LASSERTF((int)offsetof(struct llogd_body, lgd_cur_offset) == 40, "found %lld\n", - (long long)(int)offsetof(struct llogd_body, lgd_cur_offset)); - LASSERTF((int)sizeof(((struct llogd_body *)0)->lgd_cur_offset) == 8, "found %lld\n", - (long long)(int)sizeof(((struct llogd_body *)0)->lgd_cur_offset)); - BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_CREATE != 501); - BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_NEXT_BLOCK != 502); - BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_READ_HEADER != 503); - BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_WRITE_REC != 504); - BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_CLOSE != 505); - BUILD_BUG_ON(LLOG_ORIGIN_CONNECT != 506); - BUILD_BUG_ON(LLOG_CATINFO != 507); - BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_PREV_BLOCK != 508); - BUILD_BUG_ON(LLOG_ORIGIN_HANDLE_DESTROY != 509); - BUILD_BUG_ON(LLOG_FIRST_OPC != 501); - BUILD_BUG_ON(LLOG_LAST_OPC != 510); - BUILD_BUG_ON(LLOG_CONFIG_ORIG_CTXT != 0); - BUILD_BUG_ON(LLOG_CONFIG_REPL_CTXT != 1); - BUILD_BUG_ON(LLOG_MDS_OST_ORIG_CTXT != 2); - BUILD_BUG_ON(LLOG_MDS_OST_REPL_CTXT != 3); - BUILD_BUG_ON(LLOG_SIZE_ORIG_CTXT != 4); - BUILD_BUG_ON(LLOG_SIZE_REPL_CTXT != 5); - BUILD_BUG_ON(LLOG_TEST_ORIG_CTXT != 8); - BUILD_BUG_ON(LLOG_TEST_REPL_CTXT != 9); - BUILD_BUG_ON(LLOG_CHANGELOG_ORIG_CTXT != 12); - BUILD_BUG_ON(LLOG_CHANGELOG_REPL_CTXT != 13); - BUILD_BUG_ON(LLOG_CHANGELOG_USER_ORIG_CTXT != 14); - BUILD_BUG_ON(LLOG_AGENT_ORIG_CTXT != 15); - BUILD_BUG_ON(LLOG_MAX_CTXTS != 16); - - /* Checks for struct llogd_conn_body */ - LASSERTF((int)sizeof(struct llogd_conn_body) == 40, "found %lld\n", - (long long)(int)sizeof(struct llogd_conn_body)); - LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_gen) == 0, "found %lld\n", - (long long)(int)offsetof(struct llogd_conn_body, lgdc_gen)); - LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen) == 16, "found %lld\n", - (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_gen)); - LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_logid) == 16, "found 
%lld\n", - (long long)(int)offsetof(struct llogd_conn_body, lgdc_logid)); - LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid) == 20, "found %lld\n", - (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_logid)); - LASSERTF((int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx) == 36, "found %lld\n", - (long long)(int)offsetof(struct llogd_conn_body, lgdc_ctxt_idx)); - LASSERTF((int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx) == 4, "found %lld\n", - (long long)(int)sizeof(((struct llogd_conn_body *)0)->lgdc_ctxt_idx)); - - /* Checks for struct fiemap_info_key */ - LASSERTF((int)sizeof(struct ll_fiemap_info_key) == 248, "found %lld\n", - (long long)(int)sizeof(struct ll_fiemap_info_key)); - LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_name[8]) == 8, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_name[8])); - LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_name[8]) == 1, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_name[8])); - LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_oa) == 8, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_oa)); - LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_oa) == 208, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_oa)); - LASSERTF((int)offsetof(struct ll_fiemap_info_key, lfik_fiemap) == 216, "found %lld\n", - (long long)(int)offsetof(struct ll_fiemap_info_key, lfik_fiemap)); - LASSERTF((int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_fiemap) == 32, "found %lld\n", - (long long)(int)sizeof(((struct ll_fiemap_info_key *)0)->lfik_fiemap)); - - /* Checks for struct mgs_target_info */ - LASSERTF((int)sizeof(struct mgs_target_info) == 4544, "found %lld\n", - (long long)(int)sizeof(struct mgs_target_info)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_lustre_ver) == 0, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, 
mti_lustre_ver)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_lustre_ver)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_stripe_index) == 4, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_stripe_index)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_stripe_index)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_config_ver) == 8, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_config_ver)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_config_ver) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_config_ver)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_flags) == 12, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_flags)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_flags)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_nid_count) == 16, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_nid_count)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nid_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nid_count)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_instance) == 20, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_instance)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_instance) == 4, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_instance)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_fsname) == 24, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_fsname)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_fsname) == 64, 
"found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_fsname)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_svname) == 88, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_svname)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_svname) == 64, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_svname)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_uuid) == 152, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_uuid)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_uuid) == 40, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_uuid)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_nids) == 192, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_nids)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_nids) == 256, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_nids)); - LASSERTF((int)offsetof(struct mgs_target_info, mti_params) == 448, "found %lld\n", - (long long)(int)offsetof(struct mgs_target_info, mti_params)); - LASSERTF((int)sizeof(((struct mgs_target_info *)0)->mti_params) == 4096, "found %lld\n", - (long long)(int)sizeof(((struct mgs_target_info *)0)->mti_params)); - - /* Checks for struct lustre_capa */ - LASSERTF((int)sizeof(struct lustre_capa) == 120, "found %lld\n", - (long long)(int)sizeof(struct lustre_capa)); - LASSERTF((int)offsetof(struct lustre_capa, lc_fid) == 0, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa, lc_fid)); - LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa *)0)->lc_fid)); - LASSERTF((int)offsetof(struct lustre_capa, lc_opc) == 16, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa, lc_opc)); - LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_opc) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa 
*)0)->lc_opc)); - LASSERTF((int)offsetof(struct lustre_capa, lc_uid) == 24, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa, lc_uid)); - LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_uid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa *)0)->lc_uid)); - LASSERTF((int)offsetof(struct lustre_capa, lc_gid) == 32, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa, lc_gid)); - LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_gid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa *)0)->lc_gid)); - LASSERTF((int)offsetof(struct lustre_capa, lc_flags) == 40, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa, lc_flags)); - LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa *)0)->lc_flags)); - LASSERTF((int)offsetof(struct lustre_capa, lc_keyid) == 44, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa, lc_keyid)); - LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_keyid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa *)0)->lc_keyid)); - LASSERTF((int)offsetof(struct lustre_capa, lc_timeout) == 48, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa, lc_timeout)); - LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_timeout) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa *)0)->lc_timeout)); - LASSERTF((int)offsetof(struct lustre_capa, lc_expiry) == 52, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa, lc_expiry)); - LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_expiry) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa *)0)->lc_expiry)); - BUILD_BUG_ON(CAPA_HMAC_MAX_LEN != 64); - LASSERTF((int)offsetof(struct lustre_capa, lc_hmac[64]) == 120, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa, lc_hmac[64])); - LASSERTF((int)sizeof(((struct lustre_capa *)0)->lc_hmac[64]) == 1, "found %lld\n", - (long 
long)(int)sizeof(((struct lustre_capa *)0)->lc_hmac[64])); - - /* Checks for struct lustre_capa_key */ - LASSERTF((int)sizeof(struct lustre_capa_key) == 72, "found %lld\n", - (long long)(int)sizeof(struct lustre_capa_key)); - LASSERTF((int)offsetof(struct lustre_capa_key, lk_seq) == 0, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa_key, lk_seq)); - LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_seq) == 8, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_seq)); - LASSERTF((int)offsetof(struct lustre_capa_key, lk_keyid) == 8, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa_key, lk_keyid)); - LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_keyid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_keyid)); - LASSERTF((int)offsetof(struct lustre_capa_key, lk_padding) == 12, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa_key, lk_padding)); - LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_padding)); - BUILD_BUG_ON(CAPA_HMAC_KEY_MAX_LEN != 56); - LASSERTF((int)offsetof(struct lustre_capa_key, lk_key[56]) == 72, "found %lld\n", - (long long)(int)offsetof(struct lustre_capa_key, lk_key[56])); - LASSERTF((int)sizeof(((struct lustre_capa_key *)0)->lk_key[56]) == 1, "found %lld\n", - (long long)(int)sizeof(((struct lustre_capa_key *)0)->lk_key[56])); - - /* Checks for struct getinfo_fid2path */ - LASSERTF((int)sizeof(struct getinfo_fid2path) == 32, "found %lld\n", - (long long)(int)sizeof(struct getinfo_fid2path)); - LASSERTF((int)offsetof(struct getinfo_fid2path, gf_fid) == 0, "found %lld\n", - (long long)(int)offsetof(struct getinfo_fid2path, gf_fid)); - LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_fid)); - LASSERTF((int)offsetof(struct getinfo_fid2path, gf_recno) == 
16, "found %lld\n", - (long long)(int)offsetof(struct getinfo_fid2path, gf_recno)); - LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_recno) == 8, "found %lld\n", - (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_recno)); - LASSERTF((int)offsetof(struct getinfo_fid2path, gf_linkno) == 24, "found %lld\n", - (long long)(int)offsetof(struct getinfo_fid2path, gf_linkno)); - LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno) == 4, "found %lld\n", - (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_linkno)); - LASSERTF((int)offsetof(struct getinfo_fid2path, gf_pathlen) == 28, "found %lld\n", - (long long)(int)offsetof(struct getinfo_fid2path, gf_pathlen)); - LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen) == 4, "found %lld\n", - (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_pathlen)); - LASSERTF((int)offsetof(struct getinfo_fid2path, gf_path[0]) == 32, "found %lld\n", - (long long)(int)offsetof(struct getinfo_fid2path, gf_path[0])); - LASSERTF((int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0]) == 1, "found %lld\n", - (long long)(int)sizeof(((struct getinfo_fid2path *)0)->gf_path[0])); - - /* Checks for struct fiemap */ - LASSERTF((int)sizeof(struct fiemap) == 32, "found %lld\n", - (long long)(int)sizeof(struct fiemap)); - LASSERTF((int)offsetof(struct fiemap, fm_start) == 0, "found %lld\n", - (long long)(int)offsetof(struct fiemap, fm_start)); - LASSERTF((int)sizeof(((struct fiemap *)0)->fm_start) == 8, "found %lld\n", - (long long)(int)sizeof(((struct fiemap *)0)->fm_start)); - LASSERTF((int)offsetof(struct fiemap, fm_length) == 8, "found %lld\n", - (long long)(int)offsetof(struct fiemap, fm_length)); - LASSERTF((int)sizeof(((struct fiemap *)0)->fm_length) == 8, "found %lld\n", - (long long)(int)sizeof(((struct fiemap *)0)->fm_length)); - LASSERTF((int)offsetof(struct fiemap, fm_flags) == 16, "found %lld\n", - (long long)(int)offsetof(struct fiemap, fm_flags)); - LASSERTF((int)sizeof(((struct 
fiemap *)0)->fm_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct fiemap *)0)->fm_flags)); - LASSERTF((int)offsetof(struct fiemap, fm_mapped_extents) == 20, "found %lld\n", - (long long)(int)offsetof(struct fiemap, fm_mapped_extents)); - LASSERTF((int)sizeof(((struct fiemap *)0)->fm_mapped_extents) == 4, "found %lld\n", - (long long)(int)sizeof(((struct fiemap *)0)->fm_mapped_extents)); - LASSERTF((int)offsetof(struct fiemap, fm_extent_count) == 24, "found %lld\n", - (long long)(int)offsetof(struct fiemap, fm_extent_count)); - LASSERTF((int)sizeof(((struct fiemap *)0)->fm_extent_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct fiemap *)0)->fm_extent_count)); - LASSERTF((int)offsetof(struct fiemap, fm_reserved) == 28, "found %lld\n", - (long long)(int)offsetof(struct fiemap, fm_reserved)); - LASSERTF((int)sizeof(((struct fiemap *)0)->fm_reserved) == 4, "found %lld\n", - (long long)(int)sizeof(((struct fiemap *)0)->fm_reserved)); - LASSERTF((int)offsetof(struct fiemap, fm_extents) == 32, "found %lld\n", - (long long)(int)offsetof(struct fiemap, fm_extents)); - LASSERTF((int)sizeof(((struct fiemap *)0)->fm_extents) == 0, "found %lld\n", - (long long)(int)sizeof(((struct fiemap *)0)->fm_extents)); - BUILD_BUG_ON(FIEMAP_FLAG_SYNC != 0x00000001); - BUILD_BUG_ON(FIEMAP_FLAG_XATTR != 0x00000002); - BUILD_BUG_ON(FIEMAP_FLAG_DEVICE_ORDER != 0x40000000); - - /* Checks for struct fiemap_extent */ - LASSERTF((int)sizeof(struct fiemap_extent) == 56, "found %lld\n", - (long long)(int)sizeof(struct fiemap_extent)); - LASSERTF((int)offsetof(struct fiemap_extent, fe_logical) == 0, "found %lld\n", - (long long)(int)offsetof(struct fiemap_extent, fe_logical)); - LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_logical) == 8, "found %lld\n", - (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_logical)); - LASSERTF((int)offsetof(struct fiemap_extent, fe_physical) == 8, "found %lld\n", - (long long)(int)offsetof(struct fiemap_extent, fe_physical)); - 
LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_physical) == 8, "found %lld\n", - (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_physical)); - LASSERTF((int)offsetof(struct fiemap_extent, fe_length) == 16, "found %lld\n", - (long long)(int)offsetof(struct fiemap_extent, fe_length)); - LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_length) == 8, "found %lld\n", - (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_length)); - LASSERTF((int)offsetof(struct fiemap_extent, fe_flags) == 40, "found %lld\n", - (long long)(int)offsetof(struct fiemap_extent, fe_flags)); - LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_flags)); - LASSERTF((int)offsetof(struct fiemap_extent, fe_reserved[0]) == 44, "found %lld\n", - (long long)(int)offsetof(struct fiemap_extent, fe_reserved[0])); - LASSERTF((int)sizeof(((struct fiemap_extent *)0)->fe_reserved[0]) == 4, "found %lld\n", - (long long)(int)sizeof(((struct fiemap_extent *)0)->fe_reserved[0])); - BUILD_BUG_ON(FIEMAP_EXTENT_LAST != 0x00000001); - BUILD_BUG_ON(FIEMAP_EXTENT_UNKNOWN != 0x00000002); - BUILD_BUG_ON(FIEMAP_EXTENT_DELALLOC != 0x00000004); - BUILD_BUG_ON(FIEMAP_EXTENT_ENCODED != 0x00000008); - BUILD_BUG_ON(FIEMAP_EXTENT_DATA_ENCRYPTED != 0x00000080); - BUILD_BUG_ON(FIEMAP_EXTENT_NOT_ALIGNED != 0x00000100); - BUILD_BUG_ON(FIEMAP_EXTENT_DATA_INLINE != 0x00000200); - BUILD_BUG_ON(FIEMAP_EXTENT_DATA_TAIL != 0x00000400); - BUILD_BUG_ON(FIEMAP_EXTENT_UNWRITTEN != 0x00000800); - BUILD_BUG_ON(FIEMAP_EXTENT_MERGED != 0x00001000); - BUILD_BUG_ON(FIEMAP_EXTENT_NO_DIRECT != 0x40000000); - BUILD_BUG_ON(FIEMAP_EXTENT_NET != 0x80000000); - - /* Checks for type posix_acl_xattr_entry */ - LASSERTF((int)sizeof(struct posix_acl_xattr_entry) == 8, "found %lld\n", - (long long)(int)sizeof(struct posix_acl_xattr_entry)); - LASSERTF((int)offsetof(struct posix_acl_xattr_entry, e_tag) == 0, "found %lld\n", - (long 
long)(int)offsetof(struct posix_acl_xattr_entry, e_tag)); - LASSERTF((int)sizeof(((struct posix_acl_xattr_entry *)0)->e_tag) == 2, "found %lld\n", - (long long)(int)sizeof(((struct posix_acl_xattr_entry *)0)->e_tag)); - LASSERTF((int)offsetof(struct posix_acl_xattr_entry, e_perm) == 2, "found %lld\n", - (long long)(int)offsetof(struct posix_acl_xattr_entry, e_perm)); - LASSERTF((int)sizeof(((struct posix_acl_xattr_entry *)0)->e_perm) == 2, "found %lld\n", - (long long)(int)sizeof(((struct posix_acl_xattr_entry *)0)->e_perm)); - LASSERTF((int)offsetof(struct posix_acl_xattr_entry, e_id) == 4, "found %lld\n", - (long long)(int)offsetof(struct posix_acl_xattr_entry, e_id)); - LASSERTF((int)sizeof(((struct posix_acl_xattr_entry *)0)->e_id) == 4, "found %lld\n", - (long long)(int)sizeof(((struct posix_acl_xattr_entry *)0)->e_id)); - - /* Checks for type posix_acl_xattr_header */ - LASSERTF((int)sizeof(struct posix_acl_xattr_header) == 4, "found %lld\n", - (long long)(int)sizeof(struct posix_acl_xattr_header)); - LASSERTF((int)offsetof(struct posix_acl_xattr_header, a_version) == 0, "found %lld\n", - (long long)(int)offsetof(struct posix_acl_xattr_header, a_version)); - LASSERTF((int)sizeof(((struct posix_acl_xattr_header *)0)->a_version) == 4, "found %lld\n", - (long long)(int)sizeof(((struct posix_acl_xattr_header *)0)->a_version)); - - /* Checks for struct link_ea_header */ - LASSERTF((int)sizeof(struct link_ea_header) == 24, "found %lld\n", - (long long)(int)sizeof(struct link_ea_header)); - LASSERTF((int)offsetof(struct link_ea_header, leh_magic) == 0, "found %lld\n", - (long long)(int)offsetof(struct link_ea_header, leh_magic)); - LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_magic) == 4, "found %lld\n", - (long long)(int)sizeof(((struct link_ea_header *)0)->leh_magic)); - LASSERTF((int)offsetof(struct link_ea_header, leh_reccount) == 4, "found %lld\n", - (long long)(int)offsetof(struct link_ea_header, leh_reccount)); - LASSERTF((int)sizeof(((struct 
link_ea_header *)0)->leh_reccount) == 4, "found %lld\n", - (long long)(int)sizeof(((struct link_ea_header *)0)->leh_reccount)); - LASSERTF((int)offsetof(struct link_ea_header, leh_len) == 8, "found %lld\n", - (long long)(int)offsetof(struct link_ea_header, leh_len)); - LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_len) == 8, "found %lld\n", - (long long)(int)sizeof(((struct link_ea_header *)0)->leh_len)); - LASSERTF((int)offsetof(struct link_ea_header, leh_overflow_time) == 16, "found %lld\n", - (long long)(int)offsetof(struct link_ea_header, leh_overflow_time)); - LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_overflow_time) == 4, "found %lld\n", - (long long)(int)sizeof(((struct link_ea_header *)0)->leh_overflow_time)); - LASSERTF((int)offsetof(struct link_ea_header, leh_padding) == 20, "found %lld\n", - (long long)(int)offsetof(struct link_ea_header, leh_padding)); - LASSERTF((int)sizeof(((struct link_ea_header *)0)->leh_padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct link_ea_header *)0)->leh_padding)); - BUILD_BUG_ON(LINK_EA_MAGIC != 0x11EAF1DFUL); - - /* Checks for struct link_ea_entry */ - LASSERTF((int)sizeof(struct link_ea_entry) == 18, "found %lld\n", - (long long)(int)sizeof(struct link_ea_entry)); - LASSERTF((int)offsetof(struct link_ea_entry, lee_reclen) == 0, "found %lld\n", - (long long)(int)offsetof(struct link_ea_entry, lee_reclen)); - LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_reclen) == 2, "found %lld\n", - (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_reclen)); - LASSERTF((int)offsetof(struct link_ea_entry, lee_parent_fid) == 2, "found %lld\n", - (long long)(int)offsetof(struct link_ea_entry, lee_parent_fid)); - LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_parent_fid)); - LASSERTF((int)offsetof(struct link_ea_entry, lee_name) == 18, "found %lld\n", - (long long)(int)offsetof(struct 
link_ea_entry, lee_name)); - LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, "found %lld\n", - (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name)); - - /* Checks for struct layout_intent */ - LASSERTF((int)sizeof(struct layout_intent) == 24, "found %lld\n", - (long long)(int)sizeof(struct layout_intent)); - LASSERTF((int)offsetof(struct layout_intent, li_opc) == 0, "found %lld\n", - (long long)(int)offsetof(struct layout_intent, li_opc)); - LASSERTF((int)sizeof(((struct layout_intent *)0)->li_opc) == 4, "found %lld\n", - (long long)(int)sizeof(((struct layout_intent *)0)->li_opc)); - LASSERTF((int)offsetof(struct layout_intent, li_flags) == 4, "found %lld\n", - (long long)(int)offsetof(struct layout_intent, li_flags)); - LASSERTF((int)sizeof(((struct layout_intent *)0)->li_flags) == 4, "found %lld\n", - (long long)(int)sizeof(((struct layout_intent *)0)->li_flags)); - LASSERTF((int)offsetof(struct layout_intent, li_start) == 8, "found %lld\n", - (long long)(int)offsetof(struct layout_intent, li_start)); - LASSERTF((int)sizeof(((struct layout_intent *)0)->li_start) == 8, "found %lld\n", - (long long)(int)sizeof(((struct layout_intent *)0)->li_start)); - LASSERTF((int)offsetof(struct layout_intent, li_end) == 16, "found %lld\n", - (long long)(int)offsetof(struct layout_intent, li_end)); - LASSERTF((int)sizeof(((struct layout_intent *)0)->li_end) == 8, "found %lld\n", - (long long)(int)sizeof(((struct layout_intent *)0)->li_end)); - LASSERTF(LAYOUT_INTENT_ACCESS == 0, "found %lld\n", - (long long)LAYOUT_INTENT_ACCESS); - LASSERTF(LAYOUT_INTENT_READ == 1, "found %lld\n", - (long long)LAYOUT_INTENT_READ); - LASSERTF(LAYOUT_INTENT_WRITE == 2, "found %lld\n", - (long long)LAYOUT_INTENT_WRITE); - LASSERTF(LAYOUT_INTENT_GLIMPSE == 3, "found %lld\n", - (long long)LAYOUT_INTENT_GLIMPSE); - LASSERTF(LAYOUT_INTENT_TRUNC == 4, "found %lld\n", - (long long)LAYOUT_INTENT_TRUNC); - LASSERTF(LAYOUT_INTENT_RELEASE == 5, "found %lld\n", - (long 
long)LAYOUT_INTENT_RELEASE); - LASSERTF(LAYOUT_INTENT_RESTORE == 6, "found %lld\n", - (long long)LAYOUT_INTENT_RESTORE); - - /* Checks for struct hsm_action_item */ - LASSERTF((int)sizeof(struct hsm_action_item) == 72, "found %lld\n", - (long long)(int)sizeof(struct hsm_action_item)); - LASSERTF((int)offsetof(struct hsm_action_item, hai_len) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_item, hai_len)); - LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_len) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_len)); - LASSERTF((int)offsetof(struct hsm_action_item, hai_action) == 4, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_item, hai_action)); - LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_action) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_action)); - LASSERTF((int)offsetof(struct hsm_action_item, hai_fid) == 8, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_item, hai_fid)); - LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_fid)); - LASSERTF((int)offsetof(struct hsm_action_item, hai_dfid) == 24, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_item, hai_dfid)); - LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_dfid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_dfid)); - LASSERTF((int)offsetof(struct hsm_action_item, hai_extent) == 40, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_item, hai_extent)); - LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_extent) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_extent)); - LASSERTF((int)offsetof(struct hsm_action_item, hai_cookie) == 56, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_item, hai_cookie)); - LASSERTF((int)sizeof(((struct hsm_action_item 
*)0)->hai_cookie) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_cookie)); - LASSERTF((int)offsetof(struct hsm_action_item, hai_gid) == 64, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_item, hai_gid)); - LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_gid) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_gid)); - LASSERTF((int)offsetof(struct hsm_action_item, hai_data) == 72, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_item, hai_data)); - LASSERTF((int)sizeof(((struct hsm_action_item *)0)->hai_data) == 0, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_item *)0)->hai_data)); - - /* Checks for struct hsm_action_list */ - LASSERTF((int)sizeof(struct hsm_action_list) == 32, "found %lld\n", - (long long)(int)sizeof(struct hsm_action_list)); - LASSERTF((int)offsetof(struct hsm_action_list, hal_version) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_list, hal_version)); - LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_version) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_version)); - LASSERTF((int)offsetof(struct hsm_action_list, hal_count) == 4, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_list, hal_count)); - LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_count) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_count)); - LASSERTF((int)offsetof(struct hsm_action_list, hal_compound_id) == 8, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_list, hal_compound_id)); - LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_compound_id) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_compound_id)); - LASSERTF((int)offsetof(struct hsm_action_list, hal_flags) == 16, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_list, hal_flags)); - LASSERTF((int)sizeof(((struct 
hsm_action_list *)0)->hal_flags) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_flags)); - LASSERTF((int)offsetof(struct hsm_action_list, hal_archive_id) == 24, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_list, hal_archive_id)); - LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_archive_id) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_archive_id)); - LASSERTF((int)offsetof(struct hsm_action_list, padding1) == 28, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_list, padding1)); - LASSERTF((int)sizeof(((struct hsm_action_list *)0)->padding1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_list *)0)->padding1)); - LASSERTF((int)offsetof(struct hsm_action_list, hal_fsname) == 32, "found %lld\n", - (long long)(int)offsetof(struct hsm_action_list, hal_fsname)); - LASSERTF((int)sizeof(((struct hsm_action_list *)0)->hal_fsname) == 0, "found %lld\n", - (long long)(int)sizeof(((struct hsm_action_list *)0)->hal_fsname)); - - /* Checks for struct hsm_progress */ - LASSERTF((int)sizeof(struct hsm_progress) == 48, "found %lld\n", - (long long)(int)sizeof(struct hsm_progress)); - LASSERTF((int)offsetof(struct hsm_progress, hp_fid) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress, hp_fid)); - LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress *)0)->hp_fid)); - LASSERTF((int)offsetof(struct hsm_progress, hp_cookie) == 16, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress, hp_cookie)); - LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_cookie) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress *)0)->hp_cookie)); - LASSERTF((int)offsetof(struct hsm_progress, hp_extent) == 24, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress, hp_extent)); - LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_extent) == 16, "found 
%lld\n", - (long long)(int)sizeof(((struct hsm_progress *)0)->hp_extent)); - LASSERTF((int)offsetof(struct hsm_progress, hp_flags) == 40, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress, hp_flags)); - LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_flags) == 2, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress *)0)->hp_flags)); - LASSERTF((int)offsetof(struct hsm_progress, hp_errval) == 42, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress, hp_errval)); - LASSERTF((int)sizeof(((struct hsm_progress *)0)->hp_errval) == 2, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress *)0)->hp_errval)); - LASSERTF((int)offsetof(struct hsm_progress, padding) == 44, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress, padding)); - LASSERTF((int)sizeof(((struct hsm_progress *)0)->padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress *)0)->padding)); - LASSERTF(HP_FLAG_COMPLETED == 0x01, "found 0x%.8x\n", - HP_FLAG_COMPLETED); - LASSERTF(HP_FLAG_RETRY == 0x02, "found 0x%.8x\n", - HP_FLAG_RETRY); - - LASSERTF((int)offsetof(struct hsm_copy, hc_data_version) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_copy, hc_data_version)); - LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_data_version) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_copy *)0)->hc_data_version)); - LASSERTF((int)offsetof(struct hsm_copy, hc_flags) == 8, "found %lld\n", - (long long)(int)offsetof(struct hsm_copy, hc_flags)); - LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_flags) == 2, "found %lld\n", - (long long)(int)sizeof(((struct hsm_copy *)0)->hc_flags)); - LASSERTF((int)offsetof(struct hsm_copy, hc_errval) == 10, "found %lld\n", - (long long)(int)offsetof(struct hsm_copy, hc_errval)); - LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_errval) == 2, "found %lld\n", - (long long)(int)sizeof(((struct hsm_copy *)0)->hc_errval)); - LASSERTF((int)offsetof(struct hsm_copy, padding) == 12, "found 
%lld\n", - (long long)(int)offsetof(struct hsm_copy, padding)); - LASSERTF((int)sizeof(((struct hsm_copy *)0)->padding) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_copy *)0)->padding)); - LASSERTF((int)offsetof(struct hsm_copy, hc_hai) == 16, "found %lld\n", - (long long)(int)offsetof(struct hsm_copy, hc_hai)); - LASSERTF((int)sizeof(((struct hsm_copy *)0)->hc_hai) == 72, "found %lld\n", - (long long)(int)sizeof(((struct hsm_copy *)0)->hc_hai)); - - /* Checks for struct hsm_progress_kernel */ - LASSERTF((int)sizeof(struct hsm_progress_kernel) == 64, "found %lld\n", - (long long)(int)sizeof(struct hsm_progress_kernel)); - LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_fid) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress_kernel, hpk_fid)); - LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_fid)); - LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_cookie) == 16, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress_kernel, hpk_cookie)); - LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_cookie)); - LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_extent) == 24, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress_kernel, hpk_extent)); - LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_extent)); - LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_flags) == 40, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress_kernel, hpk_flags)); - LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags) == 2, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_flags)); - LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_errval) == 42, "found 
%lld\n", - (long long)(int)offsetof(struct hsm_progress_kernel, hpk_errval)); - LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval) == 2, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_errval)); - LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding1) == 44, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding1)); - LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding1)); - LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_data_version) == 48, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress_kernel, hpk_data_version)); - LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_data_version)); - LASSERTF((int)offsetof(struct hsm_progress_kernel, hpk_padding2) == 56, "found %lld\n", - (long long)(int)offsetof(struct hsm_progress_kernel, hpk_padding2)); - LASSERTF((int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_progress_kernel *)0)->hpk_padding2)); - - /* Checks for struct hsm_user_item */ - LASSERTF((int)sizeof(struct hsm_user_item) == 32, "found %lld\n", - (long long)(int)sizeof(struct hsm_user_item)); - LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_user_item, hui_fid)); - LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid)); - LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, "found %lld\n", - (long long)(int)offsetof(struct hsm_user_item, hui_extent)); - LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent)); 
- - /* Checks for struct hsm_user_state */ - LASSERTF((int)sizeof(struct hsm_user_state) == 32, "found %lld\n", - (long long)(int)sizeof(struct hsm_user_state)); - LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_user_state, hus_states)); - LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states)); - LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_id) == 4, "found %lld\n", - (long long)(int)offsetof(struct hsm_user_state, hus_archive_id)); - LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_id) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_id)); - LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, "found %lld\n", - (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state)); - LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state)); - LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, "found %lld\n", - (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action)); - LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action)); - LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, "found %lld\n", - (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location)); - LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location)); - - /* Checks for struct hsm_state_set */ - LASSERTF((int)sizeof(struct hsm_state_set) == 24, "found %lld\n", - (long long)(int)sizeof(struct hsm_state_set)); - 
LASSERTF((int)offsetof(struct hsm_state_set, hss_valid) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_state_set, hss_valid)); - LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_valid) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_valid)); - LASSERTF((int)offsetof(struct hsm_state_set, hss_archive_id) == 4, "found %lld\n", - (long long)(int)offsetof(struct hsm_state_set, hss_archive_id)); - LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_archive_id) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_archive_id)); - LASSERTF((int)offsetof(struct hsm_state_set, hss_setmask) == 8, "found %lld\n", - (long long)(int)offsetof(struct hsm_state_set, hss_setmask)); - LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_setmask) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_setmask)); - LASSERTF((int)offsetof(struct hsm_state_set, hss_clearmask) == 16, "found %lld\n", - (long long)(int)offsetof(struct hsm_state_set, hss_clearmask)); - LASSERTF((int)sizeof(((struct hsm_state_set *)0)->hss_clearmask) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_state_set *)0)->hss_clearmask)); - - /* Checks for struct hsm_current_action */ - LASSERTF((int)sizeof(struct hsm_current_action) == 24, "found %lld\n", - (long long)(int)sizeof(struct hsm_current_action)); - LASSERTF((int)offsetof(struct hsm_current_action, hca_state) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_current_action, hca_state)); - LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_state) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_state)); - LASSERTF((int)offsetof(struct hsm_current_action, hca_action) == 4, "found %lld\n", - (long long)(int)offsetof(struct hsm_current_action, hca_action)); - LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_action) == 4, "found %lld\n", - (long long)(int)sizeof(((struct 
hsm_current_action *)0)->hca_action)); - LASSERTF((int)offsetof(struct hsm_current_action, hca_location) == 8, "found %lld\n", - (long long)(int)offsetof(struct hsm_current_action, hca_location)); - LASSERTF((int)sizeof(((struct hsm_current_action *)0)->hca_location) == 16, "found %lld\n", - (long long)(int)sizeof(((struct hsm_current_action *)0)->hca_location)); - - /* Checks for struct hsm_request */ - LASSERTF((int)sizeof(struct hsm_request) == 24, "found %lld\n", - (long long)(int)sizeof(struct hsm_request)); - LASSERTF((int)offsetof(struct hsm_request, hr_action) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_request, hr_action)); - LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_action) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_request *)0)->hr_action)); - LASSERTF((int)offsetof(struct hsm_request, hr_archive_id) == 4, "found %lld\n", - (long long)(int)offsetof(struct hsm_request, hr_archive_id)); - LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_archive_id) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_request *)0)->hr_archive_id)); - LASSERTF((int)offsetof(struct hsm_request, hr_flags) == 8, "found %lld\n", - (long long)(int)offsetof(struct hsm_request, hr_flags)); - LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_flags) == 8, "found %lld\n", - (long long)(int)sizeof(((struct hsm_request *)0)->hr_flags)); - LASSERTF((int)offsetof(struct hsm_request, hr_itemcount) == 16, "found %lld\n", - (long long)(int)offsetof(struct hsm_request, hr_itemcount)); - LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_itemcount) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_request *)0)->hr_itemcount)); - LASSERTF((int)offsetof(struct hsm_request, hr_data_len) == 20, "found %lld\n", - (long long)(int)offsetof(struct hsm_request, hr_data_len)); - LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n", - (long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len)); - 
LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n", - (unsigned int)HSM_FORCE_ACTION); - LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n", - (unsigned int)HSM_GHOST_COPY); - - /* Checks for struct hsm_user_request */ - LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n", - (long long)(int)sizeof(struct hsm_user_request)); - LASSERTF((int)offsetof(struct hsm_user_request, hur_request) == 0, "found %lld\n", - (long long)(int)offsetof(struct hsm_user_request, hur_request)); - LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_request) == 24, "found %lld\n", - (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_request)); - LASSERTF((int)offsetof(struct hsm_user_request, hur_user_item) == 24, "found %lld\n", - (long long)(int)offsetof(struct hsm_user_request, hur_user_item)); - LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_user_item) == 0, "found %lld\n", - (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_user_item)); - - /* Checks for struct hsm_user_import */ - LASSERTF(sizeof(struct hsm_user_import) == 48, "found %lld\n", - (long long)sizeof(struct hsm_user_import)); - LASSERTF(offsetof(struct hsm_user_import, hui_size) == 0, - "found %lld\n", - (long long)offsetof(struct hsm_user_import, hui_size)); - LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_size) == 8, - "found %lld\n", - (long long)sizeof(((struct hsm_user_import *)0)->hui_size)); - LASSERTF(offsetof(struct hsm_user_import, hui_uid) == 32, - "found %lld\n", - (long long)offsetof(struct hsm_user_import, hui_uid)); - LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_uid) == 4, - "found %lld\n", - (long long)sizeof(((struct hsm_user_import *)0)->hui_uid)); - LASSERTF(offsetof(struct hsm_user_import, hui_gid) == 36, - "found %lld\n", - (long long)offsetof(struct hsm_user_import, hui_gid)); - LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_gid) == 4, - "found %lld\n", - (long long)sizeof(((struct hsm_user_import *)0)->hui_gid)); - 
LASSERTF(offsetof(struct hsm_user_import, hui_mode) == 40, - "found %lld\n", - (long long)offsetof(struct hsm_user_import, hui_mode)); - LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mode) == 4, - "found %lld\n", - (long long)sizeof(((struct hsm_user_import *)0)->hui_mode)); - LASSERTF(offsetof(struct hsm_user_import, hui_atime) == 8, - "found %lld\n", - (long long)offsetof(struct hsm_user_import, hui_atime)); - LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime) == 8, - "found %lld\n", - (long long)sizeof(((struct hsm_user_import *)0)->hui_atime)); - LASSERTF(offsetof(struct hsm_user_import, hui_atime_ns) == 24, - "found %lld\n", - (long long)(int)offsetof(struct hsm_user_import, hui_atime_ns)); - LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_atime_ns) == 4, - "found %lld\n", - (long long)sizeof(((struct hsm_user_import *)0)->hui_atime_ns)); - LASSERTF(offsetof(struct hsm_user_import, hui_mtime) == 16, - "found %lld\n", - (long long)offsetof(struct hsm_user_import, hui_mtime)); - LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime) == 8, - "found %lld\n", - (long long)sizeof(((struct hsm_user_import *)0)->hui_mtime)); - LASSERTF(offsetof(struct hsm_user_import, hui_mtime_ns) == 28, - "found %lld\n", - (long long)offsetof(struct hsm_user_import, hui_mtime_ns)); - LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_mtime_ns) == 4, - "found %lld\n", - (long long)sizeof(((struct hsm_user_import *)0)->hui_mtime_ns)); - LASSERTF(offsetof(struct hsm_user_import, hui_archive_id) == 44, - "found %lld\n", - (long long)offsetof(struct hsm_user_import, hui_archive_id)); - LASSERTF(sizeof(((struct hsm_user_import *)0)->hui_archive_id) == 4, - "found %lld\n", - (long long)sizeof(((struct hsm_user_import *)0)->hui_archive_id)); -} diff --git a/drivers/staging/lustre/sysfs-fs-lustre b/drivers/staging/lustre/sysfs-fs-lustre deleted file mode 100644 index 8691c6543a9c..000000000000 --- a/drivers/staging/lustre/sysfs-fs-lustre +++ /dev/null @@ -1,654 +0,0 @@ 
-What: /sys/fs/lustre/version -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows current running lustre version. - -What: /sys/fs/lustre/pinger -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows if the lustre module has pinger support. - "on" means yes and "off" means no. - -What: /sys/fs/lustre/health_check -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows whether the current system state is believed to be "healthy", - "NOT HEALTHY", or "LBUG" whenever lustre has experienced - an internal assertion failure - -What: /sys/fs/lustre/jobid_name -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Currently running job "name" for this node to be transferred - to Lustre servers for purposes of QoS and statistics gathering. - Writing into this file will change the name, reading outputs - currently set value. - -What: /sys/fs/lustre/jobid_var -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Control file for lustre "jobstats" functionality, write new - value from the list below to change the mode: - disable - disable job name reporting to the servers (default) - procname_uid - form the job name as the current running - command name and pid with a dot in between - e.g. dd.1253 - nodelocal - use jobid_name value from above. - -What: /sys/fs/lustre/timeout -Date: June 2015 -Contact: "Oleg Drokin" -Description: - Controls "lustre timeout" variable, also known as obd_timeout - in some old manual. In the past obd_timeout was of paramount - importance as the timeout value used everywhere and where - other timeouts were derived from. These days it's much less - important as network timeouts are mostly determined by - AT (adaptive timeouts). - Unit: seconds, default: 100 - -What: /sys/fs/lustre/max_dirty_mb -Date: June 2015 -Contact: "Oleg Drokin" -Description: - Controls total number of dirty cache (in megabytes) allowed - across all mounted lustre filesystems.
- Since writeout of dirty pages in Lustre is somewhat expensive, - when you allow too many dirty pages, this might lead to - performance degradations as kernel tries to desperately - find some pages to free/writeout. - Default 1/2 RAM. Min value 4, max value 9/10 of RAM. - -What: /sys/fs/lustre/debug_peer_on_timeout -Date: June 2015 -Contact: "Oleg Drokin" -Description: - Control if lnet debug information should be printed when - an RPC timeout occurs. - 0 disabled (default) - 1 enabled - -What: /sys/fs/lustre/dump_on_timeout -Date: June 2015 -Contact: "Oleg Drokin" -Description: - Controls if Lustre debug log should be dumped when an RPC - timeout occurs. This is useful if your debug buffer typically - rolls over by the time you notice RPC timeouts. - -What: /sys/fs/lustre/dump_on_eviction -Date: June 2015 -Contact: "Oleg Drokin" -Description: - Controls if Lustre debug log should be dumped when this - client is evicted from one of the servers. - This is useful if your debug buffer typically rolls over - by the time you notice the eviction event. - -What: /sys/fs/lustre/at_min -Date: July 2015 -Contact: "Oleg Drokin" -Description: - Controls minimum adaptive timeout in seconds. If you encounter - a case where clients time out due to server-reported processing - time being too short, you might consider increasing this value. - One common case of this is if the underlying network has - unpredictable long delays. - Default: 0 - -What: /sys/fs/lustre/at_max -Date: July 2015 -Contact: "Oleg Drokin" -Description: - Controls maximum adaptive timeout in seconds. If at_max timeout - is reached for an RPC, the RPC will time out. - Some genuinely slow network hardware might warrant increasing - this value. - Setting this value to 0 disables Adaptive Timeouts - functionality and old-style obd_timeout value is then used.
- Default: 600 - -What: /sys/fs/lustre/at_extra -Date: July 2015 -Contact: "Oleg Drokin" -Description: - Controls how much extra time to request for unfinished requests - in processing in seconds. Normally a server-side parameter, it - is also used on the client for responses to various LDLM ASTs - that are handled with a special server thread on the client. - This is a way for the servers to ask the clients not to time - out the request that reached current servicing time estimate - yet and give it some more time. - Default: 30 - -What: /sys/fs/lustre/at_early_margin -Date: July 2015 -Contact: "Oleg Drokin" -Description: - Controls when to send the early reply for requests that are - about to time out as an offset to the estimated service time in - seconds. - Default: 5 - -What: /sys/fs/lustre/at_history -Date: July 2015 -Contact: "Oleg Drokin" -Description: - Controls for how many seconds to remember slowest events - encountered by adaptive timeouts code. - Default: 600 - -What: /sys/fs/lustre/llite/-/blocksize -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Biggest blocksize on object storage server for this filesystem. - -What: /sys/fs/lustre/llite/-/kbytestotal -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows total number of kilobytes of space on this filesystem - -What: /sys/fs/lustre/llite/-/kbytesfree -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows total number of free kilobytes of space on this filesystem - -What: /sys/fs/lustre/llite/-/kbytesavail -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows total number of free kilobytes of space on this filesystem - actually available for use (taking into account per-client - grants and filesystem reservations). - -What: /sys/fs/lustre/llite/-/filestotal -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows total number of inodes on the filesystem.
- -What: /sys/fs/lustre/llite/-/filesfree -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows estimated number of free inodes on the filesystem - -What: /sys/fs/lustre/llite/-/client_type -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows whether this filesystem considers this client to be - compute cluster-local or remote. Remote clients have - additional uid/gid converting logic applied. - -What: /sys/fs/lustre/llite/-/fstype -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows filesystem type of the filesystem - -What: /sys/fs/lustre/llite/-/uuid -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows this filesystem superblock uuid - -What: /sys/fs/lustre/llite/-/max_read_ahead_mb -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Sets maximum number of megabytes in system memory to be - given to read-ahead cache. - -What: /sys/fs/lustre/llite/-/max_read_ahead_per_file_mb -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Sets maximum number of megabytes to read-ahead for a single file - -What: /sys/fs/lustre/llite/-/max_read_ahead_whole_mb -Date: May 2015 -Contact: "Oleg Drokin" -Description: - For small reads, how many megabytes to actually request from - the server as initial read-ahead. - -What: /sys/fs/lustre/llite/-/checksum_pages -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Enables or disables per-page checksum at llite layer, before - the pages are actually given to lower level for network transfer - -What: /sys/fs/lustre/llite/-/stats_track_pid -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Limit Lustre vfs operations gathering to just a single pid. - 0 to track everything. - -What: /sys/fs/lustre/llite/-/stats_track_ppid -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Limit Lustre vfs operations gathering to just a single ppid. - 0 to track everything.
- -What: /sys/fs/lustre/llite/-/stats_track_gid -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Limit Lustre vfs operations gathering to just a single gid. - 0 to track everything. - -What: /sys/fs/lustre/llite/-/statahead_max -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls maximum number of statahead requests to send when - sequential readdir+stat pattern is detected. - -What: /sys/fs/lustre/llite/-/statahead_agl -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls if AGL (async glimpse ahead - obtain object information - from OSTs in parallel with MDS during statahead) should be - enabled or disabled. - 0 to disable, 1 to enable. - -What: /sys/fs/lustre/llite/-/lazystatfs -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls statfs(2) behaviour in the face of down servers. - If 0, always wait for all servers to come online, - if 1, ignore inactive servers. - -What: /sys/fs/lustre/llite/-/max_easize -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows maximum number of bytes file striping data could be - in current configuration of storage. - -What: /sys/fs/lustre/llite/-/default_easize -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows maximum observed file striping data seen by this - filesystem client instance. - -What: /sys/fs/lustre/llite/-/xattr_cache -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls extended attributes client-side cache. - 1 to enable, 0 to disable. - -What: /sys/fs/lustre/llite/-/unstable_stats -Date: Apr 2016 -Contact: "Oleg Drokin" -Description: - Shows number of pages that were sent and acknowledged by - server but were not yet committed and therefore still - pinned in client memory even though no longer dirty.
- -What: /sys/fs/lustre/ldlm/cancel_unused_locks_before_replay -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls if client should replay unused locks during recovery - If a client tends to have a lot of unused locks in LRU, - recovery times might become prolonged. - 1 - just locally cancel unused locks (default) - 0 - replay unused locks. - -What: /sys/fs/lustre/ldlm/namespaces//resource_count -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Displays number of lock resources (objects on which individual - locks are taken) currently allocated in this namespace. - -What: /sys/fs/lustre/ldlm/namespaces//lock_count -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Displays number of locks allocated in this namespace. - -What: /sys/fs/lustre/ldlm/namespaces//lru_size -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls and displays LRU size limit for unused locks for this - namespace. - 0 - LRU size is unlimited, controlled by server resources - positive number - number of locks to allow in lock LRU list - -What: /sys/fs/lustre/ldlm/namespaces//lock_unused_count -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Display number of locks currently sitting in the LRU list - of this namespace - -What: /sys/fs/lustre/ldlm/namespaces//lru_max_age -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Maximum number of milliseconds a lock could sit in LRU list - before client would voluntarily cancel it as unused. - -What: /sys/fs/lustre/ldlm/namespaces//early_lock_cancel -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls "early lock cancellation" feature on this namespace - if supported by the server. - When enabled, tries to preemptively cancel locks that would be - cancelled by various operations and bundle the cancellation - requests in the same RPC as the main operation, which results - in significant speedups due to reduced lock-pingpong RPCs.
- 0 - disabled - 1 - enabled (default) - -What: /sys/fs/lustre/ldlm/namespaces//pool/granted -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Displays number of granted locks in this namespace - -What: /sys/fs/lustre/ldlm/namespaces//pool/grant_rate -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of granted locks in this namespace during last - time interval - -What: /sys/fs/lustre/ldlm/namespaces//pool/cancel_rate -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of lock cancellations in this namespace during - last time interval - -What: /sys/fs/lustre/ldlm/namespaces//pool/grant_speed -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Calculated speed of lock granting (grant_rate - cancel_rate) - in this namespace - -What: /sys/fs/lustre/ldlm/namespaces//pool/grant_plan -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Estimated number of locks to be granted in the next time - interval in this namespace - -What: /sys/fs/lustre/ldlm/namespaces//pool/limit -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls number of allowed locks in this pool. - When lru_size is 0, this is the actual limit then. - -What: /sys/fs/lustre/ldlm/namespaces//pool/lock_volume_factor -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Multiplier for all lock volume calculations above. - Default is 1. Increase to make the client more aggressively - clean its lock LRU list for this namespace. - -What: /sys/fs/lustre/ldlm/namespaces//pool/server_lock_volume -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Calculated server lock volume. - -What: /sys/fs/lustre/ldlm/namespaces//pool/recalc_period -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls length of time between recalculation of above - values (in seconds). - -What: /sys/fs/lustre/ldlm/services/ldlm_cbd/threads_min -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls minimum number of ldlm callback threads to start.
- -What: /sys/fs/lustre/ldlm/services/ldlm_cbd/threads_max -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls maximum number of ldlm callback threads to start. - -What: /sys/fs/lustre/ldlm/services/ldlm_cbd/threads_started -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows actual number of ldlm callback threads running. - -What: /sys/fs/lustre/ldlm/services/ldlm_cbd/high_priority_ratio -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls what percentage of ldlm callback threads is dedicated - to "high priority" incoming requests. - -What: /sys/fs/lustre/{obdtype}/{connection_name}/blocksize -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Blocksize on backend filesystem for service behind this obd - device (or biggest blocksize for compound devices like lov - and lmv) - -What: /sys/fs/lustre/{obdtype}/{connection_name}/kbytestotal -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Total number of kilobytes of space on backend filesystem - for service behind this obd (or total amount for compound - devices like lov lmv) - -What: /sys/fs/lustre/{obdtype}/{connection_name}/kbytesfree -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of free kilobytes on backend filesystem for service - behind this obd (or total amount for compound devices - like lov lmv) - -What: /sys/fs/lustre/{obdtype}/{connection_name}/kbytesavail -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of kilobytes of free space on backend filesystem - for service behind this obd (or total amount for compound - devices like lov lmv) that is actually available for use - (taking into account per-client and filesystem reservations). - -What: /sys/fs/lustre/{obdtype}/{connection_name}/filestotal -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of inodes on backend filesystem for service behind this - obd. 
- -What: /sys/fs/lustre/{obdtype}/{connection_name}/filesfree -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of free inodes on backend filesystem for service - behind this obd. - -What: /sys/fs/lustre/mdc/{connection_name}/max_pages_per_rpc -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Maximum number of readdir pages to fit into a single readdir - RPC. - -What: /sys/fs/lustre/{mdc,osc}/{connection_name}/max_rpcs_in_flight -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Maximum number of parallel RPCs on the wire to allow on - this connection. Increasing this number would help on higher - latency links, but has a chance of overloading a server - if you have too many clients like this. - Default: 8 - -What: /sys/fs/lustre/osc/{connection_name}/max_pages_per_rpc -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Maximum number of pages to fit into a single RPC. - Typically bigger RPCs allow for better performance. - Default: however many pages to form 1M of data (256 pages - for 4K page sized platforms) - -What: /sys/fs/lustre/osc/{connection_name}/active -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls accessibility of this connection. If set to 0, - fail all accesses immediately. - -What: /sys/fs/lustre/osc/{connection_name}/checksums -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls whenever to checksum bulk RPC data over the wire - to this target. - 1: enable (default) ; 0: disable - -What: /sys/fs/lustre/osc/{connection_name}/contention_seconds -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls for how long to consider a file contended once - indicated as such by the server. - When a file is considered contended, all operations switch to - synchronous lockless mode to avoid cache and lock pingpong. 
- -What: /sys/fs/lustre/osc/{connection_name}/cur_dirty_bytes -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Displays how many dirty bytes is presently in the cache for this - target. - -What: /sys/fs/lustre/osc/{connection_name}/cur_grant_bytes -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows how many bytes we have as a "dirty cache" grant from the - server. Writing a value smaller than shown allows to release - some grant back to the server. - Dirty cache grant is a way Lustre ensures that cached successful - writes on client do not end up discarded by the server due to - lack of space later on. - -What: /sys/fs/lustre/osc/{connection_name}/cur_lost_grant_bytes -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Shows how many granted bytes were released to the server due - to lack of write activity on this client. - -What: /sys/fs/lustre/osc/{connection_name}/grant_shrink_interval -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of seconds with no write activity for this target - to start releasing dirty grant back to the server. - -What: /sys/fs/lustre/osc/{connection_name}/destroys_in_flight -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of DESTROY RPCs currently in flight to this target. - -What: /sys/fs/lustre/osc/{connection_name}/lockless_truncate -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls whether lockless truncate RPCs are allowed to this - target. - Lockless truncate causes server to perform the locking which - is beneficial if the truncate is not followed by a write - immediately. - 1: enable ; 0: disable (default) - -What: /sys/fs/lustre/osc/{connection_name}/max_dirty_mb -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls how much dirty data this client can accumulate - for this target. This is orthogonal to dirty grant and is - a hard limit even if the server would allow a bigger dirty - cache. 
- While allowing higher dirty cache is beneficial for write - performance, flushing write cache takes longer and as such - the node might be more prone to OOMs. - Having this value set too low might result in not being able - to sent too many parallel WRITE RPCs. - Default: 32 - -What: /sys/fs/lustre/osc/{connection_name}/resend_count -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Controls how many times to try and resend RPCs to this target - that failed with "recoverable" status, such as EAGAIN, - ENOMEM. - -What: /sys/fs/lustre/lov/{connection_name}/numobd -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of OSC targets managed by this LOV instance. - -What: /sys/fs/lustre/lov/{connection_name}/activeobd -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of OSC targets managed by this LOV instance that are - actually active. - -What: /sys/fs/lustre/lmv/{connection_name}/numobd -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of MDC targets managed by this LMV instance. - -What: /sys/fs/lustre/lmv/{connection_name}/activeobd -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Number of MDC targets managed by this LMV instance that are - actually active. - -What: /sys/fs/lustre/lmv/{connection_name}/placement -Date: May 2015 -Contact: "Oleg Drokin" -Description: - Determines policy of inode placement in case of multiple - metadata servers: - CHAR - based on a hash of the file name used at creation time - (Default) - NID - based on a hash of creating client network id. 
diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index ffe8179f5d41..073fe7537f6c 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -124,7 +124,6 @@ int main(int argc, char *argv[]) fprintf(fout, "fs_use_xattr reiserfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr jffs2 user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr gfs2 user_u:base_r:base_t;\n"); - fprintf(fout, "fs_use_xattr lustre user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_task eventpollfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_task pipefs user_u:base_r:base_t;\n"); -- cgit v1.2.3-70-g09d2