From 3af75db12e3d3537bc01c06bbdc876cb17e82b35 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 24 Aug 2017 09:22:57 +0900 Subject: iio: adc: ti-ads1015: fix comparator polarity setting The comparator polarity field in config register is not correctly initialized as per the interrupt trigger setting. Because the bitfield definision is wrong and bit shifting is missed. Fixes: d9f39babd8ba ("iio: adc: ti-ads1015: add threshold event support") Cc: Daniel Baluta Cc: Jonathan Cameron Signed-off-by: Akinobu Mita Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads1015.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c index d1210024f6bc..e0dc20488335 100644 --- a/drivers/iio/adc/ti-ads1015.c +++ b/drivers/iio/adc/ti-ads1015.c @@ -52,7 +52,7 @@ #define ADS1015_CFG_COMP_QUE_MASK GENMASK(1, 0) #define ADS1015_CFG_COMP_LAT_MASK BIT(2) -#define ADS1015_CFG_COMP_POL_MASK BIT(2) +#define ADS1015_CFG_COMP_POL_MASK BIT(3) #define ADS1015_CFG_COMP_MODE_MASK BIT(4) #define ADS1015_CFG_DR_MASK GENMASK(7, 5) #define ADS1015_CFG_MOD_MASK BIT(8) @@ -1017,10 +1017,12 @@ static int ads1015_probe(struct i2c_client *client, switch (irq_trig) { case IRQF_TRIGGER_LOW: - cfg_comp |= ADS1015_CFG_COMP_POL_LOW; + cfg_comp |= ADS1015_CFG_COMP_POL_LOW << + ADS1015_CFG_COMP_POL_SHIFT; break; case IRQF_TRIGGER_HIGH: - cfg_comp |= ADS1015_CFG_COMP_POL_HIGH; + cfg_comp |= ADS1015_CFG_COMP_POL_HIGH << + ADS1015_CFG_COMP_POL_SHIFT; break; default: return -EINVAL; -- cgit v1.2.3-70-g09d2 From c65e3d6ef4bfdc4c8460509f08507cf7dc026974 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 29 Aug 2017 13:45:11 +0200 Subject: iio: magnetometer: st_magn: fix drdy line configuration for LIS3MDL Data-ready line in LIS3MDL is routed to drdy pin and it is not possible to select a different INT pin. st_sensors_set_dataready_irq() assumes that if drdy int address is not exported in register map, irq trigger is not supported by the sensor and hw_irq_trigger is always false. Based on this configuration st_sensors_irq_thread does not consume generated interrupt causing an unhandled irq. Fix this taking into account status register address in st_sensors_set_dataready_irq() Fixes: 90efe0556292 (iio: st_sensors: harden interrupt handling) Signed-off-by: Lorenzo Bianconi Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/st_sensors/st_sensors_core.c | 11 ++++++++++- drivers/iio/magnetometer/st_magn_core.c | 4 ++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index d99bb1460fe2..02e833b14db0 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -463,8 +463,17 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable) u8 drdy_mask; struct st_sensor_data *sdata = iio_priv(indio_dev); - if (!sdata->sensor_settings->drdy_irq.addr) + if (!sdata->sensor_settings->drdy_irq.addr) { + /* + * there are some devices (e.g. LIS3MDL) where drdy line is + * routed to a given pin and it is not possible to select a + * different one. Take into account irq status register + * to understand if irq trigger can be properly supported + */ + if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) + sdata->hw_irq_trigger = enable; return 0; + } /* Enable/Disable the interrupt generator 1. */ if (sdata->sensor_settings->drdy_irq.ig1.en_addr > 0) { diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 703e77008652..2e36d746f5bc 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -315,6 +315,10 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { }, }, }, + .drdy_irq = { + /* drdy line is routed drdy pin */ + .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, + }, .multi_read_bit = true, .bootime = 2, }, -- cgit v1.2.3-70-g09d2 From 50662499f9112ecced68d064846a2f1fd9640b66 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 31 Aug 2017 15:52:18 +0200 Subject: ARM64: dts: meson-gx: Use correct mmc clock source 0 Now that the clock source 0 is properly described in the CCF, use it instead of assuming the default value (xtal) Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 6 +++--- arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 52f1687e7a09..8f0c0cb02157 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -661,21 +661,21 @@ &sd_emmc_a { clocks = <&clkc CLKID_SD_EMMC_A>, - <&xtal>, + <&clkc CLKID_SD_EMMC_A_CLK0>, <&clkc CLKID_FCLK_DIV2>; clock-names = "core", "clkin0", "clkin1"; }; &sd_emmc_b { clocks = <&clkc CLKID_SD_EMMC_B>, - <&xtal>, + <&clkc CLKID_SD_EMMC_B_CLK0>, <&clkc CLKID_FCLK_DIV2>; clock-names = "core", "clkin0", "clkin1"; }; &sd_emmc_c { clocks = <&clkc CLKID_SD_EMMC_C>, - <&xtal>, + <&clkc CLKID_SD_EMMC_C_CLK0>, <&clkc CLKID_FCLK_DIV2>; clock-names = "core", "clkin0", "clkin1"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index d6876e64979e..829d84db5fc5 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -603,21 +603,21 @@ &sd_emmc_a { clocks = <&clkc CLKID_SD_EMMC_A>, - <&xtal>, + <&clkc CLKID_SD_EMMC_A_CLK0>, <&clkc CLKID_FCLK_DIV2>; clock-names = "core", "clkin0", "clkin1"; }; &sd_emmc_b { clocks = <&clkc CLKID_SD_EMMC_B>, - <&xtal>, + <&clkc CLKID_SD_EMMC_B_CLK0>, <&clkc CLKID_FCLK_DIV2>; clock-names = "core", "clkin0", "clkin1"; }; &sd_emmc_c { clocks = <&clkc CLKID_SD_EMMC_C>, - <&xtal>, + <&clkc CLKID_SD_EMMC_C_CLK0>, <&clkc CLKID_FCLK_DIV2>; clock-names = "core", "clkin0", "clkin1"; }; -- cgit v1.2.3-70-g09d2 From 673ccaaccf32a044d961c3bac3dd63452bdfa86c Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 31 Aug 2017 15:52:19 +0200 Subject: ARM64: dts: meson: remove cap-sd-highspeed from emmc nodes It does not make much sense to define cap-sd-highspeed in the emmc nodes Just remove it. Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi | 1 - arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts | 1 - arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts | 1 - arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts | 1 - arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi | 1 - arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi | 1 - arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts | 1 - arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts | 1 - arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi | 1 - arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts | 1 - arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts | 1 - 11 files changed, 11 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi index c89010e56488..d4f9c5b550c7 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi @@ -215,7 +215,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; cap-mmc-highspeed; max-frequency = <200000000>; non-removable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 9697a7a79464..7dae6acd3c8c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -297,7 +297,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; max-frequency = <200000000>; non-removable; disable-wp; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts index 9c59c3c6d1b6..a690956d6c75 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts @@ -274,7 +274,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; cap-mmc-highspeed; max-frequency = <200000000>; non-removable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index d147c853ab05..a12303becab4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -274,7 +274,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; max-frequency = <200000000>; non-removable; disable-wp; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi index 81ffc689a5bf..d77e19591ee3 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi @@ -241,7 +241,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; cap-mmc-highspeed; max-frequency = <200000000>; non-removable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi index 346753fb6324..0262ef8d48e4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi @@ -201,7 +201,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; cap-mmc-highspeed; max-frequency = <200000000>; non-removable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts index 2a5804ce7f4b..f779a985f923 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts @@ -144,7 +144,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; cap-mmc-highspeed; max-frequency = <100000000>; non-removable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts index 4c2ac7650fcd..21274a6c1b9b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts @@ -231,7 +231,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; cap-mmc-highspeed; max-frequency = <200000000>; non-removable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi index f3eea8e89d12..8899121f79e1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi @@ -137,7 +137,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; cap-mmc-highspeed; max-frequency = <200000000>; non-removable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts index 9b10c5f4f8c0..ff8a9f780485 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts @@ -196,7 +196,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; cap-mmc-highspeed; max-frequency = <200000000>; non-removable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts index 08f1dd69b679..470f72bb863c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts @@ -220,7 +220,6 @@ pinctrl-names = "default"; bus-width = <8>; - cap-sd-highspeed; cap-mmc-highspeed; max-frequency = <200000000>; non-removable; -- cgit v1.2.3-70-g09d2 From 67e7607fcdf1fad10e9f183424e709c59713e45d Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 31 Aug 2017 15:52:20 +0200 Subject: ARM64: dts: meson: add mmc clk gate pins Add the pinctrl to switch mmc clk pins in gpio (pulled down) mode. This is necessary to be able to gate the clk outside of the SoC while keeping it running in the controller Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- .../arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi | 9 ++++-- .../boot/dts/amlogic/meson-gxbb-nanopi-k2.dts | 9 ++++-- .../boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts | 11 +++++--- .../arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts | 8 ++++-- arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi | 9 ++++-- .../boot/dts/amlogic/meson-gxbb-vega-s95.dtsi | 9 ++++-- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 33 ++++++++++++++++++++++ .../dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts | 6 ++-- .../dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 6 ++-- .../dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts | 9 ++++-- .../boot/dts/amlogic/meson-gxl-s905x-p212.dtsi | 9 ++++-- arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 33 ++++++++++++++++++++++ .../arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts | 6 ++-- 13 files changed, 126 insertions(+), 31 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi index d4f9c5b550c7..4157987f4a3d 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx-p23x-q20x.dtsi @@ -168,7 +168,8 @@ &sd_emmc_a { status = "okay"; pinctrl-0 = <&sdio_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdio_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; #address-cells = <1>; #size-cells = <0>; @@ -194,7 +195,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -212,7 +214,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; cap-mmc-highspeed; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 7dae6acd3c8c..60d5f2da6916 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -250,7 +250,8 @@ &sd_emmc_a { status = "okay"; pinctrl-0 = <&sdio_pins>, <&sdio_irq_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdio_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; #address-cells = <1>; #size-cells = <0>; @@ -276,7 +277,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -294,7 +296,8 @@ &sd_emmc_c { status = "disabled"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; max-frequency = <200000000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts index a690956d6c75..38dfdde5c147 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nexbox-a95x.dts @@ -51,7 +51,7 @@ / { compatible = "nexbox,a95x", "amlogic,meson-gxbb"; model = "NEXBOX A95X"; - + aliases { serial0 = &uart_AO; }; @@ -232,7 +232,8 @@ &sd_emmc_a { status = "okay"; pinctrl-0 = <&sdio_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdio_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; #address-cells = <1>; #size-cells = <0>; @@ -253,7 +254,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -271,7 +273,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; cap-mmc-highspeed; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index a12303becab4..1ffa1c238a72 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -50,7 +50,7 @@ / { compatible = "hardkernel,odroid-c2", "amlogic,meson-gxbb"; model = "Hardkernel ODROID-C2"; - + aliases { serial0 = &uart_AO; }; @@ -253,7 +253,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -271,7 +272,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; max-frequency = <200000000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi index d77e19591ee3..704b214e8894 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi @@ -194,7 +194,8 @@ &sd_emmc_a { status = "okay"; pinctrl-0 = <&sdio_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdio_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; #address-cells = <1>; #size-cells = <0>; @@ -220,7 +221,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -238,7 +240,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; cap-mmc-highspeed; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi index 0262ef8d48e4..f2bc6dea1fc6 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi @@ -155,7 +155,8 @@ &sd_emmc_a { status = "okay"; pinctrl-0 = <&sdio_pins &sdio_irq_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdio_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; #address-cells = <1>; #size-cells = <0>; @@ -181,7 +182,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -198,7 +200,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; cap-mmc-highspeed; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 8f0c0cb02157..af834cdbba79 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -392,6 +392,17 @@ }; }; + emmc_clk_gate_pins: emmc_clk_gate { + mux { + groups = "BOOT_8"; + function = "gpio_periphs"; + }; + cfg-pull-down { + pins = "BOOT_8"; + bias-pull-down; + }; + }; + nor_pins: nor { mux { groups = "nor_d", @@ -430,6 +441,17 @@ }; }; + sdcard_clk_gate_pins: sdcard_clk_gate { + mux { + groups = "CARD_2"; + function = "gpio_periphs"; + }; + cfg-pull-down { + pins = "CARD_2"; + bias-pull-down; + }; + }; + sdio_pins: sdio { mux { groups = "sdio_d0", @@ -442,6 +464,17 @@ }; }; + sdio_clk_gate_pins: sdio_clk_gate { + mux { + groups = "GPIOX_4"; + function = "gpio_periphs"; + }; + cfg-pull-down { + pins = "GPIOX_4"; + bias-pull-down; + }; + }; + sdio_irq_pins: sdio_irq { mux { groups = "sdio_irq"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts index f779a985f923..977b4240f3c1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-hwacom-amazetv.dts @@ -123,7 +123,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -141,7 +142,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; cap-mmc-highspeed; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index 69ca14ac10fa..a014c052241e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -197,7 +197,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -215,7 +216,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; cap-mmc-highspeed; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts index 21274a6c1b9b..1b8f32867aa1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts @@ -189,7 +189,8 @@ &sd_emmc_a { status = "okay"; pinctrl-0 = <&sdio_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdio_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; #address-cells = <1>; #size-cells = <0>; @@ -210,7 +211,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -228,7 +230,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; cap-mmc-highspeed; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi index 8899121f79e1..129af9068814 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi @@ -95,7 +95,8 @@ &sd_emmc_a { status = "okay"; pinctrl-0 = <&sdio_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdio_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; #address-cells = <1>; #size-cells = <0>; @@ -116,7 +117,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -134,7 +136,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; cap-mmc-highspeed; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 829d84db5fc5..d8dd3298b15c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -281,6 +281,17 @@ }; }; + emmc_clk_gate_pins: emmc_clk_gate { + mux { + groups = "BOOT_8"; + function = "gpio_periphs"; + }; + cfg-pull-down { + pins = "BOOT_8"; + bias-pull-down; + }; + }; + nor_pins: nor { mux { groups = "nor_d", @@ -319,6 +330,17 @@ }; }; + sdcard_clk_gate_pins: sdcard_clk_gate { + mux { + groups = "CARD_2"; + function = "gpio_periphs"; + }; + cfg-pull-down { + pins = "CARD_2"; + bias-pull-down; + }; + }; + sdio_pins: sdio { mux { groups = "sdio_d0", @@ -331,6 +353,17 @@ }; }; + sdio_clk_gate_pins: sdio_clk_gate { + mux { + groups = "GPIOX_4"; + function = "gpio_periphs"; + }; + cfg-pull-down { + pins = "GPIOX_4"; + bias-pull-down; + }; + }; + sdio_irq_pins: sdio_irq { mux { groups = "sdio_irq"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts index ff8a9f780485..22c697732f66 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts @@ -175,7 +175,8 @@ &sd_emmc_b { status = "okay"; pinctrl-0 = <&sdcard_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&sdcard_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <4>; cap-sd-highspeed; @@ -193,7 +194,8 @@ &sd_emmc_c { status = "okay"; pinctrl-0 = <&emmc_pins>; - pinctrl-names = "default"; + pinctrl-1 = <&emmc_clk_gate_pins>; + pinctrl-names = "default", "clk-gate"; bus-width = <8>; cap-mmc-highspeed; -- cgit v1.2.3-70-g09d2 From 42776561a1def5d96699574efd7c9cbbd2e0fbc4 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 31 Aug 2017 15:52:21 +0200 Subject: ARM64: dts: meson-gxbb: nanopi-k2: add card regulator settle times Changing the card voltage on the nanopi-k2 is not instantaneous, especially when switching from 3.3v to 1.8v. It take at least 3ms for the regulator to go from 3.3v to 1.8v. Add margin to that to make sure we don't upset the sdcard during the voltage switch Fixes: 9bc7ffb08daf ("arm64: dts: amlogic: Add NanoPi K2") Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 60d5f2da6916..acb6797756e5 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -107,6 +107,9 @@ states = <3300000 0>, <1800000 1>; + + regulator-settling-time-up-us = <100>; + regulator-settling-time-down-us = <5000>; }; wifi_32k: wifi-32k { -- cgit v1.2.3-70-g09d2 From 8a5085c420d272af04552b2d2213471247fa86f2 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 31 Aug 2017 15:52:22 +0200 Subject: ARM64: dts: meson-gxl: libretech-cc: add card regulator settle times Changing the card voltage on the cc is not instantaneous, especially when switching from 3.3v to 1.8v. It take at least 30ms for the regulator to go from 3.3v to 1.8v. Add margin to that to make sure we don't upset the sdcard during the voltage switch Fixes: 61ff2af9b278 ("ARM64: dts: fixup libretech cc definition") Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index a014c052241e..7d252168c2fa 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -91,6 +91,9 @@ states = <3300000 0>, <1800000 1>; + + regulator-settling-time-up-us = <200>; + regulator-settling-time-down-us = <50000>; }; vddio_boot: regulator-vddio_boot { -- cgit v1.2.3-70-g09d2 From 3cde63ebc85cea63806d86a690d04457c0347703 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 31 Aug 2017 15:52:23 +0200 Subject: ARM64: dts: meson-gxl: libretech-cc: enable high speed modes Enable sdcard UHS modes up to SDR50. Unfortunately, it seems the PCB of the libretech-cc cannot handle SDR104 at 200Mhz reliably. Also enable eMMC DDR52 mode. Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index 7d252168c2fa..64c54c92e214 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -205,6 +205,9 @@ bus-width = <4>; cap-sd-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; max-frequency = <100000000>; disable-wp; @@ -224,6 +227,7 @@ bus-width = <8>; cap-mmc-highspeed; + mmc-ddr-3_3v; max-frequency = <50000000>; non-removable; disable-wp; -- cgit v1.2.3-70-g09d2 From 0f553358241a3346b7eef133d631e5bc2f067a15 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 31 Aug 2017 15:52:24 +0200 Subject: ARM64: dts: meson-gxbb: p20x: enable sdcard UHS modes Enable sdcard UHS modes, up to SDR50, on p20x based boards. While the s905 supports SDR104 mode, it appears that the PCB of p20x based boards can't cope with a rate as high as 200Mhz. Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi index 704b214e8894..23c08c3afd0a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p20x.dtsi @@ -226,6 +226,9 @@ bus-width = <4>; cap-sd-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; max-frequency = <100000000>; disable-wp; -- cgit v1.2.3-70-g09d2 From c1429e20a5a9f578e0e3ddb551c8ea94e8d3ddb3 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 31 Aug 2017 15:52:25 +0200 Subject: ARM64: dts: meson-gxbb: nanopi-k2: enable sdcard UHS modes Enable UHS modes, up to SDR50, on the nanopi-k2 SBC. Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index acb6797756e5..4c1320a93fef 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -285,6 +285,9 @@ bus-width = <4>; cap-sd-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; max-frequency = <100000000>; disable-wp; -- cgit v1.2.3-70-g09d2 From 485a308f05d843034b6e82f688704c44888aecde Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 31 Aug 2017 15:52:26 +0200 Subject: ARM64: dts: meson-gxbb: nanopi-k2: enable sdr104 mode SDR104 seems to be OK on the nanopi-k2 SBC so enable it Signed-off-by: Jerome Brunet Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 4c1320a93fef..4b17a76959b2 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -288,7 +288,8 @@ sd-uhs-sdr12; sd-uhs-sdr25; sd-uhs-sdr50; - max-frequency = <100000000>; + sd-uhs-sdr104; + max-frequency = <200000000>; disable-wp; cd-gpios = <&gpio CARD_6 GPIO_ACTIVE_HIGH>; -- cgit v1.2.3-70-g09d2 From ce06760ba46b66dae50f2519ae76bd15e89b5710 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Thu, 31 Aug 2017 15:50:03 -0700 Subject: HID: wacom: bits shifted too much for 9th and 10th buttons Cintiq 12 has 10 expresskey buttons. The bit shift for the last two buttons were off by 5. Fixes: c7f0522 ("HID: wacom: Slim down wacom_intuos_pad processing") Signed-off-by: Ping Cheng Tested-by: Matthieu Robin Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index bb17d7bbefd3..f8ddcaaa2ac6 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -567,8 +567,8 @@ static int wacom_intuos_pad(struct wacom_wac *wacom) keys = data[9] & 0x07; } } else { - buttons = ((data[6] & 0x10) << 10) | - ((data[5] & 0x10) << 9) | + buttons = ((data[6] & 0x10) << 5) | + ((data[5] & 0x10) << 4) | ((data[6] & 0x0F) << 4) | (data[5] & 0x0F); } -- cgit v1.2.3-70-g09d2 From 74aebed6dc13425233f2224668353cff7a112776 Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Mon, 28 Aug 2017 14:15:39 -0700 Subject: HID: wacom: leds: Don't try to control the EKR's read-only LEDs Commit a50aac7193f1 introduces 'led.groups' and adds EKR support for these groups. However, unlike the other devices with LEDs, the EKR's LEDs are read-only and we shouldn't attempt to control them in wacom_led_control(). See bug: https://sourceforge.net/p/linuxwacom/bugs/342/ Fixes: a50aac7193f1 ("HID: wacom: leds: dynamically allocate LED groups") Cc: stable # 4.9 Signed-off-by: Aaron Armstrong Skomra Reviewed-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index e82a696a1d07..735bfbbcaa82 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -766,6 +766,9 @@ static int wacom_led_control(struct wacom *wacom) if (!wacom->led.groups) return -ENOTSUPP; + if (wacom->wacom_wac.features.type == REMOTE) + return -ENOTSUPP; + if (wacom->wacom_wac.pid) { /* wireless connected */ report_id = WAC_CMD_WL_LED_CONTROL; buf_size = 13; -- cgit v1.2.3-70-g09d2 From a3ae552b5259e6ffd8d124a9fa9923283af2828a Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Mon, 4 Sep 2017 15:40:42 -0400 Subject: HID: add multi-input quirk for IDC6680 touchscreen The Ideacom 6680 touchscreen is found in the Dell Latitude 2100. It has two USB descriptors, the first of which has two input reports. The HID_QUIRK_MULTI_INPUT quirk is needed to keep the correct maximum value for ABS_X/ABS_Y (8191 instead of 65535). Signed-off-by: Nicholas Bishop Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b397a14ab970..11798b10e522 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -533,6 +533,7 @@ #define USB_VENDOR_ID_IDEACOM 0x1cb6 #define USB_DEVICE_ID_IDEACOM_IDC6650 0x6650 #define USB_DEVICE_ID_IDEACOM_IDC6651 0x6651 +#define USB_DEVICE_ID_IDEACOM_IDC6680 0x6680 #define USB_VENDOR_ID_ILITEK 0x222a #define USB_DEVICE_ID_ILITEK_MULTITOUCH 0x0001 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index a83fa76655b9..f489a5cfcb48 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -99,6 +99,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_IDEACOM, USB_DEVICE_ID_IDEACOM_IDC6680, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C007, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS, HID_QUIRK_NOGET }, -- cgit v1.2.3-70-g09d2 From e57f4e67a0c73ca1c3afb68928b917906bd82eaf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 22 Aug 2017 08:37:52 +0200 Subject: HID: multitouch: Fix system-control buttons not working Some laptops have system-control buttons (e.g. KEY_SLEEP) on the same interface as a hid-multitouch touch-pad. This commit fixes these buttons not working. Signed-off-by: Hans de Goede Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 440b999304a5..5609725df714 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -930,6 +930,7 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi, field->application != HID_DG_PEN && field->application != HID_DG_TOUCHPAD && field->application != HID_GD_KEYBOARD && + field->application != HID_GD_SYSTEM_CONTROL && field->application != HID_CP_CONSUMER_CONTROL && field->application != HID_GD_WIRELESS_RADIO_CTLS && !(field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS && -- cgit v1.2.3-70-g09d2 From b63c4c2718d641ba9bec888994f0cb0c23a1ef45 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 30 Aug 2017 15:13:25 -0700 Subject: HID: wacom: Properly report negative values from Intuos Pro 2 Bluetooth The wacom driver's IRQ handler for Bluetooth reports from the 2nd-gen Intuos Pro does not correctly process negative numbers. Values for tilt and rotation (which can go negative) are instead interpreted as unsigned and so jump to very large values when the data should be negative. This commit properly casts the data to ensure we report negative numbers when necessary. Fixes: 4922cd2 ("HID: wacom: Support 2nd-gen Intuos Pro's Bluetooth classic interface") Cc: stable@vger.kernel.org # v4.11 Signed-off-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index f8ddcaaa2ac6..1d9e32d2bc63 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1229,9 +1229,9 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) if (range) { input_report_abs(pen_input, ABS_X, get_unaligned_le16(&frame[1])); input_report_abs(pen_input, ABS_Y, get_unaligned_le16(&frame[3])); - input_report_abs(pen_input, ABS_TILT_X, frame[7]); - input_report_abs(pen_input, ABS_TILT_Y, frame[8]); - input_report_abs(pen_input, ABS_Z, get_unaligned_le16(&frame[9])); + input_report_abs(pen_input, ABS_TILT_X, (char)frame[7]); + input_report_abs(pen_input, ABS_TILT_Y, (char)frame[8]); + input_report_abs(pen_input, ABS_Z, (int16_t)get_unaligned_le16(&frame[9])); input_report_abs(pen_input, ABS_WHEEL, get_unaligned_le16(&frame[11])); } input_report_abs(pen_input, ABS_PRESSURE, get_unaligned_le16(&frame[5])); -- cgit v1.2.3-70-g09d2 From d252f4a10fb9c8f7187c6c936ff530039f8cb799 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 30 Aug 2017 15:13:26 -0700 Subject: HID: wacom: Correct coordinate system of touchring and pen twist The MobileStudio Pro, Cintiq Pro, and 2nd-gen Intuos Pro devices use a different coordinate system for their touchring and pen twist than prior devices. Prior devices had zero aligned to the tablet's left and would increase clockwise. Userspace expects data from the kernel to be in this old coordinate space, so adjustments are necessary. While the coordinate system for pen twist is formally defined by the HID standard, no such definition existed for the touchring at the time these tablets were introduced. Future tablets are expected to report touchring data using the same "zero-up clockwise-increasing" coordinate system defined for twist. Fixes: 50066a042d ("HID: wacom: generic: Add support for height, tilt, and twist usages") Fixes: 4922cd26f0 ("HID: wacom: Support 2nd-gen Intuos Pro's Bluetooth classic interface") Fixes: 60a2218698 ("HID: wacom: generic: add support for touchring") Cc: stable@vger.kernel.org # 4.10, 4.11 Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 73 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 68 insertions(+), 5 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 1d9e32d2bc63..78d0398904dc 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1227,11 +1227,17 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) continue; if (range) { + /* Fix rotation alignment: userspace expects zero at left */ + int16_t rotation = (int16_t)get_unaligned_le16(&frame[9]); + rotation += 1800/4; + if (rotation > 899) + rotation -= 1800; + input_report_abs(pen_input, ABS_X, get_unaligned_le16(&frame[1])); input_report_abs(pen_input, ABS_Y, get_unaligned_le16(&frame[3])); input_report_abs(pen_input, ABS_TILT_X, (char)frame[7]); input_report_abs(pen_input, ABS_TILT_Y, (char)frame[8]); - input_report_abs(pen_input, ABS_Z, (int16_t)get_unaligned_le16(&frame[9])); + input_report_abs(pen_input, ABS_Z, rotation); input_report_abs(pen_input, ABS_WHEEL, get_unaligned_le16(&frame[11])); } input_report_abs(pen_input, ABS_PRESSURE, get_unaligned_le16(&frame[5])); @@ -1319,12 +1325,19 @@ static void wacom_intuos_pro2_bt_pad(struct wacom_wac *wacom) unsigned char *data = wacom->data; int buttons = (data[282] << 1) | ((data[281] >> 6) & 0x01); - int ring = data[285]; - int prox = buttons | (ring & 0x80); + int ring = data[285] & 0x7F; + bool ringstatus = data[285] & 0x80; + bool prox = buttons || ringstatus; + + /* Fix touchring data: userspace expects 0 at left and increasing clockwise */ + ring = 71 - ring; + ring += 3*72/16; + if (ring > 71) + ring -= 72; wacom_report_numbered_buttons(pad_input, 9, buttons); - input_report_abs(pad_input, ABS_WHEEL, (ring & 0x80) ? (ring & 0x7f) : 0); + input_report_abs(pad_input, ABS_WHEEL, ringstatus ? ring : 0); input_report_key(pad_input, wacom->tool[1], prox ? 1 : 0); input_report_abs(pad_input, ABS_MISC, prox ? PAD_DEVICE_ID : 0); @@ -1616,6 +1629,20 @@ static int wacom_tpc_irq(struct wacom_wac *wacom, size_t len) return 0; } +static int wacom_offset_rotation(struct input_dev *input, struct hid_usage *usage, + int value, int num, int denom) +{ + struct input_absinfo *abs = &input->absinfo[usage->code]; + int range = (abs->maximum - abs->minimum + 1); + + value += num*range/denom; + if (value > abs->maximum) + value -= range; + else if (value < abs->minimum) + value += range; + return value; +} + int wacom_equivalent_usage(int usage) { if ((usage & HID_USAGE_PAGE) == WACOM_HID_UP_WACOMDIGITIZER) { @@ -1898,6 +1925,7 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field unsigned equivalent_usage = wacom_equivalent_usage(usage->hid); int i; bool is_touch_on = value; + bool do_report = false; /* * Avoid reporting this event and setting inrange_state if this usage @@ -1912,6 +1940,29 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field } switch (equivalent_usage) { + case WACOM_HID_WD_TOUCHRING: + /* + * Userspace expects touchrings to increase in value with + * clockwise gestures and have their zero point at the + * tablet's left. HID events "should" be clockwise- + * increasing and zero at top, though the MobileStudio + * Pro and 2nd-gen Intuos Pro don't do this... + */ + if (hdev->vendor == 0x56a && + (hdev->product == 0x34d || hdev->product == 0x34e || /* MobileStudio Pro */ + hdev->product == 0x357 || hdev->product == 0x358)) { /* Intuos Pro 2 */ + value = (field->logical_maximum - value); + + if (hdev->product == 0x357 || hdev->product == 0x358) + value = wacom_offset_rotation(input, usage, value, 3, 16); + else if (hdev->product == 0x34d || hdev->product == 0x34e) + value = wacom_offset_rotation(input, usage, value, 1, 2); + } + else { + value = wacom_offset_rotation(input, usage, value, 1, 4); + } + do_report = true; + break; case WACOM_HID_WD_TOUCHRINGSTATUS: if (!value) input_event(input, usage->type, usage->code, 0); @@ -1945,10 +1996,14 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field value, i); /* fall through*/ default: + do_report = true; + break; + } + + if (do_report) { input_event(input, usage->type, usage->code, value); if (value) wacom_wac->hid_data.pad_input_event_flag = true; - break; } } @@ -2089,6 +2144,14 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL); wacom_wac->serial[0] |= (__u32)value; return; + case HID_DG_TWIST: + /* + * Userspace expects pen twist to have its zero point when + * the buttons/finger is on the tablet's left. HID values + * are zero when buttons are toward the top. + */ + value = wacom_offset_rotation(input, usage, value, 1, 4); + break; case WACOM_HID_WD_SENSE: wacom_wac->hid_data.sense_state = value; return; -- cgit v1.2.3-70-g09d2 From 56d859e11aad4016e1cf864d65a0954d83120571 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 25 Aug 2017 18:06:04 -0400 Subject: HID: multitouch: support buttons and trackpoint on Lenovo X1 Tab Gen2 On the 2nd generation Lenovo Tablet only clickpad is working; the trackpoint and three mouse buttons do not work. hid_multitouch must export all inputs in order to get trackpoint and buttons to function. Signed-off-by: Pavel Tatashin Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-multitouch.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 11798b10e522..a98919199858 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -661,6 +661,7 @@ #define USB_DEVICE_ID_LENOVO_CBTKBD 0x6048 #define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067 #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 +#define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 #define USB_VENDOR_ID_LG 0x1fd2 #define USB_DEVICE_ID_LG_MULTITOUCH 0x0064 diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 5609725df714..9e8c4d2ba11d 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1420,6 +1420,12 @@ static const struct hid_device_id mt_devices[] = { USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) }, + /* Lenovo X1 TAB Gen 2 */ + { .driver_data = MT_CLS_WIN_8_DUAL, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_LENOVO, + USB_DEVICE_ID_LENOVO_X1_TAB) }, + /* Anton devices */ { .driver_data = MT_CLS_EXPORT_ALL_INPUTS, MT_USB_DEVICE(USB_VENDOR_ID_ANTON, -- cgit v1.2.3-70-g09d2 From fcaa4a07d2a4b541e91da7a55d8b3331f96d1865 Mon Sep 17 00:00:00 2001 From: Shrirang Bagul Date: Thu, 10 Aug 2017 17:54:01 +0800 Subject: HID: multitouch: Support ALPS PTP stick with pid 0x120A This patch adds ALPS PTP sticks with pid/device id 0x120A to the list of devices supported by hid-multitouch. Signed-off-by: Shrirang Bagul Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-multitouch.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index a98919199858..832897d4a849 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -75,6 +75,7 @@ #define USB_VENDOR_ID_ALPS_JP 0x044E #define HID_DEVICE_ID_ALPS_U1_DUAL 0x120B +#define HID_DEVICE_ID_ALPS_U1_PTP_2 0x120A #define HID_DEVICE_ID_ALPS_U1_DUAL_PTP 0x121F #define HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP 0x1220 diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 9e8c4d2ba11d..c78625dceced 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1419,6 +1419,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) }, + { .driver_data = MT_CLS_WIN_8_DUAL, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_ALPS_JP, + HID_DEVICE_ID_ALPS_U1_PTP_2) }, /* Lenovo X1 TAB Gen 2 */ { .driver_data = MT_CLS_WIN_8_DUAL, -- cgit v1.2.3-70-g09d2 From 23e4c67ae13da7549a54f4e8c0014f48b2ef5204 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Sep 2017 23:45:34 +0200 Subject: ata: avoid gcc-7 warning in ata_timing_quantize gcc-7 warns about the result of a constant multiplication used as a boolean: drivers/ata/libata-core.c: In function 'ata_timing_quantize': drivers/ata/libata-core.c:3164:30: warning: '*' in boolean context, suggest '&&' instead [-Wint-in-bool-context] This slightly rearranges the macro to simplify the code and avoid the warning at the same time. Signed-off-by: Arnd Bergmann Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 1945a8ea2099..ee4c1ec9dca0 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3234,19 +3234,19 @@ static const struct ata_timing ata_timing[] = { }; #define ENOUGH(v, unit) (((v)-1)/(unit)+1) -#define EZ(v, unit) ((v)?ENOUGH(v, unit):0) +#define EZ(v, unit) ((v)?ENOUGH(((v) * 1000), unit):0) static void ata_timing_quantize(const struct ata_timing *t, struct ata_timing *q, int T, int UT) { - q->setup = EZ(t->setup * 1000, T); - q->act8b = EZ(t->act8b * 1000, T); - q->rec8b = EZ(t->rec8b * 1000, T); - q->cyc8b = EZ(t->cyc8b * 1000, T); - q->active = EZ(t->active * 1000, T); - q->recover = EZ(t->recover * 1000, T); - q->dmack_hold = EZ(t->dmack_hold * 1000, T); - q->cycle = EZ(t->cycle * 1000, T); - q->udma = EZ(t->udma * 1000, UT); + q->setup = EZ(t->setup, T); + q->act8b = EZ(t->act8b, T); + q->rec8b = EZ(t->rec8b, T); + q->cyc8b = EZ(t->cyc8b, T); + q->active = EZ(t->active, T); + q->recover = EZ(t->recover, T); + q->dmack_hold = EZ(t->dmack_hold, T); + q->cycle = EZ(t->cycle, T); + q->udma = EZ(t->udma, UT); } void ata_timing_merge(const struct ata_timing *a, const struct ata_timing *b, -- cgit v1.2.3-70-g09d2 From 59cd827f26019ac790b2f34cbad478037f51c570 Mon Sep 17 00:00:00 2001 From: Matt Chen Date: Mon, 28 Aug 2017 14:57:54 +0800 Subject: iwlwifi: mvm: fix wowlan resume failed to load INIT ucode If we set disconnect on wowlan and run suspend/resume, will run into: ...snipped iwlwifi 0000:01:00.0: Failed to load firmware chunk! iwlwifi 0000:01:00.0: Could not load the [0] uCode section iwlwifi 0000:01:00.0: Failed to start INIT ucode: -110 iwlwifi 0000:01:00.0: Failed to run INIT ucode: -110 iwlwifi 0000:01:00.0: Failed to start RT ucode: -110 It is because we still keep IWL_MVM_STATUS_IN_HW_RESTART in __iwl_mvm_resume. When mac80211 starts the device as __iwl_mvm_mac_start(), we will miss iwl_mvm_restart_cleanup(mvm). Signed-off-by: Matt Chen Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 5de19ea10575..b205a7bfb828 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2167,7 +2167,7 @@ out: * 1. We are not using a unified image * 2. We are using a unified image but had an error while exiting D3 */ - set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status); + set_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status); set_bit(IWL_MVM_STATUS_D3_RECONFIG, &mvm->status); /* * When switching images we return 1, which causes mac80211 -- cgit v1.2.3-70-g09d2 From 6110d9e5bdd15c4e60fb67f330fbf74681e7daf7 Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Tue, 29 Aug 2017 13:56:02 +0300 Subject: iwlwifi: mvm: Flush non STA TX queues When starting wowlan mac80211 requests flush w/o vif and we ignore this request. As a result some packets stay stuck in the queue and it may end up with a queue hang. Allow the driver to flush queues even if station isn't specified. Signed-off-by: David Spinadel Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 44 ++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 15f2d826bb4b..64b0be73ea72 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3975,6 +3975,43 @@ out_unlock: return ret; } +static void iwl_mvm_flush_no_vif(struct iwl_mvm *mvm, u32 queues, bool drop) +{ + if (drop) { + if (iwl_mvm_has_new_tx_api(mvm)) + /* TODO new tx api */ + WARN_ONCE(1, + "Need to implement flush TX queue\n"); + else + iwl_mvm_flush_tx_path(mvm, + iwl_mvm_flushable_queues(mvm) & queues, + 0); + } else { + if (iwl_mvm_has_new_tx_api(mvm)) { + struct ieee80211_sta *sta; + int i; + + mutex_lock(&mvm->mutex); + + for (i = 0; i < ARRAY_SIZE(mvm->fw_id_to_mac_id); i++) { + sta = rcu_dereference_protected( + mvm->fw_id_to_mac_id[i], + lockdep_is_held(&mvm->mutex)); + if (IS_ERR_OR_NULL(sta)) + continue; + + iwl_mvm_wait_sta_queues_empty(mvm, + iwl_mvm_sta_from_mac80211(sta)); + } + + mutex_unlock(&mvm->mutex); + } else { + iwl_trans_wait_tx_queues_empty(mvm->trans, + queues); + } + } +} + static void iwl_mvm_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, bool drop) { @@ -3985,7 +4022,12 @@ static void iwl_mvm_mac_flush(struct ieee80211_hw *hw, int i; u32 msk = 0; - if (!vif || vif->type != NL80211_IFTYPE_STATION) + if (!vif) { + iwl_mvm_flush_no_vif(mvm, queues, drop); + return; + } + + if (vif->type != NL80211_IFTYPE_STATION) return; /* Make sure we're done with the deferred traffic before flushing */ -- cgit v1.2.3-70-g09d2 From 0fe8bed6e37c259b85d123ef9667f972305c9d6b Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Thu, 31 Aug 2017 16:27:06 +0300 Subject: iwlwifi: mvm: send all non-bufferable frames on the probe queue AP interfaces now send all non-bufferable frames using the broadcast station. Thus allow them to use the probe queue and don't warn about it. Fixes: eb045e6e0389 ("iwlwifi: mvm: Avoid deferring non bufferable frames") Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 172b5e63d3fb..6f2e2af23219 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -564,8 +564,8 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_ADHOC: /* - * Handle legacy hostapd as well, where station will be added - * only just before sending the association response. + * Non-bufferable frames use the broadcast station, thus they + * use the probe queue. * Also take care of the case where we send a deauth to a * station that we don't have, or similarly an association * response (with non-success status) for a station we can't @@ -573,9 +573,9 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, * Also, disassociate frames might happen, particular with * reason 7 ("Class 3 frame received from nonassociated STA"). */ - if (ieee80211_is_probe_resp(fc) || ieee80211_is_auth(fc) || - ieee80211_is_deauth(fc) || ieee80211_is_assoc_resp(fc) || - ieee80211_is_disassoc(fc)) + if (ieee80211_is_mgmt(fc) && + (!ieee80211_is_bufferable_mmpdu(fc) || + ieee80211_is_deauth(fc) || ieee80211_is_disassoc(fc))) return mvm->probe_queue; if (info->hw_queue == info->control.vif->cab_queue) return mvmvif->cab_queue; -- cgit v1.2.3-70-g09d2 From bd800e41a3de5c7e56b2fd27088bdaf5e228d227 Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Mon, 28 Aug 2017 11:51:05 +0300 Subject: iwlwifi: mvm: change state when queueing agg start work Add a new state to enum iwl_mvm_agg_state, which is used between queueing the work that starts tx aggregations and actually starting that work (changing to state IWL_AGG_STARTING). This solves a race where ieee80211_start_tx_ba_session is called a second time, before the work queued by the first run has a chance to change the agg_state. In this case the second call to ieee80211_start_tx_ba_session returns an error, and the fallback is to abort the ba session start. Fixes: 482e48440a0e ("iwlwifi: mvm: change open and close criteria of a BA session") Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 6 ++++-- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index ba7bd049d3d4..0fe723ca844e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -661,7 +661,8 @@ static void rs_tl_turn_on_agg(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, (lq_sta->tx_agg_tid_en & BIT(tid)) && (tid_data->tx_count_last >= IWL_MVM_RS_AGG_START_THRESHOLD)) { IWL_DEBUG_RATE(mvm, "try to aggregate tid %d\n", tid); - rs_tl_turn_on_agg_for_tid(mvm, lq_sta, tid, sta); + if (rs_tl_turn_on_agg_for_tid(mvm, lq_sta, tid, sta) == 0) + tid_data->state = IWL_AGG_QUEUED; } } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 411a2055dc45..2dafe9bb4d8b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2385,8 +2385,10 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, if (WARN_ON_ONCE(tid >= IWL_MAX_TID_COUNT)) return -EINVAL; - if (mvmsta->tid_data[tid].state != IWL_AGG_OFF) { - IWL_ERR(mvm, "Start AGG when state is not IWL_AGG_OFF %d!\n", + if (mvmsta->tid_data[tid].state != IWL_AGG_QUEUED && + mvmsta->tid_data[tid].state != IWL_AGG_OFF) { + IWL_ERR(mvm, + "Start AGG when state is not IWL_AGG_QUEUED or IWL_AGG_OFF %d!\n", mvmsta->tid_data[tid].state); return -ENXIO; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index d13893806513..aedabe101cf0 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -281,6 +281,7 @@ struct iwl_mvm_vif; * These states relate to a specific RA / TID. * * @IWL_AGG_OFF: aggregation is not used + * @IWL_AGG_QUEUED: aggregation start work has been queued * @IWL_AGG_STARTING: aggregation are starting (between start and oper) * @IWL_AGG_ON: aggregation session is up * @IWL_EMPTYING_HW_QUEUE_ADDBA: establishing a BA session - waiting for the @@ -290,6 +291,7 @@ struct iwl_mvm_vif; */ enum iwl_mvm_agg_state { IWL_AGG_OFF = 0, + IWL_AGG_QUEUED, IWL_AGG_STARTING, IWL_AGG_ON, IWL_EMPTYING_HW_QUEUE_ADDBA, -- cgit v1.2.3-70-g09d2 From 8458e48ac7ad86a5ab7f3d1a8cacd9205a9a97ce Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sun, 3 Sep 2017 16:04:38 +0300 Subject: iwlwifi: mvm: wake the correct mac80211 queue iwl_mvm_start_mac_queues() takes a bitmap of the queues to wake. When deferred tx is purged, set the bit of the hw_queue so the correct queue will be waken up. Fixes: 7e39a00d5931 ("iwlwifi: mvm: start mac queues when deferred tx frames are purged") Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 64b0be73ea72..3a6ce4222ff5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2563,7 +2563,7 @@ static void iwl_mvm_purge_deferred_tx_frames(struct iwl_mvm *mvm, * queues, so we should never get a second deferred * frame for the RA/TID. */ - iwl_mvm_start_mac_queues(mvm, info->hw_queue); + iwl_mvm_start_mac_queues(mvm, BIT(info->hw_queue)); ieee80211_free_txskb(mvm->hw, skb); } } -- cgit v1.2.3-70-g09d2 From 97bce57bd7f96e1218751996f549a6e61f18cc8c Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 1 Sep 2017 17:59:15 +0300 Subject: iwlwifi: mvm: use IWL_HCMD_NOCOPY for MCAST_FILTER_CMD The MCAST_FILTER_CMD can get quite large when we have many mcast addresses to set (we support up to 255). So the command should be send as NOCOPY to prevent a warning caused by too-long commands: WARNING: CPU: 0 PID: 9700 at /root/iwlwifi/stack-dev/drivers/net/wireless/intel/iwlwifi/pcie/tx.c:1550 iwl_pcie_enqueue_hcmd+0x8c7/0xb40 [iwlwifi] Command MCAST_FILTER_CMD (0x1d0) is too large (328 bytes) This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=196743 Cc: stable@vger.kernel.org Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 3a6ce4222ff5..635db63f972e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1546,6 +1546,11 @@ static void iwl_mvm_mc_iface_iterator(void *_data, u8 *mac, struct iwl_mvm_mc_iter_data *data = _data; struct iwl_mvm *mvm = data->mvm; struct iwl_mcast_filter_cmd *cmd = mvm->mcast_filter_cmd; + struct iwl_host_cmd hcmd = { + .id = MCAST_FILTER_CMD, + .flags = CMD_ASYNC, + .dataflags[0] = IWL_HCMD_DFL_NOCOPY, + }; int ret, len; /* if we don't have free ports, mcast frames will be dropped */ @@ -1560,7 +1565,10 @@ static void iwl_mvm_mc_iface_iterator(void *_data, u8 *mac, memcpy(cmd->bssid, vif->bss_conf.bssid, ETH_ALEN); len = roundup(sizeof(*cmd) + cmd->count * ETH_ALEN, 4); - ret = iwl_mvm_send_cmd_pdu(mvm, MCAST_FILTER_CMD, CMD_ASYNC, len, cmd); + hcmd.len[0] = len; + hcmd.data[0] = cmd; + + ret = iwl_mvm_send_cmd(mvm, &hcmd); if (ret) IWL_ERR(mvm, "mcast filter cmd error. ret=%d\n", ret); } -- cgit v1.2.3-70-g09d2 From 61e7d91bcf7725b9fcd9cbfc5fa0e0f84f19e6de Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 1 Sep 2017 18:57:35 +0300 Subject: iwlwifi: mvm: handle FIF_ALLMULTI when setting multicast addresses We were ignoring the FIF_ALLMULTI flag when setting the multicast addresses with MCAST_FILTER_CMD. Check if this flag is set and enable pass_all accordingly. We also need to set the count to 0 if pass_all is enable so we don't pass addresses to the firmware when not needed (as doing so causes an assert). This fixes https://bugzilla.kernel.org/show_bug.cgi?id=196741 Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 635db63f972e..3bcaa82f59b2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1643,6 +1643,12 @@ static void iwl_mvm_configure_filter(struct ieee80211_hw *hw, if (!cmd) goto out; + if (changed_flags & FIF_ALLMULTI) + cmd->pass_all = !!(*total_flags & FIF_ALLMULTI); + + if (cmd->pass_all) + cmd->count = 0; + iwl_mvm_recalc_multicast(mvm); out: mutex_unlock(&mvm->mutex); -- cgit v1.2.3-70-g09d2 From 3f497de997c7ed34ad8a90b64f1ca53a41d428b4 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Sat, 2 Sep 2017 11:05:22 +0300 Subject: iwlwifi: mvm: initialize status in iwl_mvm_add_int_sta_common() We always need to initialize the status argument to the success case before calling iwl_mvm_send_cmd_status() or iwl_mvm_send_cmd_pdu_status() (which calls the former) otherwise we may get an uninitialized value back. In this case, we use ADD_STA_SUCCESS as success. Fixes: 732d06e9d9cf ("iwlwifi: mvm: add station before allocating a queue") Reported by: Dan Carpenter Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 2dafe9bb4d8b..c4a343534c5e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1285,7 +1285,7 @@ static int iwl_mvm_add_int_sta_common(struct iwl_mvm *mvm, { struct iwl_mvm_add_sta_cmd cmd; int ret; - u32 status; + u32 status = ADD_STA_SUCCESS; lockdep_assert_held(&mvm->mutex); -- cgit v1.2.3-70-g09d2 From d460f1fb83a44833a09c8eaa34b30ce553cab8c5 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Sat, 2 Sep 2017 11:25:40 +0300 Subject: iwlwifi: mvm: set status before calling iwl_mvm_send_cmd_status() We always must set the status to what we consider success before calling iwl_mvm_send_cmd_status() (also iwl_mvm_send_cmd_pdu_status() which calls it). Fix a few places where initialization is missing. Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 50983615dce6..774122fed454 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -555,7 +555,7 @@ static int iwl_mvm_lmac_scan_abort(struct iwl_mvm *mvm) struct iwl_host_cmd cmd = { .id = SCAN_OFFLOAD_ABORT_CMD, }; - u32 status; + u32 status = CAN_ABORT_STATUS; ret = iwl_mvm_send_cmd_status(mvm, &cmd, &status); if (ret) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 8876c2abc440..4d907f60bce9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -529,6 +529,7 @@ int iwl_mvm_ctdp_command(struct iwl_mvm *mvm, u32 op, u32 state) lockdep_assert_held(&mvm->mutex); + status = 0; ret = iwl_mvm_send_cmd_pdu_status(mvm, WIDE_ID(PHY_OPS_GROUP, CTDP_CONFIG_CMD), sizeof(cmd), &cmd, &status); -- cgit v1.2.3-70-g09d2 From 5f90472c00ddf1e64c2865f71cced297bd5f80a2 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 4 Sep 2017 20:27:04 +0300 Subject: iwlwifi: mvm: fix reorder buffer for 9000 devices The condition to check if reorder buffer ran out of space is faulty, as it takes into account only the NSSN. In case the head SN was too far behind the reorder buffer should move forward, regardless of the NSSN status. This caused the driver to release packets out of order in some scenarios. Fixes: b915c10174fb ("iwlwifi: mvm: add reorder buffer per queue") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 67ffd9774712..77f77bc5d083 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -672,11 +672,12 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, * If there was a significant jump in the nssn - adjust. * If the SN is smaller than the NSSN it might need to first go into * the reorder buffer, in which case we just release up to it and the - * rest of the function will take of storing it and releasing up to the - * nssn + * rest of the function will take care of storing it and releasing up to + * the nssn */ if (!iwl_mvm_is_sn_less(nssn, buffer->head_sn + buffer->buf_size, - buffer->buf_size)) { + buffer->buf_size) || + !ieee80211_sn_less(sn, buffer->head_sn + buffer->buf_size)) { u16 min_sn = ieee80211_sn_less(sn, nssn) ? sn : nssn; iwl_mvm_release_frames(mvm, sta, napi, buffer, min_sn); -- cgit v1.2.3-70-g09d2 From cac72b990d34f4c70208998a86f910ba38253c94 Mon Sep 17 00:00:00 2001 From: Lyude Date: Sat, 22 Jul 2017 21:15:09 -0400 Subject: HID: rmi: Make sure the HID device is opened on resume So it looks like that suspend/resume has actually always been broken on hid-rmi. The fact it worked was a rather silly coincidence that was relying on the HID device to already be opened upon resume. This means that so long as anything was reading the /dev/input/eventX node for for an RMI device, it would suspend and resume correctly. As well, if nothing happened to be keeping the HID device away it would shut off, then the RMI driver would get confused on resume when it stopped responding and explode. So, call hid_hw_open() in rmi_post_resume() so we make sure that the device is alive before we try talking to it. This fixes RMI device suspend/resume over HID. Link: https://bugzilla.kernel.org/show_bug.cgi?id=196851 [jkosina@suse.cz: removed useless hunk that was zero-initializing 'ret'] Signed-off-by: Lyude Cc: Andrew Duggan Cc: stable@vger.kernel.org Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-rmi.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 5b40c2614599..ef241d66562e 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -436,17 +436,24 @@ static int rmi_post_resume(struct hid_device *hdev) if (!(data->device_flags & RMI_DEVICE)) return 0; - ret = rmi_reset_attn_mode(hdev); + /* Make sure the HID device is ready to receive events */ + ret = hid_hw_open(hdev); if (ret) return ret; + ret = rmi_reset_attn_mode(hdev); + if (ret) + goto out; + ret = rmi_driver_resume(rmi_dev, false); if (ret) { hid_warn(hdev, "Failed to resume device: %d\n", ret); - return ret; + goto out; } - return 0; +out: + hid_hw_close(hdev); + return ret; } #endif /* CONFIG_PM */ -- cgit v1.2.3-70-g09d2 From c5d4d7d83165ae863954b113c7f403d8b58febed Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 4 Sep 2017 10:28:02 +0200 Subject: xfrm: Fix deletion of offloaded SAs on failure. When we off load a SA, it gets pushed to the NIC before we can add it. In case of a failure, we don't delete this SA from the NIC. Fix this by calling xfrm_dev_state_delete on failure. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Reported-by: Shannon Nelson Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2bfbd9121e3b..b997f1395357 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -657,6 +657,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) { x->km.state = XFRM_STATE_DEAD; + xfrm_dev_state_delete(x); __xfrm_state_put(x); goto out; } -- cgit v1.2.3-70-g09d2 From 67a63387b1417b5954eedb15f638f1f0bee3da49 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 4 Sep 2017 10:59:55 +0200 Subject: xfrm: Fix negative device refcount on offload failure. Reset the offload device at the xfrm_state if the device was not able to offload the state. Otherwise we drop the device refcount twice. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Reported-by: Shannon Nelson Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index acf00104ef31..30e5746085b8 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -91,6 +91,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, } if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { + xso->dev = NULL; dev_put(dev); return 0; } -- cgit v1.2.3-70-g09d2 From 23e9fcfef1f3d10675acce023592796851bcaf1a Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Tue, 12 Sep 2017 14:53:46 +0300 Subject: vti: fix NULL dereference in xfrm_input() Can be reproduced with LTP tests: # icmp-uni-vti.sh -p ah -a sha256 -m tunnel -S fffffffe -k 1 -s 10 IPv4: RIP: 0010:xfrm_input+0x7f9/0x870 ... Call Trace: vti_input+0xaa/0x110 [ip_vti] ? skb_free_head+0x21/0x40 vti_rcv+0x33/0x40 [ip_vti] xfrm4_ah_rcv+0x33/0x60 ip_local_deliver_finish+0x94/0x1e0 ip_local_deliver+0x6f/0xe0 ? ip_route_input_noref+0x28/0x50 ... # icmp-uni-vti.sh -6 -p ah -a sha256 -m tunnel -S fffffffe -k 1 -s 10 IPv6: RIP: 0010:xfrm_input+0x7f9/0x870 ... Call Trace: xfrm6_rcv_tnl+0x3c/0x40 vti6_rcv+0xd5/0xe0 [ip6_vti] xfrm6_ah_rcv+0x33/0x60 ip6_input_finish+0xee/0x460 ip6_input+0x3f/0xb0 ip6_rcv_finish+0x45/0xa0 ipv6_rcv+0x34b/0x540 xfrm_input() invokes xfrm_rcv_cb() -> vti_rcv_cb(), the last callback might call skb_scrub_packet(), which in turn can reset secpath. Fix it by adding a check that skb->sp is not NULL. Fixes: 7e9e9202bccc ("xfrm: Clear RX SKB secpath xfrm_offload") Signed-off-by: Alexey Kodanev Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 2515cd2bc5db..8ac9d32fb79d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -429,7 +429,8 @@ resume: nf_reset(skb); if (decaps) { - skb->sp->olen = 0; + if (skb->sp) + skb->sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return 0; @@ -440,7 +441,8 @@ resume: err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); if (xfrm_gro) { - skb->sp->olen = 0; + if (skb->sp) + skb->sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return err; -- cgit v1.2.3-70-g09d2 From 8320caeeffdefec3b58b9d4a7ed8e1079492fe7b Mon Sep 17 00:00:00 2001 From: Adrian Salido Date: Fri, 8 Sep 2017 10:55:27 -0700 Subject: HID: i2c-hid: allocate hid buffers for real worst case The buffer allocation is not currently accounting for an extra byte for the report id. This can cause an out of bounds access in function i2c_hid_set_or_send_report() with reportID > 15. Cc: stable@vger.kernel.org Signed-off-by: Adrian Salido Reviewed-by: Benson Leung Signed-off-by: Guenter Roeck Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 77396145d2d0..9145c2129a96 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -543,7 +543,8 @@ static int i2c_hid_alloc_buffers(struct i2c_hid *ihid, size_t report_size) { /* the worst case is computed from the set_report command with a * reportID > 15 and the maximum report length */ - int args_len = sizeof(__u8) + /* optional ReportID byte */ + int args_len = sizeof(__u8) + /* ReportID */ + sizeof(__u8) + /* optional ReportID byte */ sizeof(__u16) + /* data register */ sizeof(__u16) + /* size of the report */ report_size; /* report */ -- cgit v1.2.3-70-g09d2 From 747e1f60470b975363cbbfcde0c41a3166391be5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 13 Sep 2017 18:21:38 +0200 Subject: spi: armada-3700: Fix failing commands with quad-SPI A3700 SPI controller datasheet states that only the first line (IO0) is used to receive and send instructions, addresses and dummy bytes, unless for addresses during an RX operation in a quad SPI configuration (see p.821 of the Armada-3720-DB datasheet). Otherwise, some commands such as SPI NOR commands like READ_FROM_CACHE_DUAL_IO(0xeb) and READ_FROM_CACHE_DUAL_IO(0xbb) will fail because these commands must send address bytes through the four pins. Data transfer always use the four bytes with this setup. Thus, in quad SPI configuration, the A3700_SPI_ADDR_PIN bit must be set only in this case to inform the controller that it must use the number of pins indicated in the {A3700_SPI_DATA_PIN1,A3700_SPI_DATA_PIN0} field during the address cycles of an RX operation. Suggested-by: Ken Ma Signed-off-by: Miquel Raynal Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-armada-3700.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 6c7d7a460689..a28702b1fa05 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -161,7 +161,7 @@ static void a3700_spi_deactivate_cs(struct a3700_spi *a3700_spi, } static int a3700_spi_pin_mode_set(struct a3700_spi *a3700_spi, - unsigned int pin_mode) + unsigned int pin_mode, bool receiving) { u32 val; @@ -177,6 +177,9 @@ static int a3700_spi_pin_mode_set(struct a3700_spi *a3700_spi, break; case SPI_NBITS_QUAD: val |= A3700_SPI_DATA_PIN1; + /* RX during address reception uses 4-pin */ + if (receiving) + val |= A3700_SPI_ADDR_PIN; break; default: dev_err(&a3700_spi->master->dev, "wrong pin mode %u", pin_mode); @@ -653,7 +656,7 @@ static int a3700_spi_transfer_one(struct spi_master *master, else if (xfer->rx_buf) nbits = xfer->rx_nbits; - a3700_spi_pin_mode_set(a3700_spi, nbits); + a3700_spi_pin_mode_set(a3700_spi, nbits, xfer->rx_buf ? true : false); if (xfer->rx_buf) { /* Set read data length */ -- cgit v1.2.3-70-g09d2 From 6fd6fd68c9e2f3a206a098ef57b1d5548f9d00d1 Mon Sep 17 00:00:00 2001 From: Zachary Zhang Date: Wed, 13 Sep 2017 18:21:39 +0200 Subject: spi: armada-3700: Fix padding when sending not 4-byte aligned data In 4-byte transfer mode, extra padding/dummy bytes '0xff' would be sent in write operation if TX data is not 4-byte aligned since the SPI data register is always shifted out as whole 4 bytes. Fix this by using the header count feature that allows to transfer 0 to 4 bytes. Use it to actually send the first 1 to 3 bytes of data before the rest of the buffer that will hence be 4-byte aligned. Signed-off-by: Zachary Zhang Signed-off-by: Miquel Raynal Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 135 +++++++++++++----------------------------- 1 file changed, 41 insertions(+), 94 deletions(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index a28702b1fa05..9172cb2d2e7a 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -99,11 +99,6 @@ /* A3700_SPI_IF_TIME_REG */ #define A3700_SPI_CLK_CAPT_EDGE BIT(7) -/* Flags and macros for struct a3700_spi */ -#define A3700_INSTR_CNT 1 -#define A3700_ADDR_CNT 3 -#define A3700_DUMMY_CNT 1 - struct a3700_spi { struct spi_master *master; void __iomem *base; @@ -117,9 +112,6 @@ struct a3700_spi { u8 byte_len; u32 wait_mask; struct completion done; - u32 addr_cnt; - u32 instr_cnt; - size_t hdr_cnt; }; static u32 spireg_read(struct a3700_spi *a3700_spi, u32 offset) @@ -449,59 +441,43 @@ static void a3700_spi_set_cs(struct spi_device *spi, bool enable) static void a3700_spi_header_set(struct a3700_spi *a3700_spi) { - u32 instr_cnt = 0, addr_cnt = 0, dummy_cnt = 0; + unsigned int addr_cnt; u32 val = 0; /* Clear the header registers */ spireg_write(a3700_spi, A3700_SPI_IF_INST_REG, 0); spireg_write(a3700_spi, A3700_SPI_IF_ADDR_REG, 0); spireg_write(a3700_spi, A3700_SPI_IF_RMODE_REG, 0); + spireg_write(a3700_spi, A3700_SPI_IF_HDR_CNT_REG, 0); /* Set header counters */ if (a3700_spi->tx_buf) { - if (a3700_spi->buf_len <= a3700_spi->instr_cnt) { - instr_cnt = a3700_spi->buf_len; - } else if (a3700_spi->buf_len <= (a3700_spi->instr_cnt + - a3700_spi->addr_cnt)) { - instr_cnt = a3700_spi->instr_cnt; - addr_cnt = a3700_spi->buf_len - instr_cnt; - } else if (a3700_spi->buf_len <= a3700_spi->hdr_cnt) { - instr_cnt = a3700_spi->instr_cnt; - addr_cnt = a3700_spi->addr_cnt; - /* Need to handle the normal write case with 1 byte - * data - */ - if (!a3700_spi->tx_buf[instr_cnt + addr_cnt]) - dummy_cnt = a3700_spi->buf_len - instr_cnt - - addr_cnt; + /* + * when tx data is not 4 bytes aligned, there will be unexpected + * bytes out of SPI output register, since it always shifts out + * as whole 4 bytes. This might cause incorrect transaction with + * some devices. To avoid that, use SPI header count feature to + * transfer up to 3 bytes of data first, and then make the rest + * of data 4-byte aligned. + */ + addr_cnt = a3700_spi->buf_len % 4; + if (addr_cnt) { + val = (addr_cnt & A3700_SPI_ADDR_CNT_MASK) + << A3700_SPI_ADDR_CNT_BIT; + spireg_write(a3700_spi, A3700_SPI_IF_HDR_CNT_REG, val); + + /* Update the buffer length to be transferred */ + a3700_spi->buf_len -= addr_cnt; + + /* transfer 1~3 bytes through address count */ + val = 0; + while (addr_cnt--) { + val = (val << 8) | a3700_spi->tx_buf[0]; + a3700_spi->tx_buf++; + } + spireg_write(a3700_spi, A3700_SPI_IF_ADDR_REG, val); } - val |= ((instr_cnt & A3700_SPI_INSTR_CNT_MASK) - << A3700_SPI_INSTR_CNT_BIT); - val |= ((addr_cnt & A3700_SPI_ADDR_CNT_MASK) - << A3700_SPI_ADDR_CNT_BIT); - val |= ((dummy_cnt & A3700_SPI_DUMMY_CNT_MASK) - << A3700_SPI_DUMMY_CNT_BIT); } - spireg_write(a3700_spi, A3700_SPI_IF_HDR_CNT_REG, val); - - /* Update the buffer length to be transferred */ - a3700_spi->buf_len -= (instr_cnt + addr_cnt + dummy_cnt); - - /* Set Instruction */ - val = 0; - while (instr_cnt--) { - val = (val << 8) | a3700_spi->tx_buf[0]; - a3700_spi->tx_buf++; - } - spireg_write(a3700_spi, A3700_SPI_IF_INST_REG, val); - - /* Set Address */ - val = 0; - while (addr_cnt--) { - val = (val << 8) | a3700_spi->tx_buf[0]; - a3700_spi->tx_buf++; - } - spireg_write(a3700_spi, A3700_SPI_IF_ADDR_REG, val); } static int a3700_is_wfifo_full(struct a3700_spi *a3700_spi) @@ -515,35 +491,12 @@ static int a3700_is_wfifo_full(struct a3700_spi *a3700_spi) static int a3700_spi_fifo_write(struct a3700_spi *a3700_spi) { u32 val; - int i = 0; while (!a3700_is_wfifo_full(a3700_spi) && a3700_spi->buf_len) { - val = 0; - if (a3700_spi->buf_len >= 4) { - val = cpu_to_le32(*(u32 *)a3700_spi->tx_buf); - spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, val); - - a3700_spi->buf_len -= 4; - a3700_spi->tx_buf += 4; - } else { - /* - * If the remained buffer length is less than 4-bytes, - * we should pad the write buffer with all ones. So that - * it avoids overwrite the unexpected bytes following - * the last one. - */ - val = GENMASK(31, 0); - while (a3700_spi->buf_len) { - val &= ~(0xff << (8 * i)); - val |= *a3700_spi->tx_buf++ << (8 * i); - i++; - a3700_spi->buf_len--; - - spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, - val); - } - break; - } + val = cpu_to_le32(*(u32 *)a3700_spi->tx_buf); + spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, val); + a3700_spi->buf_len -= 4; + a3700_spi->tx_buf += 4; } return 0; @@ -648,9 +601,6 @@ static int a3700_spi_transfer_one(struct spi_master *master, a3700_spi->rx_buf = xfer->rx_buf; a3700_spi->buf_len = xfer->len; - /* SPI transfer headers */ - a3700_spi_header_set(a3700_spi); - if (xfer->tx_buf) nbits = xfer->tx_nbits; else if (xfer->rx_buf) @@ -658,6 +608,12 @@ static int a3700_spi_transfer_one(struct spi_master *master, a3700_spi_pin_mode_set(a3700_spi, nbits, xfer->rx_buf ? true : false); + /* Flush the FIFOs */ + a3700_spi_fifo_flush(a3700_spi); + + /* Transfer first bytes of data when buffer is not 4-byte aligned */ + a3700_spi_header_set(a3700_spi); + if (xfer->rx_buf) { /* Set read data length */ spireg_write(a3700_spi, A3700_SPI_IF_DIN_CNT_REG, @@ -736,16 +692,11 @@ static int a3700_spi_transfer_one(struct spi_master *master, dev_err(&spi->dev, "wait wfifo empty timed out\n"); return -ETIMEDOUT; } - } else { - /* - * If the instruction in SPI_INSTR does not require data - * to be written to the SPI device, wait until SPI_RDY - * is 1 for the SPI interface to be in idle. - */ - if (!a3700_spi_transfer_wait(spi, A3700_SPI_XFER_RDY)) { - dev_err(&spi->dev, "wait xfer ready timed out\n"); - return -ETIMEDOUT; - } + } + + if (!a3700_spi_transfer_wait(spi, A3700_SPI_XFER_RDY)) { + dev_err(&spi->dev, "wait xfer ready timed out\n"); + return -ETIMEDOUT; } val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG); @@ -837,10 +788,6 @@ static int a3700_spi_probe(struct platform_device *pdev) memset(spi, 0, sizeof(struct a3700_spi)); spi->master = master; - spi->instr_cnt = A3700_INSTR_CNT; - spi->addr_cnt = A3700_ADDR_CNT; - spi->hdr_cnt = A3700_INSTR_CNT + A3700_ADDR_CNT + - A3700_DUMMY_CNT; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); spi->base = devm_ioremap_resource(dev, res); -- cgit v1.2.3-70-g09d2 From 8b5d729a3a8a07fe273af266e90bc52114dd69a6 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Sun, 10 Sep 2017 14:55:29 +0100 Subject: spi: stm32: Fix logical error in stm32_spi_prepare_mbr() stm32_spi_prepare_mbr() is returning an error value when div is less than SPI_MBR_DIV_MIN *and* greater than SPI_MBR_DIV_MAX, which always evaluates to false. This should change to use *or*. Signed-off-by: Christos Gkekas Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 680cdf549506..ba9743fa2326 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -263,8 +263,8 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 speed_hz) * no need to check it there. * However, we need to ensure the following calculations. */ - if ((div < SPI_MBR_DIV_MIN) && - (div > SPI_MBR_DIV_MAX)) + if (div < SPI_MBR_DIV_MIN || + div > SPI_MBR_DIV_MAX) return -EINVAL; /* Determine the first power of 2 greater than or equal to div */ -- cgit v1.2.3-70-g09d2 From a2b4a79b88b24c49d98d45a06a014ffd22ada1a4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 10 Sep 2017 20:29:45 +0300 Subject: spi: uapi: spidev: add missing ioctl header The SPI_IOC_MESSAGE() macro references _IOC_SIZEBITS. Add linux/ioctl.h to make sure this macro is defined. This fixes the following build failure of lcdproc with the musl libc: In file included from .../sysroot/usr/include/sys/ioctl.h:7:0, from hd44780-spi.c:31: hd44780-spi.c: In function 'spi_transfer': hd44780-spi.c:89:24: error: '_IOC_SIZEBITS' undeclared (first use in this function) status = ioctl(p->fd, SPI_IOC_MESSAGE(1), &xfer); ^ Signed-off-by: Baruch Siach Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- include/uapi/linux/spi/spidev.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h index dd5f21e75805..856de39d0b89 100644 --- a/include/uapi/linux/spi/spidev.h +++ b/include/uapi/linux/spi/spidev.h @@ -23,6 +23,7 @@ #define SPIDEV_H #include +#include /* User space versions of kernel symbols for SPI clocking modes, * matching -- cgit v1.2.3-70-g09d2 From 993f0d93f8538c15bd5c12a1a9fd74c777efea1b Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Thu, 7 Sep 2017 17:44:12 -0700 Subject: HID: wacom: generic: Send MSC_SERIAL and ABS_MISC when leaving prox The latest generation of pro devices (MobileStudio Pro, 2nd-gen Intuos Pro, Cintiq Pro) send a serial number of '0' whenever the pen is too far away for reliable communication. Userspace defines that a serial number of '0' is invalid, so we need to be careful not to actually forward this value. Additionally, since EMR ISDv4 devices do not support serial numbers or tool IDs, we'd like to not send these events if they aren't necessary. The existing code achieves these goals by adding a check for a non-zero serial number within the wacom_wac_pen_report function. The MSC_SERIAL and ABS_MISC events are only sent if the serial number is non-zero. This code fails, however when the pen for a pro device leaves proximity. When the pen leaves prox and the tablet sends a serial of 0, wacom_wac_pen_event dutifully clears the serial number. When wacom_wac_pen_report is called, it does not send either the MSC_SERIAL of the exiting tool nor an ABS_MISC event. This patch prevents the wacom_wac_pen_event function from clearing an already-set serial number. This ensures that we have the serial number handy when exiting proximity, but requires us to manually clear it afterwards to ensure the driver does not send stale data (e.g. when switching between AES pens that report a serial nubmer of 0 for the first few fully in-proximity packets). Fixes: f85c9dc678 ("HID: wacom: generic: Support tool ID and additional tool types") Cc: stable # v4.10 Signed-off-by: Ping Cheng Signed-off-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 78d0398904dc..e2ba36da6e20 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2141,8 +2141,10 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field wacom_wac->hid_data.tipswitch |= value; return; case HID_DG_TOOLSERIALNUMBER: - wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL); - wacom_wac->serial[0] |= (__u32)value; + if (value) { + wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL); + wacom_wac->serial[0] |= (__u32)value; + } return; case HID_DG_TWIST: /* @@ -2156,15 +2158,17 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field wacom_wac->hid_data.sense_state = value; return; case WACOM_HID_WD_SERIALHI: - wacom_wac->serial[0] = (wacom_wac->serial[0] & 0xFFFFFFFF); - wacom_wac->serial[0] |= ((__u64)value) << 32; - /* - * Non-USI EMR devices may contain additional tool type - * information here. See WACOM_HID_WD_TOOLTYPE case for - * more details. - */ - if (value >> 20 == 1) { - wacom_wac->id[0] |= value & 0xFFFFF; + if (value) { + wacom_wac->serial[0] = (wacom_wac->serial[0] & 0xFFFFFFFF); + wacom_wac->serial[0] |= ((__u64)value) << 32; + /* + * Non-USI EMR devices may contain additional tool type + * information here. See WACOM_HID_WD_TOOLTYPE case for + * more details. + */ + if (value >> 20 == 1) { + wacom_wac->id[0] |= value & 0xFFFFF; + } } return; case WACOM_HID_WD_TOOLTYPE: @@ -2279,6 +2283,7 @@ static void wacom_wac_pen_report(struct hid_device *hdev, if (!prox) { wacom_wac->tool[0] = 0; wacom_wac->id[0] = 0; + wacom_wac->serial[0] = 0; } } -- cgit v1.2.3-70-g09d2 From 92380b572d95caf48f8424746aeee63c5a2b1922 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Thu, 7 Sep 2017 17:47:38 -0700 Subject: HID: wacom: generic: Clear ABS_MISC when tool leaves proximity The tool ID information sent in ABS_MISC is expected to be reset to 0 when a tool leaves proximity. Not doing this can cause problems if a tool is removed and then re-introduced. Kernel event filtering will prevent the (identical) ABS_MISC event from being sent when the tool re-enters proxmity. This can cause userspace to not properly set the tool ID. Fixes: f85c9dc678 ("HID: wacom: generic: Support tool ID and additional tool types") Cc: stable # v4.10 Signed-off-by: Ping Cheng Signed-off-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index e2ba36da6e20..aa692e28b2cd 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2272,7 +2272,7 @@ static void wacom_wac_pen_report(struct hid_device *hdev, input_report_key(input, wacom_wac->tool[0], prox); if (wacom_wac->serial[0]) { input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]); - input_report_abs(input, ABS_MISC, id); + input_report_abs(input, ABS_MISC, prox ? id : 0); } wacom_wac->hid_data.tipswitch = false; -- cgit v1.2.3-70-g09d2 From d0b6e0a8ef24b1b07078ababe5d91bcdf4f4264a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Sep 2017 21:36:55 +0200 Subject: watchdog/hardlockup: Provide interface to stop/restart perf events Provide an interface to stop and restart perf NMI watchdog events on all CPUs. This is only usable during init and especially for handling the perf HT bug on Intel machines. It's safe to use it this way as nothing can start/stop the NMI watchdog in parallel. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.167649596@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 4 ++++ kernel/watchdog_hld.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a36abe2da13e..b24d4a58674a 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -75,7 +75,11 @@ static inline void hardlockup_detector_disable(void) {} #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); +extern void hardlockup_detector_perf_stop(void); +extern void hardlockup_detector_perf_restart(void); #else +static inline void hardlockup_detector_perf_stop(void) { } +static inline void hardlockup_detector_perf_restart(void) { } #if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline void arch_touch_nmi_watchdog(void) {} #endif diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 3a09ea1b1d3d..c9586ebc2e98 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -261,3 +261,44 @@ void watchdog_nmi_disable(unsigned int cpu) firstcpu_err = 0; } } + +/** + * hardlockup_detector_perf_stop - Globally stop watchdog events + * + * Special interface for x86 to handle the perf HT bug. + */ +void __init hardlockup_detector_perf_stop(void) +{ + int cpu; + + lockdep_assert_cpus_held(); + + for_each_online_cpu(cpu) { + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (event) + perf_event_disable(event); + } +} + +/** + * hardlockup_detector_perf_restart - Globally restart watchdog events + * + * Special interface for x86 to handle the perf HT bug. + */ +void __init hardlockup_detector_perf_restart(void) +{ + int cpu; + + lockdep_assert_cpus_held(); + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return; + + for_each_online_cpu(cpu) { + struct perf_event *event = per_cpu(watchdog_ev, cpu); + + if (event) + perf_event_enable(event); + } +} -- cgit v1.2.3-70-g09d2 From 2406e3b166eee42777a6b0b38f52f924454474d7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Sep 2017 21:36:56 +0200 Subject: perf/x86/intel, watchdog/core: Sanitize PMU HT bug workaround The lockup_detector_suspend/resume() interface is broken in several ways especially as it results in recursive locking of the CPU hotplug lock. Use the new stop/restart interface in the perf NMI watchdog to temporarily disable and reenable the already active watchdog events. That's enough to handle it. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.247141871@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 829e89cfcee2..9fb9a1f1e47b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4409,10 +4409,9 @@ static __init int fixup_ht_bug(void) return 0; } - if (lockup_detector_suspend() != 0) { - pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n"); - return 0; - } + cpus_read_lock(); + + hardlockup_detector_perf_stop(); x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); @@ -4420,9 +4419,7 @@ static __init int fixup_ht_bug(void) x86_pmu.commit_scheduling = NULL; x86_pmu.stop_scheduling = NULL; - lockup_detector_resume(); - - cpus_read_lock(); + hardlockup_detector_perf_restart(); for_each_online_cpu(c) free_excl_cntrs(c); -- cgit v1.2.3-70-g09d2 From 6554fd8cf06db86f861bb24d7487b2873ca444c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:36:57 +0200 Subject: watchdog/core: Provide interface to stop from poweroff() PARISC has a a busy looping power off routine. If the watchdog is enabled the watchdog timer will still fire, but the thread is not running, which causes the softlockup watchdog to trigger. Provide a interface which allows to turn the watchdog off. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Helge Deller Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Cc: linux-parisc@vger.kernel.org Link: http://lkml.kernel.org/r/20170912194146.327343752@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 6 +++--- kernel/watchdog.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index b24d4a58674a..85bb268be39c 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -12,10 +12,10 @@ #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); +void lockup_detector_soft_poweroff(void); #else -static inline void lockup_detector_init(void) -{ -} +static inline void lockup_detector_init(void) { } +static inline void lockup_detector_soft_poweroff(void) { } #endif #ifdef CONFIG_SOFTLOCKUP_DETECTOR diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f5d52024f6b7..f23e373aa3bf 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -333,7 +333,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; - if (atomic_read(&watchdog_park_in_progress) != 0) + if (!watchdog_enabled || + atomic_read(&watchdog_park_in_progress) != 0) return HRTIMER_NORESTART; /* kick the hardlockup detector */ @@ -660,6 +661,17 @@ static void set_sample_period(void) } #endif /* SOFTLOCKUP */ +/** + * lockup_detector_soft_poweroff - Interface to stop lockup detector(s) + * + * Special interface for parisc. It prevents lockup detector warnings from + * the default pm_poweroff() function which busy loops forever. + */ +void lockup_detector_soft_poweroff(void) +{ + watchdog_enabled = 0; +} + /* * Suspend the hard and soft lockup detector by parking the watchdog threads. */ -- cgit v1.2.3-70-g09d2 From 47bb4baf7df43ac8bbc51c24022466972ba29ef1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:36:58 +0200 Subject: parisc, watchdog/core: Use lockup_detector_stop() The broken lockup_detector_suspend/resume() interface is going away. Use the new lockup_detector_soft_poweroff() interface to stop the watchdog from the busy looping power off routine. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Helge Deller Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Cc: linux-parisc@vger.kernel.org Link: http://lkml.kernel.org/r/20170912194146.407385557@linutronix.de Signed-off-by: Ingo Molnar --- arch/parisc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index a45a67d526f8..30f92391a93e 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -146,7 +146,7 @@ void machine_power_off(void) /* prevent soft lockup/stalled CPU messages for endless loop. */ rcu_sysrq_start(); - lockup_detector_suspend(); + lockup_detector_soft_poweroff(); for (;;); } -- cgit v1.2.3-70-g09d2 From 5490125d77a43016b26f629d4b485e2c62172551 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:36:59 +0200 Subject: watchdog/core: Remove broken suspend/resume interfaces This interface has several issues: - It's causing recursive locking of the hotplug lock. - It's complete overkill to teardown all threads and then recreate them The same can be achieved with the simple hardlockup_detector_perf_stop / restart() interfaces. The abuse from the busy looping poweroff() loop of PARISC has been solved as well. Remove the cruft. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.487537732@linutronix.de Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/watchdog.c | 3 -- include/linux/nmi.h | 12 ------ kernel/watchdog.c | 89 +----------------------------------------- 3 files changed, 1 insertion(+), 103 deletions(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 2f6eadd9408d..5ded171f02d6 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -310,9 +310,6 @@ static int start_wd_on_cpu(unsigned int cpu) if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) return 0; - if (watchdog_suspended) - return 0; - if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) return 0; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 85bb268be39c..7eefe7abf44b 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -164,7 +164,6 @@ extern int watchdog_thresh; extern unsigned long watchdog_enabled; extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; -extern int __read_mostly watchdog_suspended; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; @@ -192,17 +191,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int proc_watchdog_cpumask(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int lockup_detector_suspend(void); -extern void lockup_detector_resume(void); -#else -static inline int lockup_detector_suspend(void) -{ - return 0; -} - -static inline void lockup_detector_resume(void) -{ -} #endif #ifdef CONFIG_HAVE_ACPI_APEI_NMI diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f23e373aa3bf..b2d46757917e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -97,19 +97,6 @@ unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); * unregistered/stopped, so it is an indicator whether the threads exist. */ static int __read_mostly watchdog_running; -/* - * If a subsystem has a need to deactivate the watchdog temporarily, it - * can use the suspend/resume interface to achieve this. The content of - * the 'watchdog_suspended' variable reflects this state. Existing threads - * are parked/unparked by the lockup_detector_{suspend|resume} functions - * (see comment blocks pertaining to those functions for further details). - * - * 'watchdog_suspended' also prevents threads from being registered/started - * or unregistered/stopped via parameters in /proc/sys/kernel, so the state - * of 'watchdog_running' cannot change while the watchdog is deactivated - * temporarily (see related code in 'proc' handlers). - */ -int __read_mostly watchdog_suspended; /* * These functions can be overridden if an architecture implements its @@ -136,7 +123,6 @@ void __weak watchdog_nmi_disable(unsigned int cpu) * - watchdog_cpumask * - sysctl_hardlockup_all_cpu_backtrace * - hardlockup_panic - * - watchdog_suspended */ void __weak watchdog_nmi_reconfigure(void) { @@ -672,61 +658,6 @@ void lockup_detector_soft_poweroff(void) watchdog_enabled = 0; } -/* - * Suspend the hard and soft lockup detector by parking the watchdog threads. - */ -int lockup_detector_suspend(void) -{ - int ret = 0; - - get_online_cpus(); - mutex_lock(&watchdog_proc_mutex); - /* - * Multiple suspend requests can be active in parallel (counted by - * the 'watchdog_suspended' variable). If the watchdog threads are - * running, the first caller takes care that they will be parked. - * The state of 'watchdog_running' cannot change while a suspend - * request is active (see related code in 'proc' handlers). - */ - if (watchdog_running && !watchdog_suspended) - ret = watchdog_park_threads(); - - if (ret == 0) - watchdog_suspended++; - else { - watchdog_disable_all_cpus(); - pr_err("Failed to suspend lockup detectors, disabled\n"); - watchdog_enabled = 0; - } - - watchdog_nmi_reconfigure(); - - mutex_unlock(&watchdog_proc_mutex); - - return ret; -} - -/* - * Resume the hard and soft lockup detector by unparking the watchdog threads. - */ -void lockup_detector_resume(void) -{ - mutex_lock(&watchdog_proc_mutex); - - watchdog_suspended--; - /* - * The watchdog threads are unparked if they were previously running - * and if there is no more active suspend request. - */ - if (watchdog_running && !watchdog_suspended) - watchdog_unpark_threads(); - - watchdog_nmi_reconfigure(); - - mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); -} - #ifdef CONFIG_SYSCTL /* @@ -775,12 +706,6 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, get_online_cpus(); mutex_lock(&watchdog_proc_mutex); - if (watchdog_suspended) { - /* no parameter changes allowed while watchdog is suspended */ - err = -EAGAIN; - goto out; - } - /* * If the parameter is being read return the state of the corresponding * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the @@ -872,12 +797,6 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, get_online_cpus(); mutex_lock(&watchdog_proc_mutex); - if (watchdog_suspended) { - /* no parameter changes allowed while watchdog is suspended */ - err = -EAGAIN; - goto out; - } - old = ACCESS_ONCE(watchdog_thresh); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); @@ -917,12 +836,6 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, get_online_cpus(); mutex_lock(&watchdog_proc_mutex); - if (watchdog_suspended) { - /* no parameter changes allowed while watchdog is suspended */ - err = -EAGAIN; - goto out; - } - err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); if (!err && write) { /* Remove impossible cpus to keep sysctl output cleaner. */ @@ -941,7 +854,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, watchdog_nmi_reconfigure(); } -out: + mutex_unlock(&watchdog_proc_mutex); put_online_cpus(); return err; -- cgit v1.2.3-70-g09d2 From b7a349819d4b9b5db64e523351e66a79a758eaa5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:00 +0200 Subject: watchdog/core: Rework CPU hotplug locking The watchdog proc interface causes extensive recursive locking of the CPU hotplug percpu rwsem, which is deadlock prone. Replace the get/put_online_cpus() pairs with cpu_hotplug_disable()/enable() calls for now. Later patches will remove that requirement completely. Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.568079057@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index b2d46757917e..cd79f644ea34 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -703,7 +703,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, int err, old, new; int *watchdog_param = (int *)table->data; - get_online_cpus(); + cpu_hotplug_disable(); mutex_lock(&watchdog_proc_mutex); /* @@ -752,7 +752,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, } out: mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + cpu_hotplug_enable(); return err; } @@ -794,7 +794,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, { int err, old, new; - get_online_cpus(); + cpu_hotplug_disable(); mutex_lock(&watchdog_proc_mutex); old = ACCESS_ONCE(watchdog_thresh); @@ -818,7 +818,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, } out: mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + cpu_hotplug_enable(); return err; } @@ -833,7 +833,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, { int err; - get_online_cpus(); + cpu_hotplug_disable(); mutex_lock(&watchdog_proc_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); @@ -856,7 +856,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, } mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); + cpu_hotplug_enable(); return err; } -- cgit v1.2.3-70-g09d2 From 946d197794b23202b8b46c43016747c72fe23393 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:01 +0200 Subject: watchdog/core: Rename watchdog_proc_mutex Following patches will use the mutex for other purposes as well. Rename it as it is not longer a proc specific thing. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.647714850@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index cd79f644ea34..7c3a0a76b41b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -29,8 +29,7 @@ #include #include -/* Watchdog configuration */ -static DEFINE_MUTEX(watchdog_proc_mutex); +static DEFINE_MUTEX(watchdog_mutex); int __read_mostly nmi_watchdog_enabled; @@ -704,7 +703,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, int *watchdog_param = (int *)table->data; cpu_hotplug_disable(); - mutex_lock(&watchdog_proc_mutex); + mutex_lock(&watchdog_mutex); /* * If the parameter is being read return the state of the corresponding @@ -751,7 +750,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, err = proc_watchdog_update(); } out: - mutex_unlock(&watchdog_proc_mutex); + mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } @@ -795,7 +794,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, int err, old, new; cpu_hotplug_disable(); - mutex_lock(&watchdog_proc_mutex); + mutex_lock(&watchdog_mutex); old = ACCESS_ONCE(watchdog_thresh); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); @@ -817,7 +816,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, set_sample_period(); } out: - mutex_unlock(&watchdog_proc_mutex); + mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } @@ -834,7 +833,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, int err; cpu_hotplug_disable(); - mutex_lock(&watchdog_proc_mutex); + mutex_lock(&watchdog_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); if (!err && write) { @@ -855,7 +854,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, watchdog_nmi_reconfigure(); } - mutex_unlock(&watchdog_proc_mutex); + mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } -- cgit v1.2.3-70-g09d2 From 7a3558200739e1378800a7a6d7f63c031115f7a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:02 +0200 Subject: watchdog/core: Mark hardlockup_detector_disable() __init The function is only used by the KVM init code. Mark it __init to prevent creative abuse. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.727134632@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7c3a0a76b41b..1c185d9dd468 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -55,7 +55,7 @@ unsigned int __read_mostly hardlockup_panic = * kernel command line parameters are parsed, because otherwise it is not * possible to override this in hardlockup_panic_setup(). */ -void hardlockup_detector_disable(void) +void __init hardlockup_detector_disable(void) { watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; } -- cgit v1.2.3-70-g09d2 From 20d853fd0703b1d73c35a22024c0d4fcbcc57c8c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:03 +0200 Subject: watchdog/hardlockup/perf: Remove broken self disable on failure The self disabling feature is broken vs. CPU hotplug locking: CPU 0 CPU 1 cpus_write_lock(); cpu_up(1) wait_for_completion() .... unpark_watchdog() ->unpark() perf_event_create() <- fails watchdog_enable &= ~NMI_WATCHDOG; .... cpus_write_unlock(); CPU 2 cpus_write_lock() cpu_down(2) wait_for_completion() wakeup(watchdog); watchdog() if (!(watchdog_enable & NMI_WATCHDOG)) watchdog_nmi_disable() perf_event_disable() .... cpus_read_lock(); stop_smpboot_threads() park_watchdog(); wait_for_completion(watchdog->parked); Result: End of hotplug and instantaneous full lockup of the machine. There is a similar problem with disabling the watchdog via the user space interface as the sysctl function fiddles with watchdog_enable directly. It's very debatable whether this is required at all. If the watchdog works nicely on N CPUs and it fails to enable on the N + 1 CPU either during hotplug or because the user space interface disabled it via sysctl cpumask and then some perf user grabbed the counter which is then unavailable for the watchdog when the sysctl cpumask gets changed back. There is no real justification for this. One of the reasons WHY this is done is the utter stupidity of the init code of the perf NMI watchdog. Instead of checking upfront at boot whether PERF is available and functional at all, it just does this check at run time over and over when user space fiddles with the sysctl. That's broken beyond repair along with the idiotic error code dependent warn level printks and the even more silly printk rate limiting. If the init code checks whether perf works at boot time, then this mess can be more or less avoided completely. Perf does not come magically into life at runtime. Brain usage while coding is overrated. Remove the cruft and add a temporary safe guard which gets removed later. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.806708429@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 15 --------------- kernel/watchdog_hld.c | 20 +++++++------------- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 1c185d9dd468..af000956286c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -485,21 +485,6 @@ static void watchdog(unsigned int cpu) __this_cpu_write(soft_lockup_hrtimer_cnt, __this_cpu_read(hrtimer_interrupts)); __touch_watchdog(); - - /* - * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the - * failure path. Check for failures that can occur asynchronously - - * for example, when CPUs are on-lined - and shut down the hardware - * perf event on each CPU accordingly. - * - * The only non-obvious place this bit can be cleared is through - * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a - * pr_info here would be too noisy as it would result in a message - * every few seconds if the hardlockup was disabled but the softlockup - * enabled. - */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - watchdog_nmi_disable(cpu); } static struct smp_hotplug_thread watchdog_threads = { diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index c9586ebc2e98..7b602714ea53 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -23,6 +23,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); static unsigned long hardlockup_allcpu_dumped; +static bool hardlockup_detector_disabled; void arch_touch_nmi_watchdog(void) { @@ -178,6 +179,10 @@ int watchdog_nmi_enable(unsigned int cpu) if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) goto out; + /* A failure disabled the hardlockup detector permanently */ + if (hardlockup_detector_disabled) + return -ENODEV; + /* is it already setup and enabled? */ if (event && event->state > PERF_EVENT_STATE_OFF) goto out; @@ -206,18 +211,6 @@ int watchdog_nmi_enable(unsigned int cpu) goto out_save; } - /* - * Disable the hard lockup detector if _any_ CPU fails to set up - * set up the hardware perf event. The watchdog() function checks - * the NMI_WATCHDOG_ENABLED bit periodically. - * - * The barriers are for syncing up watchdog_enabled across all the - * cpus, as clear_bit() does not use barriers. - */ - smp_mb__before_atomic(); - clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled); - smp_mb__after_atomic(); - /* skip displaying the same error again */ if (!firstcpu && (PTR_ERR(event) == firstcpu_err)) return PTR_ERR(event); @@ -232,7 +225,8 @@ int watchdog_nmi_enable(unsigned int cpu) pr_err("disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event)); - pr_info("Shutting down hard lockup detector on all cpus\n"); + pr_info("Disabling hard lockup detector permanently\n"); + hardlockup_detector_disabled = true; return PTR_ERR(event); -- cgit v1.2.3-70-g09d2 From 941154bd6937a710ae9193a3c733c0029e5ae7b8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:04 +0200 Subject: watchdog/hardlockup/perf: Prevent CPU hotplug deadlock The following deadlock is possible in the watchdog hotplug code: cpus_write_lock() ... takedown_cpu() smpboot_park_threads() smpboot_park_thread() kthread_park() ->park() := watchdog_disable() watchdog_nmi_disable() perf_event_release_kernel(); put_event() _free_event() ->destroy() := hw_perf_event_destroy() x86_release_hardware() release_ds_buffers() get_online_cpus() when a per cpu watchdog perf event is destroyed which drops the last reference to the PMU hardware. The cleanup code there invokes get_online_cpus() which instantly deadlocks because the hotplug percpu rwsem is write locked. To solve this add a deferring mechanism: cpus_write_lock() kthread_park() watchdog_nmi_disable(deferred) perf_event_disable(event); move_event_to_deferred(event); .... cpus_write_unlock() cleaup_deferred_events() perf_event_release_kernel() This is still properly serialized against concurrent hotplug via the cpu_add_remove_lock, which is held by the task which initiated the hotplug event. This is also used to handle event destruction when the watchdog threads are parked via other mechanisms than CPU hotplug. Analyzed-by: Peter Zijlstra Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.884469246@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 6 ++++++ kernel/cpu.c | 6 ++++++ kernel/watchdog.c | 25 +++++++++++++++++++++++++ kernel/watchdog_hld.c | 34 ++++++++++++++++++++++++++++------ 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 7eefe7abf44b..80354e6fa86d 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -13,9 +13,11 @@ #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); void lockup_detector_soft_poweroff(void); +void lockup_detector_cleanup(void); #else static inline void lockup_detector_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } +static inline void lockup_detector_cleanup(void) { } #endif #ifdef CONFIG_SOFTLOCKUP_DETECTOR @@ -77,9 +79,13 @@ static inline void hardlockup_detector_disable(void) {} extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); +extern void hardlockup_detector_perf_disable(void); +extern void hardlockup_detector_perf_cleanup(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } +static inline void hardlockup_detector_perf_disable(void) { } +static inline void hardlockup_detector_perf_cleanup(void) { } #if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline void arch_touch_nmi_watchdog(void) {} #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index acf5308fad51..a96b348591df 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -734,6 +735,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, out: cpus_write_unlock(); + /* + * Do post unplug cleanup. This is still protected against + * concurrent CPU hotplug via cpu_add_remove_lock. + */ + lockup_detector_cleanup(); return ret; } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index af000956286c..dd1fd59683c5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -109,8 +109,10 @@ int __weak watchdog_nmi_enable(unsigned int cpu) { return 0; } + void __weak watchdog_nmi_disable(unsigned int cpu) { + hardlockup_detector_perf_disable(); } /* @@ -193,6 +195,8 @@ __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); #endif #endif +static void __lockup_detector_cleanup(void); + /* * Hard-lockup warnings should be triggered after just a few seconds. Soft- * lockups can have false positives under extreme conditions. So we generally @@ -631,6 +635,24 @@ static void set_sample_period(void) } #endif /* SOFTLOCKUP */ +static void __lockup_detector_cleanup(void) +{ + lockdep_assert_held(&watchdog_mutex); + hardlockup_detector_perf_cleanup(); +} + +/** + * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes + * + * Caller must not hold the cpu hotplug rwsem. + */ +void lockup_detector_cleanup(void) +{ + mutex_lock(&watchdog_mutex); + __lockup_detector_cleanup(); + mutex_unlock(&watchdog_mutex); +} + /** * lockup_detector_soft_poweroff - Interface to stop lockup detector(s) * @@ -665,6 +687,8 @@ static int proc_watchdog_update(void) watchdog_nmi_reconfigure(); + __lockup_detector_cleanup(); + return err; } @@ -837,6 +861,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, } watchdog_nmi_reconfigure(); + __lockup_detector_cleanup(); } mutex_unlock(&watchdog_mutex); diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 7b602714ea53..94111ccb09b5 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -21,6 +21,8 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); +static DEFINE_PER_CPU(struct perf_event *, dead_event); +static struct cpumask dead_events_mask; static unsigned long hardlockup_allcpu_dumped; static bool hardlockup_detector_disabled; @@ -239,16 +241,18 @@ out: return 0; } -void watchdog_nmi_disable(unsigned int cpu) +/** + * hardlockup_detector_perf_disable - Disable the local event + */ +void hardlockup_detector_perf_disable(void) { - struct perf_event *event = per_cpu(watchdog_ev, cpu); + struct perf_event *event = this_cpu_read(watchdog_ev); if (event) { perf_event_disable(event); - per_cpu(watchdog_ev, cpu) = NULL; - - /* should be in cleanup, but blocks oprofile */ - perf_event_release_kernel(event); + this_cpu_write(watchdog_ev, NULL); + this_cpu_write(dead_event, event); + cpumask_set_cpu(smp_processor_id(), &dead_events_mask); /* watchdog_nmi_enable() expects this to be zero initially. */ if (atomic_dec_and_test(&watchdog_cpus)) @@ -256,6 +260,24 @@ void watchdog_nmi_disable(unsigned int cpu) } } +/** + * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them + * + * Called from lockup_detector_cleanup(). Serialized by the caller. + */ +void hardlockup_detector_perf_cleanup(void) +{ + int cpu; + + for_each_cpu(cpu, &dead_events_mask) { + struct perf_event *event = per_cpu(dead_event, cpu); + + per_cpu(dead_event, cpu) = NULL; + perf_event_release_kernel(event); + } + cpumask_clear(&dead_events_mask); +} + /** * hardlockup_detector_perf_stop - Globally stop watchdog events * -- cgit v1.2.3-70-g09d2 From 01f0a02701cbcf32d22cfc9d1ab9a3f0ff2ba68c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:05 +0200 Subject: watchdog/core: Remove the park_in_progress obfuscation Commit: b94f51183b06 ("kernel/watchdog: prevent false hardlockup on overloaded system") tries to fix the following issue: proc_write() set_sample_period() <--- New sample period becoms visible <----- Broken starts proc_watchdog_update() watchdog_enable_all_cpus() watchdog_hrtimer_fn() update_watchdog_all_cpus() restart_timer(sample_period) watchdog_park_threads() thread->park() disable_nmi() <----- Broken ends The reason why this is broken is that the update of the watchdog threshold becomes immediately effective and visible for the hrtimer function which uses that value to rearm the timer. But the NMI/perf side still uses the old value up to the point where it is disabled. If the rate has been lowered then the NMI can run fast enough to 'detect' a hard lockup because the timer has not fired due to the longer period. The patch 'fixed' this by adding a variable: proc_write() set_sample_period() <----- Broken starts proc_watchdog_update() watchdog_enable_all_cpus() watchdog_hrtimer_fn() update_watchdog_all_cpus() restart_timer(sample_period) watchdog_park_threads() park_in_progress = 1 <----- Broken ends nmi_watchdog() if (park_in_progress) return; The only effect of this variable was to make the window where the breakage can hit small enough that it was not longer observable in testing. From a correctness point of view it is a pointless bandaid which merily papers over the root cause: the unsychronized update of the variable. Looking deeper into the related code pathes unearthed similar problems in the watchdog_start()/stop() functions. watchdog_start() perf_nmi_event_start() hrtimer_start() watchdog_stop() hrtimer_cancel() perf_nmi_event_stop() In both cases the call order is wrong because if the tasks gets preempted or the VM gets scheduled out long enough after the first call, then there is a chance that the next NMI will see a stale hrtimer interrupt count and trigger a false positive hard lockup splat. Get rid of park_in_progress so the code can be gradually deobfuscated and pruned from several layers of duct tape papering over the root cause, which has been either ignored or not understood at all. Once this is removed the underlying problem will be fixed by rewriting the proc interface to do a proper synchronized update. Address the start/stop() ordering problem as well by reverting the call order, so this part is at least correct now. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1709052038270.2393@nanos Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 1 - kernel/watchdog.c | 37 +++++++++++++++++-------------------- kernel/watchdog_hld.c | 7 ++----- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 80354e6fa86d..91a3a4a4c8ae 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -27,7 +27,6 @@ extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; extern int soft_watchdog_enabled; -extern atomic_t watchdog_park_in_progress; #else static inline void touch_softlockup_watchdog_sched(void) { diff --git a/kernel/watchdog.c b/kernel/watchdog.c index dd1fd59683c5..c290135fb415 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -136,8 +136,6 @@ void __weak watchdog_nmi_reconfigure(void) #define for_each_watchdog_cpu(cpu) \ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) -atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); - static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -322,8 +320,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) int duration; int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; - if (!watchdog_enabled || - atomic_read(&watchdog_park_in_progress) != 0) + if (!watchdog_enabled) return HRTIMER_NORESTART; /* kick the hardlockup detector */ @@ -437,32 +434,37 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio) static void watchdog_enable(unsigned int cpu) { - struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); + struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); - /* kick off the timer for the hardlockup detector */ + /* + * Start the timer first to prevent the NMI watchdog triggering + * before the timer has a chance to fire. + */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; + hrtimer_start(hrtimer, ns_to_ktime(sample_period), + HRTIMER_MODE_REL_PINNED); + /* Initialize timestamp */ + __touch_watchdog(); /* Enable the perf event */ watchdog_nmi_enable(cpu); - /* done here because hrtimer_start can only pin to smp_processor_id() */ - hrtimer_start(hrtimer, ns_to_ktime(sample_period), - HRTIMER_MODE_REL_PINNED); - - /* initialize timestamp */ watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); - __touch_watchdog(); } static void watchdog_disable(unsigned int cpu) { - struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); + struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); watchdog_set_prio(SCHED_NORMAL, 0); - hrtimer_cancel(hrtimer); - /* disable the perf event */ + /* + * Disable the perf event first. That prevents that a large delay + * between disabling the timer and disabling the perf event causes + * the perf NMI to detect a false positive. + */ watchdog_nmi_disable(cpu); + hrtimer_cancel(hrtimer); } static void watchdog_cleanup(unsigned int cpu, bool online) @@ -518,16 +520,11 @@ static int watchdog_park_threads(void) { int cpu, ret = 0; - atomic_set(&watchdog_park_in_progress, 1); - for_each_watchdog_cpu(cpu) { ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); if (ret) break; } - - atomic_set(&watchdog_park_in_progress, 0); - return ret; } diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 94111ccb09b5..0aa191ee3d51 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -106,15 +106,12 @@ static struct perf_event_attr wd_hw_attr = { /* Callback function for perf event subsystem */ static void watchdog_overflow_callback(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs) + struct perf_sample_data *data, + struct pt_regs *regs) { /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; - if (atomic_read(&watchdog_park_in_progress) != 0) - return; - if (__this_cpu_read(watchdog_nmi_touch) == true) { __this_cpu_write(watchdog_nmi_touch, false); return; -- cgit v1.2.3-70-g09d2 From 2b9d7f233b835663cbc7b6b3f88dd20f61118d1e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:06 +0200 Subject: watchdog/core: Clean up stub functions Having stub functions which take a full page is not helping the readablility of code. Condense them and move the doubled #ifdef variant into the SYSFS section. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.045545271@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 68 ++++++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index c290135fb415..af37c040436c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -125,10 +125,7 @@ void __weak watchdog_nmi_disable(unsigned int cpu) * - sysctl_hardlockup_all_cpu_backtrace * - hardlockup_panic */ -void __weak watchdog_nmi_reconfigure(void) -{ -} - +void __weak watchdog_nmi_reconfigure(void) { } #ifdef CONFIG_SOFTLOCKUP_DETECTOR @@ -136,6 +133,11 @@ void __weak watchdog_nmi_reconfigure(void) #define for_each_watchdog_cpu(cpu) \ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) +/* Global variables, exported for sysctl */ +unsigned int __read_mostly softlockup_panic = + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; +int __read_mostly soft_watchdog_enabled; + static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -149,13 +151,9 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static unsigned long soft_lockup_nmi_warn; -unsigned int __read_mostly softlockup_panic = - CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; - static int __init softlockup_panic_setup(char *str) { softlockup_panic = simple_strtoul(str, NULL, 0); - return 1; } __setup("softlockup_panic=", softlockup_panic_setup); @@ -593,44 +591,13 @@ static void watchdog_disable_all_cpus(void) } } -#ifdef CONFIG_SYSCTL -static int watchdog_update_cpus(void) -{ - return smpboot_update_cpumask_percpu_thread( - &watchdog_threads, &watchdog_cpumask); -} -#endif - -#else /* SOFTLOCKUP */ -static int watchdog_park_threads(void) -{ - return 0; -} - -static void watchdog_unpark_threads(void) -{ -} - -static int watchdog_enable_all_cpus(void) -{ - return 0; -} - -static void watchdog_disable_all_cpus(void) -{ -} - -#ifdef CONFIG_SYSCTL -static int watchdog_update_cpus(void) -{ - return 0; -} -#endif - -static void set_sample_period(void) -{ -} -#endif /* SOFTLOCKUP */ +#else /* CONFIG_SOFTLOCKUP_DETECTOR */ +static inline int watchdog_park_threads(void) { return 0; } +static inline void watchdog_unpark_threads(void) { } +static inline int watchdog_enable_all_cpus(void) { return 0; } +static inline void watchdog_disable_all_cpus(void) { } +static inline void set_sample_period(void) { } +#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_cleanup(void) { @@ -827,6 +794,15 @@ out: return err; } +static int watchdog_update_cpus(void) +{ + if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR)) { + return smpboot_update_cpumask_percpu_thread(&watchdog_threads, + &watchdog_cpumask); + } + return 0; +} + /* * The cpumask is the mask of possible cpus that the watchdog can run * on, not the mask of cpus it is actually running on. This allows the -- cgit v1.2.3-70-g09d2 From 368a7e2ce8ff0ddcdcb37eadb76530b033f6eb2d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:07 +0200 Subject: watchdog/core: Clean up the #ifdef maze The #ifdef maze in this file is horrible, group stuff at least a bit so one can figure out what belongs to what. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.139629546@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index af37c040436c..a9bdfde73e4b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -41,7 +41,6 @@ unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif #ifdef CONFIG_HARDLOCKUP_DETECTOR -/* boot commands */ /* * Should we panic when a soft-lockup or hard-lockup occurs: */ @@ -74,19 +73,21 @@ static int __init hardlockup_panic_setup(char *str) } __setup("nmi_watchdog=", hardlockup_panic_setup); -#endif +# ifdef CONFIG_SMP +int __read_mostly sysctl_hardlockup_all_cpu_backtrace; -#ifdef CONFIG_SOFTLOCKUP_DETECTOR -int __read_mostly soft_watchdog_enabled; -#endif +static int __init hardlockup_all_cpu_backtrace_setup(char *str) +{ + sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); + return 1; +} +__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); +# endif /* CONFIG_SMP */ +#endif /* CONFIG_HARDLOCKUP_DETECTOR */ int __read_mostly watchdog_user_enabled; int __read_mostly watchdog_thresh = 10; -#ifdef CONFIG_SMP -int __read_mostly sysctl_softlockup_all_cpu_backtrace; -int __read_mostly sysctl_hardlockup_all_cpu_backtrace; -#endif struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); @@ -173,22 +174,14 @@ static int __init nosoftlockup_setup(char *str) __setup("nosoftlockup", nosoftlockup_setup); #ifdef CONFIG_SMP +int __read_mostly sysctl_softlockup_all_cpu_backtrace; + static int __init softlockup_all_cpu_backtrace_setup(char *str) { - sysctl_softlockup_all_cpu_backtrace = - !!simple_strtol(str, NULL, 0); + sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); return 1; } __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); -#ifdef CONFIG_HARDLOCKUP_DETECTOR -static int __init hardlockup_all_cpu_backtrace_setup(char *str) -{ - sysctl_hardlockup_all_cpu_backtrace = - !!simple_strtol(str, NULL, 0); - return 1; -} -__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); -#endif #endif static void __lockup_detector_cleanup(void); -- cgit v1.2.3-70-g09d2 From 05ba3de74a3f499dcaa37b186220aaf174c95a4b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:08 +0200 Subject: watchdog/core: Split out cpumask write function Split the write part of the cpumask proc handler out into a separate helper to avoid deep indentation. This also reduces the patch complexity in the following cleanups. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.218075991@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index a9bdfde73e4b..cedf45ab4d81 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -792,10 +792,29 @@ static int watchdog_update_cpus(void) if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR)) { return smpboot_update_cpumask_percpu_thread(&watchdog_threads, &watchdog_cpumask); + __lockup_detector_cleanup(); } return 0; } +static void proc_watchdog_cpumask_update(void) +{ + /* Remove impossible cpus to keep sysctl output clean. */ + cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); + + if (watchdog_running) { + /* + * Failure would be due to being unable to allocate a + * temporary cpumask, so we are likely not in a position to + * do much else to make things better. + */ + if (watchdog_update_cpus() != 0) + pr_err("cpumask update failed\n"); + } + + watchdog_nmi_reconfigure(); +} + /* * The cpumask is the mask of possible cpus that the watchdog can run * on, not the mask of cpus it is actually running on. This allows the @@ -811,30 +830,13 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, mutex_lock(&watchdog_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); - if (!err && write) { - /* Remove impossible cpus to keep sysctl output cleaner. */ - cpumask_and(&watchdog_cpumask, &watchdog_cpumask, - cpu_possible_mask); - - if (watchdog_running) { - /* - * Failure would be due to being unable to allocate - * a temporary cpumask, so we are likely not in a - * position to do much else to make things better. - */ - if (watchdog_update_cpus() != 0) - pr_err("cpumask update failed\n"); - } - - watchdog_nmi_reconfigure(); - __lockup_detector_cleanup(); - } + if (!err && write) + proc_watchdog_cpumask_update(); mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } - #endif /* CONFIG_SYSCTL */ void __init lockup_detector_init(void) -- cgit v1.2.3-70-g09d2 From 0d85923c7a81719567311ba0eae8ecb2efd4c8a0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:09 +0200 Subject: smpboot/threads, watchdog/core: Avoid runtime allocation smpboot_update_cpumask_threads_percpu() allocates a temporary cpumask at runtime. This is suboptimal because the call site needs more code size for proper error handling than a statically allocated temporary mask requires data size. Add static temporary cpumask. The function is globaly serialized, so no further protection required. Remove the half baken error handling in the watchdog code and get rid of the export as there are no in tree modular users of that function. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.297288838@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/smpboot.h | 4 ++-- kernel/smpboot.c | 22 +++++++--------------- kernel/watchdog.c | 21 +++++---------------- 3 files changed, 14 insertions(+), 33 deletions(-) diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index 12910cf19869..c149aa7bedf3 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -55,7 +55,7 @@ smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) } void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); -int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, - const struct cpumask *); +void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, + const struct cpumask *); #endif diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 1d71c051a951..ed7507b69b48 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -344,39 +344,31 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); * by the client, but only by calling this function. * This function can only be called on a registered smp_hotplug_thread. */ -int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, - const struct cpumask *new) +void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, + const struct cpumask *new) { struct cpumask *old = plug_thread->cpumask; - cpumask_var_t tmp; + static struct cpumask tmp; unsigned int cpu; - if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) - return -ENOMEM; - get_online_cpus(); mutex_lock(&smpboot_threads_lock); /* Park threads that were exclusively enabled on the old mask. */ - cpumask_andnot(tmp, old, new); - for_each_cpu_and(cpu, tmp, cpu_online_mask) + cpumask_andnot(&tmp, old, new); + for_each_cpu_and(cpu, &tmp, cpu_online_mask) smpboot_park_thread(plug_thread, cpu); /* Unpark threads that are exclusively enabled on the new mask. */ - cpumask_andnot(tmp, new, old); - for_each_cpu_and(cpu, tmp, cpu_online_mask) + cpumask_andnot(&tmp, new, old); + for_each_cpu_and(cpu, &tmp, cpu_online_mask) smpboot_unpark_thread(plug_thread, cpu); cpumask_copy(old, new); mutex_unlock(&smpboot_threads_lock); put_online_cpus(); - - free_cpumask_var(tmp); - - return 0; } -EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread); static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index cedf45ab4d81..8935a3a4c2fb 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -787,31 +787,20 @@ out: return err; } -static int watchdog_update_cpus(void) +static void watchdog_update_cpus(void) { - if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR)) { - return smpboot_update_cpumask_percpu_thread(&watchdog_threads, - &watchdog_cpumask); + if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR) && watchdog_running) { + smpboot_update_cpumask_percpu_thread(&watchdog_threads, + &watchdog_cpumask); __lockup_detector_cleanup(); } - return 0; } static void proc_watchdog_cpumask_update(void) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - - if (watchdog_running) { - /* - * Failure would be due to being unable to allocate a - * temporary cpumask, so we are likely not in a position to - * do much else to make things better. - */ - if (watchdog_update_cpus() != 0) - pr_err("cpumask update failed\n"); - } - + watchdog_update_cpus(); watchdog_nmi_reconfigure(); } -- cgit v1.2.3-70-g09d2 From 2eb2527f847d1bd8d8fb9db1e8139db5d6eddb36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:10 +0200 Subject: watchdog/core: Create new thread handling infrastructure The lockup detector reconfiguration tears down all watchdog threads when the watchdog is disabled and sets them up again when its enabled. That's a pointless exercise. The watchdog threads are not consuming an insane amount of resources, so it's enough to set them up at init time and keep them in parked position when the watchdog is disabled and unpark them when it is reenabled. The smpboot thread infrastructure takes care of keeping the force parked threads in place even across cpu hotplug. Another horrible mechanism are the open coded park/unpark loops which are used for reconfiguration of the watchdog. The smpboot infrastructure allows exactly the same via smpboot_update_cpumask_thread_percpu(), which is cpu hotplug safe. Using that instead of the open coded loops allows to get rid of the hotplug locking mess in the watchdog code. Implement a clean infrastructure which allows to replace the open coded nonsense. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.377182587@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8935a3a4c2fb..b35518375fb7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -139,6 +139,9 @@ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; int __read_mostly soft_watchdog_enabled; +struct cpumask watchdog_allowed_mask __read_mostly; +static bool softlockup_threads_initialized __read_mostly; + static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -584,12 +587,84 @@ static void watchdog_disable_all_cpus(void) } } +static void softlockup_update_smpboot_threads(void) +{ + lockdep_assert_held(&watchdog_mutex); + + if (!softlockup_threads_initialized) + return; + + smpboot_update_cpumask_percpu_thread(&watchdog_threads, + &watchdog_allowed_mask); + __lockup_detector_cleanup(); +} + +/* Temporarily park all watchdog threads */ +static void softlockup_park_all_threads(void) +{ + cpumask_clear(&watchdog_allowed_mask); + softlockup_update_smpboot_threads(); +} + +/* + * Park threads which are not longer enabled and unpark threads which have + * been newly enabled. + */ +static void softlockup_update_threads(void) +{ + cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); + softlockup_update_smpboot_threads(); +} + +static void softlockup_reconfigure_threads(bool enabled) +{ + softlockup_park_all_threads(); + set_sample_period(); + if (enabled) + softlockup_update_threads(); +} + +/* + * Create the watchdog thread infrastructure. + * + * The threads are not unparked as watchdog_allowed_mask is empty. When + * the threads are sucessfully initialized, take the proper locks and + * unpark the threads in the watchdog_cpumask if the watchdog is enabled. + */ +static __init void softlockup_init_threads(void) +{ + int ret; + + /* + * If sysctl is off and watchdog got disabled on the command line, + * nothing to do here. + */ + if (!IS_ENABLED(CONFIG_SYSCTL) && + !(watchdog_enabled && watchdog_thresh)) + return; + + ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads, + &watchdog_allowed_mask); + if (ret) { + pr_err("Failed to initialize soft lockup detector threads\n"); + return; + } + + mutex_lock(&watchdog_mutex); + softlockup_threads_initialized = true; + softlockup_reconfigure_threads(watchdog_enabled && watchdog_thresh); + mutex_unlock(&watchdog_mutex); +} + #else /* CONFIG_SOFTLOCKUP_DETECTOR */ static inline int watchdog_park_threads(void) { return 0; } static inline void watchdog_unpark_threads(void) { } static inline int watchdog_enable_all_cpus(void) { return 0; } static inline void watchdog_disable_all_cpus(void) { } static inline void set_sample_period(void) { } +static inline void softlockup_init_threads(void) { } +static inline void softlockup_update_threads(void) { } +static inline void softlockup_reconfigure_threads(bool enabled) { } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_cleanup(void) -- cgit v1.2.3-70-g09d2 From d57108d4f6791291e89d980e7f7a3566c32ab188 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:11 +0200 Subject: watchdog/core: Get rid of the thread teardown/setup dance The lockup detector reconfiguration tears down all watchdog threads when the watchdog is disabled and sets them up again when its enabled. That's a pointless exercise. The watchdog threads are not consuming an insane amount of resources, so it's enough to set them up at init time and keep them in parked position when the watchdog is disabled and unpark them when it is reenabled. The smpboot thread infrastructure takes care of keeping the force parked threads in place even across cpu hotplug. Aside of that the code implements the park/unpark facility of smp hotplug threads on its own, which is even more pointless. We have functionality in the smpboot thread code to do so. Use the new thread management functions and get rid of the unholy mess. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.470370113@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 190 ++++++------------------------------------------------ 1 file changed, 19 insertions(+), 171 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index b35518375fb7..762d3ed82a08 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -91,13 +91,6 @@ int __read_mostly watchdog_thresh = 10; struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); -/* - * The 'watchdog_running' variable is set to 1 when the watchdog threads - * are registered/started and is set to 0 when the watchdog threads are - * unregistered/stopped, so it is an indicator whether the threads exist. - */ -static int __read_mostly watchdog_running; - /* * These functions can be overridden if an architecture implements its * own hardlockup detector. @@ -130,10 +123,6 @@ void __weak watchdog_nmi_reconfigure(void) { } #ifdef CONFIG_SOFTLOCKUP_DETECTOR -/* Helper for online, unparked cpus. */ -#define for_each_watchdog_cpu(cpu) \ - for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) - /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -259,11 +248,15 @@ void touch_all_softlockup_watchdogs(void) int cpu; /* - * this is done lockless - * do we care if a 0 races with a timestamp? - * all it means is the softlock check starts one cycle later + * watchdog_mutex cannpt be taken here, as this might be called + * from (soft)interrupt context, so the access to + * watchdog_allowed_cpumask might race with a concurrent update. + * + * The watchdog time stamp can race against a concurrent real + * update as well, the only side effect might be a cycle delay for + * the softlockup check. */ - for_each_watchdog_cpu(cpu) + for_each_cpu(cpu, &watchdog_allowed_mask) per_cpu(watchdog_touch_ts, cpu) = 0; wq_watchdog_touch(-1); } @@ -303,9 +296,6 @@ static void watchdog_interrupt_count(void) __this_cpu_inc(hrtimer_interrupts); } -static int watchdog_enable_all_cpus(void); -static void watchdog_disable_all_cpus(void); - /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { @@ -498,95 +488,6 @@ static struct smp_hotplug_thread watchdog_threads = { .unpark = watchdog_enable, }; -/* - * park all watchdog threads that are specified in 'watchdog_cpumask' - * - * This function returns an error if kthread_park() of a watchdog thread - * fails. In this situation, the watchdog threads of some CPUs can already - * be parked and the watchdog threads of other CPUs can still be runnable. - * Callers are expected to handle this special condition as appropriate in - * their context. - * - * This function may only be called in a context that is protected against - * races with CPU hotplug - for example, via get_online_cpus(). - */ -static int watchdog_park_threads(void) -{ - int cpu, ret = 0; - - for_each_watchdog_cpu(cpu) { - ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); - if (ret) - break; - } - return ret; -} - -/* - * unpark all watchdog threads that are specified in 'watchdog_cpumask' - * - * This function may only be called in a context that is protected against - * races with CPU hotplug - for example, via get_online_cpus(). - */ -static void watchdog_unpark_threads(void) -{ - int cpu; - - for_each_watchdog_cpu(cpu) - kthread_unpark(per_cpu(softlockup_watchdog, cpu)); -} - -static int update_watchdog_all_cpus(void) -{ - int ret; - - ret = watchdog_park_threads(); - if (ret) - return ret; - - watchdog_unpark_threads(); - - return 0; -} - -static int watchdog_enable_all_cpus(void) -{ - int err = 0; - - if (!watchdog_running) { - err = smpboot_register_percpu_thread_cpumask(&watchdog_threads, - &watchdog_cpumask); - if (err) - pr_err("Failed to create watchdog threads, disabled\n"); - else - watchdog_running = 1; - } else { - /* - * Enable/disable the lockup detectors or - * change the sample period 'on the fly'. - */ - err = update_watchdog_all_cpus(); - - if (err) { - watchdog_disable_all_cpus(); - pr_err("Failed to update lockup detectors, disabled\n"); - } - } - - if (err) - watchdog_enabled = 0; - - return err; -} - -static void watchdog_disable_all_cpus(void) -{ - if (watchdog_running) { - watchdog_running = 0; - smpboot_unregister_percpu_thread(&watchdog_threads); - } -} - static void softlockup_update_smpboot_threads(void) { lockdep_assert_held(&watchdog_mutex); @@ -661,7 +562,6 @@ static inline int watchdog_park_threads(void) { return 0; } static inline void watchdog_unpark_threads(void) { } static inline int watchdog_enable_all_cpus(void) { return 0; } static inline void watchdog_disable_all_cpus(void) { } -static inline void set_sample_period(void) { } static inline void softlockup_init_threads(void) { } static inline void softlockup_update_threads(void) { } static inline void softlockup_reconfigure_threads(bool enabled) { } @@ -701,28 +601,10 @@ void lockup_detector_soft_poweroff(void) /* * Update the run state of the lockup detectors. */ -static int proc_watchdog_update(void) +static void proc_watchdog_update(void) { - int err = 0; - - /* - * Watchdog threads won't be started if they are already active. - * The 'watchdog_running' variable in watchdog_*_all_cpus() takes - * care of this. If those threads are already active, the sample - * period will be updated and the lockup detectors will be enabled - * or disabled 'on the fly'. - */ - if (watchdog_enabled && watchdog_thresh) - err = watchdog_enable_all_cpus(); - else - watchdog_disable_all_cpus(); - + softlockup_reconfigure_threads(watchdog_enabled && watchdog_thresh); watchdog_nmi_reconfigure(); - - __lockup_detector_cleanup(); - - return err; - } /* @@ -778,17 +660,8 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, new = old & ~which; } while (cmpxchg(&watchdog_enabled, old, new) != old); - /* - * Update the run state of the lockup detectors. There is _no_ - * need to check the value returned by proc_watchdog_update() - * and to restore the previous value of 'watchdog_enabled' as - * both lockup detectors are disabled if proc_watchdog_update() - * returns an error. - */ - if (old == new) - goto out; - - err = proc_watchdog_update(); + if (old != new) + proc_watchdog_update(); } out: mutex_unlock(&watchdog_mutex); @@ -832,50 +705,28 @@ int proc_soft_watchdog(struct ctl_table *table, int write, int proc_watchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int err, old, new; + int err, old; cpu_hotplug_disable(); mutex_lock(&watchdog_mutex); - old = ACCESS_ONCE(watchdog_thresh); + old = READ_ONCE(watchdog_thresh); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (err || !write) - goto out; + if (!err && write && old != READ_ONCE(watchdog_thresh)) + proc_watchdog_update(); - /* - * Update the sample period. Restore on failure. - */ - new = ACCESS_ONCE(watchdog_thresh); - if (old == new) - goto out; - - set_sample_period(); - err = proc_watchdog_update(); - if (err) { - watchdog_thresh = old; - set_sample_period(); - } -out: mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; } -static void watchdog_update_cpus(void) -{ - if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR) && watchdog_running) { - smpboot_update_cpumask_percpu_thread(&watchdog_threads, - &watchdog_cpumask); - __lockup_detector_cleanup(); - } -} - static void proc_watchdog_cpumask_update(void) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - watchdog_update_cpus(); + + softlockup_update_threads(); watchdog_nmi_reconfigure(); } @@ -905,8 +756,6 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, void __init lockup_detector_init(void) { - set_sample_period(); - #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_enabled()) { pr_info("Disabling watchdog on nohz_full cores by default\n"); @@ -917,6 +766,5 @@ void __init lockup_detector_init(void) cpumask_copy(&watchdog_cpumask, cpu_possible_mask); #endif - if (watchdog_enabled) - watchdog_enable_all_cpus(); + softlockup_init_threads(); } -- cgit v1.2.3-70-g09d2 From e8b62b2dd14f8f2427856ba24cb7db922bda9bfd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:12 +0200 Subject: watchdog/core: Further simplify sysctl handling Use a single function to update sysctl changes. This is not a high frequency user space interface and it's root only. Preparatory patch to cleanup the sysctl variable handling. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.549114957@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 762d3ed82a08..ca8747221e87 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -507,11 +507,8 @@ static void softlockup_park_all_threads(void) softlockup_update_smpboot_threads(); } -/* - * Park threads which are not longer enabled and unpark threads which have - * been newly enabled. - */ -static void softlockup_update_threads(void) +/* Unpark enabled threads */ +static void softlockup_unpark_threads(void) { cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); softlockup_update_smpboot_threads(); @@ -522,7 +519,7 @@ static void softlockup_reconfigure_threads(bool enabled) softlockup_park_all_threads(); set_sample_period(); if (enabled) - softlockup_update_threads(); + softlockup_unpark_threads(); } /* @@ -563,7 +560,6 @@ static inline void watchdog_unpark_threads(void) { } static inline int watchdog_enable_all_cpus(void) { return 0; } static inline void watchdog_disable_all_cpus(void) { } static inline void softlockup_init_threads(void) { } -static inline void softlockup_update_threads(void) { } static inline void softlockup_reconfigure_threads(bool enabled) { } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ @@ -598,11 +594,11 @@ void lockup_detector_soft_poweroff(void) #ifdef CONFIG_SYSCTL -/* - * Update the run state of the lockup detectors. - */ +/* Propagate any changes to the watchdog threads */ static void proc_watchdog_update(void) { + /* Remove impossible cpus to keep sysctl output clean. */ + cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); softlockup_reconfigure_threads(watchdog_enabled && watchdog_thresh); watchdog_nmi_reconfigure(); } @@ -721,15 +717,6 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, return err; } -static void proc_watchdog_cpumask_update(void) -{ - /* Remove impossible cpus to keep sysctl output clean. */ - cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - - softlockup_update_threads(); - watchdog_nmi_reconfigure(); -} - /* * The cpumask is the mask of possible cpus that the watchdog can run * on, not the mask of cpus it is actually running on. This allows the @@ -746,7 +733,7 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); if (!err && write) - proc_watchdog_cpumask_update(); + proc_watchdog_update(); mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); -- cgit v1.2.3-70-g09d2 From 3b371b5936e7777c819619c00ca60f196a8e13fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:13 +0200 Subject: watchdog/core: Clean up header mess Having the same #ifdef in various places does not make it more readable. Collect stuff into one place. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.627096864@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 60 ++++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 91a3a4a4c8ae..cfebb3bc4eed 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -14,11 +14,29 @@ void lockup_detector_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); +bool is_hardlockup(void); + +extern int watchdog_user_enabled; +extern int nmi_watchdog_enabled; +extern int soft_watchdog_enabled; +extern int watchdog_thresh; +extern unsigned long watchdog_enabled; + +extern struct cpumask watchdog_cpumask; +extern unsigned long *watchdog_cpumask_bits; +#ifdef CONFIG_SMP +extern int sysctl_softlockup_all_cpu_backtrace; +extern int sysctl_hardlockup_all_cpu_backtrace; #else +#define sysctl_softlockup_all_cpu_backtrace 0 +#define sysctl_hardlockup_all_cpu_backtrace 0 +#endif /* !CONFIG_SMP */ + +#else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } -#endif +#endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); @@ -26,28 +44,17 @@ extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; -extern int soft_watchdog_enabled; #else -static inline void touch_softlockup_watchdog_sched(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_softlockup_watchdog_sync(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} +static inline void touch_softlockup_watchdog_sched(void) { } +static inline void touch_softlockup_watchdog(void) { } +static inline void touch_softlockup_watchdog_sync(void) { } +static inline void touch_all_softlockup_watchdogs(void) { } #endif #ifdef CONFIG_DETECT_HUNG_TASK void reset_hung_task_detector(void); #else -static inline void reset_hung_task_detector(void) -{ -} +static inline void reset_hung_task_detector(void) { } #endif /* @@ -92,7 +99,7 @@ static inline void arch_touch_nmi_watchdog(void) {} /** * touch_nmi_watchdog - restart NMI watchdog timeout. - * + * * If the architecture supports the NMI watchdog, touch_nmi_watchdog() * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. @@ -162,21 +169,6 @@ static inline bool trigger_single_cpu_backtrace(int cpu) u64 hw_nmi_get_sample_period(int watchdog_thresh); #endif -#ifdef CONFIG_LOCKUP_DETECTOR -extern int nmi_watchdog_enabled; -extern int watchdog_user_enabled; -extern int watchdog_thresh; -extern unsigned long watchdog_enabled; -extern struct cpumask watchdog_cpumask; -extern unsigned long *watchdog_cpumask_bits; -#ifdef CONFIG_SMP -extern int sysctl_softlockup_all_cpu_backtrace; -extern int sysctl_hardlockup_all_cpu_backtrace; -#else -#define sysctl_softlockup_all_cpu_backtrace 0 -#define sysctl_hardlockup_all_cpu_backtrace 0 -#endif - #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ defined(CONFIG_HARDLOCKUP_DETECTOR) void watchdog_update_hrtimer_threshold(u64 period); @@ -184,7 +176,6 @@ void watchdog_update_hrtimer_threshold(u64 period); static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif -extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); @@ -196,7 +187,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int proc_watchdog_cpumask(struct ctl_table *, int, void __user *, size_t *, loff_t *); -#endif #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include -- cgit v1.2.3-70-g09d2 From 51d4052b01ca555e0d1d5fe297b309beb6c64aa0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:14 +0200 Subject: watchdog/sysctl: Get rid of the #ifdeffery The sysctl of the nmi_watchdog file prevents writes by setting: min = max = 0 if none of the users is enabled. That involves ifdeffery and is competely non obvious. If none of the facilities is enabeld, then the file can simply be made read only. Move the ifdeffery into the header and use a constant for file permissions. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.706073616@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 6 ++++++ kernel/sysctl.c | 6 +----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index cfebb3bc4eed..5774b443dba1 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -81,6 +81,12 @@ extern unsigned int hardlockup_panic; static inline void hardlockup_detector_disable(void) {} #endif +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +# define NMI_WATCHDOG_SYSCTL_PERM 0644 +#else +# define NMI_WATCHDOG_SYSCTL_PERM 0444 +#endif + #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6648fbbb8157..539cb4e97bb8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -891,14 +891,10 @@ static struct ctl_table kern_table[] = { .procname = "nmi_watchdog", .data = &nmi_watchdog_enabled, .maxlen = sizeof (int), - .mode = 0644, + .mode = NMI_WATCHDOG_SYSCTL_PERM, .proc_handler = proc_nmi_watchdog, .extra1 = &zero, -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) .extra2 = &one, -#else - .extra2 = &zero, -#endif }, { .procname = "watchdog_cpumask", -- cgit v1.2.3-70-g09d2 From 7feeb9cd4f5b34476ffb9e6d58d58c5416375b19 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:15 +0200 Subject: watchdog/sysctl: Clean up sysctl variable name space Reflect that these variables are user interface related and remove the whitespace damage in the sysctl table while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.783210221@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 16 ++++++++-------- kernel/sysctl.c | 16 ++++++++-------- kernel/watchdog.c | 41 ++++++++++++++++++++--------------------- 3 files changed, 36 insertions(+), 37 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 5774b443dba1..4a8d1037364e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -17,8 +17,8 @@ void lockup_detector_cleanup(void); bool is_hardlockup(void); extern int watchdog_user_enabled; -extern int nmi_watchdog_enabled; -extern int soft_watchdog_enabled; +extern int nmi_watchdog_user_enabled; +extern int soft_watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; @@ -62,12 +62,12 @@ static inline void reset_hung_task_detector(void) { } * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. * - * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' - * are variables that are only used as an 'interface' between the parameters - * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The - * 'watchdog_thresh' variable is handled differently because its value is not - * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' - * is equal zero. + * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and + * 'soft_watchdog_user_enabled' are variables that are only used as an + * 'interface' between the parameters in /proc/sys/kernel and the internal + * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is + * handled differently because its value is not boolean, and the lockup + * detectors are 'suspended' while 'watchdog_thresh' is equal zero. */ #define NMI_WATCHDOG_ENABLED_BIT 0 #define SOFT_WATCHDOG_ENABLED_BIT 1 diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 539cb4e97bb8..4c08ed4a379e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -871,9 +871,9 @@ static struct ctl_table kern_table[] = { #if defined(CONFIG_LOCKUP_DETECTOR) { .procname = "watchdog", - .data = &watchdog_user_enabled, - .maxlen = sizeof (int), - .mode = 0644, + .data = &watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = proc_watchdog, .extra1 = &zero, .extra2 = &one, @@ -889,8 +889,8 @@ static struct ctl_table kern_table[] = { }, { .procname = "nmi_watchdog", - .data = &nmi_watchdog_enabled, - .maxlen = sizeof (int), + .data = &nmi_watchdog_user_enabled, + .maxlen = sizeof(int), .mode = NMI_WATCHDOG_SYSCTL_PERM, .proc_handler = proc_nmi_watchdog, .extra1 = &zero, @@ -906,9 +906,9 @@ static struct ctl_table kern_table[] = { #ifdef CONFIG_SOFTLOCKUP_DETECTOR { .procname = "soft_watchdog", - .data = &soft_watchdog_enabled, - .maxlen = sizeof (int), - .mode = 0644, + .data = &soft_watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = proc_soft_watchdog, .extra1 = &zero, .extra2 = &one, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index ca8747221e87..baae9fc95031 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -31,8 +31,6 @@ static DEFINE_MUTEX(watchdog_mutex); -int __read_mostly nmi_watchdog_enabled; - #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED; @@ -40,6 +38,17 @@ unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED | unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif +int __read_mostly nmi_watchdog_user_enabled; +int __read_mostly soft_watchdog_user_enabled; +int __read_mostly watchdog_user_enabled; +int __read_mostly watchdog_thresh = 10; + +struct cpumask watchdog_allowed_mask __read_mostly; +static bool softlockup_threads_initialized __read_mostly; + +struct cpumask watchdog_cpumask __read_mostly; +unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); + #ifdef CONFIG_HARDLOCKUP_DETECTOR /* * Should we panic when a soft-lockup or hard-lockup occurs: @@ -85,12 +94,6 @@ __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); # endif /* CONFIG_SMP */ #endif /* CONFIG_HARDLOCKUP_DETECTOR */ -int __read_mostly watchdog_user_enabled; -int __read_mostly watchdog_thresh = 10; - -struct cpumask watchdog_cpumask __read_mostly; -unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); - /* * These functions can be overridden if an architecture implements its * own hardlockup detector. @@ -113,7 +116,7 @@ void __weak watchdog_nmi_disable(unsigned int cpu) * watchdog_nmi_reconfigure can be implemented to be notified after any * watchdog configuration change. The arch hardlockup watchdog should * respond to the following variables: - * - nmi_watchdog_enabled + * - watchdog_enabled * - watchdog_thresh * - watchdog_cpumask * - sysctl_hardlockup_all_cpu_backtrace @@ -126,10 +129,6 @@ void __weak watchdog_nmi_reconfigure(void) { } /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; -int __read_mostly soft_watchdog_enabled; - -struct cpumask watchdog_allowed_mask __read_mostly; -static bool softlockup_threads_initialized __read_mostly; static u64 __read_mostly sample_period; @@ -606,14 +605,14 @@ static void proc_watchdog_update(void) /* * common function for watchdog, nmi_watchdog and soft_watchdog parameter * - * caller | table->data points to | 'which' contains the flag(s) - * -------------------|-----------------------|----------------------------- - * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed - * | | with SOFT_WATCHDOG_ENABLED - * -------------------|-----------------------|----------------------------- - * proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED - * -------------------|-----------------------|----------------------------- - * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED + * caller | table->data points to | 'which' + * -------------------|----------------------------|-------------------------- + * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED | + * | | SOFT_WATCHDOG_ENABLED + * -------------------|----------------------------|-------------------------- + * proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED + * -------------------|----------------------------|-------------------------- + * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED */ static int proc_watchdog_common(int which, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) -- cgit v1.2.3-70-g09d2 From 6592ad2fcc8f15b4f99b36c1db7d9f65510c203b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:16 +0200 Subject: watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage Both the perf reconfiguration and the powerpc watchdog_nmi_reconfigure() need to be done in two steps. 1) Stop all NMIs 2) Read the new parameters and start NMIs Right now watchdog_nmi_reconfigure() is a combination of both. To allow a clean reconfiguration add a 'run' argument and split the functionality in powerpc. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20170912194147.862865570@linutronix.de Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/watchdog.c | 17 +++++++++-------- include/linux/nmi.h | 2 ++ kernel/watchdog.c | 31 ++++++++++++++++++++++--------- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 5ded171f02d6..291af79a9826 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -355,17 +355,18 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_reconfigure(void) +void watchdog_nmi_reconfigure(bool run) { int cpu; - watchdog_calc_timeouts(); - - for_each_cpu(cpu, &wd_cpus_enabled) - stop_wd_on_cpu(cpu); - - for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) - start_wd_on_cpu(cpu); + if (!run) { + for_each_cpu(cpu, &wd_cpus_enabled) + stop_wd_on_cpu(cpu); + } else { + watchdog_calc_timeouts(); + for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) + start_wd_on_cpu(cpu); + } } /* diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4a8d1037364e..eee255bc0fd6 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -103,6 +103,8 @@ static inline void arch_touch_nmi_watchdog(void) {} #endif #endif +void watchdog_nmi_reconfigure(bool run); + /** * touch_nmi_watchdog - restart NMI watchdog timeout. * diff --git a/kernel/watchdog.c b/kernel/watchdog.c index baae9fc95031..5693afd2b8ea 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -112,17 +112,25 @@ void __weak watchdog_nmi_disable(unsigned int cpu) hardlockup_detector_perf_disable(); } -/* - * watchdog_nmi_reconfigure can be implemented to be notified after any - * watchdog configuration change. The arch hardlockup watchdog should - * respond to the following variables: +/** + * watchdog_nmi_reconfigure - Optional function to reconfigure NMI watchdogs + * @run: If false stop the watchdogs on all enabled CPUs + * If true start the watchdogs on all enabled CPUs + * + * The core call order is: + * watchdog_nmi_reconfigure(false); + * update_variables(); + * watchdog_nmi_reconfigure(true); + * + * The second call which starts the watchdogs again guarantees that the + * following variables are stable across the call. * - watchdog_enabled * - watchdog_thresh * - watchdog_cpumask - * - sysctl_hardlockup_all_cpu_backtrace - * - hardlockup_panic + * + * After the call the variables can be changed again. */ -void __weak watchdog_nmi_reconfigure(void) { } +void __weak watchdog_nmi_reconfigure(bool run) { } #ifdef CONFIG_SOFTLOCKUP_DETECTOR @@ -515,10 +523,12 @@ static void softlockup_unpark_threads(void) static void softlockup_reconfigure_threads(bool enabled) { + watchdog_nmi_reconfigure(false); softlockup_park_all_threads(); set_sample_period(); if (enabled) softlockup_unpark_threads(); + watchdog_nmi_reconfigure(true); } /* @@ -559,7 +569,11 @@ static inline void watchdog_unpark_threads(void) { } static inline int watchdog_enable_all_cpus(void) { return 0; } static inline void watchdog_disable_all_cpus(void) { } static inline void softlockup_init_threads(void) { } -static inline void softlockup_reconfigure_threads(bool enabled) { } +static void softlockup_reconfigure_threads(bool enabled) +{ + watchdog_nmi_reconfigure(false); + watchdog_nmi_reconfigure(true); +} #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_cleanup(void) @@ -599,7 +613,6 @@ static void proc_watchdog_update(void) /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); softlockup_reconfigure_threads(watchdog_enabled && watchdog_thresh); - watchdog_nmi_reconfigure(); } /* -- cgit v1.2.3-70-g09d2 From 091549858ed881e5f3054374af4f5b1cac681d50 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:17 +0200 Subject: watchdog/core: Get rid of the racy update loop Letting user space poke directly at variables which are used at run time is stupid and causes a lot of race conditions and other issues. Seperate the user variables and on change invoke the reconfiguration, which then stops the watchdogs, reevaluates the new user value and restarts the watchdogs with the new parameters. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.939985640@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 95 +++++++++++++++++++++++++++---------------------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 5693afd2b8ea..84886319d7b0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -32,15 +32,17 @@ static DEFINE_MUTEX(watchdog_mutex); #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) -unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED | - NMI_WATCHDOG_ENABLED; +# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED) +# define NMI_WATCHDOG_DEFAULT 1 #else -unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; +# define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED) +# define NMI_WATCHDOG_DEFAULT 0 #endif -int __read_mostly nmi_watchdog_user_enabled; -int __read_mostly soft_watchdog_user_enabled; -int __read_mostly watchdog_user_enabled; +unsigned long __read_mostly watchdog_enabled; +int __read_mostly watchdog_user_enabled = 1; +int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT; +int __read_mostly soft_watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; struct cpumask watchdog_allowed_mask __read_mostly; @@ -65,7 +67,7 @@ unsigned int __read_mostly hardlockup_panic = */ void __init hardlockup_detector_disable(void) { - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; + nmi_watchdog_user_enabled = 0; } static int __init hardlockup_panic_setup(char *str) @@ -75,9 +77,9 @@ static int __init hardlockup_panic_setup(char *str) else if (!strncmp(str, "nopanic", 7)) hardlockup_panic = 0; else if (!strncmp(str, "0", 1)) - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; + nmi_watchdog_user_enabled = 0; else if (!strncmp(str, "1", 1)) - watchdog_enabled |= NMI_WATCHDOG_ENABLED; + nmi_watchdog_user_enabled = 1; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -132,6 +134,23 @@ void __weak watchdog_nmi_disable(unsigned int cpu) */ void __weak watchdog_nmi_reconfigure(bool run) { } +/** + * lockup_detector_update_enable - Update the sysctl enable bit + * + * Caller needs to make sure that the NMI/perf watchdogs are off, so this + * can't race with watchdog_nmi_disable(). + */ +static void lockup_detector_update_enable(void) +{ + watchdog_enabled = 0; + if (!watchdog_user_enabled) + return; + if (nmi_watchdog_user_enabled) + watchdog_enabled |= NMI_WATCHDOG_ENABLED; + if (soft_watchdog_user_enabled) + watchdog_enabled |= SOFT_WATCHDOG_ENABLED; +} + #ifdef CONFIG_SOFTLOCKUP_DETECTOR /* Global variables, exported for sysctl */ @@ -160,14 +179,14 @@ __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); static int __init nosoftlockup_setup(char *str) { - watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED; + soft_watchdog_user_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); @@ -521,12 +540,13 @@ static void softlockup_unpark_threads(void) softlockup_update_smpboot_threads(); } -static void softlockup_reconfigure_threads(bool enabled) +static void softlockup_reconfigure_threads(void) { watchdog_nmi_reconfigure(false); softlockup_park_all_threads(); set_sample_period(); - if (enabled) + lockup_detector_update_enable(); + if (watchdog_enabled && watchdog_thresh) softlockup_unpark_threads(); watchdog_nmi_reconfigure(true); } @@ -546,6 +566,8 @@ static __init void softlockup_init_threads(void) * If sysctl is off and watchdog got disabled on the command line, * nothing to do here. */ + lockup_detector_update_enable(); + if (!IS_ENABLED(CONFIG_SYSCTL) && !(watchdog_enabled && watchdog_thresh)) return; @@ -559,7 +581,7 @@ static __init void softlockup_init_threads(void) mutex_lock(&watchdog_mutex); softlockup_threads_initialized = true; - softlockup_reconfigure_threads(watchdog_enabled && watchdog_thresh); + softlockup_reconfigure_threads(); mutex_unlock(&watchdog_mutex); } @@ -569,9 +591,10 @@ static inline void watchdog_unpark_threads(void) { } static inline int watchdog_enable_all_cpus(void) { return 0; } static inline void watchdog_disable_all_cpus(void) { } static inline void softlockup_init_threads(void) { } -static void softlockup_reconfigure_threads(bool enabled) +static void softlockup_reconfigure_threads(void) { watchdog_nmi_reconfigure(false); + lockup_detector_update_enable(); watchdog_nmi_reconfigure(true); } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ @@ -612,7 +635,7 @@ static void proc_watchdog_update(void) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - softlockup_reconfigure_threads(watchdog_enabled && watchdog_thresh); + softlockup_reconfigure_threads(); } /* @@ -630,48 +653,24 @@ static void proc_watchdog_update(void) static int proc_watchdog_common(int which, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int err, old, new; - int *watchdog_param = (int *)table->data; + int err, old, *param = table->data; cpu_hotplug_disable(); mutex_lock(&watchdog_mutex); - /* - * If the parameter is being read return the state of the corresponding - * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the - * run state of the lockup detectors. - */ if (!write) { - *watchdog_param = (watchdog_enabled & which) != 0; + /* + * On read synchronize the userspace interface. This is a + * racy snapshot. + */ + *param = (watchdog_enabled & which) != 0; err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); } else { + old = READ_ONCE(*param); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (err) - goto out; - - /* - * There is a race window between fetching the current value - * from 'watchdog_enabled' and storing the new value. During - * this race window, watchdog_nmi_enable() can sneak in and - * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'. - * The 'cmpxchg' detects this race and the loop retries. - */ - do { - old = watchdog_enabled; - /* - * If the parameter value is not zero set the - * corresponding bit(s), else clear it(them). - */ - if (*watchdog_param) - new = old | which; - else - new = old & ~which; - } while (cmpxchg(&watchdog_enabled, old, new) != old); - - if (old != new) + if (!err && old != READ_ONCE(*param)) proc_watchdog_update(); } -out: mutex_unlock(&watchdog_mutex); cpu_hotplug_enable(); return err; -- cgit v1.2.3-70-g09d2 From 178b9f7a36d2c74a38274b66dd89f53611298a19 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:18 +0200 Subject: watchdog/hardlockup/perf: Implement init time perf validation The watchdog tries to create perf events even after it figured out that perf is not functional or the requested event is not supported. That's braindead as this can be done once at init time and if not supported the NMI watchdog can be turned off unconditonally. Implement the perf hardlockup detector functionality for that. This creates a new event create function, which will replace the unholy mess of the existing one in later patches. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194148.019090547@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 8 ++++++-- kernel/watchdog_hld.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index eee255bc0fd6..72c62a809e92 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -93,14 +93,18 @@ extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); extern void hardlockup_detector_perf_cleanup(void); +extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_disable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } -#if !defined(CONFIG_HAVE_NMI_WATCHDOG) +# if !defined(CONFIG_HAVE_NMI_WATCHDOG) +static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } static inline void arch_touch_nmi_watchdog(void) {} -#endif +# else +static inline int hardlockup_detector_perf_init(void) { return 0; } +# endif #endif void watchdog_nmi_reconfigure(bool run); diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 0aa191ee3d51..f7e752e6e9b4 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -238,6 +238,27 @@ out: return 0; } +static int hardlockup_detector_event_create(void) +{ + unsigned int cpu = smp_processor_id(); + struct perf_event_attr *wd_attr; + struct perf_event *evt; + + wd_attr = &wd_hw_attr; + wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); + + /* Try to register using hardware perf events */ + evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL, + watchdog_overflow_callback, NULL); + if (IS_ERR(evt)) { + pr_info("Perf event create on CPU %d failed with %ld\n", cpu, + PTR_ERR(evt)); + return PTR_ERR(evt); + } + this_cpu_write(watchdog_ev, evt); + return 0; +} + /** * hardlockup_detector_perf_disable - Disable the local event */ @@ -315,3 +336,19 @@ void __init hardlockup_detector_perf_restart(void) perf_event_enable(event); } } + +/** + * hardlockup_detector_perf_init - Probe whether NMI event is available at all + */ +int __init hardlockup_detector_perf_init(void) +{ + int ret = hardlockup_detector_event_create(); + + if (ret) { + pr_info("Perf NMI watchdog permanetely disabled\n"); + } else { + perf_event_release_kernel(this_cpu_read(watchdog_ev)); + this_cpu_write(watchdog_ev, NULL); + } + return ret; +} -- cgit v1.2.3-70-g09d2 From a994a3147e4c0c9c50a46e6cace7586254975e20 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:19 +0200 Subject: watchdog/hardlockup/perf: Implement init time detection of perf Use the init time detection of the perf NMI watchdog to determine whether the perf NMI watchdog is functional. If not disable it permanentely. It won't come back magically at runtime. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194148.099799541@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 84886319d7b0..fd8a998eb197 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -44,6 +44,7 @@ int __read_mostly watchdog_user_enabled = 1; int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT; int __read_mostly soft_watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; +int __read_mostly nmi_watchdog_available; struct cpumask watchdog_allowed_mask __read_mostly; static bool softlockup_threads_initialized __read_mostly; @@ -114,6 +115,12 @@ void __weak watchdog_nmi_disable(unsigned int cpu) hardlockup_detector_perf_disable(); } +/* Return 0, if a NMI watchdog is available. Error code otherwise */ +int __weak __init watchdog_nmi_probe(void) +{ + return hardlockup_detector_perf_init(); +} + /** * watchdog_nmi_reconfigure - Optional function to reconfigure NMI watchdogs * @run: If false stop the watchdogs on all enabled CPUs @@ -145,7 +152,7 @@ static void lockup_detector_update_enable(void) watchdog_enabled = 0; if (!watchdog_user_enabled) return; - if (nmi_watchdog_user_enabled) + if (nmi_watchdog_available && nmi_watchdog_user_enabled) watchdog_enabled |= NMI_WATCHDOG_ENABLED; if (soft_watchdog_user_enabled) watchdog_enabled |= SOFT_WATCHDOG_ENABLED; @@ -692,6 +699,8 @@ int proc_watchdog(struct ctl_table *table, int write, int proc_nmi_watchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + if (!nmi_watchdog_available && write) + return -ENOTSUPP; return proc_watchdog_common(NMI_WATCHDOG_ENABLED, table, write, buffer, lenp, ppos); } @@ -764,5 +773,7 @@ void __init lockup_detector_init(void) cpumask_copy(&watchdog_cpumask, cpu_possible_mask); #endif + if (!watchdog_nmi_probe()) + nmi_watchdog_available = true; softlockup_init_threads(); } -- cgit v1.2.3-70-g09d2 From 2a1b8ee4f5665b4291e43e4a25d964c3eb2f4c32 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:20 +0200 Subject: watchdog/hardlockup/perf: Implement CPU enable replacement watchdog_nmi_enable() is an unparseable mess, Provide a clean perf specific implementation, which will be used when the existing setup/teardown mess is replaced. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194148.180215498@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 2 ++ kernel/watchdog_hld.c | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 72c62a809e92..89ba8b23c6fe 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -92,12 +92,14 @@ extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); +extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_disable(void) { } +static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } # if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index f7e752e6e9b4..99a3f22e48cc 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -259,6 +259,17 @@ static int hardlockup_detector_event_create(void) return 0; } +/** + * hardlockup_detector_perf_enable - Enable the local event + */ +void hardlockup_detector_perf_enable(void) +{ + if (hardlockup_detector_event_create()) + return; + + perf_event_enable(this_cpu_read(watchdog_ev)); +} + /** * hardlockup_detector_perf_disable - Disable the local event */ -- cgit v1.2.3-70-g09d2 From 146c9d0e9dfdb62ed6afd43cc263efafbbfd1dcf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:21 +0200 Subject: watchdog/hardlockup/perf: Use new perf CPU enable mechanism Get rid of the hodgepodge which tries to be smart about perf being unavailable and error printout rate limiting. That's all not required simply because this is never invoked when the perf NMI watchdog is not functional. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194148.259651788@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog.c | 4 ++- kernel/watchdog_hld.c | 88 +++------------------------------------------------ 2 files changed, 8 insertions(+), 84 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index fd8a998eb197..5eb11960e4a2 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -107,6 +107,7 @@ __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); */ int __weak watchdog_nmi_enable(unsigned int cpu) { + hardlockup_detector_perf_enable(); return 0; } @@ -465,7 +466,8 @@ static void watchdog_enable(unsigned int cpu) /* Initialize timestamp */ __touch_watchdog(); /* Enable the perf event */ - watchdog_nmi_enable(cpu); + if (watchdog_enabled & NMI_WATCHDOG_ENABLED) + watchdog_nmi_enable(cpu); watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); } diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 99a3f22e48cc..509bb6b59c41 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -25,7 +25,7 @@ static DEFINE_PER_CPU(struct perf_event *, dead_event); static struct cpumask dead_events_mask; static unsigned long hardlockup_allcpu_dumped; -static bool hardlockup_detector_disabled; +static unsigned int watchdog_cpus; void arch_touch_nmi_watchdog(void) { @@ -160,84 +160,6 @@ static void watchdog_overflow_callback(struct perf_event *event, return; } -/* - * People like the simple clean cpu node info on boot. - * Reduce the watchdog noise by only printing messages - * that are different from what cpu0 displayed. - */ -static unsigned long firstcpu_err; -static atomic_t watchdog_cpus; - -int watchdog_nmi_enable(unsigned int cpu) -{ - struct perf_event_attr *wd_attr; - struct perf_event *event = per_cpu(watchdog_ev, cpu); - int firstcpu = 0; - - /* nothing to do if the hard lockup detector is disabled */ - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - goto out; - - /* A failure disabled the hardlockup detector permanently */ - if (hardlockup_detector_disabled) - return -ENODEV; - - /* is it already setup and enabled? */ - if (event && event->state > PERF_EVENT_STATE_OFF) - goto out; - - /* it is setup but not enabled */ - if (event != NULL) - goto out_enable; - - if (atomic_inc_return(&watchdog_cpus) == 1) - firstcpu = 1; - - wd_attr = &wd_hw_attr; - wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); - - /* Try to register using hardware perf events */ - event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); - - /* save the first cpu's error for future comparision */ - if (firstcpu && IS_ERR(event)) - firstcpu_err = PTR_ERR(event); - - if (!IS_ERR(event)) { - /* only print for the first cpu initialized */ - if (firstcpu || firstcpu_err) - pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n"); - goto out_save; - } - - /* skip displaying the same error again */ - if (!firstcpu && (PTR_ERR(event) == firstcpu_err)) - return PTR_ERR(event); - - /* vary the KERN level based on the returned errno */ - if (PTR_ERR(event) == -EOPNOTSUPP) - pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); - else if (PTR_ERR(event) == -ENOENT) - pr_warn("disabled (cpu%i): hardware events not enabled\n", - cpu); - else - pr_err("disabled (cpu%i): unable to create perf event: %ld\n", - cpu, PTR_ERR(event)); - - pr_info("Disabling hard lockup detector permanently\n"); - hardlockup_detector_disabled = true; - - return PTR_ERR(event); - - /* success path */ -out_save: - per_cpu(watchdog_ev, cpu) = event; -out_enable: - perf_event_enable(per_cpu(watchdog_ev, cpu)); -out: - return 0; -} - static int hardlockup_detector_event_create(void) { unsigned int cpu = smp_processor_id(); @@ -267,6 +189,9 @@ void hardlockup_detector_perf_enable(void) if (hardlockup_detector_event_create()) return; + if (!watchdog_cpus++) + pr_info("Enabled. Permanently consumes one hw-PMU counter.\n"); + perf_event_enable(this_cpu_read(watchdog_ev)); } @@ -282,10 +207,7 @@ void hardlockup_detector_perf_disable(void) this_cpu_write(watchdog_ev, NULL); this_cpu_write(dead_event, event); cpumask_set_cpu(smp_processor_id(), &dead_events_mask); - - /* watchdog_nmi_enable() expects this to be zero initially. */ - if (atomic_dec_and_test(&watchdog_cpus)) - firstcpu_err = 0; + watchdog_cpus--; } } -- cgit v1.2.3-70-g09d2 From a33d44843d4574ec05bec39527d8a87b7af2072c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:22 +0200 Subject: watchdog/hardlockup/perf: Simplify deferred event destroy Now that all functionality is properly serialized against CPU hotplug, remove the extra per cpu storage which holds the disabled events for cleanup. The core makes sure that cleanup happens before new events are created. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194148.340708074@linutronix.de Signed-off-by: Ingo Molnar --- kernel/watchdog_hld.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 509bb6b59c41..b2931154b5f2 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -21,7 +21,6 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); -static DEFINE_PER_CPU(struct perf_event *, dead_event); static struct cpumask dead_events_mask; static unsigned long hardlockup_allcpu_dumped; @@ -204,8 +203,6 @@ void hardlockup_detector_perf_disable(void) if (event) { perf_event_disable(event); - this_cpu_write(watchdog_ev, NULL); - this_cpu_write(dead_event, event); cpumask_set_cpu(smp_processor_id(), &dead_events_mask); watchdog_cpus--; } @@ -221,9 +218,9 @@ void hardlockup_detector_perf_cleanup(void) int cpu; for_each_cpu(cpu, &dead_events_mask) { - struct perf_event *event = per_cpu(dead_event, cpu); + struct perf_event *event = per_cpu(watchdog_ev, cpu); - per_cpu(dead_event, cpu) = NULL; + per_cpu(watchdog_ev, cpu) = NULL; perf_event_release_kernel(event); } cpumask_clear(&dead_events_mask); -- cgit v1.2.3-70-g09d2 From ab5fe3ff38ff9653490910cc71dbbedc95a86e41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:23 +0200 Subject: watchdog/hardlockup: Clean up hotplug locking mess All watchdog thread related functions are delegated to the smpboot thread infrastructure, which handles serialization against CPU hotplug correctly. The sysctl interface is completely decoupled from anything which requires CPU hotplug protection. No need to protect the sysctl writes against cpu hotplug anymore. Remove it and add the now required protection to the powerpc arch_nmi_watchdog implementation. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20170912194148.418497420@linutronix.de Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/watchdog.c | 2 ++ kernel/watchdog.c | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 291af79a9826..dfb067764480 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -359,6 +359,7 @@ void watchdog_nmi_reconfigure(bool run) { int cpu; + cpus_read_lock(); if (!run) { for_each_cpu(cpu, &wd_cpus_enabled) stop_wd_on_cpu(cpu); @@ -367,6 +368,7 @@ void watchdog_nmi_reconfigure(bool run) for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) start_wd_on_cpu(cpu); } + cpus_read_unlock(); } /* diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 5eb11960e4a2..f6ef163b72cd 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -664,7 +664,6 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, { int err, old, *param = table->data; - cpu_hotplug_disable(); mutex_lock(&watchdog_mutex); if (!write) { @@ -681,7 +680,6 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, proc_watchdog_update(); } mutex_unlock(&watchdog_mutex); - cpu_hotplug_enable(); return err; } @@ -725,7 +723,6 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, { int err, old; - cpu_hotplug_disable(); mutex_lock(&watchdog_mutex); old = READ_ONCE(watchdog_thresh); @@ -735,7 +732,6 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, proc_watchdog_update(); mutex_unlock(&watchdog_mutex); - cpu_hotplug_enable(); return err; } @@ -750,7 +746,6 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, { int err; - cpu_hotplug_disable(); mutex_lock(&watchdog_mutex); err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); @@ -758,7 +753,6 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, proc_watchdog_update(); mutex_unlock(&watchdog_mutex); - cpu_hotplug_enable(); return err; } #endif /* CONFIG_SYSCTL */ -- cgit v1.2.3-70-g09d2 From 6354a06cbaa8c49d8377a6cee3e7db399c23601c Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 13 Sep 2017 09:38:40 +0200 Subject: Revert "arm64: dts: rockchip: Add basic cpu frequencies for RK3368" This reverts commit 6f2dea1f5fdb73eb2e050d9ebe990121d557e519. Without accurate cpu regulators being set for boards this will wreak havoc when cpufreq-dt begins to set new frequencies without adjusting the core frequency. Additionally the rk3368 has an unsolved issue in that it has two separate cpu clusters with separate clock lines but only one cpu supply regulator for both clusters, which causes even more problems. While it seems that originally only one cluster was supposed to be active at a time (big or little), talking with real users of the hardware revealed that having all 8 cores accessible at 1.2GHz max is way more liked than having 4 cores at 1.5GHz max. Such an approach needs changes to cpufreq and/or opp though to control the two separate clock lines when setting both clusters to the same frequencies. In any case, having the OPPs in the dts at this point in time is undesireable, so remove them again for now. Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3368.dtsi | 72 +------------------------------- 1 file changed, 2 insertions(+), 70 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index e0518b4bc6c2..19fbaa5e7bdd 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -113,8 +113,7 @@ compatible = "arm,cortex-a53", "arm,armv8"; reg = <0x0 0x0>; enable-method = "psci"; - clocks = <&cru ARMCLKL>; - operating-points-v2 = <&cluster0_opp>; + #cooling-cells = <2>; /* min followed by max */ }; @@ -123,8 +122,6 @@ compatible = "arm,cortex-a53", "arm,armv8"; reg = <0x0 0x1>; enable-method = "psci"; - clocks = <&cru ARMCLKL>; - operating-points-v2 = <&cluster0_opp>; }; cpu_l2: cpu@2 { @@ -132,8 +129,6 @@ compatible = "arm,cortex-a53", "arm,armv8"; reg = <0x0 0x2>; enable-method = "psci"; - clocks = <&cru ARMCLKL>; - operating-points-v2 = <&cluster0_opp>; }; cpu_l3: cpu@3 { @@ -141,8 +136,6 @@ compatible = "arm,cortex-a53", "arm,armv8"; reg = <0x0 0x3>; enable-method = "psci"; - clocks = <&cru ARMCLKL>; - operating-points-v2 = <&cluster0_opp>; }; cpu_b0: cpu@100 { @@ -150,8 +143,7 @@ compatible = "arm,cortex-a53", "arm,armv8"; reg = <0x0 0x100>; enable-method = "psci"; - clocks = <&cru ARMCLKB>; - operating-points-v2 = <&cluster1_opp>; + #cooling-cells = <2>; /* min followed by max */ }; @@ -160,8 +152,6 @@ compatible = "arm,cortex-a53", "arm,armv8"; reg = <0x0 0x101>; enable-method = "psci"; - clocks = <&cru ARMCLKB>; - operating-points-v2 = <&cluster1_opp>; }; cpu_b2: cpu@102 { @@ -169,8 +159,6 @@ compatible = "arm,cortex-a53", "arm,armv8"; reg = <0x0 0x102>; enable-method = "psci"; - clocks = <&cru ARMCLKB>; - operating-points-v2 = <&cluster1_opp>; }; cpu_b3: cpu@103 { @@ -178,62 +166,6 @@ compatible = "arm,cortex-a53", "arm,armv8"; reg = <0x0 0x103>; enable-method = "psci"; - clocks = <&cru ARMCLKB>; - operating-points-v2 = <&cluster1_opp>; - }; - }; - - cluster0_opp: opp-table0 { - compatible = "operating-points-v2"; - opp-shared; - - opp00 { - opp-hz = /bits/ 64 <312000000>; - opp-microvolt = <950000>; - clock-latency-ns = <40000>; - }; - opp01 { - opp-hz = /bits/ 64 <408000000>; - opp-microvolt = <950000>; - }; - opp02 { - opp-hz = /bits/ 64 <600000000>; - opp-microvolt = <950000>; - }; - opp03 { - opp-hz = /bits/ 64 <816000000>; - opp-microvolt = <1025000>; - }; - opp04 { - opp-hz = /bits/ 64 <1008000000>; - opp-microvolt = <1125000>; - }; - }; - - cluster1_opp: opp-table1 { - compatible = "operating-points-v2"; - opp-shared; - - opp00 { - opp-hz = /bits/ 64 <312000000>; - opp-microvolt = <950000>; - clock-latency-ns = <40000>; - }; - opp01 { - opp-hz = /bits/ 64 <408000000>; - opp-microvolt = <950000>; - }; - opp02 { - opp-hz = /bits/ 64 <600000000>; - opp-microvolt = <950000>; - }; - opp03 { - opp-hz = /bits/ 64 <816000000>; - opp-microvolt = <975000>; - }; - opp04 { - opp-hz = /bits/ 64 <1008000000>; - opp-microvolt = <1050000>; }; }; -- cgit v1.2.3-70-g09d2 From e8620acc90785ecbde041d241a13778044df9208 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Fri, 1 Sep 2017 10:01:44 +0800 Subject: clk: rockchip: add pclk_pmu as critical clock on rk3128 pclk_pmu need always on, and no dts node to handle this clk, so make it as critical clock Signed-off-by: Elaine Zhang Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3128.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3128.c b/drivers/clk/rockchip/clk-rk3128.c index 62d7854e4b87..f15c9b874911 100644 --- a/drivers/clk/rockchip/clk-rk3128.c +++ b/drivers/clk/rockchip/clk-rk3128.c @@ -541,7 +541,7 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { GATE(0, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS), GATE(0, "pclk_mipiphy", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 0, GFLAGS), - GATE(0, "pclk_pmu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 2, GFLAGS), + GATE(0, "pclk_pmu", "pclk_pmu_pre", 0, RK2928_CLKGATE_CON(9), 2, GFLAGS), GATE(0, "pclk_pmu_niu", "pclk_pmu_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(9), 3, GFLAGS), /* PD_MMC */ @@ -577,6 +577,7 @@ static const char *const rk3128_critical_clocks[] __initconst = { "aclk_peri", "hclk_peri", "pclk_peri", + "pclk_pmu", }; static struct rockchip_clk_provider *__init rk3128_common_clk_init(struct device_node *np) -- cgit v1.2.3-70-g09d2 From a4eb286565eb07ee5acfe6a1409f68ad9f663845 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Fri, 1 Sep 2017 10:01:45 +0800 Subject: clk: rockchip: fix up rk3128 pvtm and mipi_24m gate regs error A copy-paste error made them use the wrong bits in the register. Signed-off-by: Elaine Zhang Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3128.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/rockchip/clk-rk3128.c b/drivers/clk/rockchip/clk-rk3128.c index f15c9b874911..ce02d2cff608 100644 --- a/drivers/clk/rockchip/clk-rk3128.c +++ b/drivers/clk/rockchip/clk-rk3128.c @@ -315,13 +315,13 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { RK2928_CLKGATE_CON(10), 8, GFLAGS), GATE(SCLK_PVTM_CORE, "clk_pvtm_core", "xin24m", 0, - RK2928_CLKGATE_CON(10), 8, GFLAGS), + RK2928_CLKGATE_CON(10), 0, GFLAGS), GATE(SCLK_PVTM_GPU, "clk_pvtm_gpu", "xin24m", 0, - RK2928_CLKGATE_CON(10), 8, GFLAGS), + RK2928_CLKGATE_CON(10), 1, GFLAGS), GATE(SCLK_PVTM_FUNC, "clk_pvtm_func", "xin24m", 0, - RK2928_CLKGATE_CON(10), 8, GFLAGS), + RK2928_CLKGATE_CON(10), 2, GFLAGS), GATE(SCLK_MIPI_24M, "clk_mipi_24m", "xin24m", CLK_IGNORE_UNUSED, - RK2928_CLKGATE_CON(10), 8, GFLAGS), + RK2928_CLKGATE_CON(2), 15, GFLAGS), COMPOSITE(SCLK_SDMMC, "sclk_sdmmc0", mux_mmc_src_p, 0, RK2928_CLKSEL_CON(11), 6, 2, MFLAGS, 0, 6, DFLAGS, -- cgit v1.2.3-70-g09d2 From 00e6751ffc9e6e0651e514961316fd15f0409683 Mon Sep 17 00:00:00 2001 From: Elaine Zhang Date: Fri, 1 Sep 2017 10:01:46 +0800 Subject: clk: rockchip: add sclk_timer5 as critical clock on rk3128 sclk_timer5 is for arm arch counter, so need always on. but no dts node to handle this clk, so make it as critical clock Signed-off-by: Elaine Zhang Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3128.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/rockchip/clk-rk3128.c b/drivers/clk/rockchip/clk-rk3128.c index ce02d2cff608..5970a50671b9 100644 --- a/drivers/clk/rockchip/clk-rk3128.c +++ b/drivers/clk/rockchip/clk-rk3128.c @@ -578,6 +578,7 @@ static const char *const rk3128_critical_clocks[] __initconst = { "hclk_peri", "pclk_peri", "pclk_pmu", + "sclk_timer5", }; static struct rockchip_clk_provider *__init rk3128_common_clk_init(struct device_node *np) -- cgit v1.2.3-70-g09d2 From 3f241bfa60bdc9c4fde63fa6664a8ce00fd668c6 Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Sat, 12 Aug 2017 11:10:02 +0530 Subject: arm64: allwinner: a64: pine64: Use dcdc1 regulator for mmc0 Since current tree support AXP803 regulators, replace fixed regulator with AXP803 dcdc1 regulator. Tested on pine64. Signed-off-by: Jagan Teki Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index caf8b6fbe5e3..d06e34b5d192 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -61,13 +61,6 @@ chosen { stdout-path = "serial0:115200n8"; }; - - reg_vcc3v3: vcc3v3 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - }; }; &ehci0 { @@ -91,7 +84,7 @@ &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; - vmmc-supply = <®_vcc3v3>; + vmmc-supply = <®_dcdc1>; cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; cd-inverted; disable-wp; -- cgit v1.2.3-70-g09d2 From a231d2783c332ef3e3ba238e82dbe599ff41ba14 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 8 Sep 2017 15:50:09 +0800 Subject: ARM: dts: sun6i: Fix endpoint IDs in second display pipeline When the second display pipeline device nodes for the A31/A31s were added, it was not known that the TCONs could (through either DRCs) select either backend as their input. Thus in the endpoints connecting these components together, the endpoint IDs were set to 0, while in fact they should have been set to 1. Cc: Fixes: 9a26882a7378 ("ARM: dts: sun6i: Add second display pipeline device nodes") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun6i-a31.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index b147cb0dc14b..eef072a21acc 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -311,8 +311,8 @@ #size-cells = <0>; reg = <0>; - tcon1_in_drc1: endpoint@0 { - reg = <0>; + tcon1_in_drc1: endpoint@1 { + reg = <1>; remote-endpoint = <&drc1_out_tcon1>; }; }; @@ -1012,8 +1012,8 @@ #size-cells = <0>; reg = <1>; - be1_out_drc1: endpoint@0 { - reg = <0>; + be1_out_drc1: endpoint@1 { + reg = <1>; remote-endpoint = <&drc1_in_be1>; }; }; @@ -1042,8 +1042,8 @@ #size-cells = <0>; reg = <0>; - drc1_in_be1: endpoint@0 { - reg = <0>; + drc1_in_be1: endpoint@1 { + reg = <1>; remote-endpoint = <&be1_out_drc1>; }; }; @@ -1053,8 +1053,8 @@ #size-cells = <0>; reg = <1>; - drc1_out_tcon1: endpoint@0 { - reg = <0>; + drc1_out_tcon1: endpoint@1 { + reg = <1>; remote-endpoint = <&tcon1_in_drc1>; }; }; -- cgit v1.2.3-70-g09d2 From 27563cd9f8f52f09523e061985917c38f302bd0c Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 14 Sep 2017 17:28:12 +0300 Subject: ARM: dts: at91: sama5d27_som1_ek: update pinmux/pinconf for LEDs and USB There are some changes from the prototype board concerning LEDs and USB pins: - USBB power enable and red LED pins are inverted. - The polarity of LEDs is inverted too. Signed-off-by: Ludovic Desroches Signed-off-by: Claudiu Beznea Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/at91-sama5d27_som1_ek.dts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts index 9c9088c99cc4..f13ef4fbab60 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts @@ -67,7 +67,7 @@ usb1: ohci@00400000 { num-ports = <3>; - atmel,vbus-gpio = <&pioA PIN_PA10 GPIO_ACTIVE_HIGH>; + atmel,vbus-gpio = <&pioA PIN_PA27 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb_default>; status = "okay"; @@ -330,7 +330,7 @@ }; pinctrl_led_gpio_default: led_gpio_default { - pinmux = , + pinmux = , , ; bias-pull-up; @@ -396,7 +396,7 @@ }; pinctrl_usb_default: usb_default { - pinmux = , + pinmux = , ; bias-disable; }; @@ -520,17 +520,17 @@ red { label = "red"; - gpios = <&pioA PIN_PA27 GPIO_ACTIVE_LOW>; + gpios = <&pioA PIN_PA10 GPIO_ACTIVE_HIGH>; }; green { label = "green"; - gpios = <&pioA PIN_PB1 GPIO_ACTIVE_LOW>; + gpios = <&pioA PIN_PB1 GPIO_ACTIVE_HIGH>; }; blue { label = "blue"; - gpios = <&pioA PIN_PA31 GPIO_ACTIVE_LOW>; + gpios = <&pioA PIN_PA31 GPIO_ACTIVE_HIGH>; linux,default-trigger = "heartbeat"; }; }; -- cgit v1.2.3-70-g09d2 From 5f506faa0de810f07af9345826fd588f61bb3b2f Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Thu, 14 Sep 2017 17:28:13 +0300 Subject: ARM: dts: at91: sama5d27_som1_ek: fix typos Fix typos that prevent proper using of uart2 and uart4 devices. Signed-off-by: Ludovic Desroches Signed-off-by: Claudiu Beznea Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/at91-sama5d27_som1_ek.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts index f13ef4fbab60..be5cd913f274 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts @@ -120,7 +120,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mikrobus2_uart>; atmel,use-dma-rx; - atmel-use-dma-tx; + atmel,use-dma-tx; status = "okay"; }; @@ -178,7 +178,7 @@ uart4: serial@fc00c000 { atmel,use-dma-rx; atmel,use-dma-tx; - pinctrl-name = "default"; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mikrobus1_uart>; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From e025a3ac3460275bf86a4c5d02857eee14db4247 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 14 Sep 2017 17:28:14 +0300 Subject: ARM: dts: at91: sama5d27_som1_ek: fix USB host vbus The USB host has 3 ports so we must specify the entries for each in the atmel,vbus-gpio property. The specified pin (PA27) is the vbus for USBB and not USBA. Signed-off-by: Nicolas Ferre [claudiu.beznea@microchip.com: change subject to match the desired prefix] Signed-off-by: Claudiu Beznea --- arch/arm/boot/dts/at91-sama5d27_som1_ek.dts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts index be5cd913f274..60cb084a8d92 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts @@ -67,7 +67,10 @@ usb1: ohci@00400000 { num-ports = <3>; - atmel,vbus-gpio = <&pioA PIN_PA27 GPIO_ACTIVE_HIGH>; + atmel,vbus-gpio = <0 /* &pioA PIN_PD20 GPIO_ACTIVE_HIGH */ + &pioA PIN_PA27 GPIO_ACTIVE_HIGH + 0 + >; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb_default>; status = "okay"; -- cgit v1.2.3-70-g09d2 From 093d79f62a89f47d9b5fd0746768146d9696535c Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 24 Aug 2017 13:44:54 +0200 Subject: ARM: at91: Replace uses of virt_to_phys with __pa_symbol The PM code wrongly uses virt_to_phys() instead of __pa_symbol() and was not updated by commit 64fc2a947a98 ("ARM: 8641/1: treewide: Replace uses of virt_to_phys with __pa_symbol") because it was not yet in tree. Signed-off-by: Alexandre Belloni Signed-off-by: Nicolas Ferre --- arch/arm/mach-at91/pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 5036f996e694..849014c01cf4 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -533,8 +533,8 @@ static void __init at91_pm_backup_init(void) } pm_bu->suspended = 0; - pm_bu->canary = virt_to_phys(&canary); - pm_bu->resume = virt_to_phys(cpu_resume); + pm_bu->canary = __pa_symbol(&canary); + pm_bu->resume = __pa_symbol(cpu_resume); return; -- cgit v1.2.3-70-g09d2 From a6c215e21b0dc5fe9416dce90f9acc2ea53c4502 Mon Sep 17 00:00:00 2001 From: Jeffrey Chu Date: Fri, 8 Sep 2017 21:08:58 +0000 Subject: USB: serial: ftdi_sio: add id for Cypress WICED dev board Add CYPRESS_VID vid and CYPRESS_WICED_BT_USB and CYPRESS_WICED_WL_USB device IDs to ftdi_sio driver. Signed-off-by: Jeffrey Chu Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 1cec03799cdf..49d1b2d4606d 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1015,6 +1015,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, { USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, + { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 4fcf1cecb6d7..f9d15bd62785 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -609,6 +609,13 @@ #define ADI_GNICE_PID 0xF000 #define ADI_GNICEPLUS_PID 0xF001 +/* + * Cypress WICED USB UART + */ +#define CYPRESS_VID 0x04B4 +#define CYPRESS_WICED_BT_USB_PID 0x009B +#define CYPRESS_WICED_WL_USB_PID 0xF900 + /* * Microchip Technology, Inc. * -- cgit v1.2.3-70-g09d2 From 837ddc4793a69b256ac5e781a5e729b448a8d983 Mon Sep 17 00:00:00 2001 From: Henryk Heisig Date: Mon, 11 Sep 2017 17:57:34 +0200 Subject: USB: serial: option: add support for TP-Link LTE module This commit adds support for TP-Link LTE mPCIe module is used in in TP-Link MR200v1, MR6400v1 and v2 routers. Signed-off-by: Henryk Heisig Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 54bfef13966a..ba672cf4e888 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -522,6 +522,7 @@ static void option_instat_callback(struct urb *urb); /* TP-LINK Incorporated products */ #define TPLINK_VENDOR_ID 0x2357 +#define TPLINK_PRODUCT_LTE 0x000D #define TPLINK_PRODUCT_MA180 0x0201 /* Changhong products */ @@ -2011,6 +2012,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) }, { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) }, + { USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, TPLINK_PRODUCT_LTE, 0xff, 0x00, 0x00) }, /* TP-Link LTE Module */ { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ -- cgit v1.2.3-70-g09d2 From 2d48a237c8b61b457d146310d7e1e61224d0ca56 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sat, 9 Sep 2017 06:02:46 +0200 Subject: ARC: reset: Only build on archs that have IOMEM This avoids the error: drivers/reset/reset-hsdk-v1.o: In function `hsdkv1_reset_probe': /home/thomas/git/linux/drivers/reset/reset-hsdk-v1.c:101: undefined reference to `devm_ioremap_resource' collect2: error: ld returned 1 exit status Signed-off-by: Thomas Meyer Signed-off-by: Philipp Zabel --- drivers/reset/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index e0c393214264..d063bd5373dd 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -36,6 +36,7 @@ config RESET_BERLIN config RESET_HSDK_V1 bool "HSDK v1 Reset Driver" + depends on HAS_IOMEM default n help This enables the reset controller driver for HSDK v1. -- cgit v1.2.3-70-g09d2 From fc9655e65160936e32adae0d0e8aae25eb12c4e0 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 14 Sep 2017 12:49:41 +0200 Subject: ARC: reset: add missing DT binding documentation for HSDKv1 reset driver When applying the original patch [1], the DT binding docs were lost. This patch adds them back. [1] https://patchwork.kernel.org/patch/9852997/ Fixes: e0be864f1424 ("ARC: reset: introduce HSDKv1 reset driver") Signed-off-by: Eugeniy Paltsev Signed-off-by: Philipp Zabel --- .../bindings/reset/snps,hsdk-v1-reset.txt | 28 ++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt diff --git a/Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt b/Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt new file mode 100644 index 000000000000..6a68146ee353 --- /dev/null +++ b/Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt @@ -0,0 +1,28 @@ +Binding for the HSDK v1 reset controller + +This binding uses the common reset binding[1]. + +[1] Documentation/devicetree/bindings/reset/reset.txt + +Required properties: +- compatible: should be "snps,hsdk-v1.0-reset". +- reg: should always contain 2 pairs address - length: first for reset + configuration register and second for corresponding SW reset and status bits + register. +- #reset-cells: from common reset binding; Should always be set to 1. + +Example: + reset: reset@880 { + compatible = "snps,hsdk-v1.0-reset"; + #reset-cells = <1>; + reg = <0x8A0 0x4>, <0xFF0 0x4>; + }; + +Specifying reset lines connected to IP modules: + ethernet@.... { + .... + resets = <&reset HSDK_V1_ETH_RESET>; + .... + }; + +The index could be found in -- cgit v1.2.3-70-g09d2 From 70e743e4cec3733dc13559f6184b35d358b9ef3f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 14 Sep 2017 16:52:59 +0200 Subject: uwb: ensure that endpoint is interrupt hwarc_neep_init() assumes that endpoint 0 is interrupt, but there's no check for that, which results in a WARNING in USB core code, when a bad USB descriptor is provided from a device: usb 1-1: BOGUS urb xfer, pipe 1 != type 3 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 3 at drivers/usb/core/urb.c:449 usb_submit_urb+0xf8a/0x11d0 Modules linked in: CPU: 0 PID: 3 Comm: kworker/0:0 Not tainted 4.13.0+ #111 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event task: ffff88006bdc1a00 task.stack: ffff88006bde8000 RIP: 0010:usb_submit_urb+0xf8a/0x11d0 drivers/usb/core/urb.c:448 RSP: 0018:ffff88006bdee3c0 EFLAGS: 00010282 RAX: 0000000000000029 RBX: ffff8800672a7200 RCX: 0000000000000000 RDX: 0000000000000029 RSI: ffff88006c815c78 RDI: ffffed000d7bdc6a RBP: ffff88006bdee4c0 R08: fffffbfff0fe00ff R09: fffffbfff0fe00ff R10: 0000000000000018 R11: fffffbfff0fe00fe R12: 1ffff1000d7bdc7f R13: 0000000000000003 R14: 0000000000000001 R15: ffff88006b02cc90 FS: 0000000000000000(0000) GS:ffff88006c800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe4daddf000 CR3: 000000006add6000 CR4: 00000000000006f0 Call Trace: hwarc_neep_init+0x4ce/0x9c0 drivers/uwb/hwa-rc.c:710 uwb_rc_add+0x2fb/0x730 drivers/uwb/lc-rc.c:361 hwarc_probe+0x34e/0x9b0 drivers/uwb/hwa-rc.c:858 usb_probe_interface+0x351/0x8d0 drivers/usb/core/driver.c:361 really_probe drivers/base/dd.c:385 driver_probe_device+0x610/0xa00 drivers/base/dd.c:529 __device_attach_driver+0x230/0x290 drivers/base/dd.c:625 bus_for_each_drv+0x15e/0x210 drivers/base/bus.c:463 __device_attach+0x269/0x3c0 drivers/base/dd.c:682 device_initial_probe+0x1f/0x30 drivers/base/dd.c:729 bus_probe_device+0x1da/0x280 drivers/base/bus.c:523 device_add+0xcf9/0x1640 drivers/base/core.c:1703 usb_set_configuration+0x1064/0x1890 drivers/usb/core/message.c:1932 generic_probe+0x73/0xe0 drivers/usb/core/generic.c:174 usb_probe_device+0xaf/0xe0 drivers/usb/core/driver.c:266 really_probe drivers/base/dd.c:385 driver_probe_device+0x610/0xa00 drivers/base/dd.c:529 __device_attach_driver+0x230/0x290 drivers/base/dd.c:625 bus_for_each_drv+0x15e/0x210 drivers/base/bus.c:463 __device_attach+0x269/0x3c0 drivers/base/dd.c:682 device_initial_probe+0x1f/0x30 drivers/base/dd.c:729 bus_probe_device+0x1da/0x280 drivers/base/bus.c:523 device_add+0xcf9/0x1640 drivers/base/core.c:1703 usb_new_device+0x7b8/0x1020 drivers/usb/core/hub.c:2457 hub_port_connect drivers/usb/core/hub.c:4890 hub_port_connect_change drivers/usb/core/hub.c:4996 port_event drivers/usb/core/hub.c:5102 hub_event+0x23c8/0x37c0 drivers/usb/core/hub.c:5182 process_one_work+0x9fb/0x1570 kernel/workqueue.c:2097 worker_thread+0x1e4/0x1350 kernel/workqueue.c:2231 kthread+0x324/0x3f0 kernel/kthread.c:231 ret_from_fork+0x25/0x30 arch/x86/entry/entry_64.S:425 Code: 48 8b 85 30 ff ff ff 48 8d b8 98 00 00 00 e8 8e 93 07 ff 45 89 e8 44 89 f1 4c 89 fa 48 89 c6 48 c7 c7 a0 e5 55 86 e8 20 08 8f fd <0f> ff e9 9b f7 ff ff e8 4a 04 d6 fd e9 80 f7 ff ff e8 60 11 a6 ---[ end trace 55d741234124cfc3 ]--- Check that endpoint is interrupt. Found by syzkaller. Signed-off-by: Andrey Konovalov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/hwa-rc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 35a1e777b449..9a53912bdfe9 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -825,6 +825,8 @@ static int hwarc_probe(struct usb_interface *iface, if (iface->cur_altsetting->desc.bNumEndpoints < 1) return -ENODEV; + if (!usb_endpoint_xfer_int(&iface->cur_altsetting->endpoint[0].desc)) + return -ENODEV; result = -ENOMEM; uwb_rc = uwb_rc_alloc(); -- cgit v1.2.3-70-g09d2 From bbf26183b7a6236ba602f4d6a2f7cade35bba043 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 14 Sep 2017 14:30:55 +0200 Subject: uwb: properly check kthread_run return value uwbd_start() calls kthread_run() and checks that the return value is not NULL. But the return value is not NULL in case kthread_run() fails, it takes the form of ERR_PTR(-EINTR). Use IS_ERR() instead. Also add a check to uwbd_stop(). Signed-off-by: Andrey Konovalov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/uwbd.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c index 01c20a260a8b..39dd4ef53c77 100644 --- a/drivers/uwb/uwbd.c +++ b/drivers/uwb/uwbd.c @@ -302,18 +302,22 @@ static int uwbd(void *param) /** Start the UWB daemon */ void uwbd_start(struct uwb_rc *rc) { - rc->uwbd.task = kthread_run(uwbd, rc, "uwbd"); - if (rc->uwbd.task == NULL) + struct task_struct *task = kthread_run(uwbd, rc, "uwbd"); + if (IS_ERR(task)) { + rc->uwbd.task = NULL; printk(KERN_ERR "UWB: Cannot start management daemon; " "UWB won't work\n"); - else + } else { + rc->uwbd.task = task; rc->uwbd.pid = rc->uwbd.task->pid; + } } /* Stop the UWB daemon and free any unprocessed events */ void uwbd_stop(struct uwb_rc *rc) { - kthread_stop(rc->uwbd.task); + if (rc->uwbd.task) + kthread_stop(rc->uwbd.task); uwbd_flush(rc); } -- cgit v1.2.3-70-g09d2 From b2a542bbb3081dbd64acc8929c140d196664c406 Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Tue, 5 Sep 2017 11:40:56 +0300 Subject: usb: Increase quirk delay for USB devices Commit e0429362ab15 ("usb: Add device quirk for Logitech HD Pro Webcams C920 and C930e") introduced quirk to workaround an issue with some Logitech webcams. The workaround is introducing delay for some USB operations. According to our testing, delay introduced by original commit is not long enough and in rare cases we still see issues described by the aforementioned commit. This patch increases delays introduced by original commit. Having this patch applied we do not see those problems anymore. Signed-off-by: Dmitry Fleytman Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 2 +- drivers/usb/core/hub.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 4be52c602e9b..854c8d66cfbe 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -852,7 +852,7 @@ int usb_get_configuration(struct usb_device *dev) } if (dev->quirks & USB_QUIRK_DELAY_INIT) - msleep(100); + msleep(200); result = usb_get_descriptor(dev, USB_DT_CONFIG, cfgno, bigbuffer, length); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 41eaf0b52518..b5c733613823 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4838,7 +4838,7 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, goto loop; if (udev->quirks & USB_QUIRK_DELAY_INIT) - msleep(1000); + msleep(2000); /* consecutive bus-powered hubs aren't reliable; they can * violate the voltage drop budget. if the new child has -- cgit v1.2.3-70-g09d2 From 056e4fc2018364ba01a23a1ced0ccbbdfa4520b3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Sep 2017 21:23:13 +0200 Subject: staging: unisys/visorbus: add __init/__exit annotations gcc-4.6 causes a harmless warning about the init function: WARNING: vmlinux.o(.text+0xed62c2): Section mismatch in reference from the function init_unisys() to the function .init.text:visorutil_spar_detect() The function init_unisys() references the function __init visorutil_spar_detect(). This is often because init_unisys lacks a __init annotation or the annotation of visorutil_spar_detect is wrong. It appears that newer versions inline visorutil_spar_detect(), end up with an empty __init section. This marks the module entry points as __init and __exit respectively, which avoids the warning and slightly reduces the runtime code size. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/unisys/visorbus/visorchipset.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/unisys/visorbus/visorchipset.c b/drivers/staging/unisys/visorbus/visorchipset.c index 74cce4f1a7bd..27ecf6fb49fd 100644 --- a/drivers/staging/unisys/visorbus/visorchipset.c +++ b/drivers/staging/unisys/visorbus/visorchipset.c @@ -1826,7 +1826,7 @@ static __init int visorutil_spar_detect(void) return 0; } -static int init_unisys(void) +static int __init init_unisys(void) { int result; @@ -1841,7 +1841,7 @@ static int init_unisys(void) return 0; }; -static void exit_unisys(void) +static void __exit exit_unisys(void) { acpi_bus_unregister_driver(&unisys_acpi_driver); } -- cgit v1.2.3-70-g09d2 From a3563b09f132661d447f69224ef65fdec02f5c61 Mon Sep 17 00:00:00 2001 From: Arun Nagendran Date: Fri, 15 Sep 2017 12:30:32 -0400 Subject: staging: mt29f_spinand: Enable the read ECC before program the page Current program_page function did following operation: 1. read page (with ECC OFF) 2. modify the page 3. write the page (with ECC ON) For some case(buggy flash Chip), while read the page without ECC ON, we may read the page with bit flip error and modify that bad page without knowing the bit flip error on that page. also we re-calculate the hash for bad page and write it. This could bring potential in-consistency problem with Flash data. Verify this logic with GIGA DEVICE Part(GD5F2GQ4RCFIG): we see this in-conststency problem wit Giga Device and fix on this patch resovle that issue. Signed-off-by: Arun Nagendran Signed-off-by: Greg Kroah-Hartman --- drivers/staging/mt29f_spinand/mt29f_spinand.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c index 13eaf16ecd16..87595c594b12 100644 --- a/drivers/staging/mt29f_spinand/mt29f_spinand.c +++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c @@ -496,8 +496,12 @@ static int spinand_program_page(struct spi_device *spi_nand, if (!wbuf) return -ENOMEM; - enable_read_hw_ecc = 0; - spinand_read_page(spi_nand, page_id, 0, CACHE_BUF, wbuf); + enable_read_hw_ecc = 1; + retval = spinand_read_page(spi_nand, page_id, 0, CACHE_BUF, wbuf); + if (retval < 0) { + dev_err(&spi_nand->dev, "ecc error on read page!!!\n"); + return retval; + } for (i = offset, j = 0; i < len; i++, j++) wbuf[i] &= buf[j]; -- cgit v1.2.3-70-g09d2 From e1bf28868ab0bfd1fc73dd8d5e642d88456c30e9 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Sep 2017 14:55:06 +0100 Subject: staging: r8822be: fix null pointer dereferences with a null driver_adapter The call to _rtl_dbg_trace via macro HALMAC_RT_TRACE will trigger a null pointer deference on a null driver_adapter. Fix this by assigning driver_adapter earlier to halmac_adapter->driver_adapter before the tracing call so that a non-null driver_adapter is passed instead. I should have spotted these with an earlier patch I sent, but I overlooked these in the rather large CoverityScan logs. Detected by CoverityScan, CID#1454550, CID#1454554, CID#1454565, CID#1454591, CID#1454598 ("Explicit null dereferenced") Fixes: 938a0447f094 ("staging: r8822be: Add code for halmac sub-driver") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman --- .../staging/rtlwifi/halmac/halmac_88xx/halmac_api_88xx.c | 4 ++-- .../staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_api_88xx.c b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_api_88xx.c index 5f84526cb5b5..edbf6af1c8b7 100644 --- a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_api_88xx.c +++ b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_api_88xx.c @@ -2901,11 +2901,11 @@ halmac_update_datapack_88xx(struct halmac_adapter *halmac_adapter, if (halmac_adapter->fw_version.h2c_version < 4) return HALMAC_RET_FW_NO_SUPPORT; + driver_adapter = halmac_adapter->driver_adapter; + HALMAC_RT_TRACE(driver_adapter, HALMAC_MSG_H2C, DBG_DMESG, "[TRACE]%s ==========>\n", __func__); - driver_adapter = halmac_adapter->driver_adapter; - HALMAC_RT_TRACE(driver_adapter, HALMAC_MSG_H2C, DBG_DMESG, "[TRACE]%s <==========\n", __func__); diff --git a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c index f33024e4d853..544f638ed3ef 100644 --- a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c +++ b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c @@ -1618,10 +1618,11 @@ halmac_send_h2c_set_pwr_mode_88xx(struct halmac_adapter *halmac_adapter, void *driver_adapter = NULL; enum halmac_ret_status status = HALMAC_RET_SUCCESS; + driver_adapter = halmac_adapter->driver_adapter; + HALMAC_RT_TRACE(driver_adapter, HALMAC_MSG_H2C, DBG_DMESG, "%s!!\n", __func__); - driver_adapter = halmac_adapter->driver_adapter; h2c_header = h2c_buff; h2c_cmd = h2c_header + HALMAC_H2C_CMD_HDR_SIZE_88XX; @@ -1713,10 +1714,11 @@ halmac_media_status_rpt_88xx(struct halmac_adapter *halmac_adapter, u8 op_mode, void *driver_adapter = NULL; enum halmac_ret_status status = HALMAC_RET_SUCCESS; + driver_adapter = halmac_adapter->driver_adapter; + HALMAC_RT_TRACE(driver_adapter, HALMAC_MSG_H2C, DBG_DMESG, "halmac_send_h2c_set_pwr_mode_88xx!!\n"); - driver_adapter = halmac_adapter->driver_adapter; h2c_header = H2c_buff; h2c_cmd = h2c_header + HALMAC_H2C_CMD_HDR_SIZE_88XX; @@ -2143,10 +2145,11 @@ halmac_func_ctrl_ch_switch_88xx(struct halmac_adapter *halmac_adapter, enum halmac_cmd_process_status *process_status = &halmac_adapter->halmac_state.scan_state_set.process_status; + driver_adapter = halmac_adapter->driver_adapter; + HALMAC_RT_TRACE(driver_adapter, HALMAC_MSG_H2C, DBG_DMESG, "halmac_ctrl_ch_switch!!\n"); - driver_adapter = halmac_adapter->driver_adapter; halmac_api = (struct halmac_api *)halmac_adapter->halmac_api; if (halmac_transition_scan_state_88xx( @@ -2276,15 +2279,13 @@ enum halmac_ret_status halmac_send_h2c_update_bcn_parse_info_88xx( { u8 h2c_buff[HALMAC_H2C_CMD_SIZE_88XX] = {0}; u16 h2c_seq_mum = 0; - void *driver_adapter = NULL; + void *driver_adapter = halmac_adapter->driver_adapter; struct halmac_h2c_header_info h2c_header_info; enum halmac_ret_status status = HALMAC_RET_SUCCESS; HALMAC_RT_TRACE(driver_adapter, HALMAC_MSG_H2C, DBG_DMESG, "%s!!\n", __func__); - driver_adapter = halmac_adapter->driver_adapter; - UPDATE_BEACON_PARSING_INFO_SET_FUNC_EN(h2c_buff, bcn_ie_info->func_en); UPDATE_BEACON_PARSING_INFO_SET_SIZE_TH(h2c_buff, bcn_ie_info->size_th); UPDATE_BEACON_PARSING_INFO_SET_TIMEOUT(h2c_buff, bcn_ie_info->timeout); -- cgit v1.2.3-70-g09d2 From b72703e26b9478da531144ce5c4552dd22f1103d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 6 Sep 2017 17:40:25 +0200 Subject: staging: pi433: Move limit check to switch default to kill warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc-4.1.2: drivers/staging/pi433/rf69.c: In function ‘rf69_set_dio_mapping’: drivers/staging/pi433/rf69.c:566: warning: ‘regaddr’ may be used uninitialized in this function drivers/staging/pi433/rf69.c:565: warning: ‘shift’ may be used uninitialized in this function drivers/staging/pi433/rf69.c:564: warning: ‘mask’ may be used uninitialized in this function While this is a false positive, it can easily be fixed by moving the limit check into the "default" case of the switch statement. Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/staging/pi433/rf69.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/staging/pi433/rf69.c b/drivers/staging/pi433/rf69.c index c4b1b218ea38..290b419aa9dd 100644 --- a/drivers/staging/pi433/rf69.c +++ b/drivers/staging/pi433/rf69.c @@ -570,12 +570,6 @@ int rf69_set_dio_mapping(struct spi_device *spi, u8 DIONumber, u8 value) dev_dbg(&spi->dev, "set: DIO mapping"); #endif - // check DIO number - if (DIONumber > 5) { - dev_dbg(&spi->dev, "set: illegal input param"); - return -EINVAL; - } - switch (DIONumber) { case 0: mask=MASK_DIO0; shift=SHIFT_DIO0; regaddr=REG_DIOMAPPING1; break; case 1: mask=MASK_DIO1; shift=SHIFT_DIO1; regaddr=REG_DIOMAPPING1; break; @@ -583,6 +577,9 @@ int rf69_set_dio_mapping(struct spi_device *spi, u8 DIONumber, u8 value) case 3: mask=MASK_DIO3; shift=SHIFT_DIO3; regaddr=REG_DIOMAPPING1; break; case 4: mask=MASK_DIO4; shift=SHIFT_DIO4; regaddr=REG_DIOMAPPING2; break; case 5: mask=MASK_DIO5; shift=SHIFT_DIO5; regaddr=REG_DIOMAPPING2; break; + default: + dev_dbg(&spi->dev, "set: illegal input param"); + return -EINVAL; } // read reg -- cgit v1.2.3-70-g09d2 From e5f5d0e20b6cecc0ebe6fc8e7df6f8823ad2d594 Mon Sep 17 00:00:00 2001 From: Okash Khawaja Date: Tue, 5 Sep 2017 12:51:59 +0100 Subject: staging: speakup: fix speakup-r empty line lockup When cursor is at beginning of an empty or whitespace-only line and speakup-r typed, kernel locks up. This happens because deadlock of in input_event function over dev->event_lock, as demonstrated by lockdep logs. The reason for that is speakup simulates a down arrow - because cursor is at an empty line - while inside key press notifier handler which is ultimately triggered from input_event function. The simulated key press leads to input_event being called again, this time under its own context. So the spinlock is dev->event_lock is acquired while still being held. This patch ensures that key press is not simulated from inside key press notifier handler. Instead it delegates to cursor_timer. It starts the timer and passes RA_DOWN_ARROW as argument. When timer handler runs and sees RA_DOWN_ARROW, it will then call kbd_fakekey2(RA_DOWN_ARROW) which will correctly simulate the keypress inside timer context. When not inside key press notifier callback, the behaviour will remain the same as before this patch. Signed-off-by: Okash Khawaja Reviewed-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/main.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c index 67956e24779c..56f7be6af1f6 100644 --- a/drivers/staging/speakup/main.c +++ b/drivers/staging/speakup/main.c @@ -1376,6 +1376,8 @@ static void reset_highlight_buffers(struct vc_data *); static int read_all_key; +static int in_keyboard_notifier; + static void start_read_all_timer(struct vc_data *vc, int command); enum { @@ -1408,7 +1410,10 @@ static void read_all_doc(struct vc_data *vc) cursor_track = read_all_mode; spk_reset_index_count(0); if (get_sentence_buf(vc, 0) == -1) { - kbd_fakekey2(vc, RA_DOWN_ARROW); + del_timer(&cursor_timer); + if (!in_keyboard_notifier) + speakup_fake_down_arrow(); + start_read_all_timer(vc, RA_DOWN_ARROW); } else { say_sentence_num(0, 0); synth_insert_next_index(0); @@ -2212,8 +2217,10 @@ static int keyboard_notifier_call(struct notifier_block *nb, int ret = NOTIFY_OK; static int keycode; /* to hold the current keycode */ + in_keyboard_notifier = 1; + if (vc->vc_mode == KD_GRAPHICS) - return ret; + goto out; /* * First, determine whether we are handling a fake keypress on @@ -2225,7 +2232,7 @@ static int keyboard_notifier_call(struct notifier_block *nb, */ if (speakup_fake_key_pressed()) - return ret; + goto out; switch (code) { case KBD_KEYCODE: @@ -2266,6 +2273,8 @@ static int keyboard_notifier_call(struct notifier_block *nb, break; } } +out: + in_keyboard_notifier = 0; return ret; } -- cgit v1.2.3-70-g09d2 From 974d4d03fc020af4fa4e9e72a86f0fefa37803c5 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 3 Sep 2017 19:06:31 +0200 Subject: staging: vchiq_2835_arm: Fix NULL ptr dereference in free_pagelist This fixes a NULL pointer dereference on RPi 2 with multi_v7_defconfig. The function page_address() could return NULL with enabled CONFIG_HIGHMEM. So fix this by using kmap() instead. Signed-off-by: Stefan Wahren Fixes: 71bad7f08641 ("staging: add bcm2708 vchiq driver") Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c index 0159ca4407d8..be08849175ea 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -612,18 +612,20 @@ free_pagelist(struct vchiq_pagelist_info *pagelistinfo, if (head_bytes > actual) head_bytes = actual; - memcpy((char *)page_address(pages[0]) + + memcpy((char *)kmap(pages[0]) + pagelist->offset, fragments, head_bytes); + kunmap(pages[0]); } if ((actual >= 0) && (head_bytes < actual) && (tail_bytes != 0)) { - memcpy((char *)page_address(pages[num_pages - 1]) + + memcpy((char *)kmap(pages[num_pages - 1]) + ((pagelist->offset + actual) & (PAGE_SIZE - 1) & ~(g_cache_line_size - 1)), fragments + g_cache_line_size, tail_bytes); + kunmap(pages[num_pages - 1]); } down(&g_free_fragments_mutex); -- cgit v1.2.3-70-g09d2 From 55168470835688e5da5828cdcf1b1498d7baadb1 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 11 Sep 2017 10:45:12 +0300 Subject: usb: dwc3: ep0: fix DMA starvation by assigning req->trb on ep0 If we don't assign a TRB to ep0 requests, we won't be able to unmap the request later on resulting in starvation of DMA resources. Fixes: 4a71fcb8ac5f ("usb: dwc3: gadget: only unmap requests from DMA if mapped") Reported-by: Thinh Nguyen Tested-by: Thinh Nguyen Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/ep0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 827e376bfa97..75e6cb044eb2 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -990,6 +990,8 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, DWC3_TRBCTL_CONTROL_DATA, true); + req->trb = &dwc->ep0_trb[dep->trb_enqueue - 1]; + /* Now prepare one extra TRB to align transfer size */ dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, maxpacket - rem, @@ -1015,6 +1017,8 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, DWC3_TRBCTL_CONTROL_DATA, true); + req->trb = &dwc->ep0_trb[dep->trb_enqueue - 1]; + /* Now prepare one extra TRB to align transfer size */ dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, 0, DWC3_TRBCTL_CONTROL_DATA, @@ -1029,6 +1033,9 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, dwc3_ep0_prepare_one_trb(dep, req->request.dma, req->request.length, DWC3_TRBCTL_CONTROL_DATA, false); + + req->trb = &dwc->ep0_trb[dep->trb_enqueue]; + ret = dwc3_ep0_start_trans(dep); } -- cgit v1.2.3-70-g09d2 From 13541226dc056fa3f54417ce12f18ba711a1591c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 31 Aug 2017 11:06:07 -0700 Subject: ARC: reset: remove the misleading v1 suffix all over There is no plan yet to do a v2 board. And even if we were to do it only some IPs would actually change, so it be best to add suffixes at that point, not now ! Signed-off-by: Vineet Gupta Signed-off-by: Philipp Zabel --- .../devicetree/bindings/reset/snps,hsdk-reset.txt | 28 +++++ .../bindings/reset/snps,hsdk-v1-reset.txt | 28 ----- MAINTAINERS | 6 +- drivers/reset/Kconfig | 6 +- drivers/reset/Makefile | 2 +- drivers/reset/reset-hsdk-v1.c | 137 --------------------- drivers/reset/reset-hsdk.c | 137 +++++++++++++++++++++ include/dt-bindings/reset/snps,hsdk-reset.h | 17 +++ include/dt-bindings/reset/snps,hsdk-v1-reset.h | 17 --- 9 files changed, 189 insertions(+), 189 deletions(-) create mode 100644 Documentation/devicetree/bindings/reset/snps,hsdk-reset.txt delete mode 100644 Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt delete mode 100644 drivers/reset/reset-hsdk-v1.c create mode 100644 drivers/reset/reset-hsdk.c create mode 100644 include/dt-bindings/reset/snps,hsdk-reset.h delete mode 100644 include/dt-bindings/reset/snps,hsdk-v1-reset.h diff --git a/Documentation/devicetree/bindings/reset/snps,hsdk-reset.txt b/Documentation/devicetree/bindings/reset/snps,hsdk-reset.txt new file mode 100644 index 000000000000..830069b1c37c --- /dev/null +++ b/Documentation/devicetree/bindings/reset/snps,hsdk-reset.txt @@ -0,0 +1,28 @@ +Binding for the Synopsys HSDK reset controller + +This binding uses the common reset binding[1]. + +[1] Documentation/devicetree/bindings/reset/reset.txt + +Required properties: +- compatible: should be "snps,hsdk-reset". +- reg: should always contain 2 pairs address - length: first for reset + configuration register and second for corresponding SW reset and status bits + register. +- #reset-cells: from common reset binding; Should always be set to 1. + +Example: + reset: reset@880 { + compatible = "snps,hsdk-reset"; + #reset-cells = <1>; + reg = <0x8A0 0x4>, <0xFF0 0x4>; + }; + +Specifying reset lines connected to IP modules: + ethernet@.... { + .... + resets = <&reset HSDK_V1_ETH_RESET>; + .... + }; + +The index could be found in diff --git a/Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt b/Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt deleted file mode 100644 index 6a68146ee353..000000000000 --- a/Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt +++ /dev/null @@ -1,28 +0,0 @@ -Binding for the HSDK v1 reset controller - -This binding uses the common reset binding[1]. - -[1] Documentation/devicetree/bindings/reset/reset.txt - -Required properties: -- compatible: should be "snps,hsdk-v1.0-reset". -- reg: should always contain 2 pairs address - length: first for reset - configuration register and second for corresponding SW reset and status bits - register. -- #reset-cells: from common reset binding; Should always be set to 1. - -Example: - reset: reset@880 { - compatible = "snps,hsdk-v1.0-reset"; - #reset-cells = <1>; - reg = <0x8A0 0x4>, <0xFF0 0x4>; - }; - -Specifying reset lines connected to IP modules: - ethernet@.... { - .... - resets = <&reset HSDK_V1_ETH_RESET>; - .... - }; - -The index could be found in diff --git a/MAINTAINERS b/MAINTAINERS index 2281af4b41b6..3b9887b3644e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12915,9 +12915,9 @@ F: drivers/mmc/host/dw_mmc* SYNOPSYS HSDK RESET CONTROLLER DRIVER M: Eugeniy Paltsev S: Supported -F: drivers/reset/reset-hsdk-v1.c -F: include/dt-bindings/reset/snps,hsdk-v1-reset.h -F: Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt +F: drivers/reset/reset-hsdk.c +F: include/dt-bindings/reset/snps,hsdk-reset.h +F: Documentation/devicetree/bindings/reset/snps,hsdk-reset.txt SYSTEM CONFIGURATION (SYSCON) M: Lee Jones diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index d063bd5373dd..a7c7d5a8c089 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -34,12 +34,12 @@ config RESET_BERLIN help This enables the reset controller driver for Marvell Berlin SoCs. -config RESET_HSDK_V1 - bool "HSDK v1 Reset Driver" +config RESET_HSDK + bool "Synopsys HSDK Reset Driver" depends on HAS_IOMEM default n help - This enables the reset controller driver for HSDK v1. + This enables the reset controller driver for HSDK board. config RESET_IMX7 bool "i.MX7 Reset Driver" if COMPILE_TEST diff --git a/drivers/reset/Makefile b/drivers/reset/Makefile index d368367110e5..af1c15c330b3 100644 --- a/drivers/reset/Makefile +++ b/drivers/reset/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_ARCH_TEGRA) += tegra/ obj-$(CONFIG_RESET_A10SR) += reset-a10sr.o obj-$(CONFIG_RESET_ATH79) += reset-ath79.o obj-$(CONFIG_RESET_BERLIN) += reset-berlin.o -obj-$(CONFIG_RESET_HSDK_V1) += reset-hsdk-v1.o +obj-$(CONFIG_RESET_HSDK) += reset-hsdk.o obj-$(CONFIG_RESET_IMX7) += reset-imx7.o obj-$(CONFIG_RESET_LANTIQ) += reset-lantiq.o obj-$(CONFIG_RESET_LPC18XX) += reset-lpc18xx.o diff --git a/drivers/reset/reset-hsdk-v1.c b/drivers/reset/reset-hsdk-v1.c deleted file mode 100644 index bca13e4bf622..000000000000 --- a/drivers/reset/reset-hsdk-v1.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2017 Synopsys. - * - * Synopsys HSDKv1 SDP reset driver. - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define to_hsdkv1_rst(p) container_of((p), struct hsdkv1_rst, rcdev) - -struct hsdkv1_rst { - void __iomem *regs_ctl; - void __iomem *regs_rst; - spinlock_t lock; - struct reset_controller_dev rcdev; -}; - -static const u32 rst_map[] = { - BIT(16), /* APB_RST */ - BIT(17), /* AXI_RST */ - BIT(18), /* ETH_RST */ - BIT(19), /* USB_RST */ - BIT(20), /* SDIO_RST */ - BIT(21), /* HDMI_RST */ - BIT(22), /* GFX_RST */ - BIT(25), /* DMAC_RST */ - BIT(31), /* EBI_RST */ -}; - -#define HSDK_MAX_RESETS ARRAY_SIZE(rst_map) - -#define CGU_SYS_RST_CTRL 0x0 -#define CGU_IP_SW_RESET 0x0 -#define CGU_IP_SW_RESET_DELAY_SHIFT 16 -#define CGU_IP_SW_RESET_DELAY_MASK GENMASK(31, CGU_IP_SW_RESET_DELAY_SHIFT) -#define CGU_IP_SW_RESET_DELAY 0 -#define CGU_IP_SW_RESET_RESET BIT(0) -#define SW_RESET_TIMEOUT 10000 - -static void hsdkv1_reset_config(struct hsdkv1_rst *rst, unsigned long id) -{ - writel(rst_map[id], rst->regs_ctl + CGU_SYS_RST_CTRL); -} - -static int hsdkv1_reset_do(struct hsdkv1_rst *rst) -{ - u32 reg; - - reg = readl(rst->regs_rst + CGU_IP_SW_RESET); - reg &= ~CGU_IP_SW_RESET_DELAY_MASK; - reg |= CGU_IP_SW_RESET_DELAY << CGU_IP_SW_RESET_DELAY_SHIFT; - reg |= CGU_IP_SW_RESET_RESET; - writel(reg, rst->regs_rst + CGU_IP_SW_RESET); - - /* wait till reset bit is back to 0 */ - return readl_poll_timeout_atomic(rst->regs_rst + CGU_IP_SW_RESET, reg, - !(reg & CGU_IP_SW_RESET_RESET), 5, SW_RESET_TIMEOUT); -} - -static int hsdkv1_reset_reset(struct reset_controller_dev *rcdev, - unsigned long id) -{ - struct hsdkv1_rst *rst = to_hsdkv1_rst(rcdev); - unsigned long flags; - int ret; - - spin_lock_irqsave(&rst->lock, flags); - hsdkv1_reset_config(rst, id); - ret = hsdkv1_reset_do(rst); - spin_unlock_irqrestore(&rst->lock, flags); - - return ret; -} - -static const struct reset_control_ops hsdkv1_reset_ops = { - .reset = hsdkv1_reset_reset, -}; - -static int hsdkv1_reset_probe(struct platform_device *pdev) -{ - struct hsdkv1_rst *rst; - struct resource *mem; - - rst = devm_kzalloc(&pdev->dev, sizeof(*rst), GFP_KERNEL); - if (!rst) - return -ENOMEM; - - mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rst->regs_ctl = devm_ioremap_resource(&pdev->dev, mem); - if (IS_ERR(rst->regs_ctl)) - return PTR_ERR(rst->regs_ctl); - - mem = platform_get_resource(pdev, IORESOURCE_MEM, 1); - rst->regs_rst = devm_ioremap_resource(&pdev->dev, mem); - if (IS_ERR(rst->regs_rst)) - return PTR_ERR(rst->regs_rst); - - spin_lock_init(&rst->lock); - - rst->rcdev.owner = THIS_MODULE; - rst->rcdev.ops = &hsdkv1_reset_ops; - rst->rcdev.of_node = pdev->dev.of_node; - rst->rcdev.nr_resets = HSDK_MAX_RESETS; - rst->rcdev.of_reset_n_cells = 1; - - return reset_controller_register(&rst->rcdev); -} - -static const struct of_device_id hsdkv1_reset_dt_match[] = { - { .compatible = "snps,hsdk-v1.0-reset" }, - { }, -}; - -static struct platform_driver hsdkv1_reset_driver = { - .probe = hsdkv1_reset_probe, - .driver = { - .name = "hsdk-v1.0-reset", - .of_match_table = hsdkv1_reset_dt_match, - }, -}; -builtin_platform_driver(hsdkv1_reset_driver); - -MODULE_AUTHOR("Eugeniy Paltsev "); -MODULE_DESCRIPTION("Synopsys HSDKv1 SDP reset driver"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/reset/reset-hsdk.c b/drivers/reset/reset-hsdk.c new file mode 100644 index 000000000000..8bce391c6943 --- /dev/null +++ b/drivers/reset/reset-hsdk.c @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2017 Synopsys. + * + * Synopsys HSDK Development platform reset driver. + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define to_hsdk_rst(p) container_of((p), struct hsdk_rst, rcdev) + +struct hsdk_rst { + void __iomem *regs_ctl; + void __iomem *regs_rst; + spinlock_t lock; + struct reset_controller_dev rcdev; +}; + +static const u32 rst_map[] = { + BIT(16), /* APB_RST */ + BIT(17), /* AXI_RST */ + BIT(18), /* ETH_RST */ + BIT(19), /* USB_RST */ + BIT(20), /* SDIO_RST */ + BIT(21), /* HDMI_RST */ + BIT(22), /* GFX_RST */ + BIT(25), /* DMAC_RST */ + BIT(31), /* EBI_RST */ +}; + +#define HSDK_MAX_RESETS ARRAY_SIZE(rst_map) + +#define CGU_SYS_RST_CTRL 0x0 +#define CGU_IP_SW_RESET 0x0 +#define CGU_IP_SW_RESET_DELAY_SHIFT 16 +#define CGU_IP_SW_RESET_DELAY_MASK GENMASK(31, CGU_IP_SW_RESET_DELAY_SHIFT) +#define CGU_IP_SW_RESET_DELAY 0 +#define CGU_IP_SW_RESET_RESET BIT(0) +#define SW_RESET_TIMEOUT 10000 + +static void hsdk_reset_config(struct hsdk_rst *rst, unsigned long id) +{ + writel(rst_map[id], rst->regs_ctl + CGU_SYS_RST_CTRL); +} + +static int hsdk_reset_do(struct hsdk_rst *rst) +{ + u32 reg; + + reg = readl(rst->regs_rst + CGU_IP_SW_RESET); + reg &= ~CGU_IP_SW_RESET_DELAY_MASK; + reg |= CGU_IP_SW_RESET_DELAY << CGU_IP_SW_RESET_DELAY_SHIFT; + reg |= CGU_IP_SW_RESET_RESET; + writel(reg, rst->regs_rst + CGU_IP_SW_RESET); + + /* wait till reset bit is back to 0 */ + return readl_poll_timeout_atomic(rst->regs_rst + CGU_IP_SW_RESET, reg, + !(reg & CGU_IP_SW_RESET_RESET), 5, SW_RESET_TIMEOUT); +} + +static int hsdk_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + struct hsdk_rst *rst = to_hsdk_rst(rcdev); + unsigned long flags; + int ret; + + spin_lock_irqsave(&rst->lock, flags); + hsdk_reset_config(rst, id); + ret = hsdk_reset_do(rst); + spin_unlock_irqrestore(&rst->lock, flags); + + return ret; +} + +static const struct reset_control_ops hsdk_reset_ops = { + .reset = hsdk_reset_reset, +}; + +static int hsdk_reset_probe(struct platform_device *pdev) +{ + struct hsdk_rst *rst; + struct resource *mem; + + rst = devm_kzalloc(&pdev->dev, sizeof(*rst), GFP_KERNEL); + if (!rst) + return -ENOMEM; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + rst->regs_ctl = devm_ioremap_resource(&pdev->dev, mem); + if (IS_ERR(rst->regs_ctl)) + return PTR_ERR(rst->regs_ctl); + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 1); + rst->regs_rst = devm_ioremap_resource(&pdev->dev, mem); + if (IS_ERR(rst->regs_rst)) + return PTR_ERR(rst->regs_rst); + + spin_lock_init(&rst->lock); + + rst->rcdev.owner = THIS_MODULE; + rst->rcdev.ops = &hsdk_reset_ops; + rst->rcdev.of_node = pdev->dev.of_node; + rst->rcdev.nr_resets = HSDK_MAX_RESETS; + rst->rcdev.of_reset_n_cells = 1; + + return reset_controller_register(&rst->rcdev); +} + +static const struct of_device_id hsdk_reset_dt_match[] = { + { .compatible = "snps,hsdk-reset" }, + { }, +}; + +static struct platform_driver hsdk_reset_driver = { + .probe = hsdk_reset_probe, + .driver = { + .name = "hsdk-reset", + .of_match_table = hsdk_reset_dt_match, + }, +}; +builtin_platform_driver(hsdk_reset_driver); + +MODULE_AUTHOR("Eugeniy Paltsev "); +MODULE_DESCRIPTION("Synopsys HSDK SDP reset driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/dt-bindings/reset/snps,hsdk-reset.h b/include/dt-bindings/reset/snps,hsdk-reset.h new file mode 100644 index 000000000000..e1a643e4bc91 --- /dev/null +++ b/include/dt-bindings/reset/snps,hsdk-reset.h @@ -0,0 +1,17 @@ +/** + * This header provides index for the HSDK reset controller. + */ +#ifndef _DT_BINDINGS_RESET_CONTROLLER_SNPS_HSDK +#define _DT_BINDINGS_RESET_CONTROLLER_SNPS_HSDK + +#define HSDK_APB_RESET 0 +#define HSDK_AXI_RESET 1 +#define HSDK_ETH_RESET 2 +#define HSDK_USB_RESET 3 +#define HSDK_SDIO_RESET 4 +#define HSDK_HDMI_RESET 5 +#define HSDK_GFX_RESET 6 +#define HSDK_DMAC_RESET 7 +#define HSDK_EBI_RESET 8 + +#endif /*_DT_BINDINGS_RESET_CONTROLLER_SNPS_HSDK*/ diff --git a/include/dt-bindings/reset/snps,hsdk-v1-reset.h b/include/dt-bindings/reset/snps,hsdk-v1-reset.h deleted file mode 100644 index d898c89b7123..000000000000 --- a/include/dt-bindings/reset/snps,hsdk-v1-reset.h +++ /dev/null @@ -1,17 +0,0 @@ -/** - * This header provides index for the HSDK v1 reset controller. - */ -#ifndef _DT_BINDINGS_RESET_CONTROLLER_HSDK_V1 -#define _DT_BINDINGS_RESET_CONTROLLER_HSDK_V1 - -#define HSDK_V1_APB_RESET 0 -#define HSDK_V1_AXI_RESET 1 -#define HSDK_V1_ETH_RESET 2 -#define HSDK_V1_USB_RESET 3 -#define HSDK_V1_SDIO_RESET 4 -#define HSDK_V1_HDMI_RESET 5 -#define HSDK_V1_GFX_RESET 6 -#define HSDK_V1_DMAC_RESET 7 -#define HSDK_V1_EBI_RESET 8 - -#endif /*_DT_BINDINGS_RESET_CONTROLLER_HSDK_V1*/ -- cgit v1.2.3-70-g09d2 From a931b9ce93841a5b66b709ba5a244276e345e63b Mon Sep 17 00:00:00 2001 From: Guneshwor Singh Date: Thu, 14 Sep 2017 17:49:40 +0530 Subject: ALSA: compress: Remove unused variable Commit 04c5d5a430fc ("ALSA: compress: Embed struct device") removed the statement that used 'str' but didn't remove the variable itself. So remove it. [Adding stable to Cc since pr_debug() may refer to the uninitialized buffer -- tiwai] Fixes: 04c5d5a430fc ("ALSA: compress: Embed struct device") Signed-off-by: Guneshwor Singh Cc: Signed-off-by: Takashi Iwai --- sound/core/compress_offload.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index fec1dfdb14ad..4490a699030b 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -948,14 +948,13 @@ static const struct file_operations snd_compr_file_ops = { static int snd_compress_dev_register(struct snd_device *device) { int ret = -EINVAL; - char str[16]; struct snd_compr *compr; if (snd_BUG_ON(!device || !device->device_data)) return -EBADFD; compr = device->device_data; - pr_debug("reg %s for device %s, direction %d\n", str, compr->name, + pr_debug("reg device %s, direction %d\n", compr->name, compr->direction); /* register compressed device */ ret = snd_register_device(SNDRV_DEVICE_TYPE_COMPRESS, -- cgit v1.2.3-70-g09d2 From 1c363eaece2752c5f8b1b874cb4ae435de06aa66 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 10:56:13 +0200 Subject: android: binder: fix type mismatch warning Allowing binder to expose the 64-bit API on 32-bit kernels caused a build warning: drivers/android/binder.c: In function 'binder_transaction_buffer_release': drivers/android/binder.c:2220:15: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] fd_array = (u32 *)(parent_buffer + fda->parent_offset); ^ drivers/android/binder.c: In function 'binder_translate_fd_array': drivers/android/binder.c:2445:13: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] fd_array = (u32 *)(parent_buffer + fda->parent_offset); ^ drivers/android/binder.c: In function 'binder_fixup_parent': drivers/android/binder.c:2511:18: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] This adds extra type casts to avoid the warning. However, there is another problem with the Kconfig option: turning it on or off creates two incompatible ABI versions, a kernel that has this enabled cannot run user space that was built without it or vice versa. A better solution might be to leave the option hidden until the binder code is fixed to deal with both ABI versions. Fixes: e8d2ed7db7c3 ("Revert "staging: Fix build issues with new binder API"") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d055b3f2a207..72af95c9ea22 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2217,7 +2217,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, debug_id, (u64)fda->num_fds); continue; } - fd_array = (u32 *)(parent_buffer + fda->parent_offset); + fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset); for (fd_index = 0; fd_index < fda->num_fds; fd_index++) task_close_fd(proc, fd_array[fd_index]); } break; @@ -2442,7 +2442,7 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, */ parent_buffer = parent->buffer - binder_alloc_get_user_buffer_offset(&target_proc->alloc); - fd_array = (u32 *)(parent_buffer + fda->parent_offset); + fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset); if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", proc->pid, thread->pid); @@ -2508,7 +2508,7 @@ static int binder_fixup_parent(struct binder_transaction *t, proc->pid, thread->pid); return -EINVAL; } - parent_buffer = (u8 *)(parent->buffer - + parent_buffer = (u8 *)((uintptr_t)parent->buffer - binder_alloc_get_user_buffer_offset( &target_proc->alloc)); *(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer; -- cgit v1.2.3-70-g09d2 From 52b81611f209da5f49019260522633e994e241b5 Mon Sep 17 00:00:00 2001 From: Xu YiPing Date: Tue, 5 Sep 2017 10:25:38 -0700 Subject: binder: fix an ret value override commit 372e3147df70 ("binder: guarantee txn complete / errors delivered in-order") incorrectly defined a local ret value. This ret value will be invalid when out of the if block Fixes: 372e3147df70 ("binder: refactor binder ref inc/dec for thread safety") Signed-off-by: Xu YiPing Signed-off-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 72af95c9ea22..b257bb0e2cfe 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2326,7 +2326,6 @@ static int binder_translate_handle(struct flat_binder_object *fp, (u64)node->ptr); binder_node_unlock(node); } else { - int ret; struct binder_ref_data dest_rdata; binder_node_unlock(node); -- cgit v1.2.3-70-g09d2 From d53bebdf4d779497b29e1aad26e19cac1d446f42 Mon Sep 17 00:00:00 2001 From: Xu YiPing Date: Tue, 5 Sep 2017 10:21:52 -0700 Subject: binder: fix memory corruption in binder_transaction binder commit 7a4408c6bd3e ("binder: make sure accesses to proc/thread are safe") made a change to enqueue tcomplete to thread->todo before enqueuing the transaction. However, in err_dead_proc_or_thread case, the tcomplete is directly freed, without dequeued. It may cause the thread->todo list to be corrupted. So, dequeue it before freeing. Fixes: 7a4408c6bd3e ("binder: make sure accesses to proc/thread are safe") Signed-off-by: Xu YiPing Signed-off-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index b257bb0e2cfe..ab34239a76ee 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3082,6 +3082,7 @@ static void binder_transaction(struct binder_proc *proc, err_dead_proc_or_thread: return_error = BR_DEAD_REPLY; return_error_line = __LINE__; + binder_dequeue_work(proc, tcomplete); err_translate_failed: err_bad_object_type: err_bad_offset: -- cgit v1.2.3-70-g09d2 From 93dc1774d2a4c7a298d5cdf78cc8acdcb7b1428d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 7 Sep 2017 15:37:30 +0200 Subject: auxdisplay: charlcd: properly restore atomic counter on error path Commit f4757af ("staging: panel: Fix single-open policy race condition") introduced in 3.19-rc1 attempted to fix a race condition on the open, but failed to properly do it and used to exit without restoring the semaphore. This results in -EBUSY being returned after the first open error until the module is reloaded or the system restarted (ie: consecutive to a dual open resulting in -EBUSY or to a permission error). [ Note for stable maintainers: the code moved from drivers/misc/panel.c to drivers/auxdisplay/{charlcd,panel}.c during 4.12. The patch easily applies there (modulo the renamed atomic counter) but I can provide a tested backport if desired. ] Fixes: f4757af85 # 3.19-rc1 Cc: stable@vger.kernel.org Cc: Mariusz Gorski Cc: Geert Uytterhoeven Cc: Miguel Ojeda Sandonis Signed-off-by: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/charlcd.c | 11 +++++++++-- drivers/auxdisplay/panel.c | 11 +++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index cfeb049a01ef..642afd88870b 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -647,18 +647,25 @@ static ssize_t charlcd_write(struct file *file, const char __user *buf, static int charlcd_open(struct inode *inode, struct file *file) { struct charlcd_priv *priv = to_priv(the_charlcd); + int ret; + ret = -EBUSY; if (!atomic_dec_and_test(&charlcd_available)) - return -EBUSY; /* open only once at a time */ + goto fail; /* open only once at a time */ + ret = -EPERM; if (file->f_mode & FMODE_READ) /* device is write-only */ - return -EPERM; + goto fail; if (priv->must_clear) { charlcd_clear_display(&priv->lcd); priv->must_clear = false; } return nonseekable_open(inode, file); + + fail: + atomic_inc(&charlcd_available); + return ret; } static int charlcd_release(struct inode *inode, struct file *file) diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c index df126dcdaf18..6911acd896d9 100644 --- a/drivers/auxdisplay/panel.c +++ b/drivers/auxdisplay/panel.c @@ -1105,14 +1105,21 @@ static ssize_t keypad_read(struct file *file, static int keypad_open(struct inode *inode, struct file *file) { + int ret; + + ret = -EBUSY; if (!atomic_dec_and_test(&keypad_available)) - return -EBUSY; /* open only once at a time */ + goto fail; /* open only once at a time */ + ret = -EPERM; if (file->f_mode & FMODE_WRITE) /* device is read-only */ - return -EPERM; + goto fail; keypad_buflen = 0; /* flush the buffer on opening */ return 0; + fail: + atomic_inc(&keypad_available); + return ret; } static int keypad_release(struct inode *inode, struct file *file) -- cgit v1.2.3-70-g09d2 From 38b0774c0598c7a54b8499d18c2b764c35dc94ab Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Mon, 11 Sep 2017 11:00:11 +0200 Subject: nvmem: core: return EFBIG on out-of-range write When writing data that exceeds the nvmem size to a nvmem sysfs file using the sh redirection operator >, the shell hangs, trying to write the out-of-range bytes endlessly. Fix the problem by returning EFBIG described in man 2 write. Similar change was done for binary sysfs files on commit 0936896056365349afa867c16e9f9100a6707cbf Signed-off-by: Guy Shapiro Cc: linux-api@vger.kernel.org Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index de54c7f5048a..3866117bc285 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -135,7 +135,7 @@ static ssize_t bin_attr_nvmem_write(struct file *filp, struct kobject *kobj, /* Stop the user from writing */ if (pos >= nvmem->size) - return 0; + return -EFBIG; if (count < nvmem->word_size) return -EINVAL; -- cgit v1.2.3-70-g09d2 From aad8d097c9224be264939fc6c02a5570ea094f60 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 11 Sep 2017 11:00:12 +0200 Subject: nvmem: add missing of_node_put() in of_nvmem_cell_get() of_get_next_parent() increments the refcount of the returned node. It should be put when done. Signed-off-by: Masahiro Yamada Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 3866117bc285..d12e5de78e70 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -789,6 +789,7 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, return ERR_PTR(-EINVAL); nvmem = __nvmem_device_get(nvmem_np, NULL, NULL); + of_node_put(nvmem_np); if (IS_ERR(nvmem)) return ERR_CAST(nvmem); -- cgit v1.2.3-70-g09d2 From 6878e7de6af726de47f9f3bec649c3f49e786586 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 13 Sep 2017 16:29:48 -0700 Subject: driver core: suppress sending MODALIAS in UNBIND uevents The current udev rules cause modules to be loaded on all device events save for "remove". With the introduction of KOBJ_BIND/KOBJ_UNBIND this causes issues, as driver modules that have devices bound to their drivers get immediately reloaded, and it appears to the user that module unloading doe snot work. The standard udev matching rule is foillowing: ENV{MODALIAS}=="?*", RUN{builtin}+="kmod load $env{MODALIAS}" Given that MODALIAS data is not terribly useful for UNBIND event, let's zap it from the generated uevent environment until we get userspace updated with the correct udev rule that only loads modules on "add" event. Reported-by: Jakub Kicinski Tested-by: Jakub Kicinski Fixes: 1455cf8dbfd0 ("driver core: emit uevents when device is bound ...") Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index e590523ea476..f237a09a5862 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -294,6 +294,26 @@ static void cleanup_uevent_env(struct subprocess_info *info) } #endif +static void zap_modalias_env(struct kobj_uevent_env *env) +{ + static const char modalias_prefix[] = "MODALIAS="; + int i; + + for (i = 0; i < env->envp_idx;) { + if (strncmp(env->envp[i], modalias_prefix, + sizeof(modalias_prefix) - 1)) { + i++; + continue; + } + + if (i != env->envp_idx - 1) + memmove(&env->envp[i], &env->envp[i + 1], + sizeof(env->envp[i]) * env->envp_idx - 1); + + env->envp_idx--; + } +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -409,16 +429,29 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } } - /* - * Mark "add" and "remove" events in the object to ensure proper - * events to userspace during automatic cleanup. If the object did - * send an "add" event, "remove" will automatically generated by - * the core, if not already done by the caller. - */ - if (action == KOBJ_ADD) + switch (action) { + case KOBJ_ADD: + /* + * Mark "add" event so we can make sure we deliver "remove" + * event to userspace during automatic cleanup. If + * the object did send an "add" event, "remove" will + * automatically generated by the core, if not already done + * by the caller. + */ kobj->state_add_uevent_sent = 1; - else if (action == KOBJ_REMOVE) + break; + + case KOBJ_REMOVE: kobj->state_remove_uevent_sent = 1; + break; + + case KOBJ_UNBIND: + zap_modalias_env(env); + break; + + default: + break; + } mutex_lock(&uevent_sock_mutex); /* we will send an event, so request a new sequence number */ -- cgit v1.2.3-70-g09d2 From 452562abb5b76c14449dead2a7113f641893e8bc Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 7 Sep 2017 15:16:05 +0100 Subject: base: arch_topology: fix section mismatch build warnings Commit 2ef7a2953c81 ("arm, arm64: factorize common cpu capacity default code") introduced init_cpu_capacity_callback and init_cpu_capacity_notifier which are referenced from initcall and are missing __init{,data} annotations resulting the below section mismatch build warnings. "WARNING: vmlinux.o(.text+0xbab790): Section mismatch in reference from the function init_cpu_capacity_callback() to the variable .init.text:$x The function init_cpu_capacity_callback() references the variable __init $x. This is often because init_cpu_capacity_callback lacks a __init annotation or the annotation of $x is wrong." This patch fixes the above build warnings by adding the required annotations. Fixes: 2ef7a2953c81 ("arm, arm64: factorize common cpu capacity default code") Cc: Juri Lelli Cc: stable Signed-off-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 41be9ff7d70a..6df7d6676a48 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -166,11 +166,11 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) } #ifdef CONFIG_CPU_FREQ -static cpumask_var_t cpus_to_visit; -static void parsing_done_workfn(struct work_struct *work); -static DECLARE_WORK(parsing_done_work, parsing_done_workfn); +static cpumask_var_t cpus_to_visit __initdata; +static void __init parsing_done_workfn(struct work_struct *work); +static __initdata DECLARE_WORK(parsing_done_work, parsing_done_workfn); -static int +static int __init init_cpu_capacity_callback(struct notifier_block *nb, unsigned long val, void *data) @@ -206,7 +206,7 @@ init_cpu_capacity_callback(struct notifier_block *nb, return 0; } -static struct notifier_block init_cpu_capacity_notifier = { +static struct notifier_block init_cpu_capacity_notifier __initdata = { .notifier_call = init_cpu_capacity_callback, }; @@ -232,7 +232,7 @@ static int __init register_cpufreq_notifier(void) } core_initcall(register_cpufreq_notifier); -static void parsing_done_workfn(struct work_struct *work) +static void __init parsing_done_workfn(struct work_struct *work) { cpufreq_unregister_notifier(&init_cpu_capacity_notifier, CPUFREQ_POLICY_NOTIFIER); -- cgit v1.2.3-70-g09d2 From 9821786d7c90eee2f6852261893d9703801aae47 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 18 Sep 2017 17:39:12 +0300 Subject: usb: xhci: Free the right ring in xhci_add_endpoint() In the xhci_add_endpoint(), a new ring was allocated and saved at xhci_virt_ep->new_ring. Hence, when error happens, we need to free the allocated ring before returning error. Current code frees xhci_virt_ep->ring instead of the new_ring. This patch fixes this. Cc: Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index b2ff1ff1a02f..ee198ea47f49 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1703,7 +1703,8 @@ static int xhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, if (xhci->quirks & XHCI_MTK_HOST) { ret = xhci_mtk_add_ep_quirk(hcd, udev, ep); if (ret < 0) { - xhci_free_endpoint_ring(xhci, virt_dev, ep_index); + xhci_ring_free(xhci, virt_dev->eps[ep_index].new_ring); + virt_dev->eps[ep_index].new_ring = NULL; return ret; } } -- cgit v1.2.3-70-g09d2 From 5a838a13c9b4e5dd188b7a6eaeb894e9358ead0c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 18 Sep 2017 17:39:13 +0300 Subject: xhci: fix finding correct bus_state structure for USB 3.1 hosts xhci driver keeps a bus_state structure for each hcd (usb2 and usb3) The structure is picked based on hcd speed, but driver only compared for HCD_USB3 speed, returning the wrong bus_state for HCD_USB31 hosts. This caused null pointer dereference errors in bus_resume function. Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 2abaa4d6d39d..e96e90d0fe9f 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1681,7 +1681,7 @@ struct xhci_bus_state { static inline unsigned int hcd_index(struct usb_hcd *hcd) { - if (hcd->speed == HCD_USB3) + if (hcd->speed >= HCD_USB3) return 0; else return 1; -- cgit v1.2.3-70-g09d2 From 114ec3a6f9096d211a4aff4277793ba969a62c73 Mon Sep 17 00:00:00 2001 From: Jim Dickerson Date: Mon, 18 Sep 2017 17:39:14 +0300 Subject: usb: pci-quirks.c: Corrected timeout values used in handshake Servers were emitting failed handoff messages but were not waiting the full 1 second as designated in section 4.22.1 of the eXtensible Host Controller Interface specifications. The handshake was using wrong units so calls were made with milliseconds not microseconds. Comments referenced 5 seconds not 1 second as in specs. The wrong units were also corrected in a second handshake call. Cc: Signed-off-by: Jim Dickerson Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 658d9d1f9ea3..e02dbb1e1cdd 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -1022,7 +1022,7 @@ EXPORT_SYMBOL_GPL(usb_disable_xhci_ports); * * Takes care of the handoff between the Pre-OS (i.e. BIOS) and the OS. * It signals to the BIOS that the OS wants control of the host controller, - * and then waits 5 seconds for the BIOS to hand over control. + * and then waits 1 second for the BIOS to hand over control. * If we timeout, assume the BIOS is broken and take control anyway. */ static void quirk_usb_handoff_xhci(struct pci_dev *pdev) @@ -1069,9 +1069,9 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev) if (val & XHCI_HC_BIOS_OWNED) { writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset); - /* Wait for 5 seconds with 10 microsecond polling interval */ + /* Wait for 1 second with 10 microsecond polling interval */ timeout = handshake(base + ext_cap_offset, XHCI_HC_BIOS_OWNED, - 0, 5000, 10); + 0, 1000000, 10); /* Assume a buggy BIOS and take HC ownership anyway */ if (timeout) { @@ -1100,7 +1100,7 @@ hc_init: * operational or runtime registers. Wait 5 seconds and no more. */ timeout = handshake(op_reg_base + XHCI_STS_OFFSET, XHCI_STS_CNR, 0, - 5000, 10); + 5000000, 10); /* Assume a buggy HC and start HC initialization anyway */ if (timeout) { val = readl(op_reg_base + XHCI_STS_OFFSET); -- cgit v1.2.3-70-g09d2 From 76a14d7bf92960aac2f5450bfd23783bfa618be9 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 18 Sep 2017 17:39:15 +0300 Subject: xhci: fix wrong endpoint ESIT value shown in tracing Read the endpiont ESIT from endpiont context using correct macro. Add a macro for reading the high bits of ESIT for Large ESIT Payload Capable hosts (LEC=1) Cc: # 4.12 Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index e96e90d0fe9f..7c87189e8110 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -735,6 +735,8 @@ struct xhci_ep_ctx { #define EP_MAXPSTREAMS(p) (((p) << 10) & EP_MAXPSTREAMS_MASK) /* Endpoint is set up with a Linear Stream Array (vs. Secondary Stream Array) */ #define EP_HAS_LSA (1 << 15) +/* hosts with LEC=1 use bits 31:24 as ESIT high bits. */ +#define CTX_TO_MAX_ESIT_PAYLOAD_HI(p) (((p) >> 24) & 0xff) /* ep_info2 bitmasks */ /* @@ -2540,8 +2542,8 @@ static inline const char *xhci_decode_ep_context(u32 info, u32 info2, u64 deq, u8 lsa; u8 hid; - esit = EP_MAX_ESIT_PAYLOAD_HI(info) << 16 | - EP_MAX_ESIT_PAYLOAD_LO(tx_info); + esit = CTX_TO_MAX_ESIT_PAYLOAD_HI(info) << 16 | + CTX_TO_MAX_ESIT_PAYLOAD(tx_info); ep_state = info & EP_STATE_MASK; max_pstr = info & EP_MAXPSTREAMS_MASK; -- cgit v1.2.3-70-g09d2 From c6b8e79306f515b5483eb11076e0fbfc140434a8 Mon Sep 17 00:00:00 2001 From: Adam Wallis Date: Mon, 18 Sep 2017 17:39:16 +0300 Subject: usb: host: xhci-plat: allow sysdev to inherit from ACPI Commit 4c39d4b949d3 ("usb: xhci: use bus->sysdev for DMA configuration") updated the method determining DMA for XHCI from sysdev. However, this patch broke the ability to enumerate the FWNODE from parent ACPI devices from the child plat XHCI device. Currently, xhci_plat is not set up properly when the parent device is an ACPI node. The conditions that xhci_plat_probe should satisfy are 1. xhci_plat comes from firmware 2. xhci_plat is child of a device from firmware (dwc3-plat) 3. xhci_plat is grandchild of a pci device (dwc3-pci) Case 2 is covered when the child is an OF node (by checking sysdev->parent->of_node), however, an ACPI parent will return NULL in the of_node check and will thus not result in sysdev being set to sysdev->parent [ 17.591549] xhci-hcd: probe of xhci-hcd.6.auto failed with error -5 This change adds a check for ACPI to completely allow for condition 2. This is done by first checking if the parent node is of type ACPI (e.g., dwc3-plat) and set sysdev to sysdev->parent if either of the two following conditions are met: 1: If fwnode is empty (in the case that platform_device_add_properties was not called on the allocated platform device) 2: fwnode exists but is not of type ACPI (this would happen if platform_device_add_properties was called on the allocated device. Instead of type FWNODE_ACPI, you would end up with FWNODE_PDATA) Cc: stable@vger.kernel.org #4.12.x Cc: stable@vger.kernel.org #4.13.x Fixes: 4c39d4b949d3 ("usb: xhci: use bus->sysdev for DMA configuration") Tested-by: Thang Q. Nguyen Signed-off-by: Adam Wallis Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 163bafde709f..1cb6eaef4ae1 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -178,14 +178,18 @@ static int xhci_plat_probe(struct platform_device *pdev) * 2. xhci_plat is child of a device from firmware (dwc3-plat) * 3. xhci_plat is grandchild of a pci device (dwc3-pci) */ - sysdev = &pdev->dev; - if (sysdev->parent && !sysdev->of_node && sysdev->parent->of_node) - sysdev = sysdev->parent; + for (sysdev = &pdev->dev; sysdev; sysdev = sysdev->parent) { + if (is_of_node(sysdev->fwnode) || + is_acpi_device_node(sysdev->fwnode)) + break; #ifdef CONFIG_PCI - else if (sysdev->parent && sysdev->parent->parent && - sysdev->parent->parent->bus == &pci_bus_type) - sysdev = sysdev->parent->parent; + else if (sysdev->bus == &pci_bus_type) + break; #endif + } + + if (!sysdev) + sysdev = &pdev->dev; /* Try to set 64-bit DMA first */ if (WARN_ON(!sysdev->dma_mask)) -- cgit v1.2.3-70-g09d2 From 4ec1cd3eeeee7ccc35681270da028dbc29ca7bbd Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 18 Sep 2017 17:39:17 +0300 Subject: xhci: Fix sleeping with spin_lock_irq() held in ASmedia 1042A workaround The flow control workaround for ASM1042A xHC hosts sleeps between register polling. The workaround gets called in several places, among them with spin_lock_irq() held when xHC host is resumed or hoplug removed. This was noticed as kernel panics at resume on a Dell XPS15 9550 with TB16 thunderbolt dock. Avoid sleeping with spin_lock_irq() held, use udelay() instead The original workaround was added to 4.9 and 4.12 stable releases, this patch needs to be applied to those as well. Fixes: 9da5a1092b13 ("xhci: Bad Ethernet performance plugged in ASM1042A host") Cc: #4.9+ Reported-by: Jose Marino Tested-by: Jose Marino Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index e02dbb1e1cdd..6dda3623a276 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -447,7 +447,7 @@ static int usb_asmedia_wait_write(struct pci_dev *pdev) if ((value & ASMT_CONTROL_WRITE_BIT) == 0) return 0; - usleep_range(40, 60); + udelay(50); } dev_warn(&pdev->dev, "%s: check_write_ready timeout", __func__); -- cgit v1.2.3-70-g09d2 From 7bea22b124d77845c85a62eaa29a85ba6cc2f899 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 18 Sep 2017 17:39:18 +0300 Subject: xhci: set missing SuperSpeedPlus Link Protocol bit in roothub descriptor A SuperSpeedPlus roothub needs to have the Link Protocol (LP) bit set in the bmSublinkSpeedAttr[] entry of a SuperSpeedPlus descriptor. If the xhci controller has an optional Protocol Speed ID (PSI) table then that will be used as a base to create the roothub SuperSpeedPlus descriptor. The PSI table does not however necessary contain the LP bit so we need to set it manually. Check the psi speed and set LP bit if speed is 10Gbps or higher. We're not setting it for 5 to 10Gbps as USB 3.1 specification always mention SuperSpeedPlus for 10Gbps or higher, and some SSIC USB 3.0 speeds can be over 5Gbps, such as SSIC-G3B-L1 at 5830 Mbps Cc: # 4.6+ Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index ad89a6d4111b..4dba2cf73cbe 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -112,7 +112,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, /* If PSI table exists, add the custom speed attributes from it */ if (usb3_1 && xhci->usb3_rhub.psi_count) { - u32 ssp_cap_base, bm_attrib, psi; + u32 ssp_cap_base, bm_attrib, psi, psi_mant, psi_exp; int offset; ssp_cap_base = USB_DT_BOS_SIZE + USB_DT_USB_SS_CAP_SIZE; @@ -139,6 +139,15 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, for (i = 0; i < xhci->usb3_rhub.psi_count; i++) { psi = xhci->usb3_rhub.psi[i]; psi &= ~USB_SSP_SUBLINK_SPEED_RSVD; + psi_exp = XHCI_EXT_PORT_PSIE(psi); + psi_mant = XHCI_EXT_PORT_PSIM(psi); + + /* Shift to Gbps and set SSP Link BIT(14) if 10Gpbs */ + for (; psi_exp < 3; psi_exp++) + psi_mant /= 1000; + if (psi_mant >= 10) + psi |= BIT(14); + if ((psi & PLT_MASK) == PLT_SYM) { /* Symmetric, create SSA RX and TX from one PSI entry */ put_unaligned_le32(psi, &buf[offset]); -- cgit v1.2.3-70-g09d2 From bcd6a7aa13800afc1418e6b29d944d882214939a Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 18 Sep 2017 17:39:19 +0300 Subject: Revert "xhci: Limit USB2 port wake support for AMD Promontory hosts" This reverts commit dec08194ffeccfa1cf085906b53d301930eae18f. Commit dec08194ffec ("xhci: Limit USB2 port wake support for AMD Promontory hosts") makes all high speed USB ports on ASUS PRIME B350M-A cease to function after enabling runtime PM. All boards with this chipsets will be affected, so revert the commit. The original patch was added to stable 4.9, 4.11 and 4.12 and needs to reverted from there as well Cc: # 4.9+ Signed-off-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 3 --- drivers/usb/host/xhci-pci.c | 12 ------------ drivers/usb/host/xhci.h | 2 +- 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 4dba2cf73cbe..da9158f171cb 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1515,9 +1515,6 @@ int xhci_bus_suspend(struct usb_hcd *hcd) t2 |= PORT_WKOC_E | PORT_WKCONN_E; t2 &= ~PORT_WKDISC_E; } - if ((xhci->quirks & XHCI_U2_DISABLE_WAKE) && - (hcd->speed < HCD_USB3)) - t2 &= ~PORT_WAKE_BITS; } else t2 &= ~PORT_WAKE_BITS; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 8071c8fdd15e..76f392954733 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -54,11 +54,6 @@ #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 -#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 -#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba -#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb -#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc - #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 static const char hcd_name[] = "xhci_hcd"; @@ -142,13 +137,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; - if ((pdev->vendor == PCI_VENDOR_ID_AMD) && - ((pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4) || - (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_3) || - (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2) || - (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_1))) - xhci->quirks |= XHCI_U2_DISABLE_WAKE; - if (pdev->vendor == PCI_VENDOR_ID_INTEL) { xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_INTEL_HOST; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 7c87189e8110..2b48aa4f6b76 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1828,7 +1828,7 @@ struct xhci_hcd { /* For controller with a broken Port Disable implementation */ #define XHCI_BROKEN_PORT_PED (1 << 25) #define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26) -#define XHCI_U2_DISABLE_WAKE (1 << 27) +/* Reserved. It was XHCI_U2_DISABLE_WAKE */ #define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28) unsigned int num_active_eps; -- cgit v1.2.3-70-g09d2 From bf563b01c2895a4bfd1a29cc5abc67fe706ecffd Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 11 Sep 2017 09:45:42 +0200 Subject: driver core: platform: Don't read past the end of "driver_override" buffer When printing the driver_override parameter when it is 4095 and 4094 bytes long, the printing code would access invalid memory because we need count+1 bytes for printing. Reject driver_override values of these lengths in driver_override_store(). This is in close analogy to commit 4efe874aace5 ("PCI: Don't read past the end of sysfs "driver_override" buffer") from Sasha Levin. Fixes: 3d713e0e382e ("driver core: platform: add device binding path 'driver_override'") Cc: stable@vger.kernel.org # v3.17+ Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index d1bd99271066..9045c5f3734e 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -868,7 +868,8 @@ static ssize_t driver_override_store(struct device *dev, struct platform_device *pdev = to_platform_device(dev); char *driver_override, *old, *cp; - if (count > PATH_MAX) + /* We need to keep extra room for a newline */ + if (count >= (PAGE_SIZE - 1)) return -EINVAL; driver_override = kstrndup(buf, count, GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From 432219fd002ada95d2f45dd3a25c3f7c73f27864 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 2 Sep 2017 01:15:13 +0300 Subject: serial: sh-sci: document R8A77970 bindings R-Car V3M (R8A77970) SoC also has the R-Car gen3 compatible SCIF and HSCIF ports, so document the SoC specific bindings. Signed-off-by: Sergei Shtylyov Acked-by: Rob Herring Acked-by: Geert Uytterhoeven Acked-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/renesas,sci-serial.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt index 4fc96946f81d..cf504d0380ae 100644 --- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt +++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt @@ -41,6 +41,8 @@ Required properties: - "renesas,hscif-r8a7795" for R8A7795 (R-Car H3) HSCIF compatible UART. - "renesas,scif-r8a7796" for R8A7796 (R-Car M3-W) SCIF compatible UART. - "renesas,hscif-r8a7796" for R8A7796 (R-Car M3-W) HSCIF compatible UART. + - "renesas,scif-r8a77970" for R8A77970 (R-Car V3M) SCIF compatible UART. + - "renesas,hscif-r8a77970" for R8A77970 (R-Car V3M) HSCIF compatible UART. - "renesas,scif-r8a77995" for R8A77995 (R-Car D3) SCIF compatible UART. - "renesas,hscif-r8a77995" for R8A77995 (R-Car D3) HSCIF compatible UART. - "renesas,scifa-sh73a0" for SH73A0 (SH-Mobile AG5) SCIFA compatible UART. -- cgit v1.2.3-70-g09d2 From 104583b504da8a3ad86d033b497cb6e58941a9a1 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 12 Sep 2017 12:39:54 +0200 Subject: mxser: fix timeout calculation for low rates Paul reported, that low rates like B300 make the driver to hang in mxser_wait_until_sent. His debugging tackled the issue down to the info->timeout computation in mxser_set_baud. Obviously, ints are used there and they easily overflow with these low rates: B300 makes info->timeout to be -373. So switch all these types to unsigned as it ought to be. And use the u64 domain to perform the computation as in the worst case, we need 35 bits to store the computed value (before division). And use do_div not to break 32 bit kernels. [v2] make it actually build Signed-off-by: Jiri Slaby Cc: Paul Tested-by: Signed-off-by: Greg Kroah-Hartman --- drivers/tty/mxser.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c index 1c0c9553bc05..7dd38047ba23 100644 --- a/drivers/tty/mxser.c +++ b/drivers/tty/mxser.c @@ -246,11 +246,11 @@ struct mxser_port { unsigned char err_shadow; struct async_icount icount; /* kernel counters for 4 input interrupts */ - int timeout; + unsigned int timeout; int read_status_mask; int ignore_status_mask; - int xmit_fifo_size; + unsigned int xmit_fifo_size; int xmit_head; int xmit_tail; int xmit_cnt; @@ -572,8 +572,9 @@ static void mxser_dtr_rts(struct tty_port *port, int on) static int mxser_set_baud(struct tty_struct *tty, long newspd) { struct mxser_port *info = tty->driver_data; - int quot = 0, baud; + unsigned int quot = 0, baud; unsigned char cval; + u64 timeout; if (!info->ioaddr) return -1; @@ -594,8 +595,13 @@ static int mxser_set_baud(struct tty_struct *tty, long newspd) quot = 0; } - info->timeout = ((info->xmit_fifo_size * HZ * 10 * quot) / info->baud_base); - info->timeout += HZ / 50; /* Add .02 seconds of slop */ + /* + * worst case (128 * 1000 * 10 * 18432) needs 35 bits, so divide in the + * u64 domain + */ + timeout = (u64)info->xmit_fifo_size * HZ * 10 * quot; + do_div(timeout, info->baud_base); + info->timeout = timeout + HZ / 50; /* Add .02 seconds of slop */ if (quot) { info->MCR |= UART_MCR_DTR; -- cgit v1.2.3-70-g09d2 From 0e5ec4140c4a8a38c4ff7293c018eb7da69a30db Mon Sep 17 00:00:00 2001 From: Russell Enderby Date: Tue, 5 Sep 2017 12:16:47 -0500 Subject: serial: bcm63xx: fix timing issue. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue where unprintable characters can occur or output is cut off over the serial uart / linux console depending on timing. Problem occurs when changing the serial baud rate when setting up the new console.The bcm63xx driver does a disable and flush of the uart tx fifo while there is data still in the tx fifo. If the tx fifo still has data it is trying to send out, we need to wait until it is empty before disabling and flushing the uart. When we now go to change the uart parameters including speed we check if there is data currently in the tx fifo.If there is was mdelay(10) and check again.If it tries 3 times and still has data in it we just continue and sacrifice the tx fifo buffer. A cleaner and more preferred approach would be to remove : - spin_lock_irqsave() - bcm_uart_disable() - bcm_uart_flush() However it is not clear if the author put those in to fix another underlying issue.As a result this solution is a safer approach. Output before the fix: [0.306000] 14e00520.serial: ttyS0 at MMIO 0x14e00520 (irq = 9, base_baud = 1687500) is a° 0.315000] console[ttyS0] enabled Output verified after the fix: [0.315000] 14e00520.serial: ttyS0 at MMIO 0x14e00520 (irq = 9, base_baud = 1687500) is a bcm63xx_uart [0.334000] console[ttyS0] enabled Signed-off-by: Russell Enderby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/bcm63xx_uart.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c index 583c9a0c7ecc..8c48c3784831 100644 --- a/drivers/tty/serial/bcm63xx_uart.c +++ b/drivers/tty/serial/bcm63xx_uart.c @@ -507,9 +507,14 @@ static void bcm_uart_set_termios(struct uart_port *port, { unsigned int ctl, baud, quot, ier; unsigned long flags; + int tries; spin_lock_irqsave(&port->lock, flags); + /* Drain the hot tub fully before we power it off for the winter. */ + for (tries = 3; !bcm_uart_tx_empty(port) && tries; tries--) + mdelay(10); + /* disable uart while changing speed */ bcm_uart_disable(port); bcm_uart_flush(port); -- cgit v1.2.3-70-g09d2 From 9d7ee0e28da59b05647c3d2a7ad4076c16b1a6ef Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 4 Sep 2017 19:20:24 +0800 Subject: tty: serial: lpuart: avoid report NULL interrupt The current driver register irq in .startup() and free the irq in .shutdown(), then user will see the NULL interrupt output from 'cat /proc/interrupts' after the uart port test completed: ... 41: 515 0 0 0 GICv3 257 Level fsl-lpuart 42: 2 0 0 0 GICv3 258 Level ... It is better to register all the irqs during probe function via devm_request_irq() to avoid to call free_irq(). Signed-off-by: Fugang Duan Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 849c1f9991ce..f0252184291e 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1276,7 +1276,6 @@ static void rx_dma_timer_init(struct lpuart_port *sport) static int lpuart_startup(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - int ret; unsigned long flags; unsigned char temp; @@ -1291,11 +1290,6 @@ static int lpuart_startup(struct uart_port *port) sport->rxfifo_size = 0x1 << (((temp >> UARTPFIFO_RXSIZE_OFF) & UARTPFIFO_FIFOSIZE_MASK) + 1); - ret = devm_request_irq(port->dev, port->irq, lpuart_int, 0, - DRIVER_NAME, sport); - if (ret) - return ret; - spin_lock_irqsave(&sport->port.lock, flags); lpuart_setup_watermark(sport); @@ -1333,7 +1327,6 @@ static int lpuart_startup(struct uart_port *port) static int lpuart32_startup(struct uart_port *port) { struct lpuart_port *sport = container_of(port, struct lpuart_port, port); - int ret; unsigned long flags; unsigned long temp; @@ -1346,11 +1339,6 @@ static int lpuart32_startup(struct uart_port *port) sport->rxfifo_size = 0x1 << (((temp >> UARTFIFO_RXSIZE_OFF) & UARTFIFO_FIFOSIZE_MASK) - 1); - ret = devm_request_irq(port->dev, port->irq, lpuart32_int, 0, - DRIVER_NAME, sport); - if (ret) - return ret; - spin_lock_irqsave(&sport->port.lock, flags); lpuart32_setup_watermark(sport); @@ -1380,8 +1368,6 @@ static void lpuart_shutdown(struct uart_port *port) spin_unlock_irqrestore(&port->lock, flags); - devm_free_irq(port->dev, port->irq, sport); - if (sport->lpuart_dma_rx_use) { del_timer_sync(&sport->lpuart_timer); lpuart_dma_rx_free(&sport->port); @@ -1400,7 +1386,6 @@ static void lpuart_shutdown(struct uart_port *port) static void lpuart32_shutdown(struct uart_port *port) { - struct lpuart_port *sport = container_of(port, struct lpuart_port, port); unsigned long temp; unsigned long flags; @@ -1413,8 +1398,6 @@ static void lpuart32_shutdown(struct uart_port *port) lpuart32_write(port, temp, UARTCTRL); spin_unlock_irqrestore(&port->lock, flags); - - devm_free_irq(port->dev, port->irq, sport); } static void @@ -2212,16 +2195,22 @@ static int lpuart_probe(struct platform_device *pdev) platform_set_drvdata(pdev, &sport->port); - if (lpuart_is_32(sport)) + if (lpuart_is_32(sport)) { lpuart_reg.cons = LPUART32_CONSOLE; - else + ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart32_int, 0, + DRIVER_NAME, sport); + } else { lpuart_reg.cons = LPUART_CONSOLE; + ret = devm_request_irq(&pdev->dev, sport->port.irq, lpuart_int, 0, + DRIVER_NAME, sport); + } + + if (ret) + goto failed_irq_request; ret = uart_add_one_port(&lpuart_reg, &sport->port); - if (ret) { - clk_disable_unprepare(sport->clk); - return ret; - } + if (ret) + goto failed_attach_port; sport->dma_tx_chan = dma_request_slave_channel(sport->port.dev, "tx"); if (!sport->dma_tx_chan) @@ -2240,6 +2229,11 @@ static int lpuart_probe(struct platform_device *pdev) } return 0; + +failed_attach_port: +failed_irq_request: + clk_disable_unprepare(sport->clk); + return ret; } static int lpuart_remove(struct platform_device *pdev) -- cgit v1.2.3-70-g09d2 From c91261437985d481c472639d4397931d77f5d4e9 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 2 Sep 2017 23:13:55 +0300 Subject: serial: sccnxp: Fix error handling in sccnxp_probe() sccnxp_probe() returns result of regulator_disable() that may lead to returning zero, while device is not properly initialized. Also the driver enables clocks, but it does not disable it. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sccnxp.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c index cdd2f942317c..b9c7a904c1ea 100644 --- a/drivers/tty/serial/sccnxp.c +++ b/drivers/tty/serial/sccnxp.c @@ -889,7 +889,16 @@ static int sccnxp_probe(struct platform_device *pdev) goto err_out; uartclk = 0; } else { - clk_prepare_enable(clk); + ret = clk_prepare_enable(clk); + if (ret) + goto err_out; + + ret = devm_add_action_or_reset(&pdev->dev, + (void(*)(void *))clk_disable_unprepare, + clk); + if (ret) + goto err_out; + uartclk = clk_get_rate(clk); } @@ -988,7 +997,7 @@ static int sccnxp_probe(struct platform_device *pdev) uart_unregister_driver(&s->uart); err_out: if (!IS_ERR(s->regulator)) - return regulator_disable(s->regulator); + regulator_disable(s->regulator); return ret; } -- cgit v1.2.3-70-g09d2 From 7d34cd12061d2cf38f7505c0ab2ae2f03fbe8e08 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Sep 2017 11:12:02 -0400 Subject: MAINTAINERS: associate linux/fs.h with VFS instead of file locking include/linux/fs.h and include/uapi/linux/fs.h deal with much more than just file locking. Move them to the "FILESYSTEMS (VFS and infrastructure)" section of the MAINTAINERS file so that the first suggestion from get_maintainer.pl isn't the file locking maintainers, which has caused some confusion. Cc: J. Bruce Fields Cc: Al Viro Signed-off-by: Eric Biggers Acked-by: Jeff Layton Signed-off-by: J. Bruce Fields --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2281af4b41b6..31ac2c0ec8ad 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5334,9 +5334,7 @@ M: "J. Bruce Fields" L: linux-fsdevel@vger.kernel.org S: Maintained F: include/linux/fcntl.h -F: include/linux/fs.h F: include/uapi/linux/fcntl.h -F: include/uapi/linux/fs.h F: fs/fcntl.c F: fs/locks.c @@ -5345,6 +5343,8 @@ M: Alexander Viro L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/* +F: include/linux/fs.h +F: include/uapi/linux/fs.h FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: Riku Voipio -- cgit v1.2.3-70-g09d2 From 47684e111f52fface17820d3ef84cc845b26070e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 18 Sep 2017 13:10:15 -0700 Subject: Documentation: core-api: minor workqueue.rst cleanups Clean up workqueue.rst: - fix minor typos - put '@' after `` instead of preceding them (one place) - use "CPU" instead of "cpu" in text consistently - quote one function name Signed-off-by: Randy Dunlap Cc: Tejun Heo Cc: Florian Mickler Signed-off-by: Tejun Heo --- Documentation/core-api/workqueue.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst index 3943b5bfa8cf..00a5ba51e63f 100644 --- a/Documentation/core-api/workqueue.rst +++ b/Documentation/core-api/workqueue.rst @@ -39,8 +39,8 @@ up. Although MT wq wasted a lot of resource, the level of concurrency provided was unsatisfactory. The limitation was common to both ST and MT wq albeit less severe on MT. Each wq maintained its own separate -worker pool. A MT wq could provide only one execution context per CPU -while a ST wq one for the whole system. Work items had to compete for +worker pool. An MT wq could provide only one execution context per CPU +while an ST wq one for the whole system. Work items had to compete for those very limited execution contexts leading to various problems including proneness to deadlocks around the single execution context. @@ -151,7 +151,7 @@ Application Programming Interface (API) ``alloc_workqueue()`` allocates a wq. The original ``create_*workqueue()`` functions are deprecated and scheduled for -removal. ``alloc_workqueue()`` takes three arguments - @``name``, +removal. ``alloc_workqueue()`` takes three arguments - ``@name``, ``@flags`` and ``@max_active``. ``@name`` is the name of the wq and also used as the name of the rescuer thread if there is one. @@ -197,7 +197,7 @@ resources, scheduled and executed. served by worker threads with elevated nice level. Note that normal and highpri worker-pools don't interact with - each other. Each maintain its separate pool of workers and + each other. Each maintains its separate pool of workers and implements concurrency management among its workers. ``WQ_CPU_INTENSIVE`` @@ -249,8 +249,8 @@ unbound worker-pools and only one work item could be active at any given time thus achieving the same ordering property as ST wq. In the current implementation the above configuration only guarantees -ST behavior within a given NUMA node. Instead alloc_ordered_queue should -be used to achieve system wide ST behavior. +ST behavior within a given NUMA node. Instead ``alloc_ordered_queue()`` should +be used to achieve system-wide ST behavior. Example Execution Scenarios -- cgit v1.2.3-70-g09d2 From 80a921e207cea3dd18038501187078669a218dab Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 18 Sep 2017 23:00:08 +0300 Subject: ata_piix: Add Fujitsu-Siemens Lifebook S6120 to short cable IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fujitsu-Siemens Lifebook S6120 misdetects the cable type for some drives. The problematic one in this case is an mSATA SSD hooked up via a mSATA->PATA bridge. With regular hard disks the detection seems to work correctly. Strangely an older Lifebook model (S6020) detects the cable as 80c with the mSATA SSD, even if using the exact same flex cable. Cc: Tejun Heo Signed-off-by: Ville Syrjälä Signed-off-by: Tejun Heo --- drivers/ata/ata_piix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 8401c3b5be92..b702c20fbc2b 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -492,6 +492,7 @@ static const struct ich_laptop ich_laptop[] = { { 0x27DF, 0x152D, 0x0778 }, /* ICH7 on unknown Intel */ { 0x24CA, 0x1025, 0x0061 }, /* ICH4 on ACER Aspire 2023WLMi */ { 0x24CA, 0x1025, 0x003d }, /* ICH4 on ACER TM290 */ + { 0x24CA, 0x10CF, 0x11AB }, /* ICH4M on Fujitsu-Siemens Lifebook S6120 */ { 0x266F, 0x1025, 0x0066 }, /* ICH6 on ACER Aspire 1694WLMi */ { 0x2653, 0x1043, 0x82D8 }, /* ICH6M on Asus Eee 701 */ { 0x27df, 0x104d, 0x900e }, /* ICH7 on Sony TZ-90 */ -- cgit v1.2.3-70-g09d2 From 55e001aabb826c96f09e0a440bdcbce620189dbc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 18 Sep 2017 12:17:36 +0200 Subject: fpga: altera-cvp: remove DRIVER_ATTR() usage It's better to be explicit and use the DRIVER_ATTR_RW() macro when defining a driver's sysfs file. This is part of a series to drop DRIVER_ATTR() from the tree entirely. Cc: linux-fpga@vger.kernel.org Reviewed-by: Moritz Fischer Acked-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/altera-cvp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c index 08629ee69d11..00e73d28077c 100644 --- a/drivers/fpga/altera-cvp.c +++ b/drivers/fpga/altera-cvp.c @@ -361,12 +361,12 @@ static const struct fpga_manager_ops altera_cvp_ops = { .write_complete = altera_cvp_write_complete, }; -static ssize_t show_chkcfg(struct device_driver *dev, char *buf) +static ssize_t chkcfg_show(struct device_driver *dev, char *buf) { return snprintf(buf, 3, "%d\n", altera_cvp_chkcfg); } -static ssize_t store_chkcfg(struct device_driver *drv, const char *buf, +static ssize_t chkcfg_store(struct device_driver *drv, const char *buf, size_t count) { int ret; @@ -378,7 +378,7 @@ static ssize_t store_chkcfg(struct device_driver *drv, const char *buf, return count; } -static DRIVER_ATTR(chkcfg, 0600, show_chkcfg, store_chkcfg); +static DRIVER_ATTR_RW(chkcfg); static int altera_cvp_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id); -- cgit v1.2.3-70-g09d2 From 850fdec8d2fd1eebfa003fea39bec08cd69b6155 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 18 Sep 2017 12:17:57 +0200 Subject: driver core: remove DRIVER_ATTR DRIVER_ATTR is no longer in use, and driver authors should be using DRIVER_ATTR_RW() or DRIVER_ATTR_RO() or DRIVER_ATTR_WO() instead in order to always get the permissions correct. So remove it so that no one can use it anymore. Acked-by: Alan Tull Reviewed-by: Moritz Fischer Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-model/driver.txt | 7 ++++--- Documentation/filesystems/sysfs.txt | 3 ++- include/linux/device.h | 2 -- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-model/driver.txt b/Documentation/driver-model/driver.txt index 4421135826a2..d661e6f7e6a0 100644 --- a/Documentation/driver-model/driver.txt +++ b/Documentation/driver-model/driver.txt @@ -196,12 +196,13 @@ struct driver_attribute { }; Device drivers can export attributes via their sysfs directories. -Drivers can declare attributes using a DRIVER_ATTR macro that works -identically to the DEVICE_ATTR macro. +Drivers can declare attributes using a DRIVER_ATTR_RW and DRIVER_ATTR_RO +macro that works identically to the DEVICE_ATTR_RW and DEVICE_ATTR_RO +macros. Example: -DRIVER_ATTR(debug,0644,show_debug,store_debug); +DRIVER_ATTR_RW(debug); This is equivalent to declaring: diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index 24da7b32c489..9a3658cc399e 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt @@ -366,7 +366,8 @@ struct driver_attribute { Declaring: -DRIVER_ATTR(_name, _mode, _show, _store) +DRIVER_ATTR_RO(_name) +DRIVER_ATTR_RW(_name) Creation/Removal: diff --git a/include/linux/device.h b/include/linux/device.h index c6f27207dbe8..2bc70ddda09b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -307,8 +307,6 @@ struct driver_attribute { size_t count); }; -#define DRIVER_ATTR(_name, _mode, _show, _store) \ - struct driver_attribute driver_attr_##_name = __ATTR(_name, _mode, _show, _store) #define DRIVER_ATTR_RW(_name) \ struct driver_attribute driver_attr_##_name = __ATTR_RW(_name) #define DRIVER_ATTR_RO(_name) \ -- cgit v1.2.3-70-g09d2 From 7eac35ea29dc54cbc8399de84c9bf16553575b89 Mon Sep 17 00:00:00 2001 From: Sebastian Frei Date: Tue, 12 Sep 2017 09:50:59 +0200 Subject: USB: serial: cp210x: fix partnum regression When adding GPIO support for the cp2105, the mentioned commit by Martyn Welch introduced a query for the part number of the chip. Unfortunately the driver aborts probing when this query fails, so currently the driver can not be used with chips not supporting this query. I have a data cable for Siemens mobile phones (ID 10ab:10c5) where this is the case. With this patch the driver can be bound even if the part number can not be queried. Fixes: cf5276ce7867 ("USB: serial: cp210x: Adding GPIO support for CP2105") Signed-off-by: Sebastian Frei [ johan: amend commit message; shorten error message and demote to warning; drop unnecessary move of usb_set_serial_data() ] Cc: stable # 4.9 Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 2d945c9f975c..8364b44f4c45 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -352,6 +352,7 @@ static struct usb_serial_driver * const serial_drivers[] = { #define CP210X_PARTNUM_CP2104 0x04 #define CP210X_PARTNUM_CP2105 0x05 #define CP210X_PARTNUM_CP2108 0x08 +#define CP210X_PARTNUM_UNKNOWN 0xFF /* CP210X_GET_COMM_STATUS returns these 0x13 bytes */ struct cp210x_comm_status { @@ -1491,8 +1492,11 @@ static int cp210x_attach(struct usb_serial *serial) result = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, CP210X_GET_PARTNUM, &priv->partnum, sizeof(priv->partnum)); - if (result < 0) - goto err_free_priv; + if (result < 0) { + dev_warn(&serial->interface->dev, + "querying part number failed\n"); + priv->partnum = CP210X_PARTNUM_UNKNOWN; + } usb_set_serial_data(serial, priv); @@ -1505,10 +1509,6 @@ static int cp210x_attach(struct usb_serial *serial) } return 0; -err_free_priv: - kfree(priv); - - return result; } static void cp210x_disconnect(struct usb_serial *serial) -- cgit v1.2.3-70-g09d2 From 311de3ce96f453a5a6edb3066eb6109cc07e81d2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 6 Sep 2017 10:40:52 +0900 Subject: gpio: thunderx: select IRQ_DOMAIN_HIERARCHY instead of depends on IRQ_DOMAIN_HIERARCHY is not user-configurable, but supposed to be selected by drivers that need IRQ domain hierarchy support. GPIO_THUNDERX is the only user of "depends on IRQ_DOMAIN_HIERARCHY". This means, we can not enable GPIO_THUNDERX unless other drivers select IRQ_DOMAIN_HIERARCHY elsewhere. This is odd. Flip the logic. Signed-off-by: Masahiro Yamada Acked-by: David Daney Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 3388d54ba114..3f80f167ed56 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -453,7 +453,8 @@ config GPIO_TS4800 config GPIO_THUNDERX tristate "Cavium ThunderX/OCTEON-TX GPIO" depends on ARCH_THUNDER || (64BIT && COMPILE_TEST) - depends on PCI_MSI && IRQ_DOMAIN_HIERARCHY + depends on PCI_MSI + select IRQ_DOMAIN_HIERARCHY select IRQ_FASTEOI_HIERARCHY_HANDLERS help Say yes here to support the on-chip GPIO lines on the ThunderX -- cgit v1.2.3-70-g09d2 From e40a3ae1f794a35c4af3746291ed6fedc1fa0f6f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Sep 2017 17:47:45 +0200 Subject: gpio: acpi: work around false-positive -Wstring-overflow warning gcc-7 notices that the pin_table is an array of 16-bit numbers, but fails to take the following range check into account: drivers/gpio/gpiolib-acpi.c: In function 'acpi_gpiochip_request_interrupt': drivers/gpio/gpiolib-acpi.c:206:24: warning: '%02X' directive writing between 2 and 4 bytes into a region of size 3 [-Wformat-overflow=] sprintf(ev_name, "_%c%02X", ^~~~ drivers/gpio/gpiolib-acpi.c:206:20: note: directive argument in the range [0, 65535] sprintf(ev_name, "_%c%02X", ^~~~~~~~~ drivers/gpio/gpiolib-acpi.c:206:3: note: 'sprintf' output between 5 and 7 bytes into a destination of size 5 sprintf(ev_name, "_%c%02X", ^~~~~~~~~~~~~~~~~~~~~~~~~~~ agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L', ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pin); ~~~~ As suggested by Andy, this changes the format string to have a fixed length. Since modifying the range check did not help, I also opened a bug against gcc, see link below. Fixes: 0d1c28a449c6 ("gpiolib-acpi: Add ACPI5 event model support to gpio.") Cc: Andy Shevchenko Link: https://patchwork.kernel.org/patch/9840801/ Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82123 Signed-off-by: Arnd Bergmann Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 4d2113530735..eb4528c87c0b 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -203,7 +203,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, if (pin <= 255) { char ev_name[5]; - sprintf(ev_name, "_%c%02X", + sprintf(ev_name, "_%c%02hhX", agpio->triggering == ACPI_EDGE_SENSITIVE ? 'E' : 'L', pin); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) -- cgit v1.2.3-70-g09d2 From c496ad835c31ad639b6865714270b3003df031f6 Mon Sep 17 00:00:00 2001 From: Andreas Engel Date: Mon, 18 Sep 2017 21:11:57 +0200 Subject: USB: serial: cp210x: add support for ELV TFD500 Add the USB device id for the ELV TFD500 data logger. Signed-off-by: Andreas Engel Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 8364b44f4c45..412f812522ee 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -177,6 +177,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ -- cgit v1.2.3-70-g09d2 From d03d5d530a46c9def981a2a4ec98f6ec2e9b7878 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 8 Jul 2017 20:16:29 +0100 Subject: MAINTAINERS: add Macchiatobin maintainers entry Add a maintainers entry for the Macchiatobin board. Signed-off-by: Russell King Signed-off-by: Gregory CLEMENT --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2281af4b41b6..531ba1acae3e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8253,6 +8253,12 @@ L: libertas-dev@lists.infradead.org S: Orphan F: drivers/net/wireless/marvell/libertas/ +MARVELL MACCHIATOBIN SUPPORT +M: Russell King +L: linux-arm-kernel@lists.infradead.org +S: Maintained +F: arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts + MARVELL MV643XX ETHERNET DRIVER M: Sebastian Hesselbarth L: netdev@vger.kernel.org -- cgit v1.2.3-70-g09d2 From 9e7460fc325dad06d2066abdbc1f4dd49456f9a4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Fri, 15 Sep 2017 10:50:07 +0300 Subject: arm64: dt marvell: Fix AP806 system controller size Extend the container size to 0x2000 to include the gpio controller at offset 0x1040. While at it, add start address notation to the gpio node name to match its 'offset' property. Fixes: 63dac0f4924b ("arm64: dts: marvell: add gpio support for Armada 7K/8K") Cc: Signed-off-by: Baruch Siach Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-ap806.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index 4d360713ed12..30d48ecf46e0 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -254,7 +254,7 @@ ap_syscon: system-controller@6f4000 { compatible = "syscon", "simple-mfd"; - reg = <0x6f4000 0x1000>; + reg = <0x6f4000 0x2000>; ap_clk: clock { compatible = "marvell,ap806-clock"; @@ -265,7 +265,7 @@ compatible = "marvell,ap806-pinctrl"; }; - ap_gpio: gpio { + ap_gpio: gpio@1040 { compatible = "marvell,armada-8k-gpio"; offset = <0x1040>; ngpios = <20>; -- cgit v1.2.3-70-g09d2 From bd7a3fe770ebd8391d1c7d072ff88e9e76d063eb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Sep 2017 15:07:17 +0200 Subject: USB: fix out-of-bounds in usb_set_configuration Andrey Konovalov reported a possible out-of-bounds problem for a USB interface association descriptor. He writes: It seems there's no proper size check of a USB_DT_INTERFACE_ASSOCIATION descriptor. It's only checked that the size is >= 2 in usb_parse_configuration(), so find_iad() might do out-of-bounds access to intf_assoc->bInterfaceCount. And he's right, we don't check for crazy descriptors of this type very well, so resolve this problem. Yet another issue found by syzkaller... Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 14 +++++++++++--- include/uapi/linux/usb/ch9.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 854c8d66cfbe..68b54bd88d1e 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -643,15 +643,23 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, } else if (header->bDescriptorType == USB_DT_INTERFACE_ASSOCIATION) { + struct usb_interface_assoc_descriptor *d; + + d = (struct usb_interface_assoc_descriptor *)header; + if (d->bLength < USB_DT_INTERFACE_ASSOCIATION_SIZE) { + dev_warn(ddev, + "config %d has an invalid interface association descriptor of length %d, skipping\n", + cfgno, d->bLength); + continue; + } + if (iad_num == USB_MAXIADS) { dev_warn(ddev, "found more Interface " "Association Descriptors " "than allocated for in " "configuration %d\n", cfgno); } else { - config->intf_assoc[iad_num] = - (struct usb_interface_assoc_descriptor - *)header; + config->intf_assoc[iad_num] = d; iad_num++; } diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index ce1169af39d7..2a5d63040a0b 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -780,6 +780,7 @@ struct usb_interface_assoc_descriptor { __u8 iFunction; } __attribute__ ((packed)); +#define USB_DT_INTERFACE_ASSOCIATION_SIZE 8 /*-------------------------------------------------------------------------*/ -- cgit v1.2.3-70-g09d2 From bb4e6ff01ac356f82327d980e45fee8a65491328 Mon Sep 17 00:00:00 2001 From: Nickey Yang Date: Mon, 18 Sep 2017 17:05:37 +0800 Subject: arm64: dts: rockchip: Correct MIPI DPHY PLL clock on rk3399 There is a further gate in between the mipidphy reference clock and the actual ref-clock input to the dsi host, making the clock hirarchy look like clk_24m --> Gate11[14] --> clk_mipidphy_ref --> Gate21[0] --> clk_dphy_pll Fix the clock reference so that the whole clock subtree gets enabled when the dsi host needs it. Signed-off-by: Nickey Yang [amended commit message] Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index d79e9b3265b9..6aa43fd47148 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1629,7 +1629,7 @@ compatible = "rockchip,rk3399-mipi-dsi", "snps,dw-mipi-dsi"; reg = <0x0 0xff960000 0x0 0x8000>; interrupts = ; - clocks = <&cru SCLK_MIPIDPHY_REF>, <&cru PCLK_MIPI_DSI0>, + clocks = <&cru SCLK_DPHY_PLL>, <&cru PCLK_MIPI_DSI0>, <&cru SCLK_DPHY_TX0_CFG>; clock-names = "ref", "pclk", "phy_cfg"; power-domains = <&power RK3399_PD_VIO>; -- cgit v1.2.3-70-g09d2 From 4ae7c364b9320063504db78834fabe59d16f85bf Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 28 Aug 2017 15:18:16 +0200 Subject: ARM: dts: nokia n900: drop unneeded/undocumented parts of the dts Sakari mentioned that some parts of the dts are not needed and do not have proper documentation, yet. As the camera works without them, remove them for now. Signed-off-by: Pavel Machek Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-n900.dts | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 26c20e1167b9..4acd32a1c4ef 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -144,15 +144,6 @@ io-channel-names = "temp", "bsi", "vbat"; }; - rear_camera: camera@0 { - compatible = "linux,camera"; - - module { - model = "TCM8341MD"; - sensor = <&cam1>; - }; - }; - pwm9: dmtimer-pwm { compatible = "ti,omap-dmtimer-pwm"; #pwm-cells = <3>; @@ -189,10 +180,8 @@ clock-lanes = <1>; data-lanes = <0>; lane-polarity = <0 0>; - clock-inv = <0>; /* Select strobe = <1> for back camera, <0> for front camera */ strobe = <1>; - crc = <0>; }; }; }; -- cgit v1.2.3-70-g09d2 From 06480f8cf559001c9eb49b4e9d822e13ad1cc5c4 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 6 Sep 2017 16:03:58 +0530 Subject: ARM: OMAP2+: dra7xx: Set OPT_CLKS_IN_RESET flag for gpio1 gpio1 soft reset fails in the kexec path as the optional clock is not enabled hence enable the HWMOD_CONTROL_OPT_CLKS_IN_RESET flag for gpio1 hwmod. Signed-off-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_7xx_data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index f040244c57e7..2f4f7002f38d 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -839,6 +839,7 @@ static struct omap_hwmod dra7xx_gpio1_hwmod = { .name = "gpio1", .class = &dra7xx_gpio_hwmod_class, .clkdm_name = "wkupaon_clkdm", + .flags = HWMOD_CONTROL_OPT_CLKS_IN_RESET, .main_clk = "wkupaon_iclk_mux", .prcm = { .omap4 = { -- cgit v1.2.3-70-g09d2 From 8aed1026ccfe9cf5772c62bde35bc101ead9308c Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 6 Sep 2017 19:09:32 +0530 Subject: ARM: dts: dra7: Set a default parent to mcasp3_ahclkx_mux Assign a default parent to mcasp3_ahclkx_mux clock using the assigned-clock-parents property. This is helpful in cases like kexec where in the clock parent can be something other than the value at reset. Suggested-by: Tero Kristo Signed-off-by: Keerthy Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7xx-clocks.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index cf229dfabf61..e62b62875cba 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -1817,6 +1817,8 @@ clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1868>; + assigned-clocks = <&mcasp3_ahclkx_mux>; + assigned-clock-parents = <&abe_24m_fclk>; }; mcasp3_aux_gfclk_mux: mcasp3_aux_gfclk_mux@1868 { -- cgit v1.2.3-70-g09d2 From 20547dfd85f5baaf27ca01b32570bd6bfd7b209c Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Fri, 8 Sep 2017 21:15:51 +0200 Subject: ARM: OMAP2+: hsmmc: fix logic to call either omap_hsmmc_init or omap_hsmmc_late_init but not both MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With 4.13 kernel I get this boot message: [    1.051727] ------------[ cut here ]------------ [    1.051818] WARNING: CPU: 0 PID: 1 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x54/0x74 [    1.051849] sysfs: cannot create duplicate filename '/devices/platform/omap_hsmmc.2' [    1.051879] Modules linked in: [    1.051971] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.13.0-letux+ #1360 [    1.052001] Hardware name: Generic OMAP3 (Flattened Device Tree) [    1.052062] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [    1.052124] [] (show_stack) from [] (dump_stack+0x98/0xd0) [    1.052185] [] (dump_stack) from [] (__warn+0xd0/0x100) [    1.052215] [] (__warn) from [] (warn_slowpath_fmt+0x34/0x44) [    1.052276] [] (warn_slowpath_fmt) from [] (sysfs_warn_dup+0x54/0x74) [    1.052337] [] (sysfs_warn_dup) from [] (sysfs_create_dir_ns+0x74/0x84) [    1.052398] [] (sysfs_create_dir_ns) from [] (kobject_add_internal+0xd0/0x294) [    1.052429] [] (kobject_add_internal) from [] (kobject_add+0x6c/0x8c) [    1.052490] [] (kobject_add) from [] (device_add+0xe4/0x510) [    1.052551] [] (device_add) from [] (platform_device_add+0x130/0x1c0) [    1.052612] [] (platform_device_add) from [] (omap_hsmmc_late_init+0x3c/0x60) [    1.052673] [] (omap_hsmmc_late_init) from [] (omap3_pandora_legacy_init+0x24/0xb4) [    1.052734] [] (omap3_pandora_legacy_init) from [] (pdata_quirks_check+0x30/0x3c) [    1.052795] [] (pdata_quirks_check) from [] (omap_generic_init+0xc/0x18) [    1.052856] [] (omap_generic_init) from [] (customize_machine+0x1c/0x28) [    1.052917] [] (customize_machine) from [] (do_one_initcall+0xa8/0x150) [    1.052947] [] (do_one_initcall) from [] (kernel_init_freeable+0x110/0x1d4) [    1.053009] [] (kernel_init_freeable) from [] (kernel_init+0x8/0x10c) [    1.053070] [] (kernel_init) from [] (ret_from_fork+0x14/0x24) [    1.055023] ---[ end trace 44e490b09ac4ab88 ]--- This can be traced down to the calls of omap_hsmmc_init(pandora_mmc3); omap_hsmmc_late_init(pandora_mmc3); in omap3_pandora_legacy_init(). It turns out that both funcions disagree how to decide if the other one was alredy called. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/hsmmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c index 5b614388d72f..6d28aa20a7d3 100644 --- a/arch/arm/mach-omap2/hsmmc.c +++ b/arch/arm/mach-omap2/hsmmc.c @@ -58,10 +58,10 @@ void omap_hsmmc_late_init(struct omap2_hsmmc_info *c) struct platform_device *pdev; int res; - if (omap_hsmmc_done != 1) + if (omap_hsmmc_done) return; - omap_hsmmc_done++; + omap_hsmmc_done = 1; for (; c->mmc; c++) { pdev = c->pdev; -- cgit v1.2.3-70-g09d2 From cddfae253c875076750a03bd05ba5b1569e6876e Mon Sep 17 00:00:00 2001 From: Suniel Mahesh Date: Mon, 11 Sep 2017 12:00:16 +0530 Subject: ARM: dts: am33xx: Add spi alias to match SOC schematics Linux bus numbers should match the numbers defined by the chip manufacturer. This patch add's spi aliases to achieve that bus naming convention. Signed-off-by: Suniel Mahesh Signed-off-by: Karthik Tummala Tested-by: Karthik Tummala Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 7d7ca054c557..e58fab8aec5d 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -36,6 +36,8 @@ phy1 = &usb1_phy; ethernet0 = &cpsw_emac0; ethernet1 = &cpsw_emac1; + spi0 = &spi0; + spi1 = &spi1; }; cpus { -- cgit v1.2.3-70-g09d2 From 4afa616ce937f88d9a69a71b8c561551596a81e3 Mon Sep 17 00:00:00 2001 From: Yogesh Siraswar Date: Thu, 14 Sep 2017 14:30:07 -0500 Subject: ARM: dts: am43xx-epos-evm: Remove extra CPSW EMAC entry On am438x EPOS boards there is only one ethernet port, remove extra port definition. This boot log warnings during PHY detection. Signed-off-by: Yogesh Siraswar Signed-off-by: Andrew F. Davis Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am43x-epos-evm.dts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 9d276af7c539..081fa68b6f98 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -388,6 +388,7 @@ pinctrl-0 = <&cpsw_default>; pinctrl-1 = <&cpsw_sleep>; status = "okay"; + slaves = <1>; }; &davinci_mdio { @@ -402,11 +403,6 @@ phy-mode = "rmii"; }; -&cpsw_emac1 { - phy_id = <&davinci_mdio>, <1>; - phy-mode = "rmii"; -}; - &phy_sel { rmii-clock-ext; }; -- cgit v1.2.3-70-g09d2 From e1af344df4e5c8fe90f4a63235a68d5405afc41b Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 19 Sep 2017 01:21:56 -0400 Subject: ALSA: asihpi: fix a potential double-fetch bug when copying puhm The hm->h.size is intended to hold the actual size of the hm struct that is copied from userspace and should always be <= sizeof(*hm). However, after copy_from_user(hm, puhm, hm->h.size), since userspace process has full control over the memory region pointed by puhm, it is possible that the value of hm->h.size is different from what is fetched-in previously (get_user(hm->h.size, (u16 __user *)puhm)). In other words, hm->h.size is overriden and the relation between hm->h.size and the hm struct is broken. This patch proposes to use a seperate variable, msg_size, to hold the value of the first fetch and override hm->h.size to msg_size after the second fetch to maintain the relation. Signed-off-by: Meng Xu Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpioctl.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 7e3aa50b21f9..5badd08e1d69 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -103,6 +103,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) void __user *puhr; union hpi_message_buffer_v1 *hm; union hpi_response_buffer_v1 *hr; + u16 msg_size; u16 res_max_size; u32 uncopied_bytes; int err = 0; @@ -127,22 +128,25 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } /* Now read the message size and data from user space. */ - if (get_user(hm->h.size, (u16 __user *)puhm)) { + if (get_user(msg_size, (u16 __user *)puhm)) { err = -EFAULT; goto out; } - if (hm->h.size > sizeof(*hm)) - hm->h.size = sizeof(*hm); + if (msg_size > sizeof(*hm)) + msg_size = sizeof(*hm); /* printk(KERN_INFO "message size %d\n", hm->h.wSize); */ - uncopied_bytes = copy_from_user(hm, puhm, hm->h.size); + uncopied_bytes = copy_from_user(hm, puhm, msg_size); if (uncopied_bytes) { HPI_DEBUG_LOG(ERROR, "uncopied bytes %d\n", uncopied_bytes); err = -EFAULT; goto out; } + /* Override h.size in case it is changed between two userspace fetches */ + hm->h.size = msg_size; + if (get_user(res_max_size, (u16 __user *)puhr)) { err = -EFAULT; goto out; -- cgit v1.2.3-70-g09d2 From 17df6453d4be17910456e99c5a85025aa1b7a246 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 12 Sep 2017 10:47:53 +0200 Subject: brcmfmac: add length check in brcmf_cfg80211_escan_handler() Upon handling the firmware notification for scans the length was checked properly and may result in corrupting kernel heap memory due to buffer overruns. This fix addresses CVE-2017-0786. Cc: stable@vger.kernel.org # v4.0.x Cc: Kevin Cernekee Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index aaed4ab503ad..26a0de371c26 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3162,6 +3162,7 @@ brcmf_cfg80211_escan_handler(struct brcmf_if *ifp, struct brcmf_cfg80211_info *cfg = ifp->drvr->config; s32 status; struct brcmf_escan_result_le *escan_result_le; + u32 escan_buflen; struct brcmf_bss_info_le *bss_info_le; struct brcmf_bss_info_le *bss = NULL; u32 bi_length; @@ -3181,11 +3182,23 @@ brcmf_cfg80211_escan_handler(struct brcmf_if *ifp, if (status == BRCMF_E_STATUS_PARTIAL) { brcmf_dbg(SCAN, "ESCAN Partial result\n"); + if (e->datalen < sizeof(*escan_result_le)) { + brcmf_err("invalid event data length\n"); + goto exit; + } escan_result_le = (struct brcmf_escan_result_le *) data; if (!escan_result_le) { brcmf_err("Invalid escan result (NULL pointer)\n"); goto exit; } + escan_buflen = le32_to_cpu(escan_result_le->buflen); + if (escan_buflen > BRCMF_ESCAN_BUF_SIZE || + escan_buflen > e->datalen || + escan_buflen < sizeof(*escan_result_le)) { + brcmf_err("Invalid escan buffer length: %d\n", + escan_buflen); + goto exit; + } if (le16_to_cpu(escan_result_le->bss_count) != 1) { brcmf_err("Invalid bss_count %d: ignoring\n", escan_result_le->bss_count); @@ -3202,9 +3215,8 @@ brcmf_cfg80211_escan_handler(struct brcmf_if *ifp, } bi_length = le32_to_cpu(bss_info_le->length); - if (bi_length != (le32_to_cpu(escan_result_le->buflen) - - WL_ESCAN_RESULTS_FIXED_SIZE)) { - brcmf_err("Invalid bss_info length %d: ignoring\n", + if (bi_length != escan_buflen - WL_ESCAN_RESULTS_FIXED_SIZE) { + brcmf_err("Ignoring invalid bss_info length: %d\n", bi_length); goto exit; } -- cgit v1.2.3-70-g09d2 From 35f62727df0ed8e5e4857e162d94fd46d861f1cf Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 12 Sep 2017 10:47:54 +0200 Subject: brcmfmac: setup passive scan if requested by user-space The driver was not properly configuring firmware with regard to the type of scan. It always performed an active scan even when user-space was requesting for passive scan, ie. the scan request was done without any SSIDs specified. Cc: stable@vger.kernel.org # v4.0.x Reported-by: Huang, Jiangyang Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 19 ++++--------------- .../wireless/broadcom/brcm80211/brcmfmac/fwil_types.h | 5 +++++ 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 26a0de371c26..4157c90ad973 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -980,7 +980,7 @@ static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg, eth_broadcast_addr(params_le->bssid); params_le->bss_type = DOT11_BSSTYPE_ANY; - params_le->scan_type = 0; + params_le->scan_type = BRCMF_SCANTYPE_ACTIVE; params_le->channel_num = 0; params_le->nprobes = cpu_to_le32(-1); params_le->active_time = cpu_to_le32(-1); @@ -988,12 +988,9 @@ static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg, params_le->home_time = cpu_to_le32(-1); memset(¶ms_le->ssid_le, 0, sizeof(params_le->ssid_le)); - /* if request is null exit so it will be all channel broadcast scan */ - if (!request) - return; - n_ssids = request->n_ssids; n_channels = request->n_channels; + /* Copy channel array if applicable */ brcmf_dbg(SCAN, "### List of channelspecs to scan ### %d\n", n_channels); @@ -1030,16 +1027,8 @@ static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg, ptr += sizeof(ssid_le); } } else { - brcmf_dbg(SCAN, "Broadcast scan %p\n", request->ssids); - if ((request->ssids) && request->ssids->ssid_len) { - brcmf_dbg(SCAN, "SSID %s len=%d\n", - params_le->ssid_le.SSID, - request->ssids->ssid_len); - params_le->ssid_le.SSID_len = - cpu_to_le32(request->ssids->ssid_len); - memcpy(¶ms_le->ssid_le.SSID, request->ssids->ssid, - request->ssids->ssid_len); - } + brcmf_dbg(SCAN, "Performing passive scan\n"); + params_le->scan_type = BRCMF_SCANTYPE_PASSIVE; } /* Adding mask to channel numbers */ params_le->channel_num = diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h index 8391989b1882..e0d22fedb2b4 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h @@ -45,6 +45,11 @@ #define BRCMF_SCAN_PARAMS_COUNT_MASK 0x0000ffff #define BRCMF_SCAN_PARAMS_NSSID_SHIFT 16 +/* scan type definitions */ +#define BRCMF_SCANTYPE_DEFAULT 0xFF +#define BRCMF_SCANTYPE_ACTIVE 0 +#define BRCMF_SCANTYPE_PASSIVE 1 + #define BRCMF_WSEC_MAX_PSK_LEN 32 #define BRCMF_WSEC_PASSPHRASE BIT(0) -- cgit v1.2.3-70-g09d2 From ce21574ad1922b403198ee664c4dff276f514f1d Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Tue, 29 Aug 2017 13:52:51 +0530 Subject: ARM: dts: da850-evm: add serial and ethernet aliases Add aliases for serial and ethernet nodes. Serial aliases help keep order of tty nodes fixed and ethernet alias is used by bootloader to setup mac address correctly. Reported-by: Adam Ford Acked-by: Tony Lindgren Fixes: dd7deaf218bf ("ARM: davinci: da850: add DT node for ethernet") Signed-off-by: Sekhar Nori --- arch/arm/boot/dts/da850-evm.dts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/da850-evm.dts b/arch/arm/boot/dts/da850-evm.dts index 67e72bc72e80..c75507922f7d 100644 --- a/arch/arm/boot/dts/da850-evm.dts +++ b/arch/arm/boot/dts/da850-evm.dts @@ -15,6 +15,13 @@ compatible = "ti,da850-evm", "ti,da850"; model = "DA850/AM1808/OMAP-L138 EVM"; + aliases { + serial0 = &serial0; + serial1 = &serial1; + serial2 = &serial2; + ethernet0 = ð0; + }; + soc@1c00000 { pmx_core: pinmux@14120 { status = "okay"; -- cgit v1.2.3-70-g09d2 From 5a5d718f952b55e7fa96bfaf6f31f2c08babd77b Mon Sep 17 00:00:00 2001 From: Sriram Periyasamy Date: Tue, 19 Sep 2017 17:25:05 -0500 Subject: ALSA: hda - program ICT bits to support HBR audio On recent Intel platforms (Haswell, Broadwell, Skylake, ApolloLake, KabyLake, ...), the IEC Coding Type (ICT) bitfield in the Digital Converter Control #3 needs to be set explicitly for HDMI/DisplayPort High Bit Rate (HBR) audio playback to work. This was not required in earlier platforms when HBR was first introduced. The ICT bits are defined in Section 7.3.3.9 of the HDaudio 1.0a specification. Since the ICT bitfield was not specified for HDAudio 1.0 devices (before 2009), we only program it on machines more recent than Haswell. We tested that this fix is not needed on Baytrail-I (MinnowBoard Turbot) and believe by extension it also does not apply to Braswell. [ Moved AC_VERB_SET_DIGI_CONVERT_3 definition to the right place by tiwai ] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98797 Signed-off-by: Sriram Periyasamy Signed-off-by: Pierre-Louis Bossart Signed-off-by: Subhransu S. Prusty Acked-by: Vinod Koul Signed-off-by: Takashi Iwai --- include/sound/hda_verbs.h | 1 + sound/pci/hda/patch_hdmi.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/sound/hda_verbs.h b/include/sound/hda_verbs.h index d0509db6d0ec..f89cd5ee1c7a 100644 --- a/include/sound/hda_verbs.h +++ b/include/sound/hda_verbs.h @@ -95,6 +95,7 @@ enum { #define AC_VERB_SET_EAPD_BTLENABLE 0x70c #define AC_VERB_SET_DIGI_CONVERT_1 0x70d #define AC_VERB_SET_DIGI_CONVERT_2 0x70e +#define AC_VERB_SET_DIGI_CONVERT_3 0x73e #define AC_VERB_SET_VOLUME_KNOB_CONTROL 0x70f #define AC_VERB_SET_GPIO_DATA 0x715 #define AC_VERB_SET_GPIO_MASK 0x716 diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 2b64fabd5faa..c19c81d230bd 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -906,6 +906,7 @@ static int hdmi_setup_stream(struct hda_codec *codec, hda_nid_t cvt_nid, hda_nid_t pin_nid, u32 stream_tag, int format) { struct hdmi_spec *spec = codec->spec; + unsigned int param; int err; err = spec->ops.pin_hbr_setup(codec, pin_nid, is_hbr_format(format)); @@ -915,6 +916,26 @@ static int hdmi_setup_stream(struct hda_codec *codec, hda_nid_t cvt_nid, return err; } + if (is_haswell_plus(codec)) { + + /* + * on recent platforms IEC Coding Type is required for HBR + * support, read current Digital Converter settings and set + * ICT bitfield if needed. + */ + param = snd_hda_codec_read(codec, cvt_nid, 0, + AC_VERB_GET_DIGI_CONVERT_1, 0); + + param = (param >> 16) & ~(AC_DIG3_ICT); + + /* on recent platforms ICT mode is required for HBR support */ + if (is_hbr_format(format)) + param |= 0x1; + + snd_hda_codec_write(codec, cvt_nid, 0, + AC_VERB_SET_DIGI_CONVERT_3, param); + } + snd_hda_codec_setup_stream(codec, cvt_nid, stream_tag, 0, format); return 0; } -- cgit v1.2.3-70-g09d2 From 97e133d54c1ca8948b191e5721a145a76c4db33d Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 19 Sep 2017 11:46:16 +0300 Subject: usb: gadget: core: fix ->udc_set_speed() logic Consider the following case: udc controller supports SuperSpeed. If we first load a HighSpeed gadget followed by a SuperSpeed gadget, the SuperSpeed gadget will be limited to HighSpeed as UDC core driver doesn't call ->udc_set_speed() in the second case. Call ->udc_set_speed() unconditionally to fix this issue. This will also fix the case for dwc3 controller driver when SuperSpeed gadget is loaded first and works in HighSpeed only as udc_set_speed() was never being called. Fixes: 6099eca796ae ("usb: gadget: core: introduce ->udc_set_speed() method") Cc: [v4.13+] Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 75c51ca4ee0f..d41d07aae0ce 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1320,8 +1320,7 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri udc->dev.driver = &driver->driver; udc->gadget->dev.driver = &driver->driver; - if (driver->max_speed < udc->gadget->max_speed) - usb_gadget_udc_set_speed(udc, driver->max_speed); + usb_gadget_udc_set_speed(udc, driver->max_speed); ret = driver->bind(udc->gadget, driver); if (ret) -- cgit v1.2.3-70-g09d2 From 9ada8c582088d32bd5c071c17213bc6edf37443a Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 13 Sep 2017 15:31:33 +0900 Subject: usb: gadget: function: printer: avoid spinlock recursion If usb_gadget_giveback_request() is called in usb_ep_queue(), this printer_write() is possible to cause spinlock recursion. So, this patch adds spin_unlock() before calls usb_ep_queue() to avoid it. Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_printer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index 8df244fc9d80..ea0da35a44e2 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -555,6 +555,7 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) size_t size; /* Amount of data in a TX request. */ size_t bytes_copied = 0; struct usb_request *req; + int value; DBG(dev, "printer_write trying to send %d bytes\n", (int)len); @@ -634,7 +635,11 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) return -EAGAIN; } - if (usb_ep_queue(dev->in_ep, req, GFP_ATOMIC)) { + /* here, we unlock, and only unlock, to avoid deadlock. */ + spin_unlock(&dev->lock); + value = usb_ep_queue(dev->in_ep, req, GFP_ATOMIC); + spin_lock(&dev->lock); + if (value) { list_add(&req->list, &dev->tx_reqs); spin_unlock_irqrestore(&dev->lock, flags); mutex_unlock(&dev->lock_printer_io); -- cgit v1.2.3-70-g09d2 From 641663a19f6002ecc5c5af517a5184d4164cc749 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Sep 2017 09:24:15 -0700 Subject: usb: gadget: udc: fix snps_udc_plat.c build errors Fix build errors that happen when CONFIG_EXTCON=m and CONFIG_USB_SNP_UDC_PLAT=y by preventing that combination in Kconfig. CONFIG_EXTCON can still be disabled or enabled for this driver since has stubs for the disabled case, but if CONFIG_EXTCON=m, USB_SNP_UDC_PLAT is restricted to m or n (cannot be builtin). drivers/built-in.o: In function `udc_plat_remove': snps_udc_plat.c:(.text+0x2c4060): undefined reference to `extcon_unregister_notifier' drivers/built-in.o: In function `udc_plat_probe': snps_udc_plat.c:(.text+0x2c438c): undefined reference to `extcon_get_edev_by_phandle' snps_udc_plat.c:(.text+0x2c43f2): undefined reference to `extcon_register_notifier' snps_udc_plat.c:(.text+0x2c4416): undefined reference to `extcon_get_state' snps_udc_plat.c:(.text+0x2c44f7): undefined reference to `extcon_unregister_notifier' Reported-by: kbuild test robot Signed-off-by: Randy Dunlap Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 7cd5c969fcbe..1e9567091d86 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -273,6 +273,7 @@ config USB_SNP_CORE config USB_SNP_UDC_PLAT tristate "Synopsys USB 2.0 Device controller" depends on USB_GADGET && OF && HAS_DMA + depends on EXTCON || EXTCON=n select USB_GADGET_DUALSPEED select USB_SNP_CORE default ARCH_BCM_IPROC -- cgit v1.2.3-70-g09d2 From 7661ca09b2ff98f48693f431bb01fed62830e433 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Sep 2017 16:14:31 +0200 Subject: usb: gadget: dummy: fix nonsensical comparisons gcc-8 points out two comparisons that are clearly bogus and almost certainly not what the author intended to write: drivers/usb/gadget/udc/dummy_hcd.c: In function 'set_link_state_by_speed': drivers/usb/gadget/udc/dummy_hcd.c:379:31: error: bitwise comparison always evaluates to false [-Werror=tautological-compare] USB_PORT_STAT_ENABLE) == 1 && ^~ drivers/usb/gadget/udc/dummy_hcd.c:381:25: error: bitwise comparison always evaluates to false [-Werror=tautological-compare] USB_SS_PORT_LS_U0) == 1 && ^~ I looked at the code for a bit and came up with a change that makes it look like what the author probably meant here. This makes it look reasonable to me and to gcc, shutting up the warning. It does of course change behavior as the two conditions are actually evaluated rather than being hardcoded to false, and I have made no attempt at verifying that the changed logic makes sense in the context of a USB HCD, so that part needs to be reviewed carefully. Fixes: 1cd8fd2887e1 ("usb: gadget: dummy_hcd: add SuperSpeed support") Cc: Tatyana Brokhman Cc: Felipe Balbi Acked-by: Alan Stern Signed-off-by: Arnd Bergmann Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/dummy_hcd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index a030d7923d7d..b1e21b3be6e1 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -375,11 +375,10 @@ static void set_link_state_by_speed(struct dummy_hcd *dum_hcd) USB_PORT_STAT_CONNECTION) == 0) dum_hcd->port_status |= (USB_PORT_STAT_C_CONNECTION << 16); - if ((dum_hcd->port_status & - USB_PORT_STAT_ENABLE) == 1 && - (dum_hcd->port_status & - USB_SS_PORT_LS_U0) == 1 && - dum_hcd->rh_state != DUMMY_RH_SUSPENDED) + if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) && + (dum_hcd->port_status & + USB_PORT_STAT_LINK_STATE) == USB_SS_PORT_LS_U0 && + dum_hcd->rh_state != DUMMY_RH_SUSPENDED) dum_hcd->active = 1; } } else { -- cgit v1.2.3-70-g09d2 From 20da2ec06bfad2d4dfd40d77d3831f5e56365d20 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 18 Sep 2017 15:29:49 +0300 Subject: qtnfmac: lock access to h/w in tx path Fix tx path regression. Lock should be held when queuing packets to h/w fifos in order to properly handle configurations with multiple enabled interfaces. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c | 9 ++++++++- drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c index 502e72b7cdcc..69131965a298 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c +++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c @@ -661,14 +661,18 @@ static int qtnf_pcie_data_tx(struct qtnf_bus *bus, struct sk_buff *skb) struct qtnf_pcie_bus_priv *priv = (void *)get_bus_priv(bus); dma_addr_t txbd_paddr, skb_paddr; struct qtnf_tx_bd *txbd; + unsigned long flags; int len, i; u32 info; int ret = 0; + spin_lock_irqsave(&priv->tx0_lock, flags); + if (!qtnf_tx_queue_ready(priv)) { if (skb->dev) netif_stop_queue(skb->dev); + spin_unlock_irqrestore(&priv->tx0_lock, flags); return NETDEV_TX_BUSY; } @@ -717,8 +721,10 @@ tx_done: dev_kfree_skb_any(skb); } - qtnf_pcie_data_tx_reclaim(priv); priv->tx_done_count++; + spin_unlock_irqrestore(&priv->tx0_lock, flags); + + qtnf_pcie_data_tx_reclaim(priv); return NETDEV_TX_OK; } @@ -1247,6 +1253,7 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id) strcpy(bus->fwname, QTN_PCI_PEARL_FW_NAME); init_completion(&bus->request_firmware_complete); mutex_init(&bus->bus_lock); + spin_lock_init(&pcie_priv->tx0_lock); spin_lock_init(&pcie_priv->irq_lock); spin_lock_init(&pcie_priv->tx_reclaim_lock); diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h index e76a23716ee0..86ac1ccedb52 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h +++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h @@ -34,6 +34,8 @@ struct qtnf_pcie_bus_priv { /* lock for tx reclaim operations */ spinlock_t tx_reclaim_lock; + /* lock for tx0 operations */ + spinlock_t tx0_lock; u8 msi_enabled; int mps; -- cgit v1.2.3-70-g09d2 From a715b3a0efe76d36c3ef96a93894a13db9d3a72f Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 18 Sep 2017 15:29:50 +0300 Subject: qtnfmac: cancel scans on wireless interface changes Cancel active scans and deactivate firmware scan watchdog timer when wireless interface configuration is changed. The usecases include wireless interface mode change, interface down, AP stop, virtual interface removal. Signed-off-by: Sergey Matyukevich Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 9 ++++++--- drivers/net/wireless/quantenna/qtnfmac/cfg80211.h | 3 +++ drivers/net/wireless/quantenna/qtnfmac/event.c | 2 -- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 856fa6e8327e..a450bc6bc774 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -115,6 +115,8 @@ int qtnf_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) vif = qtnf_netdev_get_priv(wdev->netdev); + qtnf_scan_done(vif->mac, true); + if (qtnf_cmd_send_del_intf(vif)) pr_err("VIF%u.%u: failed to delete VIF\n", vif->mac->macid, vif->vifid); @@ -335,6 +337,8 @@ static int qtnf_stop_ap(struct wiphy *wiphy, struct net_device *dev) struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; + qtnf_scan_done(vif->mac, true); + ret = qtnf_cmd_send_stop_ap(vif); if (ret) { pr_err("VIF%u.%u: failed to stop AP operation in FW\n", @@ -570,8 +574,6 @@ qtnf_del_station(struct wiphy *wiphy, struct net_device *dev, !qtnf_sta_list_lookup(&vif->sta_list, params->mac)) return 0; - qtnf_scan_done(vif->mac, true); - ret = qtnf_cmd_send_del_sta(vif, params); if (ret) pr_err("VIF%u.%u: failed to delete STA %pM\n", @@ -1134,8 +1136,9 @@ void qtnf_virtual_intf_cleanup(struct net_device *ndev) } vif->sta_state = QTNF_STA_DISCONNECTED; - qtnf_scan_done(mac, true); } + + qtnf_scan_done(mac, true); } void qtnf_cfg80211_vif_reset(struct qtnf_vif *vif) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h index 6a4af52522b8..66db26613b1f 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h @@ -34,6 +34,9 @@ static inline void qtnf_scan_done(struct qtnf_wmac *mac, bool aborted) .aborted = aborted, }; + if (timer_pending(&mac->scan_timeout)) + del_timer_sync(&mac->scan_timeout); + mutex_lock(&mac->mac_lock); if (mac->scan_req) { diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 0fc2814eafad..43d2e7fd6e02 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -345,8 +345,6 @@ qtnf_event_handle_scan_complete(struct qtnf_wmac *mac, return -EINVAL; } - if (timer_pending(&mac->scan_timeout)) - del_timer_sync(&mac->scan_timeout); qtnf_scan_done(mac, le32_to_cpu(status->flags) & QLINK_SCAN_ABORTED); return 0; -- cgit v1.2.3-70-g09d2 From 0dcd020b7abbff238f188d4db8f02389dc849553 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 20 Sep 2017 09:21:40 +0800 Subject: ALSA: usb-audio: Add sample rate quirk for Plantronics C310/C520-M Like other Plantronics devices, C310 and C520-M do not support sample rate reading. Add them to the sample rate quirk accordingly. BugLink: https://bugs.launchpad.net/bugs/1708499 BugLink: https://bugs.launchpad.net/bugs/1709282 Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 913552078285..b8cb57aeec77 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1137,6 +1137,8 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x047F, 0x02F7): /* Plantronics BT-600 */ case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ + case USB_ID(0x047F, 0xC022): /* Plantronics C310 */ + case USB_ID(0x047F, 0xC036): /* Plantronics C520-M */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */ -- cgit v1.2.3-70-g09d2 From 829e4887b8676255778e162efc57e4a0f18f8bb3 Mon Sep 17 00:00:00 2001 From: Ulrich Hecht Date: Mon, 18 Sep 2017 17:56:14 +0200 Subject: arm64: dts: salvator-common: add 12V regulator to backlight This fixes the warning "pwm-backlight backlight: backlight supply power not found, using dummy regulator". Fixes: b33be33670217533 ("arm64: dts: salvator-x: Add panel backlight support") Reported-by: Geert Uytterhoeven Signed-off-by: Ulrich Hecht Reviewed-by: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/salvator-common.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index 4786c67b5e65..d9d885006a8e 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -62,6 +62,7 @@ brightness-levels = <256 128 64 16 8 4 0>; default-brightness-level = <6>; + power-supply = <®_12v>; enable-gpios = <&gpio6 7 GPIO_ACTIVE_HIGH>; }; @@ -83,6 +84,15 @@ regulator-always-on; }; + reg_12v: regulator2 { + compatible = "regulator-fixed"; + regulator-name = "fixed-12V"; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; + regulator-boot-on; + regulator-always-on; + }; + rsnd_ak4613: sound { compatible = "simple-audio-card"; -- cgit v1.2.3-70-g09d2 From 544e3bf4f0e8278400f19ca7918a3cdf2548b4eb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Sep 2017 14:22:08 +0200 Subject: reset: Restrict RESET_HSDK to ARC_SOC_HSDK or COMPILE_TEST The HSDK reset driver is only useful when building for an ARC HSDK platform. While at it, drop the "default n", as that is the default. Fixes: e0be864f14240cb1 ("ARC: reset: introduce HSDKv1 reset driver") Signed-off-by: Geert Uytterhoeven [p.zabel@pengutronix.de: rebased, renamed RESET_HSDK_V1 to RESET_HSDK] Signed-off-by: Philipp Zabel --- drivers/reset/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index a7c7d5a8c089..e2baecbb9dd3 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -37,7 +37,7 @@ config RESET_BERLIN config RESET_HSDK bool "Synopsys HSDK Reset Driver" depends on HAS_IOMEM - default n + depends on ARC_SOC_HSDK || COMPILE_TEST help This enables the reset controller driver for HSDK board. -- cgit v1.2.3-70-g09d2 From e4b2ae7a8a11c5d4e0a6e21ba65d4b487a15d3d8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 16 Sep 2017 22:42:21 +0200 Subject: gpio: omap: omap_gpio_show_rev is not __init The probe function calls omap_gpio_show_rev(), which on most compilers is inlined, but on the old gcc-4.6 is not, causing a valid warning about the incorrect __init annotation: WARNING: vmlinux.o(.text+0x40f614): Section mismatch in reference from the function omap_gpio_probe() to the function .init.text:omap_gpio_show_rev() The function omap_gpio_probe() references the function __init omap_gpio_show_rev(). This is often because omap_gpio_probe lacks a __init annotation or the annotation of omap_gpio_show_rev is wrong. This removes the __init. Signed-off-by: Arnd Bergmann Acked-by: Santosh Shilimkar Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index dbf869fb63ce..22d7d4838265 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1010,7 +1010,7 @@ static void omap_gpio_set(struct gpio_chip *chip, unsigned offset, int value) /*---------------------------------------------------------------------*/ -static void __init omap_gpio_show_rev(struct gpio_bank *bank) +static void omap_gpio_show_rev(struct gpio_bank *bank) { static bool called; u32 rev; -- cgit v1.2.3-70-g09d2 From c84284e59d60514539cb06741972adf60b14a5a3 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 19 Sep 2017 21:04:56 +0200 Subject: pinctrl: bcm2835: fix build warning in bcm2835_gpio_irq_handle_bank This patch fix the following build warning: drivers/pinctrl/bcm/pinctrl-bcm2835.c:376:15: warning: variable 'type' set but not used [-Wunused-but-set-variable] Furthermore, it is unused for a long time, at least since commit 85ae9e512f43 ("pinctrl: bcm2835: switch to GPIOLIB_IRQCHIP") where a "FIXME no clue why the code looks up the type here" was added. A year after, nobody answeered this question, so its time to remove it. Signed-off-by: Corentin Labbe Acked-by: Stefan Wahren Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 0944310225db..ff782445dfb7 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -373,16 +373,12 @@ static void bcm2835_gpio_irq_handle_bank(struct bcm2835_pinctrl *pc, unsigned long events; unsigned offset; unsigned gpio; - unsigned int type; events = bcm2835_gpio_rd(pc, GPEDS0 + bank * 4); events &= mask; events &= pc->enabled_irq_map[bank]; for_each_set_bit(offset, &events, 32) { gpio = (32 * bank) + offset; - /* FIXME: no clue why the code looks up the type here */ - type = pc->irq_type[gpio]; - generic_handle_irq(irq_linear_revmap(pc->gpio_chip.irqdomain, gpio)); } -- cgit v1.2.3-70-g09d2 From 6af1de2e4ec49635905aaed31d073a0d92c8d3bf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Sep 2017 14:58:53 +0200 Subject: ath10k: mark PM functions as __maybe_unused When CONFIG_PM_SLEEP is disabled, we get a compile-time warning: drivers/net/wireless/ath/ath10k/pci.c:3417:12: error: 'ath10k_pci_pm_resume' defined but not used [-Werror=unused-function] static int ath10k_pci_pm_resume(struct device *dev) ^~~~~~~~~~~~~~~~~~~~ drivers/net/wireless/ath/ath10k/pci.c:3401:12: error: 'ath10k_pci_pm_suspend' defined but not used [-Werror=unused-function] static int ath10k_pci_pm_suspend(struct device *dev) Rather than fixing the #ifdef, this just marks both functions as __maybe_unused, which is a more robust way to do this. Fixes: 32faa3f0ee50 ("ath10k: add the PCI PM core suspend/resume ops") Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/pci.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index bc1633945a56..195dafb98131 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -3396,9 +3396,7 @@ static void ath10k_pci_remove(struct pci_dev *pdev) MODULE_DEVICE_TABLE(pci, ath10k_pci_id_table); -#ifdef CONFIG_PM - -static int ath10k_pci_pm_suspend(struct device *dev) +static __maybe_unused int ath10k_pci_pm_suspend(struct device *dev) { struct ath10k *ar = dev_get_drvdata(dev); int ret; @@ -3414,7 +3412,7 @@ static int ath10k_pci_pm_suspend(struct device *dev) return ret; } -static int ath10k_pci_pm_resume(struct device *dev) +static __maybe_unused int ath10k_pci_pm_resume(struct device *dev) { struct ath10k *ar = dev_get_drvdata(dev); int ret; @@ -3433,7 +3431,6 @@ static int ath10k_pci_pm_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ath10k_pci_pm_ops, ath10k_pci_pm_suspend, ath10k_pci_pm_resume); -#endif static struct pci_driver ath10k_pci_driver = { .name = "ath10k_pci", -- cgit v1.2.3-70-g09d2 From 2e1c42391ff2556387b3cb6308b24f6f65619feb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 21 Sep 2017 16:58:48 +0200 Subject: USB: core: harden cdc_parse_cdc_header Andrey Konovalov reported a possible out-of-bounds problem for the cdc_parse_cdc_header function. He writes: It looks like cdc_parse_cdc_header() doesn't validate buflen before accessing buffer[1], buffer[2] and so on. The only check present is while (buflen > 0). So fix this issue up by properly validating the buffer length matches what the descriptor says it is. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 4c38ea41ae96..371a07d874a3 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -2069,6 +2069,10 @@ int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, elength = 1; goto next_desc; } + if ((buflen < elength) || (elength < 3)) { + dev_err(&intf->dev, "invalid descriptor buffer length\n"); + break; + } if (buffer[1] != USB_DT_CS_INTERFACE) { dev_err(&intf->dev, "skipping garbage\n"); goto next_desc; -- cgit v1.2.3-70-g09d2 From 87a2f622cc6446c7d09ac655b7b9b04886f16a4c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 18 Sep 2017 11:16:26 +0300 Subject: dmaengine: edma: Align the memcpy acnt array size with the transfer Memory to Memory transfers does not have any special alignment needs regarding to acnt array size, but if one of the areas are in memory mapped regions (like PCIe memory), we need to make sure that the acnt array size is aligned with the mem copy parameters. Before "dmaengine: edma: Optimize memcpy operation" change the memcpy was set up in a different way: acnt == number of bytes in a word based on __ffs((src | dest | len), bcnt and ccnt for looping the necessary number of words to comlete the trasnfer. Instead of reverting the commit we can fix it to make sure that the ACNT size is aligned to the traswnfer. Fixes: df6694f80365a (dmaengine: edma: Optimize memcpy operation) Signed-off-by: Peter Ujfalusi Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/edma.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 3879f80a4815..a7ea20e7b8e9 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1143,11 +1143,24 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( struct edma_desc *edesc; struct device *dev = chan->device->dev; struct edma_chan *echan = to_edma_chan(chan); - unsigned int width, pset_len; + unsigned int width, pset_len, array_size; if (unlikely(!echan || !len)) return NULL; + /* Align the array size (acnt block) with the transfer properties */ + switch (__ffs((src | dest | len))) { + case 0: + array_size = SZ_32K - 1; + break; + case 1: + array_size = SZ_32K - 2; + break; + default: + array_size = SZ_32K - 4; + break; + } + if (len < SZ_64K) { /* * Transfer size less than 64K can be handled with one paRAM @@ -1169,7 +1182,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( * When the full_length is multibple of 32767 one slot can be * used to complete the transfer. */ - width = SZ_32K - 1; + width = array_size; pset_len = rounddown(len, width); /* One slot is enough for lengths multiple of (SZ_32K -1) */ if (unlikely(pset_len == len)) @@ -1217,7 +1230,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( } dest += pset_len; src += pset_len; - pset_len = width = len % (SZ_32K - 1); + pset_len = width = len % array_size; ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1, width, pset_len, DMA_MEM_TO_MEM); -- cgit v1.2.3-70-g09d2 From 2ccb4837c938357233a0b8818e3ca3e58242c952 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 21 Sep 2017 14:35:32 +0300 Subject: dmaengine: ti-dma-crossbar: Fix possible race condition with dma_inuse When looking for unused xbar_out lane we should also protect the set_bit() call with the same mutex to protect against concurrent threads picking the same ID. Fixes: ec9bfa1e1a796 ("dmaengine: ti-dma-crossbar: dra7: Use bitops instead of idr") Signed-off-by: Peter Ujfalusi Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ti-dma-crossbar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c index 2f65a8fde21d..f1d04b70ee67 100644 --- a/drivers/dma/ti-dma-crossbar.c +++ b/drivers/dma/ti-dma-crossbar.c @@ -262,13 +262,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, mutex_lock(&xbar->mutex); map->xbar_out = find_first_zero_bit(xbar->dma_inuse, xbar->dma_requests); - mutex_unlock(&xbar->mutex); if (map->xbar_out == xbar->dma_requests) { + mutex_unlock(&xbar->mutex); dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); + mutex_unlock(&xbar->mutex); map->xbar_in = (u16)dma_spec->args[0]; -- cgit v1.2.3-70-g09d2 From a4fd4a724d6c30ad671046d83be2e9be2f11d275 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 21 Sep 2017 16:02:05 -0400 Subject: usb-storage: fix bogus hardware error messages for ATA pass-thru devices Ever since commit a621bac3044e ("scsi_lib: correctly retry failed zero length REQ_TYPE_FS commands"), people have been getting bogus error messages for USB disk drives using ATA pass-thru. For example: [ 1344.880193] sd 6:0:0:0: [sdb] Attached SCSI disk [ 1345.069152] sd 6:0:0:0: [sdb] tag#0 FAILED Result: hostbyte=DID_ERROR driverbyte=DRIVER_SENSE [ 1345.069159] sd 6:0:0:0: [sdb] tag#0 Sense Key : Hardware Error [current] [descriptor] [ 1345.069162] sd 6:0:0:0: [sdb] tag#0 Add. Sense: No additional sense information [ 1345.069168] sd 6:0:0:0: [sdb] tag#0 CDB: ATA command pass through(16) 85 06 20 00 00 00 00 00 00 00 00 00 00 00 e5 00 [ 1345.172252] sd 6:0:0:0: [sdb] tag#0 FAILED Result: hostbyte=DID_ERROR driverbyte=DRIVER_SENSE [ 1345.172258] sd 6:0:0:0: [sdb] tag#0 Sense Key : Hardware Error [current] [descriptor] [ 1345.172261] sd 6:0:0:0: [sdb] tag#0 Add. Sense: No additional sense information [ 1345.172266] sd 6:0:0:0: [sdb] tag#0 CDB: ATA command pass through(12)/Blank a1 06 20 da 00 00 4f c2 00 b0 00 00 These messages can be quite annoying, because programs like udisks2 provoke them every 10 minutes or so. Other programs can also have this effect, such as those in smartmontools. I don't fully understand how that commit induced the SCSI core to log these error messages, but the underlying cause for them is code added to usb-storage by commit f1a0743bc0e7 ("USB: storage: When a device returns no sense data, call it a Hardware Error"). At the time it was necessary to do this, in order to prevent an infinite retry loop with some not-so-great mass storage devices. However, the ATA pass-thru protocol uses SCSI sense data to return command status values, and some devices always report Check Condition status for ATA pass-thru commands to ensure that the host retrieves the sense data, even if the command succeeded. This violates the USB mass-storage protocol (Check Condition status is supposed to mean the command failed), but we can't help that. This patch attempts to mitigate the problem of these bogus error reports by changing usb-storage. The HARDWARE ERROR sense key will be inserted only for commands that aren't ATA pass-thru. Thanks to Ewan Milne for pointing out that this mechanism was present in usb-storage. 8 years after writing it, I had completely forgotten its existence. Signed-off-by: Alan Stern Tested-by: Kris Lindgren Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1351305 CC: Ewan D. Milne CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/transport.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 1a59f335b063..a3ccb899df60 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -834,13 +834,25 @@ Retry_Sense: if (result == USB_STOR_TRANSPORT_GOOD) { srb->result = SAM_STAT_GOOD; srb->sense_buffer[0] = 0x0; + } + + /* + * ATA-passthru commands use sense data to report + * the command completion status, and often devices + * return Check Condition status when nothing is + * wrong. + */ + else if (srb->cmnd[0] == ATA_16 || + srb->cmnd[0] == ATA_12) { + /* leave the data alone */ + } /* * If there was a problem, report an unspecified * hardware error to prevent the higher layers from * entering an infinite retry loop. */ - } else { + else { srb->result = DID_ERROR << 16; if ((sshdr.response_code & 0x72) == 0x72) srb->sense_buffer[1] = HARDWARE_ERROR; -- cgit v1.2.3-70-g09d2 From 113f6eb6d50cfa5e2a1cdcf1678b12661fa272ab Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 21 Sep 2017 15:59:30 -0400 Subject: usb-storage: unusual_devs entry to fix write-access regression for Seagate external drives Kris Lindgren reports that without the NO_WP_DETECT flag, his Seagate external disk drive fails all write accesses. This regresssion dates back approximately to the start of the 4.x kernel releases. Signed-off-by: Alan Stern Reported-by: Kris Lindgren CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 5a70c33ef0e0..eb06d88b41d6 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1459,6 +1459,13 @@ UNUSUAL_DEV( 0x0bc2, 0x3010, 0x0000, 0x0000, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_SANE_SENSE ), +/* Reported by Kris Lindgren */ +UNUSUAL_DEV( 0x0bc2, 0x3332, 0x0000, 0x9999, + "Seagate", + "External", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_WP_DETECT ), + UNUSUAL_DEV( 0x0d49, 0x7310, 0x0000, 0x9999, "Maxtor", "USB to SATA", -- cgit v1.2.3-70-g09d2 From fd085bb1766d6a598f53af2308374a546a49775a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 19 Sep 2017 18:47:40 +0300 Subject: stm class: Fix a use-after-free For reasons unknown, the stm_source removal path uses device_destroy() to kill the underlying device object. Because device_destroy() uses devt to look for the device to destroy and the fact that stm_source devices don't have one (or all have the same one), it just picks the first device in the class, which may well be the wrong one. That is, loading stm_console and stm_heartbeat and then removing both will die in dereferencing a freed object. Since this should have been device_unregister() in the first place, use it instead of device_destroy(). Signed-off-by: Alexander Shishkin Fixes: 7bd1d4093c2 ("stm class: Introduce an abstraction for System Trace Module devices") Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 9414900575d8..f129869e05a9 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -1119,7 +1119,7 @@ void stm_source_unregister_device(struct stm_source_data *data) stm_source_link_drop(src); - device_destroy(&stm_source_class, src->dev.devt); + device_unregister(&src->dev); } EXPORT_SYMBOL_GPL(stm_source_unregister_device); -- cgit v1.2.3-70-g09d2 From 920ce7c33db25cf4acb4ade3ae8c93bd23dfd730 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 19 Sep 2017 18:47:41 +0300 Subject: intel_th: pci: Add Cedar Fork PCH support This adds Intel(R) Trace Hub PCI ID for Cedar Fork PCH. Signed-off-by: Alexander Shishkin Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index bc9cebc30526..00ee60d9789e 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -158,6 +158,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9da6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Cedar Fork PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x18e1), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; -- cgit v1.2.3-70-g09d2 From 24600840c74112ad04a9ddd99d7d7f731dcaa1cb Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 19 Sep 2017 18:47:42 +0300 Subject: intel_th: pci: Add Lewisburg PCH support This adds Intel(R) Trace Hub PCI ID for Lewisburg PCH. Signed-off-by: Alexander Shishkin Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 00ee60d9789e..c2a2ce8ee541 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -143,6 +143,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1), .driver_data = (kernel_ulong_t)0, }, + { + /* Lewisburg PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa1a6), + .driver_data = (kernel_ulong_t)0, + }, { /* Gemini Lake */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e), -- cgit v1.2.3-70-g09d2 From 33c150c2ee4a65a59190a124b45d05b1abf9478e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 21 Sep 2017 23:41:47 -0700 Subject: vmbus: don't acquire the mutex in vmbus_hvsock_device_unregister() Due to commit 54a66265d675 ("Drivers: hv: vmbus: Fix rescind handling"), we need this patch to resolve the below deadlock: after we get the mutex in vmbus_hvsock_device_unregister() and call vmbus_device_unregister() -> device_unregister() -> ... -> device_release() -> vmbus_device_release(), we'll get a deadlock, because vmbus_device_release() tries to get the same mutex. Signed-off-by: Dexuan Cui Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Cc: stable@vger.kernel.org (4.13 and above) Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 060df71c2e8b..bcbb031f7263 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -936,14 +936,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) { - mutex_lock(&vmbus_connection.channel_mutex); - BUG_ON(!is_hvsock_channel(channel)); channel->rescind = true; vmbus_device_unregister(channel->device_obj); - - mutex_unlock(&vmbus_connection.channel_mutex); } EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); -- cgit v1.2.3-70-g09d2 From 549e658a0919e355a2b2144dc380b3729bef7f3e Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 21 Sep 2017 23:41:48 -0700 Subject: Drivers: hv: fcopy: restore correct transfer length Till recently the expected length of bytes read by the daemon did depend on the context. It was either hv_start_fcopy or hv_do_fcopy. The daemon had a buffer size of two pages, which was much larger than needed. Now the expected length of bytes read by the daemon changed slightly. For START_FILE_COPY it is still the size of hv_start_fcopy. But for WRITE_TO_FILE and the other operations it is as large as the buffer that arrived via vmbus. In case of WRITE_TO_FILE that is slightly larger than a struct hv_do_fcopy. Since the buffer in the daemon was still larger everything was fine. Currently, the daemon reads only what is actually needed. The new buffer layout is as large as a struct hv_do_fcopy, for the WRITE_TO_FILE operation. Since the kernel expects a slightly larger size, hvt_op_read will return -EINVAL because the daemon will read slightly less than expected. Address this by restoring the expected buffer size in case of WRITE_TO_FILE. Fixes: 'c7e490fc23eb ("Drivers: hv: fcopy: convert to hv_utils_transport")' Fixes: '3f2baa8a7d2e ("Tools: hv: update buffer handling in hv_fcopy_daemon")' Signed-off-by: Olaf Hering Signed-off-by: K. Y. Srinivasan Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index daa75bd41f86..2364281d8593 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -170,6 +170,10 @@ static void fcopy_send_data(struct work_struct *dummy) out_src = smsg_out; break; + case WRITE_TO_FILE: + out_src = fcopy_transaction.fcopy_msg; + out_len = sizeof(struct hv_do_fcopy); + break; default: out_src = fcopy_transaction.fcopy_msg; out_len = fcopy_transaction.recv_len; -- cgit v1.2.3-70-g09d2 From c51b46dd5b9950436b6b1f8189e93e1ad380cee1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 20 Sep 2017 22:19:57 +0100 Subject: staging: rtl8723bs: add missing range check on id The value of the u8 id needs to be upper bounds checked to ensure the cam_cache array on the adapter dvobj is not indexed outside of the allowed range of 0..TOTAL_CAM_ENTRY-1. This can currently occur if id is >= TOTAL_CAM_ENTRY when calling write_cam_from_cache. Fix this by adding an upper range check. Detected by CoverityScan, CID#1428464 ("Use of untrusted scalar value") Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/rtw_proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/rtl8723bs/os_dep/rtw_proc.c b/drivers/staging/rtl8723bs/os_dep/rtw_proc.c index 92277457aba4..ce1dd6f9036f 100644 --- a/drivers/staging/rtl8723bs/os_dep/rtw_proc.c +++ b/drivers/staging/rtl8723bs/os_dep/rtw_proc.c @@ -311,6 +311,8 @@ static ssize_t proc_set_cam(struct file *file, const char __user *buffer, size_t if (num < 2) return count; + if (id >= TOTAL_CAM_ENTRY) + return -EINVAL; if (strcmp("c", cmd) == 0) { _clear_cam_entry(adapter, id); -- cgit v1.2.3-70-g09d2 From ec14121931a24f8d3678b8a9c408adee3b21d465 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 20 Sep 2017 18:34:18 +0100 Subject: staging: rtl8723bs: avoid null pointer dereference on pmlmepriv There is a check to see if pmlmepriv is null before vfree'ing pmlmepriv->free_bss_buf hence implying pmlmepriv could potenially be null. However, a previous call to rtw_free_mlme_priv_ie_data can also dereference pmlmepriv, so move this call so that it is only called if pmlmepriv non-null. Detected by CoverityScan, CID#1077739 ("Dereference before null check") Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_mlme.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme.c b/drivers/staging/rtl8723bs/core/rtw_mlme.c index 6b778206a1a3..cb8a95aabd6c 100644 --- a/drivers/staging/rtl8723bs/core/rtw_mlme.c +++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c @@ -119,9 +119,8 @@ void rtw_free_mlme_priv_ie_data(struct mlme_priv *pmlmepriv) void _rtw_free_mlme_priv(struct mlme_priv *pmlmepriv) { - rtw_free_mlme_priv_ie_data(pmlmepriv); - if (pmlmepriv) { + rtw_free_mlme_priv_ie_data(pmlmepriv); if (pmlmepriv->free_bss_buf) { vfree(pmlmepriv->free_bss_buf); } -- cgit v1.2.3-70-g09d2 From c9adcdbc653b52e4be834f535eefec833f9ca6b1 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 21 Sep 2017 14:03:29 +0800 Subject: ALSA: pcm: Fix structure definition for X32 ABI X32 ABI uses the 64bit timespec in addition to 64bit alignment of 64bit values. We have added compat ABI for these ioctls, but this patch adds one missing padding into 'struct snd_pcm_mmap_status_x32' to fix incompatibilities. Signed-off-by: Baolin Wang Signed-off-by: Takashi Iwai --- sound/core/pcm_compat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 3a1cc7b97e46..b719d0bd833e 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -547,6 +547,7 @@ struct snd_pcm_mmap_status_x32 { u32 pad2; /* alignment */ struct timespec tstamp; s32 suspended_state; + s32 pad3; struct timespec audio_tstamp; } __packed; -- cgit v1.2.3-70-g09d2 From 40784d72aeeb4d95cf74ea2243223a85193f0e84 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Aug 2017 00:19:54 +0200 Subject: crypto: axis - hide an unused variable Without CONFIG_DEBUG_FS, we get a harmless warning: drivers/crypto/axis/artpec6_crypto.c:352:23: error: 'dbgfs_root' defined but not used [-Werror=unused-variable] This moves it into the #ifdef that hides the only user. Fixes: a21eb94fc4d3 ("crypto: axis - add ARTPEC-6/7 crypto accelerator driver") Signed-off-by: Arnd Bergmann Acked-by: Lars Persson Signed-off-by: Herbert Xu --- drivers/crypto/axis/artpec6_crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index d9fbbf01062b..0f9754e07719 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -349,8 +349,6 @@ struct artpec6_crypto_aead_req_ctx { /* The crypto framework makes it hard to avoid this global. */ static struct device *artpec6_crypto_dev; -static struct dentry *dbgfs_root; - #ifdef CONFIG_FAULT_INJECTION static DECLARE_FAULT_ATTR(artpec6_crypto_fail_status_read); static DECLARE_FAULT_ATTR(artpec6_crypto_fail_dma_array_full); @@ -2984,6 +2982,8 @@ struct dbgfs_u32 { char *desc; }; +static struct dentry *dbgfs_root; + static void artpec6_crypto_init_debugfs(void) { dbgfs_root = debugfs_create_dir("artpec6_crypto", NULL); -- cgit v1.2.3-70-g09d2 From bfc81a8bc18e3c4ba0cbaa7666ff76be2f998991 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Sep 2017 16:18:53 +0200 Subject: ALSA: usb-audio: Check out-of-bounds access by corrupted buffer descriptor When a USB-audio device receives a maliciously adjusted or corrupted buffer descriptor, the USB-audio driver may access an out-of-bounce value at its parser. This was detected by syzkaller, something like: BUG: KASAN: slab-out-of-bounds in usb_audio_probe+0x27b2/0x2ab0 Read of size 1 at addr ffff88006b83a9e8 by task kworker/0:1/24 CPU: 0 PID: 24 Comm: kworker/0:1 Not tainted 4.14.0-rc1-42251-gebb2c2437d80 #224 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x292/0x395 lib/dump_stack.c:52 print_address_description+0x78/0x280 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 kasan_report+0x22f/0x340 mm/kasan/report.c:409 __asan_report_load1_noabort+0x19/0x20 mm/kasan/report.c:427 snd_usb_create_streams sound/usb/card.c:248 usb_audio_probe+0x27b2/0x2ab0 sound/usb/card.c:605 usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361 really_probe drivers/base/dd.c:413 driver_probe_device+0x610/0xa00 drivers/base/dd.c:557 __device_attach_driver+0x230/0x290 drivers/base/dd.c:653 bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463 __device_attach+0x26e/0x3d0 drivers/base/dd.c:710 device_initial_probe+0x1f/0x30 drivers/base/dd.c:757 bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523 device_add+0xd0b/0x1660 drivers/base/core.c:1835 usb_set_configuration+0x104e/0x1870 drivers/usb/core/message.c:1932 generic_probe+0x73/0xe0 drivers/usb/core/generic.c:174 usb_probe_device+0xaf/0xe0 drivers/usb/core/driver.c:266 really_probe drivers/base/dd.c:413 driver_probe_device+0x610/0xa00 drivers/base/dd.c:557 __device_attach_driver+0x230/0x290 drivers/base/dd.c:653 bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463 __device_attach+0x26e/0x3d0 drivers/base/dd.c:710 device_initial_probe+0x1f/0x30 drivers/base/dd.c:757 bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523 device_add+0xd0b/0x1660 drivers/base/core.c:1835 usb_new_device+0x7b8/0x1020 drivers/usb/core/hub.c:2457 hub_port_connect drivers/usb/core/hub.c:4903 hub_port_connect_change drivers/usb/core/hub.c:5009 port_event drivers/usb/core/hub.c:5115 hub_event+0x194d/0x3740 drivers/usb/core/hub.c:5195 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119 worker_thread+0x221/0x1850 kernel/workqueue.c:2253 kthread+0x3a1/0x470 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 This patch adds the checks of out-of-bounce accesses at appropriate places and bails out when it goes out of the given buffer. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Signed-off-by: Takashi Iwai --- sound/usb/card.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/usb/card.c b/sound/usb/card.c index 3dc36d913550..23d1d23aefec 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -221,6 +221,7 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) struct usb_interface_descriptor *altsd; void *control_header; int i, protocol; + int rest_bytes; /* find audiocontrol interface */ host_iface = &usb_ifnum_to_if(dev, ctrlif)->altsetting[0]; @@ -235,6 +236,15 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) return -EINVAL; } + rest_bytes = (void *)(host_iface->extra + host_iface->extralen) - + control_header; + + /* just to be sure -- this shouldn't hit at all */ + if (rest_bytes <= 0) { + dev_err(&dev->dev, "invalid control header\n"); + return -EINVAL; + } + switch (protocol) { default: dev_warn(&dev->dev, @@ -245,11 +255,21 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) case UAC_VERSION_1: { struct uac1_ac_header_descriptor *h1 = control_header; + if (rest_bytes < sizeof(*h1)) { + dev_err(&dev->dev, "too short v1 buffer descriptor\n"); + return -EINVAL; + } + if (!h1->bInCollection) { dev_info(&dev->dev, "skipping empty audio interface (v1)\n"); return -EINVAL; } + if (rest_bytes < h1->bLength) { + dev_err(&dev->dev, "invalid buffer length (v1)\n"); + return -EINVAL; + } + if (h1->bLength < sizeof(*h1) + h1->bInCollection) { dev_err(&dev->dev, "invalid UAC_HEADER (v1)\n"); return -EINVAL; -- cgit v1.2.3-70-g09d2 From c4fa6c43ce4b427350cfbb659436bfe3d9e09a1d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 21 Sep 2017 09:54:13 -0400 Subject: cgroup: Reinit cgroup_taskset structure before cgroup_migrate_execute() returns The cgroup_taskset structure within the larger cgroup_mgctx structure is supposed to be used once and then discarded. That is not really the case in the hotplug code path: cpuset_hotplug_workfn() - cgroup_transfer_tasks() - cgroup_migrate() - cgroup_migrate_add_task() - cgroup_migrate_execute() In this case, the cgroup_migrate() function is called multiple time with the same cgroup_mgctx structure to transfer the tasks from one cgroup to another one-by-one. The second time cgroup_migrate() is called, the cgroup_taskset will be in an incorrect state and so may cause the system to panic. For example, [ 150.888410] Faulting instruction address: 0xc0000000001db648 [ 150.888414] Oops: Kernel access of bad area, sig: 11 [#1] [ 150.888417] SMP NR_CPUS=2048 [ 150.888417] NUMA [ 150.888419] pSeries : [ 150.888545] NIP [c0000000001db648] cpuset_can_attach+0x58/0x1b0 [ 150.888548] LR [c0000000001db638] cpuset_can_attach+0x48/0x1b0 [ 150.888551] Call Trace: [ 150.888554] [c0000005f65cb940] [c0000000001db638] cpuset_can_attach+0x48/0x1b 0 (unreliable) [ 150.888559] [c0000005f65cb9a0] [c0000000001cff04] cgroup_migrate_execute+0xc4/0x4b0 [ 150.888563] [c0000005f65cba20] [c0000000001d7d14] cgroup_transfer_tasks+0x1d4/0x370 [ 150.888568] [c0000005f65cbb70] [c0000000001ddcb0] cpuset_hotplug_workfn+0x710/0x8f0 [ 150.888572] [c0000005f65cbc80] [c00000000012032c] process_one_work+0x1ac/0x4d0 [ 150.888576] [c0000005f65cbd20] [c0000000001206f8] worker_thread+0xa8/0x5b0 [ 150.888580] [c0000005f65cbdc0] [c0000000001293f8] kthread+0x168/0x1b0 [ 150.888584] [c0000005f65cbe30] [c00000000000b368] ret_from_kernel_thread+0x5c/0x74 To allow reuse of the cgroup_mgctx structure, some fields in that structure are now re-initialized at the end of cgroup_migrate_execute() function call so that the structure can be reused again in a later iteration without causing problem. This bug was introduced in the commit e595cd706982 ("group: track migration context in cgroup_mgctx") in 4.11. This commit moves the cgroup_taskset initialization out of cgroup_migrate(). The commit 10467270fb3 ("cgroup: don't call migration methods if there are no tasks to migrate") helped, but did not completely resolve the problem. Fixes: e595cd706982bff0211e6fafe5a108421e747fbc ("group: track migration context in cgroup_mgctx") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v4.11+ --- kernel/cgroup/cgroup.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d6551cd45238..44857278eb8a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2311,6 +2311,14 @@ out_release_tset: list_del_init(&cset->mg_node); } spin_unlock_irq(&css_set_lock); + + /* + * Re-initialize the cgroup_taskset structure in case it is reused + * again in another cgroup_migrate_add_task()/cgroup_migrate_execute() + * iteration. + */ + tset->nr_tasks = 0; + tset->csets = &tset->src_csets; return ret; } -- cgit v1.2.3-70-g09d2 From 786de92b3cb26012d3d0f00ee37adf14527f35c4 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 22 Sep 2017 11:56:49 -0400 Subject: USB: uas: fix bug in handling of alternate settings The uas driver has a subtle bug in the way it handles alternate settings. The uas_find_uas_alt_setting() routine returns an altsetting value (the bAlternateSetting number in the descriptor), but uas_use_uas_driver() then treats that value as an index to the intf->altsetting array, which it isn't. Normally this doesn't cause any problems because the various alternate settings have bAlternateSetting values 0, 1, 2, ..., so the value is equal to the index in the array. But this is not guaranteed, and Andrey Konovalov used the syzkaller fuzzer with KASAN to get a slab-out-of-bounds error by violating this assumption. This patch fixes the bug by making uas_find_uas_alt_setting() return a pointer to the altsetting entry rather than either the value or the index. Pointers are less subject to misinterpretation. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov CC: Oliver Neukum CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas-detect.h | 15 ++++++++------- drivers/usb/storage/uas.c | 10 +++++----- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h index f58caa9e6a27..a155cd02bce2 100644 --- a/drivers/usb/storage/uas-detect.h +++ b/drivers/usb/storage/uas-detect.h @@ -9,7 +9,8 @@ static int uas_is_interface(struct usb_host_interface *intf) intf->desc.bInterfaceProtocol == USB_PR_UAS); } -static int uas_find_uas_alt_setting(struct usb_interface *intf) +static struct usb_host_interface *uas_find_uas_alt_setting( + struct usb_interface *intf) { int i; @@ -17,10 +18,10 @@ static int uas_find_uas_alt_setting(struct usb_interface *intf) struct usb_host_interface *alt = &intf->altsetting[i]; if (uas_is_interface(alt)) - return alt->desc.bAlternateSetting; + return alt; } - return -ENODEV; + return NULL; } static int uas_find_endpoints(struct usb_host_interface *alt, @@ -58,14 +59,14 @@ static int uas_use_uas_driver(struct usb_interface *intf, struct usb_device *udev = interface_to_usbdev(intf); struct usb_hcd *hcd = bus_to_hcd(udev->bus); unsigned long flags = id->driver_info; - int r, alt; - + struct usb_host_interface *alt; + int r; alt = uas_find_uas_alt_setting(intf); - if (alt < 0) + if (!alt) return 0; - r = uas_find_endpoints(&intf->altsetting[alt], eps); + r = uas_find_endpoints(alt, eps); if (r < 0) return 0; diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index cfb1e3bbd434..63cf981ed81c 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -873,14 +873,14 @@ MODULE_DEVICE_TABLE(usb, uas_usb_ids); static int uas_switch_interface(struct usb_device *udev, struct usb_interface *intf) { - int alt; + struct usb_host_interface *alt; alt = uas_find_uas_alt_setting(intf); - if (alt < 0) - return alt; + if (!alt) + return -ENODEV; - return usb_set_interface(udev, - intf->altsetting[0].desc.bInterfaceNumber, alt); + return usb_set_interface(udev, alt->desc.bInterfaceNumber, + alt->desc.bAlternateSetting); } static int uas_configure_endpoints(struct uas_dev_info *devinfo) -- cgit v1.2.3-70-g09d2 From 6e76c01e71551cb221c1f3deacb9dcd9a7346784 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 21 Sep 2017 16:12:01 -0400 Subject: USB: gadgetfs: fix copy_to_user while holding spinlock The gadgetfs driver as a long-outstanding FIXME, regarding a call of copy_to_user() made while holding a spinlock. This patch fixes the issue by dropping the spinlock and using the dev->udc_usage mechanism introduced by another recent patch to guard against status changes while the lock isn't held. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov CC: Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 684900fcfe24..956b3dc7c3a4 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -983,11 +983,14 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) retval = -EIO; else { len = min (len, (size_t)dev->req->actual); -// FIXME don't call this with the spinlock held ... + ++dev->udc_usage; + spin_unlock_irq(&dev->lock); if (copy_to_user (buf, dev->req->buf, len)) retval = -EFAULT; else retval = len; + spin_lock_irq(&dev->lock); + --dev->udc_usage; clean_req (dev->gadget->ep0, dev->req); /* NOTE userspace can't yet choose to stall */ } -- cgit v1.2.3-70-g09d2 From 520b72fc64debf8a86c3853b8e486aa5982188f0 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 21 Sep 2017 13:23:58 -0400 Subject: USB: gadgetfs: Fix crash caused by inadequate synchronization The gadgetfs driver (drivers/usb/gadget/legacy/inode.c) was written before the UDC and composite frameworks were adopted; it is a legacy driver. As such, it expects that once bound to a UDC controller, it will not be unbound until it unregisters itself. However, the UDC framework does unbind function drivers while they are still registered. When this happens, it can cause the gadgetfs driver to misbehave or crash. For example, userspace can cause a crash by opening the device file and doing an ioctl call before setting up a configuration (found by Andrey Konovalov using the syzkaller fuzzer). This patch adds checks and synchronization to prevent these bad behaviors. It adds a udc_usage counter that the driver increments at times when it is using a gadget interface without holding the private spinlock. The unbind routine waits for this counter to go to 0 before returning, thereby ensuring that the UDC is no longer in use. The patch also adds a check in the dev_ioctl() routine to make sure the driver is bound to a UDC before dereferencing the gadget pointer, and it makes destroy_ep_files() synchronize with the endpoint I/O routines, to prevent the user from accessing an endpoint data structure after it has been removed. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov CC: Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 41 ++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 956b3dc7c3a4..5c28bee327e1 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -28,7 +28,7 @@ #include #include #include - +#include #include #include @@ -116,6 +116,7 @@ enum ep0_state { struct dev_data { spinlock_t lock; refcount_t count; + int udc_usage; enum ep0_state state; /* P: lock */ struct usb_gadgetfs_event event [N_EVENT]; unsigned ev_next; @@ -513,9 +514,9 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) INIT_WORK(&priv->work, ep_user_copy_worker); schedule_work(&priv->work); } - spin_unlock(&epdata->dev->lock); usb_ep_free_request(ep, req); + spin_unlock(&epdata->dev->lock); put_ep(epdata); } @@ -939,9 +940,11 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) struct usb_request *req = dev->req; if ((retval = setup_req (ep, req, 0)) == 0) { + ++dev->udc_usage; spin_unlock_irq (&dev->lock); retval = usb_ep_queue (ep, req, GFP_KERNEL); spin_lock_irq (&dev->lock); + --dev->udc_usage; } dev->state = STATE_DEV_CONNECTED; @@ -1134,6 +1137,7 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) retval = setup_req (dev->gadget->ep0, dev->req, len); if (retval == 0) { dev->state = STATE_DEV_CONNECTED; + ++dev->udc_usage; spin_unlock_irq (&dev->lock); if (copy_from_user (dev->req->buf, buf, len)) retval = -EFAULT; @@ -1145,6 +1149,7 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) GFP_KERNEL); } spin_lock_irq(&dev->lock); + --dev->udc_usage; if (retval < 0) { clean_req (dev->gadget->ep0, dev->req); } else @@ -1246,9 +1251,21 @@ static long dev_ioctl (struct file *fd, unsigned code, unsigned long value) struct usb_gadget *gadget = dev->gadget; long ret = -ENOTTY; - if (gadget->ops->ioctl) + spin_lock_irq(&dev->lock); + if (dev->state == STATE_DEV_OPENED || + dev->state == STATE_DEV_UNBOUND) { + /* Not bound to a UDC */ + } else if (gadget->ops->ioctl) { + ++dev->udc_usage; + spin_unlock_irq(&dev->lock); + ret = gadget->ops->ioctl (gadget, code, value); + spin_lock_irq(&dev->lock); + --dev->udc_usage; + } + spin_unlock_irq(&dev->lock); + return ret; } @@ -1466,10 +1483,12 @@ delegate: if (value < 0) break; + ++dev->udc_usage; spin_unlock (&dev->lock); value = usb_ep_queue (gadget->ep0, dev->req, GFP_KERNEL); spin_lock (&dev->lock); + --dev->udc_usage; if (value < 0) { clean_req (gadget->ep0, dev->req); break; @@ -1493,8 +1512,12 @@ delegate: req->length = value; req->zero = value < w_length; + ++dev->udc_usage; spin_unlock (&dev->lock); value = usb_ep_queue (gadget->ep0, req, GFP_KERNEL); + spin_lock(&dev->lock); + --dev->udc_usage; + spin_unlock(&dev->lock); if (value < 0) { DBG (dev, "ep_queue --> %d\n", value); req->status = 0; @@ -1521,21 +1544,24 @@ static void destroy_ep_files (struct dev_data *dev) /* break link to FS */ ep = list_first_entry (&dev->epfiles, struct ep_data, epfiles); list_del_init (&ep->epfiles); + spin_unlock_irq (&dev->lock); + dentry = ep->dentry; ep->dentry = NULL; parent = d_inode(dentry->d_parent); /* break link to controller */ + mutex_lock(&ep->lock); if (ep->state == STATE_EP_ENABLED) (void) usb_ep_disable (ep->ep); ep->state = STATE_EP_UNBOUND; usb_ep_free_request (ep->ep, ep->req); ep->ep = NULL; + mutex_unlock(&ep->lock); + wake_up (&ep->wait); put_ep (ep); - spin_unlock_irq (&dev->lock); - /* break link to dcache */ inode_lock(parent); d_delete (dentry); @@ -1606,6 +1632,11 @@ gadgetfs_unbind (struct usb_gadget *gadget) spin_lock_irq (&dev->lock); dev->state = STATE_DEV_UNBOUND; + while (dev->udc_usage > 0) { + spin_unlock_irq(&dev->lock); + usleep_range(1000, 2000); + spin_lock_irq(&dev->lock); + } spin_unlock_irq (&dev->lock); destroy_ep_files (dev); -- cgit v1.2.3-70-g09d2 From 1fbbb78f25d1291274f320462bf6908906f538db Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 21 Sep 2017 13:22:00 -0400 Subject: USB: g_mass_storage: Fix deadlock when driver is unbound As a holdover from the old g_file_storage gadget, the g_mass_storage legacy gadget driver attempts to unregister itself when its main operating thread terminates (if it hasn't been unregistered already). This is not strictly necessary; it was never more than an attempt to have the gadget fail cleanly if something went wrong and the main thread was killed. However, now that the UDC core manages gadget drivers independently of UDC drivers, this scheme doesn't work any more. A simple test: modprobe dummy-hcd modprobe g-mass-storage file=... rmmod dummy-hcd ends up in a deadlock with the following backtrace: sysrq: SysRq : Show Blocked State task PC stack pid father file-storage D 0 1130 2 0x00000000 Call Trace: __schedule+0x53e/0x58c schedule+0x6e/0x77 schedule_preempt_disabled+0xd/0xf __mutex_lock.isra.1+0x129/0x224 ? _raw_spin_unlock_irqrestore+0x12/0x14 __mutex_lock_slowpath+0x12/0x14 mutex_lock+0x28/0x2b usb_gadget_unregister_driver+0x29/0x9b [udc_core] usb_composite_unregister+0x10/0x12 [libcomposite] msg_cleanup+0x1d/0x20 [g_mass_storage] msg_thread_exits+0xd/0xdd7 [g_mass_storage] fsg_main_thread+0x1395/0x13d6 [usb_f_mass_storage] ? __schedule+0x573/0x58c kthread+0xd9/0xdb ? do_set_interface+0x25c/0x25c [usb_f_mass_storage] ? init_completion+0x1e/0x1e ret_from_fork+0x19/0x24 rmmod D 0 1155 683 0x00000000 Call Trace: __schedule+0x53e/0x58c schedule+0x6e/0x77 schedule_timeout+0x26/0xbc ? __schedule+0x573/0x58c do_wait_for_common+0xb3/0x128 ? usleep_range+0x81/0x81 ? wake_up_q+0x3f/0x3f wait_for_common+0x2e/0x45 wait_for_completion+0x17/0x19 fsg_common_put+0x34/0x81 [usb_f_mass_storage] fsg_free_inst+0x13/0x1e [usb_f_mass_storage] usb_put_function_instance+0x1a/0x25 [libcomposite] msg_unbind+0x2a/0x42 [g_mass_storage] __composite_unbind+0x4a/0x6f [libcomposite] composite_unbind+0x12/0x14 [libcomposite] usb_gadget_remove_driver+0x4f/0x77 [udc_core] usb_del_gadget_udc+0x52/0xcc [udc_core] dummy_udc_remove+0x27/0x2c [dummy_hcd] platform_drv_remove+0x1d/0x31 device_release_driver_internal+0xe9/0x16d device_release_driver+0x11/0x13 bus_remove_device+0xd2/0xe2 device_del+0x19f/0x221 ? selinux_capable+0x22/0x27 platform_device_del+0x21/0x63 platform_device_unregister+0x10/0x1a cleanup+0x20/0x817 [dummy_hcd] SyS_delete_module+0x10c/0x197 ? ____fput+0xd/0xf ? task_work_run+0x55/0x62 ? prepare_exit_to_usermode+0x65/0x75 do_fast_syscall_32+0x86/0xc3 entry_SYSENTER_32+0x4e/0x7c What happens is that removing the dummy-hcd driver causes the UDC core to unbind the gadget driver, which it does while holding the udc_lock mutex. The unbind routine in g_mass_storage tells the main thread to exit and waits for it to terminate. But as mentioned above, when the main thread exits it tries to unregister the mass-storage function driver. Via the composite framework this ends up calling usb_gadget_unregister_driver(), which tries to acquire the udc_lock mutex. The result is deadlock. The simplest way to fix the problem is not to be so clever: The main thread doesn't have to unregister the function driver. The side effects won't be so terrible; if the gadget is still attached to a USB host when the main thread is killed, it will appear to the host as though the gadget's firmware has crashed -- a reasonably accurate interpretation, and an all-too-common occurrence for USB mass-storage devices. In fact, the code to unregister the driver when the main thread exits is specific to g-mass-storage; it is not used when f-mass-storage is included as a function in a larger composite device. Therefore the entire mechanism responsible for this (the fsg_operations structure with its ->thread_exits method, the fsg_common_set_ops() routine, and the msg_thread_exits() callback routine) can all be eliminated. Even the msg_registered bitflag can be removed, because now the driver is unregistered in only one place rather than in two places. Signed-off-by: Alan Stern CC: Acked-by: Felipe Balbi Acked-by: Michal Nazarewicz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 27 +++++++-------------------- drivers/usb/gadget/function/f_mass_storage.h | 14 -------------- drivers/usb/gadget/legacy/mass_storage.c | 26 +++----------------------- 3 files changed, 10 insertions(+), 57 deletions(-) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index d6bd0244b008..5153e29870c3 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -307,8 +307,6 @@ struct fsg_common { struct completion thread_notifier; struct task_struct *thread_task; - /* Callback functions. */ - const struct fsg_operations *ops; /* Gadget's private data. */ void *private_data; @@ -2438,6 +2436,7 @@ static void handle_exception(struct fsg_common *common) static int fsg_main_thread(void *common_) { struct fsg_common *common = common_; + int i; /* * Allow the thread to be killed by a signal, but set the signal mask @@ -2476,21 +2475,16 @@ static int fsg_main_thread(void *common_) common->thread_task = NULL; spin_unlock_irq(&common->lock); - if (!common->ops || !common->ops->thread_exits - || common->ops->thread_exits(common) < 0) { - int i; + /* Eject media from all LUNs */ - down_write(&common->filesem); - for (i = 0; i < ARRAY_SIZE(common->luns); i++) { - struct fsg_lun *curlun = common->luns[i]; - if (!curlun || !fsg_lun_is_open(curlun)) - continue; + down_write(&common->filesem); + for (i = 0; i < ARRAY_SIZE(common->luns); i++) { + struct fsg_lun *curlun = common->luns[i]; + if (curlun && fsg_lun_is_open(curlun)) fsg_lun_close(curlun); - curlun->unit_attention_data = SS_MEDIUM_NOT_PRESENT; - } - up_write(&common->filesem); } + up_write(&common->filesem); /* Let fsg_unbind() know the thread has exited */ complete_and_exit(&common->thread_notifier, 0); @@ -2681,13 +2675,6 @@ void fsg_common_remove_luns(struct fsg_common *common) } EXPORT_SYMBOL_GPL(fsg_common_remove_luns); -void fsg_common_set_ops(struct fsg_common *common, - const struct fsg_operations *ops) -{ - common->ops = ops; -} -EXPORT_SYMBOL_GPL(fsg_common_set_ops); - void fsg_common_free_buffers(struct fsg_common *common) { _fsg_common_free_buffers(common->buffhds, common->fsg_num_buffers); diff --git a/drivers/usb/gadget/function/f_mass_storage.h b/drivers/usb/gadget/function/f_mass_storage.h index d3902313b8ac..dc05ca0c4359 100644 --- a/drivers/usb/gadget/function/f_mass_storage.h +++ b/drivers/usb/gadget/function/f_mass_storage.h @@ -60,17 +60,6 @@ struct fsg_module_parameters { struct fsg_common; /* FSF callback functions */ -struct fsg_operations { - /* - * Callback function to call when thread exits. If no - * callback is set or it returns value lower then zero MSF - * will force eject all LUNs it operates on (including those - * marked as non-removable or with prevent_medium_removal flag - * set). - */ - int (*thread_exits)(struct fsg_common *common); -}; - struct fsg_lun_opts { struct config_group group; struct fsg_lun *lun; @@ -142,9 +131,6 @@ void fsg_common_remove_lun(struct fsg_lun *lun); void fsg_common_remove_luns(struct fsg_common *common); -void fsg_common_set_ops(struct fsg_common *common, - const struct fsg_operations *ops); - int fsg_common_create_lun(struct fsg_common *common, struct fsg_lun_config *cfg, unsigned int id, const char *name, const char **name_pfx); diff --git a/drivers/usb/gadget/legacy/mass_storage.c b/drivers/usb/gadget/legacy/mass_storage.c index e99ab57ee3e5..fcba59782f26 100644 --- a/drivers/usb/gadget/legacy/mass_storage.c +++ b/drivers/usb/gadget/legacy/mass_storage.c @@ -107,15 +107,6 @@ static unsigned int fsg_num_buffers = CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS; FSG_MODULE_PARAMETERS(/* no prefix */, mod_data); -static unsigned long msg_registered; -static void msg_cleanup(void); - -static int msg_thread_exits(struct fsg_common *common) -{ - msg_cleanup(); - return 0; -} - static int msg_do_config(struct usb_configuration *c) { struct fsg_opts *opts; @@ -154,9 +145,6 @@ static struct usb_configuration msg_config_driver = { static int msg_bind(struct usb_composite_dev *cdev) { - static const struct fsg_operations ops = { - .thread_exits = msg_thread_exits, - }; struct fsg_opts *opts; struct fsg_config config; int status; @@ -173,8 +161,6 @@ static int msg_bind(struct usb_composite_dev *cdev) if (status) goto fail; - fsg_common_set_ops(opts->common, &ops); - status = fsg_common_set_cdev(opts->common, cdev, config.can_stall); if (status) goto fail_set_cdev; @@ -256,18 +242,12 @@ MODULE_LICENSE("GPL"); static int __init msg_init(void) { - int ret; - - ret = usb_composite_probe(&msg_driver); - set_bit(0, &msg_registered); - - return ret; + return usb_composite_probe(&msg_driver); } module_init(msg_init); -static void msg_cleanup(void) +static void __exit msg_cleanup(void) { - if (test_and_clear_bit(0, &msg_registered)) - usb_composite_unregister(&msg_driver); + usb_composite_unregister(&msg_driver); } module_exit(msg_cleanup); -- cgit v1.2.3-70-g09d2 From b4756707152700c96acdfe149cb1ca4cec306c7a Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 2 Sep 2017 07:42:42 -0400 Subject: media: dvb: i2c transfers over usb cannot be done from stack Since commit 29d2fef8be11 ("usb: catch attempts to submit urbs with a vmalloc'd transfer buffer"), the AverMedia AverTV DVB-T USB 2.0 (a800) fails to probe. Cc: stable@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-frontends/dib3000mc.c | 50 ++++++++++++++++++++++------ drivers/media/dvb-frontends/dvb-pll.c | 22 +++++++++--- drivers/media/tuners/mt2060.c | 59 ++++++++++++++++++++++++++------- 3 files changed, 103 insertions(+), 28 deletions(-) diff --git a/drivers/media/dvb-frontends/dib3000mc.c b/drivers/media/dvb-frontends/dib3000mc.c index 224283fe100a..4d086a7248e9 100644 --- a/drivers/media/dvb-frontends/dib3000mc.c +++ b/drivers/media/dvb-frontends/dib3000mc.c @@ -55,29 +55,57 @@ struct dib3000mc_state { static u16 dib3000mc_read_word(struct dib3000mc_state *state, u16 reg) { - u8 wb[2] = { (reg >> 8) | 0x80, reg & 0xff }; - u8 rb[2]; struct i2c_msg msg[2] = { - { .addr = state->i2c_addr >> 1, .flags = 0, .buf = wb, .len = 2 }, - { .addr = state->i2c_addr >> 1, .flags = I2C_M_RD, .buf = rb, .len = 2 }, + { .addr = state->i2c_addr >> 1, .flags = 0, .len = 2 }, + { .addr = state->i2c_addr >> 1, .flags = I2C_M_RD, .len = 2 }, }; + u16 word; + u8 *b; + + b = kmalloc(4, GFP_KERNEL); + if (!b) + return 0; + + b[0] = (reg >> 8) | 0x80; + b[1] = reg; + b[2] = 0; + b[3] = 0; + + msg[0].buf = b; + msg[1].buf = b + 2; if (i2c_transfer(state->i2c_adap, msg, 2) != 2) dprintk("i2c read error on %d\n",reg); - return (rb[0] << 8) | rb[1]; + word = (b[2] << 8) | b[3]; + kfree(b); + + return word; } static int dib3000mc_write_word(struct dib3000mc_state *state, u16 reg, u16 val) { - u8 b[4] = { - (reg >> 8) & 0xff, reg & 0xff, - (val >> 8) & 0xff, val & 0xff, - }; struct i2c_msg msg = { - .addr = state->i2c_addr >> 1, .flags = 0, .buf = b, .len = 4 + .addr = state->i2c_addr >> 1, .flags = 0, .len = 4 }; - return i2c_transfer(state->i2c_adap, &msg, 1) != 1 ? -EREMOTEIO : 0; + int rc; + u8 *b; + + b = kmalloc(4, GFP_KERNEL); + if (!b) + return -ENOMEM; + + b[0] = reg >> 8; + b[1] = reg; + b[2] = val >> 8; + b[3] = val; + + msg.buf = b; + + rc = i2c_transfer(state->i2c_adap, &msg, 1) != 1 ? -EREMOTEIO : 0; + kfree(b); + + return rc; } static int dib3000mc_identify(struct dib3000mc_state *state) diff --git a/drivers/media/dvb-frontends/dvb-pll.c b/drivers/media/dvb-frontends/dvb-pll.c index 7bec3e028bee..5553b89b804e 100644 --- a/drivers/media/dvb-frontends/dvb-pll.c +++ b/drivers/media/dvb-frontends/dvb-pll.c @@ -753,13 +753,19 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr, struct i2c_adapter *i2c, unsigned int pll_desc_id) { - u8 b1 [] = { 0 }; - struct i2c_msg msg = { .addr = pll_addr, .flags = I2C_M_RD, - .buf = b1, .len = 1 }; + u8 *b1; + struct i2c_msg msg = { .addr = pll_addr, .flags = I2C_M_RD, .len = 1 }; struct dvb_pll_priv *priv = NULL; int ret; const struct dvb_pll_desc *desc; + b1 = kmalloc(1, GFP_KERNEL); + if (!b1) + return NULL; + + b1[0] = 0; + msg.buf = b1; + if ((id[dvb_pll_devcount] > DVB_PLL_UNDEFINED) && (id[dvb_pll_devcount] < ARRAY_SIZE(pll_list))) pll_desc_id = id[dvb_pll_devcount]; @@ -773,15 +779,19 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr, fe->ops.i2c_gate_ctrl(fe, 1); ret = i2c_transfer (i2c, &msg, 1); - if (ret != 1) + if (ret != 1) { + kfree(b1); return NULL; + } if (fe->ops.i2c_gate_ctrl) fe->ops.i2c_gate_ctrl(fe, 0); } priv = kzalloc(sizeof(struct dvb_pll_priv), GFP_KERNEL); - if (priv == NULL) + if (!priv) { + kfree(b1); return NULL; + } priv->pll_i2c_address = pll_addr; priv->i2c = i2c; @@ -811,6 +821,8 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr, "insmod option" : "autodetected"); } + kfree(b1); + return fe; } EXPORT_SYMBOL(dvb_pll_attach); diff --git a/drivers/media/tuners/mt2060.c b/drivers/media/tuners/mt2060.c index 2e487f9a2cc3..4983eeb39f36 100644 --- a/drivers/media/tuners/mt2060.c +++ b/drivers/media/tuners/mt2060.c @@ -38,41 +38,74 @@ MODULE_PARM_DESC(debug, "Turn on/off debugging (default:off)."); static int mt2060_readreg(struct mt2060_priv *priv, u8 reg, u8 *val) { struct i2c_msg msg[2] = { - { .addr = priv->cfg->i2c_address, .flags = 0, .buf = ®, .len = 1 }, - { .addr = priv->cfg->i2c_address, .flags = I2C_M_RD, .buf = val, .len = 1 }, + { .addr = priv->cfg->i2c_address, .flags = 0, .len = 1 }, + { .addr = priv->cfg->i2c_address, .flags = I2C_M_RD, .len = 1 }, }; + int rc = 0; + u8 *b; + + b = kmalloc(2, GFP_KERNEL); + if (!b) + return -ENOMEM; + + b[0] = reg; + b[1] = 0; + + msg[0].buf = b; + msg[1].buf = b + 1; if (i2c_transfer(priv->i2c, msg, 2) != 2) { printk(KERN_WARNING "mt2060 I2C read failed\n"); - return -EREMOTEIO; + rc = -EREMOTEIO; } - return 0; + *val = b[1]; + kfree(b); + + return rc; } // Writes a single register static int mt2060_writereg(struct mt2060_priv *priv, u8 reg, u8 val) { - u8 buf[2] = { reg, val }; struct i2c_msg msg = { - .addr = priv->cfg->i2c_address, .flags = 0, .buf = buf, .len = 2 + .addr = priv->cfg->i2c_address, .flags = 0, .len = 2 }; + u8 *buf; + int rc = 0; + + buf = kmalloc(2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = reg; + buf[1] = val; + + msg.buf = buf; if (i2c_transfer(priv->i2c, &msg, 1) != 1) { printk(KERN_WARNING "mt2060 I2C write failed\n"); - return -EREMOTEIO; + rc = -EREMOTEIO; } - return 0; + kfree(buf); + return rc; } // Writes a set of consecutive registers static int mt2060_writeregs(struct mt2060_priv *priv,u8 *buf, u8 len) { int rem, val_len; - u8 xfer_buf[16]; + u8 *xfer_buf; + int rc = 0; struct i2c_msg msg = { - .addr = priv->cfg->i2c_address, .flags = 0, .buf = xfer_buf + .addr = priv->cfg->i2c_address, .flags = 0 }; + xfer_buf = kmalloc(16, GFP_KERNEL); + if (!xfer_buf) + return -ENOMEM; + + msg.buf = xfer_buf; + for (rem = len - 1; rem > 0; rem -= priv->i2c_max_regs) { val_len = min_t(int, rem, priv->i2c_max_regs); msg.len = 1 + val_len; @@ -81,11 +114,13 @@ static int mt2060_writeregs(struct mt2060_priv *priv,u8 *buf, u8 len) if (i2c_transfer(priv->i2c, &msg, 1) != 1) { printk(KERN_WARNING "mt2060 I2C write failed (len=%i)\n", val_len); - return -EREMOTEIO; + rc = -EREMOTEIO; + break; } } - return 0; + kfree(xfer_buf); + return rc; } // Initialisation sequences -- cgit v1.2.3-70-g09d2 From bbd770aee018c8f46d8f43263928440d5ac04b36 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 29 Aug 2017 04:19:43 -0400 Subject: media: venus: init registered list on streamoff Add missing init_list_head for the registered buffer list. Absence of the init could lead to a unhandled kernel paging request as below, when streamon/streamoff are called in row. [338046.571321] Unable to handle kernel paging request at virtual address fffffffffffffe00 [338046.574849] pgd = ffff800034820000 [338046.582381] [fffffffffffffe00] *pgd=00000000b60f5003[338046.582545] , *pud=00000000b1f31003 , *pmd=0000000000000000[338046.592082] [338046.597754] Internal error: Oops: 96000004 [#1] PREEMPT SMP [338046.601671] Modules linked in: venus_enc venus_dec venus_core usb_f_ecm g_ether usb_f_rndis u_ether libcomposite ipt_MASQUERADE nf_nat_masquerade_ipv4 arc4 wcn36xx mac80211 btqcomsmd btqca iptable_nat nf_co] [338046.662408] CPU: 0 PID: 5433 Comm: irq/160-venus Tainted: G W 4.9.39+ #232 [338046.668024] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) [338046.675268] task: ffff80003541cb00 task.stack: ffff800026e20000 [338046.682097] PC is at venus_helper_release_buf_ref+0x28/0x88 [venus_core] [338046.688282] LR is at vdec_event_notify+0xe8/0x150 [venus_dec] [338046.695029] pc : [] lr : [] pstate: a0000145 [338046.701256] sp : ffff800026e23bc0 [338046.708494] x29: ffff800026e23bc0 x28: 0000000000000000 [338046.718853] x27: ffff000000afd4f8 x26: ffff800031faa700 [338046.729253] x25: ffff000000afd790 x24: ffff800031faa618 [338046.739664] x23: ffff800003e18138 x22: ffff800002fc9810 [338046.750109] x21: ffff800026e23c28 x20: 0000000000000001 [338046.760592] x19: ffff80002a13b800 x18: 0000000000000010 [338046.771099] x17: 0000ffffa3d01600 x16: ffff000008100428 [338046.781654] x15: 0000000000000006 x14: ffff000089045ba7 [338046.792250] x13: ffff000009045bb6 x12: 00000000004f37c8 [338046.802894] x11: 0000000000267211 x10: 0000000000000000 [338046.813574] x9 : 0000000000032000 x8 : 00000000dc400000 [338046.824274] x7 : 0000000000000000 x6 : ffff800031faa728 [338046.835005] x5 : ffff80002a13b850 x4 : 0000000000000000 [338046.845793] x3 : fffffffffffffdf8 x2 : 0000000000000000 [338046.856602] x1 : 0000000000000003 x0 : ffff80002a13b800 Signed-off-by: Stanimir Varbanov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/qcom/venus/helpers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c index 68933d208063..9b2a401a4891 100644 --- a/drivers/media/platform/qcom/venus/helpers.c +++ b/drivers/media/platform/qcom/venus/helpers.c @@ -682,6 +682,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q) hfi_session_abort(inst); load_scale_clocks(core); + INIT_LIST_HEAD(&inst->registeredbufs); } venus_helper_buffers_done(inst, VB2_BUF_STATE_ERROR); -- cgit v1.2.3-70-g09d2 From 9b62ccdbc797ae42342bd6ca15719362d2543d24 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 29 Aug 2017 06:21:10 -0400 Subject: media: qcom: camss: Make function vfe_set_selection static The function vfe_set_selection is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: warning: symbol 'vfe_set_selection' was not declared. Should it be static? Signed-off-by: Colin Ian King Acked-by: Todor Tomov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/qcom/camss-8x16/camss-vfe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/camss-8x16/camss-vfe.c b/drivers/media/platform/qcom/camss-8x16/camss-vfe.c index b21b3c2dc77f..b22d2dfcd3c2 100644 --- a/drivers/media/platform/qcom/camss-8x16/camss-vfe.c +++ b/drivers/media/platform/qcom/camss-8x16/camss-vfe.c @@ -2660,7 +2660,7 @@ static int vfe_get_selection(struct v4l2_subdev *sd, * * Return -EINVAL or zero on success */ -int vfe_set_selection(struct v4l2_subdev *sd, +static int vfe_set_selection(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_selection *sel) { -- cgit v1.2.3-70-g09d2 From 81b79c71e546fc15e95e804de2497a448cc51a47 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 30 Aug 2017 13:14:36 -0400 Subject: media: staging/imx: Fix uninitialized variable warning The ret variable can be returned uninitialized in the imx_media_create_pad_vdev_lists() function is imxmd->num_vdevs is zero. Fix it. Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/imx/imx-media-dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/imx/imx-media-dev.c b/drivers/staging/media/imx/imx-media-dev.c index d96f4512224f..b55e5ebba8b4 100644 --- a/drivers/staging/media/imx/imx-media-dev.c +++ b/drivers/staging/media/imx/imx-media-dev.c @@ -400,10 +400,10 @@ static int imx_media_create_pad_vdev_lists(struct imx_media_dev *imxmd) struct media_link, list); ret = imx_media_add_vdev_to_pad(imxmd, vdev, link->source); if (ret) - break; + return ret; } - return ret; + return 0; } /* async subdev complete notifier */ -- cgit v1.2.3-70-g09d2 From e949f61461ab83b094cad564c89a8d2b078b4508 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 31 Aug 2017 12:56:10 -0400 Subject: media: s5p-cec: add NACK detection support The s5p-cec driver returned CEC_TX_STATUS_ERROR for the NACK condition. Some digging into the datasheet uncovered the S5P_CEC_TX_STAT1 register where bit 0 indicates if the transmit was nacked or not. Use this to return the correct CEC_TX_STATUS_NACK status to userspace. This was the only driver that couldn't tell a NACK from another error, and that was very unusual. And a potential problem for applications as well. Tested with my Odroid-U3. Signed-off-by: Hans Verkuil Acked-by: Sylwester Nawrocki Cc: # for v4.12 and up Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-cec/exynos_hdmi_cecctrl.c | 3 ++- drivers/media/platform/s5p-cec/s5p_cec.c | 11 ++++++++++- drivers/media/platform/s5p-cec/s5p_cec.h | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/s5p-cec/exynos_hdmi_cecctrl.c b/drivers/media/platform/s5p-cec/exynos_hdmi_cecctrl.c index 1edf667d562a..146ae6f25cdb 100644 --- a/drivers/media/platform/s5p-cec/exynos_hdmi_cecctrl.c +++ b/drivers/media/platform/s5p-cec/exynos_hdmi_cecctrl.c @@ -172,7 +172,8 @@ u32 s5p_cec_get_status(struct s5p_cec_dev *cec) { u32 status = 0; - status = readb(cec->reg + S5P_CEC_STATUS_0); + status = readb(cec->reg + S5P_CEC_STATUS_0) & 0xf; + status |= (readb(cec->reg + S5P_CEC_TX_STAT1) & 0xf) << 4; status |= readb(cec->reg + S5P_CEC_STATUS_1) << 8; status |= readb(cec->reg + S5P_CEC_STATUS_2) << 16; status |= readb(cec->reg + S5P_CEC_STATUS_3) << 24; diff --git a/drivers/media/platform/s5p-cec/s5p_cec.c b/drivers/media/platform/s5p-cec/s5p_cec.c index 58d200e7c838..8837e2678bde 100644 --- a/drivers/media/platform/s5p-cec/s5p_cec.c +++ b/drivers/media/platform/s5p-cec/s5p_cec.c @@ -92,7 +92,10 @@ static irqreturn_t s5p_cec_irq_handler(int irq, void *priv) dev_dbg(cec->dev, "irq received\n"); if (status & CEC_STATUS_TX_DONE) { - if (status & CEC_STATUS_TX_ERROR) { + if (status & CEC_STATUS_TX_NACK) { + dev_dbg(cec->dev, "CEC_STATUS_TX_NACK set\n"); + cec->tx = STATE_NACK; + } else if (status & CEC_STATUS_TX_ERROR) { dev_dbg(cec->dev, "CEC_STATUS_TX_ERROR set\n"); cec->tx = STATE_ERROR; } else { @@ -135,6 +138,12 @@ static irqreturn_t s5p_cec_irq_handler_thread(int irq, void *priv) cec_transmit_done(cec->adap, CEC_TX_STATUS_OK, 0, 0, 0, 0); cec->tx = STATE_IDLE; break; + case STATE_NACK: + cec_transmit_done(cec->adap, + CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_NACK, + 0, 1, 0, 0); + cec->tx = STATE_IDLE; + break; case STATE_ERROR: cec_transmit_done(cec->adap, CEC_TX_STATUS_MAX_RETRIES | CEC_TX_STATUS_ERROR, diff --git a/drivers/media/platform/s5p-cec/s5p_cec.h b/drivers/media/platform/s5p-cec/s5p_cec.h index 8bcd8dc1aeb9..86ded522ef27 100644 --- a/drivers/media/platform/s5p-cec/s5p_cec.h +++ b/drivers/media/platform/s5p-cec/s5p_cec.h @@ -35,6 +35,7 @@ #define CEC_STATUS_TX_TRANSFERRING (1 << 1) #define CEC_STATUS_TX_DONE (1 << 2) #define CEC_STATUS_TX_ERROR (1 << 3) +#define CEC_STATUS_TX_NACK (1 << 4) #define CEC_STATUS_TX_BYTES (0xFF << 8) #define CEC_STATUS_RX_RUNNING (1 << 16) #define CEC_STATUS_RX_RECEIVING (1 << 17) @@ -55,6 +56,7 @@ enum cec_state { STATE_IDLE, STATE_BUSY, STATE_DONE, + STATE_NACK, STATE_ERROR }; -- cgit v1.2.3-70-g09d2 From 845d6524d69b40bd6abd61dc1264a8657159aa55 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Thu, 14 Sep 2017 11:23:38 -0400 Subject: media: cec: Respond to unregistered initiators, when applicable Running CEC 1.4 compliance test we get the following error on test 11.1.6.2: "ERROR: The DUT did not broadcast a message to the unregistered device." Fix this by letting GIVE_PHYSICAL_ADDR message respond to unregistered device. Also, GIVE_DEVICE_VENDOR_ID and GIVE_FEATURES fall in the same category so, respond also to these messages. With this fix we pass CEC 1.4 official compliance. Signed-off-by: Jose Abreu Cc: Joao Pinto Signed-off-by: Hans Verkuil Cc: # for v4.10 and up Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/cec-adap.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index dd769e40416f..84d1b67f850c 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -1794,12 +1794,19 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, */ switch (msg->msg[1]) { case CEC_MSG_GET_CEC_VERSION: - case CEC_MSG_GIVE_DEVICE_VENDOR_ID: case CEC_MSG_ABORT: case CEC_MSG_GIVE_DEVICE_POWER_STATUS: - case CEC_MSG_GIVE_PHYSICAL_ADDR: case CEC_MSG_GIVE_OSD_NAME: + /* + * These messages reply with a directed message, so ignore if + * the initiator is Unregistered. + */ + if (!adap->passthrough && from_unregistered) + return 0; + /* Fall through */ + case CEC_MSG_GIVE_DEVICE_VENDOR_ID: case CEC_MSG_GIVE_FEATURES: + case CEC_MSG_GIVE_PHYSICAL_ADDR: /* * Skip processing these messages if the passthrough mode * is on. @@ -1807,7 +1814,7 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, if (adap->passthrough) goto skip_processing; /* Ignore if addressing is wrong */ - if (is_broadcast || from_unregistered) + if (is_broadcast) return 0; break; -- cgit v1.2.3-70-g09d2 From db6321a1af8432df983048d2dd8529525589f71d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Sep 2017 08:35:36 -0400 Subject: media: platform: VIDEO_QCOM_CAMSS should depend on HAS_DMA If NO_DMA=y: warning: (TOUCHSCREEN_SUR40 && VIDEO_TW68 && VIDEO_CX23885 && VIDEO_CX25821 && VIDEO_CX88 && VIDEO_SAA7134 && VIDEO_COBALT && VIDEO_QCOM_CAMSS) selects VIDEOBUF2_DMA_SG which has unmet direct dependencies (MEDIA_SUPPORT && HAS_DMA) and ERROR: "bad_dma_ops" [drivers/media/v4l2-core/videobuf2-dma-sg.ko] undefined! ERROR: "bad_dma_ops" [drivers/media/platform/qcom/camss-8x16/qcom-camss.ko] undefined! VIDEO_QCOM_CAMSS selects VIDEOBUF2_DMA_SG, which bypasses its dependency on HAS_DMA. Make VIDEO_QCOM_CAMSS depend on HAS_DMA to fix this. Fixes: f5c074947f56533c ("media: camss: Enable building") Signed-off-by: Geert Uytterhoeven Signed-off-by: Hans Verkuil --- drivers/media/platform/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 7e7cc49b8674..3c4f7fa7b9d8 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -112,7 +112,7 @@ config VIDEO_PXA27x config VIDEO_QCOM_CAMSS tristate "Qualcomm 8x16 V4L2 Camera Subsystem driver" - depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API + depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API && HAS_DMA depends on (ARCH_QCOM && IOMMU_DMA) || COMPILE_TEST select VIDEOBUF2_DMA_SG select V4L2_FWNODE -- cgit v1.2.3-70-g09d2 From 05cf97e7a619fc7ede81ee6bb8ebfa7531b633f5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 22 Sep 2017 01:01:11 +0200 Subject: cnic: Fix an error handling path in 'cnic_alloc_bnx2x_resc()' All the error handling paths 'goto error', except this one. We should also go to error in this case, or some resources will be leaking. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/cnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index cec94bbb2ea5..8bc126a156e8 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -1278,7 +1278,7 @@ static int cnic_alloc_bnx2x_resc(struct cnic_dev *dev) ret = cnic_alloc_dma(dev, kwq_16_dma, pages, 0); if (ret) - return -ENOMEM; + goto error; n = CNIC_PAGE_SIZE / CNIC_KWQ16_DATA_SIZE; for (i = 0, j = 0; i < cp->max_cid_space; i++) { -- cgit v1.2.3-70-g09d2 From 5c346525d3591cb032eca86d0f904cc01f1069ff Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 21 Sep 2017 18:00:36 -0600 Subject: net: qualcomm: rmnet: Fix rcu splat in rmnet_is_real_dev_registered Xiaolong reported a suspicious rcu_dereference_check in the device unregister notifier callback. Since we do not dereference the rx_handler_data, it's ok to just check for the value of the pointer. Note that this section is already protected by rtnl_lock. [ 101.364846] WARNING: suspicious RCU usage [ 101.365654] 4.13.0-rc6-01701-gceed73a #1 Not tainted [ 101.370873] ----------------------------- [ 101.372472] drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c:57 suspicious rcu_dereference_check() usage! [ 101.374427] [ 101.374427] other info that might help us debug this: [ 101.374427] [ 101.387491] [ 101.387491] rcu_scheduler_active = 2, debug_locks = 1 [ 101.389368] 1 lock held by trinity-main/2809: [ 101.390736] #0: (rtnl_mutex){+.+.+.}, at: [<8146085b>] rtnl_lock+0xf/0x11 [ 101.395482] [ 101.395482] stack backtrace: [ 101.396948] CPU: 0 PID: 2809 Comm: trinity-main Not tainted 4.13.0-rc6-01701-gceed73a #1 [ 101.398857] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-20161025_171302-gandalf 04/01/2014 [ 101.401079] Call Trace: [ 101.401656] dump_stack+0xa1/0xeb [ 101.402871] lockdep_rcu_suspicious+0xc7/0xd0 [ 101.403665] rmnet_is_real_dev_registered+0x40/0x4e [ 101.405199] rmnet_config_notify_cb+0x2c/0x142 [ 101.406344] ? wireless_nlevent_flush+0x47/0x71 [ 101.407385] notifier_call_chain+0x2d/0x47 [ 101.408645] raw_notifier_call_chain+0xc/0xe [ 101.409882] call_netdevice_notifiers_info+0x41/0x49 [ 101.411402] call_netdevice_notifiers+0xc/0xe [ 101.412713] rollback_registered_many+0x268/0x36e [ 101.413702] rollback_registered+0x39/0x56 [ 101.414965] unregister_netdevice_queue+0x79/0x88 [ 101.415908] unregister_netdev+0x16/0x1d Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Subash Abhinov Kasiviswanathan Reported-by: kernel test robot Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 98f22551eb45..1e33aea59f50 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -51,10 +51,7 @@ struct rmnet_walk_data { static int rmnet_is_real_dev_registered(const struct net_device *real_dev) { - rx_handler_func_t *rx_handler; - - rx_handler = rcu_dereference(real_dev->rx_handler); - return (rx_handler == rmnet_rx_handler); + return rcu_access_pointer(real_dev->rx_handler) == rmnet_rx_handler; } /* Needs rtnl lock */ -- cgit v1.2.3-70-g09d2 From 245a396a9b1a67ac5c3228737c261b3e48708a2a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 23 Sep 2017 08:06:18 +0200 Subject: iio: adc: twl4030: Fix an error handling path in 'twl4030_madc_probe()' If 'devm_regulator_get()' fails, we should go through the existing error handling path instead of returning directly, as done is all the other error handling paths in this function. Fixes: 7cc97d77ee8a ("iio: adc: twl4030: Fix ADC[3:6] readings") Signed-off-by: Christophe JAILLET Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/twl4030-madc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c index bd3d37fc2144..252f5890311e 100644 --- a/drivers/iio/adc/twl4030-madc.c +++ b/drivers/iio/adc/twl4030-madc.c @@ -887,8 +887,10 @@ static int twl4030_madc_probe(struct platform_device *pdev) /* Enable 3v1 bias regulator for MADC[3:6] */ madc->usb3v1 = devm_regulator_get(madc->dev, "vusb3v1"); - if (IS_ERR(madc->usb3v1)) - return -ENODEV; + if (IS_ERR(madc->usb3v1)) { + ret = -ENODEV; + goto err_i2c; + } ret = regulator_enable(madc->usb3v1); if (ret) -- cgit v1.2.3-70-g09d2 From 7f70be6e4025db0551e6863e7eb9cca07122695c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 23 Sep 2017 08:06:19 +0200 Subject: iio: adc: twl4030: Disable the vusb3v1 rugulator in the error handling path of 'twl4030_madc_probe()' Commit 7cc97d77ee8a has introduced a call to 'regulator_disable()' in the .remove function. So we should also have such a call in the .probe function in case of error after a successful 'regulator_enable()' call. Add a new label for that and use it. Fixes: 7cc97d77ee8a ("iio: adc: twl4030: Fix ADC[3:6] readings") Signed-off-by: Christophe JAILLET Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/twl4030-madc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c index 252f5890311e..0c86fbb3033e 100644 --- a/drivers/iio/adc/twl4030-madc.c +++ b/drivers/iio/adc/twl4030-madc.c @@ -899,11 +899,13 @@ static int twl4030_madc_probe(struct platform_device *pdev) ret = iio_device_register(iio_dev); if (ret) { dev_err(&pdev->dev, "could not register iio device\n"); - goto err_i2c; + goto err_usb3v1; } return 0; +err_usb3v1: + regulator_disable(madc->usb3v1); err_i2c: twl4030_madc_set_current_generator(madc, 0, 0); err_current_generator: -- cgit v1.2.3-70-g09d2 From 53063846affd27def6f96e13a9fb80b9a3c2d126 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 23 Sep 2017 08:06:20 +0200 Subject: iio: adc: twl4030: Return an error if we can not enable the vusb3v1 regulator in 'twl4030_madc_probe()' If we can not enable the regulator, go through the error handling path instead of silently continuing. Fixes: 7cc97d77ee8a ("iio: adc: twl4030: Fix ADC[3:6] readings") Signed-off-by: Christophe JAILLET Signed-off-by: Jonathan Cameron --- drivers/iio/adc/twl4030-madc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/twl4030-madc.c b/drivers/iio/adc/twl4030-madc.c index 0c86fbb3033e..28df096e84ec 100644 --- a/drivers/iio/adc/twl4030-madc.c +++ b/drivers/iio/adc/twl4030-madc.c @@ -893,8 +893,10 @@ static int twl4030_madc_probe(struct platform_device *pdev) } ret = regulator_enable(madc->usb3v1); - if (ret) + if (ret) { dev_err(madc->dev, "could not enable 3v1 bias regulator\n"); + goto err_i2c; + } ret = iio_device_register(iio_dev); if (ret) { -- cgit v1.2.3-70-g09d2 From 0a56eabc4e3f730782e4a9f3af4f60aa03a8a849 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 18 Sep 2017 12:05:30 +0200 Subject: iio: trigger: stm32-timer: preset shouldn't be buffered Currently, setting preset value (ARR) will update directly 'Auto reload value' only on 1st write access. But then, ARPE is set. This makes ARR a shadow register. Preset value should be updated upon each write request: ensure ARPE is 0. This fixes successive writes to preset attribute. Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index a9bc5b603b86..4cec28af3ecf 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -681,8 +681,9 @@ static ssize_t stm32_count_set_preset(struct iio_dev *indio_dev, if (ret) return ret; + /* TIMx_ARR register shouldn't be buffered (ARPE=0) */ + regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); regmap_write(priv->regmap, TIM_ARR, preset); - regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, TIM_CR1_ARPE); return len; } -- cgit v1.2.3-70-g09d2 From b7a9776c1f9443326632486fcbd82dca82f8511e Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 18 Sep 2017 12:05:31 +0200 Subject: iio: trigger: stm32-timer: fix a corner case to write preset Balance timer start routine that sets ARPE: clear it in stop routine. This fixes a corner case, when timer is used successively as trigger (with sampling_frequency start/stop routines), then as a counter (with preset). Fixes: 93fbe91b5521 ("iio: Add STM32 timer trigger driver") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 4cec28af3ecf..a30ba6e1dfec 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -174,6 +174,7 @@ static void stm32_timer_stop(struct stm32_timer_trigger *priv) clk_disable(priv->clk); /* Stop timer */ + regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, 0); regmap_write(priv->regmap, TIM_PSC, 0); regmap_write(priv->regmap, TIM_ARR, 0); -- cgit v1.2.3-70-g09d2 From 4fb840c95f82652cece7352be9080884cafb92a0 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 18 Sep 2017 18:24:15 +0200 Subject: iio: adc: stm32: fix bad error check on max_channels Fix a bad error check when counting 'st,adc-channels' array elements. This is seen when all channels are in use simultaneously. Fixes: 64ad7f643 ("iio: adc: stm32: introduce compatible data cfg") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 6bc602891f2f..e93244bc3edd 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1656,7 +1656,7 @@ static int stm32_adc_chan_of_init(struct iio_dev *indio_dev) num_channels = of_property_count_u32_elems(node, "st,adc-channels"); if (num_channels < 0 || - num_channels >= adc_info->max_channels) { + num_channels > adc_info->max_channels) { dev_err(&indio_dev->dev, "Bad st,adc-channels?\n"); return num_channels < 0 ? num_channels : -EINVAL; } -- cgit v1.2.3-70-g09d2 From 0964e40947a630a2a6f724e968246992f97bcf1c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 22 Aug 2017 15:33:00 +0200 Subject: iio: adc: mcp320x: Fix oops on module unload The driver calls spi_get_drvdata() in its ->remove hook even though it has never called spi_set_drvdata(). Stack trace for posterity: Unable to handle kernel NULL pointer dereference at virtual address 00000220 Internal error: Oops: 5 [#1] SMP ARM [<8072f564>] (mutex_lock) from [<7f1400d0>] (iio_device_unregister+0x24/0x7c [industrialio]) [<7f1400d0>] (iio_device_unregister [industrialio]) from [<7f15e020>] (mcp320x_remove+0x20/0x30 [mcp320x]) [<7f15e020>] (mcp320x_remove [mcp320x]) from [<8055a8cc>] (spi_drv_remove+0x2c/0x44) [<8055a8cc>] (spi_drv_remove) from [<805087bc>] (__device_release_driver+0x98/0x134) [<805087bc>] (__device_release_driver) from [<80509180>] (driver_detach+0xdc/0xe0) [<80509180>] (driver_detach) from [<8050823c>] (bus_remove_driver+0x5c/0xb0) [<8050823c>] (bus_remove_driver) from [<80509ab0>] (driver_unregister+0x38/0x58) [<80509ab0>] (driver_unregister) from [<7f15e69c>] (mcp320x_driver_exit+0x14/0x1c [mcp320x]) [<7f15e69c>] (mcp320x_driver_exit [mcp320x]) from [<801a78d0>] (SyS_delete_module+0x184/0x1d0) [<801a78d0>] (SyS_delete_module) from [<80108100>] (ret_fast_syscall+0x0/0x1c) Fixes: f5ce4a7a9291 ("iio: adc: add driver for MCP3204/08 12-bit ADC") Cc: Oskar Andero Signed-off-by: Lukas Wunner Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mcp320x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/mcp320x.c b/drivers/iio/adc/mcp320x.c index 634717ae12f3..45d043c9a888 100644 --- a/drivers/iio/adc/mcp320x.c +++ b/drivers/iio/adc/mcp320x.c @@ -312,6 +312,7 @@ static int mcp320x_probe(struct spi_device *spi) indio_dev->name = spi_get_device_id(spi)->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &mcp320x_info; + spi_set_drvdata(spi, indio_dev); chip_info = &mcp320x_chip_infos[spi_get_device_id(spi)->driver_data]; indio_dev->channels = chip_info->channels; -- cgit v1.2.3-70-g09d2 From e6f4794371ee7cce1339e7ca9542f1e703c5f84a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 22 Aug 2017 15:33:00 +0200 Subject: iio: adc: mcp320x: Fix readout of negative voltages Commit f686a36b4b79 ("iio: adc: mcp320x: Add support for mcp3301") returns a signed voltage from mcp320x_adc_conversion() but neglects that the caller interprets a negative return value as failure. Only mcp3301 (and the upcoming mcp3550/1/3) is affected as the other chips are incapable of measuring negative voltages. Fix and while at it, add mcp3301 to the list of supported chips at the top of the file. Fixes: f686a36b4b79 ("iio: adc: mcp320x: Add support for mcp3301") Cc: Andrea Galbusera Signed-off-by: Lukas Wunner Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mcp320x.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/iio/adc/mcp320x.c b/drivers/iio/adc/mcp320x.c index 45d043c9a888..071dd23a33d9 100644 --- a/drivers/iio/adc/mcp320x.c +++ b/drivers/iio/adc/mcp320x.c @@ -17,6 +17,8 @@ * MCP3204 * MCP3208 * ------------ + * 13 bit converter + * MCP3301 * * Datasheet can be found here: * http://ww1.microchip.com/downloads/en/DeviceDoc/21293C.pdf mcp3001 @@ -96,7 +98,7 @@ static int mcp320x_channel_to_tx_data(int device_index, } static int mcp320x_adc_conversion(struct mcp320x *adc, u8 channel, - bool differential, int device_index) + bool differential, int device_index, int *val) { int ret; @@ -117,19 +119,25 @@ static int mcp320x_adc_conversion(struct mcp320x *adc, u8 channel, switch (device_index) { case mcp3001: - return (adc->rx_buf[0] << 5 | adc->rx_buf[1] >> 3); + *val = (adc->rx_buf[0] << 5 | adc->rx_buf[1] >> 3); + return 0; case mcp3002: case mcp3004: case mcp3008: - return (adc->rx_buf[0] << 2 | adc->rx_buf[1] >> 6); + *val = (adc->rx_buf[0] << 2 | adc->rx_buf[1] >> 6); + return 0; case mcp3201: - return (adc->rx_buf[0] << 7 | adc->rx_buf[1] >> 1); + *val = (adc->rx_buf[0] << 7 | adc->rx_buf[1] >> 1); + return 0; case mcp3202: case mcp3204: case mcp3208: - return (adc->rx_buf[0] << 4 | adc->rx_buf[1] >> 4); + *val = (adc->rx_buf[0] << 4 | adc->rx_buf[1] >> 4); + return 0; case mcp3301: - return sign_extend32((adc->rx_buf[0] & 0x1f) << 8 | adc->rx_buf[1], 12); + *val = sign_extend32((adc->rx_buf[0] & 0x1f) << 8 + | adc->rx_buf[1], 12); + return 0; default: return -EINVAL; } @@ -150,12 +158,10 @@ static int mcp320x_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: ret = mcp320x_adc_conversion(adc, channel->address, - channel->differential, device_index); - + channel->differential, device_index, val); if (ret < 0) goto out; - *val = ret; ret = IIO_VAL_INT; break; -- cgit v1.2.3-70-g09d2 From 4b1f0c31f96c45e8521dd84aae50f2aa4aecfb7b Mon Sep 17 00:00:00 2001 From: Colin Parker Date: Mon, 28 Aug 2017 16:21:39 -0700 Subject: IIO: BME280: Updates to Humidity readings need ctrl_reg write! The ctrl_reg register needs to be written after any write to the humidity registers. The value written to the ctrl_reg register does not necessarily need to change, but a write operation must occur. The regmap_update_bits functions will not write to a register if the register value matches the value to be written. This saves unnecessary bus operations. The change in this patch forces a bus write during the chip_config operation by switching to regmap_write_bits. This will fix issues where the Humidity Sensor Oversampling bits are not updated after initialization. Signed-off-by: Colin Parker Acked-by: Andreas Klinger Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index d82b788374b6..e442c5248427 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -558,7 +558,7 @@ static int bmp280_chip_config(struct bmp280_data *data) u8 osrs = BMP280_OSRS_TEMP_X(data->oversampling_temp + 1) | BMP280_OSRS_PRESS_X(data->oversampling_press + 1); - ret = regmap_update_bits(data->regmap, BMP280_REG_CTRL_MEAS, + ret = regmap_write_bits(data->regmap, BMP280_REG_CTRL_MEAS, BMP280_OSRS_TEMP_MASK | BMP280_OSRS_PRESS_MASK | BMP280_MODE_MASK, -- cgit v1.2.3-70-g09d2 From 7fc10de8d49a748c476532c9d8e8fe19e548dd67 Mon Sep 17 00:00:00 2001 From: Dragos Bogdan Date: Tue, 5 Sep 2017 15:14:45 +0300 Subject: iio: ad_sigma_delta: Implement a dedicated reset function Since most of the SD ADCs have the option of reseting the serial interface by sending a number of SCLKs with CS = 0 and DIN = 1, a dedicated function that can do this is usefull. Needed for the patch: iio: ad7793: Fix the serial interface reset Signed-off-by: Dragos Bogdan Acked-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 28 ++++++++++++++++++++++++++++ include/linux/iio/adc/ad_sigma_delta.h | 3 +++ 2 files changed, 31 insertions(+) diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index d10bd0c97233..22c4c17cd996 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -177,6 +177,34 @@ out: } EXPORT_SYMBOL_GPL(ad_sd_read_reg); +/** + * ad_sd_reset() - Reset the serial interface + * + * @sigma_delta: The sigma delta device + * @reset_length: Number of SCLKs with DIN = 1 + * + * Returns 0 on success, an error code otherwise. + **/ +int ad_sd_reset(struct ad_sigma_delta *sigma_delta, + unsigned int reset_length) +{ + uint8_t *buf; + unsigned int size; + int ret; + + size = DIV_ROUND_UP(reset_length, 8); + buf = kcalloc(size, sizeof(*buf), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memset(buf, 0xff, size); + ret = spi_write(sigma_delta->spi, buf, size); + kfree(buf); + + return ret; +} +EXPORT_SYMBOL_GPL(ad_sd_reset); + static int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, unsigned int mode, unsigned int channel) { diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 5ba430cc9a87..1fc7abd28b0b 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -111,6 +111,9 @@ int ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, int ad_sd_read_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, unsigned int size, unsigned int *val); +int ad_sd_reset(struct ad_sigma_delta *sigma_delta, + unsigned int reset_length); + int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val); int ad_sd_calibrate_all(struct ad_sigma_delta *sigma_delta, -- cgit v1.2.3-70-g09d2 From 7ee3b7ebcb74714df6d94c8f500f307e1ee5dda5 Mon Sep 17 00:00:00 2001 From: Dragos Bogdan Date: Tue, 5 Sep 2017 15:16:13 +0300 Subject: iio: ad7793: Fix the serial interface reset The serial interface can be reset by writing 32 consecutive 1s to the device. 'ret' was initialized correctly but its value was overwritten when ad7793_check_platform_data() was called. Since a dedicated reset function is present now, it should be used instead. Fixes: 2edb769d246e ("iio:ad7793: Add support for the ad7798 and ad7799") Signed-off-by: Dragos Bogdan Acked-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7793.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index e6706a09e100..47c3d7f32900 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -257,7 +257,7 @@ static int ad7793_setup(struct iio_dev *indio_dev, unsigned int vref_mv) { struct ad7793_state *st = iio_priv(indio_dev); - int i, ret = -1; + int i, ret; unsigned long long scale_uv; u32 id; @@ -266,7 +266,7 @@ static int ad7793_setup(struct iio_dev *indio_dev, return ret; /* reset the serial interface */ - ret = spi_write(st->sd.spi, (u8 *)&ret, sizeof(ret)); + ret = ad_sd_reset(&st->sd, 32); if (ret < 0) goto out; usleep_range(500, 2000); /* Wait for at least 500us */ -- cgit v1.2.3-70-g09d2 From 3d62c78a6eb9a7d67bace9622b66ad51e81c5f9b Mon Sep 17 00:00:00 2001 From: Matt Fornero Date: Tue, 5 Sep 2017 16:34:10 +0200 Subject: iio: core: Return error for failed read_reg If an IIO device returns an error code for a read access via debugfs, it is currently ignored by the IIO core (other than emitting an error message). Instead, return this error code to user space, so upper layers can detect it correctly. Signed-off-by: Matt Fornero Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 17ec4cee51dc..a47428b4d31b 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -310,8 +310,10 @@ static ssize_t iio_debugfs_read_reg(struct file *file, char __user *userbuf, ret = indio_dev->info->debugfs_reg_access(indio_dev, indio_dev->cached_reg_addr, 0, &val); - if (ret) + if (ret) { dev_err(indio_dev->dev.parent, "%s: read failed\n", __func__); + return ret; + } len = snprintf(buf, sizeof(buf), "0x%X\n", val); -- cgit v1.2.3-70-g09d2 From f790923f146140a261ad211e5baf75d169f16fb2 Mon Sep 17 00:00:00 2001 From: Stefan Popa Date: Thu, 14 Sep 2017 16:50:28 +0300 Subject: staging: iio: ad7192: Fix - use the dedicated reset function avoiding dma from stack. Depends on: 691c4b95d1 ("iio: ad_sigma_delta: Implement a dedicated reset function") SPI host drivers can use DMA to transfer data, so the buffer should be properly allocated. Keeping it on the stack could cause an undefined behavior. The dedicated reset function solves this issue. Signed-off-by: Stefan Popa Acked-by: Lars-Peter Clausen Acked-by: Michael Hennerich Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/ad7192.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c index d11c6de9c777..6150d2780e22 100644 --- a/drivers/staging/iio/adc/ad7192.c +++ b/drivers/staging/iio/adc/ad7192.c @@ -223,11 +223,9 @@ static int ad7192_setup(struct ad7192_state *st, struct iio_dev *indio_dev = spi_get_drvdata(st->sd.spi); unsigned long long scale_uv; int i, ret, id; - u8 ones[6]; /* reset the serial interface */ - memset(&ones, 0xFF, 6); - ret = spi_write(st->sd.spi, &ones, 6); + ret = ad_sd_reset(&st->sd, 48); if (ret < 0) goto out; usleep_range(500, 1000); /* Wait for at least 500us */ -- cgit v1.2.3-70-g09d2 From 57999d1107c1e60c2ca7088f2ac0f819e2f554b3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Sep 2017 23:43:25 +0300 Subject: USB: devio: Prevent integer overflow in proc_do_submiturb() There used to be an integer overflow check in proc_do_submiturb() but we removed it. It turns out that it's still required. The uurb->buffer_length variable is a signed integer and it's controlled by the user. It can lead to an integer overflow when we do: num_sgs = DIV_ROUND_UP(uurb->buffer_length, USB_SG_SIZE); If we strip away the macro then that line looks like this: num_sgs = (uurb->buffer_length + USB_SG_SIZE - 1) / USB_SG_SIZE; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ It's the first addition which can overflow. Fixes: 1129d270cbfb ("USB: Increase usbfs transfer limit") Cc: stable Signed-off-by: Dan Carpenter Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 318bb3b96687..e9326f31db8d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -140,6 +140,9 @@ module_param(usbfs_memory_mb, uint, 0644); MODULE_PARM_DESC(usbfs_memory_mb, "maximum MB allowed for usbfs buffers (0 = no limit)"); +/* Hard limit, necessary to avoid arithmetic overflow */ +#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000) + static atomic64_t usbfs_memory_usage; /* Total memory currently allocated */ /* Check whether it's okay to allocate more memory for a transfer */ @@ -1460,6 +1463,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb USBDEVFS_URB_ZERO_PACKET | USBDEVFS_URB_NO_INTERRUPT)) return -EINVAL; + if ((unsigned int)uurb->buffer_length >= USBFS_XFER_MAX) + return -EINVAL; if (uurb->buffer_length > 0 && !uurb->buffer) return -EINVAL; if (!(uurb->type == USBDEVFS_URB_TYPE_CONTROL && -- cgit v1.2.3-70-g09d2 From fa1ed74eb1c233be6131ec92df21ab46499a15b6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Sep 2017 23:43:46 +0300 Subject: USB: devio: Don't corrupt user memory The user buffer has "uurb->buffer_length" bytes. If the kernel has more information than that, we should truncate it instead of writing past the end of the user's buffer. I added a WARN_ONCE() to help the user debug the issue. Reported-by: Alan Stern Cc: stable Signed-off-by: Dan Carpenter Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e9326f31db8d..4664e543cf2f 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1576,7 +1576,11 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb totlen += isopkt[u].length; } u *= sizeof(struct usb_iso_packet_descriptor); - uurb->buffer_length = totlen; + if (totlen <= uurb->buffer_length) + uurb->buffer_length = totlen; + else + WARN_ONCE(1, "uurb->buffer_length is too short %d vs %d", + totlen, uurb->buffer_length); break; default: -- cgit v1.2.3-70-g09d2 From 8fec9355a968ad240f3a2e9ad55b823cf1cc52ff Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 22 Sep 2017 22:18:18 +0200 Subject: USB: cdc-wdm: ignore -EPIPE from GetEncapsulatedResponse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver will forward errors to userspace after turning most of them into -EIO. But all status codes are not equal. The -EPIPE (stall) in particular can be seen more as a result of normal USB signaling than an actual error. The state is automatically cleared by the USB core without intervention from either driver or userspace. And most devices and firmwares will never trigger a stall as a result of GetEncapsulatedResponse. This is in fact a requirement for CDC WDM devices. Quoting from section 7.1 of the CDC WMC spec revision 1.1: The function shall not return STALL in response to GetEncapsulatedResponse. But this driver is also handling GetEncapsulatedResponse on behalf of the qmi_wwan and cdc_mbim drivers. Unfortunately the relevant specs are not as clear wrt stall. So some QMI and MBIM devices *will* occasionally stall, causing the GetEncapsulatedResponse to return an -EPIPE status. Translating this into -EIO for userspace has proven to be harmful. Treating it as an empty read is safer, making the driver behave as if the device was conforming to the CDC WDM spec. There have been numerous reports of issues related to -EPIPE errors from some newer CDC MBIM devices in particular, like for example the Fibocom L831-EAU. Testing on this device has shown that the issues go away if we simply ignore the -EPIPE status. Similar handling of -EPIPE is already known from e.g. usb_get_string() The -EPIPE log message is still kept to let us track devices with this unexpected behaviour, hoping that it attracts attention from firmware developers. Cc: Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100938 Reported-and-tested-by: Christian Ehrig Reported-and-tested-by: Patrick Chilton Reported-and-tested-by: Andreas Böhler Signed-off-by: Bjørn Mork Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 5aacea1978a5..3e865dbf878c 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -190,8 +190,10 @@ static void wdm_in_callback(struct urb *urb) /* * only set a new error if there is no previous error. * Errors are only cleared during read/open + * Avoid propagating -EPIPE (stall) to userspace since it is + * better handled as an empty read */ - if (desc->rerr == 0) + if (desc->rerr == 0 && status != -EPIPE) desc->rerr = status; if (length + desc->length > desc->wMaxCommand) { -- cgit v1.2.3-70-g09d2 From 62e082430ea4bb5b28909ca4375bb683931e22aa Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 20 Sep 2017 07:29:49 -0400 Subject: dm ioctl: fix alignment of event number in the device list The size of struct dm_name_list is different on 32-bit and 64-bit kernels (so "(nl + 1)" differs between 32-bit and 64-bit kernels). This mismatch caused some harmless difference in padding when using 32-bit or 64-bit kernel. Commit 23d70c5e52dd ("dm ioctl: report event number in DM_LIST_DEVICES") added reporting event number in the output of DM_LIST_DEVICES_CMD. This difference in padding makes it impossible for userspace to determine the location of the event number (the location would be different when running on 32-bit and 64-bit kernels). Fix the padding by using offsetof(struct dm_name_list, name) instead of sizeof(struct dm_name_list) to determine the location of entries. Also, the ioctl version number is incremented to 37 so that userspace can use the version number to determine that the event number is present and correctly located. In addition, a global event is now raised when a DM device is created, removed, renamed or when table is swapped, so that the user can monitor for device changes. Reported-by: Eugene Syromiatnikov Fixes: 23d70c5e52dd ("dm ioctl: report event number in DM_LIST_DEVICES") Cc: stable@vger.kernel.org # 4.13 Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 1 + drivers/md/dm-ioctl.c | 37 ++++++++++++++++++++++++------------- drivers/md/dm.c | 10 ++++++++-- include/uapi/linux/dm-ioctl.h | 4 ++-- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 24eddbdf2ab4..203144762f36 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -149,5 +149,6 @@ static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen extern atomic_t dm_global_event_nr; extern wait_queue_head_t dm_global_eventq; +void dm_issue_global_event(void); #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 8756a6850431..e52676fa9832 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -477,9 +477,13 @@ static int remove_all(struct file *filp, struct dm_ioctl *param, size_t param_si * Round up the ptr to an 8-byte boundary. */ #define ALIGN_MASK 7 +static inline size_t align_val(size_t val) +{ + return (val + ALIGN_MASK) & ~ALIGN_MASK; +} static inline void *align_ptr(void *ptr) { - return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK); + return (void *)align_val((size_t)ptr); } /* @@ -505,7 +509,7 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ struct hash_cell *hc; size_t len, needed = 0; struct gendisk *disk; - struct dm_name_list *nl, *old_nl = NULL; + struct dm_name_list *orig_nl, *nl, *old_nl = NULL; uint32_t *event_nr; down_write(&_hash_lock); @@ -516,17 +520,15 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ */ for (i = 0; i < NUM_BUCKETS; i++) { list_for_each_entry (hc, _name_buckets + i, name_list) { - needed += sizeof(struct dm_name_list); - needed += strlen(hc->name) + 1; - needed += ALIGN_MASK; - needed += (sizeof(uint32_t) + ALIGN_MASK) & ~ALIGN_MASK; + needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1); + needed += align_val(sizeof(uint32_t)); } } /* * Grab our output buffer. */ - nl = get_result_buffer(param, param_size, &len); + nl = orig_nl = get_result_buffer(param, param_size, &len); if (len < needed) { param->flags |= DM_BUFFER_FULL_FLAG; goto out; @@ -549,11 +551,16 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ strcpy(nl->name, hc->name); old_nl = nl; - event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1); + event_nr = align_ptr(nl->name + strlen(hc->name) + 1); *event_nr = dm_get_event_nr(hc->md); nl = align_ptr(event_nr + 1); } } + /* + * If mismatch happens, security may be compromised due to buffer + * overflow, so it's better to crash. + */ + BUG_ON((char *)nl - (char *)orig_nl != needed); out: up_write(&_hash_lock); @@ -1621,7 +1628,8 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para * which has a variable size, is not used by the function processing * the ioctl. */ -#define IOCTL_FLAGS_NO_PARAMS 1 +#define IOCTL_FLAGS_NO_PARAMS 1 +#define IOCTL_FLAGS_ISSUE_GLOBAL_EVENT 2 /*----------------------------------------------------------------- * Implementation of open/close/ioctl on the special char @@ -1635,12 +1643,12 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) ioctl_fn fn; } _ioctls[] = { {DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */ - {DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS, remove_all}, + {DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, remove_all}, {DM_LIST_DEVICES_CMD, 0, list_devices}, - {DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_create}, - {DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_remove}, - {DM_DEV_RENAME_CMD, 0, dev_rename}, + {DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_create}, + {DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_remove}, + {DM_DEV_RENAME_CMD, IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_rename}, {DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend}, {DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status}, {DM_DEV_WAIT_CMD, 0, dev_wait}, @@ -1869,6 +1877,9 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS)) DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd); + if (!r && ioctl_flags & IOCTL_FLAGS_ISSUE_GLOBAL_EVENT) + dm_issue_global_event(); + /* * Copy the results back to userland. */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6e54145969c5..4be85324f44d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -52,6 +52,12 @@ static struct workqueue_struct *deferred_remove_workqueue; atomic_t dm_global_event_nr = ATOMIC_INIT(0); DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq); +void dm_issue_global_event(void) +{ + atomic_inc(&dm_global_event_nr); + wake_up(&dm_global_eventq); +} + /* * One of these is allocated per bio. */ @@ -1865,9 +1871,8 @@ static void event_callback(void *context) dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); - atomic_inc(&dm_global_event_nr); wake_up(&md->eventq); - wake_up(&dm_global_eventq); + dm_issue_global_event(); } /* @@ -2283,6 +2288,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) } map = __bind(md, table, &limits); + dm_issue_global_event(); out: mutex_unlock(&md->suspend_lock); diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 412c06a624c8..ccaea525340b 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -269,9 +269,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 36 +#define DM_VERSION_MINOR 37 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2017-06-09)" +#define DM_VERSION_EXTRA "-ioctl (2017-09-20)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3-70-g09d2 From 115ef3b7e61ac64e32827611a127002672ed3725 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 25 Sep 2017 20:21:54 +0200 Subject: watchdog/hardlockup/perf: Cure UP damage for_each_cpu() unintuitively reports CPU0 as set independend of the actual cpumask content on UP kernels. That leads to a NULL pointer dereference when the cleanup function is invoked and there is no event to clean up. Reported-by: Fengguang Wu Signed-off-by: Thomas Gleixner --- kernel/watchdog_hld.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index b2931154b5f2..204a8cadb717 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -220,8 +220,13 @@ void hardlockup_detector_perf_cleanup(void) for_each_cpu(cpu, &dead_events_mask) { struct perf_event *event = per_cpu(watchdog_ev, cpu); + /* + * Required because for_each_cpu() reports unconditionally + * CPU0 as set on UP kernels. Sigh. + */ + if (event) + perf_event_release_kernel(event); per_cpu(watchdog_ev, cpu) = NULL; - perf_event_release_kernel(event); } cpumask_clear(&dead_events_mask); } -- cgit v1.2.3-70-g09d2 From eb35279dd7c7834d6320edf24e1b9786d31e4899 Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Wed, 24 May 2017 22:52:29 -0700 Subject: iio: proximity: as3935: noise detection + threshold changes Most applications are too noisy to allow the default noise and watchdog settings, and thus need to be configurable via DT properties. Also default settings to POR defaults on a reset, and register distuber interrupts as noise since it prevents proper usage. Cc: devicetree@vger.kernel.org Signed-off-by: Matt Ranostay Acked-by: Rob Herring Signed-off-by: Jonathan Cameron --- .../ABI/testing/sysfs-bus-iio-proximity-as3935 | 8 ++++ .../devicetree/bindings/iio/proximity/as3935.txt | 5 +++ drivers/iio/proximity/as3935.c | 43 ++++++++++++++++++++-- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 index 33e96f740639..147d4e8a1403 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 +++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 @@ -14,3 +14,11 @@ Description: Show or set the gain boost of the amp, from 0-31 range. 18 = indoors (default) 14 = outdoors + +What /sys/bus/iio/devices/iio:deviceX/noise_level_tripped +Date: May 2017 +KernelVersion: 4.13 +Contact: Matt Ranostay +Description: + When 1 the noise level is over the trip level and not reporting + valid data diff --git a/Documentation/devicetree/bindings/iio/proximity/as3935.txt b/Documentation/devicetree/bindings/iio/proximity/as3935.txt index 38d74314b7ab..b6c1afa6f02d 100644 --- a/Documentation/devicetree/bindings/iio/proximity/as3935.txt +++ b/Documentation/devicetree/bindings/iio/proximity/as3935.txt @@ -16,6 +16,10 @@ Optional properties: - ams,tuning-capacitor-pf: Calibration tuning capacitor stepping value 0 - 120pF. This will require using the calibration data from the manufacturer. + - ams,nflwdth: Set the noise and watchdog threshold register on + startup. This will need to set according to the noise from the + MCU board, and possibly the local environment. Refer to the + datasheet for the threshold settings. Example: @@ -27,4 +31,5 @@ as3935@0 { interrupt-parent = <&gpio1>; interrupts = <16 1>; ams,tuning-capacitor-pf = <80>; + ams,nflwdth = <0x44>; }; diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index 0eeff29b61be..4a48b7ba3a1c 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -39,8 +39,12 @@ #define AS3935_AFE_GAIN_MAX 0x1F #define AS3935_AFE_PWR_BIT BIT(0) +#define AS3935_NFLWDTH 0x01 +#define AS3935_NFLWDTH_MASK 0x7f + #define AS3935_INT 0x03 #define AS3935_INT_MASK 0x0f +#define AS3935_DISTURB_INT BIT(2) #define AS3935_EVENT_INT BIT(3) #define AS3935_NOISE_INT BIT(0) @@ -48,6 +52,7 @@ #define AS3935_DATA_MASK 0x3F #define AS3935_TUNE_CAP 0x08 +#define AS3935_DEFAULTS 0x3C #define AS3935_CALIBRATE 0x3D #define AS3935_READ_DATA BIT(14) @@ -62,7 +67,9 @@ struct as3935_state { struct mutex lock; struct delayed_work work; + unsigned long noise_tripped; u32 tune_cap; + u32 nflwdth_reg; u8 buffer[16]; /* 8-bit data + 56-bit padding + 64-bit timestamp */ u8 buf[2] ____cacheline_aligned; }; @@ -145,12 +152,29 @@ static ssize_t as3935_sensor_sensitivity_store(struct device *dev, return len; } +static ssize_t as3935_noise_level_tripped_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct as3935_state *st = iio_priv(dev_to_iio_dev(dev)); + int ret; + + mutex_lock(&st->lock); + ret = sprintf(buf, "%d\n", !time_after(jiffies, st->noise_tripped + HZ)); + mutex_unlock(&st->lock); + + return ret; +} + static IIO_DEVICE_ATTR(sensor_sensitivity, S_IRUGO | S_IWUSR, as3935_sensor_sensitivity_show, as3935_sensor_sensitivity_store, 0); +static IIO_DEVICE_ATTR(noise_level_tripped, S_IRUGO, + as3935_noise_level_tripped_show, NULL, 0); static struct attribute *as3935_attributes[] = { &iio_dev_attr_sensor_sensitivity.dev_attr.attr, + &iio_dev_attr_noise_level_tripped.dev_attr.attr, NULL, }; @@ -246,7 +270,11 @@ static void as3935_event_work(struct work_struct *work) case AS3935_EVENT_INT: iio_trigger_poll_chained(st->trig); break; + case AS3935_DISTURB_INT: case AS3935_NOISE_INT: + mutex_lock(&st->lock); + st->noise_tripped = jiffies; + mutex_unlock(&st->lock); dev_warn(&st->spi->dev, "noise level is too high\n"); break; } @@ -269,15 +297,14 @@ static irqreturn_t as3935_interrupt_handler(int irq, void *private) static void calibrate_as3935(struct as3935_state *st) { - /* mask disturber interrupt bit */ - as3935_write(st, AS3935_INT, BIT(5)); - + as3935_write(st, AS3935_DEFAULTS, 0x96); as3935_write(st, AS3935_CALIBRATE, 0x96); as3935_write(st, AS3935_TUNE_CAP, BIT(5) | (st->tune_cap / TUNE_CAP_DIV)); mdelay(2); as3935_write(st, AS3935_TUNE_CAP, (st->tune_cap / TUNE_CAP_DIV)); + as3935_write(st, AS3935_NFLWDTH, st->nflwdth_reg); } #ifdef CONFIG_PM_SLEEP @@ -370,6 +397,15 @@ static int as3935_probe(struct spi_device *spi) return -EINVAL; } + ret = of_property_read_u32(np, + "ams,nflwdth", &st->nflwdth_reg); + if (!ret && st->nflwdth_reg > AS3935_NFLWDTH_MASK) { + dev_err(&spi->dev, + "invalid nflwdth setting of %d\n", + st->nflwdth_reg); + return -EINVAL; + } + indio_dev->dev.parent = &spi->dev; indio_dev->name = spi_get_device_id(spi)->name; indio_dev->channels = as3935_channels; @@ -384,6 +420,7 @@ static int as3935_probe(struct spi_device *spi) return -ENOMEM; st->trig = trig; + st->noise_tripped = jiffies - HZ; trig->dev.parent = indio_dev->dev.parent; iio_trigger_set_drvdata(trig, indio_dev); trig->ops = &iio_interrupt_trigger_ops; -- cgit v1.2.3-70-g09d2 From f61dfff2f5b9fcb087bf5c444bc44b444709588f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Jul 2017 10:14:59 +0200 Subject: iio: pressure: zpa2326: Remove always-true check which confuses gcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc 4.1.2: drivers/iio/pressure/zpa2326.c: In function ‘zpa2326_wait_oneshot_completion’: drivers/iio/pressure/zpa2326.c:868: warning: ‘ret’ may be used uninitialized in this function When testing for "timeout < 0", timeout is already guaranteed to be strict negative, so the branch is always taken, and ret is thus always initialized. But (some version of) gcc is not smart enough to notice. Remove the check to fix this. As there is no other code in between assigning the error codes and returning them, the error codes can be returned immediately, and the intermediate variable can be dropped. Drop the "else" to please checkpatch. Fixes: e7215fe4d51e69c9 ("iio: pressure: zpa2326: report interrupted case as failure") Signed-off-by: Geert Uytterhoeven Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/zpa2326.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/iio/pressure/zpa2326.c b/drivers/iio/pressure/zpa2326.c index ebfb1de7377f..91431454eb85 100644 --- a/drivers/iio/pressure/zpa2326.c +++ b/drivers/iio/pressure/zpa2326.c @@ -865,7 +865,6 @@ complete: static int zpa2326_wait_oneshot_completion(const struct iio_dev *indio_dev, struct zpa2326_private *private) { - int ret; unsigned int val; long timeout; @@ -887,14 +886,11 @@ static int zpa2326_wait_oneshot_completion(const struct iio_dev *indio_dev, /* Timed out. */ zpa2326_warn(indio_dev, "no one shot interrupt occurred (%ld)", timeout); - ret = -ETIME; - } else if (timeout < 0) { - zpa2326_warn(indio_dev, - "wait for one shot interrupt cancelled"); - ret = -ERESTARTSYS; + return -ETIME; } - return ret; + zpa2326_warn(indio_dev, "wait for one shot interrupt cancelled"); + return -ERESTARTSYS; } static int zpa2326_init_managed_irq(struct device *parent, -- cgit v1.2.3-70-g09d2 From cdd10c9627496ad25c87ce6394e29752253c69d3 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 22 Sep 2017 15:39:23 +0200 Subject: l2tp: ensure sessions are freed after their PPPOL2TP socket If l2tp_tunnel_delete() or l2tp_tunnel_closeall() deletes a session right after pppol2tp_release() orphaned its socket, then the 'sock' variable of the pppol2tp_session_close() callback is NULL. Yet the session is still used by pppol2tp_release(). Therefore we need to take an extra reference in any case, to prevent l2tp_tunnel_delete() or l2tp_tunnel_closeall() from freeing the session. Since the pppol2tp_session_close() callback is only set if the session is associated to a PPPOL2TP socket and that both l2tp_tunnel_delete() and l2tp_tunnel_closeall() hold the PPPOL2TP socket before calling pppol2tp_session_close(), we're sure that pppol2tp_session_close() and pppol2tp_session_destruct() are paired and called in the right order. So the reference taken by the former will be released by the later. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 50e3ee9a9d61..bc6e8bfc5be4 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -437,11 +437,11 @@ static void pppol2tp_session_close(struct l2tp_session *session) BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (sock) { + if (sock) inet_shutdown(sock, SEND_SHUTDOWN); - /* Don't let the session go away before our socket does */ - l2tp_session_inc_refcount(session); - } + + /* Don't let the session go away before our socket does */ + l2tp_session_inc_refcount(session); } /* Really kill the session socket. (Called from sock_put() if -- cgit v1.2.3-70-g09d2 From b228a94066406b6c456321d69643b0d7ce11cfa6 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 22 Sep 2017 15:39:24 +0200 Subject: l2tp: fix race between l2tp_session_delete() and l2tp_tunnel_closeall() There are several ways to remove L2TP sessions: * deleting a session explicitly using the netlink interface (with L2TP_CMD_SESSION_DELETE), * deleting the session's parent tunnel (either by closing the tunnel's file descriptor or using the netlink interface), * closing the PPPOL2TP file descriptor of a PPP pseudo-wire. In some cases, when these methods are used concurrently on the same session, the session can be removed twice, leading to use-after-free bugs. This patch adds a 'dead' flag, used by l2tp_session_delete() and l2tp_tunnel_closeall() to prevent them from stepping on each other's toes. The session deletion path used when closing a PPPOL2TP file descriptor doesn't need to be adapted. It already has to ensure that a session remains valid for the lifetime of its PPPOL2TP file descriptor. So it takes an extra reference on the session in the ->session_close() callback (pppol2tp_session_close()), which is eventually dropped in the ->sk_destruct() callback of the PPPOL2TP socket (pppol2tp_session_destruct()). Still, __l2tp_session_unhash() and l2tp_session_queue_purge() can be called twice and even concurrently for a given session, but thanks to proper locking and re-initialisation of list fields, this is not an issue. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 6 ++++++ net/l2tp/l2tp_core.h | 1 + 2 files changed, 7 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ee485df73ccd..d8c2a89a76e1 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1314,6 +1314,9 @@ again: hlist_del_init(&session->hlist); + if (test_and_set_bit(0, &session->dead)) + goto again; + if (session->ref != NULL) (*session->ref)(session); @@ -1750,6 +1753,9 @@ EXPORT_SYMBOL_GPL(__l2tp_session_unhash); */ int l2tp_session_delete(struct l2tp_session *session) { + if (test_and_set_bit(0, &session->dead)) + return 0; + if (session->ref) (*session->ref)(session); __l2tp_session_unhash(session); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a305e0c5925a..70a12df40a5f 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -76,6 +76,7 @@ struct l2tp_session_cfg { struct l2tp_session { int magic; /* should be * L2TP_SESSION_MAGIC */ + long dead; struct l2tp_tunnel *tunnel; /* back pointer to tunnel * context */ -- cgit v1.2.3-70-g09d2 From a93ad944f4ff9a797abff17c73fc4b1e4a1d9141 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 22 Sep 2017 15:32:44 -0500 Subject: net: qcom/emac: specify the correct size when mapping a DMA buffer When mapping the RX DMA buffers, the driver was accidentally specifying zero for the buffer length. Under normal circumstances, SWIOTLB does not need to allocate a bounce buffer, so the address is just mapped without checking the size field. This is why the error was not detected earlier. Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver") Cc: stable@vger.kernel.org Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 0ea3ca09c689..3ed9033e56db 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -898,7 +898,8 @@ static void emac_mac_rx_descs_refill(struct emac_adapter *adpt, curr_rxbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, skb->data, - curr_rxbuf->length, DMA_FROM_DEVICE); + adpt->rxbuf_size, DMA_FROM_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, curr_rxbuf->dma_addr); if (ret) { -- cgit v1.2.3-70-g09d2 From 6098d7ddd62f532f80ee2a4b01aca500a8e4e9e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Sep 2017 23:29:18 +0200 Subject: rocker: fix rocker_tlv_put_* functions for KASAN Inlining these functions creates lots of stack variables that each take 64 bytes when KASAN is enabled, leading to this warning about potential stack overflow: drivers/net/ethernet/rocker/rocker_ofdpa.c: In function 'ofdpa_cmd_flow_tbl_add': drivers/net/ethernet/rocker/rocker_ofdpa.c:621:1: error: the frame size of 2752 bytes is larger than 1536 bytes [-Werror=frame-larger-than=] gcc-8 can now consolidate the stack slots itself, but on older versions we get the same behavior by using a temporary variable that holds a copy of the inline function argument. Cc: stable@vger.kernel.org Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_tlv.h | 48 ++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_tlv.h b/drivers/net/ethernet/rocker/rocker_tlv.h index a63ef82e7c72..dfae3c9d57c6 100644 --- a/drivers/net/ethernet/rocker/rocker_tlv.h +++ b/drivers/net/ethernet/rocker/rocker_tlv.h @@ -139,40 +139,52 @@ rocker_tlv_start(struct rocker_desc_info *desc_info) int rocker_tlv_put(struct rocker_desc_info *desc_info, int attrtype, int attrlen, const void *data); -static inline int rocker_tlv_put_u8(struct rocker_desc_info *desc_info, - int attrtype, u8 value) +static inline int +rocker_tlv_put_u8(struct rocker_desc_info *desc_info, int attrtype, u8 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &value); + u8 tmp = value; /* work around GCC PR81715 */ + + return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &tmp); } -static inline int rocker_tlv_put_u16(struct rocker_desc_info *desc_info, - int attrtype, u16 value) +static inline int +rocker_tlv_put_u16(struct rocker_desc_info *desc_info, int attrtype, u16 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value); + u16 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &tmp); } -static inline int rocker_tlv_put_be16(struct rocker_desc_info *desc_info, - int attrtype, __be16 value) +static inline int +rocker_tlv_put_be16(struct rocker_desc_info *desc_info, int attrtype, __be16 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &value); + __be16 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(__be16), &tmp); } -static inline int rocker_tlv_put_u32(struct rocker_desc_info *desc_info, - int attrtype, u32 value) +static inline int +rocker_tlv_put_u32(struct rocker_desc_info *desc_info, int attrtype, u32 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value); + u32 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &tmp); } -static inline int rocker_tlv_put_be32(struct rocker_desc_info *desc_info, - int attrtype, __be32 value) +static inline int +rocker_tlv_put_be32(struct rocker_desc_info *desc_info, int attrtype, __be32 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &value); + __be32 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(__be32), &tmp); } -static inline int rocker_tlv_put_u64(struct rocker_desc_info *desc_info, - int attrtype, u64 value) +static inline int +rocker_tlv_put_u64(struct rocker_desc_info *desc_info, int attrtype, u64 value) { - return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &value); + u64 tmp = value; + + return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &tmp); } static inline struct rocker_tlv * -- cgit v1.2.3-70-g09d2 From b4391db42308c9940944b5d7be5ca4b78fb88dd0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Sep 2017 23:29:19 +0200 Subject: netlink: fix nla_put_{u8,u16,u32} for KASAN When CONFIG_KASAN is enabled, the "--param asan-stack=1" causes rather large stack frames in some functions. This goes unnoticed normally because CONFIG_FRAME_WARN is disabled with CONFIG_KASAN by default as of commit 3f181b4d8652 ("lib/Kconfig.debug: disable -Wframe-larger-than warnings with KASAN=y"). The kernelci.org build bot however has the warning enabled and that led me to investigate it a little further, as every build produces these warnings: net/wireless/nl80211.c:4389:1: warning: the frame size of 2240 bytes is larger than 2048 bytes [-Wframe-larger-than=] net/wireless/nl80211.c:1895:1: warning: the frame size of 3776 bytes is larger than 2048 bytes [-Wframe-larger-than=] net/wireless/nl80211.c:1410:1: warning: the frame size of 2208 bytes is larger than 2048 bytes [-Wframe-larger-than=] net/bridge/br_netlink.c:1282:1: warning: the frame size of 2544 bytes is larger than 2048 bytes [-Wframe-larger-than=] Most of this problem is now solved in gcc-8, which can consolidate the stack slots for the inline function arguments. On older compilers we can add a workaround by declaring a local variable in each function to pass the inline function argument. Cc: stable@vger.kernel.org Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- include/net/netlink.h | 73 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index e51cf5f81597..14c289393071 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -773,7 +773,10 @@ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, */ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) { - return nla_put(skb, attrtype, sizeof(u8), &value); + /* temporary variables to work around GCC PR81715 with asan-stack=1 */ + u8 tmp = value; + + return nla_put(skb, attrtype, sizeof(u8), &tmp); } /** @@ -784,7 +787,9 @@ static inline int nla_put_u8(struct sk_buff *skb, int attrtype, u8 value) */ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) { - return nla_put(skb, attrtype, sizeof(u16), &value); + u16 tmp = value; + + return nla_put(skb, attrtype, sizeof(u16), &tmp); } /** @@ -795,7 +800,9 @@ static inline int nla_put_u16(struct sk_buff *skb, int attrtype, u16 value) */ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) { - return nla_put(skb, attrtype, sizeof(__be16), &value); + __be16 tmp = value; + + return nla_put(skb, attrtype, sizeof(__be16), &tmp); } /** @@ -806,7 +813,9 @@ static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) */ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) { - return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, value); + __be16 tmp = value; + + return nla_put_be16(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** @@ -817,7 +826,9 @@ static inline int nla_put_net16(struct sk_buff *skb, int attrtype, __be16 value) */ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) { - return nla_put(skb, attrtype, sizeof(__le16), &value); + __le16 tmp = value; + + return nla_put(skb, attrtype, sizeof(__le16), &tmp); } /** @@ -828,7 +839,9 @@ static inline int nla_put_le16(struct sk_buff *skb, int attrtype, __le16 value) */ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) { - return nla_put(skb, attrtype, sizeof(u32), &value); + u32 tmp = value; + + return nla_put(skb, attrtype, sizeof(u32), &tmp); } /** @@ -839,7 +852,9 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value) */ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) { - return nla_put(skb, attrtype, sizeof(__be32), &value); + __be32 tmp = value; + + return nla_put(skb, attrtype, sizeof(__be32), &tmp); } /** @@ -850,7 +865,9 @@ static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) */ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) { - return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, value); + __be32 tmp = value; + + return nla_put_be32(skb, attrtype | NLA_F_NET_BYTEORDER, tmp); } /** @@ -861,7 +878,9 @@ static inline int nla_put_net32(struct sk_buff *skb, int attrtype, __be32 value) */ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) { - return nla_put(skb, attrtype, sizeof(__le32), &value); + __le32 tmp = value; + + return nla_put(skb, attrtype, sizeof(__le32), &tmp); } /** @@ -874,7 +893,9 @@ static inline int nla_put_le32(struct sk_buff *skb, int attrtype, __le32 value) static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, u64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(u64), &value, padattr); + u64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr); } /** @@ -887,7 +908,9 @@ static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype, static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(__be64), &value, padattr); + __be64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(__be64), &tmp, padattr); } /** @@ -900,7 +923,9 @@ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, int padattr) { - return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value, + __be64 tmp = value; + + return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, tmp, padattr); } @@ -914,7 +939,9 @@ static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 value, static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(__le64), &value, padattr); + __le64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(__le64), &tmp, padattr); } /** @@ -925,7 +952,9 @@ static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value, */ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) { - return nla_put(skb, attrtype, sizeof(s8), &value); + s8 tmp = value; + + return nla_put(skb, attrtype, sizeof(s8), &tmp); } /** @@ -936,7 +965,9 @@ static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value) */ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) { - return nla_put(skb, attrtype, sizeof(s16), &value); + s16 tmp = value; + + return nla_put(skb, attrtype, sizeof(s16), &tmp); } /** @@ -947,7 +978,9 @@ static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value) */ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) { - return nla_put(skb, attrtype, sizeof(s32), &value); + s32 tmp = value; + + return nla_put(skb, attrtype, sizeof(s32), &tmp); } /** @@ -960,7 +993,9 @@ static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value) static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value, int padattr) { - return nla_put_64bit(skb, attrtype, sizeof(s64), &value, padattr); + s64 tmp = value; + + return nla_put_64bit(skb, attrtype, sizeof(s64), &tmp, padattr); } /** @@ -1010,7 +1045,9 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, static inline int nla_put_in_addr(struct sk_buff *skb, int attrtype, __be32 addr) { - return nla_put_be32(skb, attrtype, addr); + __be32 tmp = addr; + + return nla_put_be32(skb, attrtype, tmp); } /** -- cgit v1.2.3-70-g09d2 From d8bd9f3f0925d22726de159531bfe3774b5cacc6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 22 Sep 2017 13:32:21 +1000 Subject: powerpc: Handle MCE on POWER9 with only DSISR bit 30 set On POWER9 DD2.1 and below, it's possible for a paste instruction to cause a Machine Check Exception (MCE) where only DSISR bit 30 (IBM 33) is set. This will result in the MCE handler seeing an unknown event, which triggers linux to crash. We change this by detecting unknown events caused by load/stores in the MCE handler and marking them as handled so that we no longer crash. An MCE that occurs like this is spurious, so we don't need to do anything in terms of servicing it. If there is something that needs to be serviced, the CPU will raise the MCE again with the correct DSISR so that it can be serviced properly. Signed-off-by: Michael Neuling Reviewed-by: Nicholas Piggin [mpe: Expand comment with details from change log, use normal bit #s] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/mce_power.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b76ca198e09c..f523125b9d34 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -624,5 +624,18 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) long __machine_check_early_realmode_p9(struct pt_regs *regs) { + /* + * On POWER9 DD2.1 and below, it's possible to get a machine check + * caused by a paste instruction where only DSISR bit 30 is set. This + * will result in the MCE handler seeing an unknown event and the kernel + * crashing. An MCE that occurs like this is spurious, so we don't need + * to do anything in terms of servicing it. If there is something that + * needs to be serviced, the CPU will raise the MCE again with the + * correct DSISR so that it can be serviced properly. So detect this + * case and mark it as handled. + */ + if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x40000000) + return 1; + return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } -- cgit v1.2.3-70-g09d2 From 1df79cb3bae754e4a42240f9851ed82549a44f1a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 20 Sep 2017 12:35:57 +0530 Subject: phy: tegra: Handle return value of kasprintf kasprintf() can fail and it's return value must be checked. Signed-off-by: Arvind Yadav Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/tegra/xusb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 3cbcb2537657..4307bf0013e1 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -454,6 +454,8 @@ tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type, char *name; name = kasprintf(GFP_KERNEL, "%s-%u", type, index); + if (!name) + return ERR_PTR(-ENOMEM); np = of_find_node_by_name(np, name); kfree(name); } -- cgit v1.2.3-70-g09d2 From 554a56fc83f679c73b4f851a330045d0ec7ec1a5 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 21 Sep 2017 18:31:48 +0800 Subject: phy: phy-mtk-tphy: fix NULL point of chip bank Chip bank of version-1 is initialized as NULL, but it's used by pcie_phy_instance_power_on/off(), so assign it a right address. Signed-off-by: Chunfeng Yun Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/mediatek/phy-mtk-tphy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index e3baad78521f..721a2a1c97ef 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -27,6 +27,7 @@ /* banks shared by multiple phys */ #define SSUSB_SIFSLV_V1_SPLLC 0x000 /* shared by u3 phys */ #define SSUSB_SIFSLV_V1_U2FREQ 0x100 /* shared by u2 phys */ +#define SSUSB_SIFSLV_V1_CHIP 0x300 /* shared by u3 phys */ /* u2 phy bank */ #define SSUSB_SIFSLV_V1_U2PHY_COM 0x000 /* u3/pcie/sata phy banks */ @@ -762,7 +763,7 @@ static void phy_v1_banks_init(struct mtk_tphy *tphy, case PHY_TYPE_USB3: case PHY_TYPE_PCIE: u3_banks->spllc = tphy->sif_base + SSUSB_SIFSLV_V1_SPLLC; - u3_banks->chip = NULL; + u3_banks->chip = tphy->sif_base + SSUSB_SIFSLV_V1_CHIP; u3_banks->phyd = instance->port_base + SSUSB_SIFSLV_V1_U3PHYD; u3_banks->phya = instance->port_base + SSUSB_SIFSLV_V1_U3PHYA; break; -- cgit v1.2.3-70-g09d2 From caef3e0b657d091a540232e07e5e5b4648110a52 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 18 Sep 2017 10:04:20 +0200 Subject: phy: mvebu-cp110-comphy: fix mux error check The mux value is retrieved from the mvebu_comphy_get_mux() function which returns an int. In mvebu_comphy_power_on() this int is stored to a u32 and a check is made to ensure it's not negative. Which is wrong. This fixes it. Fixes: d0438bd6aa09 ("phy: add the mvebu cp110 comphy driver") Signed-off-by: Antoine Tenart Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c index 73ebad6634a7..514a1a47e1fd 100644 --- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c @@ -468,8 +468,8 @@ static int mvebu_comphy_power_on(struct phy *phy) { struct mvebu_comphy_lane *lane = phy_get_drvdata(phy); struct mvebu_comphy_priv *priv = lane->priv; - int ret; - u32 mux, val; + int ret, mux; + u32 val; mux = mvebu_comphy_get_mux(lane->id, lane->port, lane->mode); if (mux < 0) -- cgit v1.2.3-70-g09d2 From 17fb745d4acfe52f9ebb1ba2b10f2fcd796fb5ce Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 18 Sep 2017 10:04:22 +0200 Subject: phy: mvebu-cp110-comphy: explicitly set the pipe selector The pipe selector is used to select some modes (such as USB or PCIe). Otherwise it must be set to 0 (or "unconnected"). This patch does this to ensure it is not set to an incompatible value when using the supported modes (SGMII, 10GKR). Signed-off-by: Antoine Tenart Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c index 514a1a47e1fd..e8ed7e3e6e23 100644 --- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c @@ -111,6 +111,8 @@ #define MVEBU_COMPHY_CONF6_40B BIT(18) #define MVEBU_COMPHY_SELECTOR 0x1140 #define MVEBU_COMPHY_SELECTOR_PHY(n) ((n) * 0x4) +#define MVEBU_COMPHY_PIPE_SELECTOR 0x1144 +#define MVEBU_COMPHY_PIPE_SELECTOR_PIPE(n) ((n) * 0x4) #define MVEBU_COMPHY_LANES 6 #define MVEBU_COMPHY_PORTS 3 @@ -475,6 +477,10 @@ static int mvebu_comphy_power_on(struct phy *phy) if (mux < 0) return -ENOTSUPP; + regmap_read(priv->regmap, MVEBU_COMPHY_PIPE_SELECTOR, &val); + val &= ~(0xf << MVEBU_COMPHY_PIPE_SELECTOR_PIPE(lane->id)); + regmap_write(priv->regmap, MVEBU_COMPHY_PIPE_SELECTOR, val); + regmap_read(priv->regmap, MVEBU_COMPHY_SELECTOR, &val); val &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id)); val |= mux << MVEBU_COMPHY_SELECTOR_PHY(lane->id); @@ -526,6 +532,10 @@ static int mvebu_comphy_power_off(struct phy *phy) val &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id)); regmap_write(priv->regmap, MVEBU_COMPHY_SELECTOR, val); + regmap_read(priv->regmap, MVEBU_COMPHY_PIPE_SELECTOR, &val); + val &= ~(0xf << MVEBU_COMPHY_PIPE_SELECTOR_PIPE(lane->id)); + regmap_write(priv->regmap, MVEBU_COMPHY_PIPE_SELECTOR, val); + return 0; } -- cgit v1.2.3-70-g09d2 From c1c7acac0998ffcc9cd81e016a7d1b58b1afbb51 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Sep 2017 13:31:37 +0300 Subject: phy: mvebu-cp110: checking for NULL instead of IS_ERR() devm_ioremap_resource() never returns NULL, it only returns error pointers so this test needs to be changed. Fixes: d0438bd6aa09 ("phy: add the mvebu cp110 comphy driver") Signed-off-by: Dan Carpenter Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c index e8ed7e3e6e23..89c887ea5557 100644 --- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c +++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c @@ -586,8 +586,8 @@ static int mvebu_comphy_probe(struct platform_device *pdev) return PTR_ERR(priv->regmap); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->base = devm_ioremap_resource(&pdev->dev, res); - if (!priv->base) - return -ENOMEM; + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); for_each_available_child_of_node(pdev->dev.of_node, child) { struct mvebu_comphy_lane *lane; -- cgit v1.2.3-70-g09d2 From f98b74387551f6d266c044d90e87e4919b25b970 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 22 Sep 2017 09:44:03 -0700 Subject: phy: rockchip-typec: Set the AUX channel flip state earlier On some DP monitors we found that setting the wrong flip state on the AUX channel could cause the monitor to stop asserting HotPlug Detect (HPD). Setting the right flip state caused these monitors to start asserting HotPlug Detect again. Here's what we believe was happening: * We'd plug in the monitor and we'd see HPD assert * We'd quickly see HPD deassert * The kernel would try to init the type C PHY but would init it in USB mode (because there was a peripheral there but no HPD) * Because the kernel never set the flip mode properly we'd never see the HPD come back. With this change, we'll still see HPD disappear (we don't think there's anything we can do about that), but then it will come back. Overall we can say that it's sane to set the AUX channel flip state even when HPD is not asserted. NOTE: to make this change possible, I needed to do a bit of cleanup to the tcphy_dp_aux_calibration() function so that it doesn't ever clobber the FLIP state. This made it very obvious that a line of code documented as "setting bit 12" also did a bunch of other magic, undocumented stuff. For now I'll just break out the bits and add a comment that this is black magic and we'll try to document tcphy_dp_aux_calibration() better in a future CL. ALSO NOTE: the old function used to write a bunch of hardcoded values in _some_ cases instead of doing a read-modify-write. One could possibly assert that these could have had (beneficial) side effects and thus with this new code (which always does read-modify-write) we could have a bug. We shouldn't need to worry, though, since in the old code tcphy_dp_aux_calibration() was always called following the de-assertion of "reset" the the type C PHY. ...so the type C PHY was always in default state. TX_ANA_CTRL_REG_1 is documented to be 0x0 after reset. This was also confirmed by printk. Suggested-by: Shawn Nematbakhsh Reviewed-by: Chris Zhong Signed-off-by: Douglas Anderson Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/rockchip/phy-rockchip-typec.c | 62 +++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index 4d2c57f21d76..342a77733207 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -443,14 +443,34 @@ static inline int property_enable(struct rockchip_typec_phy *tcphy, return regmap_write(tcphy->grf_regs, reg->offset, val | mask); } +static void tcphy_dp_aux_set_flip(struct rockchip_typec_phy *tcphy) +{ + u16 tx_ana_ctrl_reg_1; + + /* + * Select the polarity of the xcvr: + * 1, Reverses the polarity (If TYPEC, Pulls ups aux_p and pull + * down aux_m) + * 0, Normal polarity (if TYPEC, pulls up aux_m and pulls down + * aux_p) + */ + tx_ana_ctrl_reg_1 = readl(tcphy->base + TX_ANA_CTRL_REG_1); + if (!tcphy->flip) + tx_ana_ctrl_reg_1 |= BIT(12); + else + tx_ana_ctrl_reg_1 &= ~BIT(12); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); +} + static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) { + u16 tx_ana_ctrl_reg_1; u16 rdata, rdata2, val; /* disable txda_cal_latch_en for rewrite the calibration values */ - rdata = readl(tcphy->base + TX_ANA_CTRL_REG_1); - val = rdata & 0xdfff; - writel(val, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 = readl(tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 &= ~BIT(13); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); /* * read a resistor calibration code from CMN_TXPUCAL_CTRL[6:0] and @@ -472,9 +492,8 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) * Activate this signal for 1 clock cycle to sample new calibration * values. */ - rdata = readl(tcphy->base + TX_ANA_CTRL_REG_1); - val = rdata | 0x2000; - writel(val, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(13); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); usleep_range(150, 200); /* set TX Voltage Level and TX Deemphasis to 0 */ @@ -482,8 +501,10 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) /* re-enable decap */ writel(0x100, tcphy->base + TX_ANA_CTRL_REG_2); writel(0x300, tcphy->base + TX_ANA_CTRL_REG_2); - writel(0x2008, tcphy->base + TX_ANA_CTRL_REG_1); - writel(0x2018, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(3); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(4); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); writel(0, tcphy->base + TX_ANA_CTRL_REG_5); @@ -494,8 +515,10 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) writel(0x1001, tcphy->base + TX_ANA_CTRL_REG_4); /* re-enables Bandgap reference for LDO */ - writel(0x2098, tcphy->base + TX_ANA_CTRL_REG_1); - writel(0x2198, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(7); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(8); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); /* * re-enables the transmitter pre-driver, driver data selection MUX, @@ -505,17 +528,15 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) writel(0x303, tcphy->base + TX_ANA_CTRL_REG_2); /* - * BIT 12: Controls auxda_polarity, which selects the polarity of the - * xcvr: - * 1, Reverses the polarity (If TYPEC, Pulls ups aux_p and pull - * down aux_m) - * 0, Normal polarity (if TYPE_C, pulls up aux_m and pulls down - * aux_p) + * Do some magic undocumented stuff, some of which appears to + * undo the "re-enables Bandgap reference for LDO" above. */ - val = 0xa078; - if (!tcphy->flip) - val |= BIT(12); - writel(val, tcphy->base + TX_ANA_CTRL_REG_1); + tx_ana_ctrl_reg_1 |= BIT(15); + tx_ana_ctrl_reg_1 &= ~BIT(8); + tx_ana_ctrl_reg_1 &= ~BIT(7); + tx_ana_ctrl_reg_1 |= BIT(6); + tx_ana_ctrl_reg_1 |= BIT(5); + writel(tx_ana_ctrl_reg_1, tcphy->base + TX_ANA_CTRL_REG_1); writel(0, tcphy->base + TX_ANA_CTRL_REG_3); writel(0, tcphy->base + TX_ANA_CTRL_REG_4); @@ -555,6 +576,7 @@ static int tcphy_phy_init(struct rockchip_typec_phy *tcphy, u8 mode) reset_control_deassert(tcphy->tcphy_rst); property_enable(tcphy, &cfg->typec_conn_dir, tcphy->flip); + tcphy_dp_aux_set_flip(tcphy); tcphy_cfg_24m(tcphy); -- cgit v1.2.3-70-g09d2 From 26e03d803c8191e906360a0320c05b12d45a37ae Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 22 Sep 2017 09:44:04 -0700 Subject: phy: rockchip-typec: Don't set the aux voltage swing to 400 mV On rk3399-gru-kevin there are some cases where we're seeing AUX CH failures when trying to do DisplayPort over type C. Problems are intermittent and don't reproduce all the time. Problems are often bursty and failures persist for several seconds before going away. The failure case I focused on is: * A particular type C to HDMI adapter. * One orientation (flip mode) of that adapter. * Easier to see failures when something is plugged into the _other type C port at the same time. * Problems reproduce on both type C ports (left and right side). Ironically problems also stop reproducing when I solder wires onto the AUX CH signals on a port (even if no scope is connected to the signals). In this case, problems only stop reproducing on the port with the wires connected. From the above it appears that something about the signaling on the aux channel is marginal and any slight differences can bring us over the edge to failure. It turns out that we can fix our problems by just increasing the voltage swing of the AUX CH, giving us a bunch of extra margin. In DP up to version 1.2 the voltage swing on the aux channel was specced as .29 V to 1.38 V. In DP version 1.3 the aux channel voltage was tightened to be between .29 V and .40 V, but it clarifies that it really only needs the lower voltage when operating at the highest speed (HBR3 mode). So right now we are trying to use a voltage that technically should be valid for all versions of the spec (including version 1.3 when transmitting at HBR3). That would be great to do if it worked reliably. ...but it doesn't seem to. It turns out that if you continue to read through the DP part of the rk3399 TRM and other parts of the type C PHY spec you'll find out that while the rk3399 does support DP 1.3, it doesn't support HBR3. The docs specifically say "RBR, HBR and HBR2 data rates only". Thus there is actually no requirement to support an AUX CH swing of .4 V. Even if there is no actual requirement to support the tighter voltage swing, one could possibly argue that we should support it anyway. The DP spec clarifies that the lower voltage on the AUX CH will reduce cross talk in some cases and that seems like it could be beneficial even at the lower bit rates. At the moment, though, we are seeing problems with the AUX CH and not on the other lines. Also, checking another known working and similar laptop shows that the other laptop runs the AUX channel at a higher voltage. Other notes: * Looking at measurements done on the AUX CH we weren't actually compliant with the DP 1.3 spec anyway. AUX CH peek-to-peek voltage was measured on rk3399-gru-kevin as .466 V which is > .4 V. * With this new patch the AUX channel isn't actually 1.0 V, but it has been confirmed that the signal is better and has more margin. Eye diagram passes. * If someone were truly an expert in the Type C PHY and in DisplayPort signaling they might be able to make things work and keep the voltage at < .4 V. The Type C PHY seems to have a plethora of tuning knobs that could almost certainly improve the signal integrity. Some of these things (like enabling tx_fcm_full_margin) even seem to fix my problems. However, lacking expertise I can't say whether this is a better or worse solution. Tightening signals to give cleaner waveforms can often have adverse affects, like increasing EMI or adding noise to other signals. I'd rather not tune things like this without a healthy application of expertise that I don't have. Signed-off-by: Douglas Anderson Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/rockchip/phy-rockchip-typec.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index 342a77733207..b25c00432f9b 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -543,10 +543,11 @@ static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy) writel(0, tcphy->base + TX_ANA_CTRL_REG_5); /* - * Controls low_power_swing_en, set the voltage swing of the driver - * to 400mv. The values below are peak to peak (differential) values. + * Controls low_power_swing_en, don't set the voltage swing of the + * driver to 400mv. The values below are peak to peak (differential) + * values. */ - writel(4, tcphy->base + TXDA_COEFF_CALC_CTRL); + writel(0, tcphy->base + TXDA_COEFF_CALC_CTRL); writel(0, tcphy->base + TXDA_CYA_AUXDA_CYA); /* Controls tx_high_z_tm_en */ -- cgit v1.2.3-70-g09d2 From b621129f4f08c8d42ac4de2e77a07c5cf0c4b740 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 15 Feb 2017 16:33:56 +0300 Subject: netfilter: ipvs: full-functionality option for ECN encapsulation in tunnel IPVS tunnel mode works as simple tunnel (see RFC 3168) copying ECN field to outer header. That's result in packet drops on egress tunnels in case the egress tunnel operates as ECN-capable with Full-functionality option (like ip_tunnel and ip6_tunnel kernel modules), according to RFC 3168 section 9.1.1 recommendation. This patch implements ECN full-functionality option into ipvs xmit code. Cc: netdev@vger.kernel.org Cc: lvs-devel@vger.kernel.org Signed-off-by: Vadim Fedorenko Reviewed-by: Konstantin Khlebnikov Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_xmit.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 90d396814798..4527921b1c3a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -921,6 +921,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, { struct sk_buff *new_skb = NULL; struct iphdr *old_iph = NULL; + __u8 old_dsfield; #ifdef CONFIG_IP_VS_IPV6 struct ipv6hdr *old_ipv6h = NULL; #endif @@ -945,7 +946,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, *payload_len = ntohs(old_ipv6h->payload_len) + sizeof(*old_ipv6h); - *dsfield = ipv6_get_dsfield(old_ipv6h); + old_dsfield = ipv6_get_dsfield(old_ipv6h); *ttl = old_ipv6h->hop_limit; if (df) *df = 0; @@ -960,12 +961,15 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, /* fix old IP header checksum */ ip_send_check(old_iph); - *dsfield = ipv4_get_dsfield(old_iph); + old_dsfield = ipv4_get_dsfield(old_iph); *ttl = old_iph->ttl; if (payload_len) *payload_len = ntohs(old_iph->tot_len); } + /* Implement full-functionality option for ECN encapsulation */ + *dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield); + return skb; error: kfree_skb(skb); -- cgit v1.2.3-70-g09d2 From 0bc15d85d97d44e8979ff91d0c1fbafe6fd4172c Mon Sep 17 00:00:00 2001 From: Nickey Yang Date: Tue, 26 Sep 2017 15:55:22 +0800 Subject: arm64: dts: rockchip: add the grf clk for dw-mipi-dsi on rk3399 The clk of grf must be enabled before writing grf register for rk3399. Signed-off-by: Nickey Yang [the grf clock is already part of the binding since march 2017] Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 6aa43fd47148..ab7629c5b856 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1630,8 +1630,8 @@ reg = <0x0 0xff960000 0x0 0x8000>; interrupts = ; clocks = <&cru SCLK_DPHY_PLL>, <&cru PCLK_MIPI_DSI0>, - <&cru SCLK_DPHY_TX0_CFG>; - clock-names = "ref", "pclk", "phy_cfg"; + <&cru SCLK_DPHY_TX0_CFG>, <&cru PCLK_VIO_GRF>; + clock-names = "ref", "pclk", "phy_cfg", "grf"; power-domains = <&power RK3399_PD_VIO>; rockchip,grf = <&grf>; status = "disabled"; -- cgit v1.2.3-70-g09d2 From e88d62cd4b2f0b1ae55e9008e79c2794b1fc914d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 26 Sep 2017 12:41:52 +0100 Subject: percpu: make this_cpu_generic_read() atomic w.r.t. interrupts As raw_cpu_generic_read() is a plain read from a raw_cpu_ptr() address, it's possible (albeit unlikely) that the compiler will split the access across multiple instructions. In this_cpu_generic_read() we disable preemption but not interrupts before calling raw_cpu_generic_read(). Thus, an interrupt could be taken in the middle of the split load instructions. If a this_cpu_write() or RMW this_cpu_*() op is made to the same variable in the interrupt handling path, this_cpu_read() will return a torn value. For native word types, we can avoid tearing using READ_ONCE(), but this won't work in all cases (e.g. 64-bit types on most 32-bit platforms). This patch reworks this_cpu_generic_read() to use READ_ONCE() where possible, otherwise falling back to disabling interrupts. Signed-off-by: Mark Rutland Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Peter Zijlstra Cc: Pranith Kumar Cc: Tejun Heo Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 0504ef8f3aa3..976f8ac26665 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -115,15 +115,35 @@ do { \ (__ret); \ }) -#define this_cpu_generic_read(pcp) \ +#define __this_cpu_generic_read_nopreempt(pcp) \ ({ \ typeof(pcp) __ret; \ preempt_disable_notrace(); \ - __ret = raw_cpu_generic_read(pcp); \ + __ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ preempt_enable_notrace(); \ __ret; \ }) +#define __this_cpu_generic_read_noirq(pcp) \ +({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_generic_read(pcp); \ + raw_local_irq_restore(__flags); \ + __ret; \ +}) + +#define this_cpu_generic_read(pcp) \ +({ \ + typeof(pcp) __ret; \ + if (__native_word(pcp)) \ + __ret = __this_cpu_generic_read_nopreempt(pcp); \ + else \ + __ret = __this_cpu_generic_read_noirq(pcp); \ + __ret; \ +}) + #define this_cpu_generic_to_op(pcp, val, op) \ do { \ unsigned long __flags; \ -- cgit v1.2.3-70-g09d2 From 2d8f63297b9f0b430c96329893667c0bfdcbd47e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 19 Sep 2017 18:38:13 +0300 Subject: drm/i915: always update ELD connector type after get modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_edid_to_eld() initializes the connector ELD to zero, overwriting the ELD connector type initialized in intel_audio_codec_enable(). If userspace does getconnector and thus get_modes after modeset, a subsequent audio component i915_audio_component_get_eld() call will receive an ELD without the connector type properly set. It's fine for HDMI, but screws up audio for DP. Always set the ELD connector type at intel_connector_update_modes() based on the connector type. We can drop the connector type update from intel_audio_codec_enable(). Credits to Joseph Nuzman for figuring this out. Cc: Ville Syrjälä Cc: Joseph Nuzman Reported-by: Joseph Nuzman Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101583 Reviewed-by: Ville Syrjälä Tested-by: Joseph Nuzman Cc: stable@vger.kernel.org # v4.10+, maybe earlier Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20170919153813.29808-1-jani.nikula@intel.com (cherry picked from commit d81fb7fd9436e81fda67e5bc8ed0713aa28d3db2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_audio.c | 5 ----- drivers/gpu/drm/i915/intel_modes.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index d805b6e6fe71..27743be5b768 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -606,11 +606,6 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder, connector->encoder->base.id, connector->encoder->name); - /* ELD Conn_Type */ - connector->eld[5] &= ~(3 << 2); - if (intel_crtc_has_dp_encoder(crtc_state)) - connector->eld[5] |= (1 << 2); - connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2; if (dev_priv->display.audio_codec_enable) diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c index 951e834dd274..28a778b785ac 100644 --- a/drivers/gpu/drm/i915/intel_modes.c +++ b/drivers/gpu/drm/i915/intel_modes.c @@ -30,6 +30,21 @@ #include "intel_drv.h" #include "i915_drv.h" +static void intel_connector_update_eld_conn_type(struct drm_connector *connector) +{ + u8 conn_type; + + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + conn_type = DRM_ELD_CONN_TYPE_DP; + } else { + conn_type = DRM_ELD_CONN_TYPE_HDMI; + } + + connector->eld[DRM_ELD_SAD_COUNT_CONN_TYPE] &= ~DRM_ELD_CONN_TYPE_MASK; + connector->eld[DRM_ELD_SAD_COUNT_CONN_TYPE] |= conn_type; +} + /** * intel_connector_update_modes - update connector from edid * @connector: DRM connector device to use @@ -44,6 +59,8 @@ int intel_connector_update_modes(struct drm_connector *connector, ret = drm_add_edid_modes(connector, edid); drm_edid_to_eld(connector, edid); + intel_connector_update_eld_conn_type(connector); + return ret; } -- cgit v1.2.3-70-g09d2 From bf5d10dcae3549b779490672c705c6ac79cf68a3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 14 Sep 2017 17:21:54 +0100 Subject: drm/i915: remove redundant variable hw_check hw_check is being assigned and updated but is no longer being read, hence it is redundant and can be removed. Detected by clang scan-build: "warning: Value stored to 'hw_check' during its initialization is never read" Fixes: f6d1973db2d2 ("drm/i915: Move modeset state verifier calls") Signed-off-by: Colin Ian King Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170914162154.11304-1-colin.king@canonical.com (cherry picked from commit 4babc5e27cfda59e2e257d28628b8d853aea5206) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_display.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 00cd17c76fdc..64f7b51ed97c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12359,7 +12359,6 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; - bool hw_check = intel_state->modeset; u64 put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; int i; @@ -12376,7 +12375,6 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (needs_modeset(new_crtc_state) || to_intel_crtc_state(new_crtc_state)->update_pipe) { - hw_check = true; put_domains[to_intel_crtc(crtc)->pipe] = modeset_get_crtc_power_domains(crtc, -- cgit v1.2.3-70-g09d2 From 2ba7d7e0437127314864238f8bfcb8369d81075c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 21 Sep 2017 17:19:20 +0300 Subject: drm/i915/bios: ignore HDMI on port A MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware state readout oopses after several warnings when trying to use HDMI on port A, if such a combination is configured in VBT. Filter the combo out already at the VBT parsing phase. v2: also ignore DVI (Ville) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102889 Cc: stable@vger.kernel.org Cc: Imre Deak Reviewed-by: Ville Syrjälä Tested-by: Daniel Drake Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20170921141920.18172-1-jani.nikula@intel.com (cherry picked from commit d27ffc1d00327c29b3aa97f941b42f0949f9e99f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_bios.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 183e87e8ea31..00c6aee0a9a1 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1163,6 +1163,13 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, is_hdmi = is_dvi && (child->common.device_type & DEVICE_TYPE_NOT_HDMI_OUTPUT) == 0; is_edp = is_dp && (child->common.device_type & DEVICE_TYPE_INTERNAL_CONNECTOR); + if (port == PORT_A && is_dvi) { + DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n", + is_hdmi ? "/HDMI" : ""); + is_dvi = false; + is_hdmi = false; + } + info->supports_dvi = is_dvi; info->supports_hdmi = is_hdmi; info->supports_dp = is_dp; -- cgit v1.2.3-70-g09d2 From 36f6ee22d2d66046e369757ec6bbe1c482957ba6 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Tue, 26 Sep 2017 15:14:29 +0300 Subject: vti: fix use after free in vti_tunnel_xmit/vti6_tnl_xmit When running LTP IPsec tests, KASan might report: BUG: KASAN: use-after-free in vti_tunnel_xmit+0xeee/0xff0 [ip_vti] Read of size 4 at addr ffff880dc6ad1980 by task swapper/0/0 ... Call Trace: dump_stack+0x63/0x89 print_address_description+0x7c/0x290 kasan_report+0x28d/0x370 ? vti_tunnel_xmit+0xeee/0xff0 [ip_vti] __asan_report_load4_noabort+0x19/0x20 vti_tunnel_xmit+0xeee/0xff0 [ip_vti] ? vti_init_net+0x190/0x190 [ip_vti] ? save_stack_trace+0x1b/0x20 ? save_stack+0x46/0xd0 dev_hard_start_xmit+0x147/0x510 ? icmp_echo.part.24+0x1f0/0x210 __dev_queue_xmit+0x1394/0x1c60 ... Freed by task 0: save_stack_trace+0x1b/0x20 save_stack+0x46/0xd0 kasan_slab_free+0x70/0xc0 kmem_cache_free+0x81/0x1e0 kfree_skbmem+0xb1/0xe0 kfree_skb+0x75/0x170 kfree_skb_list+0x3e/0x60 __dev_queue_xmit+0x1298/0x1c60 dev_queue_xmit+0x10/0x20 neigh_resolve_output+0x3a8/0x740 ip_finish_output2+0x5c0/0xe70 ip_finish_output+0x4ba/0x680 ip_output+0x1c1/0x3a0 xfrm_output_resume+0xc65/0x13d0 xfrm_output+0x1e4/0x380 xfrm4_output_finish+0x5c/0x70 Can be fixed if we get skb->len before dst_output(). Fixes: b9959fd3b0fa ("vti: switch to new ip tunnel code") Fixes: 22e1b23dafa8 ("vti6: Support inter address family tunneling.") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/ipv4/ip_vti.c | 3 ++- net/ipv6/ip6_vti.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 5ed63d250950..89453cf62158 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel_parm *parms = &tunnel->parms; struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ + int pkt_len = skb->len; int err; int mtu; @@ -229,7 +230,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) - err = skb->len; + err = pkt_len; iptunnel_xmit_stats(dev, err); return NETDEV_TX_OK; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 79444a4bfd6d..bcdc2d557de1 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; struct xfrm_state *x; + int pkt_len = skb->len; int err = -1; int mtu; @@ -502,7 +503,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += skb->len; + tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); } else { -- cgit v1.2.3-70-g09d2 From 62b982eeb4589b2e6d7c01a90590e3a4c2b2ca19 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 26 Sep 2017 16:16:43 +0200 Subject: l2tp: fix race condition in l2tp_tunnel_delete If we try to delete the same tunnel twice, the first delete operation does a lookup (l2tp_tunnel_get), finds the tunnel, calls l2tp_tunnel_delete, which queues it for deletion by l2tp_tunnel_del_work. The second delete operation also finds the tunnel and calls l2tp_tunnel_delete. If the workqueue has already fired and started running l2tp_tunnel_del_work, then l2tp_tunnel_delete will queue the same tunnel a second time, and try to free the socket again. Add a dead flag to prevent firing the workqueue twice. Then we can remove the check of queue_work's result that was meant to prevent that race but doesn't. Reproducer: ip l2tp add tunnel tunnel_id 3000 peer_tunnel_id 4000 local 192.168.0.2 remote 192.168.0.1 encap udp udp_sport 5000 udp_dport 6000 ip l2tp add session name l2tp1 tunnel_id 3000 session_id 1000 peer_session_id 2000 ip link set l2tp1 up ip l2tp del tunnel tunnel_id 3000 ip l2tp del tunnel tunnel_id 3000 Fixes: f8ccac0e4493 ("l2tp: put tunnel socket release on a workqueue") Reported-by: Jianlin Shi Signed-off-by: Sabrina Dubroca Acked-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 10 ++++------ net/l2tp/l2tp_core.h | 5 ++++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index d8c2a89a76e1..02d61101b108 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1688,14 +1688,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); /* This function is used by the netlink TUNNEL_DELETE command. */ -int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { - l2tp_tunnel_inc_refcount(tunnel); - if (false == queue_work(l2tp_wq, &tunnel->del_work)) { - l2tp_tunnel_dec_refcount(tunnel); - return 1; + if (!test_and_set_bit(0, &tunnel->dead)) { + l2tp_tunnel_inc_refcount(tunnel); + queue_work(l2tp_wq, &tunnel->del_work); } - return 0; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 70a12df40a5f..67c79d9b5c6c 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -161,6 +161,9 @@ struct l2tp_tunnel_cfg { struct l2tp_tunnel { int magic; /* Should be L2TP_TUNNEL_MAGIC */ + + unsigned long dead; + struct rcu_head rcu; rwlock_t hlist_lock; /* protect session_hlist */ bool acpt_newsess; /* Indicates whether this @@ -255,7 +258,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); -int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); +void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, -- cgit v1.2.3-70-g09d2 From 89fcbb564f4a64c439d597c2702f990eed49c8a1 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 21 Sep 2017 19:01:36 -0600 Subject: netfilter: xt_socket: Restore mark from full sockets only An out of bounds error was detected on an ARM64 target with Android based kernel 4.9. This occurs while trying to restore mark on a skb from an inet request socket. BUG: KASAN: slab-out-of-bounds in socket_match.isra.2+0xc8/0x1f0 net/netfilter/xt_socket.c:248 Read of size 4 at addr ffffffc06a8d824c by task syz-fuzzer/1532 CPU: 7 PID: 1532 Comm: syz-fuzzer Tainted: G W O 4.9.41+ #1 Call trace: [] dump_backtrace+0x0/0x440 arch/arm64/kernel/traps.c:76 [] show_stack+0x28/0x38 arch/arm64/kernel/traps.c:226 [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0xe4/0x134 lib/dump_stack.c:51 [] print_address_description+0x68/0x258 mm/kasan/report.c:248 [] kasan_report_error mm/kasan/report.c:347 [inline] [] kasan_report.part.2+0x228/0x2f0 mm/kasan/report.c:371 [] kasan_report+0x5c/0x70 mm/kasan/report.c:372 [] check_memory_region_inline mm/kasan/kasan.c:308 [inline] [] __asan_load4+0x88/0xa0 mm/kasan/kasan.c:740 [] socket_match.isra.2+0xc8/0x1f0 net/netfilter/xt_socket.c:248 [] socket_mt4_v1_v2_v3+0x3c/0x48 net/netfilter/xt_socket.c:272 [] ipt_do_table+0x54c/0xad8 net/ipv4/netfilter/ip_tables.c:311 [] iptable_mangle_hook+0x6c/0x220 net/ipv4/netfilter/iptable_mangle.c:90 ... Allocated by task 1532: save_stack_trace_tsk+0x0/0x2a0 arch/arm64/kernel/stacktrace.c:131 save_stack_trace+0x28/0x38 arch/arm64/kernel/stacktrace.c:215 save_stack mm/kasan/kasan.c:495 [inline] set_track mm/kasan/kasan.c:507 [inline] kasan_kmalloc+0xd8/0x188 mm/kasan/kasan.c:599 kasan_slab_alloc+0x14/0x20 mm/kasan/kasan.c:537 slab_post_alloc_hook mm/slab.h:417 [inline] slab_alloc_node mm/slub.c:2728 [inline] slab_alloc mm/slub.c:2736 [inline] kmem_cache_alloc+0x14c/0x2e8 mm/slub.c:2741 reqsk_alloc include/net/request_sock.h:87 [inline] inet_reqsk_alloc+0x4c/0x238 net/ipv4/tcp_input.c:6236 tcp_conn_request+0x2b0/0xea8 net/ipv4/tcp_input.c:6341 tcp_v4_conn_request+0xe0/0x100 net/ipv4/tcp_ipv4.c:1256 tcp_rcv_state_process+0x384/0x18a8 net/ipv4/tcp_input.c:5926 tcp_v4_do_rcv+0x2f0/0x3e0 net/ipv4/tcp_ipv4.c:1430 tcp_v4_rcv+0x1278/0x1350 net/ipv4/tcp_ipv4.c:1709 ip_local_deliver_finish+0x174/0x3e0 net/ipv4/ip_input.c:216 v1->v2: Change socket_mt6_v1_v2_v3() as well as mentioned by Eric v2->v3: Put the correct fixes tag Fixes: 01555e74bde5 ("netfilter: xt_socket: add XT_SOCKET_RESTORESKMARK flag") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index e75ef39669c5..575d2153e3b8 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -76,7 +76,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && - transparent) + transparent && sk_fullsock(sk)) pskb->mark = sk->sk_mark; if (sk != skb->sk) @@ -133,7 +133,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && - transparent) + transparent && sk_fullsock(sk)) pskb->mark = sk->sk_mark; if (sk != skb->sk) -- cgit v1.2.3-70-g09d2 From 48596a8ddc46f96afb6a2cd72787cb15d6bb01fc Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 23 Sep 2017 23:37:40 +0200 Subject: netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses Wrong comparison prevented the hash types to add a range with more than 2^31 addresses but reported as a success. Fixes Netfilter's bugzilla id #1005, reported by Oleg Serditov and Oliver Ford. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ip.c | 22 ++++++++++++---------- net/netfilter/ipset/ip_set_hash_ipmark.c | 2 +- net/netfilter/ipset/ip_set_hash_ipport.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 ++-- net/netfilter/ipset/ip_set_hash_net.c | 2 +- net/netfilter/ipset/ip_set_hash_netiface.c | 2 +- net/netfilter/ipset/ip_set_hash_netnet.c | 4 ++-- net/netfilter/ipset/ip_set_hash_netport.c | 2 +- net/netfilter/ipset/ip_set_hash_netportnet.c | 4 ++-- 10 files changed, 24 insertions(+), 22 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 20bfbd315f61..613eb212cb48 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -123,13 +123,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; ip &= ip_set_hostmask(h->netmask); + e.ip = htonl(ip); + if (e.ip == 0) + return -IPSET_ERR_HASH_ELEM; - if (adt == IPSET_TEST) { - e.ip = htonl(ip); - if (e.ip == 0) - return -IPSET_ERR_HASH_ELEM; + if (adt == IPSET_TEST) return adtfn(set, &e, &ext, &ext, flags); - } ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { @@ -148,17 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); - if (retried) + if (retried) { ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip += hosts) { e.ip = htonl(ip); - if (e.ip == 0) - return -IPSET_ERR_HASH_ELEM; + } + for (; ip <= ip_to;) { ret = adtfn(set, &e, &ext, &ext, flags); - if (ret && !ip_set_eexist(ret, flags)) return ret; + ip += hosts; + e.ip = htonl(ip); + if (e.ip == 0) + return 0; + ret = 0; } return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index b64cf14e8352..f3ba8348cf9d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -149,7 +149,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { e.ip = htonl(ip); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index f438740e6c6a..ddb8039ec1d2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -178,7 +178,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6215fb898c50..a7f4d7a85420 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -185,7 +185,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5ab1b99a53c2..a2f19b9906e9 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -271,7 +271,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { e.ip = htonl(ip); p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; @@ -281,7 +281,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip == ntohl(h->next.ip) && p == ntohs(h->next.port) ? ntohl(h->next.ip2) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip2 = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, &cidr); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 5d9e895452e7..1c67a1761e45 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -193,7 +193,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 44cf11939c91..d417074f1c1a 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -255,7 +255,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index db614e13b193..7f9ae2e9645b 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -250,13 +250,13 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip[0]); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip[0] = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); ip2 = (retried && ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip[1] = htonl(ip2); last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 54b64b6cd0cd..e6ef382febe4 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -241,7 +241,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index aff846960ac4..8602f2595a1a 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -291,7 +291,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip[0]); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip[0] = htonl(ip); ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) @@ -301,7 +301,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip2 = (retried && ip == ntohl(h->next.ip[0]) && p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip[1] = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); -- cgit v1.2.3-70-g09d2 From e23ed762db7ed1950a6408c3be80bc56909ab3d4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 11:57:54 +0200 Subject: netfilter: ipset: pernet ops must be unregistered last Removing the ipset module leaves a small window where one cpu performs module removal while another runs a command like 'ipset flush'. ipset uses net_generic(), unregistering the pernet ops frees this storage area. Fix it by first removing the user-visible api handlers and the pernet ops last. Fixes: 1785e8f473082 ("netfiler: ipset: Add net namespace for ipset") Reported-by: Li Shuang Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e495b5e484b1..a7f049ff3049 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -2072,25 +2072,28 @@ static struct pernet_operations ip_set_net_ops = { static int __init ip_set_init(void) { - int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + int ret = register_pernet_subsys(&ip_set_net_ops); + if (ret) { + pr_err("ip_set: cannot register pernet_subsys.\n"); + return ret; + } + + ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); + unregister_pernet_subsys(&ip_set_net_ops); return ret; } + ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + unregister_pernet_subsys(&ip_set_net_ops); return ret; } - ret = register_pernet_subsys(&ip_set_net_ops); - if (ret) { - pr_err("ip_set: cannot register pernet_subsys.\n"); - nf_unregister_sockopt(&so_set); - nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - return ret; - } + pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); return 0; } @@ -2098,9 +2101,10 @@ ip_set_init(void) static void __exit ip_set_fini(void) { - unregister_pernet_subsys(&ip_set_net_ops); nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + + unregister_pernet_subsys(&ip_set_net_ops); pr_debug("these are the famous last words\n"); } -- cgit v1.2.3-70-g09d2 From d85fc17beeb06f9979d63fe4d9fbffbb1a00bba4 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Mon, 25 Sep 2017 10:48:47 +0300 Subject: aquantia: Setup max_mtu in ndev to enable jumbo frames Although hardware is capable for almost 16K MTU, without max_mtu field correctly set it only allows standard MTU to be used. This patch enables max MTU, calculating it from hardware maximum frame size of 16352 octets (including FCS). Fixes: 5513e16421cb ("net: ethernet: aquantia: Fixes for aq_ndev_change_mtu") Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 11 ++--------- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h | 2 +- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 6ac9e2602d6d..bf26a59a9d8e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -214,7 +214,6 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops, SET_NETDEV_DEV(ndev, dev); ndev->if_port = port; - ndev->min_mtu = ETH_MIN_MTU; self->ndev = ndev; self->aq_pci_func = aq_pci_func; @@ -283,6 +282,7 @@ int aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->features = aq_hw_caps->hw_features; self->ndev->priv_flags = aq_hw_caps->hw_priv_flags; self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN; + self->ndev->max_mtu = self->aq_hw_caps.mtu - ETH_FCS_LEN - ETH_HLEN; return 0; } @@ -693,16 +693,9 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu) { - int err = 0; - - if (new_mtu > self->aq_hw_caps.mtu) { - err = -EINVAL; - goto err_exit; - } self->aq_nic_cfg.mtu = new_mtu; -err_exit: - return err; + return 0; } int aq_nic_set_mac(struct aq_nic_s *self, struct net_device *ndev) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index f3957e930340..fcf89e25a773 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -16,7 +16,7 @@ #include "../aq_common.h" -#define HW_ATL_B0_MTU_JUMBO (16000U) +#define HW_ATL_B0_MTU_JUMBO 16352U #define HW_ATL_B0_MTU 1514U #define HW_ATL_B0_TX_RINGS 4U -- cgit v1.2.3-70-g09d2 From 3aec6412e007b294d4c135f5c7ed5e5ecf37dd2e Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Mon, 25 Sep 2017 10:48:48 +0300 Subject: aquantia: Fix Tx queue hangups Driver did a poor job in managing its Tx queues: Sometimes it could stop tx queues due to link down condition in aq_nic_xmit - but never waked up them. That led to Tx path total suspend. This patch fixes this and improves generic queue management: - introduces queue restart counter - uses generic netif_ interface to disable and enable tx path - refactors link up/down condition and introduces dmesg log event when link changes. - introduces new constant for minimum descriptors count required for queue wakeup Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 4 ++ drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 91 +++++++++++------------- drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 2 - drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 26 +++++++ drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 4 ++ drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 8 +-- 6 files changed, 76 insertions(+), 59 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 214986436ece..0fdaaa643073 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -51,6 +51,10 @@ #define AQ_CFG_SKB_FRAGS_MAX 32U +/* Number of descriptors available in one ring to resume this ring queue + */ +#define AQ_CFG_RESTART_DESC_THRES (AQ_CFG_SKB_FRAGS_MAX * 2) + #define AQ_CFG_NAPI_WEIGHT 64U #define AQ_CFG_MULTICAST_ADDRESS_MAX 32U diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index bf26a59a9d8e..072a55029f04 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -119,6 +119,35 @@ int aq_nic_cfg_start(struct aq_nic_s *self) return 0; } +static int aq_nic_update_link_status(struct aq_nic_s *self) +{ + int err = self->aq_hw_ops.hw_get_link_status(self->aq_hw); + + if (err) + return err; + + if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps) + pr_info("%s: link change old %d new %d\n", + AQ_CFG_DRV_NAME, self->link_status.mbps, + self->aq_hw->aq_link_status.mbps); + + self->link_status = self->aq_hw->aq_link_status; + if (!netif_carrier_ok(self->ndev) && self->link_status.mbps) { + aq_utils_obj_set(&self->header.flags, + AQ_NIC_FLAG_STARTED); + aq_utils_obj_clear(&self->header.flags, + AQ_NIC_LINK_DOWN); + netif_carrier_on(self->ndev); + netif_tx_wake_all_queues(self->ndev); + } + if (netif_carrier_ok(self->ndev) && !self->link_status.mbps) { + netif_carrier_off(self->ndev); + netif_tx_disable(self->ndev); + aq_utils_obj_set(&self->header.flags, AQ_NIC_LINK_DOWN); + } + return 0; +} + static void aq_nic_service_timer_cb(unsigned long param) { struct aq_nic_s *self = (struct aq_nic_s *)param; @@ -131,26 +160,13 @@ static void aq_nic_service_timer_cb(unsigned long param) if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY)) goto err_exit; - err = self->aq_hw_ops.hw_get_link_status(self->aq_hw); - if (err < 0) + err = aq_nic_update_link_status(self); + if (err) goto err_exit; - self->link_status = self->aq_hw->aq_link_status; - self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw, self->aq_nic_cfg.is_interrupt_moderation); - if (self->link_status.mbps) { - aq_utils_obj_set(&self->header.flags, - AQ_NIC_FLAG_STARTED); - aq_utils_obj_clear(&self->header.flags, - AQ_NIC_LINK_DOWN); - netif_carrier_on(self->ndev); - } else { - netif_carrier_off(self->ndev); - aq_utils_obj_set(&self->header.flags, AQ_NIC_LINK_DOWN); - } - memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s)); memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s)); for (i = AQ_DIMOF(self->aq_vec); i--;) { @@ -240,7 +256,6 @@ err_exit: int aq_nic_ndev_register(struct aq_nic_s *self) { int err = 0; - unsigned int i = 0U; if (!self->ndev) { err = -EINVAL; @@ -262,8 +277,7 @@ int aq_nic_ndev_register(struct aq_nic_s *self) netif_carrier_off(self->ndev); - for (i = AQ_CFG_VECS_MAX; i--;) - aq_nic_ndev_queue_stop(self, i); + netif_tx_disable(self->ndev); err = register_netdev(self->ndev); if (err < 0) @@ -318,12 +332,8 @@ struct aq_nic_s *aq_nic_alloc_hot(struct net_device *ndev) err = -EINVAL; goto err_exit; } - if (netif_running(ndev)) { - unsigned int i; - - for (i = AQ_CFG_VECS_MAX; i--;) - netif_stop_subqueue(ndev, i); - } + if (netif_running(ndev)) + netif_tx_disable(ndev); for (self->aq_vecs = 0; self->aq_vecs < self->aq_nic_cfg.vecs; self->aq_vecs++) { @@ -383,16 +393,6 @@ err_exit: return err; } -void aq_nic_ndev_queue_start(struct aq_nic_s *self, unsigned int idx) -{ - netif_start_subqueue(self->ndev, idx); -} - -void aq_nic_ndev_queue_stop(struct aq_nic_s *self, unsigned int idx) -{ - netif_stop_subqueue(self->ndev, idx); -} - int aq_nic_start(struct aq_nic_s *self) { struct aq_vec_s *aq_vec = NULL; @@ -451,10 +451,6 @@ int aq_nic_start(struct aq_nic_s *self) goto err_exit; } - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) - aq_nic_ndev_queue_start(self, i); - err = netif_set_real_num_tx_queues(self->ndev, self->aq_vecs); if (err < 0) goto err_exit; @@ -463,6 +459,8 @@ int aq_nic_start(struct aq_nic_s *self) if (err < 0) goto err_exit; + netif_tx_start_all_queues(self->ndev); + err_exit: return err; } @@ -602,7 +600,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs; unsigned int tc = 0U; int err = NETDEV_TX_OK; - bool is_nic_in_bad_state; frags = skb_shinfo(skb)->nr_frags + 1; @@ -613,13 +610,10 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) goto err_exit; } - is_nic_in_bad_state = aq_utils_obj_test(&self->header.flags, - AQ_NIC_FLAGS_IS_NOT_TX_READY) || - (aq_ring_avail_dx(ring) < - AQ_CFG_SKB_FRAGS_MAX); + aq_ring_update_queue_state(ring); - if (is_nic_in_bad_state) { - aq_nic_ndev_queue_stop(self, ring->idx); + /* Above status update may stop the queue. Check this. */ + if (__netif_subqueue_stopped(self->ndev, ring->idx)) { err = NETDEV_TX_BUSY; goto err_exit; } @@ -631,9 +625,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) ring, frags); if (err >= 0) { - if (aq_ring_avail_dx(ring) < AQ_CFG_SKB_FRAGS_MAX + 1) - aq_nic_ndev_queue_stop(self, ring->idx); - ++ring->stats.tx.packets; ring->stats.tx.bytes += skb->len; } @@ -898,9 +889,7 @@ int aq_nic_stop(struct aq_nic_s *self) struct aq_vec_s *aq_vec = NULL; unsigned int i = 0U; - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) - aq_nic_ndev_queue_stop(self, i); + netif_tx_disable(self->ndev); del_timer_sync(&self->service_timer); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 7fc2a5ecb2b7..0ddd556ff901 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -83,8 +83,6 @@ struct net_device *aq_nic_get_ndev(struct aq_nic_s *self); int aq_nic_init(struct aq_nic_s *self); int aq_nic_cfg_start(struct aq_nic_s *self); int aq_nic_ndev_register(struct aq_nic_s *self); -void aq_nic_ndev_queue_start(struct aq_nic_s *self, unsigned int idx); -void aq_nic_ndev_queue_stop(struct aq_nic_s *self, unsigned int idx); void aq_nic_ndev_free(struct aq_nic_s *self); int aq_nic_start(struct aq_nic_s *self); int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 4eee1996a825..02f79b0640ba 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -104,6 +104,32 @@ int aq_ring_init(struct aq_ring_s *self) return 0; } +void aq_ring_update_queue_state(struct aq_ring_s *ring) +{ + if (aq_ring_avail_dx(ring) <= AQ_CFG_SKB_FRAGS_MAX) + aq_ring_queue_stop(ring); + else if (aq_ring_avail_dx(ring) > AQ_CFG_RESTART_DESC_THRES) + aq_ring_queue_wake(ring); +} + +void aq_ring_queue_wake(struct aq_ring_s *ring) +{ + struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic); + + if (__netif_subqueue_stopped(ndev, ring->idx)) { + netif_wake_subqueue(ndev, ring->idx); + ring->stats.tx.queue_restarts++; + } +} + +void aq_ring_queue_stop(struct aq_ring_s *ring) +{ + struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic); + + if (!__netif_subqueue_stopped(ndev, ring->idx)) + netif_stop_subqueue(ndev, ring->idx); +} + void aq_ring_tx_clean(struct aq_ring_s *self) { struct device *dev = aq_nic_get_dev(self->aq_nic); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 782176c5f4f8..24523b5ac68c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -94,6 +94,7 @@ struct aq_ring_stats_tx_s { u64 errors; u64 packets; u64 bytes; + u64 queue_restarts; }; union aq_ring_stats_s { @@ -147,6 +148,9 @@ struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self, int aq_ring_init(struct aq_ring_s *self); void aq_ring_rx_deinit(struct aq_ring_s *self); void aq_ring_free(struct aq_ring_s *self); +void aq_ring_update_queue_state(struct aq_ring_s *ring); +void aq_ring_queue_wake(struct aq_ring_s *ring); +void aq_ring_queue_stop(struct aq_ring_s *ring); void aq_ring_tx_clean(struct aq_ring_s *self); int aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index ebf588004c46..305ff8ffac2c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -59,12 +59,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget) if (ring[AQ_VEC_TX_ID].sw_head != ring[AQ_VEC_TX_ID].hw_head) { aq_ring_tx_clean(&ring[AQ_VEC_TX_ID]); - - if (aq_ring_avail_dx(&ring[AQ_VEC_TX_ID]) > - AQ_CFG_SKB_FRAGS_MAX) { - aq_nic_ndev_queue_start(self->aq_nic, - ring[AQ_VEC_TX_ID].idx); - } + aq_ring_update_queue_state(&ring[AQ_VEC_TX_ID]); was_tx_cleaned = true; } @@ -364,6 +359,7 @@ void aq_vec_add_stats(struct aq_vec_s *self, stats_tx->packets += tx->packets; stats_tx->bytes += tx->bytes; stats_tx->errors += tx->errors; + stats_tx->queue_restarts += tx->queue_restarts; } } -- cgit v1.2.3-70-g09d2 From a7bb1bea3a296549ebfc28afa76276ef392f9afa Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Mon, 25 Sep 2017 10:48:49 +0300 Subject: aquantia: Fix transient invalid link down/up indications Due to a bug in aquantia atlantic card firmware, it sometimes reports invalid link speed bits. That caused driver to report link down events, although link itself is totally fine. This patch ignores such out of blue readings. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 4f5ec9a0fbfb..bf734b32e44b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -351,8 +351,7 @@ int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self) break; default: - link_status->mbps = 0U; - break; + return -EBUSY; } } -- cgit v1.2.3-70-g09d2 From c7545689244b50c562b1fbbc71905fba224c8a05 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Mon, 25 Sep 2017 10:48:50 +0300 Subject: atlantic: fix iommu errors Call skb_frag_dma_map multiple times if tx length is greater than device max and avoid processing tx ring until entire packet has been sent. Signed-off-by: Igor Russkikh Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 43 ++++++++++++++---------- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 27 ++++++++++----- drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 6 ++-- 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 072a55029f04..0a5bb4114eb4 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -473,6 +473,7 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, unsigned int nr_frags = skb_shinfo(skb)->nr_frags; unsigned int frag_count = 0U; unsigned int dx = ring->sw_tail; + struct aq_ring_buff_s *first = NULL; struct aq_ring_buff_s *dx_buff = &ring->buff_ring[dx]; if (unlikely(skb_is_gso(skb))) { @@ -483,6 +484,7 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, dx_buff->len_l4 = tcp_hdrlen(skb); dx_buff->mss = skb_shinfo(skb)->gso_size; dx_buff->is_txc = 1U; + dx_buff->eop_index = 0xffffU; dx_buff->is_ipv6 = (ip_hdr(skb)->version == 6) ? 1U : 0U; @@ -502,6 +504,7 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) goto exit; + first = dx_buff; dx_buff->len_pkt = skb->len; dx_buff->is_sop = 1U; dx_buff->is_mapped = 1U; @@ -530,40 +533,46 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, for (; nr_frags--; ++frag_count) { unsigned int frag_len = 0U; + unsigned int buff_offset = 0U; + unsigned int buff_size = 0U; dma_addr_t frag_pa; skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_count]; frag_len = skb_frag_size(frag); - frag_pa = skb_frag_dma_map(aq_nic_get_dev(self), frag, 0, - frag_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), frag_pa))) - goto mapping_error; + while (frag_len) { + if (frag_len > AQ_CFG_TX_FRAME_MAX) + buff_size = AQ_CFG_TX_FRAME_MAX; + else + buff_size = frag_len; + + frag_pa = skb_frag_dma_map(aq_nic_get_dev(self), + frag, + buff_offset, + buff_size, + DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), + frag_pa))) + goto mapping_error; - while (frag_len > AQ_CFG_TX_FRAME_MAX) { dx = aq_ring_next_dx(ring, dx); dx_buff = &ring->buff_ring[dx]; dx_buff->flags = 0U; - dx_buff->len = AQ_CFG_TX_FRAME_MAX; + dx_buff->len = buff_size; dx_buff->pa = frag_pa; dx_buff->is_mapped = 1U; + dx_buff->eop_index = 0xffffU; + + frag_len -= buff_size; + buff_offset += buff_size; - frag_len -= AQ_CFG_TX_FRAME_MAX; - frag_pa += AQ_CFG_TX_FRAME_MAX; ++ret; } - - dx = aq_ring_next_dx(ring, dx); - dx_buff = &ring->buff_ring[dx]; - - dx_buff->flags = 0U; - dx_buff->len = frag_len; - dx_buff->pa = frag_pa; - dx_buff->is_mapped = 1U; - ++ret; } + first->eop_index = dx; dx_buff->is_eop = 1U; dx_buff->skb = skb; goto exit; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 02f79b0640ba..0654e0c76bc2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -104,6 +104,12 @@ int aq_ring_init(struct aq_ring_s *self) return 0; } +static inline bool aq_ring_dx_in_range(unsigned int h, unsigned int i, + unsigned int t) +{ + return (h < t) ? ((h < i) && (i < t)) : ((h < i) || (i < t)); +} + void aq_ring_update_queue_state(struct aq_ring_s *ring) { if (aq_ring_avail_dx(ring) <= AQ_CFG_SKB_FRAGS_MAX) @@ -139,23 +145,28 @@ void aq_ring_tx_clean(struct aq_ring_s *self) struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head]; if (likely(buff->is_mapped)) { - if (unlikely(buff->is_sop)) + if (unlikely(buff->is_sop)) { + if (!buff->is_eop && + buff->eop_index != 0xffffU && + (!aq_ring_dx_in_range(self->sw_head, + buff->eop_index, + self->hw_head))) + break; + dma_unmap_single(dev, buff->pa, buff->len, DMA_TO_DEVICE); - else + } else { dma_unmap_page(dev, buff->pa, buff->len, DMA_TO_DEVICE); + } } if (unlikely(buff->is_eop)) dev_kfree_skb_any(buff->skb); - } -} -static inline unsigned int aq_ring_dx_in_range(unsigned int h, unsigned int i, - unsigned int t) -{ - return (h < t) ? ((h < i) && (i < t)) : ((h < i) || (i < t)); + buff->pa = 0U; + buff->eop_index = 0xffffU; + } } #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 24523b5ac68c..5844078764bd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -65,7 +65,7 @@ struct __packed aq_ring_buff_s { }; union { struct { - u32 len:16; + u16 len; u32 is_ip_cso:1; u32 is_udp_cso:1; u32 is_tcp_cso:1; @@ -77,8 +77,10 @@ struct __packed aq_ring_buff_s { u32 is_cleaned:1; u32 is_error:1; u32 rsvd3:6; + u16 eop_index; + u16 rsvd4; }; - u32 flags; + u64 flags; }; }; -- cgit v1.2.3-70-g09d2 From c2cc187e53011c1c4931055984657da9085c763b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 25 Sep 2017 13:19:26 +0300 Subject: sctp: Fix a big endian bug in sctp_diag_dump() The sctp_for_each_transport() function takes an pointer to int. The cb->args[] array holds longs so it's only using the high 32 bits. It works on little endian system but will break on big endian 64 bit machines. Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump") Signed-off-by: Dan Carpenter Acked-by: Neil Horman Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/sctp_diag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 22ed01a76b19..a72a7d925d46 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -463,6 +463,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, .r = r, .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN), }; + int pos = cb->args[2]; /* eps hashtable dumps * args: @@ -493,7 +494,8 @@ skip: goto done; sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, - net, (int *)&cb->args[2], &commp); + net, &pos, &commp); + cb->args[2] = pos; done: cb->args[1] = cb->args[4]; -- cgit v1.2.3-70-g09d2 From 83b31c2a5fdd4fb3a4ec84c59a962e816d0bc9de Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Tue, 26 Sep 2017 15:51:28 +0200 Subject: pinctrl/amd: Fix build dependency on pinmux code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 79d2c8bede2c93f943 ("pinctrl/amd: save pin registers over suspend/resume") caused the following compilation errors: drivers/pinctrl/pinctrl-amd.c: In function ‘amd_gpio_should_save’: drivers/pinctrl/pinctrl-amd.c:741:8: error: ‘const struct pin_desc’ has no member named ‘mux_owner’ if (pd->mux_owner || pd->gpio_owner || ^ drivers/pinctrl/pinctrl-amd.c:741:25: error: ‘const struct pin_desc’ has no member named ‘gpio_owner’ if (pd->mux_owner || pd->gpio_owner || We need to enable CONFIG_PINMUX for this driver as well. Cc: stable@vger.kernel.org Fixes: 79d2c8bede2c93f943 ("pinctrl/amd: save pin registers over suspend/resume") Signed-off-by: Petr Mladek Signed-off-by: Linus Walleij --- drivers/pinctrl/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 1778cf4f81c7..82cd8b08d71f 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -100,6 +100,7 @@ config PINCTRL_AMD tristate "AMD GPIO pin control" depends on GPIOLIB select GPIOLIB_IRQCHIP + select PINMUX select PINCONF select GENERIC_PINCONF help -- cgit v1.2.3-70-g09d2 From a5f3d8a5eaaf917878f07998e6f1ea46024e6bab Mon Sep 17 00:00:00 2001 From: Coly Li Date: Tue, 26 Sep 2017 17:54:12 +0800 Subject: bcache: use llist_for_each_entry_safe() in __closure_wake_up() Commit 09b3efec ("bcache: Don't reinvent the wheel but use existing llist API") replaces the following while loop by llist_for_each_entry(), - - while (reverse) { - cl = container_of(reverse, struct closure, list); - reverse = llist_next(reverse); - + llist_for_each_entry(cl, reverse, list) { closure_set_waiting(cl, 0); closure_sub(cl, CLOSURE_WAITING + 1); } This modification introduces a potential race by iterating a corrupted list. Here is how it happens. In the above modification, closure_sub() may wake up a process which is waiting on reverse list. If this process decides to wait again by calling closure_wait(), its cl->list will be added to another wait list. Then when llist_for_each_entry() continues to iterate next node, it will travel on another new wait list which is added in closure_wait(), not the original reverse list in __closure_wake_up(). It is more probably to happen on UP machine because the waked up process may preempt the process which wakes up it. Use llist_for_each_entry_safe() will fix the issue, the safe version fetch next node before waking up a process. Then the copy of next node will make sure list iteration stays on original reverse list. Fixes: 09b3efec81de ("bcache: Don't reinvent the wheel but use existing llist API") Signed-off-by: Coly Li Reported-by: Michael Lyle Reviewed-by: Byungchul Park Signed-off-by: Jens Axboe --- drivers/md/bcache/closure.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 7d5286b05036..1841d0359bac 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -64,7 +64,7 @@ EXPORT_SYMBOL(closure_put); void __closure_wake_up(struct closure_waitlist *wait_list) { struct llist_node *list; - struct closure *cl; + struct closure *cl, *t; struct llist_node *reverse = NULL; list = llist_del_all(&wait_list->list); @@ -73,7 +73,7 @@ void __closure_wake_up(struct closure_waitlist *wait_list) reverse = llist_reverse_order(list); /* Then do the wakeups */ - llist_for_each_entry(cl, reverse, list) { + llist_for_each_entry_safe(cl, t, reverse, list) { closure_set_waiting(cl, 0); closure_sub(cl, CLOSURE_WAITING + 1); } -- cgit v1.2.3-70-g09d2 From 2e08d20d777e997bf37806b22b471f98fbe6b693 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 27 Sep 2017 16:34:59 -0500 Subject: percpu: fix starting offset for chunk statistics traversal This patch fixes the starting offset used when scanning chunks to compute the chunk statistics. The value start_offset (and end_offset) are managed in bytes while the traversal occurs over bits. Thus for the reserved and dynamic chunk, it may incorrectly skip over the initial allocations. Signed-off-by: Dennis Zhou Signed-off-by: Tejun Heo --- mm/percpu-stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index 6142484e88f7..7a58460bfd27 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -73,7 +73,7 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, last_alloc + 1 : 0; as_len = 0; - start = chunk->start_offset; + start = chunk->start_offset / PCPU_MIN_ALLOC_SIZE; /* * If a bit is set in the allocation map, the bound_map identifies -- cgit v1.2.3-70-g09d2 From 38e8a5c040d3ec99a8351c688dcdf0f549611565 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 21 Aug 2017 12:04:50 +0300 Subject: net/mlx5e: IPoIB, Fix access to invalid memory address When cleaning rdma netdevice we need to save the mdev pointer because priv is released when we release netdev. This bug was found using the kernel address sanitizer (KASAN). use-after-free in mlx5_rdma_netdev_free+0xe3/0x100 [mlx5_core] Fixes: 48935bbb7ae8 ("net/mlx5e: IPoIB, Add netdevice profile skeleton") Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 85298051a3e4..145e392ab849 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -572,12 +572,13 @@ void mlx5_rdma_netdev_free(struct net_device *netdev) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); const struct mlx5e_profile *profile = priv->profile; + struct mlx5_core_dev *mdev = priv->mdev; mlx5e_detach_netdev(priv); profile->cleanup(priv); destroy_workqueue(priv->wq); free_netdev(netdev); - mlx5e_destroy_mdev_resources(priv->mdev); + mlx5e_destroy_mdev_resources(mdev); } EXPORT_SYMBOL(mlx5_rdma_netdev_free); -- cgit v1.2.3-70-g09d2 From 99d3cd27f755d63fd6cf85169eaa873d90769aa5 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Thu, 24 Aug 2017 17:21:44 +0300 Subject: net/mlx5: Fix FPGA capability location Currently, FPGA capability is located in (mdev)->caps.hca_cur, change the location to be (mdev)->caps.fpga, since hca_cur is reserved for HCA device capabilities. Fixes: e29341fb3a5b ("net/mlx5: FPGA, Add basic support for Innova") Signed-off-by: Inbar Karmy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c | 3 +-- include/linux/mlx5/device.h | 5 ++--- include/linux/mlx5/driver.h | 1 + 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c index e37453d838db..c0fd2212e890 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c @@ -71,11 +71,11 @@ int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, return 0; } -int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps) +int mlx5_fpga_caps(struct mlx5_core_dev *dev) { u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0}; - return mlx5_core_access_reg(dev, in, sizeof(in), caps, + return mlx5_core_access_reg(dev, in, sizeof(in), dev->caps.fpga, MLX5_ST_SZ_BYTES(fpga_cap), MLX5_REG_FPGA_CAP, 0, 0); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h index 94bdfd47c3f0..d05233c9b4f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h @@ -65,7 +65,7 @@ struct mlx5_fpga_qp_counters { u64 rx_total_drop; }; -int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps); +int mlx5_fpga_caps(struct mlx5_core_dev *dev); int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query); int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op); int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 9034e9960a76..dc8970346521 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -139,8 +139,7 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) if (err) goto out; - err = mlx5_fpga_caps(fdev->mdev, - fdev->mdev->caps.hca_cur[MLX5_CAP_FPGA]); + err = mlx5_fpga_caps(fdev->mdev); if (err) goto out; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index eaf4ad209c8f..e32dbc4934db 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -980,7 +980,6 @@ enum mlx5_cap_type { MLX5_CAP_RESERVED, MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, - MLX5_CAP_FPGA, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1110,10 +1109,10 @@ enum mlx5_mcam_feature_groups { MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) #define MLX5_CAP_FPGA(mdev, cap) \ - MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) + MLX5_GET(fpga_cap, (mdev)->caps.fpga, cap) #define MLX5_CAP64_FPGA(mdev, cap) \ - MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) + MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap) enum { MLX5_CMD_STAT_OK = 0x0, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 02ff700e4f30..401c8972cc3a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -774,6 +774,7 @@ struct mlx5_core_dev { u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; + u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; } caps; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; -- cgit v1.2.3-70-g09d2 From 16f1c5bb3ed75b3cf3ced537db40f7e1a244debe Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Sun, 30 Jul 2017 11:02:51 +0300 Subject: net/mlx5: Check device capability for maximum flow counters Added check for the maximal number of flow counters attached to rule (FTE). Fixes: bd5251dbf156b ('net/mlx5_core: Introduce flow steering destination of type counter') Signed-off-by: Raed Salem Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 8 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 11 +++++++++++ include/linux/mlx5/mlx5_ifc.h | 3 ++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index e0d0efd903bc..36ecc2b2e187 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -293,6 +293,9 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, } if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev, + log_max_flow_counter, + ft->type)); int list_size = 0; list_for_each_entry(dst, &fte->node.children, node.list) { @@ -305,12 +308,17 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; } + if (list_size > max_list_size) { + err = -EINVAL; + goto err_out; + } MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, list_size); } err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +err_out: kvfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 5509a752f98e..48dd78975062 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -52,6 +52,7 @@ enum fs_flow_table_type { FS_FT_FDB = 0X4, FS_FT_SNIFFER_RX = 0X5, FS_FT_SNIFFER_TX = 0X6, + FS_FT_MAX_TYPE = FS_FT_SNIFFER_TX, }; enum fs_flow_table_op_mod { @@ -260,4 +261,14 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev); #define fs_for_each_dst(pos, fte) \ fs_list_for_each_entry(pos, &(fte)->node.children) +#define MLX5_CAP_FLOWTABLE_TYPE(mdev, cap, type) ( \ + (type == FS_FT_NIC_RX) ? MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) : \ + (type == FS_FT_ESW_EGRESS_ACL) ? MLX5_CAP_ESW_EGRESS_ACL(mdev, cap) : \ + (type == FS_FT_ESW_INGRESS_ACL) ? MLX5_CAP_ESW_INGRESS_ACL(mdev, cap) : \ + (type == FS_FT_FDB) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ + (type == FS_FT_SNIFFER_RX) ? MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) : \ + (type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_SNIFFER_TX != FS_FT_MAX_TYPE))\ + ) + #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a528b35a022e..69772347f866 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -327,7 +327,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_80[0x18]; u8 log_max_destination[0x8]; - u8 reserved_at_a0[0x18]; + u8 log_max_flow_counter[0x8]; + u8 reserved_at_a8[0x10]; u8 log_max_flow[0x8]; u8 reserved_at_c0[0x40]; -- cgit v1.2.3-70-g09d2 From ace743214ea205c7d433562c5fa24e33bdfda7ab Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 5 Sep 2017 15:05:51 +0300 Subject: net/mlx5e: Fix erroneous freeing of encap header buffer In case the neighbour for the tunnel destination isn't valid, we send a neighbour update request but we free the encap header buffer. This is wrong, because we still need it for allocating a HW encap entry once the neighbour is available. Fix that by skipping freeing it if we wait for neighbour. Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow') Signed-off-by: Paul Blakey Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index da503e6411da..4e2fc016bdd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1564,7 +1564,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto free_encap; } fl4.flowi4_tos = tun_key->tos; fl4.daddr = tun_key->u.ipv4.dst; @@ -1573,7 +1573,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); if (err) - goto out; + goto free_encap; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -1590,7 +1590,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, */ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); if (err) - goto out; + goto free_encap; read_lock_bh(&n->lock); nud_state = n->nud_state; @@ -1630,8 +1630,9 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -out: +free_encap: kfree(encap_header); +out: if (n) neigh_release(n); return err; @@ -1668,7 +1669,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto free_encap; } fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); @@ -1678,7 +1679,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &fl6, &n, &ttl); if (err) - goto out; + goto free_encap; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -1695,7 +1696,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, */ err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); if (err) - goto out; + goto free_encap; read_lock_bh(&n->lock); nud_state = n->nud_state; @@ -1736,8 +1737,9 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, destroy_neigh_entry: mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -out: +free_encap: kfree(encap_header); +out: if (n) neigh_release(n); return err; -- cgit v1.2.3-70-g09d2 From bdd66ac0aeed971d1cb42b3aa0d11b0ea3842e09 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 Jun 2017 21:13:25 +0300 Subject: net/mlx5e: Disallow TC offloading of unsupported match/action combinations When offloading header re-write, the HW may need to adjust checksums along the packet. For IP traffic, and a case where we are asked to modify fields in the IP header, current HW supports that only for TCP and UDP. Enforce it, in this case fail the offloading attempt for non TCP/UDP packets. Fixes: d7e75a325cb2 ('net/mlx5e: Add offloading of E-Switch TC pedit (header re-write) actions') Fixes: 2f4fe4cab073 ('net/mlx5e: Add offloading of NIC TC pedit (header re-write) actions') Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 70 +++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4e2fc016bdd6..d3786005fba7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1317,6 +1317,69 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 upda return true; } +static bool modify_header_match_supported(struct mlx5_flow_spec *spec, + struct tcf_exts *exts) +{ + const struct tc_action *a; + bool modify_ip_header; + LIST_HEAD(actions); + u8 htype, ip_proto; + void *headers_v; + u16 ethertype; + int nkeys, i; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + + /* for non-IP we only re-write MACs, so we're okay */ + if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6) + goto out_ok; + + modify_ip_header = false; + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + if (!is_tcf_pedit(a)) + continue; + + nkeys = tcf_pedit_nkeys(a); + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || + htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { + modify_ip_header = true; + break; + } + } + } + + ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); + if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { + pr_info("can't offload re-write of ip proto %d\n", ip_proto); + return false; + } + +out_ok: + return true; +} + +static bool actions_match_supported(struct mlx5e_priv *priv, + struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + u32 actions; + + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + actions = flow->esw_attr->action; + else + actions = flow->nic_attr->action; + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + return modify_header_match_supported(&parse_attr->spec, exts); + + return true; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) @@ -1378,6 +1441,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + return 0; } @@ -1936,6 +2002,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + + if (!actions_match_supported(priv, exts, parse_attr, flow)) + return -EOPNOTSUPP; + return err; } -- cgit v1.2.3-70-g09d2 From b281208911a549e391d92ee6cb680dcd3d71783b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 8 Aug 2017 11:45:28 +0300 Subject: net/mlx5e: Check encap entry state when offloading tunneled flows Encap entries cached by the driver could be invalidated due to tunnel destination neighbour state changes. When attempting to offload a flow that uses a cached encap entry, we must check the entry validity and defer the offloading if the entry exists but not valid. When EAGAIN is returned, the flow offloading to hardware takes place by the neigh update code when the tunnel destination neighbour becomes connected. Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow") Signed-off-by: Vlad Buslov Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d3786005fba7..1aa2028ed995 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1859,6 +1859,7 @@ vxlan_encap_offload_err: } } + /* must verify if encap is valid or not */ if (found) goto attach_flow; @@ -1885,6 +1886,8 @@ attach_flow: *encap_dev = e->out_dev; if (e->flags & MLX5_ENCAP_ENTRY_VALID) attr->encap_id = e->encap_id; + else + err = -EAGAIN; return err; -- cgit v1.2.3-70-g09d2 From b20eab15a1d5091e45022401e75b49948e8be33f Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 12 Sep 2017 17:51:12 +0300 Subject: net/mlx5e: Print netdev features correctly in error message Use the correct formatting for netdev features. Fixes: 0e405443e803 ("net/mlx5e: Improve set features ndo resiliency") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index dfc29720ab77..84b013dc62e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3333,8 +3333,8 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { - netdev_err(netdev, "%s feature 0x%llx failed err %d\n", - enable ? "Enable" : "Disable", feature, err); + netdev_err(netdev, "%s feature %pNF failed, err %d\n", + enable ? "Enable" : "Disable", &feature, err); return err; } -- cgit v1.2.3-70-g09d2 From 1456f69ff5fbba48ed5bc86e858e945e693ba0b7 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 10 Sep 2017 10:36:06 +0300 Subject: net/mlx5e: Don't add/remove 802.1ad rules when changing 802.1Q VLAN filter Toggling of C-tag VLAN filter should not affect the "any S-tag" steering rule. Fixes: 8a271746a264 ("net/mlx5e: Receive s-tagged packets in promiscuous mode") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index f11fd07ac4dd..850cdc980ab5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -291,7 +291,7 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_del_any_vid_rules(priv); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) @@ -302,7 +302,7 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_add_any_vid_rules(priv); + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, -- cgit v1.2.3-70-g09d2 From 603e1f5bd3ca76f16688e10040545594d2e91ba4 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 13 Sep 2017 15:37:50 +0300 Subject: net/mlx5e: Fix calculated checksum offloads counters Instead of calculating the offloads counters, count them explicitly. The calculations done for these counters would result in bugs in some cases, for example: When running TCP traffic over a VXLAN tunnel with TSO enabled the following counters would increase: tx_csum_partial: 1,333,284 tx_csum_partial_inner: 29,286 tx4_csum_partial_inner: 384 tx7_csum_partial_inner: 8 tx9_csum_partial_inner: 34 tx10_csum_partial_inner: 26,807 tx11_csum_partial_inner: 287 tx12_csum_partial_inner: 27 tx16_csum_partial_inner: 6 tx25_csum_partial_inner: 1,733 Seems like tx_csum_partial increased out of nowhere. The issue is in the following calculation in mlx5e_update_sw_counters: s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner; While tx_packets increases by the number of GSO segments for each SKB, tx_csum_partial_inner will only increase by one, resulting in wrong tx_csum_partial counter. Fixes: bfe6d8d1d433 ("net/mlx5e: Reorganize ethtool statistics") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 +++------ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 1 + 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 84b013dc62e9..cc11bbbd0309 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -184,7 +184,6 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) struct mlx5e_sw_stats temp, *s = &temp; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; - u64 tx_offload_none = 0; int i, j; memset(s, 0, sizeof(*s)); @@ -199,6 +198,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; s->rx_xdp_tx += rq_stats->xdp_tx; @@ -229,14 +229,11 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_queue_dropped += sq_stats->dropped; s->tx_xmit_more += sq_stats->xmit_more; s->tx_csum_partial_inner += sq_stats->csum_partial_inner; - tx_offload_none += sq_stats->csum_none; + s->tx_csum_none += sq_stats->csum_none; + s->tx_csum_partial += sq_stats->csum_partial; } } - /* Update calculated offload counters */ - s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner; - s->rx_csum_unnecessary = s->rx_packets - s->rx_csum_none - s->rx_csum_complete; - s->link_down_events_phy = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f1dd638384d3..15a1687483cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -627,6 +627,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (lro) { skb->ip_summed = CHECKSUM_UNNECESSARY; + rq->stats.csum_unnecessary++; return; } @@ -644,7 +645,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum_level = 1; skb->encapsulation = 1; rq->stats.csum_unnecessary_inner++; + return; } + rq->stats.csum_unnecessary++; return; } csum_none: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 6d199ffb1c0b..f8637213afc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -68,6 +68,7 @@ struct mlx5e_sw_stats { u64 rx_xdp_drop; u64 rx_xdp_tx; u64 rx_xdp_tx_full; + u64 tx_csum_none; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -108,6 +109,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -339,6 +341,7 @@ struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none; u64 lro_packets; @@ -363,6 +366,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, @@ -392,6 +396,7 @@ struct mlx5e_sq_stats { u64 tso_bytes; u64 tso_inner_packets; u64 tso_inner_bytes; + u64 csum_partial; u64 csum_partial_inner; u64 nop; /* less likely accessed in data path */ @@ -408,6 +413,7 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index fee43e40fa16..1d6925d4369a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -193,6 +193,7 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct sq->stats.csum_partial_inner++; } else { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; + sq->stats.csum_partial++; } } else sq->stats.csum_none++; -- cgit v1.2.3-70-g09d2 From 480df991b869eff02a004e8fe7707900437cfcd4 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 31 Aug 2017 18:52:14 +0300 Subject: net/mlx5: Fix static checker warning on steering tracepoints code Fix this sparse complaint: drivers/net/ethernet/mellanox/mlx5/core/./diag/fs_tracepoint.h:172:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) Fixes: d9fea79171ee ('net/mlx5: Add tracepoints') Signed-off-by: Matan Barak Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 1e3a6c3e4132..80eef4163f52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -139,7 +139,7 @@ TRACE_EVENT(mlx5_fs_del_fg, {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} TRACE_EVENT(mlx5_fs_set_fte, - TP_PROTO(const struct fs_fte *fte, bool new_fte), + TP_PROTO(const struct fs_fte *fte, int new_fte), TP_ARGS(fte, new_fte), TP_STRUCT__entry( __field(const struct fs_fte *, fte) @@ -149,7 +149,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __field(u32, action) __field(u32, flow_tag) __field(u8, mask_enable) - __field(bool, new_fte) + __field(int, new_fte) __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) -- cgit v1.2.3-70-g09d2 From 353f59f4d41e9c5798a15c5c52958f25b579a3d5 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 24 Sep 2017 09:54:00 +0200 Subject: net/mlx5: Fix wrong indentation in enable SRIOV code Smatch is screaming: drivers/net/ethernet/mellanox/mlx5/core/sriov.c:112 mlx5_device_enable_sriov() warn: inconsistent indenting fix that. Fixes: 7ecf6d8ff154 ('IB/mlx5: Restore IB guid/policy for virtual functions') Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 6c48e9959b65..2a8b529ce6dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -109,7 +109,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) mlx5_core_warn(dev, "failed to restore VF %d settings, err %d\n", vf, err); - continue; + continue; } } mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); -- cgit v1.2.3-70-g09d2 From dd269db84908d4d3f7c0efed85bf9d8939fb0b9b Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 27 Sep 2017 14:25:37 +0200 Subject: xfrm: don't call xfrm_policy_cache_flush under xfrm_state_lock I might be wrong but it doesn't look like xfrm_state_lock is required for xfrm_policy_cache_flush and calling it under this lock triggers both "sleeping function called from invalid context" and "possible circular locking dependency detected" warnings on flush. Fixes: ec30d78c14a8 xfrm: add xdst pcpu cache Signed-off-by: Artem Savkov Acked-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0dab1cd79ce4..12213477cd3a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -732,12 +732,12 @@ restart: } } } +out: + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (cnt) { err = 0; xfrm_policy_cache_flush(); } -out: - spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_flush); -- cgit v1.2.3-70-g09d2 From d9ec46416de5ef83220f1c7010ee0f5d1be1d753 Mon Sep 17 00:00:00 2001 From: Sylvain Lesne Date: Mon, 18 Sep 2017 13:08:00 +0200 Subject: dmaengine: altera: fix response FIFO emptying Commit 6084fc2ec478 ("dmaengine: altera: Use macros instead of structs to describe the registers") introduced a minus sign before a register offset. This leads to soft-locks of the DMA controller, since reading the last status byte is required to pop the response from the FIFO. Failing to do so will lead to a full FIFO, which means that the DMA controller will stop processing descriptors. Signed-off-by: Sylvain Lesne Reviewed-by: Stefan Roese Signed-off-by: Vinod Koul --- drivers/dma/altera-msgdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c index 32905d5606ac..35cbf2365f68 100644 --- a/drivers/dma/altera-msgdma.c +++ b/drivers/dma/altera-msgdma.c @@ -698,7 +698,7 @@ static void msgdma_tasklet(unsigned long data) * bits. So we need to just drop these values. */ size = ioread32(mdev->resp + MSGDMA_RESP_BYTES_TRANSFERRED); - status = ioread32(mdev->resp - MSGDMA_RESP_STATUS); + status = ioread32(mdev->resp + MSGDMA_RESP_STATUS); msgdma_complete_descriptor(mdev); msgdma_chan_desc_cleanup(mdev); -- cgit v1.2.3-70-g09d2 From edf10919e5fc8dfd10e57ed72f651204559bc6ba Mon Sep 17 00:00:00 2001 From: Sylvain Lesne Date: Mon, 18 Sep 2017 13:08:01 +0200 Subject: dmaengine: altera: fix spinlock usage Since this lock is acquired in both process and IRQ context, failing to to disable IRQs when trying to acquire the lock in process context can lead to deadlocks. Signed-off-by: Sylvain Lesne Reviewed-by: Stefan Roese Signed-off-by: Vinod Koul --- drivers/dma/altera-msgdma.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c index 35cbf2365f68..339186f25a2a 100644 --- a/drivers/dma/altera-msgdma.c +++ b/drivers/dma/altera-msgdma.c @@ -212,11 +212,12 @@ struct msgdma_device { static struct msgdma_sw_desc *msgdma_get_descriptor(struct msgdma_device *mdev) { struct msgdma_sw_desc *desc; + unsigned long flags; - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); desc = list_first_entry(&mdev->free_list, struct msgdma_sw_desc, node); list_del(&desc->node); - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); INIT_LIST_HEAD(&desc->tx_list); @@ -306,13 +307,14 @@ static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx) struct msgdma_device *mdev = to_mdev(tx->chan); struct msgdma_sw_desc *new; dma_cookie_t cookie; + unsigned long flags; new = tx_to_desc(tx); - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); cookie = dma_cookie_assign(tx); list_add_tail(&new->node, &mdev->pending_list); - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); return cookie; } @@ -336,17 +338,18 @@ msgdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst, struct msgdma_extended_desc *desc; size_t copy; u32 desc_cnt; + unsigned long irqflags; desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN); - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, irqflags); if (desc_cnt > mdev->desc_free_cnt) { spin_unlock_bh(&mdev->lock); dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); return NULL; } mdev->desc_free_cnt -= desc_cnt; - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, irqflags); do { /* Allocate and populate the descriptor */ @@ -397,18 +400,19 @@ msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, u32 desc_cnt = 0, i; struct scatterlist *sg; u32 stride; + unsigned long irqflags; for_each_sg(sgl, sg, sg_len, i) desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN); - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, irqflags); if (desc_cnt > mdev->desc_free_cnt) { spin_unlock_bh(&mdev->lock); dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); return NULL; } mdev->desc_free_cnt -= desc_cnt; - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, irqflags); avail = sg_dma_len(sgl); @@ -566,10 +570,11 @@ static void msgdma_start_transfer(struct msgdma_device *mdev) static void msgdma_issue_pending(struct dma_chan *chan) { struct msgdma_device *mdev = to_mdev(chan); + unsigned long flags; - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); msgdma_start_transfer(mdev); - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); } /** @@ -634,10 +639,11 @@ static void msgdma_free_descriptors(struct msgdma_device *mdev) static void msgdma_free_chan_resources(struct dma_chan *dchan) { struct msgdma_device *mdev = to_mdev(dchan); + unsigned long flags; - spin_lock_bh(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); msgdma_free_descriptors(mdev); - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); kfree(mdev->sw_desq); } @@ -682,8 +688,9 @@ static void msgdma_tasklet(unsigned long data) u32 count; u32 __maybe_unused size; u32 __maybe_unused status; + unsigned long flags; - spin_lock(&mdev->lock); + spin_lock_irqsave(&mdev->lock, flags); /* Read number of responses that are available */ count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL); @@ -704,7 +711,7 @@ static void msgdma_tasklet(unsigned long data) msgdma_chan_desc_cleanup(mdev); } - spin_unlock(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, flags); } /** -- cgit v1.2.3-70-g09d2 From fe659bcc9b173bcfdd958ce2aec75e47651e74e1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 26 Sep 2017 15:15:22 -0400 Subject: USB: dummy-hcd: fix connection failures (wrong speed) The dummy-hcd UDC driver is not careful about the way it handles connection speeds. It ignores the module parameter that is supposed to govern the maximum connection speed and it doesn't set the HCD flags properly for the case where it ends up running at full speed. The result is that in many cases, gadget enumeration over dummy-hcd fails because the bMaxPacketSize byte in the device descriptor is set incorrectly. For example, the default settings call for a high-speed connection, but the maxpacket value for ep0 ends up being set for a Super-Speed connection. This patch fixes the problem by initializing the gadget's max_speed and the HCD flags correctly. Signed-off-by: Alan Stern CC: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/dummy_hcd.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index b1e21b3be6e1..d515ec31afe4 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -1036,7 +1036,12 @@ static int dummy_udc_probe(struct platform_device *pdev) memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; - dum->gadget.max_speed = USB_SPEED_SUPER; + if (mod_data.is_super_speed) + dum->gadget.max_speed = USB_SPEED_SUPER; + else if (mod_data.is_high_speed) + dum->gadget.max_speed = USB_SPEED_HIGH; + else + dum->gadget.max_speed = USB_SPEED_FULL; dum->gadget.dev.parent = &pdev->dev; init_dummy_udc_hw(dum); @@ -2560,8 +2565,6 @@ static struct hc_driver dummy_hcd = { .product_desc = "Dummy host controller", .hcd_priv_size = sizeof(struct dummy_hcd), - .flags = HCD_USB3 | HCD_SHARED, - .reset = dummy_setup, .start = dummy_start, .stop = dummy_stop, @@ -2590,8 +2593,12 @@ static int dummy_hcd_probe(struct platform_device *pdev) dev_info(&pdev->dev, "%s, driver " DRIVER_VERSION "\n", driver_desc); dum = *((void **)dev_get_platdata(&pdev->dev)); - if (!mod_data.is_super_speed) + if (mod_data.is_super_speed) + dummy_hcd.flags = HCD_USB3 | HCD_SHARED; + else if (mod_data.is_high_speed) dummy_hcd.flags = HCD_USB2; + else + dummy_hcd.flags = HCD_USB11; hs_hcd = usb_create_hcd(&dummy_hcd, &pdev->dev, dev_name(&pdev->dev)); if (!hs_hcd) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 0173a68bfb0ad1c72a6ee39cc485aa2c97540b98 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 26 Sep 2017 15:15:40 -0400 Subject: USB: dummy-hcd: fix infinite-loop resubmission bug The dummy-hcd HCD/UDC emulator tries not to do too much work during each timer interrupt. But it doesn't try very hard; currently all it does is limit the total amount of bulk data transferred. Other transfer types aren't limited, and URBs that transfer no data (because of an error, perhaps) don't count toward the limit, even though on a real USB bus they would consume at least a minimum overhead. This means it's possible to get the driver stuck in an infinite loop, for example, if the host class driver resubmits an URB every time it completes (which is common for interrupt URBs). Each time the URB is resubmitted it gets added to the end of the pending-URBs list, and dummy-hcd doesn't stop until that list is empty. Andrey Konovalov was able to trigger this failure mode using the syzkaller fuzzer. This patch fixes the infinite-loop problem by restricting the URBs handled during each timer interrupt to those that were already on the pending list when the interrupt routine started. Newly added URBs won't be processed until the next timer interrupt. The problem of properly accounting for non-bulk bandwidth (as well as packet and transaction overhead) is not addressed here. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov CC: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/dummy_hcd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index d515ec31afe4..b2ab9cc33fec 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -237,6 +237,8 @@ struct dummy_hcd { struct usb_device *udev; struct list_head urbp_list; + struct urbp *next_frame_urbp; + u32 stream_en_ep; u8 num_stream[30 / 2]; @@ -1250,6 +1252,8 @@ static int dummy_urb_enqueue( list_add_tail(&urbp->urbp_list, &dum_hcd->urbp_list); urb->hcpriv = urbp; + if (!dum_hcd->next_frame_urbp) + dum_hcd->next_frame_urbp = urbp; if (usb_pipetype(urb->pipe) == PIPE_CONTROL) urb->error_count = 1; /* mark as a new urb */ @@ -1766,6 +1770,7 @@ static void dummy_timer(unsigned long _dum_hcd) spin_unlock_irqrestore(&dum->lock, flags); return; } + dum_hcd->next_frame_urbp = NULL; for (i = 0; i < DUMMY_ENDPOINTS; i++) { if (!ep_info[i].name) @@ -1782,6 +1787,10 @@ restart: int type; int status = -EINPROGRESS; + /* stop when we reach URBs queued after the timer interrupt */ + if (urbp == dum_hcd->next_frame_urbp) + break; + urb = urbp->urb; if (urb->unlinked) goto return_urb; -- cgit v1.2.3-70-g09d2 From 7dbd8f4cabd96db5a50513de9d83a8105a5ffc81 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 26 Sep 2017 15:15:49 -0400 Subject: USB: dummy-hcd: Fix erroneous synchronization change A recent change to the synchronization in dummy-hcd was incorrect. The issue was that dummy_udc_stop() contained no locking and therefore could race with various gadget driver callbacks, and the fix was to add locking and issue the callbacks with the private spinlock held. UDC drivers aren't supposed to do this. Gadget driver callback routines are allowed to invoke functions in the UDC driver, and these functions will generally try to acquire the private spinlock. This would deadlock the driver. The correct solution is to drop the spinlock before issuing callbacks, and avoid races by emulating the synchronize_irq() call that all real UDC drivers must perform in their ->udc_stop() routines after disabling interrupts. This involves adding a flag to dummy-hcd's private structure to keep track of whether interrupts are supposed to be enabled, and adding a counter to keep track of ongoing callbacks so that dummy_udc_stop() can wait for them all to finish. A real UDC driver won't receive disconnect, reset, suspend, resume, or setup events once it has disabled interrupts. dummy-hcd will receive them but won't try to issue any gadget driver callbacks, which should be just as good. Signed-off-by: Alan Stern Fixes: f16443a034c7 ("USB: gadgetfs, dummy-hcd, net2280: fix locking for callbacks") CC: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/dummy_hcd.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index b2ab9cc33fec..b17618a55f1b 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -255,11 +255,13 @@ struct dummy { */ struct dummy_ep ep[DUMMY_ENDPOINTS]; int address; + int callback_usage; struct usb_gadget gadget; struct usb_gadget_driver *driver; struct dummy_request fifo_req; u8 fifo_buf[FIFO_SIZE]; u16 devstatus; + unsigned ints_enabled:1; unsigned udc_suspended:1; unsigned pullup:1; @@ -441,18 +443,27 @@ static void set_link_state(struct dummy_hcd *dum_hcd) (~dum_hcd->old_status) & dum_hcd->port_status; /* Report reset and disconnect events to the driver */ - if (dum->driver && (disconnect || reset)) { + if (dum->ints_enabled && (disconnect || reset)) { stop_activity(dum); + ++dum->callback_usage; + spin_unlock(&dum->lock); if (reset) usb_gadget_udc_reset(&dum->gadget, dum->driver); else dum->driver->disconnect(&dum->gadget); + spin_lock(&dum->lock); + --dum->callback_usage; } - } else if (dum_hcd->active != dum_hcd->old_active) { + } else if (dum_hcd->active != dum_hcd->old_active && + dum->ints_enabled) { + ++dum->callback_usage; + spin_unlock(&dum->lock); if (dum_hcd->old_active && dum->driver->suspend) dum->driver->suspend(&dum->gadget); else if (!dum_hcd->old_active && dum->driver->resume) dum->driver->resume(&dum->gadget); + spin_lock(&dum->lock); + --dum->callback_usage; } dum_hcd->old_status = dum_hcd->port_status; @@ -973,8 +984,11 @@ static int dummy_udc_start(struct usb_gadget *g, * can't enumerate without help from the driver we're binding. */ + spin_lock_irq(&dum->lock); dum->devstatus = 0; dum->driver = driver; + dum->ints_enabled = 1; + spin_unlock_irq(&dum->lock); return 0; } @@ -985,6 +999,16 @@ static int dummy_udc_stop(struct usb_gadget *g) struct dummy *dum = dum_hcd->dum; spin_lock_irq(&dum->lock); + dum->ints_enabled = 0; + stop_activity(dum); + + /* emulate synchronize_irq(): wait for callbacks to finish */ + while (dum->callback_usage > 0) { + spin_unlock_irq(&dum->lock); + usleep_range(1000, 2000); + spin_lock_irq(&dum->lock); + } + dum->driver = NULL; spin_unlock_irq(&dum->lock); @@ -1529,6 +1553,8 @@ static struct dummy_ep *find_endpoint(struct dummy *dum, u8 address) if (!is_active((dum->gadget.speed == USB_SPEED_SUPER ? dum->ss_hcd : dum->hs_hcd))) return NULL; + if (!dum->ints_enabled) + return NULL; if ((address & ~USB_DIR_IN) == 0) return &dum->ep[0]; for (i = 1; i < DUMMY_ENDPOINTS; i++) { @@ -1870,10 +1896,12 @@ restart: * until setup() returns; no reentrancy issues etc. */ if (value > 0) { + ++dum->callback_usage; spin_unlock(&dum->lock); value = dum->driver->setup(&dum->gadget, &setup); spin_lock(&dum->lock); + --dum->callback_usage; if (value >= 0) { /* no delays (max 64KB data stage) */ -- cgit v1.2.3-70-g09d2 From 4dcf4bab4a409e81284b8202137e4a85b96b34de Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 25 Sep 2017 17:01:23 +0900 Subject: usb: gadget: udc: renesas_usb3: fix for no-data control transfer When bRequestType & USB_DIR_IN is false and req.length is 0 in control transfer, since it means non-data, this driver should not set the mode as control write. So, this patch fixes it. Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Cc: # v4.5+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index df37c1e6e9d5..555c105e82df 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -1150,7 +1150,8 @@ static void usb3_start_pipe0(struct renesas_usb3_ep *usb3_ep, usb3_set_p0_con_for_ctrl_read_data(usb3); } else { usb3_clear_bit(usb3, P0_MOD_DIR, USB3_P0_MOD); - usb3_set_p0_con_for_ctrl_write_data(usb3); + if (usb3_req->req.length) + usb3_set_p0_con_for_ctrl_write_data(usb3); } usb3_p0_xfer(usb3_ep, usb3_req); -- cgit v1.2.3-70-g09d2 From 73f2f5745f18b4ccfe9484deac4e84a1378d19fd Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 25 Sep 2017 17:01:24 +0900 Subject: usb: gadget: udc: renesas_usb3: fix Pn_RAMMAP.Pn_MPKT value According to the datasheet of R-Car Gen3, the Pn_RAMMAP.Pn_MPKT should be set to one of 8, 16, 32, 64, 512 and 1024. Otherwise, when a gadget driver uses an interrupt endpoint, unexpected behavior happens. So, this patch fixes it. Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Cc: # v4.5+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 555c105e82df..7e0c53492356 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2054,7 +2054,16 @@ static u32 usb3_calc_ramarea(int ram_size) static u32 usb3_calc_rammap_val(struct renesas_usb3_ep *usb3_ep, const struct usb_endpoint_descriptor *desc) { - return usb3_ep->rammap_val | PN_RAMMAP_MPKT(usb_endpoint_maxp(desc)); + int i; + const u32 max_packet_array[] = {8, 16, 32, 64, 512}; + u32 mpkt = PN_RAMMAP_MPKT(1024); + + for (i = 0; i < ARRAY_SIZE(max_packet_array); i++) { + if (usb_endpoint_maxp(desc) <= max_packet_array[i]) + mpkt = PN_RAMMAP_MPKT(max_packet_array[i]); + } + + return usb3_ep->rammap_val | mpkt; } static int usb3_enable_pipe_n(struct renesas_usb3_ep *usb3_ep, -- cgit v1.2.3-70-g09d2 From 447b8a01b84f048d93d43bfe1fcaa4fcc56595cc Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 25 Sep 2017 17:01:25 +0900 Subject: usb: gadget: udc: renesas_usb3: Fix return value of usb3_write_pipe() This patch fixes an issue that this driver cannot go status stage in control read when the req.zero is set to 1 and the len in usb3_write_pipe() is set to 0. Otherwise, if we use g_ncm driver, usb enumeration takes long time (5 seconds or more). Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Cc: # v4.5+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 7e0c53492356..63a206122058 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -1038,7 +1038,7 @@ static int usb3_write_pipe(struct renesas_usb3_ep *usb3_ep, usb3_ep->ep.maxpacket); u8 *buf = usb3_req->req.buf + usb3_req->req.actual; u32 tmp = 0; - bool is_last; + bool is_last = !len ? true : false; if (usb3_wait_pipe_status(usb3_ep, PX_STA_BUFSTS) < 0) return -EBUSY; @@ -1059,7 +1059,8 @@ static int usb3_write_pipe(struct renesas_usb3_ep *usb3_ep, usb3_write(usb3, tmp, fifo_reg); } - is_last = usb3_is_transfer_complete(usb3_ep, usb3_req); + if (!is_last) + is_last = usb3_is_transfer_complete(usb3_ep, usb3_req); /* Send the data */ usb3_set_px_con_send(usb3_ep, len, is_last); -- cgit v1.2.3-70-g09d2 From 6124607acc88fffeaadf3aacfeb3cc1304c87387 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 27 Sep 2017 18:47:12 +0900 Subject: usb: renesas_usbhs: fix the BCLR setting condition for non-DCP pipe This patch fixes an issue that the driver sets the BCLR bit of {C,Dn}FIFOCTR register to 1 even when it's non-DCP pipe and the FRDY bit of {C,Dn}FIFOCTR register is set to 1. Fixes: e8d548d54968 ("usb: renesas_usbhs: fifo became independent from pipe.") Cc: # v3.1+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/fifo.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index d1af831f43eb..03cac07f57b5 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -282,11 +282,17 @@ static void usbhsf_fifo_clear(struct usbhs_pipe *pipe, struct usbhs_fifo *fifo) { struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe); + int ret = 0; if (!usbhs_pipe_is_dcp(pipe)) - usbhsf_fifo_barrier(priv, fifo); + ret = usbhsf_fifo_barrier(priv, fifo); - usbhs_write(priv, fifo->ctr, BCLR); + /* + * if non-DCP pipe, this driver should set BCLR when + * usbhsf_fifo_barrier() returns 0. + */ + if (!ret) + usbhs_write(priv, fifo->ctr, BCLR); } static int usbhsf_fifo_rcv_len(struct usbhs_priv *priv, -- cgit v1.2.3-70-g09d2 From 0a2ce62b61f2c76d0213edf4e37aaf54a8ddf295 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 27 Sep 2017 18:47:13 +0900 Subject: usb: renesas_usbhs: fix usbhsf_fifo_clear() for RX direction This patch fixes an issue that the usbhsf_fifo_clear() is possible to cause 10 msec delay if the pipe is RX direction and empty because the FRDY bit will never be set to 1 in such case. Fixes: e8d548d54968 ("usb: renesas_usbhs: fifo became independent from pipe.") Cc: # v3.1+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/fifo.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 03cac07f57b5..68f26904c316 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -284,8 +284,17 @@ static void usbhsf_fifo_clear(struct usbhs_pipe *pipe, struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe); int ret = 0; - if (!usbhs_pipe_is_dcp(pipe)) - ret = usbhsf_fifo_barrier(priv, fifo); + if (!usbhs_pipe_is_dcp(pipe)) { + /* + * This driver checks the pipe condition first to avoid -EBUSY + * from usbhsf_fifo_barrier() with about 10 msec delay in + * the interrupt handler if the pipe is RX direction and empty. + */ + if (usbhs_pipe_is_dir_in(pipe)) + ret = usbhs_pipe_is_accessible(pipe); + if (!ret) + ret = usbhsf_fifo_barrier(priv, fifo); + } /* * if non-DCP pipe, this driver should set BCLR when -- cgit v1.2.3-70-g09d2 From addfc5823dbf3e6ed400e98e49c7e64b10e191d6 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 12 Sep 2017 10:24:40 +0100 Subject: usb: gadget: ffs: handle I/O completion in-order By submitting completed transfers to the system workqueue there is no guarantee that completion events will be queued up in the correct order, as in multi-processor systems there is a thread running for each processor and the work items are not bound to a particular core. This means that several completions are in the queue at the same time, they may be processed in parallel and complete out of order, resulting in data appearing corrupt when read by userspace. Create a single-threaded workqueue for FunctionFS so that data completed requests is passed to userspace in the order in which they complete. Acked-by: Michal Nazarewicz Signed-off-by: John Keeping Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 17 +++++++++++++---- drivers/usb/gadget/function/u_fs.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 9990944a7245..8b342587f8ad 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -46,7 +46,8 @@ static void ffs_data_get(struct ffs_data *ffs); static void ffs_data_put(struct ffs_data *ffs); /* Creates new ffs_data object. */ -static struct ffs_data *__must_check ffs_data_new(void) __attribute__((malloc)); +static struct ffs_data *__must_check ffs_data_new(const char *dev_name) + __attribute__((malloc)); /* Opened counter handling. */ static void ffs_data_opened(struct ffs_data *ffs); @@ -780,11 +781,12 @@ static void ffs_epfile_async_io_complete(struct usb_ep *_ep, struct usb_request *req) { struct ffs_io_data *io_data = req->context; + struct ffs_data *ffs = io_data->ffs; ENTER(); INIT_WORK(&io_data->work, ffs_user_copy_worker); - schedule_work(&io_data->work); + queue_work(ffs->io_completion_wq, &io_data->work); } static void __ffs_epfile_read_buffer_free(struct ffs_epfile *epfile) @@ -1500,7 +1502,7 @@ ffs_fs_mount(struct file_system_type *t, int flags, if (unlikely(ret < 0)) return ERR_PTR(ret); - ffs = ffs_data_new(); + ffs = ffs_data_new(dev_name); if (unlikely(!ffs)) return ERR_PTR(-ENOMEM); ffs->file_perms = data.perms; @@ -1610,6 +1612,7 @@ static void ffs_data_put(struct ffs_data *ffs) BUG_ON(waitqueue_active(&ffs->ev.waitq) || waitqueue_active(&ffs->ep0req_completion.wait) || waitqueue_active(&ffs->wait)); + destroy_workqueue(ffs->io_completion_wq); kfree(ffs->dev_name); kfree(ffs); } @@ -1642,7 +1645,7 @@ static void ffs_data_closed(struct ffs_data *ffs) ffs_data_put(ffs); } -static struct ffs_data *ffs_data_new(void) +static struct ffs_data *ffs_data_new(const char *dev_name) { struct ffs_data *ffs = kzalloc(sizeof *ffs, GFP_KERNEL); if (unlikely(!ffs)) @@ -1650,6 +1653,12 @@ static struct ffs_data *ffs_data_new(void) ENTER(); + ffs->io_completion_wq = alloc_ordered_workqueue("%s", 0, dev_name); + if (!ffs->io_completion_wq) { + kfree(ffs); + return NULL; + } + refcount_set(&ffs->ref, 1); atomic_set(&ffs->opened, 0); ffs->state = FFS_READ_DESCRIPTORS; diff --git a/drivers/usb/gadget/function/u_fs.h b/drivers/usb/gadget/function/u_fs.h index 540f1c48c1a8..79f70ebf85dc 100644 --- a/drivers/usb/gadget/function/u_fs.h +++ b/drivers/usb/gadget/function/u_fs.h @@ -279,6 +279,7 @@ struct ffs_data { } file_perms; struct eventfd_ctx *ffs_eventfd; + struct workqueue_struct *io_completion_wq; bool no_disconnect; struct work_struct reset_work; -- cgit v1.2.3-70-g09d2 From 6baeda120d90aa637b08f7604de104ab00ce9126 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 31 Aug 2017 14:51:40 +0200 Subject: usb: gadget: udc: atmel: set vbus irqflags explicitly The driver triggers actions on both edges of the vbus signal. The former PIO controller was triggering IRQs on both falling and rising edges by default. Newer PIO controller don't, so it's better to set it explicitly to IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING. Without this patch we may trigger the connection with host but only on some bouncing signal conditions and thus lose connecting events. Acked-by: Ludovic Desroches Signed-off-by: Nicolas Ferre Cc: stable # v4.4+ Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/atmel_usba_udc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index 98d71400f8a1..a884c022df7a 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -29,6 +29,8 @@ #include #include "atmel_usba_udc.h" +#define USBA_VBUS_IRQFLAGS (IRQF_ONESHOT \ + | IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING) #ifdef CONFIG_USB_GADGET_DEBUG_FS #include @@ -2361,7 +2363,7 @@ static int usba_udc_probe(struct platform_device *pdev) IRQ_NOAUTOEN); ret = devm_request_threaded_irq(&pdev->dev, gpio_to_irq(udc->vbus_pin), NULL, - usba_vbus_irq_thread, IRQF_ONESHOT, + usba_vbus_irq_thread, USBA_VBUS_IRQFLAGS, "atmel_usba_udc", udc); if (ret) { udc->vbus_pin = -ENODEV; -- cgit v1.2.3-70-g09d2 From c3cdce45f8d3dfa5c3467894aa89798314920328 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 30 Aug 2017 19:03:52 +0800 Subject: usb: dwc3: of-simple: Add compatible for Spreadtrum SC9860 platform Add compatible string to use this generic glue layer to support Spreadtrum SC9860 platform's dwc3 controller. Signed-off-by: Baolin Wang Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-of-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c index 4cef7d4f9cd0..a26d1fde0f5e 100644 --- a/drivers/usb/dwc3/dwc3-of-simple.c +++ b/drivers/usb/dwc3/dwc3-of-simple.c @@ -177,6 +177,7 @@ static const struct of_device_id of_dwc3_simple_match[] = { { .compatible = "rockchip,rk3399-dwc3" }, { .compatible = "xlnx,zynqmp-dwc3" }, { .compatible = "cavium,octeon-7130-usb-uctl" }, + { .compatible = "sprd,sc9860-dwc3" }, { /* Sentinel */ } }; MODULE_DEVICE_TABLE(of, of_dwc3_simple_match); -- cgit v1.2.3-70-g09d2 From 77c01d11bbb2b5c005347061bf543ab94878314c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 26 Sep 2017 10:36:03 +0100 Subject: watchdog/hardlockup/perf: Fix spelling mistake: "permanetely" -> "permanently" Trivial fix to spelling mistake in pr_info message Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Acked-by: Don Zickus Cc: Andrew Morton Cc: Babu Moger Link: https://lkml.kernel.org/r/20170926093603.7756-1-colin.king@canonical.com --- kernel/watchdog_hld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 204a8cadb717..71a62ceacdc8 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -280,7 +280,7 @@ int __init hardlockup_detector_perf_init(void) int ret = hardlockup_detector_event_create(); if (ret) { - pr_info("Perf NMI watchdog permanetely disabled\n"); + pr_info("Perf NMI watchdog permanently disabled\n"); } else { perf_event_release_kernel(this_cpu_read(watchdog_ev)); this_cpu_write(watchdog_ev, NULL); -- cgit v1.2.3-70-g09d2 From 1fa4df3e688902d033dfda796eb83ae6ad8d0488 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 27 Sep 2017 16:35:00 -0500 Subject: percpu: fix iteration to prevent skipping over block The iterator functions pcpu_next_md_free_region and pcpu_next_fit_region use the block offset to determine if they have checked the area in the prior iteration. However, this causes an issue when the block offset is greater than subsequent block contig hints. If within the iterator it moves to check subsequent blocks, it may fail in the second predicate due to the block offset not being cleared. Thus, this causes the allocator to skip over blocks leading to false failures when allocating from the reserved chunk. While this happens in the general case as well, it will only fail if it cannot allocate a new chunk. This patch resets the block offset to 0 to pass the second predicate when checking subseqent blocks within the iterator function. Signed-off-by: Dennis Zhou Reported-and-tested-by: Luis Henriques Signed-off-by: Tejun Heo --- mm/percpu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/percpu.c b/mm/percpu.c index 59d44d61f5f1..aa121cef76de 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -353,6 +353,8 @@ static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off, block->contig_hint_start); return; } + /* reset to satisfy the second predicate above */ + block_off = 0; *bits = block->right_free; *bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free; @@ -407,6 +409,8 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits, *bit_off = pcpu_block_off_to_off(i, block->first_free); return; } + /* reset to satisfy the second predicate above */ + block_off = 0; *bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free, align); -- cgit v1.2.3-70-g09d2 From 2580c4c17aee3ad58e9751012bad278dd074ccae Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 28 Sep 2017 11:32:37 +0200 Subject: tun: bail out from tun_get_user() if the skb is empty KMSAN (https://github.com/google/kmsan) reported accessing uninitialized skb->data[0] in the case the skb is empty (i.e. skb->len is 0): ================================================ BUG: KMSAN: use of uninitialized memory in tun_get_user+0x19ba/0x3770 CPU: 0 PID: 3051 Comm: probe Not tainted 4.13.0+ #3140 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: ... __msan_warning_32+0x66/0xb0 mm/kmsan/kmsan_instr.c:477 tun_get_user+0x19ba/0x3770 drivers/net/tun.c:1301 tun_chr_write_iter+0x19f/0x300 drivers/net/tun.c:1365 call_write_iter ./include/linux/fs.h:1743 new_sync_write fs/read_write.c:457 __vfs_write+0x6c3/0x7f0 fs/read_write.c:470 vfs_write+0x3e4/0x770 fs/read_write.c:518 SYSC_write+0x12f/0x2b0 fs/read_write.c:565 SyS_write+0x55/0x80 fs/read_write.c:557 do_syscall_64+0x242/0x330 arch/x86/entry/common.c:284 entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:245 ... origin: ... kmsan_poison_shadow+0x6e/0xc0 mm/kmsan/kmsan.c:211 slab_alloc_node mm/slub.c:2732 __kmalloc_node_track_caller+0x351/0x370 mm/slub.c:4351 __kmalloc_reserve net/core/skbuff.c:138 __alloc_skb+0x26a/0x810 net/core/skbuff.c:231 alloc_skb ./include/linux/skbuff.h:903 alloc_skb_with_frags+0x1d7/0xc80 net/core/skbuff.c:4756 sock_alloc_send_pskb+0xabf/0xfe0 net/core/sock.c:2037 tun_alloc_skb drivers/net/tun.c:1144 tun_get_user+0x9a8/0x3770 drivers/net/tun.c:1274 tun_chr_write_iter+0x19f/0x300 drivers/net/tun.c:1365 call_write_iter ./include/linux/fs.h:1743 new_sync_write fs/read_write.c:457 __vfs_write+0x6c3/0x7f0 fs/read_write.c:470 vfs_write+0x3e4/0x770 fs/read_write.c:518 SYSC_write+0x12f/0x2b0 fs/read_write.c:565 SyS_write+0x55/0x80 fs/read_write.c:557 do_syscall_64+0x242/0x330 arch/x86/entry/common.c:284 return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:245 ================================================ Make sure tun_get_user() doesn't touch skb->data[0] unless there is actual data. C reproducer below: ========================== // autogenerated by syzkaller (http://github.com/google/syzkaller) #define _GNU_SOURCE #include #include #include #include #include #include int main() { int sock = socket(PF_INET, SOCK_STREAM, IPPROTO_IP); int tun_fd = open("/dev/net/tun", O_RDWR); struct ifreq req; memset(&req, 0, sizeof(struct ifreq)); strcpy((char*)&req.ifr_name, "gre0"); req.ifr_flags = IFF_UP | IFF_MULTICAST; ioctl(tun_fd, TUNSETIFF, &req); ioctl(sock, SIOCSIFFLAGS, "gre0"); write(tun_fd, "hi", 0); return 0; } ========================== Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller --- drivers/net/tun.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3c9985f29950..5ce580f413b9 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1496,11 +1496,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, switch (tun->flags & TUN_TYPE_MASK) { case IFF_TUN: if (tun->flags & IFF_NO_PI) { - switch (skb->data[0] & 0xf0) { - case 0x40: + u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0; + + switch (ip_version) { + case 4: pi.proto = htons(ETH_P_IP); break; - case 0x60: + case 6: pi.proto = htons(ETH_P_IPV6); break; default: -- cgit v1.2.3-70-g09d2 From bd86e32059526e2d0d13ca1e4447dfbbddb6e5cc Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Wed, 27 Sep 2017 20:28:57 +0800 Subject: dm crypt: fix memory leak in crypt_ctr_cipher_old() Fix memory leak of cipher_api. Fixes: 33d2f09fcb35 (dm crypt: introduce new format of cipher with "capi:" prefix) Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Jeffy Chen Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a55ffd4f5933..75341fdca4b6 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2466,6 +2466,7 @@ static int crypt_ctr_cipher_old(struct dm_target *ti, char *cipher_in, char *key kfree(cipher_api); return ret; } + kfree(cipher_api); return 0; bad_mem: -- cgit v1.2.3-70-g09d2 From aff3da39211105a42b2108b8af79bf8e16f670fd Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Mon, 25 Sep 2017 14:59:46 +0200 Subject: net: mvpp2: fix parsing fragmentation detection Parsing fragmentation detection failed due to wrong configured parser TCAM entry's. Some traffic was marked as fragmented in RX descriptor, even it wasn't IP fragmented. The hardware also failed to calculate checksums which lead to use software checksum and caused performance degradation. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: Stefan Chulski Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index dd0ee2691c86..da04939a2748 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -676,6 +676,7 @@ enum mvpp2_tag_type { #define MVPP2_PRS_RI_L3_MCAST BIT(15) #define MVPP2_PRS_RI_L3_BCAST (BIT(15) | BIT(16)) #define MVPP2_PRS_RI_IP_FRAG_MASK 0x20000 +#define MVPP2_PRS_RI_IP_FRAG_TRUE BIT(17) #define MVPP2_PRS_RI_UDF3_MASK 0x300000 #define MVPP2_PRS_RI_UDF3_RX_SPECIAL BIT(21) #define MVPP2_PRS_RI_L4_PROTO_MASK 0x1c00000 @@ -2315,7 +2316,7 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto, (proto != IPPROTO_IGMP)) return -EINVAL; - /* Fragmented packet */ + /* Not fragmented packet */ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, MVPP2_PE_LAST_FREE_TID); if (tid < 0) @@ -2334,8 +2335,12 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto, MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); mvpp2_prs_sram_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, MVPP2_PRS_IPV4_DIP_AI_BIT); - mvpp2_prs_sram_ri_update(&pe, ri | MVPP2_PRS_RI_IP_FRAG_MASK, - ri_mask | MVPP2_PRS_RI_IP_FRAG_MASK); + mvpp2_prs_sram_ri_update(&pe, ri, ri_mask | MVPP2_PRS_RI_IP_FRAG_MASK); + + mvpp2_prs_tcam_data_byte_set(&pe, 2, 0x00, + MVPP2_PRS_TCAM_PROTO_MASK_L); + mvpp2_prs_tcam_data_byte_set(&pe, 3, 0x00, + MVPP2_PRS_TCAM_PROTO_MASK); mvpp2_prs_tcam_data_byte_set(&pe, 5, proto, MVPP2_PRS_TCAM_PROTO_MASK); mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); @@ -2346,7 +2351,7 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto, mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4); mvpp2_prs_hw_write(priv, &pe); - /* Not fragmented packet */ + /* Fragmented packet */ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, MVPP2_PE_LAST_FREE_TID); if (tid < 0) @@ -2358,8 +2363,11 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto, pe.sram.word[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0; mvpp2_prs_sram_ri_update(&pe, ri, ri_mask); - mvpp2_prs_tcam_data_byte_set(&pe, 2, 0x00, MVPP2_PRS_TCAM_PROTO_MASK_L); - mvpp2_prs_tcam_data_byte_set(&pe, 3, 0x00, MVPP2_PRS_TCAM_PROTO_MASK); + mvpp2_prs_sram_ri_update(&pe, ri | MVPP2_PRS_RI_IP_FRAG_TRUE, + ri_mask | MVPP2_PRS_RI_IP_FRAG_MASK); + + mvpp2_prs_tcam_data_byte_set(&pe, 2, 0x00, 0x0); + mvpp2_prs_tcam_data_byte_set(&pe, 3, 0x00, 0x0); /* Update shadow table and hw entry */ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_IP4); -- cgit v1.2.3-70-g09d2 From 6bf69a1d6334bed776875c5ca852594ab4e5b209 Mon Sep 17 00:00:00 2001 From: Yan Markman Date: Mon, 25 Sep 2017 14:59:47 +0200 Subject: net: mvpp2: fix port list indexing The private port_list array has a list of pointers to mvpp2_port instances. This list is allocated given the number of ports enabled in the device tree, but the pointers are set using the port-id property. If on a single port is enabled, the port_list array will be of size 1, but when registering the port, if its id is not 0 the driver will crash. Other crashes were encountered in various situations. This fixes the issue by using an index not equal to the value of the port-id property. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: Yan Markman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index da04939a2748..b2f99df81e9c 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -7504,7 +7504,7 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv, /* Ports initialization */ static int mvpp2_port_probe(struct platform_device *pdev, struct device_node *port_node, - struct mvpp2 *priv) + struct mvpp2 *priv, int index) { struct device_node *phy_node; struct phy *comphy; @@ -7678,7 +7678,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, } netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr); - priv->port_list[id] = port; + priv->port_list[index] = port; return 0; err_free_port_pcpu: @@ -8013,10 +8013,12 @@ static int mvpp2_probe(struct platform_device *pdev) } /* Initialize ports */ + i = 0; for_each_available_child_of_node(dn, port_node) { - err = mvpp2_port_probe(pdev, port_node, priv); + err = mvpp2_port_probe(pdev, port_node, priv, i); if (err < 0) goto err_mg_clk; + i++; } platform_set_drvdata(pdev, priv); -- cgit v1.2.3-70-g09d2 From c7dfc8c848a48f176096f66a14879fb3333a460f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 25 Sep 2017 14:59:48 +0200 Subject: net: mvpp2: do not select the internal source clock This patch stops the internal MAC Tx clock from being enabled as the internal clock isn't used. The definition used for the bit controlling this behaviour is renamed as well as it was wrongly named (bit 4 of GMAC_CTRL_2_REG). Fixes: 3919357fb0bb ("net: mvpp2: initialize the GMAC when using a port") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index b2f99df81e9c..161055564720 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -333,7 +333,7 @@ #define MVPP2_GMAC_INBAND_AN_MASK BIT(0) #define MVPP2_GMAC_FLOW_CTRL_MASK GENMASK(2, 1) #define MVPP2_GMAC_PCS_ENABLE_MASK BIT(3) -#define MVPP2_GMAC_PORT_RGMII_MASK BIT(4) +#define MVPP2_GMAC_INTERNAL_CLK_MASK BIT(4) #define MVPP2_GMAC_DISABLE_PADDING BIT(5) #define MVPP2_GMAC_PORT_RESET_MASK BIT(6) #define MVPP2_GMAC_AUTONEG_CONFIG 0xc @@ -4599,7 +4599,6 @@ static void mvpp2_port_mii_gmac_configure(struct mvpp2_port *port) val |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK; } else if (phy_interface_mode_is_rgmii(port->phy_interface)) { val &= ~MVPP2_GMAC_PCS_ENABLE_MASK; - val |= MVPP2_GMAC_PORT_RGMII_MASK; } writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); -- cgit v1.2.3-70-g09d2 From 35f493b87ec072c5a2497ffbee243095ef725827 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Sep 2017 08:40:02 -0700 Subject: inetpeer: fix RCU lookup() again My prior fix was not complete, as we were dereferencing a pointer three times per node, not twice as I initially thought. Fixes: 4cc5b44b29a9 ("inetpeer: fix RCU lookup()") Fixes: b145425f269a ("inetpeer: remove AVL implementation in favor of RB tree") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e7eb590c86ce..b20c8ac64081 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -128,9 +128,9 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr, break; } if (cmp == -1) - pp = &(*pp)->rb_left; + pp = &next->rb_left; else - pp = &(*pp)->rb_right; + pp = &next->rb_right; } *parent_p = parent; *pp_p = pp; -- cgit v1.2.3-70-g09d2 From db06ae41945b14feb7f696dcafe8048cc37e8a20 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 25 Sep 2017 23:32:20 +0200 Subject: net: dsa: mv88e6xxx: Allow dsa and cpu ports in multiple vlans Ports with the same VLAN must all be in the same bridge. However the CPU and DSA ports need to be in multiple VLANs spread over multiple bridges. So exclude them when performing this test. Fixes: b2f81d304cee ("net: dsa: add CPU and DSA ports as VLAN members") Signed-off-by: Andrew Lunn Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index c6678aa9b4ef..674dab71d71c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1100,6 +1100,10 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, }; int i, err; + /* DSA and CPU ports have to be members of multiple vlans */ + if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) + return 0; + if (!vid_begin) return -EOPNOTSUPP; -- cgit v1.2.3-70-g09d2 From e804441cfe0b60f6c430901946a69c01eac09df1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 25 Sep 2017 15:55:53 -0700 Subject: net: dsa: Fix network device registration order We cannot be registering the network device first, then setting its carrier off and finally connecting it to a PHY, doing that leaves a window during which the carrier is at best inconsistent, and at worse the device is not usable without a down/up sequence since the network device is visible to user space with possibly no PHY device attached. Re-order steps so that they make logical sense. This fixes some devices where the port was not usable after e.g: an unbind then bind of the driver. Fixes: 0071f56e46da ("dsa: Register netdev before phy") Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2afa99506f8b..865e29e62bad 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1301,28 +1301,33 @@ int dsa_slave_create(struct dsa_port *port, const char *name) p->old_duplex = -1; port->netdev = slave_dev; - ret = register_netdev(slave_dev); - if (ret) { - netdev_err(master, "error %d registering interface %s\n", - ret, slave_dev->name); - port->netdev = NULL; - free_percpu(p->stats64); - free_netdev(slave_dev); - return ret; - } netif_carrier_off(slave_dev); ret = dsa_slave_phy_setup(p, slave_dev); if (ret) { netdev_err(master, "error %d setting up slave phy\n", ret); - unregister_netdev(slave_dev); - free_percpu(p->stats64); - free_netdev(slave_dev); - return ret; + goto out_free; + } + + ret = register_netdev(slave_dev); + if (ret) { + netdev_err(master, "error %d registering interface %s\n", + ret, slave_dev->name); + goto out_phy; } return 0; + +out_phy: + phy_disconnect(p->phy); + if (of_phy_is_fixed_link(p->dp->dn)) + of_phy_deregister_fixed_link(p->dp->dn); +out_free: + free_percpu(p->stats64); + free_netdev(slave_dev); + port->netdev = NULL; + return ret; } void dsa_slave_destroy(struct net_device *slave_dev) -- cgit v1.2.3-70-g09d2 From 06d7a1b932c26afe2c0a1f4520ddd417d8eeda79 Mon Sep 17 00:00:00 2001 From: Ed Blake Date: Tue, 26 Sep 2017 11:43:46 +0100 Subject: net: stmmac: dwc-qos: Add suspend / resume support Add hook to stmmac_pltfr_pm_ops for suspend / resume handling. Signed-off-by: Ed Blake Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index dd6a2f9791cc..5efef8001edf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -511,6 +511,7 @@ static struct platform_driver dwc_eth_dwmac_driver = { .remove = dwc_eth_dwmac_remove, .driver = { .name = "dwc-eth-dwmac", + .pm = &stmmac_pltfr_pm_ops, .of_match_table = dwc_eth_dwmac_match, }, }; -- cgit v1.2.3-70-g09d2 From 1579f678fb4397f9e439d2e373d4ade036c673b4 Mon Sep 17 00:00:00 2001 From: Ed Blake Date: Tue, 26 Sep 2017 11:44:53 +0100 Subject: net: stmmac: dwmac4: Re-enable MAC Rx before powering down Re-enable the MAC receiver by setting CONFIG_RE before powering down, as instructed in section 6.3.5.1 of [1]. Without this the MAC fails to receive WoL packets and never wakes up. [1] DWC Ethernet QoS Databook 4.10a October 2014 Signed-off-by: Ed Blake Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index c4407e8e39a3..2f7d7ec59962 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -296,6 +296,7 @@ static void dwmac4_pmt(struct mac_device_info *hw, unsigned long mode) { void __iomem *ioaddr = hw->pcsr; unsigned int pmt = 0; + u32 config; if (mode & WAKE_MAGIC) { pr_debug("GMAC: WOL Magic frame\n"); @@ -306,6 +307,12 @@ static void dwmac4_pmt(struct mac_device_info *hw, unsigned long mode) pmt |= power_down | global_unicast | wake_up_frame_en; } + if (pmt) { + /* The receiver must be enabled for WOL before powering down */ + config = readl(ioaddr + GMAC_CONFIG); + config |= GMAC_CONFIG_RE; + writel(config, ioaddr + GMAC_CONFIG); + } writel(pmt, ioaddr + GMAC_PMT); } -- cgit v1.2.3-70-g09d2 From 4971613c1639d8e5f102c4e797c3bf8f83a5a69e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 26 Sep 2017 12:19:37 -0400 Subject: packet: in packet_do_bind, test fanout with bind_lock held Once a socket has po->fanout set, it remains a member of the group until it is destroyed. The prot_hook must be constant and identical across sockets in the group. If fanout_add races with packet_do_bind between the test of po->fanout and taking the lock, the bind call may make type or dev inconsistent with that of the fanout group. Hold po->bind_lock when testing po->fanout to avoid this race. I had to introduce artificial delay (local_bh_enable) to actually observe the race. Fixes: dc99f600698d ("packet: Add fanout support.") Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d288f52c53f7..a10c2836465c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3069,13 +3069,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, int ret = 0; bool unlisted = false; - if (po->fanout) - return -EINVAL; - lock_sock(sk); spin_lock(&po->bind_lock); rcu_read_lock(); + if (po->fanout) { + ret = -EINVAL; + goto out_unlock; + } + if (name) { dev = dev_get_by_name_rcu(sock_net(sk), name); if (!dev) { -- cgit v1.2.3-70-g09d2 From da7c9561015e93d10fe6aab73e9288e0d09d65a6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 26 Sep 2017 12:20:17 -0400 Subject: packet: only test po->has_vnet_hdr once in packet_snd Packet socket option po->has_vnet_hdr can be updated concurrently with other operations if no ring is attached. Do not test the option twice in packet_snd, as the value may change in between calls. A race on setsockopt disable may cause a packet > mtu to be sent without having GSO options set. Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.") Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a10c2836465c..bec01a3daf5b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2840,6 +2840,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; struct packet_sock *po = pkt_sk(sk); + bool has_vnet_hdr = false; int hlen, tlen, linear; int extra_len = 0; @@ -2883,6 +2884,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); if (err) goto out_unlock; + has_vnet_hdr = true; } if (unlikely(sock_flag(sk, SOCK_NOFCS))) { @@ -2941,7 +2943,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->priority = sk->sk_priority; skb->mark = sockc.mark; - if (po->has_vnet_hdr) { + if (has_vnet_hdr) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); if (err) goto out_free; -- cgit v1.2.3-70-g09d2 From b32ca44a88def4bf92626d8777494c6f14638c42 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 14:57:21 -0400 Subject: net: dsa: mv88e6xxx: lock mutex when freeing IRQs mv88e6xxx_g2_irq_free locks the registers mutex, but not mv88e6xxx_g1_irq_free, which results in a stack trace from assert_reg_lock when unloading the mv88e6xxx module. Fix this. Fixes: 3460a5770ce9 ("net: dsa: mv88e6xxx: Mask g1 interrupts and free interrupt") Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 674dab71d71c..d74c7335c512 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3951,7 +3951,9 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) if (chip->irq > 0) { if (chip->info->g2_irqs > 0) mv88e6xxx_g2_irq_free(chip); + mutex_lock(&chip->reg_lock); mv88e6xxx_g1_irq_free(chip); + mutex_unlock(&chip->reg_lock); } } -- cgit v1.2.3-70-g09d2 From 9d538fa60bad4f7b23193c89e843797a1cf71ef3 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 26 Sep 2017 17:38:50 -0700 Subject: net: Set sk_prot_creator when cloning sockets to the right proto sk->sk_prot and sk->sk_prot_creator can differ when the app uses IPV6_ADDRFORM (transforming an IPv6-socket to an IPv4-one). Which is why sk_prot_creator is there to make sure that sk_prot_free() does the kmem_cache_free() on the right kmem_cache slab. Now, if such a socket gets transformed back to a listening socket (using connect() with AF_UNSPEC) we will allocate an IPv4 tcp_sock through sk_clone_lock() when a new connection comes in. But sk_prot_creator will still point to the IPv6 kmem_cache (as everything got copied in sk_clone_lock()). When freeing, we will thus put this memory back into the IPv6 kmem_cache although it was allocated in the IPv4 cache. I have seen memory corruption happening because of this. With slub-debugging and MEMCG_KMEM enabled this gives the warning "cache_from_obj: Wrong slab cache. TCPv6 but object is from TCP" A C-program to trigger this: void main(void) { int fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP); int new_fd, newest_fd, client_fd; struct sockaddr_in6 bind_addr; struct sockaddr_in bind_addr4, client_addr1, client_addr2; struct sockaddr unsp; int val; memset(&bind_addr, 0, sizeof(bind_addr)); bind_addr.sin6_family = AF_INET6; bind_addr.sin6_port = ntohs(42424); memset(&client_addr1, 0, sizeof(client_addr1)); client_addr1.sin_family = AF_INET; client_addr1.sin_port = ntohs(42424); client_addr1.sin_addr.s_addr = inet_addr("127.0.0.1"); memset(&client_addr2, 0, sizeof(client_addr2)); client_addr2.sin_family = AF_INET; client_addr2.sin_port = ntohs(42421); client_addr2.sin_addr.s_addr = inet_addr("127.0.0.1"); memset(&unsp, 0, sizeof(unsp)); unsp.sa_family = AF_UNSPEC; bind(fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr)); listen(fd, 5); client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); connect(client_fd, (struct sockaddr *)&client_addr1, sizeof(client_addr1)); new_fd = accept(fd, NULL, NULL); close(fd); val = AF_INET; setsockopt(new_fd, SOL_IPV6, IPV6_ADDRFORM, &val, sizeof(val)); connect(new_fd, &unsp, sizeof(unsp)); memset(&bind_addr4, 0, sizeof(bind_addr4)); bind_addr4.sin_family = AF_INET; bind_addr4.sin_port = ntohs(42421); bind(new_fd, (struct sockaddr *)&bind_addr4, sizeof(bind_addr4)); listen(new_fd, 5); client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); connect(client_fd, (struct sockaddr *)&client_addr2, sizeof(client_addr2)); newest_fd = accept(new_fd, NULL, NULL); close(new_fd); close(client_fd); close(new_fd); } As far as I can see, this bug has been there since the beginning of the git-days. Signed-off-by: Christoph Paasch Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/sock.c b/net/core/sock.c index 9b7b6bbb2a23..7d55c05f449d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1654,6 +1654,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); + newsk->sk_prot_creator = sk->sk_prot; + /* SANITY */ if (likely(newsk->sk_net_refcnt)) get_net(sock_net(newsk)); -- cgit v1.2.3-70-g09d2 From 87cbde8d9081b91df86a21d0d743cd700e04890a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 28 Sep 2017 01:45:10 +0200 Subject: PM / s2idle: Invoke the ->wake() platform callback earlier The role of the ->wake() platform callback for suspend-to-idle is to deal with possible spurious wakeups, among other things. The ACPI implementation of it, acpi_s2idle_wake(), additionally checks the conditions for entering the Low Power S0 Idle state by the platform and reports the ones that have not been met. However, the ->wake() platform callback is invoked after calling dpm_noirq_resume_devices(), which means that the power states of some devices may have changed since s2idle_enter() returned, so some unmet Low Power S0 Idle conditions may be reported incorrectly as a result of that. To avoid these false positives, reorder the invocations of the dpm_noirq_resume_devices() routine and the ->wake() platform callback in s2idle_loop(). Fixes: 726fb6b4f2a8 (ACPI / PM: Check low power idle constraints for debug only) Tested-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 3e2b4f519009..ccd2d20e6b06 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -120,22 +120,26 @@ static void s2idle_loop(void) * frozen processes + suspended devices + idle processors. * Thus s2idle_enter() should be called right after * all devices have been suspended. + * + * Wakeups during the noirq suspend of devices may be spurious, + * so prevent them from terminating the loop right away. */ error = dpm_noirq_suspend_devices(PMSG_SUSPEND); if (!error) s2idle_enter(); + else if (error == -EBUSY && pm_wakeup_pending()) + error = 0; - dpm_noirq_resume_devices(PMSG_RESUME); - if (error && (error != -EBUSY || !pm_wakeup_pending())) { - dpm_noirq_end(); - break; - } - - if (s2idle_ops && s2idle_ops->wake) + if (!error && s2idle_ops && s2idle_ops->wake) s2idle_ops->wake(); + dpm_noirq_resume_devices(PMSG_RESUME); + dpm_noirq_end(); + if (error) + break; + if (s2idle_ops && s2idle_ops->sync) s2idle_ops->sync(); -- cgit v1.2.3-70-g09d2 From bca73f595a566f0262967535bb5b2ea9c4271d9a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 28 Sep 2017 22:37:35 -0500 Subject: powerpc: Fix workaround for spurious MCE on POWER9 In the recent commit d8bd9f3f0925 ("powerpc: Handle MCE on POWER9 with only DSISR bit 30 set") I screwed up the bit number. It should be bit 25 (IBM bit 38). Fixes: d8bd9f3f0925 ("powerpc: Handle MCE on POWER9 with only DSISR bit 30 set") Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/mce_power.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index f523125b9d34..72f153c6f3fa 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -626,7 +626,7 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) { /* * On POWER9 DD2.1 and below, it's possible to get a machine check - * caused by a paste instruction where only DSISR bit 30 is set. This + * caused by a paste instruction where only DSISR bit 25 is set. This * will result in the MCE handler seeing an unknown event and the kernel * crashing. An MCE that occurs like this is spurious, so we don't need * to do anything in terms of servicing it. If there is something that @@ -634,7 +634,7 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) * correct DSISR so that it can be serviced properly. So detect this * case and mark it as handled. */ - if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x40000000) + if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) return 1; return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); -- cgit v1.2.3-70-g09d2 From 4fc0870d7e462fe3b86e0f938ae75ce884728c7d Mon Sep 17 00:00:00 2001 From: Christophe Lombard Date: Tue, 26 Sep 2017 10:15:21 +0200 Subject: cxl: Fix memory page not handled The in-kernel 'library' API can be called by drivers to help interaction with an IBM XSL on a POWER9 system. The cxllib_handle_fault() API is used to handle memory fault. All memory pages of the specified buffer have to be handled but under certain conditions,the last page may not be touched, and the address the adapter is trying to access is never sent to the kernel for resolution. This patch reworks start address of the loop with an address aligned on the page size. In this context, the last page is not missed. Signed-off-by: Christophe Lombard Acked-by: Frederic Barrat Acked-by: Andrew Donnellan Fixes: 3ced8d730063 ("cxl: Export library to support IBM XSL"); Signed-off-by: Michael Ellerman --- drivers/misc/cxl/cxllib.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index 5dba23ca2e5f..dc9bc1807fdf 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c @@ -219,8 +219,17 @@ int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) down_read(&mm->mmap_sem); - for (dar = addr; dar < addr + size; dar += page_size) { - if (!vma || dar < vma->vm_start || dar > vma->vm_end) { + vma = find_vma(mm, addr); + if (!vma) { + pr_err("Can't find vma for addr %016llx\n", addr); + rc = -EFAULT; + goto out; + } + /* get the size of the pages allocated */ + page_size = vma_kernel_pagesize(vma); + + for (dar = (addr & ~(page_size - 1)); dar < (addr + size); dar += page_size) { + if (dar < vma->vm_start || dar >= vma->vm_end) { vma = find_vma(mm, addr); if (!vma) { pr_err("Can't find vma for addr %016llx\n", addr); -- cgit v1.2.3-70-g09d2 From e5173418ac597cebe9f7a39adf10be470000b518 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 27 Sep 2017 10:06:27 +0100 Subject: netfilter: ipset: Fix race between dump and swap Fix a race between ip_set_dump_start() and ip_set_swap(). The race is as follows: * Without holding the ref lock, ip_set_swap() checks ref_netlink of the set and it is 0. * ip_set_dump_start() takes a reference on the set. * ip_set_swap() does the swap (even though it now has a non-zero reference count). * ip_set_dump_start() gets the set from ip_set_list again which is now a different set since it has been swapped. * ip_set_dump_start() calls __ip_set_put_netlink() and hits a BUG_ON due to the reference count being 0. Fix this race by extending the critical region in which the ref lock is held to include checking the ref counts. The race can be reproduced with the following script: while :; do ipset destroy hash_ip1 ipset destroy hash_ip2 ipset create hash_ip1 hash:ip family inet hashsize 1024 \ maxelem 500000 ipset create hash_ip2 hash:ip family inet hashsize 300000 \ maxelem 500000 ipset create hash_ip3 hash:ip family inet hashsize 1024 \ maxelem 500000 ipset save & ipset swap hash_ip3 hash_ip2 ipset destroy hash_ip3 wait done Signed-off-by: Ross Lagerwall Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a7f049ff3049..cf84f7b37cd9 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1191,14 +1191,17 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; - if (from->ref_netlink || to->ref_netlink) + write_lock_bh(&ip_set_ref_lock); + + if (from->ref_netlink || to->ref_netlink) { + write_unlock_bh(&ip_set_ref_lock); return -EBUSY; + } strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); - write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); ip_set(inst, from_id) = to; ip_set(inst, to_id) = from; -- cgit v1.2.3-70-g09d2 From 0d18779be13766b33c69cbc26df38383598da373 Mon Sep 17 00:00:00 2001 From: JingPiao Chen Date: Sat, 23 Sep 2017 17:10:44 +0800 Subject: netfilter: nf_tables: fix update chain error # nft add table filter # nft add chain filter c1 # nft rename chain filter c1 c2 Error: Could not process rule: No such file or directory rename chain filter c1 c2 ^^^^^^^^^^^^^^^^^^^^^^^^^^ # nft add chain filter c2 # nft rename chain filter c1 c2 # nft list table filter table ip filter { chain c2 { } chain c2 { } } Fixes: 664b0f8cd8 ("netfilter: nf_tables: add generation mask to chains") Signed-off-by: JingPiao Chen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 929927171426..f98ca8c6aa59 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1487,8 +1487,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); - if (IS_ERR(chain2)) - return PTR_ERR(chain2); + if (!IS_ERR(chain2)) + return -EEXIST; } if (nla[NFTA_CHAIN_COUNTERS]) { -- cgit v1.2.3-70-g09d2 From e6b72ee88a56bcfe63f72e9c30766484c45bec72 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 26 Sep 2017 18:35:45 +0200 Subject: netfilter: ebtables: fix race condition in frame_filter_net_init() It is possible for ebt_in_hook to be triggered before ebt_table is assigned resulting in a NULL-pointer dereference. Make sure hooks are registered as the last step. Fixes: aee12a0a3727 ("ebtables: remove nf_hook_register usage") Signed-off-by: Artem Savkov Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 7 ++++--- net/bridge/netfilter/ebtable_broute.c | 4 ++-- net/bridge/netfilter/ebtable_filter.c | 4 ++-- net/bridge/netfilter/ebtable_nat.c | 4 ++-- net/bridge/netfilter/ebtables.c | 17 +++++++++-------- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2c2a5514b0df..528b24c78308 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -108,9 +108,10 @@ struct ebt_table { #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \ ~(__alignof__(struct _xt_align)-1)) -extern struct ebt_table *ebt_register_table(struct net *net, - const struct ebt_table *table, - const struct nf_hook_ops *); +extern int ebt_register_table(struct net *net, + const struct ebt_table *table, + const struct nf_hook_ops *ops, + struct ebt_table **res); extern void ebt_unregister_table(struct net *net, struct ebt_table *table, const struct nf_hook_ops *); extern unsigned int ebt_do_table(struct sk_buff *skb, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 2585b100ebbb..276b60262981 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -65,8 +65,8 @@ static int ebt_broute(struct sk_buff *skb) static int __net_init broute_net_init(struct net *net) { - net->xt.broute_table = ebt_register_table(net, &broute_table, NULL); - return PTR_ERR_OR_ZERO(net->xt.broute_table); + return ebt_register_table(net, &broute_table, NULL, + &net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 45a00dbdbcad..c41da5fac84f 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_filter[] = { static int __net_init frame_filter_net_init(struct net *net) { - net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter); - return PTR_ERR_OR_ZERO(net->xt.frame_filter); + return ebt_register_table(net, &frame_filter, ebt_ops_filter, + &net->xt.frame_filter); } static void __net_exit frame_filter_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 57cd5bb154e7..08df7406ecb3 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_nat[] = { static int __net_init frame_nat_net_init(struct net *net) { - net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat); - return PTR_ERR_OR_ZERO(net->xt.frame_nat); + return ebt_register_table(net, &frame_nat, ebt_ops_nat, + &net->xt.frame_nat); } static void __net_exit frame_nat_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 83951f978445..3b3dcf719e07 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1169,9 +1169,8 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table) kfree(table); } -struct ebt_table * -ebt_register_table(struct net *net, const struct ebt_table *input_table, - const struct nf_hook_ops *ops) +int ebt_register_table(struct net *net, const struct ebt_table *input_table, + const struct nf_hook_ops *ops, struct ebt_table **res) { struct ebt_table_info *newinfo; struct ebt_table *t, *table; @@ -1183,7 +1182,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table, repl->entries == NULL || repl->entries_size == 0 || repl->counters != NULL || input_table->private != NULL) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } /* Don't add one table to multiple lists. */ @@ -1252,16 +1251,18 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table, list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); mutex_unlock(&ebt_mutex); + WRITE_ONCE(*res, table); + if (!ops) - return table; + return 0; ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret) { __ebt_unregister_table(net, table); - return ERR_PTR(ret); + *res = NULL; } - return table; + return ret; free_unlock: mutex_unlock(&ebt_mutex); free_chainstack: @@ -1276,7 +1277,7 @@ free_newinfo: free_table: kfree(table); out: - return ERR_PTR(ret); + return ret; } void ebt_unregister_table(struct net *net, struct ebt_table *table, -- cgit v1.2.3-70-g09d2 From 04eae427406ef6af9b05bd631e235f4a509666b1 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 29 Sep 2017 08:25:01 -0500 Subject: RDMA/iwpm: Properly mark end of NL messages Commit 1a1c116f3dcf removes nlmsg_len calculation in ibnl_put_attr causing netlink messages to be rejected due to incorrect length. Add nlmsg_end after all attributes are appended to calculate the nlmsg_len. Fixes: 1a1c116f3dcf ("RDMA/netlink: Simplify the put_msg and put_attr") Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_msg.c | 8 ++++++++ drivers/infiniband/core/iwpm_util.c | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 30825bb9b8e9..8861c052155a 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -100,6 +100,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) if (ret) goto pid_query_error; + nlmsg_end(skb, nlh); + pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n", __func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name); @@ -170,6 +172,8 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) &pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR); if (ret) goto add_mapping_error; + + nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); @@ -246,6 +250,8 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) &pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR); if (ret) goto query_mapping_error; + + nlmsg_end(skb, nlh); nlmsg_request->req_buffer = pm_msg; ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); @@ -308,6 +314,8 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) if (ret) goto remove_mapping_error; + nlmsg_end(skb, nlh); + ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index c81c55942626..3c4faadb8cdd 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -597,6 +597,9 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) &mapping_num, IWPM_NLA_MAPINFO_SEND_NUM); if (ret) goto mapinfo_num_error; + + nlmsg_end(skb, nlh); + ret = rdma_nl_unicast(skb, iwpm_pid); if (ret) { skb = NULL; @@ -678,6 +681,8 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) if (ret) goto send_mapping_info_unlock; + nlmsg_end(skb, nlh); + iwpm_print_sockaddr(&map_info->local_sockaddr, "send_mapping_info: Local sockaddr:"); iwpm_print_sockaddr(&map_info->mapped_sockaddr, -- cgit v1.2.3-70-g09d2 From 9792bf5ad5e30b207274ccbb459a89eab6033b46 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 22 Sep 2017 22:00:29 -0700 Subject: clk: Export clk_bulk_prepare() Allow clk_bulk_prepare() to be referenced by kernel modules by adding the missing EXPORT_SYMBOL_GPL(). Fixes: 266e4e9d9150 ("clk: add clk_bulk_get accessories") Reported-by: Ulf Hansson Signed-off-by: Bjorn Andersson Reviewed-by: Ulf Hansson Signed-off-by: Stephen Boyd --- drivers/clk/clk-bulk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-bulk.c b/drivers/clk/clk-bulk.c index c834f5abfc49..4c10456f8a32 100644 --- a/drivers/clk/clk-bulk.c +++ b/drivers/clk/clk-bulk.c @@ -105,6 +105,7 @@ err: return ret; } +EXPORT_SYMBOL_GPL(clk_bulk_prepare); #endif /* CONFIG_HAVE_CLK_PREPARE */ -- cgit v1.2.3-70-g09d2 From fef0035c0f31322d417d1954bba5ab959bf91183 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 28 Sep 2017 00:41:44 +0200 Subject: netlink: do not proceed if dump's start() errs Drivers that use the start method for netlink dumping rely on dumpit not being called if start fails. For example, ila_xlat.c allocates memory and assigns it to cb->args[0] in its start() function. It might fail to do that and return -ENOMEM instead. However, even when returning an error, dumpit will be called, which, in the example above, quickly dereferences the memory in cb->args[0], which will OOPS the kernel. This is but one example of how this goes wrong. Since start() has always been a function with an int return type, it therefore makes sense to use it properly, rather than ignoring it. This patch thus returns early and does not call dumpit() when start() fails. Signed-off-by: Jason A. Donenfeld Cc: Johannes Berg Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 327807731b44..94c11cf0459d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2270,10 +2270,13 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, mutex_unlock(nlk->cb_mutex); + ret = 0; if (cb->start) - cb->start(cb); + ret = cb->start(cb); + + if (!ret) + ret = netlink_dump(sk); - ret = netlink_dump(sk); sock_put(sk); if (ret) -- cgit v1.2.3-70-g09d2 From 13ffe9a26df4e156363579b25c904dd0b1e31bfb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 13 Sep 2017 18:02:02 +0100 Subject: staging: iio: ade7759: fix signed extension bug on shift of a u8 The current shift of st->rx[2] left shifts a u8 24 bits left, promotes the integer to a an int and then to a unsigned u64. If the top bit of st->rx[2] is set then we end up with all the upper bits being set to 1. Fix this by casting st->rx[2] to a u64 before the 24 bit left shift. Detected by CoverityScan CID#144940 ("Unintended sign extension") Fixes: 2919fa54ef64 ("staging: iio: meter: new driver for ADE7759 devices") Signed-off-by: Colin Ian King Signed-off-by: Jonathan Cameron --- drivers/staging/iio/meter/ade7759.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/meter/ade7759.c b/drivers/staging/iio/meter/ade7759.c index 1691760339da..02573c517d9d 100644 --- a/drivers/staging/iio/meter/ade7759.c +++ b/drivers/staging/iio/meter/ade7759.c @@ -172,7 +172,7 @@ static int ade7759_spi_read_reg_40(struct device *dev, reg_address); goto error_ret; } - *val = ((u64)st->rx[1] << 32) | (st->rx[2] << 24) | + *val = ((u64)st->rx[1] << 32) | ((u64)st->rx[2] << 24) | (st->rx[3] << 16) | (st->rx[4] << 8) | st->rx[5]; error_ret: -- cgit v1.2.3-70-g09d2 From d51711c0557d6dbd26c63144aef32c7b3ec264b9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 28 Sep 2017 13:23:31 +0800 Subject: ip_gre: ipgre_tap device should keep dst Without keeping dst, the tunnel will not update any mtu/pmtu info, since it does not have a dst on the skb. Reproducer: client(ipgre_tap1 - eth1) <-----> (eth1 - ipgre_tap1)server After reducing eth1's mtu on client, then perforamnce became 0. This patch is to netif_keep_dst in gre_tap_init, as ipgre does. Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0162fb955b33..8b837f6f5532 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1223,6 +1223,7 @@ static int gre_tap_init(struct net_device *dev) { __gre_tunnel_init(dev); dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); return ip_tunnel_init(dev); } -- cgit v1.2.3-70-g09d2 From 2d40557cc702ed8e5edd9bd422233f86652d932e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 28 Sep 2017 13:23:50 +0800 Subject: ip6_gre: ip6gre_tap device should keep dst The patch 'ip_gre: ipgre_tap device should keep dst' fixed a issue that ipgre_tap mtu couldn't be updated in tx path. The same fix is needed for ip6gre_tap as well. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 20f66f4c9460..1602b491b281 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1311,6 +1311,7 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); } static bool ip6gre_netlink_encap_parms(struct nlattr *data[], -- cgit v1.2.3-70-g09d2 From d41bb33ba33b8f8debe54ed36be6925eb496e354 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 28 Sep 2017 13:24:07 +0800 Subject: ip6_tunnel: update mtu properly for ARPHRD_ETHER tunnel device in tx path Now when updating mtu in tx path, it doesn't consider ARPHRD_ETHER tunnel device, like ip6gre_tap tunnel, for which it should also subtract ether header to get the correct mtu. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f2f21c24915f..a1c24443cd9e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1043,6 +1043,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; int mtu; + unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; bool use_cache = false; @@ -1124,7 +1125,7 @@ route_lookup: t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; + mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1133,7 +1134,7 @@ route_lookup: mtu = IPV6_MIN_MTU; if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { + if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; -- cgit v1.2.3-70-g09d2 From 7487449c86c65202b3b725c4524cb48dd65e4e6f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Sep 2017 15:51:36 +0200 Subject: IPv4: early demux can return an error code Currently no error is emitted, but this infrastructure will used by the next patch to allow source address validation for mcast sockets. Since early demux can do a route lookup and an ipv4 route lookup can return an error code this is consistent with the current ipv4 route infrastructure. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/protocol.h | 4 ++-- include/net/tcp.h | 2 +- include/net/udp.h | 2 +- net/ipv4/ip_input.c | 25 +++++++++++++++---------- net/ipv4/tcp_ipv4.c | 9 +++++---- net/ipv4/udp.c | 11 ++++++----- 6 files changed, 30 insertions(+), 23 deletions(-) diff --git a/include/net/protocol.h b/include/net/protocol.h index 65ba335b0e7e..4fc75f7ae23b 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -39,8 +39,8 @@ /* This is used to register protocols. */ struct net_protocol { - void (*early_demux)(struct sk_buff *skb); - void (*early_demux_handler)(struct sk_buff *skb); + int (*early_demux)(struct sk_buff *skb); + int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); unsigned int no_policy:1, diff --git a/include/net/tcp.h b/include/net/tcp.h index 3bc910a9bfc6..89974c5286d8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -345,7 +345,7 @@ void tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); -void tcp_v4_early_demux(struct sk_buff *skb); +int tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); diff --git a/include/net/udp.h b/include/net/udp.h index 12dfbfe2e2d7..6c759c8594e2 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -259,7 +259,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); } -void udp_v4_early_demux(struct sk_buff *skb); +int udp_v4_early_demux(struct sk_buff *skb); bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst); int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index fa2dc8f692c6..57fc13c6ab2b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -311,9 +311,10 @@ drop: static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); - struct rtable *rt; + int (*edemux)(struct sk_buff *skb); struct net_device *dev = skb->dev; - void (*edemux)(struct sk_buff *skb); + struct rtable *rt; + int err; /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - edemux(skb); + err = edemux(skb); + if (unlikely(err)) + goto drop_error; /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } @@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (!skb_valid_dst(skb)) { - int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, dev); - if (unlikely(err)) { - if (err == -EXDEV) - __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); - goto drop; - } + err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, dev); + if (unlikely(err)) + goto drop_error; } #ifdef CONFIG_IP_ROUTE_CLASSID @@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) drop: kfree_skb(skb); return NET_RX_DROP; + +drop_error: + if (err == -EXDEV) + __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); + goto drop; } /* diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9416b5162bc..85164d4d3e53 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1503,23 +1503,23 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); -void tcp_v4_early_demux(struct sk_buff *skb) +int tcp_v4_early_demux(struct sk_buff *skb) { const struct iphdr *iph; const struct tcphdr *th; struct sock *sk; if (skb->pkt_type != PACKET_HOST) - return; + return 0; if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) - return; + return 0; iph = ip_hdr(skb); th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) - return; + return 0; sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, @@ -1538,6 +1538,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb_dst_set_noref(skb, dst); } } + return 0; } bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ef29df8648e4..9b30f821fe96 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2221,7 +2221,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, return NULL; } -void udp_v4_early_demux(struct sk_buff *skb) +int udp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct iphdr *iph; @@ -2234,7 +2234,7 @@ void udp_v4_early_demux(struct sk_buff *skb) /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) - return; + return 0; iph = ip_hdr(skb); uh = udp_hdr(skb); @@ -2244,14 +2244,14 @@ void udp_v4_early_demux(struct sk_buff *skb) struct in_device *in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) - return; + return 0; /* we are supposed to accept bcast packets */ if (skb->pkt_type == PACKET_MULTICAST) { ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, iph->protocol); if (!ours) - return; + return 0; } sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, @@ -2263,7 +2263,7 @@ void udp_v4_early_demux(struct sk_buff *skb) } if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) - return; + return 0; skb->sk = sk; skb->destructor = sock_efree; @@ -2278,6 +2278,7 @@ void udp_v4_early_demux(struct sk_buff *skb) */ skb_dst_set_noref(skb, dst); } + return 0; } int udp_rcv(struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From bc044e8db7962e727a75b591b9851ff2ac5cf846 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Sep 2017 15:51:37 +0200 Subject: udp: perform source validation for mcast early demux The UDP early demux can leverate the rx dst cache even for multicast unconnected sockets. In such scenario the ipv4 source address is validated only on the first packet in the given flow. After that, when we fetch the dst entry from the socket rx cache, we stop enforcing the rp_filter and we even start accepting any kind of martian addresses. Disabling the dst cache for unconnected multicast socket will cause large performace regression, nearly reducing by half the max ingress tput. Instead we factor out a route helper to completely validate an skb source address for multicast packets and we call it from the UDP early demux for mcast packets landing on unconnected sockets, after successful fetching the related cached dst entry. This still gives a measurable, but limited performance regression: rp_filter = 0 rp_filter = 1 edmux disabled: 1182 Kpps 1127 Kpps edmux before: 2238 Kpps 2238 Kpps edmux after: 2037 Kpps 2019 Kpps The above figures are on top of current net tree. Applying the net-next commit 6e617de84e87 ("net: avoid a full fib lookup when rp_filter is disabled.") the delta with rp_filter == 0 will decrease even more. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/route.h | 4 +++- net/ipv4/route.c | 46 ++++++++++++++++++++++++++-------------------- net/ipv4/udp.c | 13 ++++++++++++- 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 57dfc6850d37..d538e6db1afe 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -175,7 +175,9 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 fl4->fl4_gre_key = gre_key; return ip_route_output_key(net, fl4); } - +int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, + struct in_device *in_dev, u32 *itag); int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin); int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 94d4cd2d5ea4..ac6fde5d45f1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev, EXPORT_SYMBOL(rt_dst_alloc); /* called in rcu_read_lock() section */ -static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, int our) +int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, + struct in_device *in_dev, u32 *itag) { - struct rtable *rth; - struct in_device *in_dev = __in_dev_get_rcu(dev); - unsigned int flags = RTCF_MULTICAST; - u32 itag = 0; int err; /* Primary sanity checks. */ - if (!in_dev) return -EINVAL; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || skb->protocol != htons(ETH_P_IP)) - goto e_inval; + return -EINVAL; if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) - goto e_inval; + return -EINVAL; if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr)) - goto e_inval; + return -EINVAL; } else { err = fib_validate_source(skb, saddr, 0, tos, 0, dev, - in_dev, &itag); + in_dev, itag); if (err < 0) - goto e_err; + return err; } + return 0; +} + +/* called in rcu_read_lock() section */ +static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, int our) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + unsigned int flags = RTCF_MULTICAST; + struct rtable *rth; + u32 itag = 0; + int err; + + err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); + if (err) + return err; + if (our) flags |= RTCF_LOCAL; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); if (!rth) - goto e_nobufs; + return -ENOBUFS; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; @@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, skb_dst_set(skb, &rth->dst); return 0; - -e_nobufs: - return -ENOBUFS; -e_inval: - return -EINVAL; -e_err: - return err; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9b30f821fe96..5676237d2b0f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2224,6 +2224,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, int udp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + struct in_device *in_dev = NULL; const struct iphdr *iph; const struct udphdr *uh; struct sock *sk = NULL; @@ -2241,7 +2242,7 @@ int udp_v4_early_demux(struct sk_buff *skb) if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) return 0; @@ -2272,11 +2273,21 @@ int udp_v4_early_demux(struct sk_buff *skb) if (dst) dst = dst_check(dst, 0); if (dst) { + u32 itag = 0; + /* set noref for now. * any place which wants to hold dst has to call * dst_hold_safe() */ skb_dst_set_noref(skb, dst); + + /* for unconnected multicast sockets we need to validate + * the source on each packet + */ + if (!inet_sk(sk)->inet_daddr && in_dev) + return ip_mc_validate_source(skb, iph->daddr, + iph->saddr, iph->tos, + skb->dev, in_dev, &itag); } return 0; } -- cgit v1.2.3-70-g09d2 From 5a59a3a0ef0e546626a762d49dc06feaa204bab3 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 28 Sep 2017 17:57:58 +0200 Subject: ppp: fix __percpu annotation Move sparse annotation right after pointer type. Fixes sparse warning: drivers/net/ppp/ppp_generic.c:1422:13: warning: incorrect type in initializer (different address spaces) drivers/net/ppp/ppp_generic.c:1422:13: expected void const [noderef] *__vpp_verify drivers/net/ppp/ppp_generic.c:1422:13: got int * ... Fixes: e5dadc65f9e0 ("ppp: Fix false xmit recursion detect with two ppp devices") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index a404552555d4..c3f77e3b7819 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -120,7 +120,7 @@ struct ppp { int n_channels; /* how many channels are attached 54 */ spinlock_t rlock; /* lock for receive side 58 */ spinlock_t wlock; /* lock for transmit side 5c */ - int *xmit_recursion __percpu; /* xmit recursion detect */ + int __percpu *xmit_recursion; /* xmit recursion detect */ int mru; /* max receive unit 60 */ unsigned int flags; /* control bits 64 */ unsigned int xstate; /* transmit state bits 68 */ -- cgit v1.2.3-70-g09d2 From aad06212d36cf34859428a0a279e5c14ee5c9e26 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Fri, 29 Sep 2017 10:02:54 +0200 Subject: tipc: use only positive error codes in messages In commit e3a77561e7d32 ("tipc: split up function tipc_msg_eval()"), we have updated the function tipc_msg_lookup_dest() to set the error codes to negative values at destination lookup failures. Thus when the function sets the error code to -TIPC_ERR_NO_NAME, its inserted into the 4 bit error field of the message header as 0xf instead of TIPC_ERR_NO_NAME (1). The value 0xf is an unknown error code. In this commit, we set only positive error code. Fixes: e3a77561e7d32 ("tipc: split up function tipc_msg_eval()") Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 6ef379f004ac..121e59a1d0e7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -551,7 +551,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) return false; if (msg_errcode(msg)) return false; - *err = -TIPC_ERR_NO_NAME; + *err = TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; msg = buf_msg(skb); -- cgit v1.2.3-70-g09d2 From 007a61ae2f35c7fcf767313285c4924e81f11983 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 28 Sep 2017 21:33:23 +0200 Subject: nvme: fix visibility of "uuid" ns attribute "uuid" must be invisible if both ns->uuid and ns->nguid are unset, not if either one is. Fixes: d934f9848a77 "nvme: provide UUID value to userspace" Signed-off-by: Martin Wilck Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bb2aad078637..5a14cc7f28ee 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2136,7 +2136,7 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, struct nvme_ns *ns = nvme_get_ns_from_dev(dev); if (a == &dev_attr_uuid.attr) { - if (uuid_is_null(&ns->uuid) || + if (uuid_is_null(&ns->uuid) && !memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) return 0; } -- cgit v1.2.3-70-g09d2 From be94a6f6d488b4767662e8949dc62361bd1d6311 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 29 Sep 2017 15:24:05 +0200 Subject: iio: dummy: events: Add missing break Add missing break in iio_simple_dummy_write_event_config() for the voltage threshold event enable attribute. Without this writing to the in_voltage0_thresh_rising_en always returns -EINVAL even though the change was correctly applied. Fixes: 3e34e650db197 ("iio: dummy: Demonstrate the usage of new channel types") Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dummy/iio_simple_dummy_events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/dummy/iio_simple_dummy_events.c b/drivers/iio/dummy/iio_simple_dummy_events.c index ed63ffd849f8..7ec2a0bb0807 100644 --- a/drivers/iio/dummy/iio_simple_dummy_events.c +++ b/drivers/iio/dummy/iio_simple_dummy_events.c @@ -72,6 +72,7 @@ int iio_simple_dummy_write_event_config(struct iio_dev *indio_dev, st->event_en = state; else return -EINVAL; + break; default: return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 74007ae6316ebe40260e44f8ab558f9b1ccc04e5 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sat, 23 Sep 2017 08:44:15 +0200 Subject: hwmon: (xgene) Fix up error handling path mixup in 'xgene_hwmon_probe()' Commit 2ca492e22cb7 has moved the call to 'kfifo_alloc()' from after the main 'if' statement to before it. But it has not updated the error handling paths accordingly. Fix all that: - if 'kfifo_alloc()' fails we can return directly - direct returns after 'kfifo_alloc()' must now go to 'out_mbox_free' - 'goto out_mbox_free' must be replaced by 'goto out', otherwise the '[pcc_]mbox_free_channel()' call will be missed. Fixes: 2ca492e22cb7 ("hwmon: (xgene) Fix crash when alarm occurs before driver probe") Signed-off-by: Christophe JAILLET Signed-off-by: Guenter Roeck --- drivers/hwmon/xgene-hwmon.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index 9c0dbb8191ad..e1be61095532 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -630,7 +630,7 @@ static int xgene_hwmon_probe(struct platform_device *pdev) sizeof(struct slimpro_resp_msg) * ASYNC_MSG_FIFO_SIZE, GFP_KERNEL); if (rc) - goto out_mbox_free; + return -ENOMEM; INIT_WORK(&ctx->workq, xgene_hwmon_evt_work); @@ -646,7 +646,8 @@ static int xgene_hwmon_probe(struct platform_device *pdev) if (IS_ERR(ctx->mbox_chan)) { dev_err(&pdev->dev, "SLIMpro mailbox channel request failed\n"); - return -ENODEV; + rc = -ENODEV; + goto out_mbox_free; } } else { struct acpi_pcct_hw_reduced *cppc_ss; @@ -654,7 +655,8 @@ static int xgene_hwmon_probe(struct platform_device *pdev) if (device_property_read_u32(&pdev->dev, "pcc-channel", &ctx->mbox_idx)) { dev_err(&pdev->dev, "no pcc-channel property\n"); - return -ENODEV; + rc = -ENODEV; + goto out_mbox_free; } cl->rx_callback = xgene_hwmon_pcc_rx_cb; @@ -662,7 +664,8 @@ static int xgene_hwmon_probe(struct platform_device *pdev) if (IS_ERR(ctx->mbox_chan)) { dev_err(&pdev->dev, "PPC channel request failed\n"); - return -ENODEV; + rc = -ENODEV; + goto out_mbox_free; } /* @@ -675,13 +678,13 @@ static int xgene_hwmon_probe(struct platform_device *pdev) if (!cppc_ss) { dev_err(&pdev->dev, "PPC subspace not found\n"); rc = -ENODEV; - goto out_mbox_free; + goto out; } if (!ctx->mbox_chan->mbox->txdone_irq) { dev_err(&pdev->dev, "PCC IRQ not supported\n"); rc = -ENODEV; - goto out_mbox_free; + goto out; } /* @@ -696,14 +699,14 @@ static int xgene_hwmon_probe(struct platform_device *pdev) } else { dev_err(&pdev->dev, "Failed to get PCC comm region\n"); rc = -ENODEV; - goto out_mbox_free; + goto out; } if (!ctx->pcc_comm_addr) { dev_err(&pdev->dev, "Failed to ioremap PCC comm region\n"); rc = -ENOMEM; - goto out_mbox_free; + goto out; } /* -- cgit v1.2.3-70-g09d2 From 35c036ef4a722e953e17884f4f4325f78eeab475 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Sep 2017 12:42:13 -0400 Subject: nfs: RPC_MAX_AUTH_SIZE is in bytes The units of RPC_MAX_AUTH_SIZE is bytes, not 4-byte words. This causes the client to request a larger-than-necessary session replay slot size. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 37c8af003275..14ed9791ec9c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1842,8 +1842,8 @@ static void encode_create_session(struct xdr_stream *xdr, * Assumes OPEN is the biggest non-idempotent compound. * 2 is the verifier. */ - max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + - RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT; + max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + 2) + * XDR_UNIT + RPC_MAX_AUTH_SIZE; encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr); p = reserve_space(xdr, 16 + 2*28 + 20 + clnt->cl_nodelen + 12); -- cgit v1.2.3-70-g09d2 From cdb2e53fd6dc715c5b45d0967fcb6dc574cb28f8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Sep 2017 00:53:46 +0300 Subject: NFS: Cleanup error handling in nfs_idmap_request_key() nfs_idmap_get_desc() can't actually return zero. But if it did then we would return ERR_PTR(0) which is NULL and the caller, nfs_idmap_get_key(), doesn't expect that so it leads to a NULL pointer dereference. I've cleaned this up by changing the "<=" to "<" so it's more clear that we don't return ERR_PTR(0). Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/nfs4idmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index dd5d27da8c0c..30426c1a1bbd 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -274,7 +274,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, ssize_t ret; ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); - if (ret <= 0) + if (ret < 0) return ERR_PTR(ret); rkey = request_key(&key_type_id_resolver, desc, ""); -- cgit v1.2.3-70-g09d2 From 68ebf8fe3bce8c167cf83fbd681c1eb1ed419c6c Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 22 Sep 2017 07:57:10 -0400 Subject: NFS: Fix uninitialized rpc_wait_queue Michael Sterrett reports a NULL pointer dereference on NFSv3 mounts when CONFIG_NFS_V4 is not set because the NFS UOC rpc_wait_queue has not been initialized. Move the initialization of the queue out of the CONFIG_NFS_V4 conditional setion. Fixes: 7d6ddf88c4db ("NFS: Add an iocounter wait function for async RPC tasks") Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index efebe6cf4378..22880ef6d8dd 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -218,7 +218,6 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp) static void pnfs_init_server(struct nfs_server *server) { rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); - rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC"); } #else @@ -888,6 +887,7 @@ struct nfs_server *nfs_alloc_server(void) ida_init(&server->openowner_id); ida_init(&server->lockowner_id); pnfs_init_server(server); + rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC"); return server; } -- cgit v1.2.3-70-g09d2 From d099b8af46f5e1e37182eff988f9373dcc2b0128 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Sep 2017 12:21:14 +0100 Subject: sunrpc: remove redundant initialization of sock sock is being initialized and then being almost immediately updated hence the initialized value is not being used and is redundant. Remove the initialization. Cleans up clang warning: warning: Value stored to 'sock' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9b5de31aa429..c1841f234a71 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2203,7 +2203,7 @@ static void xs_udp_setup_socket(struct work_struct *work) struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; + struct socket *sock; int status = -EIO; sock = xs_create_sock(xprt, transport, -- cgit v1.2.3-70-g09d2 From 0a47df11bfc31e1ceae7f91cea84d3bff500475d Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 29 Sep 2017 09:36:43 -0400 Subject: nfs/filelayout: fix oops when freeing filelayout segment Check for a NULL dsaddr in filelayout_free_lseg() before calling nfs4_fl_put_deviceid(). This fixes the following oops: [ 1967.645207] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 [ 1967.646010] IP: [] nfs4_put_deviceid_node+0xa/0x90 [nfsv4] [ 1967.646010] PGD c08bc067 PUD 915d3067 PMD 0 [ 1967.753036] Oops: 0000 [#1] SMP [ 1967.753036] Modules linked in: nfs_layout_nfsv41_files ext4 mbcache jbd2 loop rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache amd64_edac_mod ipmi_ssif edac_mce_amd edac_core kvm_amd sg kvm ipmi_si ipmi_devintf irqbypass pcspkr k8temp ipmi_msghandler i2c_piix4 shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic crct10dif_common amdkfd amd_iommu_v2 radeon i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops mptsas ttm scsi_transport_sas mptscsih drm mptbase serio_raw i2c_core bnx2 dm_mirror dm_region_hash dm_log dm_mod [ 1967.790031] CPU: 2 PID: 1370 Comm: ls Not tainted 3.10.0-709.el7.test.bz1463784.x86_64 #1 [ 1967.790031] Hardware name: IBM BladeCenter LS21 -[7971AC1]-/Server Blade, BIOS -[BAE155AUS-1.10]- 06/03/2009 [ 1967.790031] task: ffff8800c42a3f40 ti: ffff8800c4064000 task.ti: ffff8800c4064000 [ 1967.790031] RIP: 0010:[] [] nfs4_put_deviceid_node+0xa/0x90 [nfsv4] [ 1967.790031] RSP: 0000:ffff8800c4067978 EFLAGS: 00010246 [ 1967.790031] RAX: ffffffffc062f000 RBX: ffff8801d468a540 RCX: dead000000000200 [ 1967.790031] RDX: ffff8800c40679f8 RSI: ffff8800c4067a0c RDI: 0000000000000000 [ 1967.790031] RBP: ffff8800c4067980 R08: ffff8801d468a540 R09: 0000000000000000 [ 1967.790031] R10: 0000000000000000 R11: ffffffffffffffff R12: ffff8801d468a540 [ 1967.790031] R13: ffff8800c40679f8 R14: ffff8801d5645300 R15: ffff880126f15ff0 [ 1967.790031] FS: 00007f11053c9800(0000) GS:ffff88012bd00000(0000) knlGS:0000000000000000 [ 1967.790031] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 1967.790031] CR2: 0000000000000030 CR3: 0000000094b55000 CR4: 00000000000007e0 [ 1967.790031] Stack: [ 1967.790031] ffff8801d468a540 ffff8800c4067990 ffffffffc062d2fe ffff8800c40679b0 [ 1967.790031] ffffffffc062b5b4 ffff8800c40679f8 ffff8801d468a540 ffff8800c40679d8 [ 1967.790031] ffffffffc06d39af ffff8800c40679f8 ffff880126f16078 0000000000000001 [ 1967.790031] Call Trace: [ 1967.790031] [] nfs4_fl_put_deviceid+0xe/0x10 [nfs_layout_nfsv41_files] [ 1967.790031] [] filelayout_free_lseg+0x24/0x90 [nfs_layout_nfsv41_files] [ 1967.790031] [] pnfs_free_lseg_list+0x5f/0x80 [nfsv4] [ 1967.790031] [] _pnfs_return_layout+0x157/0x270 [nfsv4] [ 1967.790031] [] nfs4_evict_inode+0x4d/0x70 [nfsv4] [ 1967.790031] [] evict+0xa9/0x180 [ 1967.790031] [] iput+0xf9/0x190 [ 1967.790031] [] nfs_dentry_iput+0x3a/0x50 [nfs] [ 1967.790031] [] shrink_dentry_list+0x20f/0x490 [ 1967.790031] [] d_invalidate+0xd8/0x150 [ 1967.790031] [] nfs_readdir_page_filler+0x40b/0x600 [nfs] [ 1967.790031] [] nfs_readdir_xdr_to_array+0x20d/0x3b0 [nfs] [ 1967.790031] [] ? __mem_cgroup_commit_charge+0xe2/0x2f0 [ 1967.790031] [] ? __add_to_page_cache_locked+0x48/0x170 [ 1967.790031] [] ? nfs_readdir_xdr_to_array+0x3b0/0x3b0 [nfs] [ 1967.790031] [] nfs_readdir_filler+0x22/0x90 [nfs] [ 1967.790031] [] do_read_cache_page+0x7f/0x190 [ 1967.790031] [] ? fillonedir+0xe0/0xe0 [ 1967.790031] [] read_cache_page+0x1c/0x30 [ 1967.790031] [] nfs_readdir+0x1ab/0x6b0 [nfs] [ 1967.790031] [] ? nfs4_xdr_dec_layoutget+0x270/0x270 [nfsv4] [ 1967.790031] [] ? fillonedir+0xe0/0xe0 [ 1967.790031] [] vfs_readdir+0xb0/0xe0 [ 1967.790031] [] SyS_getdents+0x95/0x120 [ 1967.790031] [] system_call_fastpath+0x16/0x1b [ 1967.790031] Code: 90 31 d2 48 89 d0 5d c3 85 f6 74 f5 8d 4e 01 89 f0 f0 0f b1 0f 39 f0 74 e2 89 c6 eb eb 0f 1f 40 00 66 66 66 66 90 55 48 89 e5 53 <48> 8b 47 30 48 89 fb a8 04 74 3b 8b 57 60 83 fa 02 74 19 8d 4a [ 1967.790031] RIP [] nfs4_put_deviceid_node+0xa/0x90 [nfsv4] [ 1967.790031] RSP [ 1967.790031] CR2: 0000000000000030 Signed-off-by: Scott Mayhew Fixes: 1ebf98012792 ("NFS/filelayout: Fix racy setting of fl->dsaddr...") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 44c638b7876c..508126eb49f9 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -745,7 +745,8 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); dprintk("--> %s\n", __func__); - nfs4_fl_put_deviceid(fl->dsaddr); + if (fl->dsaddr != NULL) + nfs4_fl_put_deviceid(fl->dsaddr); /* This assumes a single RW lseg */ if (lseg->pls_range.iomode == IOMODE_RW) { struct nfs4_filelayout *flo; -- cgit v1.2.3-70-g09d2 From 935a9749a36828af0e8be224a5cd4bc758112c34 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Oct 2017 22:00:53 +0800 Subject: ip_gre: get key from session_id correctly in erspan_rcv erspan only uses the first 10 bits of session_id as the key to look up the tunnel. But in erspan_rcv, it missed 'session_id & ID_MASK' when getting the key from session_id. If any other flag is also set in session_id in a packet, it would fail to find the tunnel due to incorrect key in erspan_rcv. This patch is to add 'session_id & ID_MASK' there and also remove the unnecessary variable session_id. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8b837f6f5532..b25b1e5112d0 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, struct ip_tunnel *tunnel; struct erspanhdr *ershdr; const struct iphdr *iph; - __be32 session_id; __be32 index; int len; @@ -275,8 +274,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, /* The original GRE header does not have key field, * Use ERSPAN 10-bit session ID as key. */ - session_id = cpu_to_be32(ntohs(ershdr->session_id)); - tpi->key = session_id; + tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); index = ershdr->md.index; tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, -- cgit v1.2.3-70-g09d2 From 5513d08d29511c263c00933c00dd7a82fffda3c9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Oct 2017 22:00:54 +0800 Subject: ip_gre: check packet length and mtu correctly in erspan_xmit As a ARPHRD_ETHER device, skb->len in erspan_xmit is the length of the whole ether packet. So before checking if a packet size exceeds the mtu, skb->len should subtract dev->hard_header_len. Otherwise, all packets with max size according to mtu would be trimmed to be truncated packet. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b25b1e5112d0..2a4ef9dc48ff 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -731,7 +731,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; - if (skb->len > dev->mtu) { + if (skb->len - dev->hard_header_len > dev->mtu) { pskb_trim(skb, dev->mtu); truncate = true; } -- cgit v1.2.3-70-g09d2 From c122fda271717f4fc618e0a31e833941fd5f1efd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Oct 2017 22:00:55 +0800 Subject: ip_gre: set tunnel hlen properly in erspan_tunnel_init According to __gre_tunnel_init, tunnel->hlen should be set as the headers' length between inner packet and outer iphdr. It would be used especially to calculate a proper mtu when updating mtu in tnl_update_pmtu. Now without setting it, a bigger mtu value than expected would be updated, which hurts performance a lot. This patch is to fix it by setting tunnel->hlen with: tunnel->tun_hlen + tunnel->encap_hlen + sizeof(struct erspanhdr) Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a4ef9dc48ff..fad0bb1e3e9a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1245,7 +1245,9 @@ static int erspan_tunnel_init(struct net_device *dev) tunnel->tun_hlen = 8; tunnel->parms.iph.protocol = IPPROTO_GRE; - t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr); + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + + sizeof(struct erspanhdr); + t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; dev->mtu = ETH_DATA_LEN - t_hlen - 4; -- cgit v1.2.3-70-g09d2 From c84bed440e4e11a973e8c0254d0dfaccfca41fb0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 1 Oct 2017 22:00:56 +0800 Subject: ip_gre: erspan device should keep dst The patch 'ip_gre: ipgre_tap device should keep dst' fixed the issue ipgre_tap dev mtu couldn't be updated in tx path. The same fix is needed for erspan as well. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fad0bb1e3e9a..467e44d7587d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1254,6 +1254,7 @@ static int erspan_tunnel_init(struct net_device *dev) dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); return ip_tunnel_init(dev); } -- cgit v1.2.3-70-g09d2 From 9f775ead5e570e7e19015b9e4e2f3dd6e71a5935 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 28 Sep 2017 15:44:38 +0200 Subject: l2tp: fix l2tp_eth module loading The l2tp_eth module crashes if its netlink callbacks are run when the pernet data aren't initialised. We should normally register_pernet_device() before the genl callbacks. However, the pernet data only maintain a list of l2tpeth interfaces, and this list is never used. So let's just drop pernet handling instead. Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 51 ++------------------------------------------------- 1 file changed, 2 insertions(+), 49 deletions(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 87da9ef61860..014a7bc2a872 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -44,7 +44,6 @@ struct l2tp_eth { struct net_device *dev; struct sock *tunnel_sock; struct l2tp_session *session; - struct list_head list; atomic_long_t tx_bytes; atomic_long_t tx_packets; atomic_long_t tx_dropped; @@ -58,17 +57,6 @@ struct l2tp_eth_sess { struct net_device *dev; }; -/* per-net private data for this module */ -static unsigned int l2tp_eth_net_id; -struct l2tp_eth_net { - struct list_head l2tp_eth_dev_list; - spinlock_t l2tp_eth_lock; -}; - -static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net) -{ - return net_generic(net, l2tp_eth_net_id); -} static int l2tp_eth_dev_init(struct net_device *dev) { @@ -84,12 +72,6 @@ static int l2tp_eth_dev_init(struct net_device *dev) static void l2tp_eth_dev_uninit(struct net_device *dev) { - struct l2tp_eth *priv = netdev_priv(dev); - struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev)); - - spin_lock(&pn->l2tp_eth_lock); - list_del_init(&priv->list); - spin_unlock(&pn->l2tp_eth_lock); dev_put(dev); } @@ -273,7 +255,6 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, struct l2tp_eth *priv; struct l2tp_eth_sess *spriv; int rc; - struct l2tp_eth_net *pn; if (cfg->ifname) { strlcpy(name, cfg->ifname, IFNAMSIZ); @@ -305,7 +286,6 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, priv = netdev_priv(dev); priv->dev = dev; priv->session = session; - INIT_LIST_HEAD(&priv->list); priv->tunnel_sock = tunnel->sock; session->recv_skb = l2tp_eth_dev_recv; @@ -326,10 +306,6 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, strlcpy(session->ifname, dev->name, IFNAMSIZ); dev_hold(dev); - pn = l2tp_eth_pernet(dev_net(dev)); - spin_lock(&pn->l2tp_eth_lock); - list_add(&priv->list, &pn->l2tp_eth_dev_list); - spin_unlock(&pn->l2tp_eth_lock); return 0; @@ -342,22 +318,6 @@ out: return rc; } -static __net_init int l2tp_eth_init_net(struct net *net) -{ - struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id); - - INIT_LIST_HEAD(&pn->l2tp_eth_dev_list); - spin_lock_init(&pn->l2tp_eth_lock); - - return 0; -} - -static struct pernet_operations l2tp_eth_net_ops = { - .init = l2tp_eth_init_net, - .id = &l2tp_eth_net_id, - .size = sizeof(struct l2tp_eth_net), -}; - static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = { .session_create = l2tp_eth_create, @@ -371,25 +331,18 @@ static int __init l2tp_eth_init(void) err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops); if (err) - goto out; - - err = register_pernet_device(&l2tp_eth_net_ops); - if (err) - goto out_unreg; + goto err; pr_info("L2TP ethernet pseudowire support (L2TPv3)\n"); return 0; -out_unreg: - l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH); -out: +err: return err; } static void __exit l2tp_eth_exit(void) { - unregister_pernet_device(&l2tp_eth_net_ops); l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH); } -- cgit v1.2.3-70-g09d2 From 90841047a01b452cc8c3f9b990698b264143334a Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Thu, 28 Sep 2017 11:35:00 -0700 Subject: r8152: add Linksys USB3GIGV1 id This linksys dongle by default comes up in cdc_ether mode. This patch allows r8152 to claim the device: Bus 002 Device 002: ID 13b1:0041 Linksys Signed-off-by: Grant Grundler Reviewed-by: Douglas Anderson Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 10 ++++++++++ drivers/net/usb/r8152.c | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8ab281b478f2..677a85360db1 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -547,6 +547,7 @@ static const struct driver_info wwan_info = { #define REALTEK_VENDOR_ID 0x0bda #define SAMSUNG_VENDOR_ID 0x04e8 #define LENOVO_VENDOR_ID 0x17ef +#define LINKSYS_VENDOR_ID 0x13b1 #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 #define MICROSOFT_VENDOR_ID 0x045e @@ -737,6 +738,15 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +#if IS_ENABLED(CONFIG_USB_RTL8152) +/* Linksys USB3GIGV1 Ethernet Adapter */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LINKSYS_VENDOR_ID, 0x0041, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, +#endif + /* ThinkPad USB-C Dock (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3062, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ceb78e2ea4f0..941ece08ba78 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -613,6 +613,7 @@ enum rtl8152_flags { #define VENDOR_ID_MICROSOFT 0x045e #define VENDOR_ID_SAMSUNG 0x04e8 #define VENDOR_ID_LENOVO 0x17ef +#define VENDOR_ID_LINKSYS 0x13b1 #define VENDOR_ID_NVIDIA 0x0955 #define MCU_TYPE_PLA 0x0100 @@ -5316,6 +5317,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, {} }; -- cgit v1.2.3-70-g09d2 From 4792ea04bcd03b8ccfd1ae336c5deba52dd9edc9 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 29 Sep 2017 14:27:39 +0200 Subject: net: mvpp2: Fix clock resource by adding an optional bus clock On Armada 7K/8K we need to explicitly enable the bus clock. The bus clock is optional because not all the SoCs need them but at least for Armada 7K/8K it is actually mandatory. The binding documentation is updating accordingly. Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/marvell-pp2.txt | 10 ++++++---- drivers/net/ethernet/marvell/mvpp2.c | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt index 7e2dad08a12e..1814fa13f6ab 100644 --- a/Documentation/devicetree/bindings/net/marvell-pp2.txt +++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt @@ -21,8 +21,9 @@ Required properties: - main controller clock (for both armada-375-pp2 and armada-7k-pp2) - GOP clock (for both armada-375-pp2 and armada-7k-pp2) - MG clock (only for armada-7k-pp2) -- clock-names: names of used clocks, must be "pp_clk", "gop_clk" and - "mg_clk" (the latter only for armada-7k-pp2). + - AXI clock (only for armada-7k-pp2) +- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk" + and "axi_clk" (the 2 latter only for armada-7k-pp2). The ethernet ports are represented by subnodes. At least one port is required. @@ -78,8 +79,9 @@ Example for marvell,armada-7k-pp2: cpm_ethernet: ethernet@0 { compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; - clocks = <&cpm_syscon0 1 3>, <&cpm_syscon0 1 9>, <&cpm_syscon0 1 5>; - clock-names = "pp_clk", "gop_clk", "gp_clk"; + clocks = <&cpm_syscon0 1 3>, <&cpm_syscon0 1 9>, + <&cpm_syscon0 1 5>, <&cpm_syscon0 1 18>; + clock-names = "pp_clk", "gop_clk", "gp_clk", "axi_clk"; eth0: eth0 { interrupts = , diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 161055564720..9c86cb7cb988 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -793,6 +793,7 @@ struct mvpp2 { struct clk *pp_clk; struct clk *gop_clk; struct clk *mg_clk; + struct clk *axi_clk; /* List of pointers to port structures */ struct mvpp2_port **port_list; @@ -7970,6 +7971,18 @@ static int mvpp2_probe(struct platform_device *pdev) err = clk_prepare_enable(priv->mg_clk); if (err < 0) goto err_gop_clk; + + priv->axi_clk = devm_clk_get(&pdev->dev, "axi_clk"); + if (IS_ERR(priv->axi_clk)) { + err = PTR_ERR(priv->axi_clk); + if (err == -EPROBE_DEFER) + goto err_gop_clk; + priv->axi_clk = NULL; + } else { + err = clk_prepare_enable(priv->axi_clk); + if (err < 0) + goto err_gop_clk; + } } /* Get system's tclk rate */ @@ -8024,6 +8037,7 @@ static int mvpp2_probe(struct platform_device *pdev) return 0; err_mg_clk: + clk_disable_unprepare(priv->axi_clk); if (priv->hw_version == MVPP22) clk_disable_unprepare(priv->mg_clk); err_gop_clk: @@ -8061,6 +8075,7 @@ static int mvpp2_remove(struct platform_device *pdev) aggr_txq->descs_dma); } + clk_disable_unprepare(priv->axi_clk); clk_disable_unprepare(priv->mg_clk); clk_disable_unprepare(priv->pp_clk); clk_disable_unprepare(priv->gop_clk); -- cgit v1.2.3-70-g09d2 From 81359617f1b783a01e6e22b46cbb046e9513b9c6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 30 Sep 2017 07:34:34 +0200 Subject: net: hns3: Fix an error handling path in 'hclge_rss_init_hw()' If this sanity check fails, we must free 'rss_indir'. Otherwise there is a memory leak. 'goto err' as done in the other error handling paths to fix it. Fixes: 46a3df9f9718 ("net: hns3: Fix for setting rss_size incorrectly") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e0685e630afe..c1cdbfd83bdb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2652,7 +2652,8 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev) dev_err(&hdev->pdev->dev, "Configure rss tc size failed, invalid TC_SIZE = %d\n", rss_size); - return -EINVAL; + ret = -EINVAL; + goto err; } roundup_size = roundup_pow_of_two(rss_size); -- cgit v1.2.3-70-g09d2 From fb458864d9a78cc433fec7979acbe4078c82d7a8 Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Tue, 26 Sep 2017 09:03:40 +0900 Subject: mmc: core: add driver strength selection when selecting hs400es The driver strength selection is missed and required when selecting hs400es. So, It is added here. Fixes: 81ac2af65793ecf ("mmc: core: implement enhanced strobe support") Cc: stable@vger.kernel.org Signed-off-by: Hankyung Yu Signed-off-by: Chanho Min Reviewed-by: Adrian Hunter Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index a7eb623f8daa..36217ad5e9b1 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1286,6 +1286,23 @@ out_err: return err; } +static void mmc_select_driver_type(struct mmc_card *card) +{ + int card_drv_type, drive_strength, drv_type; + + card_drv_type = card->ext_csd.raw_driver_strength | + mmc_driver_type_mask(0); + + drive_strength = mmc_select_drive_strength(card, + card->ext_csd.hs200_max_dtr, + card_drv_type, &drv_type); + + card->drive_strength = drive_strength; + + if (drv_type) + mmc_set_driver_type(card->host, drv_type); +} + static int mmc_select_hs400es(struct mmc_card *card) { struct mmc_host *host = card->host; @@ -1341,6 +1358,8 @@ static int mmc_select_hs400es(struct mmc_card *card) goto out_err; } + mmc_select_driver_type(card); + /* Switch card to HS400 */ val = EXT_CSD_TIMING_HS400 | card->drive_strength << EXT_CSD_DRV_STR_SHIFT; @@ -1374,23 +1393,6 @@ out_err: return err; } -static void mmc_select_driver_type(struct mmc_card *card) -{ - int card_drv_type, drive_strength, drv_type; - - card_drv_type = card->ext_csd.raw_driver_strength | - mmc_driver_type_mask(0); - - drive_strength = mmc_select_drive_strength(card, - card->ext_csd.hs200_max_dtr, - card_drv_type, &drv_type); - - card->drive_strength = drive_strength; - - if (drv_type) - mmc_set_driver_type(card->host, drv_type); -} - /* * For device supporting HS200 mode, the following sequence * should be done before executing the tuning process. -- cgit v1.2.3-70-g09d2 From 2a5e597c6bb1b873e473e5f57147e9e5d2755430 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 18 Sep 2017 09:27:42 -0700 Subject: HID: wacom: Always increment hdev refcount within wacom_get_hdev_data The wacom_get_hdev_data function is used to find and return a reference to the "other half" of a Wacom device (i.e., the touch device associated with a pen, or vice-versa). To ensure these references are properly accounted for, the function is supposed to automatically increment the refcount before returning. This was not done, however, for devices which have pen & touch on different interfaces of the same USB device. This can lead to a WARNING ("refcount_t: underflow; use-after-free") when removing the module or device as we call kref_put() more times than kref_get(). Triggering an "actual" use- after-free would be difficult since both devices will disappear nearly- simultaneously. To silence this warning and prevent the potential error, we need to increment the refcount for all cases within wacom_get_hdev_data. Fixes: 41372d5d40 ("HID: wacom: Augment 'oVid' and 'oPid' with heuristics for HID_GENERIC") Cc: # v4.9+ Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 735bfbbcaa82..906e654fb0ba 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -668,8 +668,10 @@ static struct wacom_hdev_data *wacom_get_hdev_data(struct hid_device *hdev) /* Try to find an already-probed interface from the same device */ list_for_each_entry(data, &wacom_udev_list, list) { - if (compare_device_paths(hdev, data->dev, '/')) + if (compare_device_paths(hdev, data->dev, '/')) { + kref_get(&data->kref); return data; + } } /* Fallback to finding devices that appear to be "siblings" */ -- cgit v1.2.3-70-g09d2 From 814b6d17487fd970f293ee674c90ba267f82415d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 19 Sep 2017 18:37:46 -0700 Subject: HID: hidraw: fix power sequence when closing device We should not try to bring HID device out of full power state before calling hid_hw_close(), so that transport driver operates on powered up device (making this inverse of the opening sequence). Signed-off-by: Dmitry Torokhov Reviewed-by: Guenter Roeck Reviewed-by: Benson Leung Signed-off-by: Jiri Kosina --- drivers/hid/hidraw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index ec530454e6f6..5fbe0f81ab2e 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -337,8 +337,8 @@ static void drop_ref(struct hidraw *hidraw, int exists_bit) kfree(hidraw); } else { /* close device for last reader */ - hid_hw_power(hidraw->hid, PM_HINT_NORMAL); hid_hw_close(hidraw->hid); + hid_hw_power(hidraw->hid, PM_HINT_NORMAL); } } } -- cgit v1.2.3-70-g09d2 From 66dcdafe8e251a3edc5d84cf725835567bd3dd35 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 2 Oct 2017 15:50:34 +0800 Subject: Revert "HID: multitouch: Support ALPS PTP stick with pid 0x120A" This reverts commit fcaa4a07d2a4b541e91da7a55d8b3331f96d1865. As noted by Masaki [1], 0x120A + trackpoint will not be used in mass production machines, so remove the ID accordingly. [1] http://www.spinics.net/lists/linux-input/msg53222.html Signed-off-by: Kai-Heng Feng Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 - drivers/hid/hid-multitouch.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 832897d4a849..a98919199858 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -75,7 +75,6 @@ #define USB_VENDOR_ID_ALPS_JP 0x044E #define HID_DEVICE_ID_ALPS_U1_DUAL 0x120B -#define HID_DEVICE_ID_ALPS_U1_PTP_2 0x120A #define HID_DEVICE_ID_ALPS_U1_DUAL_PTP 0x121F #define HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP 0x1220 diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index c78625dceced..9e8c4d2ba11d 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1419,10 +1419,6 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) }, - { .driver_data = MT_CLS_WIN_8_DUAL, - HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, - USB_VENDOR_ID_ALPS_JP, - HID_DEVICE_ID_ALPS_U1_PTP_2) }, /* Lenovo X1 TAB Gen 2 */ { .driver_data = MT_CLS_WIN_8_DUAL, -- cgit v1.2.3-70-g09d2 From 51db452df07bb4c5754b73789253ba21681d9dc2 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 26 Sep 2017 09:11:49 +0900 Subject: Revert "ALSA: echoaudio: purge contradictions between dimension matrix members and total number of members" This reverts commit 275353bb684e to fix a regression which can abort 'alsactl' program in alsa-utils due to assertion in alsa-lib. alsactl: control.c:2513: snd_ctl_elem_value_get_integer: Assertion `idx < sizeof(obj->value.integer.value) / sizeof(obj->value.integer.value[0])' failed. alsactl: control.c:2976: snd_ctl_elem_value_get_integer: Assertion `idx < ARRAY_SIZE(obj->value.integer.value)' failed. This commit is a band-aid. In a point of usage of ALSA control interface, the drivers still bring an issue that they prevent userspace applications to have a consistent way to parse each levels of the dimension information via ALSA control interface. Let me investigate this issue. Current implementation of the drivers have three control element sets with dimension information: * 'Monitor Mixer Volume' (type: integer) * 'VMixer Volume' (type: integer) * 'VU-meters' (type: boolean) Although the number of elements named as 'Monitor Mixer Volume' differs depending on drivers in this group, it can be calculated by macros defined by each driver (= (BX_NUM - BX_ANALOG_IN) * BX_ANALOG_IN). Each of the elements has one member for value and has dimension information with 2 levels (= BX_ANALOG_IN * (BX_NUM - BX_ANALOG_IN)). For these elements, userspace applications are expected to handle the dimension information so that all of the elements construct a matrix where the number of rows and columns are represented by the dimension information. The same way is applied to elements named as 'VMixer Volume'. The number of these elements can also be calculated by macros defined by each drivers (= PX_ANALOG_IN * BX_ANALOG_IN). Each of the element has one member for value and has dimension information with 2 levels (= BX_ANALOG_IN * PX_ANALOG_IN). All of the elements construct a matrix with the dimension information. An element named as 'VU-meters' gets a different way in a point of dimension information. The element includes 96 members for value. The element has dimension information with 3 levels (= 3 or 2 * 16 * 2). For this element, userspace applications are expected to handle the dimension information so that all of the members for value construct a matrix where the number of rows and columns are represented by the dimension information. This is different from the way for the former. As a summary, the drivers were not designed to produce a consistent way to parse the dimension information. This makes it hard for general userspace applications such as amixer to parse the information by a consistent way, and actually no userspace applications except for 'echomixer' utilize the dimension information. Additionally, no drivers excluding this group use the information. The reverted commit was written based on the latter way. A commit 860c1994a70a ('ALSA: control: add dimension validator for userspace elements') is written based on the latter way, too. The patch should be reconsider too in the same time to re-define a consistent way to parse the dimension information. Reported-by: Mark Hills Reported-by: S. Christian Collins Fixes: 275353bb684e ('ALSA: echoaudio: purge contradictions between dimension matrix members and total number of members') Cc: # v4.8+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/pci/echoaudio/echoaudio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 7326695bca33..d68f99e076a8 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1272,11 +1272,11 @@ static int snd_echo_mixer_info(struct snd_kcontrol *kcontrol, chip = snd_kcontrol_chip(kcontrol); uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; uinfo->value.integer.min = ECHOGAIN_MINOUT; uinfo->value.integer.max = ECHOGAIN_MAXOUT; uinfo->dimen.d[0] = num_busses_out(chip); uinfo->dimen.d[1] = num_busses_in(chip); - uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1]; return 0; } @@ -1344,11 +1344,11 @@ static int snd_echo_vmixer_info(struct snd_kcontrol *kcontrol, chip = snd_kcontrol_chip(kcontrol); uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; uinfo->value.integer.min = ECHOGAIN_MINOUT; uinfo->value.integer.max = ECHOGAIN_MAXOUT; uinfo->dimen.d[0] = num_busses_out(chip); uinfo->dimen.d[1] = num_pipes_out(chip); - uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1]; return 0; } @@ -1728,6 +1728,7 @@ static int snd_echo_vumeters_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 96; uinfo->value.integer.min = ECHOGAIN_MINOUT; uinfo->value.integer.max = 0; #ifdef ECHOCARD_HAS_VMIXER @@ -1737,7 +1738,6 @@ static int snd_echo_vumeters_info(struct snd_kcontrol *kcontrol, #endif uinfo->dimen.d[1] = 16; /* 16 channels */ uinfo->dimen.d[2] = 2; /* 0=level, 1=peak */ - uinfo->count = uinfo->dimen.d[0] * uinfo->dimen.d[1] * uinfo->dimen.d[2]; return 0; } -- cgit v1.2.3-70-g09d2 From b8b8b16352cd90c6083033fd4487f04fae935c18 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 20 Sep 2017 16:15:05 -0500 Subject: rtlwifi: rtl8821ae: Fix connection lost problem In commit 40b368af4b75 ("rtlwifi: Fix alignment issues"), the read of REG_DBI_READ was changed from 16 to 8 bits. For unknown reasonsi this change results in reduced stability for the wireless connection. This regression was located using bisection. Fixes: 40b368af4b75 ("rtlwifi: Fix alignment issues") Reported-and-tested-by: James Cameron Signed-off-by: Larry Finger Cc: Stable # 4.11+ Cc: Ping-Ke Shih Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 4f73012978e9..1d431d4bf6d2 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1122,7 +1122,7 @@ static u8 _rtl8821ae_dbi_read(struct rtl_priv *rtlpriv, u16 addr) } if (0 == tmp) { read_addr = REG_DBI_RDATA + addr % 4; - ret = rtl_read_byte(rtlpriv, read_addr); + ret = rtl_read_word(rtlpriv, read_addr); } return ret; } -- cgit v1.2.3-70-g09d2 From dd2349121bb1b8ff688c3ca6a2a0bea9d8c142ca Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sat, 16 Sep 2017 21:08:24 -0700 Subject: brcmfmac: Add check for short event packets The length of the data in the received skb is currently passed into brcmf_fweh_process_event() as packet_len, but this value is not checked. event_packet should be followed by DATALEN bytes of additional event data. Ensure that the received packet actually contains at least DATALEN bytes of additional data, to avoid copying uninitialized memory into event->data. Cc: # v3.8 Suggested-by: Mattias Nissler Signed-off-by: Kevin Cernekee Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 4eb1e1ce9ace..ef72baf6dd96 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -429,7 +429,8 @@ void brcmf_fweh_process_event(struct brcmf_pub *drvr, if (code != BRCMF_E_IF && !fweh->evt_handler[code]) return; - if (datalen > BRCMF_DCMD_MAXLEN) + if (datalen > BRCMF_DCMD_MAXLEN || + datalen + sizeof(*event_packet) > packet_len) return; if (in_interrupt()) -- cgit v1.2.3-70-g09d2 From c503dd38f850be28867ef7a42d9abe5ade81a9bd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Sep 2017 23:29:12 +0200 Subject: brcmsmac: make some local variables 'static const' to reduce stack size With KASAN and a couple of other patches applied, this driver is one of the few remaining ones that actually use more than 2048 bytes of kernel stack: broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy_gainctrl': broadcom/brcm80211/brcmsmac/phy/phy_n.c:16065:1: warning: the frame size of 3264 bytes is larger than 2048 bytes [-Wframe-larger-than=] broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy': broadcom/brcm80211/brcmsmac/phy/phy_n.c:17138:1: warning: the frame size of 2864 bytes is larger than 2048 bytes [-Wframe-larger-than=] Here, I'm reducing the stack size by marking as many local variables as 'static const' as I can without changing the actual code. This is the first of three patches to improve the stack usage in this driver. It would be good to have this backported to stabl kernels to get all drivers in 'allmodconfig' below the 2048 byte limit so we can turn on the frame warning again globally, but I realize that the patch is larger than the normal limit for stable backports. The other two patches do not need to be backported. Cc: Acked-by: Arend van Spriel Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmsmac/phy/phy_n.c | 197 ++++++++++----------- 1 file changed, 97 insertions(+), 100 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c index b3aab2fe96eb..ef685465f80a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c @@ -14764,8 +14764,8 @@ static void wlc_phy_ipa_restore_tx_digi_filts_nphy(struct brcms_phy *pi) } static void -wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, u8 *events, u8 *dlys, - u8 len) +wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, const u8 *events, + const u8 *dlys, u8 len) { u32 t1_offset, t2_offset; u8 ctr; @@ -15240,16 +15240,16 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev5(struct brcms_phy *pi) static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) { u16 currband; - s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 }; - s8 *lna1_gain_db = NULL; - s8 *lna1_gain_db_2 = NULL; - s8 *lna2_gain_db = NULL; - s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 }; - s8 *tia_gain_db; - s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 }; - s8 *tia_gainbits; - u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f }; - u16 *rfseq_init_gain; + static const s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 }; + const s8 *lna1_gain_db = NULL; + const s8 *lna1_gain_db_2 = NULL; + const s8 *lna2_gain_db = NULL; + static const s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 }; + const s8 *tia_gain_db; + static const s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 }; + const s8 *tia_gainbits; + static const u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f }; + const u16 *rfseq_init_gain; u16 init_gaincode; u16 clip1hi_gaincode; u16 clip1md_gaincode = 0; @@ -15310,10 +15310,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) if ((freq <= 5080) || (freq == 5825)) { - s8 lna1A_gain_db_rev7[] = { 11, 16, 20, 24 }; - s8 lna1A_gain_db_2_rev7[] = { - 11, 17, 22, 25}; - s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; + static const s8 lna1A_gain_db_rev7[] = { 11, 16, 20, 24 }; + static const s8 lna1A_gain_db_2_rev7[] = { 11, 17, 22, 25}; + static const s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; crsminu_th = 0x3e; lna1_gain_db = lna1A_gain_db_rev7; @@ -15321,10 +15320,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) lna2_gain_db = lna2A_gain_db_rev7; } else if ((freq >= 5500) && (freq <= 5700)) { - s8 lna1A_gain_db_rev7[] = { 11, 17, 21, 25 }; - s8 lna1A_gain_db_2_rev7[] = { - 12, 18, 22, 26}; - s8 lna2A_gain_db_rev7[] = { 1, 8, 12, 16 }; + static const s8 lna1A_gain_db_rev7[] = { 11, 17, 21, 25 }; + static const s8 lna1A_gain_db_2_rev7[] = { 12, 18, 22, 26}; + static const s8 lna2A_gain_db_rev7[] = { 1, 8, 12, 16 }; crsminu_th = 0x45; clip1md_gaincode_B = 0x14; @@ -15335,10 +15333,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) lna2_gain_db = lna2A_gain_db_rev7; } else { - s8 lna1A_gain_db_rev7[] = { 12, 18, 22, 26 }; - s8 lna1A_gain_db_2_rev7[] = { - 12, 18, 22, 26}; - s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; + static const s8 lna1A_gain_db_rev7[] = { 12, 18, 22, 26 }; + static const s8 lna1A_gain_db_2_rev7[] = { 12, 18, 22, 26}; + static const s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; crsminu_th = 0x41; lna1_gain_db = lna1A_gain_db_rev7; @@ -15450,65 +15447,65 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) NPHY_RFSEQ_CMD_CLR_HIQ_DIS, NPHY_RFSEQ_CMD_SET_HPF_BW }; - u8 rfseq_updategainu_dlys[] = { 10, 30, 1 }; - s8 lna1G_gain_db[] = { 7, 11, 16, 23 }; - s8 lna1G_gain_db_rev4[] = { 8, 12, 17, 25 }; - s8 lna1G_gain_db_rev5[] = { 9, 13, 18, 26 }; - s8 lna1G_gain_db_rev6[] = { 8, 13, 18, 25 }; - s8 lna1G_gain_db_rev6_224B0[] = { 10, 14, 19, 27 }; - s8 lna1A_gain_db[] = { 7, 11, 17, 23 }; - s8 lna1A_gain_db_rev4[] = { 8, 12, 18, 23 }; - s8 lna1A_gain_db_rev5[] = { 6, 10, 16, 21 }; - s8 lna1A_gain_db_rev6[] = { 6, 10, 16, 21 }; - s8 *lna1_gain_db = NULL; - s8 lna2G_gain_db[] = { -5, 6, 10, 14 }; - s8 lna2G_gain_db_rev5[] = { -3, 7, 11, 16 }; - s8 lna2G_gain_db_rev6[] = { -5, 6, 10, 14 }; - s8 lna2G_gain_db_rev6_224B0[] = { -5, 6, 10, 15 }; - s8 lna2A_gain_db[] = { -6, 2, 6, 10 }; - s8 lna2A_gain_db_rev4[] = { -5, 2, 6, 10 }; - s8 lna2A_gain_db_rev5[] = { -7, 0, 4, 8 }; - s8 lna2A_gain_db_rev6[] = { -7, 0, 4, 8 }; - s8 *lna2_gain_db = NULL; - s8 tiaG_gain_db[] = { + static const u8 rfseq_updategainu_dlys[] = { 10, 30, 1 }; + static const s8 lna1G_gain_db[] = { 7, 11, 16, 23 }; + static const s8 lna1G_gain_db_rev4[] = { 8, 12, 17, 25 }; + static const s8 lna1G_gain_db_rev5[] = { 9, 13, 18, 26 }; + static const s8 lna1G_gain_db_rev6[] = { 8, 13, 18, 25 }; + static const s8 lna1G_gain_db_rev6_224B0[] = { 10, 14, 19, 27 }; + static const s8 lna1A_gain_db[] = { 7, 11, 17, 23 }; + static const s8 lna1A_gain_db_rev4[] = { 8, 12, 18, 23 }; + static const s8 lna1A_gain_db_rev5[] = { 6, 10, 16, 21 }; + static const s8 lna1A_gain_db_rev6[] = { 6, 10, 16, 21 }; + const s8 *lna1_gain_db = NULL; + static const s8 lna2G_gain_db[] = { -5, 6, 10, 14 }; + static const s8 lna2G_gain_db_rev5[] = { -3, 7, 11, 16 }; + static const s8 lna2G_gain_db_rev6[] = { -5, 6, 10, 14 }; + static const s8 lna2G_gain_db_rev6_224B0[] = { -5, 6, 10, 15 }; + static const s8 lna2A_gain_db[] = { -6, 2, 6, 10 }; + static const s8 lna2A_gain_db_rev4[] = { -5, 2, 6, 10 }; + static const s8 lna2A_gain_db_rev5[] = { -7, 0, 4, 8 }; + static const s8 lna2A_gain_db_rev6[] = { -7, 0, 4, 8 }; + const s8 *lna2_gain_db = NULL; + static const s8 tiaG_gain_db[] = { 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A }; - s8 tiaA_gain_db[] = { + static const s8 tiaA_gain_db[] = { 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13 }; - s8 tiaA_gain_db_rev4[] = { + static const s8 tiaA_gain_db_rev4[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 tiaA_gain_db_rev5[] = { + static const s8 tiaA_gain_db_rev5[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 tiaA_gain_db_rev6[] = { + static const s8 tiaA_gain_db_rev6[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 *tia_gain_db; - s8 tiaG_gainbits[] = { + const s8 *tia_gain_db; + static const s8 tiaG_gainbits[] = { 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 }; - s8 tiaA_gainbits[] = { + static const s8 tiaA_gainbits[] = { 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 }; - s8 tiaA_gainbits_rev4[] = { + static const s8 tiaA_gainbits_rev4[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 tiaA_gainbits_rev5[] = { + static const s8 tiaA_gainbits_rev5[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 tiaA_gainbits_rev6[] = { + static const s8 tiaA_gainbits_rev6[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 *tia_gainbits; - s8 lpf_gain_db[] = { 0x00, 0x06, 0x0c, 0x12, 0x12, 0x12 }; - s8 lpf_gainbits[] = { 0x00, 0x01, 0x02, 0x03, 0x03, 0x03 }; - u16 rfseqG_init_gain[] = { 0x613f, 0x613f, 0x613f, 0x613f }; - u16 rfseqG_init_gain_rev4[] = { 0x513f, 0x513f, 0x513f, 0x513f }; - u16 rfseqG_init_gain_rev5[] = { 0x413f, 0x413f, 0x413f, 0x413f }; - u16 rfseqG_init_gain_rev5_elna[] = { + const s8 *tia_gainbits; + static const s8 lpf_gain_db[] = { 0x00, 0x06, 0x0c, 0x12, 0x12, 0x12 }; + static const s8 lpf_gainbits[] = { 0x00, 0x01, 0x02, 0x03, 0x03, 0x03 }; + static const u16 rfseqG_init_gain[] = { 0x613f, 0x613f, 0x613f, 0x613f }; + static const u16 rfseqG_init_gain_rev4[] = { 0x513f, 0x513f, 0x513f, 0x513f }; + static const u16 rfseqG_init_gain_rev5[] = { 0x413f, 0x413f, 0x413f, 0x413f }; + static const u16 rfseqG_init_gain_rev5_elna[] = { 0x013f, 0x013f, 0x013f, 0x013f }; - u16 rfseqG_init_gain_rev6[] = { 0x513f, 0x513f }; - u16 rfseqG_init_gain_rev6_224B0[] = { 0x413f, 0x413f }; - u16 rfseqG_init_gain_rev6_elna[] = { 0x113f, 0x113f }; - u16 rfseqA_init_gain[] = { 0x516f, 0x516f, 0x516f, 0x516f }; - u16 rfseqA_init_gain_rev4[] = { 0x614f, 0x614f, 0x614f, 0x614f }; - u16 rfseqA_init_gain_rev4_elna[] = { + static const u16 rfseqG_init_gain_rev6[] = { 0x513f, 0x513f }; + static const u16 rfseqG_init_gain_rev6_224B0[] = { 0x413f, 0x413f }; + static const u16 rfseqG_init_gain_rev6_elna[] = { 0x113f, 0x113f }; + static const u16 rfseqA_init_gain[] = { 0x516f, 0x516f, 0x516f, 0x516f }; + static const u16 rfseqA_init_gain_rev4[] = { 0x614f, 0x614f, 0x614f, 0x614f }; + static const u16 rfseqA_init_gain_rev4_elna[] = { 0x314f, 0x314f, 0x314f, 0x314f }; - u16 rfseqA_init_gain_rev5[] = { 0x714f, 0x714f, 0x714f, 0x714f }; - u16 rfseqA_init_gain_rev6[] = { 0x714f, 0x714f }; - u16 *rfseq_init_gain; + static const u16 rfseqA_init_gain_rev5[] = { 0x714f, 0x714f, 0x714f, 0x714f }; + static const u16 rfseqA_init_gain_rev6[] = { 0x714f, 0x714f }; + const u16 *rfseq_init_gain; u16 initG_gaincode = 0x627e; u16 initG_gaincode_rev4 = 0x527e; u16 initG_gaincode_rev5 = 0x427e; @@ -15538,10 +15535,10 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) u16 clip1mdA_gaincode_rev6 = 0x2084; u16 clip1md_gaincode = 0; u16 clip1loG_gaincode = 0x0074; - u16 clip1loG_gaincode_rev5[] = { + static const u16 clip1loG_gaincode_rev5[] = { 0x0062, 0x0064, 0x006a, 0x106a, 0x106c, 0x1074, 0x107c, 0x207c }; - u16 clip1loG_gaincode_rev6[] = { + static const u16 clip1loG_gaincode_rev6[] = { 0x106a, 0x106c, 0x1074, 0x107c, 0x007e, 0x107e, 0x207e, 0x307e }; u16 clip1loG_gaincode_rev6_224B0 = 0x1074; @@ -16066,7 +16063,7 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) { - u8 rfseq_rx2tx_events[] = { + static const u8 rfseq_rx2tx_events[] = { NPHY_RFSEQ_CMD_NOP, NPHY_RFSEQ_CMD_RXG_FBW, NPHY_RFSEQ_CMD_TR_SWITCH, @@ -16076,7 +16073,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_RFSEQ_CMD_EXT_PA }; u8 rfseq_rx2tx_dlys[] = { 8, 6, 6, 2, 4, 60, 1 }; - u8 rfseq_tx2rx_events[] = { + static const u8 rfseq_tx2rx_events[] = { NPHY_RFSEQ_CMD_NOP, NPHY_RFSEQ_CMD_EXT_PA, NPHY_RFSEQ_CMD_TX_GAIN, @@ -16085,8 +16082,8 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_RFSEQ_CMD_RXG_FBW, NPHY_RFSEQ_CMD_CLR_HIQ_DIS }; - u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; - u8 rfseq_tx2rx_events_rev3[] = { + static const u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; + static const u8 rfseq_tx2rx_events_rev3[] = { NPHY_REV3_RFSEQ_CMD_EXT_PA, NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_TX_GAIN, @@ -16096,7 +16093,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS, NPHY_REV3_RFSEQ_CMD_END }; - u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; + static const u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; u8 rfseq_rx2tx_events_rev3[] = { NPHY_REV3_RFSEQ_CMD_NOP, NPHY_REV3_RFSEQ_CMD_RXG_FBW, @@ -16110,7 +16107,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) }; u8 rfseq_rx2tx_dlys_rev3[] = { 8, 6, 6, 4, 4, 18, 42, 1, 1 }; - u8 rfseq_rx2tx_events_rev3_ipa[] = { + static const u8 rfseq_rx2tx_events_rev3_ipa[] = { NPHY_REV3_RFSEQ_CMD_NOP, NPHY_REV3_RFSEQ_CMD_RXG_FBW, NPHY_REV3_RFSEQ_CMD_TR_SWITCH, @@ -16121,15 +16118,15 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_END }; - u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; - u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; + static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; + static const u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; s16 alpha0, alpha1, alpha2; s16 beta0, beta1, beta2; u32 leg_data_weights, ht_data_weights, nss1_data_weights, stbc_data_weights; u8 chan_freq_range = 0; - u16 dac_control = 0x0002; + static const u16 dac_control = 0x0002; u16 aux_adc_vmid_rev7_core0[] = { 0x8e, 0x96, 0x96, 0x96 }; u16 aux_adc_vmid_rev7_core1[] = { 0x8f, 0x9f, 0x9f, 0x96 }; u16 aux_adc_vmid_rev4[] = { 0xa2, 0xb4, 0xb4, 0x89 }; @@ -16139,8 +16136,8 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) u16 aux_adc_gain_rev4[] = { 0x02, 0x02, 0x02, 0x00 }; u16 aux_adc_gain_rev3[] = { 0x02, 0x02, 0x02, 0x00 }; u16 *aux_adc_gain; - u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; - u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; + static const u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; + static const u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; s32 min_nvar_val = 0x18d; s32 min_nvar_offset_6mbps = 20; u8 pdetrange; @@ -16151,9 +16148,9 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) u16 rfseq_rx2tx_lpf_h_hpc_rev7 = 0x77; u16 rfseq_tx2rx_lpf_h_hpc_rev7 = 0x77; u16 rfseq_pktgn_lpf_h_hpc_rev7 = 0x77; - u16 rfseq_htpktgn_lpf_hpc_rev7[] = { 0x77, 0x11, 0x11 }; - u16 rfseq_pktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; - u16 rfseq_cckpktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; + static const u16 rfseq_htpktgn_lpf_hpc_rev7[] = { 0x77, 0x11, 0x11 }; + static const u16 rfseq_pktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; + static const u16 rfseq_cckpktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; u16 ipalvlshift_3p3_war_en = 0; u16 rccal_bcap_val, rccal_scap_val; u16 rccal_tx20_11b_bcap = 0; @@ -24291,13 +24288,13 @@ static void wlc_phy_update_txcal_ladder_nphy(struct brcms_phy *pi, u16 core) u16 bbmult; u16 tblentry; - struct nphy_txiqcal_ladder ladder_lo[] = { + static const struct nphy_txiqcal_ladder ladder_lo[] = { {3, 0}, {4, 0}, {6, 0}, {9, 0}, {13, 0}, {18, 0}, {25, 0}, {25, 1}, {25, 2}, {25, 3}, {25, 4}, {25, 5}, {25, 6}, {25, 7}, {35, 7}, {50, 7}, {71, 7}, {100, 7} }; - struct nphy_txiqcal_ladder ladder_iq[] = { + static const struct nphy_txiqcal_ladder ladder_iq[] = { {3, 0}, {4, 0}, {6, 0}, {9, 0}, {13, 0}, {18, 0}, {25, 0}, {35, 0}, {50, 0}, {71, 0}, {100, 0}, {100, 1}, {100, 2}, {100, 3}, {100, 4}, {100, 5}, {100, 6}, {100, 7} @@ -25773,67 +25770,67 @@ wlc_phy_cal_txiqlo_nphy(struct brcms_phy *pi, struct nphy_txgains target_gain, u16 cal_gain[2]; struct nphy_iqcal_params cal_params[2]; u32 tbl_len; - void *tbl_ptr; + const void *tbl_ptr; bool ladder_updated[2]; u8 mphase_cal_lastphase = 0; int bcmerror = 0; bool phyhang_avoid_state = false; - u16 tbl_tx_iqlo_cal_loft_ladder_20[] = { + static const u16 tbl_tx_iqlo_cal_loft_ladder_20[] = { 0x0300, 0x0500, 0x0700, 0x0900, 0x0d00, 0x1100, 0x1900, 0x1901, 0x1902, 0x1903, 0x1904, 0x1905, 0x1906, 0x1907, 0x2407, 0x3207, 0x4607, 0x6407 }; - u16 tbl_tx_iqlo_cal_iqimb_ladder_20[] = { + static const u16 tbl_tx_iqlo_cal_iqimb_ladder_20[] = { 0x0200, 0x0300, 0x0600, 0x0900, 0x0d00, 0x1100, 0x1900, 0x2400, 0x3200, 0x4600, 0x6400, 0x6401, 0x6402, 0x6403, 0x6404, 0x6405, 0x6406, 0x6407 }; - u16 tbl_tx_iqlo_cal_loft_ladder_40[] = { + static const u16 tbl_tx_iqlo_cal_loft_ladder_40[] = { 0x0200, 0x0300, 0x0400, 0x0700, 0x0900, 0x0c00, 0x1200, 0x1201, 0x1202, 0x1203, 0x1204, 0x1205, 0x1206, 0x1207, 0x1907, 0x2307, 0x3207, 0x4707 }; - u16 tbl_tx_iqlo_cal_iqimb_ladder_40[] = { + static const u16 tbl_tx_iqlo_cal_iqimb_ladder_40[] = { 0x0100, 0x0200, 0x0400, 0x0700, 0x0900, 0x0c00, 0x1200, 0x1900, 0x2300, 0x3200, 0x4700, 0x4701, 0x4702, 0x4703, 0x4704, 0x4705, 0x4706, 0x4707 }; - u16 tbl_tx_iqlo_cal_startcoefs[] = { + static const u16 tbl_tx_iqlo_cal_startcoefs[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - u16 tbl_tx_iqlo_cal_cmds_fullcal[] = { + static const u16 tbl_tx_iqlo_cal_cmds_fullcal[] = { 0x8123, 0x8264, 0x8086, 0x8245, 0x8056, 0x9123, 0x9264, 0x9086, 0x9245, 0x9056 }; - u16 tbl_tx_iqlo_cal_cmds_recal[] = { + static const u16 tbl_tx_iqlo_cal_cmds_recal[] = { 0x8101, 0x8253, 0x8053, 0x8234, 0x8034, 0x9101, 0x9253, 0x9053, 0x9234, 0x9034 }; - u16 tbl_tx_iqlo_cal_startcoefs_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_startcoefs_nphyrev3[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - u16 tbl_tx_iqlo_cal_cmds_fullcal_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_cmds_fullcal_nphyrev3[] = { 0x8434, 0x8334, 0x8084, 0x8267, 0x8056, 0x8234, 0x9434, 0x9334, 0x9084, 0x9267, 0x9056, 0x9234 }; - u16 tbl_tx_iqlo_cal_cmds_recal_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_cmds_recal_nphyrev3[] = { 0x8423, 0x8323, 0x8073, 0x8256, 0x8045, 0x8223, 0x9423, 0x9323, 0x9073, 0x9256, 0x9045, 0x9223 }; -- cgit v1.2.3-70-g09d2 From 98f7d577c882be5a4e7403b3fdd1741d1baab6b5 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 28 Sep 2017 16:06:33 +0200 Subject: arm64: dts: marvell: fix interrupt-map property for Armada CP110 PCIe controller The interrupt-map property used in the description of the Marvell Armada 7K/8K PCIe controllers has a bogus extraneous 0 that causes the interrupt conversion to not be done properly. This causes the PCIe PME and AER root port service drivers to fail their initialization: [ 5.019900] genirq: Setting trigger mode 7 for irq 114 failed (irq_chip_set_type_parent+0x0/0x30) [ 5.028821] pcie_pme: probe of 0001:00:00.0:pcie001 failed with error -22 [ 5.035687] genirq: Setting trigger mode 7 for irq 114 failed (irq_chip_set_type_parent+0x0/0x30) [ 5.044614] aer: probe of 0001:00:00.0:pcie002 failed with error -22 This problem was introduced when the interrupt description was switched from using the GIC directly to using the ICU interrupt controller. Indeed, the GIC has address-cells = <1>, which requires a parent unit address, while the ICU has address-cells = <0>. Fixes: 6ef84a827c37 ("arm64: dts: marvell: enable GICP and ICU on Armada 7K/8K") Signed-off-by: Thomas Petazzoni Reviewed-by: Yehuda Yitschak Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi | 6 +++--- arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index 8263a8a504a8..f2aa2a81de4d 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -336,7 +336,7 @@ /* non-prefetchable memory */ 0x82000000 0 0xf6000000 0 0xf6000000 0 0xf00000>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &cpm_icu 0 ICU_GRP_NSR 22 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 0 &cpm_icu ICU_GRP_NSR 22 IRQ_TYPE_LEVEL_HIGH>; interrupts = ; num-lanes = <1>; clocks = <&cpm_clk 1 13>; @@ -362,7 +362,7 @@ /* non-prefetchable memory */ 0x82000000 0 0xf7000000 0 0xf7000000 0 0xf00000>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &cpm_icu 0 ICU_GRP_NSR 24 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 0 &cpm_icu ICU_GRP_NSR 24 IRQ_TYPE_LEVEL_HIGH>; interrupts = ; num-lanes = <1>; @@ -389,7 +389,7 @@ /* non-prefetchable memory */ 0x82000000 0 0xf8000000 0 0xf8000000 0 0xf00000>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &cpm_icu 0 ICU_GRP_NSR 23 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 0 &cpm_icu ICU_GRP_NSR 23 IRQ_TYPE_LEVEL_HIGH>; interrupts = ; num-lanes = <1>; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index b71ee6c83668..4fe70323abb3 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -335,7 +335,7 @@ /* non-prefetchable memory */ 0x82000000 0 0xfa000000 0 0xfa000000 0 0xf00000>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &cps_icu 0 ICU_GRP_NSR 22 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 0 &cps_icu ICU_GRP_NSR 22 IRQ_TYPE_LEVEL_HIGH>; interrupts = ; num-lanes = <1>; clocks = <&cps_clk 1 13>; @@ -361,7 +361,7 @@ /* non-prefetchable memory */ 0x82000000 0 0xfb000000 0 0xfb000000 0 0xf00000>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &cps_icu 0 ICU_GRP_NSR 24 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 0 &cps_icu ICU_GRP_NSR 24 IRQ_TYPE_LEVEL_HIGH>; interrupts = ; num-lanes = <1>; @@ -388,7 +388,7 @@ /* non-prefetchable memory */ 0x82000000 0 0xfc000000 0 0xfc000000 0 0xf00000>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &cps_icu 0 ICU_GRP_NSR 23 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 0 &cps_icu ICU_GRP_NSR 23 IRQ_TYPE_LEVEL_HIGH>; interrupts = ; num-lanes = <1>; -- cgit v1.2.3-70-g09d2 From 0a6de8b8668a2cfc0912a1d7df21107e1a075a3a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Oct 2017 12:42:00 +0100 Subject: arm64: fix misleading data abort decoding Currently data_abort_decode() dumps the ISS field as a decimal value with a '0x' prefix, which is somewhat misleading. Fix it to print as hexadecimal, as was intended. Fixes: 1f9b8936f36f4a8e ("arm64: Decode information from ESR upon mem faults") Reviewed-by: Dave Martin Reviewed-by: Julien Thierry Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2069e9bc0fca..b64958b23a7f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -97,7 +97,7 @@ static void data_abort_decode(unsigned int esr) (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT, (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT); } else { - pr_alert(" ISV = 0, ISS = 0x%08lu\n", esr & ESR_ELx_ISS_MASK); + pr_alert(" ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK); } pr_alert(" CM = %lu, WnR = %lu\n", -- cgit v1.2.3-70-g09d2 From 9f4057fc937f200f000dbc378c5c3e37d45e31dc Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 22 Sep 2017 09:26:57 +0800 Subject: ceph: properly queue cap snap for newly created snap realm commit 3ae0bebc "ceph: queue cap snap only when snap realm's context changes" introduced a regression: we may not call queue_realm_cap_snaps() for newly created snap realm. This regression allows unflushed snapshot data to be overwritten. Link: http://tracker.ceph.com/issues/21483 Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/snap.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 1ffc8b426c1c..7fc0b850c352 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -374,12 +374,10 @@ static int build_snap_context(struct ceph_snap_realm *realm, realm->ino, realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); - if (realm->cached_context) { - ceph_put_snap_context(realm->cached_context); - /* queue realm for cap_snap creation */ - list_add_tail(&realm->dirty_item, dirty_realms); - } + ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; + /* queue realm for cap_snap creation */ + list_add_tail(&realm->dirty_item, dirty_realms); return 0; fail: -- cgit v1.2.3-70-g09d2 From 38f340ccdf9ed5f1350505b46c5689d015967057 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 22 Sep 2017 11:41:06 +0800 Subject: ceph: fix __choose_mds() for LSSNAP request previous commit 5d37ca14 "ceph: send LSSNAP request to auth mds of directory inode" is buggy. It makes __choose_mds() choose mds base on hash of '.snap' dentry. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 84edfc60d87a..f23c820daaed 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -734,12 +734,13 @@ static int __choose_mds(struct ceph_mds_client *mdsc, inode = req->r_inode; ihold(inode); } else { - /* req->r_dentry is non-null for LSSNAP request. - * fall-thru */ - WARN_ON_ONCE(!req->r_dentry); + /* req->r_dentry is non-null for LSSNAP request */ + rcu_read_lock(); + inode = get_nonsnap_parent(req->r_dentry); + rcu_read_unlock(); + dout("__choose_mds using snapdir's parent %p\n", inode); } - } - if (!inode && req->r_dentry) { + } else if (req->r_dentry) { /* ignore race with rename; old or new d_parent is okay */ struct dentry *parent; struct inode *dir; -- cgit v1.2.3-70-g09d2 From fbffee74986c899524de5707dab9cf321d02dab8 Mon Sep 17 00:00:00 2001 From: Kalyan Kinthada Date: Fri, 29 Sep 2017 10:36:04 +1300 Subject: ARM: dts: Fix I2C repeated start issue on Armada-38x All the Armada 38x(380, 385, 388) have a silicon issue in the I2C controller which violates the I2C repeated start timing (errata FE-8471889). i2c-mv64xxx driver handles this errata based on the compatible string "marvell,mv78230-a0-i2c". This patch activates the "marvell,mv78230-a0-i2c" compatible string for the I2C controller on armada-38x SoC based devices. Signed-off-by: Kalyan Kinthada Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-38x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 7ff0811e61db..4960722aab32 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -178,7 +178,7 @@ }; i2c0: i2c@11000 { - compatible = "marvell,mv64xxx-i2c"; + compatible = "marvell,mv78230-a0-i2c", "marvell,mv64xxx-i2c"; reg = <0x11000 0x20>; #address-cells = <1>; #size-cells = <0>; @@ -189,7 +189,7 @@ }; i2c1: i2c@11100 { - compatible = "marvell,mv64xxx-i2c"; + compatible = "marvell,mv78230-a0-i2c", "marvell,mv64xxx-i2c"; reg = <0x11100 0x20>; #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3-70-g09d2 From 7682e399485fe19622b6fd82510b1f4551e48a25 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Oct 2017 14:06:43 +0200 Subject: ALSA: usx2y: Suppress kernel warning at page allocation failures The usx2y driver allocates the stream read/write buffers in continuous pages depending on the stream setup, and this may spew the kernel warning messages with a stack trace like: WARNING: CPU: 1 PID: 1846 at mm/page_alloc.c:3883 __alloc_pages_slowpath+0x1ef2/0x2d70 Modules linked in: CPU: 1 PID: 1846 Comm: kworker/1:2 Not tainted .... It may confuse user as if it were any serious error, although this is no fatal error and the driver handles the error case gracefully. Since the driver has already some sanity check of the given size (128 and 256 pages), it can't pass any crazy value. So it's merely page fragmentation. This patch adds __GFP_NOWARN to each caller for suppressing such kernel warnings. The original issue was spotted by syzkaller. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Signed-off-by: Takashi Iwai --- sound/usb/usx2y/usb_stream.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c index 4dab49080700..e229abd21652 100644 --- a/sound/usb/usx2y/usb_stream.c +++ b/sound/usb/usx2y/usb_stream.c @@ -191,7 +191,8 @@ struct usb_stream *usb_stream_new(struct usb_stream_kernel *sk, } pg = get_order(read_size); - sk->s = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO, pg); + sk->s = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO| + __GFP_NOWARN, pg); if (!sk->s) { snd_printk(KERN_WARNING "couldn't __get_free_pages()\n"); goto out; @@ -211,7 +212,8 @@ struct usb_stream *usb_stream_new(struct usb_stream_kernel *sk, pg = get_order(write_size); sk->write_page = - (void *)__get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO, pg); + (void *)__get_free_pages(GFP_KERNEL|__GFP_COMP|__GFP_ZERO| + __GFP_NOWARN, pg); if (!sk->write_page) { snd_printk(KERN_WARNING "couldn't __get_free_pages()\n"); usb_stream_free(sk); -- cgit v1.2.3-70-g09d2 From 28a04c7b7bbecaab642fcb6a2d7354eb70ea7fbe Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Oct 2017 12:14:56 +0200 Subject: mlxsw: spectrum_router: Move VRF refcounting When creating a new RIF, bumping RIF count of the containing VR is the last thing to be done. Symmetrically, when destroying a RIF, RIF count is first dropped and only then the rest of the cleanup proceeds. That's a problem for loopback RIFs. Those hold two VR references: one for overlay and one for underlay. mlxsw_sp_rif_destroy() releases the overlay one, and the deconfigure() callback the underlay one. But if both overlay and underlay are the same, and if there are no other artifacts holding the VR alive, this put actually destroys the VR. Later on, when mlxsw_sp_rif_destroy() calls mlxsw_sp_vr_put() for the same VR, the VR will already have been released and the kernel crashes with NULL pointer dereference. The underlying problem is that the RIF under destruction ends up referencing the overlay VR much longer than it claims: all the way until the call to mlxsw_sp_vr_put(). So line up the reference counting properly to reflect this. Make corresponding changes in mlxsw_sp_rif_create() as well for symmetry. Fixes: 6ddb7426a7d4 ("mlxsw: spectrum_router: Introduce loopback RIFs") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2cfb3f5d092d..3917b4dd4202 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5068,6 +5068,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN); if (IS_ERR(vr)) return ERR_CAST(vr); + vr->rif_count++; err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); if (err) @@ -5099,7 +5100,6 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rif_counters_alloc(rif); mlxsw_sp->router->rifs[rif_index] = rif; - vr->rif_count++; return rif; @@ -5110,6 +5110,7 @@ err_fid_get: kfree(rif); err_rif_alloc: err_rif_index_alloc: + vr->rif_count--; mlxsw_sp_vr_put(vr); return ERR_PTR(err); } @@ -5124,7 +5125,6 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - vr->rif_count--; mlxsw_sp->router->rifs[rif->rif_index] = NULL; mlxsw_sp_rif_counters_free(rif); ops->deconfigure(rif); @@ -5132,6 +5132,7 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) /* Loopback RIFs are not associated with a FID. */ mlxsw_sp_fid_put(fid); kfree(rif); + vr->rif_count--; mlxsw_sp_vr_put(vr); } -- cgit v1.2.3-70-g09d2 From de0f43c01a4b5d408a5c087c8a92ac1739938f8b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Oct 2017 12:14:57 +0200 Subject: mlxsw: spectrum_router: Track RIF of IPIP next hops When considering whether to set RTNH_F_OFFLOAD flag on an IPv6 route, mlxsw_sp_fib6_entry_offload_set() looks up the mlxsw_sp_nexthop corresponding to a given route, and decides based on whether the next hop's offloaded flag was set. When looking for the matching next hop, it also takes into account the device of the route, which must match next hop's RIF. IPIP next hops however hitherto didn't set the RIF. As a result, IPv6 routes forwarding traffic to IP-in-IP netdevices are never marked as offloaded, even when they actually are. Thus track RIF of IPIP next hops the same way as that of ETHERNET next hops. Fixes: 8f28a3097645 ("mlxsw: spectrum_router: Support IPv6 overlay encap") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3917b4dd4202..032089efc1a0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2723,6 +2723,7 @@ static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_rif_fini(nh); break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: + mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); break; } @@ -2742,7 +2743,11 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV4)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + if (err) + return err; + mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); + return 0; } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; @@ -4009,7 +4014,11 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV6)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + if (err) + return err; + mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); + return 0; } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; -- cgit v1.2.3-70-g09d2 From d312fefea8387503375f728855c9a62de20c9665 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 2 Oct 2017 19:31:24 +0100 Subject: ahci: don't ignore result code of ahci_reset_controller() ahci_pci_reset_controller() calls ahci_reset_controller(), which may fail, but ignores the result code and always returns success. This may result in failures like below ahci 0000:02:00.0: version 3.0 ahci 0000:02:00.0: enabling device (0000 -> 0003) ahci 0000:02:00.0: SSS flag set, parallel bus scan disabled ahci 0000:02:00.0: controller reset failed (0xffffffff) ahci 0000:02:00.0: failed to stop engine (-5) ... repeated many times ... ahci 0000:02:00.0: failed to stop engine (-5) Unable to handle kernel paging request at virtual address ffff0000093f9018 ... PC is at ahci_stop_engine+0x5c/0xd8 [libahci] LR is at ahci_deinit_port.constprop.12+0x1c/0xc0 [libahci] ... [] ahci_stop_engine+0x5c/0xd8 [libahci] [] ahci_deinit_port.constprop.12+0x1c/0xc0 [libahci] [] ahci_init_controller+0x80/0x168 [libahci] [] ahci_pci_init_controller+0x60/0x68 [ahci] [] ahci_init_one+0x75c/0xd88 [ahci] [] local_pci_probe+0x3c/0xb8 [] pci_device_probe+0x138/0x170 [] driver_probe_device+0x2dc/0x458 [] __driver_attach+0x114/0x118 [] bus_for_each_dev+0x60/0xa0 [] driver_attach+0x20/0x28 [] bus_add_driver+0x1f0/0x2a8 [] driver_register+0x60/0xf8 [] __pci_register_driver+0x3c/0x48 [] ahci_pci_driver_init+0x1c/0x1000 [ahci] [] do_one_initcall+0x38/0x120 where an obvious hardware level failure results in an unnecessary 15 second delay and a subsequent crash. So record the result code of ahci_reset_controller() and relay it, rather than ignoring it. Signed-off-by: Ard Biesheuvel Signed-off-by: Tejun Heo --- drivers/ata/ahci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index cb9b0e9090e3..9f78bb03bb76 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -621,8 +621,11 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, static int ahci_pci_reset_controller(struct ata_host *host) { struct pci_dev *pdev = to_pci_dev(host->dev); + int rc; - ahci_reset_controller(host); + rc = ahci_reset_controller(host); + if (rc) + return rc; if (pdev->vendor == PCI_VENDOR_ID_INTEL) { struct ahci_host_priv *hpriv = host->private_data; -- cgit v1.2.3-70-g09d2 From cb1dab0e01969d63717c7464cb5d75c77a39bf02 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 29 Sep 2017 16:22:57 +0800 Subject: drm/sun4i: hdmi: Disable clks in bind function error path and unbind function The HDMI driver enables the bus and mod clocks in the bind function, but does not disable them if it then bails our due to any errors. Neither does it disable the clocks in the unbind function. Fix this by adding a proper error path to the bind function, and clk_disable_unprepare calls to the unbind function. Also rename the err_cleanup_connector label to err_cleanup_encoder, since it is the encoder that gets cleaned up. Fixes: 9c5681011a0c ("drm/sun4i: Add HDMI support") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20170929082306.16193-6-wens@csie.org --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index 9ea6cd5a1370..3cf1a6932fac 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -302,26 +302,29 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master, hdmi->mod_clk = devm_clk_get(dev, "mod"); if (IS_ERR(hdmi->mod_clk)) { dev_err(dev, "Couldn't get the HDMI mod clock\n"); - return PTR_ERR(hdmi->mod_clk); + ret = PTR_ERR(hdmi->mod_clk); + goto err_disable_bus_clk; } clk_prepare_enable(hdmi->mod_clk); hdmi->pll0_clk = devm_clk_get(dev, "pll-0"); if (IS_ERR(hdmi->pll0_clk)) { dev_err(dev, "Couldn't get the HDMI PLL 0 clock\n"); - return PTR_ERR(hdmi->pll0_clk); + ret = PTR_ERR(hdmi->pll0_clk); + goto err_disable_mod_clk; } hdmi->pll1_clk = devm_clk_get(dev, "pll-1"); if (IS_ERR(hdmi->pll1_clk)) { dev_err(dev, "Couldn't get the HDMI PLL 1 clock\n"); - return PTR_ERR(hdmi->pll1_clk); + ret = PTR_ERR(hdmi->pll1_clk); + goto err_disable_mod_clk; } ret = sun4i_tmds_create(hdmi); if (ret) { dev_err(dev, "Couldn't create the TMDS clock\n"); - return ret; + goto err_disable_mod_clk; } writel(SUN4I_HDMI_CTRL_ENABLE, hdmi->base + SUN4I_HDMI_CTRL_REG); @@ -362,7 +365,7 @@ static int sun4i_hdmi_bind(struct device *dev, struct device *master, ret = sun4i_hdmi_i2c_create(dev, hdmi); if (ret) { dev_err(dev, "Couldn't create the HDMI I2C adapter\n"); - return ret; + goto err_disable_mod_clk; } drm_encoder_helper_add(&hdmi->encoder, @@ -422,6 +425,10 @@ err_cleanup_connector: drm_encoder_cleanup(&hdmi->encoder); err_del_i2c_adapter: i2c_del_adapter(hdmi->i2c); +err_disable_mod_clk: + clk_disable_unprepare(hdmi->mod_clk); +err_disable_bus_clk: + clk_disable_unprepare(hdmi->bus_clk); return ret; } @@ -434,6 +441,8 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master, drm_connector_cleanup(&hdmi->connector); drm_encoder_cleanup(&hdmi->encoder); i2c_del_adapter(hdmi->i2c); + clk_disable_unprepare(hdmi->mod_clk); + clk_disable_unprepare(hdmi->bus_clk); } static const struct component_ops sun4i_hdmi_ops = { -- cgit v1.2.3-70-g09d2 From 6e60a3bbb45bd8b307269d6a821ee2c72d815846 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 2 Oct 2017 16:22:08 -0400 Subject: nbd: fix -ERESTARTSYS handling Christoph made it so that if we return'ed BLK_STS_RESOURCE whenever we got ERESTARTSYS from sending our packets we'd return BLK_STS_OK, which means we'd never requeue and just hang. We really need to return the right value from the upper layer. Fixes: fc17b6534eb8 ("blk-mq: switch ->queue_rq return value to blk_status_t") Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3684e21d543f..883dfebd3014 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -820,9 +820,13 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, * appropriate. */ ret = nbd_handle_cmd(cmd, hctx->queue_num); + if (ret < 0) + ret = BLK_STS_IOERR; + else if (!ret) + ret = BLK_STS_OK; complete(&cmd->send_complete); - return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK; + return ret; } static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, -- cgit v1.2.3-70-g09d2 From eefca20eb20c66b06cf5ed09b49b1a7caaa27b7b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Oct 2017 12:20:51 -0700 Subject: socket, bpf: fix possible use after free Starting from linux-4.4, 3WHS no longer takes the listener lock. Since this time, we might hit a use-after-free in sk_filter_charge(), if the filter we got in the memcpy() of the listener content just happened to be replaced by a thread changing listener BPF filter. To fix this, we need to make sure the filter refcount is not already zero before incrementing it again. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Signed-off-by: Eric Dumazet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/filter.c | 12 ++++++++---- net/core/sock.c | 5 ++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 82edad58d066..74b8c91fb5f4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -989,10 +989,14 @@ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) { - bool ret = __sk_filter_charge(sk, fp); - if (ret) - refcount_inc(&fp->refcnt); - return ret; + if (!refcount_inc_not_zero(&fp->refcnt)) + return false; + + if (!__sk_filter_charge(sk, fp)) { + sk_filter_release(fp); + return false; + } + return true; } static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) diff --git a/net/core/sock.c b/net/core/sock.c index 7d55c05f449d..23953b741a41 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1684,13 +1684,16 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_reset_flag(newsk, SOCK_DONE); - filter = rcu_dereference_protected(newsk->sk_filter, 1); + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); if (filter != NULL) /* though it's an empty new sock, the charging may fail * if sysctl_optmem_max was changed between creation of * original socket and cloning */ is_charged = sk_filter_charge(newsk, filter); + RCU_INIT_POINTER(newsk->sk_filter, filter); + rcu_read_unlock(); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { /* We need to make sure that we don't uncharge the new -- cgit v1.2.3-70-g09d2 From 28a0bc4120d38a394499382ba21d6965a67a3703 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 27 Sep 2017 21:35:12 -0400 Subject: scsi: sd: Implement blacklist option for WRITE SAME w/ UNMAP SBC-4 states: "A MAXIMUM UNMAP LBA COUNT field set to a non-zero value indicates the maximum number of LBAs that may be unmapped by an UNMAP command" "A MAXIMUM WRITE SAME LENGTH field set to a non-zero value indicates the maximum number of contiguous logical blocks that the device server allows to be unmapped or written in a single WRITE SAME command." Despite the spec being clear on the topic, some devices incorrectly expect WRITE SAME commands with the UNMAP bit set to be limited to the value reported in MAXIMUM UNMAP LBA COUNT in the Block Limits VPD. Implement a blacklist option that can be used to accommodate devices with this behavior. Cc: Reported-by: Bill Kuzeja Reported-by: Ewan D. Milne Reviewed-by: Ewan D. Milne Tested-by: Laurence Oberman Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 3 +++ drivers/scsi/sd.c | 16 ++++++++++++---- include/scsi/scsi_device.h | 1 + include/scsi/scsi_devinfo.h | 1 + 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index e7818afeda2b..15590a063ad9 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -956,6 +956,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (*bflags & BLIST_NO_DIF) sdev->no_dif = 1; + if (*bflags & BLIST_UNMAP_LIMIT_WS) + sdev->unmap_limit_for_ws = 1; + sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT; if (*bflags & BLIST_TRY_VPD_PAGES) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fb9f8b5f4673..3d26a729825c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -715,13 +715,21 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) break; case SD_LBP_WS16: - max_blocks = min_not_zero(sdkp->max_ws_blocks, - (u32)SD_MAX_WS16_BLOCKS); + if (sdkp->device->unmap_limit_for_ws) + max_blocks = sdkp->max_unmap_blocks; + else + max_blocks = sdkp->max_ws_blocks; + + max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS16_BLOCKS); break; case SD_LBP_WS10: - max_blocks = min_not_zero(sdkp->max_ws_blocks, - (u32)SD_MAX_WS10_BLOCKS); + if (sdkp->device->unmap_limit_for_ws) + max_blocks = sdkp->max_unmap_blocks; + else + max_blocks = sdkp->max_ws_blocks; + + max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS10_BLOCKS); break; case SD_LBP_ZERO: diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 82e93ee94708..67c5a9f223f7 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -192,6 +192,7 @@ struct scsi_device { unsigned no_dif:1; /* T10 PI (DIF) should be disabled */ unsigned broken_fua:1; /* Don't set FUA bit */ unsigned lun_in_cdb:1; /* Store LUN bits in CDB[1] */ + unsigned unmap_limit_for_ws:1; /* Use the UNMAP limit for WRITE SAME */ atomic_t disk_events_disable_depth; /* disable depth for disk events */ diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 9592570e092a..36b03013d629 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -29,5 +29,6 @@ #define BLIST_TRY_VPD_PAGES 0x10000000 /* Attempt to read VPD pages */ #define BLIST_NO_RSOC 0x20000000 /* don't try to issue RSOC */ #define BLIST_MAX_1024 0x40000000 /* maximum 1024 sector cdb length */ +#define BLIST_UNMAP_LIMIT_WS 0x80000000 /* Use UNMAP limit for WRITE SAME */ #endif -- cgit v1.2.3-70-g09d2 From 77082ca503bed061f7fbda7cfd7c93beda967a41 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 27 Sep 2017 21:38:59 -0400 Subject: scsi: sd: Do not override max_sectors_kb sysfs setting A user may lower the max_sectors_kb setting in sysfs to accommodate certain workloads. Previously we would always set the max I/O size to either the block layer default or the optional preferred I/O size reported by the device. Keep the current heuristics for the initial setting of max_sectors_kb. For subsequent invocations, only update the current queue limit if it exceeds the capabilities of the hardware. Cc: Reported-by: Don Brace Reviewed-by: Martin Wilck Tested-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3d26a729825c..d175c5c5ccf8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3107,8 +3107,6 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_security(sdkp, buffer); } - sdkp->first_scan = 0; - /* * We now have all cache related info, determine how we deal * with flush requests. @@ -3123,7 +3121,7 @@ static int sd_revalidate_disk(struct gendisk *disk) q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max); /* - * Use the device's preferred I/O size for reads and writes + * Determine the device's preferred I/O size for reads and writes * unless the reported value is unreasonably small, large, or * garbage. */ @@ -3137,8 +3135,19 @@ static int sd_revalidate_disk(struct gendisk *disk) rw_max = min_not_zero(logical_to_sectors(sdp, dev_max), (sector_t)BLK_DEF_MAX_SECTORS); - /* Combine with controller limits */ - q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q)); + /* Do not exceed controller limit */ + rw_max = min(rw_max, queue_max_hw_sectors(q)); + + /* + * Only update max_sectors if previously unset or if the current value + * exceeds the capabilities of the hardware. + */ + if (sdkp->first_scan || + q->limits.max_sectors > q->limits.max_dev_sectors || + q->limits.max_sectors > q->limits.max_hw_sectors) + q->limits.max_sectors = rw_max; + + sdkp->first_scan = 0; set_capacity(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp); -- cgit v1.2.3-70-g09d2 From 9e10b5121ad991ea6e84ca40b15a04cdc551bfe9 Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Thu, 13 Jul 2017 09:11:21 -0700 Subject: scsi: libiscsi: Fix use-after-free race during iscsi_session_teardown Session attributes exposed through sysfs were freed before the device was destroyed, resulting in a potential use-after-free. Free these attributes after removing the device. Signed-off-by: Khazhismel Kumykov Acked-by: Chris Leech Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index bd4605a34f54..c62e8d111fd9 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2851,9 +2851,6 @@ EXPORT_SYMBOL_GPL(iscsi_session_setup); /** * iscsi_session_teardown - destroy session, host, and cls_session * @cls_session: iscsi session - * - * The driver must have called iscsi_remove_session before - * calling this. */ void iscsi_session_teardown(struct iscsi_cls_session *cls_session) { @@ -2863,6 +2860,8 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session) iscsi_pool_free(&session->cmdpool); + iscsi_remove_session(cls_session); + kfree(session->password); kfree(session->password_in); kfree(session->username); @@ -2877,7 +2876,8 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session) kfree(session->portal_type); kfree(session->discovery_parent_type); - iscsi_destroy_session(cls_session); + iscsi_free_session(cls_session); + iscsi_host_dec_session_cnt(shost); module_put(owner); } -- cgit v1.2.3-70-g09d2 From 1c048a250aae1aaab0ba9dbec908f0c6cdb8614f Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Thu, 13 Jul 2017 09:11:22 -0700 Subject: scsi: libiscsi: Remove iscsi_destroy_session iscsi_session_teardown was the only user of this function. Function currently is just short for iscsi_remove_session + iscsi_free_session. Signed-off-by: Khazhismel Kumykov Acked-by: Chris Leech Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 16 ---------------- include/scsi/scsi_transport_iscsi.h | 1 - 2 files changed, 17 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0190aeff5f7f..7404d26895f5 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2210,22 +2210,6 @@ void iscsi_free_session(struct iscsi_cls_session *session) } EXPORT_SYMBOL_GPL(iscsi_free_session); -/** - * iscsi_destroy_session - destroy iscsi session - * @session: iscsi_session - * - * Can be called by a LLD or iscsi_transport. There must not be - * any running connections. - */ -int iscsi_destroy_session(struct iscsi_cls_session *session) -{ - iscsi_remove_session(session); - ISCSI_DBG_TRANS_SESSION(session, "Completing session destruction\n"); - iscsi_free_session(session); - return 0; -} -EXPORT_SYMBOL_GPL(iscsi_destroy_session); - /** * iscsi_create_conn - create iscsi class connection * @session: iscsi cls session diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 6183d20a01fb..b266d2a3bcb1 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -434,7 +434,6 @@ extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost, unsigned int target_id); extern void iscsi_remove_session(struct iscsi_cls_session *session); extern void iscsi_free_session(struct iscsi_cls_session *session); -extern int iscsi_destroy_session(struct iscsi_cls_session *session); extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess, int dd_size, uint32_t cid); extern int iscsi_destroy_conn(struct iscsi_cls_conn *conn); -- cgit v1.2.3-70-g09d2 From 88e65389fce1f68ba6d13ae2fc0f8d7e5c338c52 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Mon, 2 Oct 2017 12:59:38 -0500 Subject: scsi: ibmvscsis: Fix write_pending failure path For write_pending if the queue is down or client failed then return -EIO so that LIO can properly process the completed command. Prior we returned 0 since LIO could not handle it properly. Now with commit fa7e25cf13a6 ("target: Fix unknown fabric callback queue-full errors") that patch addresses LIO's ability to handle things right. Signed-off-by: Bryant G. Ly Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index 785fb42f6650..2799a6b08f73 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -3767,7 +3767,7 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd) */ if ((vscsi->flags & (CLIENT_FAILED | RESPONSE_Q_DOWN))) { pr_err("write_pending failed since: %d\n", vscsi->flags); - return 0; + return -EIO; } rc = srp_transfer_data(cmd, &vio_iu(iue)->srp.cmd, ibmvscsis_rdma, -- cgit v1.2.3-70-g09d2 From 3b7af5c0fd9631762d1c4d7b4cee76f571dd3c2c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 27 Sep 2017 12:55:51 +0800 Subject: powerpc: Fix action argument for cpufeatures-based TLB flush Commit 41d0c2ecde19 ("powerpc/powernv: Fix local TLB flush for boot and MCE on POWER9") introduced calls to __flush_tlb_power[89] from the cpufeatures code, specifying the number of sets to flush. However, these functions take an action argument, not a number of sets. This means we hit the BUG() in __flush_tlb_{206,300} when using cpufeatures-style configuration. This change passes TLB_INVAL_SCOPE_GLOBAL instead. Fixes: 41d0c2ecde19 ("powerpc/powernv: Fix local TLB flush for boot and MCE on POWER9") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Jeremy Kerr Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/dt_cpu_ftrs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 1df770e8cbe0..7275fed271af 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -102,10 +102,10 @@ static void cpufeatures_flush_tlb(void) case PVR_POWER8: case PVR_POWER8E: case PVR_POWER8NVL: - __flush_tlb_power8(POWER8_TLB_SETS); + __flush_tlb_power8(TLB_INVAL_SCOPE_GLOBAL); break; case PVR_POWER9: - __flush_tlb_power9(POWER9_TLB_SETS_HASH); + __flush_tlb_power9(TLB_INVAL_SCOPE_GLOBAL); break; default: pr_err("unknown CPU version for boot TLB flush\n"); -- cgit v1.2.3-70-g09d2 From 77913bbcb43ac9a07a6fe849c2fd3bf85fc8bdd8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 25 Sep 2017 15:05:38 +1000 Subject: drm/nouveau/mmu: flush tlbs before deleting page tables Even though we've zeroed the PDE, the GPU may have cached the PD, so we need to flush when deleting them. Noticed while working on replacement MMU code, but a backport might be a good idea, so let's fix it in the current code too. Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index d06ad2c372bf..455da298227f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -241,6 +241,8 @@ nvkm_vm_unmap_pgt(struct nvkm_vm *vm, int big, u32 fpde, u32 lpde) mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem); } + mmu->func->flush(vm); + nvkm_memory_del(&pgt); } } -- cgit v1.2.3-70-g09d2 From 194d68dd051c2dd5ac2b522ae16100e774e8d869 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 1 Oct 2017 13:52:43 -0400 Subject: drm/nouveau/bsp/g92: disable by default G92's seem to require some additional bit of initialization before the BSP engine can work. It feels like clocks are not set up for the underlying VLD engine, which means that all commands submitted to the xtensa chip end up hanging. VP seems to work fine though. This still allows people to force-enable the bsp engine if they want to play around with it, but makes it harder for the card to hang by default. Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c index 8e2e24a74774..44e116f7880d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c @@ -39,5 +39,5 @@ int g84_bsp_new(struct nvkm_device *device, int index, struct nvkm_engine **pengine) { return nvkm_xtensa_new_(&g84_bsp, device, index, - true, 0x103000, pengine); + device->chipset != 0x92, 0x103000, pengine); } -- cgit v1.2.3-70-g09d2 From 227f66d2f9954f68375736af62ebcd73c6754d69 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 3 Oct 2017 16:24:28 +1000 Subject: drm/nouveau/kms/nv50: fix oops during DP IRQ handling on non-MST boards Reported-by: Woody Suwalski Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org --- drivers/gpu/drm/nouveau/nv50_display.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 2dbf62a2ac41..e4751f92b342 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -3265,11 +3265,14 @@ nv50_mstm = { void nv50_mstm_service(struct nv50_mstm *mstm) { - struct drm_dp_aux *aux = mstm->mgr.aux; + struct drm_dp_aux *aux = mstm ? mstm->mgr.aux : NULL; bool handled = true; int ret; u8 esi[8] = {}; + if (!aux) + return; + while (handled) { ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT_ESI, esi, 8); if (ret != 8) { -- cgit v1.2.3-70-g09d2 From 2fb850092fd95198a0a4746f07b80077d5a3aa37 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 29 Sep 2017 16:58:46 -0700 Subject: phy: rockchip-typec: Check for errors from tcphy_phy_init() The function tcphy_phy_init() could return an error but the callers weren't checking the return value. They should. In at least one case while testing I saw the message "wait pma ready timeout" which indicates that tcphy_phy_init() really could return an error and we should account for it. Signed-off-by: Douglas Anderson Reviewed-by: Guenter Roeck Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/rockchip/phy-rockchip-typec.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index b25c00432f9b..a958c9bced01 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -708,8 +708,11 @@ static int rockchip_usb3_phy_power_on(struct phy *phy) if (tcphy->mode == new_mode) goto unlock_ret; - if (tcphy->mode == MODE_DISCONNECT) - tcphy_phy_init(tcphy, new_mode); + if (tcphy->mode == MODE_DISCONNECT) { + ret = tcphy_phy_init(tcphy, new_mode); + if (ret) + goto unlock_ret; + } /* wait TCPHY for pipe ready */ for (timeout = 0; timeout < 100; timeout++) { @@ -783,10 +786,12 @@ static int rockchip_dp_phy_power_on(struct phy *phy) */ if (new_mode == MODE_DFP_DP && tcphy->mode != MODE_DISCONNECT) { tcphy_phy_deinit(tcphy); - tcphy_phy_init(tcphy, new_mode); + ret = tcphy_phy_init(tcphy, new_mode); } else if (tcphy->mode == MODE_DISCONNECT) { - tcphy_phy_init(tcphy, new_mode); + ret = tcphy_phy_init(tcphy, new_mode); } + if (ret) + goto unlock_ret; ret = readx_poll_timeout(readl, tcphy->base + DP_MODE_CTL, val, val & DP_MODE_A2, 1000, -- cgit v1.2.3-70-g09d2 From 070e004912fed099263408bf2ff1bbc6926abe2e Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 1 Oct 2017 16:33:03 +0200 Subject: powerpc/4xx: Fix compile error with 64K pages on 40x, 44x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mmu context on the 40x, 44x does not define pte_frag entry. This causes gcc abort the compilation due to: setup-common.c: In function ‘setup_arch’: setup-common.c:908: error: ‘mm_context_t’ has no ‘pte_frag’ This patch fixes the issue by removing the pte_frag initialization in setup-common.c. This is possible, because the compiler will do the initialization, since the mm_context is a sub struct of init_mm. init_mm is declared in mm_types.h as external linkage. According to C99 6.2.4.3: An object whose identifier is declared with external linkage [...] has static storage duration. C99 defines in 6.7.8.10 that: If an object that has static storage duration is not initialized explicitly, then: - if it has pointer type, it is initialized to a null pointer Fixes: b1923caa6e64 ("powerpc: Merge 32-bit and 64-bit setup_arch()") Signed-off-by: Christian Lamparter Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 0ac741fae90e..2e3bc16d02b2 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -904,9 +904,6 @@ void __init setup_arch(char **cmdline_p) #endif #endif -#ifdef CONFIG_PPC_64K_PAGES - init_mm.context.pte_frag = NULL; -#endif #ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_init(&init_mm); #endif -- cgit v1.2.3-70-g09d2 From e63aaaa6be54c956b9603590ea436b003407bb3e Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 20 Sep 2017 12:31:28 +0530 Subject: netfilter: nf_tables: Release memory obtained by kasprintf Free memory region, if nf_tables_set_alloc_name is not successful. Fixes: 387454901bd6 ("netfilter: nf_tables: Allow set names of up to 255 chars") Signed-off-by: Arvind Yadav Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f98ca8c6aa59..34adedcb239e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2741,8 +2741,10 @@ cont: list_for_each_entry(i, &ctx->table->sets, list) { if (!nft_is_active_next(ctx->net, i)) continue; - if (!strcmp(set->name, i->name)) + if (!strcmp(set->name, i->name)) { + kfree(set->name); return -ENFILE; + } } return 0; } -- cgit v1.2.3-70-g09d2 From f5d9644c5fca7d8e8972268598bb516a7eae17f9 Mon Sep 17 00:00:00 2001 From: Shrirang Bagul Date: Fri, 29 Sep 2017 12:39:51 +0800 Subject: USB: serial: qcserial: add Dell DW5818, DW5819 Dell Wireless 5819/5818 devices are re-branded Sierra Wireless MC74 series which will by default boot with vid 0x413c and pid's 0x81cf, 0x81d0, 0x81d1, 0x81d2. Signed-off-by: Shrirang Bagul Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index ebc0beea69d6..eb9928963a53 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -174,6 +174,10 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {DEVICE_SWI(0x413c, 0x81b5)}, /* Dell Wireless 5811e QDL */ {DEVICE_SWI(0x413c, 0x81b6)}, /* Dell Wireless 5811e QDL */ + {DEVICE_SWI(0x413c, 0x81cf)}, /* Dell Wireless 5819 */ + {DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */ + {DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */ + {DEVICE_SWI(0x413c, 0x81d2)}, /* Dell Wireless 5818 */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ -- cgit v1.2.3-70-g09d2 From 2b0b8499ae75df91455bbeb7491d45affc384fb0 Mon Sep 17 00:00:00 2001 From: Shu Wang Date: Tue, 12 Sep 2017 10:14:54 +0800 Subject: ftrace: Fix kmemleak in unregister_ftrace_graph The trampoline allocated by function tracer was overwriten by function_graph tracer, and caused a memory leak. The save_global_trampoline should have saved the previous trampoline in register_ftrace_graph() and restored it in unregister_ftrace_graph(). But as it is implemented, save_global_trampoline was only used in unregister_ftrace_graph as default value 0, and it overwrote the previous trampoline's value. Causing the previous allocated trampoline to be lost. kmmeleak backtrace: kmemleak_vmalloc+0x77/0xc0 __vmalloc_node_range+0x1b5/0x2c0 module_alloc+0x7c/0xd0 arch_ftrace_update_trampoline+0xb5/0x290 ftrace_startup+0x78/0x210 register_ftrace_function+0x8b/0xd0 function_trace_init+0x4f/0x80 tracing_set_tracer+0xe6/0x170 tracing_set_trace_write+0x90/0xd0 __vfs_write+0x37/0x170 vfs_write+0xb2/0x1b0 SyS_write+0x55/0xc0 do_syscall_64+0x67/0x180 return_from_SYSCALL_64+0x0/0x6a [ Looking further into this, I found that this was left over from when the function and function graph tracers shared the same ftrace_ops. But in commit 5f151b2401 ("ftrace: Fix function_profiler and function tracer together"), the two were separated, and the save_global_trampoline no longer was necessary (and it may have been broken back then too). -- Steven Rostedt ] Link: http://lkml.kernel.org/r/20170912021454.5976-1-shuwang@redhat.com Cc: stable@vger.kernel.org Fixes: 5f151b2401 ("ftrace: Fix function_profiler and function tracer together") Signed-off-by: Shu Wang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6abfafd7f173..8319e09e15b9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4954,9 +4954,6 @@ static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer); -static unsigned long save_global_trampoline; -static unsigned long save_global_flags; - static int __init set_graph_function(char *str) { strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); @@ -6808,17 +6805,6 @@ void unregister_ftrace_graph(void) unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); -#ifdef CONFIG_DYNAMIC_FTRACE - /* - * Function graph does not allocate the trampoline, but - * other global_ops do. We need to reset the ALLOC_TRAMP flag - * if one was used. - */ - global_ops.trampoline = save_global_trampoline; - if (save_global_flags & FTRACE_OPS_FL_ALLOC_TRAMP) - global_ops.flags |= FTRACE_OPS_FL_ALLOC_TRAMP; -#endif - out: mutex_unlock(&ftrace_lock); } -- cgit v1.2.3-70-g09d2 From f39b536ce9248e9799ff900358d6f073ab2e6c55 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 25 Sep 2017 20:19:02 -0700 Subject: rcu: Remove extraneous READ_ONCE()s from rcu_irq_{enter,exit}() The read of ->dynticks_nmi_nesting in rcu_irq_enter() and rcu_irq_exit() is currently protected with READ_ONCE(). However, this protection is unnecessary because (1) ->dynticks_nmi_nesting is updated only by the current CPU, (2) Although NMI handlers can update this field, they reset it back to its old value before return, and (3) Interrupts are disabled, so nothing else can modify it. The value of ->dynticks_nmi_nesting is thus effectively constant, and so no protection is required. This commit therefore removes the READ_ONCE() protection from these two accesses. Link: http://lkml.kernel.org/r/20170926031902.GA2074@linux.vnet.ibm.com Reported-by: Linus Torvalds Signed-off-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- kernel/rcu/tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 63bee8e1b193..c03152f7e458 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -890,7 +890,7 @@ void rcu_irq_exit(void) rdtp = this_cpu_ptr(&rcu_dynticks); /* Page faults can happen in NMI handlers, so check... */ - if (READ_ONCE(rdtp->dynticks_nmi_nesting)) + if (rdtp->dynticks_nmi_nesting) return; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && @@ -1027,7 +1027,7 @@ void rcu_irq_enter(void) rdtp = this_cpu_ptr(&rcu_dynticks); /* Page faults can happen in NMI handlers, so check... */ - if (READ_ONCE(rdtp->dynticks_nmi_nesting)) + if (rdtp->dynticks_nmi_nesting) return; oldval = rdtp->dynticks_nesting; -- cgit v1.2.3-70-g09d2 From 638164a2718f337ea224b747cf5977ef143166a4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 2 Oct 2017 02:50:16 +0800 Subject: f2fs: fix potential panic during fstrim As Ju Hyung Park reported: "When 'fstrim' is called for manual trim, a BUG() can be triggered randomly with this patch. I'm seeing this issue on both x86 Desktop and arm64 Android phone. On x86 Desktop, this was caused during Ubuntu boot-up. I have a cronjob installed which calls 'fstrim -v /' during boot. On arm64 Android, this was caused during GC looping with 1ms gc_min_sleep_time & gc_max_sleep_time." Root cause of this issue is that f2fs_wait_discard_bios can only be used by f2fs_put_super, because during put_super there must be no other referrers, so it can ignore discard entry's reference count when removing the entry, otherwise in other caller we will hit bug_on in __remove_discard_cmd as there may be other issuer added reference count in discard entry. Thread A Thread B - issue_discard_thread - f2fs_ioc_fitrim - f2fs_trim_fs - f2fs_wait_discard_bios - __issue_discard_cmd - __submit_discard_cmd - __wait_discard_cmd - dc->ref++ - __wait_one_discard_bio - __wait_discard_cmd - __remove_discard_cmd - f2fs_bug_on(sbi, dc->ref) Fixes: 969d1b180d987c2be02de890d0fff0f66a0e80de Reported-by: Ju Hyung Park Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 6 +++--- fs/f2fs/super.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9a7c90386947..4b4a72f392be 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2525,7 +2525,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); void stop_discard_thread(struct f2fs_sb_info *sbi); -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 621b9b3d320b..c695ff462ee6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1210,11 +1210,11 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } /* This comes from f2fs_put_super and f2fs_trim_fs */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) { __issue_discard_cmd(sbi, false); __drop_discard_cmd(sbi); - __wait_discard_cmd(sbi, false); + __wait_discard_cmd(sbi, !umount); } static void mark_discard_range_all(struct f2fs_sb_info *sbi) @@ -2244,7 +2244,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) } /* It's time to issue all the filed discards */ mark_discard_range_all(sbi); - f2fs_wait_discard_bios(sbi); + f2fs_wait_discard_bios(sbi, false); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 89f61eb3d167..933c3d529e65 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -801,7 +801,7 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bios(sbi); + f2fs_wait_discard_bios(sbi, true); if (f2fs_discard_en(sbi) && !sbi->discard_blks) { struct cp_control cpc = { -- cgit v1.2.3-70-g09d2 From 2fb1e946450a4fef74bb72f360555f7760d816f0 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Tue, 26 Sep 2017 16:47:04 +1000 Subject: KVM: PPC: Book3S: Fix server always zero from kvmppc_xive_get_xive() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In KVM's XICS-on-XIVE emulation, kvmppc_xive_get_xive() returns the value of state->guest_server as "server". However, this value is not set by it's counterpart kvmppc_xive_set_xive(). When the guest uses this interface to migrate interrupts away from a CPU that is going offline, it sees all interrupts as belonging to CPU 0, so they are left assigned to (now) offline CPUs. This patch removes the guest_server field from the state, and returns act_server in it's place (that is, the CPU actually handling the interrupt, which may differ from the one requested). Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Cc: stable@vger.kernel.org Signed-off-by: Sam Bobroff Acked-by: Benjamin Herrenschmidt Signed-off-by: Radim Krčmář --- arch/powerpc/kvm/book3s_xive.c | 5 ++--- arch/powerpc/kvm/book3s_xive.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 13304622ab1c..bf457843e032 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -622,7 +622,7 @@ int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server, return -EINVAL; state = &sb->irq_state[idx]; arch_spin_lock(&sb->lock); - *server = state->guest_server; + *server = state->act_server; *priority = state->guest_priority; arch_spin_unlock(&sb->lock); @@ -1331,7 +1331,7 @@ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr) xive->saved_src_count++; /* Convert saved state into something compatible with xics */ - val = state->guest_server; + val = state->act_server; prio = state->saved_scan_prio; if (prio == MASKED) { @@ -1507,7 +1507,6 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* First convert prio and mark interrupt as untargetted */ act_prio = xive_prio_from_guest(guest_prio); state->act_priority = MASKED; - state->guest_server = server; /* * We need to drop the lock due to the mutex below. Hopefully diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 5938f7644dc1..6ba63f8e8a61 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -35,7 +35,6 @@ struct kvmppc_xive_irq_state { struct xive_irq_data *pt_data; /* XIVE Pass-through associated data */ /* Targetting as set by guest */ - u32 guest_server; /* Current guest selected target */ u8 guest_priority; /* Guest set priority */ u8 saved_priority; /* Saved priority when masking */ -- cgit v1.2.3-70-g09d2 From ee213fc72fd67d0988525af501534f4cb924d1e9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 3 Oct 2017 08:51:43 -0500 Subject: kprobes/x86: Set up frame pointer in kprobe trampoline Richard Weinberger saw an unwinder warning when running bcc's opensnoop: WARNING: kernel stack frame pointer at ffff99ef4076bea0 in opensnoop:2008 has bad value 0000000000000008 unwind stack type:0 next_sp: (null) mask:0x2 graph_idx:0 ... ffff99ef4076be88: ffff99ef4076bea0 (0xffff99ef4076bea0) ffff99ef4076be90: ffffffffac442721 (optimized_callback +0x81/0x90) ... A lockdep stack trace was initiated from inside a kprobe handler, when the unwinder noticed a bad frame pointer on the stack. The bad frame pointer is related to the fact that the kprobe optprobe trampoline doesn't save the frame pointer before calling into optimized_callback(). Reported-and-tested-by: Richard Weinberger Signed-off-by: Josh Poimboeuf Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/7aef2f8ecd75c2f505ef9b80490412262cf4a44c.1507038547.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/common.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index db2182d63ed0..3fc0f9a794cb 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -3,6 +3,15 @@ /* Kprobes and Optprobes common header */ +#include + +#ifdef CONFIG_FRAME_POINTER +# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \ + " mov %" _ASM_SP ", %" _ASM_BP "\n" +#else +# define SAVE_RBP_STRING " push %" _ASM_BP "\n" +#endif + #ifdef CONFIG_X86_64 #define SAVE_REGS_STRING \ /* Skip cs, ip, orig_ax. */ \ @@ -17,7 +26,7 @@ " pushq %r10\n" \ " pushq %r11\n" \ " pushq %rbx\n" \ - " pushq %rbp\n" \ + SAVE_RBP_STRING \ " pushq %r12\n" \ " pushq %r13\n" \ " pushq %r14\n" \ @@ -48,7 +57,7 @@ " pushl %es\n" \ " pushl %ds\n" \ " pushl %eax\n" \ - " pushl %ebp\n" \ + SAVE_RBP_STRING \ " pushl %edi\n" \ " pushl %esi\n" \ " pushl %edx\n" \ -- cgit v1.2.3-70-g09d2 From b664d57f39d01e775204d4f1a7e2f8bda77bc549 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Oct 2017 16:18:02 +0900 Subject: kprobes/x86: Remove IRQ disabling from jprobe handlers Jprobes actually don't need to disable IRQs while calling handlers, because of how we specify the kernel interface in Documentation/kprobes.txt: ----- Probe handlers are run with preemption disabled. Depending on the architecture and optimization state, handlers may also run with interrupts disabled (e.g., kretprobe handlers and optimized kprobe handlers run without interrupt disabled on x86/x86-64). ----- So let's remove IRQ disabling from jprobes too. Signed-off-by: Masami Hiramatsu Cc: Alexei Starovoitov Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/150701508194.32266.14458959863314097305.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index f0153714ddac..0742491cbb73 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1080,8 +1080,6 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * raw stack chunk with redzones: */ __memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); - regs->flags &= ~X86_EFLAGS_IF; - trace_hardirqs_off(); regs->ip = (unsigned long)(jp->entry); /* -- cgit v1.2.3-70-g09d2 From ce024f42c2e28b6bce4ecc1e891b42f57f753892 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 3 Oct 2017 13:20:48 +0300 Subject: net: rtnetlink: fix info leak in RTM_GETSTATS call When RTM_GETSTATS was added the fields of its header struct were not all initialized when returning the result thus leaking 4 bytes of information to user-space per rtnl_fill_statsinfo call, so initialize them now. Thanks to Alexander Potapenko for the detailed report and bisection. Reported-by: Alexander Potapenko Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump link stats") Signed-off-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a78fd61da0ec..d4bcdcc68e92 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3854,6 +3854,9 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, return -EMSGSIZE; ifsm = nlmsg_data(nlh); + ifsm->family = PF_UNSPEC; + ifsm->pad1 = 0; + ifsm->pad2 = 0; ifsm->ifindex = dev->ifindex; ifsm->filter_mask = filter_mask; -- cgit v1.2.3-70-g09d2 From 38b249bc0ca26d57dac65f5f659b39d88899d23d Mon Sep 17 00:00:00 2001 From: Wouter Verhelst Date: Fri, 22 Sep 2017 12:09:54 +0200 Subject: MAINTAINERS: update list for NBD nbd-general@sourceforge.net becomes nbd@other.debian.org, because sourceforge is just a spamtrap these days. Signed-off-by: Wouter Verhelst Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6671f375f7fc..17a643f670a4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9348,7 +9348,7 @@ NETWORK BLOCK DEVICE (NBD) M: Josef Bacik S: Maintained L: linux-block@vger.kernel.org -L: nbd-general@lists.sourceforge.net +L: nbd@other.debian.org F: Documentation/blockdev/nbd.txt F: drivers/block/nbd.c F: include/uapi/linux/nbd.h -- cgit v1.2.3-70-g09d2 From d410a6417e5e4deb4a9c34164addcebebc9099c9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 3 Oct 2017 13:18:47 +0200 Subject: ide: free hwif->portdev on hwif_init() failure Recent pci_assign_irq() changes uncovered a problem with missing freeing of ide_port class instance on hwif_init() failure in ide_host_register(): ide0: disabled, no IRQ ide0: failed to initialize IDE interface ide0: disabling port cmd64x 0000:00:02.0: IDE controller (0x1095:0x0646 rev 0x07) CMD64x_IDE 0000:00:02.0: BAR 0: can't reserve [io 0x8050-0x8057] cmd64x 0000:00:02.0: can't reserve resources CMD64x_IDE: probe of 0000:00:02.0 failed with error -16 ide_generic: please use "probe_mask=0x3f" module parameter for probing all legacy ISA IDE ports ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x94/0xd0 sysfs: cannot create duplicate filename '/class/ide_port/ide0' ... Trace: [] __warn+0x160/0x190 [] sysfs_warn_dup+0x94/0xd0 [] warn_slowpath_fmt+0x58/0x70 [] sysfs_warn_dup+0x94/0xd0 [] kernfs_path_from_node+0x30/0x60 [] kernfs_put+0x16c/0x2c0 [] kernfs_put+0x16c/0x2c0 [] sysfs_do_create_link_sd.isra.2+0x100/0x120 [] device_add+0x2a4/0x7c0 [] device_create_groups_vargs+0x14c/0x170 [] device_create_groups_vargs+0x98/0x170 [] device_create+0x50/0x70 [] ide_host_register+0x48c/0xa00 [] ide_host_register+0x450/0xa00 [] device_register+0x20/0x50 [] ide_host_register+0x450/0xa00 [] ide_host_add+0x64/0xe0 [] kobject_uevent_env+0x16c/0x710 [] do_one_initcall+0x68/0x260 [] kernel_init+0x1c/0x1a0 [] kernel_init+0x0/0x1a0 [] ret_from_kernel_thread+0x18/0x20 [] kernel_init+0x0/0x1a0 ---[ end trace 24a70433c3e4d374 ]--- ide0: disabling port Fix the problem by adding missing code to ide_host_register(). Fixes: 30fdfb929e82 ("PCI: Add a call to pci_assign_irq() in pci_device_probe()") Fixes: 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") Link: http://lkml.kernel.org/r/32ec730f-c1b0-5584-cd35-f8a809122b96@roeck-us.net Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Bartlomiej Zolnierkiewicz [bhelgaas: add Fixes:] Signed-off-by: Bjorn Helgaas Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner --- drivers/ide/ide-probe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 01b2adfd8226..eaf39e5db08b 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1451,6 +1451,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, if (hwif_init(hwif) == 0) { printk(KERN_INFO "%s: failed to initialize IDE " "interface\n", hwif->name); + device_unregister(hwif->portdev); device_unregister(&hwif->gendev); ide_disable_port(hwif); continue; -- cgit v1.2.3-70-g09d2 From a06876a766c27dcedf17a0f60be89609ee23d861 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 3 Oct 2017 14:17:13 +0200 Subject: ide: pci: free PCI BARs on initialization failure Recent pci_assign_irq() changes uncovered a problem with missing freeing of PCI BARs on PCI IDE host initialization failure: ide0: disabled, no IRQ ide0: failed to initialize IDE interface ide0: disabling port cmd64x 0000:00:02.0: IDE controller (0x1095:0x0646 rev 0x07) CMD64x_IDE 0000:00:02.0: BAR 0: can't reserve [io 0x8050-0x8057] cmd64x 0000:00:02.0: can't reserve resources CMD64x_IDE: probe of 0000:00:02.0 failed with error -16 Fix the problem by adding missing freeing of PCI BARs to ide_setup_pci_controller() and ide_pci_init_two(). Fixes: 30fdfb929e82 ("PCI: Add a call to pci_assign_irq() in pci_device_probe()") Fixes: 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") Link: http://lkml.kernel.org/r/32ec730f-c1b0-5584-cd35-f8a809122b96@roeck-us.net Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Bartlomiej Zolnierkiewicz [bhelgaas: add Fixes:] Signed-off-by: Bjorn Helgaas Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner --- drivers/ide/setup-pci.c | 63 +++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 112d2fe1bcdb..fdc8e813170c 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -179,6 +179,7 @@ EXPORT_SYMBOL_GPL(ide_setup_pci_noise); /** * ide_pci_enable - do PCI enables * @dev: PCI device + * @bars: PCI BARs mask * @d: IDE port info * * Enable the IDE PCI device. We attempt to enable the device in full @@ -189,9 +190,10 @@ EXPORT_SYMBOL_GPL(ide_setup_pci_noise); * Returns zero on success or an error code */ -static int ide_pci_enable(struct pci_dev *dev, const struct ide_port_info *d) +static int ide_pci_enable(struct pci_dev *dev, int bars, + const struct ide_port_info *d) { - int ret, bars; + int ret; if (pci_enable_device(dev)) { ret = pci_enable_device_io(dev); @@ -216,18 +218,6 @@ static int ide_pci_enable(struct pci_dev *dev, const struct ide_port_info *d) goto out; } - if (d->host_flags & IDE_HFLAG_SINGLE) - bars = (1 << 2) - 1; - else - bars = (1 << 4) - 1; - - if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) { - if (d->host_flags & IDE_HFLAG_CS5520) - bars |= (1 << 2); - else - bars |= (1 << 4); - } - ret = pci_request_selected_regions(dev, bars, d->name); if (ret < 0) printk(KERN_ERR "%s %s: can't reserve resources\n", @@ -403,6 +393,7 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d) /** * ide_setup_pci_controller - set up IDE PCI * @dev: PCI device + * @bars: PCI BARs mask * @d: IDE port info * @noisy: verbose flag * @@ -411,7 +402,7 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d) * and enables it if need be */ -static int ide_setup_pci_controller(struct pci_dev *dev, +static int ide_setup_pci_controller(struct pci_dev *dev, int bars, const struct ide_port_info *d, int noisy) { int ret; @@ -420,7 +411,7 @@ static int ide_setup_pci_controller(struct pci_dev *dev, if (noisy) ide_setup_pci_noise(dev, d); - ret = ide_pci_enable(dev, d); + ret = ide_pci_enable(dev, bars, d); if (ret < 0) goto out; @@ -428,16 +419,20 @@ static int ide_setup_pci_controller(struct pci_dev *dev, if (ret < 0) { printk(KERN_ERR "%s %s: error accessing PCI regs\n", d->name, pci_name(dev)); - goto out; + goto out_free_bars; } if (!(pcicmd & PCI_COMMAND_IO)) { /* is device disabled? */ ret = ide_pci_configure(dev, d); if (ret < 0) - goto out; + goto out_free_bars; printk(KERN_INFO "%s %s: device enabled (Linux)\n", d->name, pci_name(dev)); } + goto out; + +out_free_bars: + pci_release_selected_regions(dev, bars); out: return ret; } @@ -540,13 +535,28 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, { struct pci_dev *pdev[] = { dev1, dev2 }; struct ide_host *host; - int ret, i, n_ports = dev2 ? 4 : 2; + int ret, i, n_ports = dev2 ? 4 : 2, bars; struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL }; + if (d->host_flags & IDE_HFLAG_SINGLE) + bars = (1 << 2) - 1; + else + bars = (1 << 4) - 1; + + if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) { + if (d->host_flags & IDE_HFLAG_CS5520) + bars |= (1 << 2); + else + bars |= (1 << 4); + } + for (i = 0; i < n_ports / 2; i++) { - ret = ide_setup_pci_controller(pdev[i], d, !i); - if (ret < 0) + ret = ide_setup_pci_controller(pdev[i], bars, d, !i); + if (ret < 0) { + if (i == 1) + pci_release_selected_regions(pdev[0], bars); goto out; + } ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]); } @@ -554,7 +564,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, host = ide_host_alloc(d, hws, n_ports); if (host == NULL) { ret = -ENOMEM; - goto out; + goto out_free_bars; } host->dev[0] = &dev1->dev; @@ -576,7 +586,7 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, * do_ide_setup_pci_device() on the first device! */ if (ret < 0) - goto out; + goto out_free_bars; /* fixup IRQ */ if (ide_pci_is_in_compatibility_mode(pdev[i])) { @@ -589,6 +599,13 @@ int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2, ret = ide_host_register(host, d, hws); if (ret) ide_host_free(host); + else + goto out; + +out_free_bars: + i = n_ports / 2; + while (i--) + pci_release_selected_regions(pdev[i], bars); out: return ret; } -- cgit v1.2.3-70-g09d2 From b1f9e5e355e909000fcccfd2bc31f7c1ded358ab Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 2 Oct 2017 11:52:47 +0100 Subject: ide: fix IRQ assignment for PCI bus order probing We used to assign IRQs for all devices at boot-time, before any drivers claimed devices. The following commits: 30fdfb929e82 ("PCI: Add a call to pci_assign_irq() in pci_device_probe()") 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") changed this so we now call pci_assign_irq() from pci_device_probe() when we call a driver's probe method. The ide_scan_pcibus() path (enabled by CONFIG_IDEPCI_PCIBUS_ORDER) bypasses pci_device_probe() so it can guarantee devices are claimed in order of PCI bus address. It calls the driver's probe method directly, so it misses the pci_assign_irq() call (and other PCI initialization functions), which causes failures like this: ide0: disabled, no IRQ ide0: failed to initialize IDE interface ide0: disabling port cmd64x 0000:00:02.0: IDE controller (0x1095:0x0646 rev 0x07) CMD64x_IDE 0000:00:02.0: BAR 0: can't reserve [io 0x8050-0x8057] cmd64x 0000:00:02.0: can't reserve resources CMD64x_IDE: probe of 0000:00:02.0 failed with error -16 ide_generic: please use "probe_mask=0x3f" module parameter for probing all legacy ISA IDE ports ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x94/0xd0 sysfs: cannot create duplicate filename '/class/ide_port/ide0' ... Trace: [] sysfs_warn_dup+0x94/0xd0 [] warn_slowpath_fmt+0x58/0x70 [] sysfs_warn_dup+0x94/0xd0 [] kernfs_path_from_node+0x30/0x60 [] kernfs_put+0x16c/0x2c0 [] kernfs_put+0x16c/0x2c0 [] sysfs_do_create_link_sd.isra.2+0x100/0x120 [] device_add+0x2a4/0x7c0 [] device_create_groups_vargs+0x14c/0x170 [] device_create_groups_vargs+0x98/0x170 [] device_create+0x50/0x70 [] ide_host_register+0x48c/0xa00 [] ide_host_register+0x450/0xa00 [] device_register+0x20/0x50 [] ide_host_register+0x450/0xa00 [] ide_host_add+0x64/0xe0 [] kobject_uevent_env+0x16c/0x710 [] do_one_initcall+0x68/0x260 [] kernel_init+0x1c/0x1a0 ... ---[ end trace 24a70433c3e4d374 ]--- ide0: disabling port Fix the IRQ allocation issue by calling pci_assign_irq() from ide_scan_pcidev() before probing the IDE PCI drivers, so that IRQs for a given PCI device are allocated for the IDE PCI drivers to use them for device configuration. Fixes: 30fdfb929e82 ("PCI: Add a call to pci_assign_irq() in pci_device_probe()") Fixes: 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") Link: http://lkml.kernel.org/r/32ec730f-c1b0-5584-cd35-f8a809122b96@roeck-us.net Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Lorenzo Pieralisi [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas Reviewed-by: Bartlomiej Zolnierkiewicz Acked-by: David S. Miller Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner --- drivers/ide/ide-scan-pci.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/ide/ide-scan-pci.c b/drivers/ide/ide-scan-pci.c index 86aa88aeb3a6..acf874800ca4 100644 --- a/drivers/ide/ide-scan-pci.c +++ b/drivers/ide/ide-scan-pci.c @@ -56,6 +56,7 @@ static int __init ide_scan_pcidev(struct pci_dev *dev) { struct list_head *l; struct pci_driver *d; + int ret; list_for_each(l, &ide_pci_drivers) { d = list_entry(l, struct pci_driver, node); @@ -63,10 +64,14 @@ static int __init ide_scan_pcidev(struct pci_dev *dev) const struct pci_device_id *id = pci_match_id(d->id_table, dev); - if (id != NULL && d->probe(dev, id) >= 0) { - dev->driver = d; - pci_dev_get(dev); - return 1; + if (id != NULL) { + pci_assign_irq(dev); + ret = d->probe(dev, id); + if (ret >= 0) { + dev->driver = d; + pci_dev_get(dev); + return 1; + } } } } -- cgit v1.2.3-70-g09d2 From 71300132975f364a0d3ebf68671a2ce4923191db Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 2 Oct 2017 16:53:07 +0300 Subject: drm/i915: Fix DDI PHY init if it was already on The common lane power down flag of a DPIO PHY has a funky semantic: after the initial enabling of the PHY (so from a disabled state) this flag will be clear. It will be set only after the PHY will be used for the first time (for instance due to enabling the corresponding pipe) and then become unused (due to disabling the pipe). During the initial PHY enablement we don't know which of the above phases we are in, so move the check for the flag where this is known, the HW readout code. This is where the rest of lane power down status checks are done anyway. This fixes at least a problem on GLK where after module reloading, the common lane power down flag of PHY1 is set, but the PHY is actually powered-on and properly set up. The GRC readout code for other PHYs will hence think that PHY1 is not powered initially and disable it after the GRC readout. This will cause the AUX power well related to PHY1 to get disabled in a stuck state, timing out when we try to enable it later. Cc: Ville Syrjala Fixes: e93da0a0137b ("drm/i915/bxt: Sanitiy check the PHY lane power down status") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102777 Signed-off-by: Imre Deak Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171002135307.26117-1-imre.deak@intel.com (cherry picked from commit e19c1eb885ac4186e64c7e484424124f3145318e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_ddi.c | 3 ++- drivers/gpu/drm/i915/intel_dpio_phy.c | 20 -------------------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 4b4fd1f8110b..476681d5940c 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1655,7 +1655,8 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, out: if (ret && IS_GEN9_LP(dev_priv)) { tmp = I915_READ(BXT_PHY_CTL(port)); - if ((tmp & (BXT_PHY_LANE_POWERDOWN_ACK | + if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | + BXT_PHY_LANE_POWERDOWN_ACK | BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED) DRM_ERROR("Port %c enabled but PHY powered down? " "(PHY_CTL %08x)\n", port_name(port), tmp); diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 09b670929786..de38d014ed39 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -208,12 +208,6 @@ static const struct bxt_ddi_phy_info glk_ddi_phy_info[] = { }, }; -static u32 bxt_phy_port_mask(const struct bxt_ddi_phy_info *phy_info) -{ - return (phy_info->dual_channel * BIT(phy_info->channel[DPIO_CH1].port)) | - BIT(phy_info->channel[DPIO_CH0].port); -} - static const struct bxt_ddi_phy_info * bxt_get_phy_list(struct drm_i915_private *dev_priv, int *count) { @@ -313,7 +307,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info; - enum port port; phy_info = bxt_get_phy_info(dev_priv, phy); @@ -335,19 +328,6 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, return false; } - for_each_port_masked(port, bxt_phy_port_mask(phy_info)) { - u32 tmp = I915_READ(BXT_PHY_CTL(port)); - - if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) { - DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane " - "for port %c powered down " - "(PHY_CTL %08x)\n", - phy, port_name(port), tmp); - - return false; - } - } - return true; } -- cgit v1.2.3-70-g09d2 From 63ba395cd7a52431cbb61658dad3beb5b24e9300 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 27 Sep 2017 23:31:03 +0200 Subject: rndis_host: support Novatel Verizon USB730L MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Treat the ef/04/01 interface class/subclass/protocol combination used by the Novatel Verizon USB730L (1410:9030) as a possible RNDIS interface. T: Bus=01 Lev=02 Prnt=02 Port=01 Cnt=02 Dev#= 17 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 3 P: Vendor=1410 ProdID=9030 Rev=03.10 S: Manufacturer=Novatel Wireless S: Product=MiFi USB730L S: SerialNumber=0123456789ABCDEF C: #Ifs= 3 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=ef(misc ) Sub=04 Prot=01 Driver=rndis_host I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host I: If#= 2 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=00 Prot=00 Driver=usbhid Once the network interface is brought up, the user just needs to run a DHCP client to get IP address and routing setup. As a side note, other Novatel Verizon USB730L models with the same vid:pid end up exposing a standard ECM interface which doesn't require any other kernel update to make it work. Signed-off-by: Aleksander Morgado Reviewed-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 11 ++++++++++- drivers/net/usb/rndis_host.c | 4 ++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 677a85360db1..29c7e2ec0dcb 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -54,11 +54,19 @@ static int is_wireless_rndis(struct usb_interface_descriptor *desc) desc->bInterfaceProtocol == 3); } +static int is_novatel_rndis(struct usb_interface_descriptor *desc) +{ + return (desc->bInterfaceClass == USB_CLASS_MISC && + desc->bInterfaceSubClass == 4 && + desc->bInterfaceProtocol == 1); +} + #else #define is_rndis(desc) 0 #define is_activesync(desc) 0 #define is_wireless_rndis(desc) 0 +#define is_novatel_rndis(desc) 0 #endif @@ -150,7 +158,8 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) */ rndis = (is_rndis(&intf->cur_altsetting->desc) || is_activesync(&intf->cur_altsetting->desc) || - is_wireless_rndis(&intf->cur_altsetting->desc)); + is_wireless_rndis(&intf->cur_altsetting->desc) || + is_novatel_rndis(&intf->cur_altsetting->desc)); memset(info, 0, sizeof(*info)); info->control = intf; diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index a151f267aebb..b807c91abe1d 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -632,6 +632,10 @@ static const struct usb_device_id products [] = { /* RNDIS for tethering */ USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long) &rndis_info, +}, { + /* Novatel Verizon USB730L */ + USB_INTERFACE_INFO(USB_CLASS_MISC, 4, 1), + .driver_info = (unsigned long) &rndis_info, }, { }, // END }; -- cgit v1.2.3-70-g09d2 From 4f02fb7617ba12ac15d261c654b9759ea8f1f1ef Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Sat, 30 Sep 2017 14:38:49 +0800 Subject: blk-throttle: fix possible io stall when upgrade to max There is a case which will lead to io stall. The case is described as follows. /test1 |-subtest1 /test2 |-subtest2 And subtest1 and subtest2 each has 32 queued bios already. Now upgrade to max. In throtl_upgrade_state, it will try to dispatch bios as follows: 1) tg=subtest1, do nothing; 2) tg=test1, transfer 32 queued bios from subtest1 to test1; no pending left, no need to schedule next dispatch; 3) tg=subtest2, do nothing; 4) tg=test2, transfer 32 queued bios from subtest2 to test2; no pending left, no need to schedule next dispatch; 5) tg=/, transfer 8 queued bios from test1 to /, 8 queued bios from test2 to /, 8 queued bios from test1 to /, and 8 queued bios from test2 to /; note that test1 and test2 each still has 16 queued bios left; 6) tg=/, try to schedule next dispatch, but since disptime is now (update in tg_update_disptime, wait=0), pending timer is not scheduled in fact; 7) In throtl_upgrade_state it totally dispatches 32 queued bios and with 32 left. test1 and test2 each has 16 queued bios; 8) throtl_pending_timer_fn sees the left over bios, but could do nothing, because throtl_select_dispatch returns 0, and test1/test2 has no pending tg. The blktrace shows the following: 8,32 0 0 2.539007641 0 m N throtl upgrade to max 8,32 0 0 2.539072267 0 m N throtl /test2 dispatch nr_queued=16 read=0 write=16 8,32 7 0 2.539077142 0 m N throtl /test1 dispatch nr_queued=16 read=0 write=16 So force schedule dispatch if there are pending children. Reviewed-by: Shaohua Li Signed-off-by: Joseph Qi Signed-off-by: Jens Axboe --- block/blk-throttle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 0fea76aa0f3f..17816a028dcb 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1911,11 +1911,11 @@ static void throtl_upgrade_state(struct throtl_data *td) tg->disptime = jiffies - 1; throtl_select_dispatch(sq); - throtl_schedule_next_dispatch(sq, false); + throtl_schedule_next_dispatch(sq, true); } rcu_read_unlock(); throtl_select_dispatch(&td->service_queue); - throtl_schedule_next_dispatch(&td->service_queue, false); + throtl_schedule_next_dispatch(&td->service_queue, true); queue_work(kthrotld_workqueue, &td->dispatch_work); } -- cgit v1.2.3-70-g09d2 From 6cd1a6fef7058de15405b13d6587538853279c7b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 3 Oct 2017 15:58:15 -0600 Subject: null_blk: change configfs dependency to select A recent commit made null_blk depend on configfs, which is kind of annoying since you now have to find this dependency and enable that as well. Discovered this since I no longer had null_blk available on a box I needed to debug, since it got killed when the config updated after the configfs change was merged. Fixes: 3bf2bd20734e ("nullb: add configfs interface") Reviewed-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 4a438b8abe27..2dfe99b328f8 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -17,7 +17,7 @@ if BLK_DEV config BLK_DEV_NULL_BLK tristate "Null test block driver" - depends on CONFIGFS_FS + select CONFIGFS_FS config BLK_DEV_FD tristate "Normal floppy disk support" -- cgit v1.2.3-70-g09d2 From 70e62f4bacdf31ea8a59f241c9229120cd06d9d1 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 3 Oct 2017 14:57:16 -0700 Subject: blk-mq-debugfs: fix device sched directory for default scheduler In blk_mq_debugfs_register(), I remembered to set up the per-hctx sched directories if a default scheduler was already configured by blk_mq_sched_init() from blk_mq_init_allocated_queue(), but I didn't do the same for the device-wide sched directory. Fix it. Fixes: d332ce091813 ("blk-mq-debugfs: allow schedulers to register debugfs attributes") Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 980e73095643..de294d775acf 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -815,10 +815,14 @@ int blk_mq_debugfs_register(struct request_queue *q) goto err; /* - * blk_mq_init_hctx() attempted to do this already, but q->debugfs_dir + * blk_mq_init_sched() attempted to do this already, but q->debugfs_dir * didn't exist yet (because we don't know what to name the directory * until the queue is registered to a gendisk). */ + if (q->elevator && !q->sched_debugfs_dir) + blk_mq_debugfs_register_sched(q); + + /* Similarly, blk_mq_init_hctx() couldn't do this previously. */ queue_for_each_hw_ctx(q, hctx, i) { if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx)) goto err; -- cgit v1.2.3-70-g09d2 From 05946876f0c16f6fe1db692d575aba42b25f0811 Mon Sep 17 00:00:00 2001 From: David Wu Date: Sat, 30 Sep 2017 17:47:23 +0800 Subject: net: stmmac: dwmac-rk: Add RK3128 GMAC support Add constants and callback functions for the dwmac on rk3128 soc. As can be seen, the base structure is the same, only registers and the bits in them moved slightly. Signed-off-by: David Wu Signed-off-by: David S. Miller --- .../devicetree/bindings/net/rockchip-dwmac.txt | 1 + drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 112 +++++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt index 6af8eed1adeb..9c16ee2965a2 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt @@ -4,6 +4,7 @@ The device node has following properties. Required properties: - compatible: should be "rockchip,-gamc" + "rockchip,rk3128-gmac": found on RK312x SoCs "rockchip,rk3228-gmac": found on RK322x SoCs "rockchip,rk3288-gmac": found on RK3288 SoCs "rockchip,rk3328-gmac": found on RK3328 SoCs diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 99823f54696a..13133b30b575 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -83,6 +83,117 @@ struct rk_priv_data { (((tx) ? soc##_GMAC_TXCLK_DLY_ENABLE : soc##_GMAC_TXCLK_DLY_DISABLE) | \ ((rx) ? soc##_GMAC_RXCLK_DLY_ENABLE : soc##_GMAC_RXCLK_DLY_DISABLE)) +#define RK3128_GRF_MAC_CON0 0x0168 +#define RK3128_GRF_MAC_CON1 0x016c + +/* RK3128_GRF_MAC_CON0 */ +#define RK3128_GMAC_TXCLK_DLY_ENABLE GRF_BIT(14) +#define RK3128_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(14) +#define RK3128_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3128_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3128_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 7) +#define RK3128_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +/* RK3128_GRF_MAC_CON1 */ +#define RK3128_GMAC_PHY_INTF_SEL_RGMII \ + (GRF_BIT(6) | GRF_CLR_BIT(7) | GRF_CLR_BIT(8)) +#define RK3128_GMAC_PHY_INTF_SEL_RMII \ + (GRF_CLR_BIT(6) | GRF_CLR_BIT(7) | GRF_BIT(8)) +#define RK3128_GMAC_FLOW_CTRL GRF_BIT(9) +#define RK3128_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(9) +#define RK3128_GMAC_SPEED_10M GRF_CLR_BIT(10) +#define RK3128_GMAC_SPEED_100M GRF_BIT(10) +#define RK3128_GMAC_RMII_CLK_25M GRF_BIT(11) +#define RK3128_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(11) +#define RK3128_GMAC_CLK_125M (GRF_CLR_BIT(12) | GRF_CLR_BIT(13)) +#define RK3128_GMAC_CLK_25M (GRF_BIT(12) | GRF_BIT(13)) +#define RK3128_GMAC_CLK_2_5M (GRF_CLR_BIT(12) | GRF_BIT(13)) +#define RK3128_GMAC_RMII_MODE GRF_BIT(14) +#define RK3128_GMAC_RMII_MODE_CLR GRF_CLR_BIT(14) + +static void rk3128_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "Missing rockchip,grf property\n"); + return; + } + + regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, + RK3128_GMAC_PHY_INTF_SEL_RGMII | + RK3128_GMAC_RMII_MODE_CLR); + regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON0, + DELAY_ENABLE(RK3128, tx_delay, rx_delay) | + RK3128_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3128_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3128_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "Missing rockchip,grf property\n"); + return; + } + + regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, + RK3128_GMAC_PHY_INTF_SEL_RMII | RK3128_GMAC_RMII_MODE); +} + +static void rk3128_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "Missing rockchip,grf property\n"); + return; + } + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, + RK3128_GMAC_CLK_2_5M); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, + RK3128_GMAC_CLK_25M); + else if (speed == 1000) + regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, + RK3128_GMAC_CLK_125M); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3128_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "Missing rockchip,grf property\n"); + return; + } + + if (speed == 10) { + regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, + RK3128_GMAC_RMII_CLK_2_5M | + RK3128_GMAC_SPEED_10M); + } else if (speed == 100) { + regmap_write(bsp_priv->grf, RK3128_GRF_MAC_CON1, + RK3128_GMAC_RMII_CLK_25M | + RK3128_GMAC_SPEED_100M); + } else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + } +} + +static const struct rk_gmac_ops rk3128_ops = { + .set_to_rgmii = rk3128_set_to_rgmii, + .set_to_rmii = rk3128_set_to_rmii, + .set_rgmii_speed = rk3128_set_rgmii_speed, + .set_rmii_speed = rk3128_set_rmii_speed, +}; + #define RK3228_GRF_MAC_CON0 0x0900 #define RK3228_GRF_MAC_CON1 0x0904 @@ -1313,6 +1424,7 @@ static int rk_gmac_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume); static const struct of_device_id rk_gmac_dwmac_match[] = { + { .compatible = "rockchip,rk3128-gmac", .data = &rk3128_ops }, { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops }, { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops }, { .compatible = "rockchip,rk3328-gmac", .data = &rk3328_ops }, -- cgit v1.2.3-70-g09d2 From 90caccdd8cc0215705f18b92771b449b01e2474a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 3 Oct 2017 15:37:20 -0700 Subject: bpf: fix bpf_tail_call() x64 JIT - bpf prog_array just like all other types of bpf array accepts 32-bit index. Clarify that in the comment. - fix x64 JIT of bpf_tail_call which was incorrectly loading 8 instead of 4 bytes - tighten corresponding check in the interpreter to stay consistent The JIT bug can be triggered after introduction of BPF_F_NUMA_NODE flag in commit 96eabe7a40aa in 4.14. Before that the map_flags would stay zero and though JIT code is wrong it will check bounds correctly. Hence two fixes tags. All other JITs don't have this problem. Signed-off-by: Alexei Starovoitov Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation") Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper") Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 4 ++-- include/uapi/linux/bpf.h | 2 +- kernel/bpf/core.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8c9573660d51..0554e8aef4d5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog) /* if (index >= array->map.max_entries) * goto out; */ - EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ + EMIT2(0x89, 0xD2); /* mov edx, edx */ + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); - EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ #define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ab5c402f98..f90860d1f897 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -312,7 +312,7 @@ union bpf_attr { * jump into another BPF program * @ctx: context pointer passed to next program * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run + * @index: 32-bit index inside array that selects specific program to run * Return: 0 on success or negative error * * int bpf_clone_redirect(skb, ifindex, flags) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 917cc04a0a94..7b62df86be1d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1022,7 +1022,7 @@ select_insn: struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_prog *prog; - u64 index = BPF_R3; + u32 index = BPF_R3; if (unlikely(index >= array->map.max_entries)) goto out; -- cgit v1.2.3-70-g09d2 From 8ee912dab95f1483156b6e994004bfcc3158d798 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 3 Oct 2017 16:14:15 -0700 Subject: alpha: fix build failures The build of alpha allmodconfig is giving error: arch/alpha/include/asm/mmu_context.h: In function 'ev5_switch_mm': arch/alpha/include/asm/mmu_context.h:160:2: error: implicit declaration of function 'task_thread_info'; did you mean 'init_thread_info'? [-Werror=implicit-function-declaration] The file 'mmu_context.h' needed an extra header file. Link: http://lkml.kernel.org/r/1505668810-7497-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mmu_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/alpha/include/asm/mmu_context.h b/arch/alpha/include/asm/mmu_context.h index 384bd47b5187..45c020a0fe76 100644 --- a/arch/alpha/include/asm/mmu_context.h +++ b/arch/alpha/include/asm/mmu_context.h @@ -8,6 +8,7 @@ */ #include +#include #include #include -- cgit v1.2.3-70-g09d2 From 630cc2b30a42c70628368a412beb4a5e5dd71abe Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Oct 2017 16:14:18 -0700 Subject: kernel/params.c: align add_sysfs_param documentation with code This parameter is named kp, so the documentation should use that. Fixes: 9b473de87209 ("param: Fix duplicate module prefixes") Link: http://lkml.kernel.org/r/20170919142656.64aea59e@endymion Signed-off-by: Jean Delvare Acked-by: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/params.c b/kernel/params.c index 60b2d8101355..1cd8f1a895a8 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -600,7 +600,7 @@ EXPORT_SYMBOL(kernel_param_unlock); /* * add_sysfs_param - add a parameter to sysfs * @mk: struct module_kobject - * @kparam: the actual parameter definition to add to sysfs + * @kp: the actual parameter definition to add to sysfs * @name: name of parameter * * Create a kobject if for a (per-module) parameter if mp NULL, and -- cgit v1.2.3-70-g09d2 From e00e5a26e3d37b47ce8ca59611db1e6135790ef8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Oct 2017 16:14:21 -0700 Subject: scripts/spelling.txt: add more spelling mistakes to spelling.txt Here are some of the more spelling mistakes and typos that I've found while fixing up spelling mistakes in kernel error message text over the past eight weeks. [akpm@linux-foundation.org: s/|/||/, per Joe] Link: http://lkml.kernel.org/r/20170919090818.5989-1-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Kees Cook Cc: Masahiro Yamada Cc: Stephen Boyd Cc: Joe Perches Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/spelling.txt | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 400ef35169c5..aa0cc49ad1ad 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -53,6 +53,7 @@ acumulator||accumulator adapater||adapter addional||additional additionaly||additionally +additonal||additional addres||address adddress||address addreses||addresses @@ -67,6 +68,8 @@ adviced||advised afecting||affecting againt||against agaist||against +aggreataon||aggregation +aggreation||aggregation albumns||albums alegorical||allegorical algined||aligned @@ -80,6 +83,8 @@ aligment||alignment alignement||alignment allign||align alligned||aligned +alllocate||allocate +alloated||allocated allocatote||allocate allocatrd||allocated allocte||allocate @@ -171,6 +176,7 @@ availale||available availavility||availability availble||available availiable||available +availible||available avalable||available avaliable||available aysnc||async @@ -203,6 +209,7 @@ broadcat||broadcast cacluated||calculated caculation||calculation calender||calendar +calescing||coalescing calle||called callibration||calibration calucate||calculate @@ -210,6 +217,7 @@ calulate||calculate cancelation||cancellation cancle||cancel capabilites||capabilities +capabilty||capability capabitilies||capabilities capatibilities||capabilities capapbilities||capabilities @@ -302,6 +310,7 @@ containts||contains contaisn||contains contant||contact contence||contents +continious||continuous continous||continuous continously||continuously continueing||continuing @@ -393,6 +402,7 @@ differrence||difference diffrent||different diffrentiate||differentiate difinition||definition +dimesions||dimensions diplay||display direectly||directly disassocation||disassociation @@ -449,6 +459,7 @@ equiped||equipped equivelant||equivalent equivilant||equivalent eror||error +errorr||error estbalishment||establishment etsablishment||establishment etsbalishment||establishment @@ -481,6 +492,7 @@ failied||failed faillure||failure failue||failure failuer||failure +failng||failing faireness||fairness falied||failed faliure||failure @@ -493,6 +505,7 @@ fetaure||feature fetaures||features fileystem||filesystem fimware||firmware +firware||firmware finanize||finalize findn||find finilizes||finalizes @@ -502,6 +515,7 @@ folloing||following followign||following followings||following follwing||following +fonud||found forseeable||foreseeable forse||force fortan||fortran @@ -532,6 +546,7 @@ grabing||grabbing grahical||graphical grahpical||graphical grapic||graphic +grranted||granted guage||gauge guarenteed||guaranteed guarentee||guarantee @@ -543,6 +558,7 @@ happend||happened harware||hardware heirarchically||hierarchically helpfull||helpful +hybernate||hibernate hierachy||hierarchy hierarchie||hierarchy howver||however @@ -565,16 +581,19 @@ implemenation||implementation implementaiton||implementation implementated||implemented implemention||implementation +implementd||implemented implemetation||implementation implemntation||implementation implentation||implementation implmentation||implementation implmenting||implementing +incative||inactive incomming||incoming incompatabilities||incompatibilities incompatable||incompatible inconsistant||inconsistent increas||increase +incremeted||incremented incrment||increment indendation||indentation indended||intended @@ -619,6 +638,7 @@ interger||integer intermittant||intermittent internel||internal interoprability||interoperability +interuupt||interrupt interrface||interface interrrupt||interrupt interrup||interrupt @@ -638,8 +658,10 @@ intrrupt||interrupt intterrupt||interrupt intuative||intuitive invaid||invalid +invald||invalid invalde||invalid invalide||invalid +invalidiate||invalidate invalud||invalid invididual||individual invokation||invocation @@ -713,6 +735,7 @@ misformed||malformed mispelled||misspelled mispelt||misspelt mising||missing +mismactch||mismatch missmanaged||mismanaged missmatch||mismatch miximum||maximum @@ -731,6 +754,7 @@ multidimensionnal||multidimensional multple||multiple mumber||number muticast||multicast +mutilcast||multicast mutiple||multiple mutli||multi nams||names @@ -834,6 +858,7 @@ posible||possible positon||position possibilites||possibilities powerfull||powerful +preample||preamble preapre||prepare preceeded||preceded preceeding||preceding @@ -1059,6 +1084,7 @@ sturcture||structure subdirectoires||subdirectories suble||subtle substract||subtract +submition||submission succesfully||successfully succesful||successful successed||succeeded @@ -1078,6 +1104,7 @@ suppoted||supported suppported||supported suppport||support supress||suppress +surpressed||suppressed surpresses||suppresses susbsystem||subsystem suspeneded||suspended @@ -1091,6 +1118,7 @@ swithced||switched swithcing||switching swithed||switched swithing||switching +swtich||switch symetric||symmetric synax||syntax synchonized||synchronized @@ -1111,7 +1139,9 @@ therfore||therefore thier||their threds||threads threshhold||threshold +thresold||threshold throught||through +troughput||throughput thses||these tiggered||triggered tipically||typically @@ -1120,6 +1150,7 @@ tmis||this torerable||tolerable tramsmitted||transmitted tramsmit||transmit +tranasction||transaction tranfer||transfer transciever||transceiver transferd||transferred @@ -1133,6 +1164,7 @@ trasmission||transmission treshold||threshold trigerring||triggering trun||turn +tunning||tuning ture||true tyep||type udpate||update @@ -1199,6 +1231,7 @@ visiters||visitors vitual||virtual wakeus||wakeups wating||waiting +wiat||wait wether||whether whataver||whatever whcih||which -- cgit v1.2.3-70-g09d2 From fa87b91c94a8fd2c8502b6761be2d08a8e9bcf55 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 3 Oct 2017 16:14:24 -0700 Subject: include/linux/mm.h: fix typo in VM_MPX definition There's a typo in recent change of VM_MPX definition. We want it to be VM_HIGH_ARCH_4, not VM_HIGH_ARCH_BIT_4. This bug does cause visible regressions. In arch_vma_name the vmflags are tested against VM_MPX. With the incorrect value of VM_MPX, a number of vmas (such as the stack) test positive and end up being marked as "[mpx]" in /proc/N/maps instead of their correct names. This confuses tools like rr which expect to be able to find familiar vmas. Fixes: df3735c5b40f ("x86,mpx: make mpx depend on x86-64 to free up VMA flag") Link: http://lkml.kernel.org/r/20170918140253.36856-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reviewed-by: Rik van Riel Cc: Dave Hansen Cc: Kyle Huey Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f8c10d336e42..065d99deb847 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -240,7 +240,7 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86_INTEL_MPX) /* MPX specific bounds table or bounds directory */ -# define VM_MPX VM_HIGH_ARCH_BIT_4 +# define VM_MPX VM_HIGH_ARCH_4 #else # define VM_MPX VM_NONE #endif -- cgit v1.2.3-70-g09d2 From 4b22927f0cbd58303aac689e378d20bf56267a39 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 3 Oct 2017 16:14:27 -0700 Subject: ksm: fix unlocked iteration over vmas in cmp_and_merge_page() In this place mm is unlocked, so vmas or list may change. Down read mmap_sem to protect them from modifications. Link: http://lkml.kernel.org/r/150512788393.10691.8868381099691121308.stgit@localhost.localdomain Fixes: e86c59b1b12d ("mm/ksm: improve deduplication of zero pages with colouring") Signed-off-by: Kirill Tkhai Acked-by: Michal Hocko Reviewed-by: Andrea Arcangeli Cc: Minchan Kim Cc: zhong jiang Cc: Ingo Molnar Cc: Claudio Imbrenda Cc: "Kirill A. Shutemov" Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/ksm.c b/mm/ksm.c index 15dd7415f7b3..6cb60f46cce5 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1990,6 +1990,7 @@ static void stable_tree_append(struct rmap_item *rmap_item, */ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) { + struct mm_struct *mm = rmap_item->mm; struct rmap_item *tree_rmap_item; struct page *tree_page = NULL; struct stable_node *stable_node; @@ -2062,9 +2063,11 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) if (ksm_use_zero_pages && (checksum == zero_checksum)) { struct vm_area_struct *vma; - vma = find_mergeable_vma(rmap_item->mm, rmap_item->address); + down_read(&mm->mmap_sem); + vma = find_mergeable_vma(mm, rmap_item->address); err = try_to_merge_one_page(vma, page, ZERO_PAGE(rmap_item->address)); + up_read(&mm->mmap_sem); /* * In case of failure, the page was not really empty, so we * need to continue. Otherwise we're done. -- cgit v1.2.3-70-g09d2 From 19bfbe22f59a207417b2679e7e83c180419c9ec5 Mon Sep 17 00:00:00 2001 From: Alexandru Moise <00moses.alexander00@gmail.com> Date: Tue, 3 Oct 2017 16:14:31 -0700 Subject: mm, hugetlb, soft_offline: save compound page order before page migration This fixes a bug in madvise() where if you'd try to soft offline a hugepage via madvise(), while walking the address range you'd end up, using the wrong page offset due to attempting to get the compound order of a former but presently not compound page, due to dissolving the huge page (since commit c3114a84f7f9: "mm: hugetlb: soft-offline: dissolve source hugepage after successful migration"). As a result I ended up with all my free pages except one being offlined. Link: http://lkml.kernel.org/r/20170912204306.GA12053@gmail.com Fixes: c3114a84f7f9 ("mm: hugetlb: soft-offline: dissolve source hugepage after successful migration") Signed-off-by: Alexandru Moise <00moses.alexander00@gmail.com> Cc: Anshuman Khandual Cc: Michal Hocko Cc: Andrea Arcangeli Cc: Minchan Kim Cc: Hillf Danton Cc: Shaohua Li Cc: Mike Rapoport Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: David Rientjes Cc: Rik van Riel Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 21261ff0466f..25bade36e9ca 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -625,18 +625,26 @@ static int madvise_inject_error(int behavior, { struct page *page; struct zone *zone; + unsigned int order; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - for (; start < end; start += PAGE_SIZE << - compound_order(compound_head(page))) { + + for (; start < end; start += PAGE_SIZE << order) { int ret; ret = get_user_pages_fast(start, 1, 0, &page); if (ret != 1) return ret; + /* + * When soft offlining hugepages, after migrating the page + * we dissolve it, therefore in the second loop "page" will + * no longer be a compound page, and order will be 0. + */ + order = compound_order(compound_head(page)); + if (PageHWPoison(page)) { put_page(page); continue; -- cgit v1.2.3-70-g09d2 From b78412b8300a8453b78d2c1b0b925b66493bb011 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2017 16:14:34 -0700 Subject: sh: sh7722: remove nonexistent GPIO_PTQ7 to fix pinctrl registration Patch series "sh: sh7722/sh7757i/sh7264/sh7269: Fix pinctrl registration", v2. Magnus Damm reported that on sh7722/Migo-R, pinctrl registration fails with: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. Apparently GPIO_PTQ7 was defined in the enum, but never used. If enum values are defined, but never used, pinmux_pins[] contains (zero-filled) holes. Hence such entries are treated as pin zero, which was registered before, and pinctrl registration fails. I can't see how this ever worked, as at the time of commit f5e25ae52fef ("sh-pfc: Add sh7722 pinmux support"), pinmux_gpios[] in drivers/pinctrl/sh-pfc/pfc-sh7722.c already had the hole, and drivers/pinctrl/core.c already had the check. Some scripting revealed a few more broken drivers: - sh7757 has four holes, due to nonexistent GPIO_PT[JLNQ]7_RESV. - sh7264 and sh7269 define GPIO_PH[0-7], but don't use it with PINMUX_GPIO(). Patch 1 fixes the issue on sh7722, and was tested. Patches 3-4 should fix the issue on the other 3 SoCs, but was untested due to lack of hardware. This patch (of 4): On sh7722/Migo-R, pinctrl registration fails with: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. As GPIO_PTQ7 is defined in the enum, but never used, pinmux_pins[] contains a (zero-filled) hole. Hence this entry is treated as pin zero, which was registered before, and pinctrl registration fails. According to the datasheet, port PTQ7 does not exist. Hence remove GPIO_PTQ7 from the enum to fix this. Link: http://lkml.kernel.org/r/1505205657-18012-2-git-send-email-geert+renesas@glider.be Fixes: 8d7b5b0af7e070b9 ("sh: Add sh7722 pinmux code") Signed-off-by: Geert Uytterhoeven Reported-by: Magnus Damm Reviewed-by: Laurent Pinchart Tested-by: Jacopo Mondi Cc: Rich Felker Cc: Yoshihiro Shimoda Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/cpu-sh4/cpu/sh7722.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/cpu-sh4/cpu/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h index 3bb74e534d0f..78961ab78a5a 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7722.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7722.h @@ -67,7 +67,7 @@ enum { GPIO_PTN3, GPIO_PTN2, GPIO_PTN1, GPIO_PTN0, /* PTQ */ - GPIO_PTQ7, GPIO_PTQ6, GPIO_PTQ5, GPIO_PTQ4, + GPIO_PTQ6, GPIO_PTQ5, GPIO_PTQ4, GPIO_PTQ3, GPIO_PTQ2, GPIO_PTQ1, GPIO_PTQ0, /* PTR */ -- cgit v1.2.3-70-g09d2 From d8ce38f69843a56da044e56b6c16aecfbc3c6e39 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2017 16:14:37 -0700 Subject: sh: sh7757: remove nonexistent GPIO_PT[JLNQ]7_RESV to fix pinctrl registration Commit 3810e96056ff ("sh: modify pinmux for SH7757 2nd cut") renamed GPIO_PT[JLNQ]7 to GPIO_PT[JLNQ]7_RESV, and removed the existing users from the pinmux_pins[] array. However, pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. Hence entries were not really removed, but replaced by (zero-filled) holes. Such entries are treated as pin zero, which was registered before, thus leading to pinctrl registration failures, as seen on sh7722: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 Remove GPIO_PT[JLNQ]7_RESV from the enum to fix this. Link: http://lkml.kernel.org/r/1505205657-18012-3-git-send-email-geert+renesas@glider.be Fixes: 3810e96056ffddf6 ("sh: modify pinmux for SH7757 2nd cut") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Cc: Jacopo Mondi Cc: Magnus Damm Cc: Rich Felker Cc: Yoshihiro Shimoda Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/cpu-sh4/cpu/sh7757.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sh/include/cpu-sh4/cpu/sh7757.h b/arch/sh/include/cpu-sh4/cpu/sh7757.h index 5340f3bc1863..b40fb541e72a 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7757.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7757.h @@ -40,7 +40,7 @@ enum { /* PTJ */ GPIO_PTJ0, GPIO_PTJ1, GPIO_PTJ2, GPIO_PTJ3, - GPIO_PTJ4, GPIO_PTJ5, GPIO_PTJ6, GPIO_PTJ7_RESV, + GPIO_PTJ4, GPIO_PTJ5, GPIO_PTJ6, /* PTK */ GPIO_PTK0, GPIO_PTK1, GPIO_PTK2, GPIO_PTK3, @@ -48,7 +48,7 @@ enum { /* PTL */ GPIO_PTL0, GPIO_PTL1, GPIO_PTL2, GPIO_PTL3, - GPIO_PTL4, GPIO_PTL5, GPIO_PTL6, GPIO_PTL7_RESV, + GPIO_PTL4, GPIO_PTL5, GPIO_PTL6, /* PTM */ GPIO_PTM0, GPIO_PTM1, GPIO_PTM2, GPIO_PTM3, @@ -56,7 +56,7 @@ enum { /* PTN */ GPIO_PTN0, GPIO_PTN1, GPIO_PTN2, GPIO_PTN3, - GPIO_PTN4, GPIO_PTN5, GPIO_PTN6, GPIO_PTN7_RESV, + GPIO_PTN4, GPIO_PTN5, GPIO_PTN6, /* PTO */ GPIO_PTO0, GPIO_PTO1, GPIO_PTO2, GPIO_PTO3, @@ -68,7 +68,7 @@ enum { /* PTQ */ GPIO_PTQ0, GPIO_PTQ1, GPIO_PTQ2, GPIO_PTQ3, - GPIO_PTQ4, GPIO_PTQ5, GPIO_PTQ6, GPIO_PTQ7_RESV, + GPIO_PTQ4, GPIO_PTQ5, GPIO_PTQ6, /* PTR */ GPIO_PTR0, GPIO_PTR1, GPIO_PTR2, GPIO_PTR3, -- cgit v1.2.3-70-g09d2 From eae3df7e82318d798f45dedf111e241805ec7a4a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2017 16:14:41 -0700 Subject: sh: sh7264: remove nonexistent GPIO_PH[0-7] to fix pinctrl registration Pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. If enum values are defined, but never used, pinmux_pins[] contains (zero-filled) holes. Such entries are treated as pin zero, which was registered before, thus leading to pinctrl registration failures, as seen on sh7722: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 Remove GPIO_PH[0-7] from the enum to fix this. Link: http://lkml.kernel.org/r/1505205657-18012-4-git-send-email-geert+renesas@glider.be Fixes: 41797f75486d8ca3 ("sh: Add pinmux for sh7264") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Cc: Jacopo Mondi Cc: Magnus Damm Cc: Rich Felker Cc: Yoshihiro Shimoda Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/cpu-sh2a/cpu/sh7264.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7264.h b/arch/sh/include/cpu-sh2a/cpu/sh7264.h index 4d1ef6d74bd6..2ae0e938b657 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7264.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7264.h @@ -43,9 +43,7 @@ enum { GPIO_PG7, GPIO_PG6, GPIO_PG5, GPIO_PG4, GPIO_PG3, GPIO_PG2, GPIO_PG1, GPIO_PG0, - /* Port H */ - GPIO_PH7, GPIO_PH6, GPIO_PH5, GPIO_PH4, - GPIO_PH3, GPIO_PH2, GPIO_PH1, GPIO_PH0, + /* Port H - Port H does not have a Data Register */ /* Port I - not on device */ -- cgit v1.2.3-70-g09d2 From d9d73e81fe82fdf4ee65a48c26531edc04108349 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Oct 2017 16:14:44 -0700 Subject: sh: sh7269: remove nonexistent GPIO_PH[0-7] to fix pinctrl registration Pinmux_pins[] is initialized through PINMUX_GPIO(), using designated array initializers, where the GPIO_* enums serve as indices. If enum values are defined, but never used, pinmux_pins[] contains (zero-filled) holes. Such entries are treated as pin zero, which was registered before, thus leading to pinctrl registration failures, as seen on sh7722: sh-pfc pfc-sh7722: pin 0 already registered sh-pfc pfc-sh7722: error during pin registration sh-pfc pfc-sh7722: could not register: -22 sh-pfc: probe of pfc-sh7722 failed with error -22 Remove GPIO_PH[0-7] from the enum to fix this. Link: http://lkml.kernel.org/r/1505205657-18012-5-git-send-email-geert+renesas@glider.be Fixes: ef0fa5331a73e479 ("sh: Add pinmux for sh7269") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Cc: Yoshinori Sato Cc: Rich Felker Cc: Magnus Damm Cc: Yoshihiro Shimoda Cc: Jacopo Mondi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/cpu-sh2a/cpu/sh7269.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index 2a0ca8780f0d..13c495a9fc00 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -45,9 +45,7 @@ enum { GPIO_PG7, GPIO_PG6, GPIO_PG5, GPIO_PG4, GPIO_PG3, GPIO_PG2, GPIO_PG1, GPIO_PG0, - /* Port H */ - GPIO_PH7, GPIO_PH6, GPIO_PH5, GPIO_PH4, - GPIO_PH3, GPIO_PH2, GPIO_PH1, GPIO_PH0, + /* Port H - Port H does not have a Data Register */ /* Port I - not on device */ -- cgit v1.2.3-70-g09d2 From d5567c9df1ef001b2a7e6684b3b3498371ee4cae Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Tue, 3 Oct 2017 16:14:47 -0700 Subject: z3fold: fix potential race in z3fold_reclaim_page It is possible that on a (partially) unsuccessful page reclaim, kref_put() called in z3fold_reclaim_page() does not yield page release, but the page is released shortly afterwards by another thread. Then z3fold_reclaim_page() would try to list_add() that (released) page again which is obviously a bug. To avoid that, spin_lock() has to be taken earlier, before the kref_put() call mentioned earlier. Link: http://lkml.kernel.org/r/20170913162937.bfff21c7d12b12a5f47639fd@gmail.com Signed-off-by: Vitaly Wool Cc: Dan Streetman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index 486550df32be..b04fa3ba1bf2 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -875,16 +875,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) goto next; } next: + spin_lock(&pool->lock); if (test_bit(PAGE_HEADLESS, &page->private)) { if (ret == 0) { + spin_unlock(&pool->lock); free_z3fold_page(page); return 0; } } else if (kref_put(&zhdr->refcount, release_z3fold_page)) { atomic64_dec(&pool->pages_nr); + spin_unlock(&pool->lock); return 0; } - spin_lock(&pool->lock); /* * Add to the beginning of LRU. -- cgit v1.2.3-70-g09d2 From 4d4bbd8526a8fbeb2c090ea360211fceff952383 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:14:50 -0700 Subject: mm, oom_reaper: skip mm structs with mmu notifiers Andrea has noticed that the oom_reaper doesn't invalidate the range via mmu notifiers (mmu_notifier_invalidate_range_start/end) and that can corrupt the memory of the kvm guest for example. tlb_flush_mmu_tlbonly already invokes mmu notifiers but that is not sufficient as per Andrea: "mmu_notifier_invalidate_range cannot be used in replacement of mmu_notifier_invalidate_range_start/end. For KVM mmu_notifier_invalidate_range is a noop and rightfully so. A MMU notifier implementation has to implement either ->invalidate_range method or the invalidate_range_start/end methods, not both. And if you implement invalidate_range_start/end like KVM is forced to do, calling mmu_notifier_invalidate_range in common code is a noop for KVM. For those MMU notifiers that can get away only implementing ->invalidate_range, the ->invalidate_range is implicitly called by mmu_notifier_invalidate_range_end(). And only those secondary MMUs that share the same pagetable with the primary MMU (like AMD iommuv2) can get away only implementing ->invalidate_range" As the callback is allowed to sleep and the implementation is out of hand of the MM it is safer to simply bail out if there is an mmu notifier registered. In order to not fail too early make the mm_has_notifiers check under the oom_lock and have a little nap before failing to give the current oom victim some more time to exit. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170913113427.2291-1-mhocko@kernel.org Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Michal Hocko Reported-by: Andrea Arcangeli Reviewed-by: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 5 +++++ mm/oom_kill.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 7b2e31b1745a..6866e8126982 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -400,6 +400,11 @@ extern void mmu_notifier_synchronize(void); #else /* CONFIG_MMU_NOTIFIER */ +static inline int mm_has_notifiers(struct mm_struct *mm) +{ + return 0; +} + static inline void mmu_notifier_release(struct mm_struct *mm) { } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 99736e026712..dee0f75c3013 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include "internal.h" @@ -494,6 +495,21 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) goto unlock_oom; } + /* + * If the mm has notifiers then we would need to invalidate them around + * unmap_page_range and that is risky because notifiers can sleep and + * what they do is basically undeterministic. So let's have a short + * sleep to give the oom victim some more time. + * TODO: we really want to get rid of this ugly hack and make sure that + * notifiers cannot block for unbounded amount of time and add + * mmu_notifier_invalidate_range_{start,end} around unmap_page_range + */ + if (mm_has_notifiers(mm)) { + up_read(&mm->mmap_sem); + schedule_timeout_idle(HZ); + goto unlock_oom; + } + /* * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't * work on the mm anymore. The check for MMF_OOM_SKIP must run -- cgit v1.2.3-70-g09d2 From 72f0184c8a00c70179cfed6266e2e06b4d400065 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:14:53 -0700 Subject: mm, memcg: remove hotplug locking from try_charge The following lockdep splat has been noticed during LTP testing ====================================================== WARNING: possible circular locking dependency detected 4.13.0-rc3-next-20170807 #12 Not tainted ------------------------------------------------------ a.out/4771 is trying to acquire lock: (cpu_hotplug_lock.rw_sem){++++++}, at: [] drain_all_stock.part.35+0x18/0x140 but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x175/0x530 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&mm->mmap_sem){++++++}: lock_acquire+0xc9/0x230 __might_fault+0x70/0xa0 _copy_to_user+0x23/0x70 filldir+0xa7/0x110 xfs_dir2_sf_getdents.isra.10+0x20c/0x2c0 [xfs] xfs_readdir+0x1fa/0x2c0 [xfs] xfs_file_readdir+0x30/0x40 [xfs] iterate_dir+0x17a/0x1a0 SyS_getdents+0xb0/0x160 entry_SYSCALL_64_fastpath+0x1f/0xbe -> #2 (&type->i_mutex_dir_key#3){++++++}: lock_acquire+0xc9/0x230 down_read+0x51/0xb0 lookup_slow+0xde/0x210 walk_component+0x160/0x250 link_path_walk+0x1a6/0x610 path_openat+0xe4/0xd50 do_filp_open+0x91/0x100 file_open_name+0xf5/0x130 filp_open+0x33/0x50 kernel_read_file_from_path+0x39/0x80 _request_firmware+0x39f/0x880 request_firmware_direct+0x37/0x50 request_microcode_fw+0x64/0xe0 reload_store+0xf7/0x180 dev_attr_store+0x18/0x30 sysfs_kf_write+0x44/0x60 kernfs_fop_write+0x113/0x1a0 __vfs_write+0x37/0x170 vfs_write+0xc7/0x1c0 SyS_write+0x58/0xc0 do_syscall_64+0x6c/0x1f0 return_from_SYSCALL_64+0x0/0x7a -> #1 (microcode_mutex){+.+.+.}: lock_acquire+0xc9/0x230 __mutex_lock+0x88/0x960 mutex_lock_nested+0x1b/0x20 microcode_init+0xbb/0x208 do_one_initcall+0x51/0x1a9 kernel_init_freeable+0x208/0x2a7 kernel_init+0xe/0x104 ret_from_fork+0x2a/0x40 -> #0 (cpu_hotplug_lock.rw_sem){++++++}: __lock_acquire+0x153c/0x1550 lock_acquire+0xc9/0x230 cpus_read_lock+0x4b/0x90 drain_all_stock.part.35+0x18/0x140 try_charge+0x3ab/0x6e0 mem_cgroup_try_charge+0x7f/0x2c0 shmem_getpage_gfp+0x25f/0x1050 shmem_fault+0x96/0x200 __do_fault+0x1e/0xa0 __handle_mm_fault+0x9c3/0xe00 handle_mm_fault+0x16e/0x380 __do_page_fault+0x24a/0x530 do_page_fault+0x30/0x80 page_fault+0x28/0x30 other info that might help us debug this: Chain exists of: cpu_hotplug_lock.rw_sem --> &type->i_mutex_dir_key#3 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&type->i_mutex_dir_key#3); lock(&mm->mmap_sem); lock(cpu_hotplug_lock.rw_sem); *** DEADLOCK *** 2 locks held by a.out/4771: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x175/0x530 #1: (percpu_charge_mutex){+.+...}, at: [] try_charge+0x397/0x6e0 The problem is very similar to the one fixed by commit a459eeb7b852 ("mm, page_alloc: do not depend on cpu hotplug locks inside the allocator"). We are taking hotplug locks while we can be sitting on top of basically arbitrary locks. This just calls for problems. We can get rid of {get,put}_online_cpus, fortunately. We do not have to be worried about races with memory hotplug because drain_local_stock, which is called from both the WQ draining and the memory hotplug contexts, is always operating on the local cpu stock with IRQs disabled. The only thing to be careful about is that the target memcg doesn't vanish while we are still in drain_all_stock so take a reference on it. Link: http://lkml.kernel.org/r/20170913090023.28322-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Artem Savkov Tested-by: Artem Savkov Cc: Johannes Weiner Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 15af3da5af02..696c6529e900 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1777,6 +1777,10 @@ static void drain_local_stock(struct work_struct *dummy) struct memcg_stock_pcp *stock; unsigned long flags; + /* + * The only protection from memory hotplug vs. drain_stock races is + * that we always operate on local CPU stock here with IRQ disabled + */ local_irq_save(flags); stock = this_cpu_ptr(&memcg_stock); @@ -1821,27 +1825,33 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) /* If someone's already draining, avoid adding running more workers. */ if (!mutex_trylock(&percpu_charge_mutex)) return; - /* Notify other cpus that system-wide "drain" is running */ - get_online_cpus(); + /* + * Notify other cpus that system-wide "drain" is running + * We do not care about races with the cpu hotplug because cpu down + * as well as workers from this path always operate on the local + * per-cpu data. CPU up doesn't touch memcg_stock at all. + */ curcpu = get_cpu(); for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *memcg; memcg = stock->cached; - if (!memcg || !stock->nr_pages) + if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css)) continue; - if (!mem_cgroup_is_descendant(memcg, root_memcg)) + if (!mem_cgroup_is_descendant(memcg, root_memcg)) { + css_put(&memcg->css); continue; + } if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) { if (cpu == curcpu) drain_local_stock(&stock->work); else schedule_work_on(cpu, &stock->work); } + css_put(&memcg->css); } put_cpu(); - put_online_cpus(); mutex_unlock(&percpu_charge_mutex); } -- cgit v1.2.3-70-g09d2 From 3f2eb0287ebd62ec8d6d544f830285302279e6bf Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Tue, 3 Oct 2017 16:14:57 -0700 Subject: mm/memcg: avoid page count check for zone device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix for 4.14, zone device page always have an elevated refcount of one and thus page count sanity check in uncharge_page() is inappropriate for them. [mhocko@suse.com: nano-optimize VM_BUG_ON in uncharge_page] Link: http://lkml.kernel.org/r/20170914190011.5217-1-jglisse@redhat.com Signed-off-by: Jérôme Glisse Signed-off-by: Michal Hocko Reported-by: Evgeny Baskakov Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 696c6529e900..d5f3a62887cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5658,7 +5658,8 @@ static void uncharge_batch(const struct uncharge_gather *ug) static void uncharge_page(struct page *page, struct uncharge_gather *ug) { VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); + VM_BUG_ON_PAGE(page_count(page) && !is_zone_device_page(page) && + !PageHWPoison(page) , page); if (!page->mem_cgroup) return; -- cgit v1.2.3-70-g09d2 From a1b2289cef92ef0e9a92afcd2e1ea71d5bcaaf64 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Tue, 3 Oct 2017 16:15:00 -0700 Subject: android: binder: drop lru lock in isolate callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the global lru lock in isolate callback before calling zap_page_range which calls cond_resched, and re-acquire the global lru lock before returning. Also change return code to LRU_REMOVED_RETRY. Use mmput_async when fail to acquire mmap sem in an atomic context. Fix "BUG: sleeping function called from invalid context" errors when CONFIG_DEBUG_ATOMIC_SLEEP is enabled. Also restore mmput_async, which was initially introduced in commit ec8d7c14ea14 ("mm, oom_reaper: do not mmput synchronously from the oom reaper context"), and was removed in commit 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently"). Link: http://lkml.kernel.org/r/20170914182231.90908-1-sherryy@android.com Fixes: f2517eb76f1f2 ("android: binder: Add global lru shrinker to binder") Signed-off-by: Sherry Yang Signed-off-by: Greg Kroah-Hartman Reported-by: Kyle Yan Acked-by: Arve Hjønnevåg Acked-by: Michal Hocko Cc: Martijn Coenen Cc: Todd Kjos Cc: Riley Andrews Cc: Ingo Molnar Cc: Vlastimil Babka Cc: Hillf Danton Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Oleg Nesterov Cc: Hoeun Ryu Cc: Christopher Lameter Cc: Vegard Nossum Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/android/binder_alloc.c | 18 ++++++++++++------ include/linux/sched/mm.h | 6 ++++++ kernel/fork.c | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 8fe165844e47..064f5e31ec55 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -913,6 +913,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, struct binder_alloc *alloc; uintptr_t page_addr; size_t index; + struct vm_area_struct *vma; alloc = page->alloc; if (!mutex_trylock(&alloc->mutex)) @@ -923,16 +924,22 @@ enum lru_status binder_alloc_free_page(struct list_head *item, index = page - alloc->pages; page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE; - if (alloc->vma) { + vma = alloc->vma; + if (vma) { mm = get_task_mm(alloc->tsk); if (!mm) goto err_get_task_mm_failed; if (!down_write_trylock(&mm->mmap_sem)) goto err_down_write_mmap_sem_failed; + } + + list_lru_isolate(lru, item); + spin_unlock(lock); + if (vma) { trace_binder_unmap_user_start(alloc, index); - zap_page_range(alloc->vma, + zap_page_range(vma, page_addr + alloc->user_buffer_offset, PAGE_SIZE); @@ -950,13 +957,12 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_kernel_end(alloc, index); - list_lru_isolate(lru, item); - + spin_lock(lock); mutex_unlock(&alloc->mutex); - return LRU_REMOVED; + return LRU_REMOVED_RETRY; err_down_write_mmap_sem_failed: - mmput(mm); + mmput_async(mm); err_get_task_mm_failed: err_page_already_freed: mutex_unlock(&alloc->mutex); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 3a19c253bdb1..ae53e413fb13 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -84,6 +84,12 @@ static inline bool mmget_not_zero(struct mm_struct *mm) /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +void mmput_async(struct mm_struct *); +#endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); diff --git a/kernel/fork.c b/kernel/fork.c index 10646182440f..e702cb9ffbd8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -946,6 +946,24 @@ void mmput(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mmput); +#ifdef CONFIG_MMU +static void mmput_async_fn(struct work_struct *work) +{ + struct mm_struct *mm = container_of(work, struct mm_struct, + async_put_work); + + __mmput(mm); +} + +void mmput_async(struct mm_struct *mm) +{ + if (atomic_dec_and_test(&mm->mm_users)) { + INIT_WORK(&mm->async_put_work, mmput_async_fn); + schedule_work(&mm->async_put_work); + } +} +#endif + /** * set_mm_exe_file - change a reference to the mm's executable file * -- cgit v1.2.3-70-g09d2 From 6818600ff094ca255a7fe31838ad50c29656c3c5 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 3 Oct 2017 16:15:03 -0700 Subject: mm,compaction: serialize waitqueue_active() checks (for real) Andrea brought to my attention that the L->{L,S} guarantees are completely bogus for this case. I was looking at the diagram, from the offending commit, when that _is_ the race, we had the load reordered already. What we need is at least S->L semantics, thus simply use wq_has_sleeper() to serialize the call for good. Link: http://lkml.kernel.org/r/20170914175313.GB811@linux-80c1.suse Fixes: 46acef048a6 (mm,compaction: serialize waitqueue_active() checks) Signed-off-by: Davidlohr Bueso Reported-by: Andrea Parri Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index fb548e4c7bd4..03d31a875341 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1999,17 +1999,14 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx) if (pgdat->kcompactd_max_order < order) pgdat->kcompactd_max_order = order; - /* - * Pairs with implicit barrier in wait_event_freezable() - * such that wakeups are not missed in the lockless - * waitqueue_active() call. - */ - smp_acquire__after_ctrl_dep(); - if (pgdat->kcompactd_classzone_idx > classzone_idx) pgdat->kcompactd_classzone_idx = classzone_idx; - if (!waitqueue_active(&pgdat->kcompactd_wait)) + /* + * Pairs with implicit barrier in wait_event_freezable() + * such that wakeups are not missed. + */ + if (!wq_has_sleeper(&pgdat->kcompactd_wait)) return; if (!kcompactd_node_suitable(pgdat)) -- cgit v1.2.3-70-g09d2 From 3552935742e0d5f0dafd823736f45bdaa7ba672c Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Tue, 3 Oct 2017 16:15:06 -0700 Subject: z3fold: fix stale list handling Fix the situation when clear_bit() is called for page->private before the page pointer is actually assigned. While at it, remove work_busy() check because it is costly and does not give 100% guarantee anyway. Signed-off-by: Vitaly Wool Cc: Dan Streetman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index b04fa3ba1bf2..b2ba2ba585f3 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -250,6 +250,7 @@ static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) WARN_ON(!list_empty(&zhdr->buddy)); set_bit(PAGE_STALE, &page->private); + clear_bit(NEEDS_COMPACTING, &page->private); spin_lock(&pool->lock); if (!list_empty(&page->lru)) list_del(&page->lru); @@ -303,7 +304,6 @@ static void free_pages_work(struct work_struct *w) list_del(&zhdr->buddy); if (WARN_ON(!test_bit(PAGE_STALE, &page->private))) continue; - clear_bit(NEEDS_COMPACTING, &page->private); spin_unlock(&pool->stale_lock); cancel_work_sync(&zhdr->work); free_z3fold_page(page); @@ -624,10 +624,8 @@ lookup: * stale pages list. cancel_work_sync() can sleep so we must make * sure it won't be called in case we're in atomic context. */ - if (zhdr && (can_sleep || !work_pending(&zhdr->work) || - !unlikely(work_busy(&zhdr->work)))) { + if (zhdr && (can_sleep || !work_pending(&zhdr->work))) { list_del(&zhdr->buddy); - clear_bit(NEEDS_COMPACTING, &page->private); spin_unlock(&pool->stale_lock); if (can_sleep) cancel_work_sync(&zhdr->work); -- cgit v1.2.3-70-g09d2 From 57148a64e823bb1f49112fa52a92a7f372cda892 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Oct 2017 16:15:10 -0700 Subject: mm: meminit: mark init_reserved_page as __meminit The function is called from __meminit context and calls other __meminit functions but isn't it self mark as such today: WARNING: vmlinux.o(.text.unlikely+0x4516): Section mismatch in reference from the function init_reserved_page() to the function .meminit.text:early_pfn_to_nid() The function init_reserved_page() references the function __meminit early_pfn_to_nid(). This is often because init_reserved_page lacks a __meminit annotation or the annotation of early_pfn_to_nid is wrong. On most compilers, we don't notice this because the function gets inlined all the time. Adding __meminit here fixes the harmless warning for the old versions and is generally the correct annotation. Link: http://lkml.kernel.org/r/20170915193149.901180-1-arnd@arndb.de Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd") Signed-off-by: Arnd Bergmann Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c841af88836a..38d165a87860 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1190,7 +1190,7 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone, } #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -static void init_reserved_page(unsigned long pfn) +static void __meminit init_reserved_page(unsigned long pfn) { pg_data_t *pgdat; int nid, zid; -- cgit v1.2.3-70-g09d2 From 31d1e130f4a0f8f629a460167569577cac9b17c1 Mon Sep 17 00:00:00 2001 From: Ioan Nicu Date: Tue, 3 Oct 2017 16:15:13 -0700 Subject: rapidio: remove global irq spinlocks from the subsystem Locking of config and doorbell operations should be done only if the underlying hardware requires it. This patch removes the global spinlocks from the rapidio subsystem and moves them to the mport drivers (fsl_rio and tsi721), only to the necessary places. For example, local config space read and write operations (lcread/lcwrite) are atomic in all existing drivers, so there should be no need for locking, while the cread/cwrite operations which generate maintenance transactions need to be synchronized with a lock. Later, each driver could chose to use a per-port lock instead of a global one, or even more granular locking. Link: http://lkml.kernel.org/r/20170824113023.GD50104@nokia.com Signed-off-by: Ioan Nicu Signed-off-by: Frank Kunz Acked-by: Alexandre Bounine Cc: Matt Porter Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/sysdev/fsl_rio.c | 17 +++++++++++++++-- arch/powerpc/sysdev/fsl_rmu.c | 8 ++++++++ drivers/rapidio/devices/tsi721.c | 7 +++++++ drivers/rapidio/rio-access.c | 40 +++++----------------------------------- 4 files changed, 35 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 9234be1e66f5..5011ffea4e4b 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -71,6 +71,8 @@ #define RIWAR_WRTYP_ALLOC 0x00006000 #define RIWAR_SIZE_MASK 0x0000003F +static DEFINE_SPINLOCK(fsl_rio_config_lock); + #define __fsl_read_rio_config(x, addr, err, op) \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ @@ -184,6 +186,7 @@ fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, u8 hopcount, u32 offset, int len, u32 *val) { struct rio_priv *priv = mport->priv; + unsigned long flags; u8 *data; u32 rval, err = 0; @@ -197,6 +200,8 @@ fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len)) return -EINVAL; + spin_lock_irqsave(&fsl_rio_config_lock, flags); + out_be32(&priv->maint_atmu_regs->rowtar, (destid << 22) | (hopcount << 12) | (offset >> 12)); out_be32(&priv->maint_atmu_regs->rowtear, (destid >> 10)); @@ -213,6 +218,7 @@ fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, __fsl_read_rio_config(rval, data, err, "lwz"); break; default: + spin_unlock_irqrestore(&fsl_rio_config_lock, flags); return -EINVAL; } @@ -221,6 +227,7 @@ fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, err, destid, hopcount, offset); } + spin_unlock_irqrestore(&fsl_rio_config_lock, flags); *val = rval; return err; @@ -244,7 +251,10 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, u8 hopcount, u32 offset, int len, u32 val) { struct rio_priv *priv = mport->priv; + unsigned long flags; u8 *data; + int ret = 0; + pr_debug ("fsl_rio_config_write:" " index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n", @@ -255,6 +265,8 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, if (offset > (0x1000000 - len) || !IS_ALIGNED(offset, len)) return -EINVAL; + spin_lock_irqsave(&fsl_rio_config_lock, flags); + out_be32(&priv->maint_atmu_regs->rowtar, (destid << 22) | (hopcount << 12) | (offset >> 12)); out_be32(&priv->maint_atmu_regs->rowtear, (destid >> 10)); @@ -271,10 +283,11 @@ fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, out_be32((u32 *) data, val); break; default: - return -EINVAL; + ret = -EINVAL; } + spin_unlock_irqrestore(&fsl_rio_config_lock, flags); - return 0; + return ret; } static void fsl_rio_inbound_mem_init(struct rio_priv *priv) diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c index ab7a74c75be8..88b35a3dcdc5 100644 --- a/arch/powerpc/sysdev/fsl_rmu.c +++ b/arch/powerpc/sysdev/fsl_rmu.c @@ -104,6 +104,8 @@ #define DOORBELL_MESSAGE_SIZE 0x08 +static DEFINE_SPINLOCK(fsl_rio_doorbell_lock); + struct rio_msg_regs { u32 omr; u32 osr; @@ -626,9 +628,13 @@ err_out: int fsl_rio_doorbell_send(struct rio_mport *mport, int index, u16 destid, u16 data) { + unsigned long flags; + pr_debug("fsl_doorbell_send: index %d destid %4.4x data %4.4x\n", index, destid, data); + spin_lock_irqsave(&fsl_rio_doorbell_lock, flags); + /* In the serial version silicons, such as MPC8548, MPC8641, * below operations is must be. */ @@ -638,6 +644,8 @@ int fsl_rio_doorbell_send(struct rio_mport *mport, out_be32(&dbell->dbell_regs->oddatr, (index << 20) | data); out_be32(&dbell->dbell_regs->odmr, 0x00000001); + spin_unlock_irqrestore(&fsl_rio_doorbell_lock, flags); + return 0; } diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 315a4be8dc1e..9a68914100ad 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -51,6 +51,8 @@ module_param(mbox_sel, byte, S_IRUGO); MODULE_PARM_DESC(mbox_sel, "RIO Messaging MBOX Selection Mask (default: 0x0f = all)"); +static DEFINE_SPINLOCK(tsi721_maint_lock); + static void tsi721_omsg_handler(struct tsi721_device *priv, int ch); static void tsi721_imsg_handler(struct tsi721_device *priv, int ch); @@ -124,12 +126,15 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size, void __iomem *regs = priv->regs + TSI721_DMAC_BASE(priv->mdma.ch_id); struct tsi721_dma_desc *bd_ptr; u32 rd_count, swr_ptr, ch_stat; + unsigned long flags; int i, err = 0; u32 op = do_wr ? MAINT_WR : MAINT_RD; if (offset > (RIO_MAINT_SPACE_SZ - len) || (len != sizeof(u32))) return -EINVAL; + spin_lock_irqsave(&tsi721_maint_lock, flags); + bd_ptr = priv->mdma.bd_base; rd_count = ioread32(regs + TSI721_DMAC_DRDCNT); @@ -197,7 +202,9 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size, */ swr_ptr = ioread32(regs + TSI721_DMAC_DSWP); iowrite32(swr_ptr, regs + TSI721_DMAC_DSRP); + err_out: + spin_unlock_irqrestore(&tsi721_maint_lock, flags); return err; } diff --git a/drivers/rapidio/rio-access.c b/drivers/rapidio/rio-access.c index a3824baca2e5..3ee9af83b638 100644 --- a/drivers/rapidio/rio-access.c +++ b/drivers/rapidio/rio-access.c @@ -13,17 +13,9 @@ #include #include -/* - * These interrupt-safe spinlocks protect all accesses to RIO - * configuration space and doorbell access. - */ -static DEFINE_SPINLOCK(rio_config_lock); -static DEFINE_SPINLOCK(rio_doorbell_lock); - /* * Wrappers for all RIO configuration access functions. They just check - * alignment, do locking and call the low-level functions pointed to - * by rio_mport->ops. + * alignment and call the low-level functions pointed to by rio_mport->ops. */ #define RIO_8_BAD 0 @@ -44,13 +36,10 @@ int __rio_local_read_config_##size \ (struct rio_mport *mport, u32 offset, type *value) \ { \ int res; \ - unsigned long flags; \ u32 data = 0; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ - spin_lock_irqsave(&rio_config_lock, flags); \ res = mport->ops->lcread(mport, mport->id, offset, len, &data); \ *value = (type)data; \ - spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ } @@ -67,13 +56,8 @@ int __rio_local_read_config_##size \ int __rio_local_write_config_##size \ (struct rio_mport *mport, u32 offset, type value) \ { \ - int res; \ - unsigned long flags; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ - spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->lcwrite(mport, mport->id, offset, len, value);\ - spin_unlock_irqrestore(&rio_config_lock, flags); \ - return res; \ + return mport->ops->lcwrite(mport, mport->id, offset, len, value);\ } RIO_LOP_READ(8, u8, 1) @@ -104,13 +88,10 @@ int rio_mport_read_config_##size \ (struct rio_mport *mport, u16 destid, u8 hopcount, u32 offset, type *value) \ { \ int res; \ - unsigned long flags; \ u32 data = 0; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ - spin_lock_irqsave(&rio_config_lock, flags); \ res = mport->ops->cread(mport, mport->id, destid, hopcount, offset, len, &data); \ *value = (type)data; \ - spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ } @@ -127,13 +108,9 @@ int rio_mport_read_config_##size \ int rio_mport_write_config_##size \ (struct rio_mport *mport, u16 destid, u8 hopcount, u32 offset, type value) \ { \ - int res; \ - unsigned long flags; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ - spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->cwrite(mport, mport->id, destid, hopcount, offset, len, value); \ - spin_unlock_irqrestore(&rio_config_lock, flags); \ - return res; \ + return mport->ops->cwrite(mport, mport->id, destid, hopcount, \ + offset, len, value); \ } RIO_OP_READ(8, u8, 1) @@ -162,14 +139,7 @@ EXPORT_SYMBOL_GPL(rio_mport_write_config_32); */ int rio_mport_send_doorbell(struct rio_mport *mport, u16 destid, u16 data) { - int res; - unsigned long flags; - - spin_lock_irqsave(&rio_doorbell_lock, flags); - res = mport->ops->dsend(mport, mport->id, destid, data); - spin_unlock_irqrestore(&rio_doorbell_lock, flags); - - return res; + return mport->ops->dsend(mport, mport->id, destid, data); } EXPORT_SYMBOL_GPL(rio_mport_send_doorbell); -- cgit v1.2.3-70-g09d2 From a872eb2131e91ce7c89a8888974a5e22a272b12f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 3 Oct 2017 16:15:16 -0700 Subject: mm: fix RODATA_TEST failure "rodata_test: test data was not read only" On powerpc, RODATA_TEST fails with message the following messages: Freeing unused kernel memory: 528K rodata_test: test data was not read only This is because GCC allocates it to .data section: c0695034 g O .data 00000004 rodata_test_data Since commit 056b9d8a7692 ("mm: remove rodata_test_data export, add pr_fmt"), rodata_test_data is used only inside rodata_test.c By declaring it static, it gets properly allocated into .rodata section instead of .data: c04df710 l O .rodata 00000004 rodata_test_data Fixes: 056b9d8a7692 ("mm: remove rodata_test_data export, add pr_fmt") Link: http://lkml.kernel.org/r/20170921093729.1080368AC1@po15668-vm-win7.idsi0.si.c-s.fr Signed-off-by: Christophe Leroy Cc: Kees Cook Cc: Jinbum Park Cc: Segher Boessenkool Cc: David Laight Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rodata_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/rodata_test.c b/mm/rodata_test.c index 6bb4deb12e78..d908c8769b48 100644 --- a/mm/rodata_test.c +++ b/mm/rodata_test.c @@ -14,7 +14,7 @@ #include #include -const int rodata_test_data = 0xC3; +static const int rodata_test_data = 0xC3; void rodata_test(void) { -- cgit v1.2.3-70-g09d2 From ae94264ed4b0cf7cd887947650db4c69acb62072 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 3 Oct 2017 16:15:19 -0700 Subject: zram: fix null dereference of handle In testing I found handle passed to zs_map_object in __zram_bvec_read is NULL so eh kernel goes oops in pin_object(). The reason is there is no routine to check the slot's freeing after getting the slot's lock. This patch fixes it. [minchan@kernel.org: v2] Link: http://lkml.kernel.org/r/1505887347-10881-1-git-send-email-minchan@kernel.org Link: http://lkml.kernel.org/r/1505788488-26723-1-git-send-email-minchan@kernel.org Fixes: 1f7319c74275 ("zram: partial IO refactoring") Signed-off-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 2981c27d3aae..f149d3e61234 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -766,27 +766,6 @@ static void zram_slot_unlock(struct zram *zram, u32 index) bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); } -static bool zram_same_page_read(struct zram *zram, u32 index, - struct page *page, - unsigned int offset, unsigned int len) -{ - zram_slot_lock(zram, index); - if (unlikely(!zram_get_handle(zram, index) || - zram_test_flag(zram, index, ZRAM_SAME))) { - void *mem; - - zram_slot_unlock(zram, index); - mem = kmap_atomic(page); - zram_fill_page(mem + offset, len, - zram_get_element(zram, index)); - kunmap_atomic(mem); - return true; - } - zram_slot_unlock(zram, index); - - return false; -} - static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -884,11 +863,20 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, zram_slot_unlock(zram, index); } - if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE)) - return 0; - zram_slot_lock(zram, index); handle = zram_get_handle(zram, index); + if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { + unsigned long value; + void *mem; + + value = handle ? zram_get_element(zram, index) : 0; + mem = kmap_atomic(page); + zram_fill_page(mem, PAGE_SIZE, value); + kunmap_atomic(mem); + zram_slot_unlock(zram, index); + return 0; + } + size = zram_get_obj_size(zram, index); src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); -- cgit v1.2.3-70-g09d2 From 5bdfca6435b8294490ffb5b7c8b7d8eac3814b06 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 3 Oct 2017 16:15:23 -0700 Subject: m32r: define CPU_BIG_ENDIAN The build of m32r allmodconfig is giving lots of build warnings about: include/linux/byteorder/big_endian.h:7:2: warning: #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN [-Wcpp] #warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN Define CPU_BIG_ENDIAN like the way CPU_LITTLE_ENDIAN is defined. Link: http://lkml.kernel.org/r/1505678083-10320-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 87cde1e4b38c..0777f3a8a1f3 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -194,6 +194,10 @@ config TIMER_DIVIDE int "Timer divider (integer)" default "128" +config CPU_BIG_ENDIAN + bool "Generate big endian code" + default n + config CPU_LITTLE_ENDIAN bool "Generate little endian code" default n -- cgit v1.2.3-70-g09d2 From f4e222c56c83b2aed7cc2b329fca7435508eefa1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 3 Oct 2017 16:15:25 -0700 Subject: mm: have filemap_check_and_advance_wb_err clear AS_EIO/AS_ENOSPC Eryu noticed that he could sometimes get a leftover error reported when it shouldn't be on fsync with ext2 and non-journalled ext4. The problem is that writeback_single_inode still uses filemap_fdatawait. That picks up a previously set AS_EIO flag, which would ordinarily have been cleared before. Since we're mostly using this function as a replacement for filemap_check_errors, have filemap_check_and_advance_wb_err clear AS_EIO and AS_ENOSPC when reporting an error. That should allow the new function to better emulate the behavior of the old with respect to these flags. Link: http://lkml.kernel.org/r/20170922133331.28812-1-jlayton@kernel.org Signed-off-by: Jeff Layton Reported-by: Eryu Guan Reviewed-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index db250d0e0565..594d73fef8b4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -620,6 +620,14 @@ int file_check_and_advance_wb_err(struct file *file) trace_file_check_and_advance_wb_err(file, old); spin_unlock(&file->f_lock); } + + /* + * We're mostly using this function as a drop in replacement for + * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect + * that the legacy code would have had on these flags. + */ + clear_bit(AS_EIO, &mapping->flags); + clear_bit(AS_ENOSPC, &mapping->flags); return err; } EXPORT_SYMBOL(file_check_and_advance_wb_err); -- cgit v1.2.3-70-g09d2 From 24c92eb7dce0a299b8e1a8c5fa585844a53bf7f0 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 3 Oct 2017 16:15:29 -0700 Subject: mm: avoid marking swap cached page as lazyfree MADV_FREE clears pte dirty bit and then marks the page lazyfree (clear SwapBacked). There is no lock to prevent the page is added to swap cache between these two steps by page reclaim. Page reclaim could add the page to swap cache and unmap the page. After page reclaim, the page is added back to lru. At that time, we probably start draining per-cpu pagevec and mark the page lazyfree. So the page could be in a state with SwapBacked cleared and PG_swapcache set. Next time there is a refault in the virtual address, do_swap_page can find the page from swap cache but the page has PageSwapCache false because SwapBacked isn't set, so do_swap_page will bail out and do nothing. The task will keep running into fault handler. Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages") Link: http://lkml.kernel.org/r/6537ef3814398c0073630b03f176263bc81f0902.1506446061.git.shli@fb.com Signed-off-by: Shaohua Li Reported-by: Artem Savkov Tested-by: Artem Savkov Reviewed-by: Rik van Riel Acked-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Minchan Kim Cc: Hillf Danton Cc: Hugh Dickins Cc: Mel Gorman Cc: [4.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/swap.c b/mm/swap.c index 9295ae960d66..a77d68f2c1b6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -575,7 +575,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, void *arg) { if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && - !PageUnevictable(page)) { + !PageSwapCache(page) && !PageUnevictable(page)) { bool active = PageActive(page); del_page_from_lru_list(page, lruvec, @@ -665,7 +665,7 @@ void deactivate_file_page(struct page *page) void mark_page_lazyfree(struct page *page) { if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && - !PageUnevictable(page)) { + !PageSwapCache(page) && !PageUnevictable(page)) { struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs); get_page(page); -- cgit v1.2.3-70-g09d2 From 9625456cc76391b7f3f2809579126542a8ed4d39 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 3 Oct 2017 16:15:32 -0700 Subject: mm: fix data corruption caused by lazyfree page MADV_FREE clears pte dirty bit and then marks the page lazyfree (clear SwapBacked). There is no lock to prevent the page is added to swap cache between these two steps by page reclaim. If page reclaim finds such page, it will simply add the page to swap cache without pageout the page to swap because the page is marked as clean. Next time, page fault will read data from the swap slot which doesn't have the original data, so we have a data corruption. To fix issue, we mark the page dirty and pageout the page. However, we shouldn't dirty all pages which is clean and in swap cache. swapin page is swap cache and clean too. So we only dirty page which is added into swap cache in page reclaim, which shouldn't be swapin page. As Minchan suggested, simply dirty the page in add_to_swap can do the job. Fixes: 802a3a92ad7a ("mm: reclaim MADV_FREE pages") Link: http://lkml.kernel.org/r/08c84256b007bf3f63c91d94383bd9eb6fee2daa.1506446061.git.shli@fb.com Signed-off-by: Shaohua Li Reported-by: Artem Savkov Acked-by: Michal Hocko Acked-by: Minchan Kim Cc: Johannes Weiner Cc: Hillf Danton Cc: Hugh Dickins Cc: Rik van Riel Cc: Mel Gorman Cc: [4.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_state.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/swap_state.c b/mm/swap_state.c index 71ce2d1ccbf7..ed91091d1e68 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -242,6 +242,17 @@ int add_to_swap(struct page *page) * clear SWAP_HAS_CACHE flag. */ goto fail; + /* + * Normally the page will be dirtied in unmap because its pte should be + * dirty. A special case is MADV_FREE page. The page'e pte could have + * dirty bit cleared but the page's SwapBacked bit is still set because + * clearing the dirty bit and SwapBacked bit has no lock protected. For + * such page, unmap will not set dirty bit for it, so page reclaim will + * not write the page out. This can cause data corruption when the page + * is swap in later. Always setting the dirty bit for the page solves + * the problem. + */ + set_page_dirty(page); return 1; -- cgit v1.2.3-70-g09d2 From 7d790d2da386a52cfebcf0c898ba927bece9d4ab Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Tue, 3 Oct 2017 16:15:35 -0700 Subject: mm/device-public-memory: fix edge case in _vm_normal_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With device public pages at the end of my memory space, I'm getting output from _vm_normal_page(): BUG: Bad page map in process migrate_pages pte:c0800001ffff0d06 pmd:f95d3000 addr:00007fff89330000 vm_flags:00100073 anon_vma:c0000000fa899320 mapping: (null) index:7fff8933 file: (null) fault: (null) mmap: (null) readpage: (null) CPU: 0 PID: 13963 Comm: migrate_pages Tainted: P B OE 4.14.0-rc1-wip #155 Call Trace: dump_stack+0xb0/0xf4 (unreliable) print_bad_pte+0x28c/0x340 _vm_normal_page+0xc0/0x140 zap_pte_range+0x664/0xc10 unmap_page_range+0x318/0x670 unmap_vmas+0x74/0xe0 exit_mmap+0xe8/0x1f0 mmput+0xac/0x1f0 do_exit+0x348/0xcd0 do_group_exit+0x5c/0xf0 SyS_exit_group+0x1c/0x20 system_call+0x58/0x6c The pfn causing this is the very last one. Correct the bounds check accordingly. Fixes: df6ad69838fc ("mm/device-public-memory: device memory cache coherent with CPU") Link: http://lkml.kernel.org/r/1506092178-20351-1-git-send-email-arbab@linux.vnet.ibm.com Signed-off-by: Reza Arbab Reviewed-by: Jérôme Glisse Reviewed-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index ec4e15494901..a728bed16c20 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -845,7 +845,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, * vm_normal_page() so that we do not have to special case all * call site of vm_normal_page(). */ - if (likely(pfn < highest_memmap_pfn)) { + if (likely(pfn <= highest_memmap_pfn)) { struct page *page = pfn_to_page(pfn); if (is_device_public_page(page)) { -- cgit v1.2.3-70-g09d2 From 384632e67e0829deb8015ee6ad916b180049d252 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Tue, 3 Oct 2017 16:15:38 -0700 Subject: userfaultfd: non-cooperative: fix fork use after free When reading the event from the uffd, we put it on a temporary fork_event list to detect if we can still access it after releasing and retaking the event_wqh.lock. If fork aborts and removes the event from the fork_event all is fine as long as we're still in the userfault read context and fork_event head is still alive. We've to put the event allocated in the fork kernel stack, back from fork_event list-head to the event_wqh head, before returning from userfaultfd_ctx_read, because the fork_event head lifetime is limited to the userfaultfd_ctx_read stack lifetime. Forgetting to move the event back to its event_wqh place then results in __remove_wait_queue(&ctx->event_wqh, &ewq->wq); in userfaultfd_event_wait_completion to remove it from a head that has been already freed from the reader stack. This could only happen if resolve_userfault_fork failed (for example if there are no file descriptors available to allocate the fork uffd). If it succeeded it was put back correctly. Furthermore, after find_userfault_evt receives a fork event, the forked userfault context in fork_nctx and uwq->msg.arg.reserved.reserved1 can be released by the fork thread as soon as the event_wqh.lock is released. Taking a reference on the fork_nctx before dropping the lock prevents an use after free in resolve_userfault_fork(). If the fork side aborted and it already released everything, we still try to succeed resolve_userfault_fork(), if possible. Fixes: 893e26e61d04eac9 ("userfaultfd: non-cooperative: Add fork() event") Link: http://lkml.kernel.org/r/20170920180413.26713-1-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Mark Rutland Tested-by: Mark Rutland Cc: Pavel Emelyanov Cc: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ef4b48d1ea42..1c713fd5b3e6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -588,6 +588,12 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; if (ACCESS_ONCE(ctx->released) || fatal_signal_pending(current)) { + /* + * &ewq->wq may be queued in fork_event, but + * __remove_wait_queue ignores the head + * parameter. It would be a problem if it + * didn't. + */ __remove_wait_queue(&ctx->event_wqh, &ewq->wq); if (ewq->msg.event == UFFD_EVENT_FORK) { struct userfaultfd_ctx *new; @@ -1061,6 +1067,12 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, (unsigned long) uwq->msg.arg.reserved.reserved1; list_move(&uwq->wq.entry, &fork_event); + /* + * fork_nctx can be freed as soon as + * we drop the lock, unless we take a + * reference on it. + */ + userfaultfd_ctx_get(fork_nctx); spin_unlock(&ctx->event_wqh.lock); ret = 0; break; @@ -1091,19 +1103,53 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (!ret && msg->event == UFFD_EVENT_FORK) { ret = resolve_userfault_fork(ctx, fork_nctx, msg); + spin_lock(&ctx->event_wqh.lock); + if (!list_empty(&fork_event)) { + /* + * The fork thread didn't abort, so we can + * drop the temporary refcount. + */ + userfaultfd_ctx_put(fork_nctx); + + uwq = list_first_entry(&fork_event, + typeof(*uwq), + wq.entry); + /* + * If fork_event list wasn't empty and in turn + * the event wasn't already released by fork + * (the event is allocated on fork kernel + * stack), put the event back to its place in + * the event_wq. fork_event head will be freed + * as soon as we return so the event cannot + * stay queued there no matter the current + * "ret" value. + */ + list_del(&uwq->wq.entry); + __add_wait_queue(&ctx->event_wqh, &uwq->wq); - if (!ret) { - spin_lock(&ctx->event_wqh.lock); - if (!list_empty(&fork_event)) { - uwq = list_first_entry(&fork_event, - typeof(*uwq), - wq.entry); - list_del(&uwq->wq.entry); - __add_wait_queue(&ctx->event_wqh, &uwq->wq); + /* + * Leave the event in the waitqueue and report + * error to userland if we failed to resolve + * the userfault fork. + */ + if (likely(!ret)) userfaultfd_event_complete(ctx, uwq); - } - spin_unlock(&ctx->event_wqh.lock); + } else { + /* + * Here the fork thread aborted and the + * refcount from the fork thread on fork_nctx + * has already been released. We still hold + * the reference we took before releasing the + * lock above. If resolve_userfault_fork + * failed we've to drop it because the + * fork_nctx has to be freed in such case. If + * it succeeded we'll hold it because the new + * uffd references it. + */ + if (ret) + userfaultfd_ctx_put(fork_nctx); } + spin_unlock(&ctx->event_wqh.lock); } return ret; -- cgit v1.2.3-70-g09d2 From c2315c187fa0d3ab363fdebe22718170b40473e3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:42 -0700 Subject: exec: load_script: kill the onstack interp[BINPRM_BUF_SIZE] array Patch series "exec: binfmt_misc: fix use-after-free, kill iname[BINPRM_BUF_SIZE]". It looks like this code was always wrong, then commit 948b701a607f ("binfmt_misc: add persistent opened binary handler for containers") added more problems. This patch (of 6): load_script() can simply use i_name instead, it points into bprm->buf[] and nobody can change this memory until we call prepare_binprm(). The only complication is that we need to also change the signature of bprm_change_interp() but this change looks good too. While at it, do whitespace/style cleanups. NOTE: the real motivation for this change is that people want to increase BINPRM_BUF_SIZE, we need to change load_misc_binary() too but this looks more complicated because afaics it is very buggy. Link: http://lkml.kernel.org/r/20170918163446.GA26793@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Travis Gummels Cc: Ben Woodard Cc: Jim Foraker Cc: Cc: Al Viro Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_script.c | 17 +++++++++-------- fs/exec.c | 2 +- include/linux/binfmts.h | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index afdf4e3cafc2..7cde3f46ad26 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -19,7 +19,6 @@ static int load_script(struct linux_binprm *bprm) const char *i_arg, *i_name; char *cp; struct file *file; - char interp[BINPRM_BUF_SIZE]; int retval; if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) @@ -55,7 +54,7 @@ static int load_script(struct linux_binprm *bprm) break; } for (cp = bprm->buf+2; (*cp == ' ') || (*cp == '\t'); cp++); - if (*cp == '\0') + if (*cp == '\0') return -ENOEXEC; /* No interpreter name found */ i_name = cp; i_arg = NULL; @@ -65,7 +64,6 @@ static int load_script(struct linux_binprm *bprm) *cp++ = '\0'; if (*cp) i_arg = cp; - strcpy (interp, i_name); /* * OK, we've parsed out the interpreter name and * (optional) argument. @@ -80,24 +78,27 @@ static int load_script(struct linux_binprm *bprm) if (retval) return retval; retval = copy_strings_kernel(1, &bprm->interp, bprm); - if (retval < 0) return retval; + if (retval < 0) + return retval; bprm->argc++; if (i_arg) { retval = copy_strings_kernel(1, &i_arg, bprm); - if (retval < 0) return retval; + if (retval < 0) + return retval; bprm->argc++; } retval = copy_strings_kernel(1, &i_name, bprm); - if (retval) return retval; + if (retval) + return retval; bprm->argc++; - retval = bprm_change_interp(interp, bprm); + retval = bprm_change_interp(i_name, bprm); if (retval < 0) return retval; /* * OK, now restart the process with the interpreter's dentry. */ - file = open_exec(interp); + file = open_exec(i_name); if (IS_ERR(file)) return PTR_ERR(file); diff --git a/fs/exec.c b/fs/exec.c index ac34d9724684..5470d3c1892a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1410,7 +1410,7 @@ static void free_bprm(struct linux_binprm *bprm) kfree(bprm); } -int bprm_change_interp(char *interp, struct linux_binprm *bprm) +int bprm_change_interp(const char *interp, struct linux_binprm *bprm) { /* If a binfmt changed the interp, free it first. */ if (bprm->interp != bprm->filename) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index fb44d6180ca0..18d05b5491f3 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -131,7 +131,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, int executable_stack); extern int transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *sp_location); -extern int bprm_change_interp(char *interp, struct linux_binprm *bprm); +extern int bprm_change_interp(const char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); -- cgit v1.2.3-70-g09d2 From baba1b29731c79d605100087b8f02f9e1cf5a344 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:45 -0700 Subject: exec: binfmt_misc: don't nullify Node->dentry in kill_node() kill_node() nullifies/checks Node->dentry to avoid double free. This complicates the next changes and this is very confusing: - we do not need to check dentry != NULL under entries_lock, kill_node() is always called under inode_lock(d_inode(root)) and we rely on this inode_lock() anyway, without this lock the MISC_FMT_OPEN_FILE cleanup could race with itself. - if kill_inode() was already called and ->dentry == NULL we should not even try to close e->interp_file. We can change bm_entry_write() to simply check !list_empty(list) before kill_node. Again, we rely on inode_lock(), in particular it saves us from the race with bm_status_write(), another caller of kill_node(). Link: http://lkml.kernel.org/r/20170922143641.GA17210@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Cc: Travis Gummels Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index ce7181ea60fa..6451e7520e05 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -603,11 +603,7 @@ static void kill_node(Node *e) struct dentry *dentry; write_lock(&entries_lock); - dentry = e->dentry; - if (dentry) { - list_del_init(&e->list); - e->dentry = NULL; - } + list_del_init(&e->list); write_unlock(&entries_lock); if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) { @@ -615,12 +611,11 @@ static void kill_node(Node *e) e->interp_file = NULL; } - if (dentry) { - drop_nlink(d_inode(dentry)); - d_drop(dentry); - dput(dentry); - simple_release_fs(&bm_mnt, &entry_count); - } + dentry = e->dentry; + drop_nlink(d_inode(dentry)); + d_drop(dentry); + dput(dentry); + simple_release_fs(&bm_mnt, &entry_count); } /* / */ @@ -665,7 +660,8 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, root = file_inode(file)->i_sb->s_root; inode_lock(d_inode(root)); - kill_node(e); + if (!list_empty(&e->list)) + kill_node(e); inode_unlock(d_inode(root)); break; @@ -794,7 +790,7 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer, inode_lock(d_inode(root)); while (!list_empty(&entries)) - kill_node(list_entry(entries.next, Node, list)); + kill_node(list_first_entry(&entries, Node, list)); inode_unlock(d_inode(root)); break; -- cgit v1.2.3-70-g09d2 From 83f918274e4b841d6fb817861ea0c896fba0c179 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:48 -0700 Subject: exec: binfmt_misc: shift filp_close(interp_file) from kill_node() to bm_evict_inode() To ensure that load_misc_binary() can't use the partially destroyed Node, see also the next patch. The current logic looks wrong in any case, once we close interp_file it doesn't make any sense to delay kfree(inode->i_private), this Node is no longer valid. Even if the MISC_FMT_OPEN_FILE/interp_file checks were not racy (they are), load_misc_binary() should not try to reopen ->interpreter if MISC_FMT_OPEN_FILE is set but ->interp_file is NULL. And I can't understand why do we use filp_close(), not fput(). Link: http://lkml.kernel.org/r/20170922143644.GA17216@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Cc: Travis Gummels Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 6451e7520e05..203598ccb40a 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -594,8 +594,13 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) static void bm_evict_inode(struct inode *inode) { + Node *e = inode->i_private; + + if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) + filp_close(e->interp_file, NULL); + clear_inode(inode); - kfree(inode->i_private); + kfree(e); } static void kill_node(Node *e) @@ -606,11 +611,6 @@ static void kill_node(Node *e) list_del_init(&e->list); write_unlock(&entries_lock); - if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) { - filp_close(e->interp_file, NULL); - e->interp_file = NULL; - } - dentry = e->dentry; drop_nlink(d_inode(dentry)); d_drop(dentry); -- cgit v1.2.3-70-g09d2 From eb23aa0317eb1f08e8d9d36b8753d42f03b32764 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:51 -0700 Subject: exec: binfmt_misc: remove the confusing e->interp_file != NULL checks If MISC_FMT_OPEN_FILE flag is set e->interp_file must be valid or we have a bug which should not be silently ignored. Link: http://lkml.kernel.org/r/20170922143647.GA17222@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Cc: Travis Gummels Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 203598ccb40a..babfe7bd56f0 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -205,7 +205,7 @@ static int load_misc_binary(struct linux_binprm *bprm) if (retval < 0) goto error; - if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) { + if (fmt->flags & MISC_FMT_OPEN_FILE) { interp_file = filp_clone_open(fmt->interp_file); if (!IS_ERR(interp_file)) deny_write_access(interp_file); @@ -596,7 +596,7 @@ static void bm_evict_inode(struct inode *inode) { Node *e = inode->i_private; - if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) + if (e->flags & MISC_FMT_OPEN_FILE) filp_close(e->interp_file, NULL); clear_inode(inode); -- cgit v1.2.3-70-g09d2 From 43a4f2619038002f48c78698c42c05692d4b4eb2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:55 -0700 Subject: exec: binfmt_misc: fix race between load_misc_binary() and kill_node() load_misc_binary() makes a local copy of fmt->interpreter under entries_lock to avoid the race with kill_node() but this is not enough; the whole Node can be freed after we drop entries_lock, not only the ->interpreter string. Add dget/dput(fmt->dentry) to ensure bm_evict_inode() can't destroy/free this Node. Link: http://lkml.kernel.org/r/20170922143650.GA17227@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Travis Gummels Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index babfe7bd56f0..f5f8c2541790 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -138,20 +138,23 @@ static int load_misc_binary(struct linux_binprm *bprm) retval = -ENOEXEC; if (!enabled) - goto ret; + return retval; /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); - if (fmt) + if (fmt) { + dget(fmt->dentry); strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE); + } read_unlock(&entries_lock); if (!fmt) - goto ret; + return retval; /* Need to be able to load the file after exec */ + retval = -ENOENT; if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) - return -ENOENT; + goto ret; if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { retval = remove_arg_zero(bprm); @@ -238,6 +241,7 @@ static int load_misc_binary(struct linux_binprm *bprm) goto error; ret: + dput(fmt->dentry); return retval; error: if (fd_binary > 0) -- cgit v1.2.3-70-g09d2 From 50097f74934e3ec8fb1e6f3087568b958972817d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:58 -0700 Subject: exec: binfmt_misc: kill the onstack iname[BINPRM_BUF_SIZE] array After the previous change "fmt" can't go away, we can kill iname/iname_addr and use fmt->interpreter. Link: http://lkml.kernel.org/r/20170922143653.GA17232@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Al Viro Cc: Ben Woodard Cc: James Bottomley Cc: Jim Foraker Cc: Cc: Travis Gummels Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index f5f8c2541790..2a46762def31 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -54,7 +54,7 @@ typedef struct { int size; /* size of magic/mask */ char *magic; /* magic or filename extension */ char *mask; /* mask, NULL for exact match */ - char *interpreter; /* filename of interpreter */ + const char *interpreter; /* filename of interpreter */ char *name; struct dentry *dentry; struct file *interp_file; @@ -131,8 +131,6 @@ static int load_misc_binary(struct linux_binprm *bprm) { Node *fmt; struct file *interp_file = NULL; - char iname[BINPRM_BUF_SIZE]; - const char *iname_addr = iname; int retval; int fd_binary = -1; @@ -143,10 +141,8 @@ static int load_misc_binary(struct linux_binprm *bprm) /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); - if (fmt) { + if (fmt) dget(fmt->dentry); - strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE); - } read_unlock(&entries_lock); if (!fmt) return retval; @@ -198,13 +194,13 @@ static int load_misc_binary(struct linux_binprm *bprm) bprm->argc++; /* add the interp as argv[0] */ - retval = copy_strings_kernel(1, &iname_addr, bprm); + retval = copy_strings_kernel(1, &fmt->interpreter, bprm); if (retval < 0) goto error; bprm->argc++; /* Update interp in case binfmt_script needs it. */ - retval = bprm_change_interp(iname, bprm); + retval = bprm_change_interp(fmt->interpreter, bprm); if (retval < 0) goto error; @@ -213,7 +209,7 @@ static int load_misc_binary(struct linux_binprm *bprm) if (!IS_ERR(interp_file)) deny_write_access(interp_file); } else { - interp_file = open_exec(iname); + interp_file = open_exec(fmt->interpreter); } retval = PTR_ERR(interp_file); if (IS_ERR(interp_file)) -- cgit v1.2.3-70-g09d2 From 8cb5d7482810b7eb1bb05bf4f71bc93ce35e5896 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Oct 2017 16:16:01 -0700 Subject: lib/lz4: make arrays static const, reduces object code size Don't populate the read-only arrays dec32table and dec64table on the stack, instead make them both static const. Makes the object code smaller by over 10K bytes: Before: text data bss dec hex filename 31500 0 0 31500 7b0c lib/lz4/lz4_decompress.o After: text data bss dec hex filename 20237 176 0 20413 4fbd lib/lz4/lz4_decompress.o (gcc version 7.2.0 x86_64) Link: http://lkml.kernel.org/r/20170921221939.20820-1-colin.king@canonical.com Signed-off-by: Colin Ian King Cc: Christophe JAILLET Cc: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Cc: Arnd Bergmann Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/lz4/lz4_decompress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index bd3574312b82..141734d255e4 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -85,8 +85,8 @@ static FORCE_INLINE int LZ4_decompress_generic( const BYTE * const lowLimit = lowPrefix - dictSize; const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize; - const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; - const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; + static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; + static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; const int safeDecode = (endOnInput == endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB))); -- cgit v1.2.3-70-g09d2 From 7240767450d6d8380fb153e2998a1bb4ede7b029 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Oct 2017 16:16:04 -0700 Subject: include/linux/bitfield.h: remove 32bit from FIELD_GET comment block I do not see anything that restricts this macro to 32 bit width. Link: http://lkml.kernel.org/r/1505921975-23379-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Jakub Kicinski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 8b9d6fff002d..f2deb71958b2 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -92,7 +92,7 @@ /** * FIELD_GET() - extract a bitfield element * @_mask: shifted mask defining the field's length and position - * @_reg: 32bit value of entire bitfield + * @_reg: value of entire bitfield * * FIELD_GET() extracts the field specified by @_mask from the * bitfield passed in as @_reg by masking and shifting it down. -- cgit v1.2.3-70-g09d2 From 3181c38e4df257852a0c0a53552fd5c869402886 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 3 Oct 2017 16:16:07 -0700 Subject: kernel/sysctl.c: remove duplicate UINT_MAX check on do_proc_douintvec_conv() do_proc_douintvec_conv() has two UINT_MAX checks, we can remove one. This has no functional changes other than fixing a compiler warning: kernel/sysctl.c:2190]: (warning) Identical condition '*lvalp>UINT_MAX', second condition is always false Fixes: 4f2fec00afa60 ("sysctl: simplify unsigned int support") Link: http://lkml.kernel.org/r/20170919072918.12066-1-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Reported-by: David Binderman Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 423554ad3610..4da9e622471f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2186,8 +2186,6 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, int write, void *data) { if (write) { - if (*lvalp > UINT_MAX) - return -EINVAL; if (*lvalp > UINT_MAX) return -EINVAL; *valp = *lvalp; -- cgit v1.2.3-70-g09d2 From f80c7dab95a1f0f968acbafe4426ee9525b6f6ab Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 3 Oct 2017 16:16:10 -0700 Subject: mm: memcontrol: use vmalloc fallback for large kmem memcg arrays For quick per-memcg indexing, slab caches and list_lru structures maintain linear arrays of descriptors. As the number of concurrent memory cgroups in the system goes up, this requires large contiguous allocations (8k cgroups = order-5, 16k cgroups = order-6 etc.) for every existing slab cache and list_lru, which can easily fail on loaded systems. E.g.: mkdir: page allocation failure: order:5, mode:0x14040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null) CPU: 1 PID: 6399 Comm: mkdir Not tainted 4.13.0-mm1-00065-g720bbe532b7c-dirty #481 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-20170228_101828-anatol 04/01/2014 Call Trace: ? __alloc_pages_direct_compact+0x4c/0x110 __alloc_pages_nodemask+0xf50/0x1430 alloc_pages_current+0x60/0xc0 kmalloc_order_trace+0x29/0x1b0 __kmalloc+0x1f4/0x320 memcg_update_all_list_lrus+0xca/0x2e0 mem_cgroup_css_alloc+0x612/0x670 cgroup_apply_control_enable+0x19e/0x360 cgroup_mkdir+0x322/0x490 kernfs_iop_mkdir+0x55/0x80 vfs_mkdir+0xd0/0x120 SyS_mkdirat+0x6c/0xe0 SyS_mkdir+0x14/0x20 entry_SYSCALL_64_fastpath+0x18/0xad Mem-Info: active_anon:2965 inactive_anon:19 isolated_anon:0 active_file:100270 inactive_file:98846 isolated_file:0 unevictable:0 dirty:0 writeback:0 unstable:0 slab_reclaimable:7328 slab_unreclaimable:16402 mapped:771 shmem:52 pagetables:278 bounce:0 free:13718 free_pcp:0 free_cma:0 This output is from an artificial reproducer, but we have repeatedly observed order-7 failures in production in the Facebook fleet. These systems become useless as they cannot run more jobs, even though there is plenty of memory to allocate 128 individual pages. Use kvmalloc and kvzalloc to fall back to vmalloc space if these arrays prove too large for allocating them physically contiguous. Link: http://lkml.kernel.org/r/20170918184919.20644-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Josef Bacik Acked-by: Michal Hocko Acked-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/list_lru.c | 12 ++++++------ mm/slab_common.c | 22 +++++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index 7a40fa2be858..f141f0c80ff3 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -325,12 +325,12 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru) { int size = memcg_nr_cache_ids; - nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL); + nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL); if (!nlru->memcg_lrus) return -ENOMEM; if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) { - kfree(nlru->memcg_lrus); + kvfree(nlru->memcg_lrus); return -ENOMEM; } @@ -340,7 +340,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru) static void memcg_destroy_list_lru_node(struct list_lru_node *nlru) { __memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids); - kfree(nlru->memcg_lrus); + kvfree(nlru->memcg_lrus); } static int memcg_update_list_lru_node(struct list_lru_node *nlru, @@ -351,12 +351,12 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, BUG_ON(old_size > new_size); old = nlru->memcg_lrus; - new = kmalloc(new_size * sizeof(void *), GFP_KERNEL); + new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL); if (!new) return -ENOMEM; if (__memcg_init_list_lru_node(new, old_size, new_size)) { - kfree(new); + kvfree(new); return -ENOMEM; } @@ -373,7 +373,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, nlru->memcg_lrus = new; spin_unlock_irq(&nlru->lock); - kfree(old); + kvfree(old); return 0; } diff --git a/mm/slab_common.c b/mm/slab_common.c index 904a83be82de..80164599ca5d 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -165,9 +165,9 @@ static int init_memcg_params(struct kmem_cache *s, if (!memcg_nr_cache_ids) return 0; - arr = kzalloc(sizeof(struct memcg_cache_array) + - memcg_nr_cache_ids * sizeof(void *), - GFP_KERNEL); + arr = kvzalloc(sizeof(struct memcg_cache_array) + + memcg_nr_cache_ids * sizeof(void *), + GFP_KERNEL); if (!arr) return -ENOMEM; @@ -178,15 +178,23 @@ static int init_memcg_params(struct kmem_cache *s, static void destroy_memcg_params(struct kmem_cache *s) { if (is_root_cache(s)) - kfree(rcu_access_pointer(s->memcg_params.memcg_caches)); + kvfree(rcu_access_pointer(s->memcg_params.memcg_caches)); +} + +static void free_memcg_params(struct rcu_head *rcu) +{ + struct memcg_cache_array *old; + + old = container_of(rcu, struct memcg_cache_array, rcu); + kvfree(old); } static int update_memcg_params(struct kmem_cache *s, int new_array_size) { struct memcg_cache_array *old, *new; - new = kzalloc(sizeof(struct memcg_cache_array) + - new_array_size * sizeof(void *), GFP_KERNEL); + new = kvzalloc(sizeof(struct memcg_cache_array) + + new_array_size * sizeof(void *), GFP_KERNEL); if (!new) return -ENOMEM; @@ -198,7 +206,7 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size) rcu_assign_pointer(s->memcg_params.memcg_caches, new); if (old) - kfree_rcu(old, rcu); + call_rcu(&old->rcu, free_memcg_params); return 0; } -- cgit v1.2.3-70-g09d2 From a70e43a59de9316e6fbad3b65557d0a24c099aca Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 3 Oct 2017 16:16:13 -0700 Subject: lib/idr.c: fix comment for idr_replace() idr_replace() returns the old value on success, not 0. Link: http://lkml.kernel.org/r/20170918162642.37511-1-ebiggers3@gmail.com Signed-off-by: Eric Biggers Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index f9adf4805fd7..edd9b2be1651 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -146,8 +146,8 @@ EXPORT_SYMBOL(idr_get_next_ext); * idr_alloc() and idr_remove() (as long as the ID being removed is not * the one being replaced!). * - * Returns: 0 on success. %-ENOENT indicates that @id was not found. - * %-EINVAL indicates that @id or @ptr were not valid. + * Returns: the old value on success. %-ENOENT indicates that @id was not + * found. %-EINVAL indicates that @id or @ptr were not valid. */ void *idr_replace(struct idr *idr, void *ptr, int id) { -- cgit v1.2.3-70-g09d2 From f64ac5e6e30668216cf489d73ba8a96e372d78c6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:16:16 -0700 Subject: mm, memory_hotplug: add scheduling point to __add_pages Patch series "mm, memory_hotplug: fix few soft lockups in memory hotadd". Johannes has noticed few soft lockups when adding a large nvdimm device. All of them were caused by a long loop without any explicit cond_resched which is a problem for !PREEMPT kernels. The fix is quite straightforward. Just make sure that cond_resched gets called from time to time. This patch (of 3): __add_pages gets a pfn range to add and there is no upper bound for a single call. This is usually a memory block aligned size for the regular memory hotplug - smaller sizes are usual for memory balloning drivers, or the whole NUMA node for physical memory online. There is no explicit scheduling point in that code path though. This can lead to long latencies while __add_pages is executed and we have even seen a soft lockup report during nvdimm initialization with !PREEMPT kernel NMI watchdog: BUG: soft lockup - CPU#11 stuck for 23s! [kworker/u641:3:832] [...] Workqueue: events_unbound async_run_entry_fn task: ffff881809270f40 ti: ffff881809274000 task.ti: ffff881809274000 RIP: _raw_spin_unlock_irqrestore+0x11/0x20 RSP: 0018:ffff881809277b10 EFLAGS: 00000286 [...] Call Trace: sparse_add_one_section+0x13d/0x18e __add_pages+0x10a/0x1d0 arch_add_memory+0x4a/0xc0 devm_memremap_pages+0x29d/0x430 pmem_attach_disk+0x2fd/0x3f0 [nd_pmem] nvdimm_bus_probe+0x64/0x110 [libnvdimm] driver_probe_device+0x1f7/0x420 bus_for_each_drv+0x52/0x80 __device_attach+0xb0/0x130 bus_probe_device+0x87/0xa0 device_add+0x3fc/0x5f0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x43/0x150 process_one_work+0x14e/0x410 worker_thread+0x116/0x490 kthread+0xc7/0xe0 ret_from_fork+0x3f/0x70 DWARF2 unwinder stuck at ret_from_fork+0x3f/0x70 Fix this by adding cond_resched once per each memory section in the given pfn range. Each section is constant amount of work which itself is not too expensive but many of them will just add up. Link: http://lkml.kernel.org/r/20170918121410.24466-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e882cb6da994..23d5bd968950 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -328,6 +328,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, if (err && (err != -EEXIST)) break; err = 0; + cond_resched(); } vmemmap_populate_print_last(); out: -- cgit v1.2.3-70-g09d2 From 9b6e63cbf85b89b2dbffa4955dbf2df8250e5375 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:16:19 -0700 Subject: mm, page_alloc: add scheduling point to memmap_init_zone memmap_init_zone gets a pfn range to initialize and it can be really large resulting in a soft lockup on non-preemptible kernels NMI watchdog: BUG: soft lockup - CPU#31 stuck for 23s! [kworker/u642:5:1720] [...] task: ffff88ecd7e902c0 ti: ffff88eca4e50000 task.ti: ffff88eca4e50000 RIP: move_pfn_range_to_zone+0x185/0x1d0 [...] Call Trace: devm_memremap_pages+0x2c7/0x430 pmem_attach_disk+0x2fd/0x3f0 [nd_pmem] nvdimm_bus_probe+0x64/0x110 [libnvdimm] driver_probe_device+0x1f7/0x420 bus_for_each_drv+0x52/0x80 __device_attach+0xb0/0x130 bus_probe_device+0x87/0xa0 device_add+0x3fc/0x5f0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x43/0x150 process_one_work+0x14e/0x410 worker_thread+0x116/0x490 kthread+0xc7/0xe0 ret_from_fork+0x3f/0x70 Fix this by adding a scheduling point once per page block. Link: http://lkml.kernel.org/r/20170918121410.24466-3-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 38d165a87860..77e4d3c5c57b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5367,6 +5367,7 @@ not_early: __init_single_page(page, pfn, zone, nid); set_pageblock_migratetype(page, MIGRATE_MOVABLE); + cond_resched(); } else { __init_single_pfn(pfn, zone, nid); } -- cgit v1.2.3-70-g09d2 From 1fdcce6e16c54facc4f0688630d3b9ecfcaa411f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:16:23 -0700 Subject: memremap: add scheduling point to devm_memremap_pages devm_memremap_pages is initializing struct pages in for_each_device_pfn and that can take quite some time. We have even seen a soft lockup triggering on a non preemptive kernel NMI watchdog: BUG: soft lockup - CPU#61 stuck for 22s! [kworker/u641:11:1808] [...] RIP: 0010:[] [] devm_memremap_pages+0x327/0x430 [...] Call Trace: pmem_attach_disk+0x2fd/0x3f0 [nd_pmem] nvdimm_bus_probe+0x64/0x110 [libnvdimm] driver_probe_device+0x1f7/0x420 bus_for_each_drv+0x52/0x80 __device_attach+0xb0/0x130 bus_probe_device+0x87/0xa0 device_add+0x3fc/0x5f0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x43/0x150 process_one_work+0x14e/0x410 worker_thread+0x116/0x490 kthread+0xc7/0xe0 ret_from_fork+0x3f/0x70 fix this by adding cond_resched every 1024 pages. Link: http://lkml.kernel.org/r/20170918121410.24466-4-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 6bcbfbf1a8fd..403ab9cdb949 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -350,7 +350,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, pgprot_t pgprot = PAGE_KERNEL; struct dev_pagemap *pgmap; struct page_map *page_map; - int error, nid, is_ram; + int error, nid, is_ram, i = 0; align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) @@ -448,6 +448,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, list_del(&page->lru); page->pgmap = pgmap; percpu_ref_get(ref); + if (!(++i % 1024)) + cond_resched(); } devres_add(dev, page_map); return __va(res->start); -- cgit v1.2.3-70-g09d2 From c9653850c90d6050197bc76ba672e00a7771aea5 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 3 Oct 2017 16:16:26 -0700 Subject: kernel/kcmp.c: drop branch leftover typo The else branch been left over and escaped the source code refresh. Not a problem but better clean it up. Fixes: 0791e3644e5e ("kcmp: add KCMP_EPOLL_TFD mode to compare epoll target files") Link: http://lkml.kernel.org/r/20170917165838.GA1887@uranus.lan Reported-by: Eugene Syromiatnikov Signed-off-by: Cyrill Gorcunov Acked-by: Andrei Vagin Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kcmp.c b/kernel/kcmp.c index ea34ed8bb952..055bb2962a0b 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -131,7 +131,7 @@ static int kcmp_epoll_target(struct task_struct *task1, if (filp_epoll) { filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); fput(filp_epoll); - } else + } if (IS_ERR(filp_tgt)) return PTR_ERR(filp_tgt); -- cgit v1.2.3-70-g09d2 From 1dd2bfc86818ddbc95f98e312e7704350223fd7d Mon Sep 17 00:00:00 2001 From: YASUAKI ISHIMATSU Date: Tue, 3 Oct 2017 16:16:29 -0700 Subject: mm/memory_hotplug: change pfn_to_section_nr/section_nr_to_pfn macro to inline function pfn_to_section_nr() and section_nr_to_pfn() are defined as macro. pfn_to_section_nr() has no issue even if it is defined as macro. But section_nr_to_pfn() has overflow issue if sec is defined as int. section_nr_to_pfn() just shifts sec by PFN_SECTION_SHIFT. If sec is defined as unsigned long, section_nr_to_pfn() returns pfn as 64 bit value. But if sec is defined as int, section_nr_to_pfn() returns pfn as 32 bit value. __remove_section() calculates start_pfn using section_nr_to_pfn() and scn_nr defined as int. So if hot-removed memory address is over 16TB, overflow issue occurs and section_nr_to_pfn() does not calculate correct pfn. To make callers use proper arg, the patch changes the macros to inline functions. Fixes: 815121d2b5cd ("memory_hotplug: clear zone when removing the memory") Link: http://lkml.kernel.org/r/e643a387-e573-6bbf-d418-c60c8ee3d15e@gmail.com Signed-off-by: Yasuaki Ishimatsu Acked-by: Michal Hocko Cc: Xishi Qiu Cc: Reza Arbab Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 ++++++++-- mm/memory_hotplug.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 356a814e7c8e..c8f89417740b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1094,8 +1094,14 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif -#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) -#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) +static inline unsigned long pfn_to_section_nr(unsigned long pfn) +{ + return pfn >> PFN_SECTION_SHIFT; +} +static inline unsigned long section_nr_to_pfn(unsigned long sec) +{ + return sec << PFN_SECTION_SHIFT; +} #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 23d5bd968950..efd1ad37bb57 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -551,7 +551,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, return ret; scn_nr = __section_nr(ms); - start_pfn = section_nr_to_pfn(scn_nr); + start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn); sparse_remove_one_section(zone, ms, map_offset); -- cgit v1.2.3-70-g09d2 From d09b0137d204bebeaafed672bc5a244e9ac92edb Mon Sep 17 00:00:00 2001 From: YASUAKI ISHIMATSU Date: Tue, 3 Oct 2017 16:16:32 -0700 Subject: mm/memory_hotplug: define find_{smallest|biggest}_section_pfn as unsigned long find_{smallest|biggest}_section_pfn()s find the smallest/biggest section and return the pfn of the section. But the functions are defined as int. So the functions always return 0x00000000 - 0xffffffff. It means if memory address is over 16TB, the functions does not work correctly. To handle 64 bit value, the patch defines find_{smallest|biggest}_section_pfn() as unsigned long. Fixes: 815121d2b5cd ("memory_hotplug: clear zone when removing the memory") Link: http://lkml.kernel.org/r/d9d5593a-d0a4-c4be-ab08-493df59a85c6@gmail.com Signed-off-by: Yasuaki Ishimatsu Acked-by: Michal Hocko Cc: Xishi Qiu Cc: Reza Arbab Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index efd1ad37bb57..d4b5f29906b9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(__add_pages); #ifdef CONFIG_MEMORY_HOTREMOVE /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ -static int find_smallest_section_pfn(int nid, struct zone *zone, +static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { @@ -363,7 +363,7 @@ static int find_smallest_section_pfn(int nid, struct zone *zone, } /* find the biggest valid pfn in the range [start_pfn, end_pfn). */ -static int find_biggest_section_pfn(int nid, struct zone *zone, +static unsigned long find_biggest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { -- cgit v1.2.3-70-g09d2 From 90ceb2a3ad868f800eb1c9f4ede650daddd94b77 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Oct 2017 16:16:35 -0700 Subject: kernel/params.c: fix the maximum length in param_get_string The length parameter of strlcpy() is supposed to reflect the size of the target buffer, not of the source string. Harmless in this case as the buffer is PAGE_SIZE long and the source string is always much shorter than this, but conceptually wrong, so let's fix it. Link: http://lkml.kernel.org/r/20170928162515.24846b4f@endymion Signed-off-by: Jean Delvare Acked-by: Ingo Molnar Cc: Baoquan He Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/params.c b/kernel/params.c index 1cd8f1a895a8..8283ba045f4f 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -507,7 +507,7 @@ EXPORT_SYMBOL(param_set_copystring); int param_get_string(char *buffer, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; - return strlcpy(buffer, kps->string, kps->maxlen); + return strlcpy(buffer, kps->string, PAGE_SIZE); } EXPORT_SYMBOL(param_get_string); -- cgit v1.2.3-70-g09d2 From 96802e6b1dbf29d3012b39503c5dd6d9d8e82955 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Oct 2017 16:16:38 -0700 Subject: kernel/params.c: fix an overflow in param_attr_show Function param_attr_show could overflow the buffer it is operating on. The buffer size is PAGE_SIZE, and the string returned by attribute->param->ops->get is generated by scnprintf(buffer, PAGE_SIZE, ...) so it could be PAGE_SIZE - 1 long, with the terminating '\0' at the very end of the buffer. Calling strcat(..., "\n") on this isn't safe, as the '\0' will be replaced by '\n' (OK) and then another '\0' will be added past the end of the buffer (not OK.) Simply add the trailing '\n' when writing the attribute contents to the buffer originally. This is safe, and also faster. Credits to Teradata for discovering this issue. Link: http://lkml.kernel.org/r/20170928162602.60c379c7@endymion Signed-off-by: Jean Delvare Acked-by: Ingo Molnar Cc: Baoquan He Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index 8283ba045f4f..0cca488263dd 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -224,7 +224,7 @@ char *parse_args(const char *doing, } \ int param_get_##name(char *buffer, const struct kernel_param *kp) \ { \ - return scnprintf(buffer, PAGE_SIZE, format, \ + return scnprintf(buffer, PAGE_SIZE, format "\n", \ *((type *)kp->arg)); \ } \ const struct kernel_param_ops param_ops_##name = { \ @@ -270,7 +270,7 @@ EXPORT_SYMBOL(param_set_charp); int param_get_charp(char *buffer, const struct kernel_param *kp) { - return scnprintf(buffer, PAGE_SIZE, "%s", *((char **)kp->arg)); + return scnprintf(buffer, PAGE_SIZE, "%s\n", *((char **)kp->arg)); } EXPORT_SYMBOL(param_get_charp); @@ -301,7 +301,7 @@ EXPORT_SYMBOL(param_set_bool); int param_get_bool(char *buffer, const struct kernel_param *kp) { /* Y and N chosen as being relatively non-coder friendly */ - return sprintf(buffer, "%c", *(bool *)kp->arg ? 'Y' : 'N'); + return sprintf(buffer, "%c\n", *(bool *)kp->arg ? 'Y' : 'N'); } EXPORT_SYMBOL(param_get_bool); @@ -360,7 +360,7 @@ EXPORT_SYMBOL(param_set_invbool); int param_get_invbool(char *buffer, const struct kernel_param *kp) { - return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y'); + return sprintf(buffer, "%c\n", (*(bool *)kp->arg) ? 'N' : 'Y'); } EXPORT_SYMBOL(param_get_invbool); @@ -460,8 +460,9 @@ static int param_array_get(char *buffer, const struct kernel_param *kp) struct kernel_param p = *kp; for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) { + /* Replace \n with comma */ if (i) - buffer[off++] = ','; + buffer[off - 1] = ','; p.arg = arr->elem + arr->elemsize * i; check_kparam_locked(p.mod); ret = arr->ops->get(buffer + off, &p); @@ -507,7 +508,7 @@ EXPORT_SYMBOL(param_set_copystring); int param_get_string(char *buffer, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; - return strlcpy(buffer, kps->string, PAGE_SIZE); + return scnprintf(buffer, PAGE_SIZE, "%s\n", kps->string); } EXPORT_SYMBOL(param_get_string); @@ -549,10 +550,6 @@ static ssize_t param_attr_show(struct module_attribute *mattr, kernel_param_lock(mk->mod); count = attribute->param->ops->get(buf, attribute->param); kernel_param_unlock(mk->mod); - if (count > 0) { - strcat(buf, "\n"); - ++count; - } return count; } -- cgit v1.2.3-70-g09d2 From e0596c80f442d1e1221c17dbb71b2aed43909221 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Oct 2017 16:16:41 -0700 Subject: kernel/params.c: improve STANDARD_PARAM_DEF readability Align the parameters passed to STANDARD_PARAM_DEF for clarity. Link: http://lkml.kernel.org/r/20170928162728.756143cc@endymion Signed-off-by: Jean Delvare Suggested-by: Ingo Molnar Acked-by: Ingo Molnar Cc: Baoquan He Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index 0cca488263dd..cc9108c2a1fd 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -236,14 +236,14 @@ char *parse_args(const char *doing, EXPORT_SYMBOL(param_ops_##name) -STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", kstrtou8); -STANDARD_PARAM_DEF(short, short, "%hi", kstrtos16); -STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", kstrtou16); -STANDARD_PARAM_DEF(int, int, "%i", kstrtoint); -STANDARD_PARAM_DEF(uint, unsigned int, "%u", kstrtouint); -STANDARD_PARAM_DEF(long, long, "%li", kstrtol); -STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul); -STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull); +STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", kstrtou8); +STANDARD_PARAM_DEF(short, short, "%hi", kstrtos16); +STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", kstrtou16); +STANDARD_PARAM_DEF(int, int, "%i", kstrtoint); +STANDARD_PARAM_DEF(uint, unsigned int, "%u", kstrtouint); +STANDARD_PARAM_DEF(long, long, "%li", kstrtol); +STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul); +STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull); int param_set_charp(const char *val, const struct kernel_param *kp) { -- cgit v1.2.3-70-g09d2 From 656d61ce9666209c4c4a13c71902d3ee70d1ff6f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 3 Oct 2017 16:16:45 -0700 Subject: lib/ratelimit.c: use deferred printk() version printk_ratelimit() invokes ___ratelimit() which may invoke a normal printk() (pr_warn() in this particular case) to warn about suppressed output. Given that printk_ratelimit() may be called from anywhere, that pr_warn() is dangerous - it may end up deadlocking the system. Fix ___ratelimit() by using deferred printk(). Sasha reported the following lockdep error: : Unregister pv shared memory for cpu 8 : select_fallback_rq: 3 callbacks suppressed : process 8583 (trinity-c78) no longer affine to cpu8 : : ====================================================== : WARNING: possible circular locking dependency detected : 4.14.0-rc2-next-20170927+ #252 Not tainted : ------------------------------------------------------ : migration/8/62 is trying to acquire lock: : (&port_lock_key){-.-.}, at: serial8250_console_write() : : but task is already holding lock: : (&rq->lock){-.-.}, at: sched_cpu_dying() : : which lock already depends on the new lock. : : : the existing dependency chain (in reverse order) is: : : -> #3 (&rq->lock){-.-.}: : __lock_acquire() : lock_acquire() : _raw_spin_lock() : task_fork_fair() : sched_fork() : copy_process.part.31() : _do_fork() : kernel_thread() : rest_init() : start_kernel() : x86_64_start_reservations() : x86_64_start_kernel() : verify_cpu() : : -> #2 (&p->pi_lock){-.-.}: : __lock_acquire() : lock_acquire() : _raw_spin_lock_irqsave() : try_to_wake_up() : default_wake_function() : woken_wake_function() : __wake_up_common() : __wake_up_common_lock() : __wake_up() : tty_wakeup() : tty_port_default_wakeup() : tty_port_tty_wakeup() : uart_write_wakeup() : serial8250_tx_chars() : serial8250_handle_irq.part.25() : serial8250_default_handle_irq() : serial8250_interrupt() : __handle_irq_event_percpu() : handle_irq_event_percpu() : handle_irq_event() : handle_level_irq() : handle_irq() : do_IRQ() : ret_from_intr() : native_safe_halt() : default_idle() : arch_cpu_idle() : default_idle_call() : do_idle() : cpu_startup_entry() : rest_init() : start_kernel() : x86_64_start_reservations() : x86_64_start_kernel() : verify_cpu() : : -> #1 (&tty->write_wait){-.-.}: : __lock_acquire() : lock_acquire() : _raw_spin_lock_irqsave() : __wake_up_common_lock() : __wake_up() : tty_wakeup() : tty_port_default_wakeup() : tty_port_tty_wakeup() : uart_write_wakeup() : serial8250_tx_chars() : serial8250_handle_irq.part.25() : serial8250_default_handle_irq() : serial8250_interrupt() : __handle_irq_event_percpu() : handle_irq_event_percpu() : handle_irq_event() : handle_level_irq() : handle_irq() : do_IRQ() : ret_from_intr() : native_safe_halt() : default_idle() : arch_cpu_idle() : default_idle_call() : do_idle() : cpu_startup_entry() : rest_init() : start_kernel() : x86_64_start_reservations() : x86_64_start_kernel() : verify_cpu() : : -> #0 (&port_lock_key){-.-.}: : check_prev_add() : __lock_acquire() : lock_acquire() : _raw_spin_lock_irqsave() : serial8250_console_write() : univ8250_console_write() : console_unlock() : vprintk_emit() : vprintk_default() : vprintk_func() : printk() : ___ratelimit() : __printk_ratelimit() : select_fallback_rq() : sched_cpu_dying() : cpuhp_invoke_callback() : take_cpu_down() : multi_cpu_stop() : cpu_stopper_thread() : smpboot_thread_fn() : kthread() : ret_from_fork() : : other info that might help us debug this: : : Chain exists of: : &port_lock_key --> &p->pi_lock --> &rq->lock : : Possible unsafe locking scenario: : : CPU0 CPU1 : ---- ---- : lock(&rq->lock); : lock(&p->pi_lock); : lock(&rq->lock); : lock(&port_lock_key); : : *** DEADLOCK *** : : 4 locks held by migration/8/62: : #0: (&p->pi_lock){-.-.}, at: sched_cpu_dying() : #1: (&rq->lock){-.-.}, at: sched_cpu_dying() : #2: (printk_ratelimit_state.lock){....}, at: ___ratelimit() : #3: (console_lock){+.+.}, at: vprintk_emit() : : stack backtrace: : CPU: 8 PID: 62 Comm: migration/8 Not tainted 4.14.0-rc2-next-20170927+ #252 : Call Trace: : dump_stack() : print_circular_bug() : check_prev_add() : ? add_lock_to_list.isra.26() : ? check_usage() : ? kvm_clock_read() : ? kvm_sched_clock_read() : ? sched_clock() : ? check_preemption_disabled() : __lock_acquire() : ? __lock_acquire() : ? add_lock_to_list.isra.26() : ? debug_check_no_locks_freed() : ? memcpy() : lock_acquire() : ? serial8250_console_write() : _raw_spin_lock_irqsave() : ? serial8250_console_write() : serial8250_console_write() : ? serial8250_start_tx() : ? lock_acquire() : ? memcpy() : univ8250_console_write() : console_unlock() : ? __down_trylock_console_sem() : vprintk_emit() : vprintk_default() : vprintk_func() : printk() : ? show_regs_print_info() : ? lock_acquire() : ___ratelimit() : __printk_ratelimit() : select_fallback_rq() : sched_cpu_dying() : ? sched_cpu_starting() : ? rcutree_dying_cpu() : ? sched_cpu_starting() : cpuhp_invoke_callback() : ? cpu_disable_common() : take_cpu_down() : ? trace_hardirqs_off_caller() : ? cpuhp_invoke_callback() : multi_cpu_stop() : ? __this_cpu_preempt_check() : ? cpu_stop_queue_work() : cpu_stopper_thread() : ? cpu_stop_create() : smpboot_thread_fn() : ? sort_range() : ? schedule() : ? __kthread_parkme() : kthread() : ? sort_range() : ? kthread_create_on_node() : ret_from_fork() : process 9121 (trinity-c78) no longer affine to cpu8 : smpboot: CPU 8 is now offline Link: http://lkml.kernel.org/r/20170928120405.18273-1-sergey.senozhatsky@gmail.com Fixes: 6b1d174b0c27b ("ratelimit: extend to print suppressed messages on release") Signed-off-by: Sergey Senozhatsky Reported-by: Sasha Levin Reviewed-by: Petr Mladek Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Steven Rostedt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ratelimit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 08f8043cac61..d01f47135239 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -48,7 +48,9 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) if (time_is_before_jiffies(rs->begin + rs->interval)) { if (rs->missed) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { - pr_warn("%s: %d callbacks suppressed\n", func, rs->missed); + printk_deferred(KERN_WARNING + "%s: %d callbacks suppressed\n", + func, rs->missed); rs->missed = 0; } } -- cgit v1.2.3-70-g09d2 From d22e3d69ee1a3f83ff7cc943af63de48b6156dcf Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 3 Oct 2017 16:16:49 -0700 Subject: m32r: fix build failure The allmodconfig build of m32r is failing with the error: lib/mpi/mpih-div.o: In function 'mpihelp_divrem': mpih-div.c:(.text+0x40): undefined reference to 'abort' mpih-div.c:(.text+0x40): relocation truncated to fit: R_M32R_26_PCREL_RELA against undefined symbol 'abort' The function 'abort' was never defined for the m32r architecture. Create 'abort' as is done in other arch like 'arm' and 'unicore32'. Link: http://lkml.kernel.org/r/1506727220-6108-1-git-send-email-sudip.mukherjee@codethink.co.uk Signed-off-by: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/traps.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 647dd94a0c39..72b96f282689 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -114,6 +114,15 @@ static void set_eit_vector_entries(void) _flush_cache_copyback_all(); } +void abort(void) +{ + BUG(); + + /* if that doesn't kill us, halt */ + panic("Oops failed to kill thread"); +} +EXPORT_SYMBOL(abort); + void __init trap_init(void) { set_eit_vector_entries(); -- cgit v1.2.3-70-g09d2 From a08ffbef4ab799351d0610bbdbeaa1ee746b9065 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 3 Oct 2017 16:16:51 -0700 Subject: checkpatch: fix ignoring cover-letter logic Currently running checkpatch on a directory with a cover-letter.patch file reports the following error: ----------------------------------------- patches/smp-v2/v2-0000-cover-letter.patch ----------------------------------------- ERROR: Does not appear to be a unified-diff format patch The logic to suppress the unified-diff check for cover letters is there but is checking $file instead of $filename. Fix the variable to use the correct one. Link: http://lkml.kernel.org/r/20170909090406.31523-1-shorne@gmail.com Signed-off-by: Stafford Horne Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index dd2c262aebbf..8b80bac055e4 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6390,7 +6390,7 @@ sub process { exit(0); } - if (!$is_patch && $file !~ /cover-letter\.patch$/) { + if (!$is_patch && $filename !~ /cover-letter\.patch$/) { ERROR("NOT_UNIFIED_DIFF", "Does not appear to be a unified-diff format patch\n"); } -- cgit v1.2.3-70-g09d2 From 32e57c29e3c038ac802b7cc214a8795a4234055f Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 3 Oct 2017 16:16:54 -0700 Subject: include/linux/fs.h: fix comment about struct address_space Before commit 9c5d760b8d22 ("mm: split gfp_mask and mapping flags into separate fields") the private_* fields of struct adrress_space were grouped together and using "ditto" in comments describing the last fields was correct. With introduction of gpf_mask between private_lock and private_list "ditto" references the wrong description. Fix it by using the elaborate description. Link: http://lkml.kernel.org/r/1507009987-8746-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 339e73742e73..13dab191a23e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -403,7 +403,7 @@ struct address_space { unsigned long flags; /* error bits */ spinlock_t private_lock; /* for use by the address_space */ gfp_t gfp_mask; /* implicit gfp mask for allocations */ - struct list_head private_list; /* ditto */ + struct list_head private_list; /* for use by the address_space */ void *private_data; /* ditto */ errseq_t wb_err; } __attribute__((aligned(sizeof(long)))) __randomize_layout; -- cgit v1.2.3-70-g09d2 From e4c77f8b9b213c6315faba109c03b0db873db200 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Sep 2017 17:47:50 +0200 Subject: ARM: defconfig: FRAMEBUFFER_CONSOLE can no longer be =m It is no longer possible to load this at runtime, so let's change the few remaining users to have it built-in all the time. arch/arm/configs/zeus_defconfig:115:warning: symbol value 'm' invalid for FRAMEBUFFER_CONSOLE arch/arm/configs/viper_defconfig:116:warning: symbol value 'm' invalid for FRAMEBUFFER_CONSOLE arch/arm/configs/pxa_defconfig:474:warning: symbol value 'm' invalid for FRAMEBUFFER_CONSOLE Reported-by: kernelci.org bot Fixes: 6104c37094e7 ("fbcon: Make fbcon a built-time depency for fbdev") Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson --- arch/arm/configs/pxa_defconfig | 2 +- arch/arm/configs/viper_defconfig | 2 +- arch/arm/configs/zeus_defconfig | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 64e3a2a8cede..d5e1370ec303 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -471,7 +471,7 @@ CONFIG_LCD_PLATFORM=m CONFIG_LCD_TOSA=m CONFIG_BACKLIGHT_PWM=m CONFIG_BACKLIGHT_TOSA=m -CONFIG_FRAMEBUFFER_CONSOLE=m +CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y CONFIG_SOUND=m diff --git a/arch/arm/configs/viper_defconfig b/arch/arm/configs/viper_defconfig index 44d4fa57ba0a..070e5074f1ee 100644 --- a/arch/arm/configs/viper_defconfig +++ b/arch/arm/configs/viper_defconfig @@ -113,7 +113,7 @@ CONFIG_FB_PXA_PARAMETERS=y CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_BACKLIGHT_PWM=m # CONFIG_VGA_CONSOLE is not set -CONFIG_FRAMEBUFFER_CONSOLE=m +CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_SND=m diff --git a/arch/arm/configs/zeus_defconfig b/arch/arm/configs/zeus_defconfig index 8d4c0c926c34..09e7050d5653 100644 --- a/arch/arm/configs/zeus_defconfig +++ b/arch/arm/configs/zeus_defconfig @@ -112,7 +112,7 @@ CONFIG_FB_PXA=m CONFIG_FB_PXA_PARAMETERS=y CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_VGA_CONSOLE is not set -CONFIG_FRAMEBUFFER_CONSOLE=m +CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_SND=m -- cgit v1.2.3-70-g09d2 From 0694b2ee87ee1a6d83acf1a66b92c8e64ceb38f2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 17 Sep 2017 16:26:18 +0200 Subject: ARM: defconfig: update Gemini defconfig This updates the Gemini defconfig with drivers merged for v4.13 or v4.14: - ATA driver is merged - DMA driver is merged - RTC driver gets selected from default Kconfig Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/configs/gemini_defconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/configs/gemini_defconfig b/arch/arm/configs/gemini_defconfig index d2d75fa664a6..2a63fa10c813 100644 --- a/arch/arm/configs/gemini_defconfig +++ b/arch/arm/configs/gemini_defconfig @@ -32,6 +32,7 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_ATA=y +CONFIG_PATA_FTIDE010=y CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_GPIO=y # CONFIG_INPUT_MOUSE is not set @@ -55,8 +56,8 @@ CONFIG_LEDS_GPIO=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_GEMINI=y CONFIG_DMADEVICES=y +CONFIG_AMBA_PL08X=y # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y -- cgit v1.2.3-70-g09d2 From 043d1e729b0fbaf2b69386fe45290b8a9a18a6a9 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 6 Sep 2017 21:21:08 +0300 Subject: ARC: [plat-axs103] Add temporary quirk to reset ethernet IP DW ethernet controller on AXS10x hangs sometimes after SW reset, so add temporary quirk to reset DW ethernet controller IP core. This quirk can be removed after axs10x reset driver (see http://patchwork.ozlabs.org/patch/800273/) or simple reset driver (see https://patchwork.kernel.org/patch/9903375/) will be available in upstream. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/plat-axs10x/axs10x.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index f1ac6790da5f..cf14ebc36916 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -111,6 +111,13 @@ static void __init axs10x_early_init(void) axs10x_enable_gpio_intc_wire(); + /* + * Reset ethernet IP core. + * TODO: get rid of this quirk after axs10x reset driver (or simple + * reset driver) will be available in upstream. + */ + iowrite32((1 << 5), (void __iomem *) CREG_MB_SW_RESET); + scnprintf(mb, 32, "MainBoard v%d", mb_rev); axs10x_print_board_ver(CREG_MB_VER, mb); } -- cgit v1.2.3-70-g09d2 From 6afa3bcf1f919c374d4606a7ed8078d3f67dfa90 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 8 Sep 2017 21:42:33 +0300 Subject: ARC: [plat-hsdk] sdio: Temporary fix of sdio ciu frequency DW sdio controller has external ciu clock divider controlled via register in SDIO IP. Due to its unexpected default value (it should divide by 1 but it divides by 8) SDIO IP uses wrong ciu clock and works unstable So add temporary fix and change clock frequency from 100000000 to 12500000 Hz until we fix dw sdio driver itself. Fixes SNPS STAR 9001204800 Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 229d13adbce4..daeef4ab2df9 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -120,7 +120,17 @@ mmcclk_ciu: mmcclk-ciu { compatible = "fixed-clock"; - clock-frequency = <100000000>; + /* + * DW sdio controller has external ciu clock divider + * controlled via register in SDIO IP. Due to its + * unexpected default value (it should devide by 1 + * but it devides by 8) SDIO IP uses wrong clock and + * works unstable (see STAR 9001204800) + * So add temporary fix and change clock frequency + * from 100000000 to 12500000 Hz until we fix dw sdio + * driver itself. + */ + clock-frequency = <12500000>; #clock-cells = <0>; }; -- cgit v1.2.3-70-g09d2 From 976e78a5226598cb582fe9ef98a72861adbc0e9c Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 12 Sep 2017 21:20:45 +0300 Subject: ARC: [plat-axs10x] sdio: Temporary fix of sdio ciu frequency DW sdio controller has external ciu clock divider controlled via register in SDIO IP. It divides sdio_ref_clk (which comes from CGU) by 16 for default. So default mmcclk clock (which comes to sdk_in) is 25000000 Hz. So fix wrong current value (50000000 Hz) to actual 25000000 Hz. Note this is a preventive fix, in line with similar change for HSDK where this was actually needed. see: http://lists.infradead.org/pipermail/linux-snps-arc/2017-September/002924.html Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/axs10x_mb.dtsi | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 2367a67c5f10..e114000a84f5 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -44,7 +44,14 @@ mmcclk: mmcclk { compatible = "fixed-clock"; - clock-frequency = <50000000>; + /* + * DW sdio controller has external ciu clock divider + * controlled via register in SDIO IP. It divides + * sdio_ref_clk (which comes from CGU) by 16 for + * default. So default mmcclk clock (which comes + * to sdk_in) is 25000000 Hz. + */ + clock-frequency = <25000000>; #clock-cells = <0>; }; -- cgit v1.2.3-70-g09d2 From 9583833e9e3628177661e815e5ce80dd3955d82f Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 8 Sep 2017 23:12:59 +0300 Subject: ARC: [*defconfig] Reenable soft lock-up detector Commit 92e5aae45778 "kernel/watchdog: split up config options" introduced SOFTLOCKUP_DETECTOR which selects LOCKUP_DETECTOR instead of the latter to be selected itself. We need to adjust our defconfigs accordingly. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/configs/axs101_defconfig | 2 +- arch/arc/configs/axs103_defconfig | 2 +- arch/arc/configs/axs103_smp_defconfig | 2 +- arch/arc/configs/haps_hs_smp_defconfig | 2 +- arch/arc/configs/hsdk_defconfig | 2 +- arch/arc/configs/vdk_hs38_defconfig | 2 +- arch/arc/configs/vdk_hs38_smp_defconfig | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 6980b966a364..ec7c849a5c8e 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -105,7 +105,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 2233f5777a71..63d3cf69e0b0 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -104,7 +104,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 30a3d4cf53d2..f613ecac14a7 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -107,7 +107,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index 821a2e562f3f..3507be2af6fe 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -84,5 +84,5 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 9a3fcf446388..7b8f8faf8a24 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -72,7 +72,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index c0d6a010751a..4fcf4f2503f6 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -94,7 +94,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_SHIRQ=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 5c0971787acf..7b71464f6c2f 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -98,7 +98,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_SHIRQ=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set -- cgit v1.2.3-70-g09d2 From ef833eab1ddec06982ea620086b03d67ef4ddf9b Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 4 Sep 2017 12:48:43 +0300 Subject: ARC: [plat-hsdk] use actual clk driver to manage cpu clk With corresponding clk driver now merged upstream, switch to it. - core_clk now represent the PLL (vs. fixed clk before) - input_clk represent the clk signal src for PLL (basically xtal) Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 11 +++++++++-- arch/arc/plat-hsdk/Kconfig | 3 ++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index daeef4ab2df9..b922f3faf554 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -57,10 +57,10 @@ }; }; - core_clk: core-clk { + input_clk: input-clk { #clock-cells = <0>; compatible = "fixed-clock"; - clock-frequency = <500000000>; + clock-frequency = <33333333>; }; cpu_intc: cpu-interrupt-controller { @@ -102,6 +102,13 @@ ranges = <0x00000000 0xf0000000 0x10000000>; + core_clk: core-clk@0 { + compatible = "snps,hsdk-core-pll-clock"; + reg = <0x00 0x10>, <0x14B8 0x4>; + #clock-cells = <0>; + clocks = <&input_clk>; + }; + serial: serial@5000 { compatible = "snps,dw-apb-uart"; reg = <0x5000 0x100>; diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index 5a6ed5afb009..bd08de4be75e 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -6,4 +6,5 @@ # menuconfig ARC_SOC_HSDK - bool "ARC HS Development Kit SOC" + bool "ARC HS Development Kit SOC" + select CLK_HSDK -- cgit v1.2.3-70-g09d2 From bd6d3588c834e3087ad0229ff0da651bbccf5e24 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 11 Sep 2017 09:48:46 -0700 Subject: ARC: [plat-eznps] Update platform maintainer as Noam left Signed-off-by: Vineet Gupta --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 65b0c88d5ee0..42bfa57673ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5259,7 +5259,8 @@ S: Maintained F: drivers/iommu/exynos-iommu.c EZchip NPS platform support -M: Noam Camus +M: Elad Kanfi +M: Vineet Gupta S: Supported F: arch/arc/plat-eznps F: arch/arc/boot/dts/eznps.dts -- cgit v1.2.3-70-g09d2 From d9bc84a808572451f95fb1dde80cb8d12be05665 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 20 Sep 2017 20:25:30 +0900 Subject: arc: remove redundant UTS_MACHINE define in arch/arc/Makefile The top-level Makefile sets the default of UTS_MACHINE to $(ARCH). If ARCH and UTS_MACHINE match, arch/$(ARCH)/Makefile need not specify UTS_MACHINE explicitly. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 3a4b52b7e09d..d37f49d6a27f 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,8 +6,6 @@ # published by the Free Software Foundation. # -UTS_MACHINE := arc - ifeq ($(CROSS_COMPILE),) ifndef CONFIG_CPU_BIG_ENDIAN CROSS_COMPILE := arc-linux- -- cgit v1.2.3-70-g09d2 From 010a8c98884f4ca42a167c9b51470c624daa2932 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 21 Sep 2017 17:46:38 -0700 Subject: ARC: boot log: decontaminate ARCv2 ISA_CONFIG register ARCv2 ISA_CONFIG and ARC700_BUILD build config registers are not compatible. cpuinfo_arc had isa info placeholder which was mashup of bits form both. Untangle this by defining it off of ARCv2 ISA info and it is fine even for ARC700 since former is a super set of latter (ARC700 buildonly has 2 bits for atomics and stack check). At runtime, we treat ARCv2 ISA info as a generic placeholder but populate it correctly depending on ARC700 or HS. This paves way for adding more HS specific bits in isa info which was colliding with the extra bits for arc700. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 8 ++++---- arch/arc/kernel/setup.c | 15 +++++++++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index ba8e802dba80..b71d84873f7d 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -135,12 +135,12 @@ struct bcr_identity { #endif }; -struct bcr_isa { +struct bcr_isa_arcv2 { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int div_rem:4, pad2:4, ldd:1, unalign:1, atomic:1, be:1, - pad1:11, atomic1:1, ver:8; + pad1:12, ver:8; #else - unsigned int ver:8, atomic1:1, pad1:11, be:1, atomic:1, unalign:1, + unsigned int ver:8, pad1:12, be:1, atomic:1, unalign:1, ldd:1, pad2:4, div_rem:4; #endif }; @@ -263,7 +263,7 @@ struct cpuinfo_arc { struct cpuinfo_arc_mmu mmu; struct cpuinfo_arc_bpu bpu; struct bcr_identity core; - struct bcr_isa isa; + struct bcr_isa_arcv2 isa; const char *details, *name; unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 877cec8f5ea2..228593a964f8 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -119,11 +119,11 @@ static void read_arc_build_cfg_regs(void) struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; const struct id_to_str *tbl; + struct bcr_isa_arcv2 isa; FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); - READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa); for (tbl = &arc_cpu_rel[0]; tbl->id != 0; tbl++) { if (cpu->core.family == tbl->id) { @@ -205,18 +205,25 @@ static void read_arc_build_cfg_regs(void) cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt; + READ_BCR(ARC_REG_ISA_CFG_BCR, isa); + /* some hacks for lack of feature BCR info in old ARC700 cores */ if (is_isa_arcompact()) { - if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ + if (!isa.ver) /* ISA BCR absent, use Kconfig info */ cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); - else - cpu->isa.atomic = cpu->isa.atomic1; + else { + /* ARC700_BUILD only has 2 bits of isa info */ + struct bcr_generic bcr = *(struct bcr_generic *)&isa; + cpu->isa.atomic = bcr.info & 1; + } cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); /* there's no direct way to distinguish 750 vs. 770 */ if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3)) cpu->name = "ARC750"; + } else { + cpu->isa = isa; } } -- cgit v1.2.3-70-g09d2 From dea8252059a3210340f255bf69d67225b9af552d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 21 Sep 2017 18:02:44 -0700 Subject: ARCv2: boot log: identify HS48 cores (dual issue) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 3 ++- arch/arc/kernel/setup.c | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index b71d84873f7d..b1c56d35f2a9 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -98,6 +98,7 @@ /* Auxiliary registers */ #define AUX_IDENTITY 4 +#define AUX_EXEC_CTRL 8 #define AUX_INTR_VEC_BASE 0x25 #define AUX_VOL 0x5e @@ -269,7 +270,7 @@ struct cpuinfo_arc { struct cpuinfo_arc_ccm iccm, dccm; struct { unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2, - fpu_sp:1, fpu_dp:1, pad2:6, + fpu_sp:1, fpu_dp:1, dual_iss_enb:1, dual_iss_exist:1, pad2:4, debug:1, ap:1, smart:1, rtt:1, pad3:4, timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; } extn; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 228593a964f8..fb83844daeea 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -51,6 +51,7 @@ static const struct id_to_str arc_cpu_rel[] = { { 0x51, "R2.0" }, { 0x52, "R2.1" }, { 0x53, "R3.0" }, + { 0x54, "R4.0" }, #endif { 0x00, NULL } }; @@ -62,6 +63,7 @@ static const struct id_to_str arc_cpu_nm[] = { #else { 0x40, "ARC EM" }, { 0x50, "ARC HS38" }, + { 0x54, "ARC HS48" }, #endif { 0x00, "Unknown" } }; @@ -133,7 +135,7 @@ static void read_arc_build_cfg_regs(void) } for (tbl = &arc_cpu_nm[0]; tbl->id != 0; tbl++) { - if ((cpu->core.family & 0xF0) == tbl->id) + if ((cpu->core.family & 0xF4) == tbl->id) break; } cpu->name = tbl->str; @@ -192,6 +194,14 @@ static void read_arc_build_cfg_regs(void) cpu->bpu.full = bpu.ft; cpu->bpu.num_cache = 256 << bpu.bce; cpu->bpu.num_pred = 2048 << bpu.pte; + + if (cpu->core.family >= 0x54) { + unsigned int exec_ctrl; + + READ_BCR(AUX_EXEC_CTRL, exec_ctrl); + cpu->extn.dual_iss_exist = 1; + cpu->extn.dual_iss_enb = exec_ctrl & 1; + } } READ_BCR(ARC_REG_AP_BCR, bcr); @@ -239,10 +249,11 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", core->family, core->cpu_id, core->chip_id); - n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s\n", + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n", cpu_id, cpu->name, cpu->details, is_isa_arcompact() ? "ARCompact" : "ARCv2", - IS_AVAIL1(cpu->isa.be, "[Big-Endian]")); + IS_AVAIL1(cpu->isa.be, "[Big-Endian]"), + IS_AVAIL3(cpu->extn.dual_iss_exist, cpu->extn.dual_iss_enb, " Dual-Issue")); n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ", IS_AVAIL1(cpu->extn.timer0, "Timer0 "), -- cgit v1.2.3-70-g09d2 From 5464d03d92601ac2977ef605b0cbb33276567daf Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 29 Sep 2017 14:46:50 -0700 Subject: ARC: fix allnoconfig build warning Reported-by: Dmitrii Kolesnichenko Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index a598641eed98..c84e67fdea09 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -24,7 +24,7 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK - select HAVE_FUTEX_CMPXCHG + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_IOREMAP_PROT select HAVE_KPROBES select HAVE_KRETPROBES -- cgit v1.2.3-70-g09d2 From edb40d74c08edfd049cbba15479dadd9aeb7d307 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 28 Sep 2017 17:33:29 +0300 Subject: ARC: [plat-hsdk]: Temporary fix to set CPU frequency to 1GHz Add temporary fix to HSDK platform code to setup CPU frequency to 1GHz on early boot. We can remove this fix when smart hsdk pll driver will be introduced, see discussion: https://www.mail-archive.com/linux-snps-arc@lists.infradead.org/msg02689.html Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/plat-hsdk/platform.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index a2e7fd17e36d..744e62e58788 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -38,6 +38,42 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define CREG_PAE (CREG_BASE + 0x180) #define CREG_PAE_UPDATE (CREG_BASE + 0x194) +#define CREG_CORE_IF_CLK_DIV (CREG_BASE + 0x4B8) +#define CREG_CORE_IF_CLK_DIV_2 0x1 +#define CGU_BASE ARC_PERIPHERAL_BASE +#define CGU_PLL_STATUS (ARC_PERIPHERAL_BASE + 0x4) +#define CGU_PLL_CTRL (ARC_PERIPHERAL_BASE + 0x0) +#define CGU_PLL_STATUS_LOCK BIT(0) +#define CGU_PLL_STATUS_ERR BIT(1) +#define CGU_PLL_CTRL_1GHZ 0x3A10 +#define HSDK_PLL_LOCK_TIMEOUT 500 + +#define HSDK_PLL_LOCKED() \ + !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK) + +#define HSDK_PLL_ERR() \ + !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR) + +static void __init hsdk_set_cpu_freq_1ghz(void) +{ + u32 timeout = HSDK_PLL_LOCK_TIMEOUT; + + /* + * As we set cpu clock which exceeds 500MHz, the divider for the interface + * clock must be programmed to div-by-2. + */ + iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV); + + /* Set cpu clock to 1GHz */ + iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL); + + while (!HSDK_PLL_LOCKED() && timeout--) + cpu_relax(); + + if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR()) + pr_err("Failed to setup CPU frequency to 1GHz!"); +} + static void __init hsdk_init_early(void) { /* @@ -52,6 +88,12 @@ static void __init hsdk_init_early(void) /* Really apply settings made above */ writel(1, (void __iomem *) CREG_PAE_UPDATE); + + /* + * Setup CPU frequency to 1GHz. + * TODO: remove it after smart hsdk pll driver will be introduced. + */ + hsdk_set_cpu_freq_1ghz(); } static const char *hsdk_compat[] __initconst = { -- cgit v1.2.3-70-g09d2 From 52bfcdd7adbc26639bc7b2356ab9a3f5dad68ad6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 18 Sep 2017 09:41:18 -0700 Subject: xfs: always swap the cow forks when swapping extents Since the CoW fork exists as a secondary data structure to the data fork, we must always swap cow forks during swapext. We also need to swap the extent counts and reset the cowblocks tags. Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index bc6c6e10a969..e9db7fc95b70 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -2122,11 +2122,31 @@ xfs_swap_extents( ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK; + } + + /* Swap the cow forks. */ + if (xfs_sb_version_hasreflink(&mp->m_sb)) { + xfs_extnum_t extnum; + + ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS); + ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS); + + extnum = ip->i_cnextents; + ip->i_cnextents = tip->i_cnextents; + tip->i_cnextents = extnum; + cowfp = ip->i_cowfp; ip->i_cowfp = tip->i_cowfp; tip->i_cowfp = cowfp; - xfs_inode_set_cowblocks_tag(ip); - xfs_inode_set_cowblocks_tag(tip); + + if (ip->i_cowfp && ip->i_cnextents) + xfs_inode_set_cowblocks_tag(ip); + else + xfs_inode_clear_cowblocks_tag(ip); + if (tip->i_cowfp && tip->i_cnextents) + xfs_inode_set_cowblocks_tag(tip); + else + xfs_inode_clear_cowblocks_tag(tip); } xfs_trans_log_inode(tp, ip, src_log_flags); -- cgit v1.2.3-70-g09d2 From e12199f85d0ad1b04ce6c425ad93cd847fe930bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Oct 2017 08:58:33 -0700 Subject: xfs: handle racy AIO in xfs_reflink_end_cow If we got two AIO writes into a COW area the second one might not have any COW extents left to convert. Handle that case gracefully instead of triggering an assert or accessing beyond the bounds of the extent list. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_reflink.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 3246815c24d6..37e603bf1591 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -736,7 +736,13 @@ xfs_reflink_end_cow( /* If there is a hole at end_fsb - 1 go to the previous extent */ if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) || got.br_startoff > end_fsb) { - ASSERT(idx > 0); + /* + * In case of racing, overlapping AIO writes no COW extents + * might be left by the time I/O completes for the loser of + * the race. In that case we are done. + */ + if (idx <= 0) + goto out_cancel; xfs_iext_get_extent(ifp, --idx, &got); } @@ -809,6 +815,7 @@ next_extent: out_defer: xfs_defer_cancel(&dfops); +out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: -- cgit v1.2.3-70-g09d2 From 3dd40cb320fee7c23b574ab821ce140ccd1281c9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 3 Oct 2017 20:10:36 -0500 Subject: objtool: Upgrade libelf-devel warning to error for CONFIG_ORC_UNWINDER With CONFIG_ORC_UNWINDER, if the user doesn't have libelf-devel installed, and they don't see the make warning, their ORC unwinder will be silently broken. Upgrade the warning to an error. Reported-and-tested-by: Borislav Petkov Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d9dfc39fb8240998820f9efb233d283a1ee96084.1507079417.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cf007a31d575..bc5c79e8e3cf 100644 --- a/Makefile +++ b/Makefile @@ -933,7 +933,11 @@ ifdef CONFIG_STACK_VALIDATION ifeq ($(has_libelf),1) objtool_target := tools/objtool FORCE else - $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel") + ifdef CONFIG_ORC_UNWINDER + $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") + else + $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") + endif SKIP_STACK_VALIDATION := 1 export SKIP_STACK_VALIDATION endif -- cgit v1.2.3-70-g09d2 From 57e7ba04d422c3d41c8426380303ec9b7533ded9 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 19 Sep 2017 09:39:08 -0700 Subject: lsm: fix smack_inode_removexattr and xattr_getsecurity memleak security_inode_getsecurity() provides the text string value of a security attribute. It does not provide a "secctx". The code in xattr_getsecurity() that calls security_inode_getsecurity() and then calls security_release_secctx() happened to work because SElinux and Smack treat the attribute and the secctx the same way. It fails for cap_inode_getsecurity(), because that module has no secctx that ever needs releasing. It turns out that Smack is the one that's doing things wrong by not allocating memory when instructed to do so by the "alloc" parameter. The fix is simple enough. Change the security_release_secctx() to kfree() because it isn't a secctx being returned by security_inode_getsecurity(). Change Smack to allocate the string when told to do so. Note: this also fixes memory leaks for LSMs which implement inode_getsecurity but not release_secctx, such as capabilities. Signed-off-by: Casey Schaufler Reported-by: Konstantin Khlebnikov Cc: stable@vger.kernel.org Signed-off-by: James Morris --- fs/xattr.c | 2 +- security/smack/smack_lsm.c | 55 +++++++++++++++++++++------------------------- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 4424f7fecf14..61cd28ba25f3 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -250,7 +250,7 @@ xattr_getsecurity(struct inode *inode, const char *name, void *value, } memcpy(value, buffer, len); out: - security_release_secctx(buffer, len); + kfree(buffer); out_noalloc: return len; } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 319add31b4a4..286171a16ed2 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1473,7 +1473,7 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name) * @inode: the object * @name: attribute name * @buffer: where to put the result - * @alloc: unused + * @alloc: duplicate memory * * Returns the size of the attribute or an error code */ @@ -1486,43 +1486,38 @@ static int smack_inode_getsecurity(struct inode *inode, struct super_block *sbp; struct inode *ip = (struct inode *)inode; struct smack_known *isp; - int ilen; - int rc = 0; - if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) { + if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) isp = smk_of_inode(inode); - ilen = strlen(isp->smk_known); - *buffer = isp->smk_known; - return ilen; - } + else { + /* + * The rest of the Smack xattrs are only on sockets. + */ + sbp = ip->i_sb; + if (sbp->s_magic != SOCKFS_MAGIC) + return -EOPNOTSUPP; - /* - * The rest of the Smack xattrs are only on sockets. - */ - sbp = ip->i_sb; - if (sbp->s_magic != SOCKFS_MAGIC) - return -EOPNOTSUPP; + sock = SOCKET_I(ip); + if (sock == NULL || sock->sk == NULL) + return -EOPNOTSUPP; - sock = SOCKET_I(ip); - if (sock == NULL || sock->sk == NULL) - return -EOPNOTSUPP; - - ssp = sock->sk->sk_security; + ssp = sock->sk->sk_security; - if (strcmp(name, XATTR_SMACK_IPIN) == 0) - isp = ssp->smk_in; - else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) - isp = ssp->smk_out; - else - return -EOPNOTSUPP; + if (strcmp(name, XATTR_SMACK_IPIN) == 0) + isp = ssp->smk_in; + else if (strcmp(name, XATTR_SMACK_IPOUT) == 0) + isp = ssp->smk_out; + else + return -EOPNOTSUPP; + } - ilen = strlen(isp->smk_known); - if (rc == 0) { - *buffer = isp->smk_known; - rc = ilen; + if (alloc) { + *buffer = kstrdup(isp->smk_known, GFP_KERNEL); + if (*buffer == NULL) + return -ENOMEM; } - return rc; + return strlen(isp->smk_known); } -- cgit v1.2.3-70-g09d2 From de3ee99b097dd51938276e3af388cd4ad0f2750a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 20 Sep 2017 10:56:14 +0200 Subject: mmc: Delete bounce buffer handling In may, Steven sent a patch deleting the bounce buffer handling and the CONFIG_MMC_BLOCK_BOUNCE option. I chose the less invasive path of making it a runtime config option, and we merged that successfully for kernel v4.12. The code is however just standing in the way and taking up space for seemingly no gain on any systems in wide use today. Pierre says the code was there to improve speed on TI SDHCI controllers on certain HP laptops and possibly some Ricoh controllers as well. Early SDHCI controllers lacked the scatter-gather feature, which made software bounce buffers a significant speed boost. We are clearly talking about the list of SDHCI PCI-based MMC/SD card readers found in the pci_ids[] list in drivers/mmc/host/sdhci-pci-core.c. The TI SDHCI derivative is not supported by the upstream kernel. This leaves the Ricoh. What we can however notice is that the x86 defconfigs in the kernel did not enable CONFIG_MMC_BLOCK_BOUNCE option, which means that any such laptop would have to have a custom configured kernel to actually take advantage of this bounce buffer speed-up. It simply seems like there was a speed optimization for the Ricoh controllers that noone was using. (I have not checked the distro defconfigs but I am pretty sure the situation is the same there.) Bounce buffers increased performance on the OMAP HSMMC at one point, and was part of the original submission in commit a45c6cb81647 ("[ARM] 5369/1: omap mmc: Add new omap hsmmc controller for 2430 and 34xx, v3") This optimization was removed in commit 0ccd76d4c236 ("omap_hsmmc: Implement scatter-gather emulation") which found that scatter-gather emulation provided even better performance. The same was introduced for SDHCI in commit 2134a922c6e7 ("sdhci: scatter-gather (ADMA) support") I am pretty positively convinced that software scatter-gather emulation will do for any host controller what the bounce buffers were doing. Essentially, the bounce buffer was a reimplementation of software scatter-gather-emulation in the MMC subsystem, and it should be done away with. Cc: Pierre Ossman Cc: Juha Yrjola Cc: Steven J. Hill Cc: Shawn Lin Cc: Adrian Hunter Suggested-by: Steven J. Hill Suggested-by: Shawn Lin Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 3 -- drivers/mmc/core/queue.c | 125 ++++------------------------------------------ drivers/mmc/core/queue.h | 6 --- drivers/mmc/host/cavium.c | 2 +- drivers/mmc/host/pxamci.c | 6 +-- include/linux/mmc/host.h | 2 +- 6 files changed, 12 insertions(+), 132 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 29fc1e662891..2ad7b5c69156 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1634,8 +1634,6 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, } mqrq->areq.mrq = &brq->mrq; - - mmc_queue_bounce_pre(mqrq); } static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, @@ -1829,7 +1827,6 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) brq = &mq_rq->brq; old_req = mmc_queue_req_to_req(mq_rq); type = rq_data_dir(old_req) == READ ? MMC_BLK_READ : MMC_BLK_WRITE; - mmc_queue_bounce_post(mq_rq); switch (status) { case MMC_BLK_SUCCESS: diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 74c663b1c0a7..0a4e77a5ba33 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -23,8 +23,6 @@ #include "core.h" #include "card.h" -#define MMC_QUEUE_BOUNCESZ 65536 - /* * Prepare a MMC request. This just filters out odd stuff. */ @@ -150,26 +148,6 @@ static void mmc_queue_setup_discard(struct request_queue *q, queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q); } -static unsigned int mmc_queue_calc_bouncesz(struct mmc_host *host) -{ - unsigned int bouncesz = MMC_QUEUE_BOUNCESZ; - - if (host->max_segs != 1 || (host->caps & MMC_CAP_NO_BOUNCE_BUFF)) - return 0; - - if (bouncesz > host->max_req_size) - bouncesz = host->max_req_size; - if (bouncesz > host->max_seg_size) - bouncesz = host->max_seg_size; - if (bouncesz > host->max_blk_count * 512) - bouncesz = host->max_blk_count * 512; - - if (bouncesz <= 512) - return 0; - - return bouncesz; -} - /** * mmc_init_request() - initialize the MMC-specific per-request data * @q: the request queue @@ -184,26 +162,9 @@ static int mmc_init_request(struct request_queue *q, struct request *req, struct mmc_card *card = mq->card; struct mmc_host *host = card->host; - if (card->bouncesz) { - mq_rq->bounce_buf = kmalloc(card->bouncesz, gfp); - if (!mq_rq->bounce_buf) - return -ENOMEM; - if (card->bouncesz > 512) { - mq_rq->sg = mmc_alloc_sg(1, gfp); - if (!mq_rq->sg) - return -ENOMEM; - mq_rq->bounce_sg = mmc_alloc_sg(card->bouncesz / 512, - gfp); - if (!mq_rq->bounce_sg) - return -ENOMEM; - } - } else { - mq_rq->bounce_buf = NULL; - mq_rq->bounce_sg = NULL; - mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp); - if (!mq_rq->sg) - return -ENOMEM; - } + mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp); + if (!mq_rq->sg) + return -ENOMEM; return 0; } @@ -212,13 +173,6 @@ static void mmc_exit_request(struct request_queue *q, struct request *req) { struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req); - /* It is OK to kfree(NULL) so this will be smooth */ - kfree(mq_rq->bounce_sg); - mq_rq->bounce_sg = NULL; - - kfree(mq_rq->bounce_buf); - mq_rq->bounce_buf = NULL; - kfree(mq_rq->sg); mq_rq->sg = NULL; } @@ -242,12 +196,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT; - /* - * mmc_init_request() depends on card->bouncesz so it must be calculated - * before blk_init_allocated_queue() starts allocating requests. - */ - card->bouncesz = mmc_queue_calc_bouncesz(host); - mq->card = card; mq->queue = blk_alloc_queue(GFP_KERNEL); if (!mq->queue) @@ -271,17 +219,11 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, if (mmc_can_erase(card)) mmc_queue_setup_discard(mq->queue, card); - if (card->bouncesz) { - blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512); - blk_queue_max_segments(mq->queue, card->bouncesz / 512); - blk_queue_max_segment_size(mq->queue, card->bouncesz); - } else { - blk_queue_bounce_limit(mq->queue, limit); - blk_queue_max_hw_sectors(mq->queue, - min(host->max_blk_count, host->max_req_size / 512)); - blk_queue_max_segments(mq->queue, host->max_segs); - blk_queue_max_segment_size(mq->queue, host->max_seg_size); - } + blk_queue_bounce_limit(mq->queue, limit); + blk_queue_max_hw_sectors(mq->queue, + min(host->max_blk_count, host->max_req_size / 512)); + blk_queue_max_segments(mq->queue, host->max_segs); + blk_queue_max_segment_size(mq->queue, host->max_seg_size); sema_init(&mq->thread_sem, 1); @@ -370,56 +312,7 @@ void mmc_queue_resume(struct mmc_queue *mq) */ unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq) { - unsigned int sg_len; - size_t buflen; - struct scatterlist *sg; struct request *req = mmc_queue_req_to_req(mqrq); - int i; - - if (!mqrq->bounce_buf) - return blk_rq_map_sg(mq->queue, req, mqrq->sg); - - sg_len = blk_rq_map_sg(mq->queue, req, mqrq->bounce_sg); - - mqrq->bounce_sg_len = sg_len; - - buflen = 0; - for_each_sg(mqrq->bounce_sg, sg, sg_len, i) - buflen += sg->length; - - sg_init_one(mqrq->sg, mqrq->bounce_buf, buflen); - - return 1; -} - -/* - * If writing, bounce the data to the buffer before the request - * is sent to the host driver - */ -void mmc_queue_bounce_pre(struct mmc_queue_req *mqrq) -{ - if (!mqrq->bounce_buf) - return; - - if (rq_data_dir(mmc_queue_req_to_req(mqrq)) != WRITE) - return; - - sg_copy_to_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len, - mqrq->bounce_buf, mqrq->sg[0].length); -} - -/* - * If reading, bounce the data from the buffer after the request - * has been handled by the host driver - */ -void mmc_queue_bounce_post(struct mmc_queue_req *mqrq) -{ - if (!mqrq->bounce_buf) - return; - - if (rq_data_dir(mmc_queue_req_to_req(mqrq)) != READ) - return; - sg_copy_from_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len, - mqrq->bounce_buf, mqrq->sg[0].length); + return blk_rq_map_sg(mq->queue, req, mqrq->sg); } diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 04fc89360a7a..f18d3f656baa 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -49,9 +49,6 @@ enum mmc_drv_op { struct mmc_queue_req { struct mmc_blk_request brq; struct scatterlist *sg; - char *bounce_buf; - struct scatterlist *bounce_sg; - unsigned int bounce_sg_len; struct mmc_async_req areq; enum mmc_drv_op drv_op; int drv_op_result; @@ -81,11 +78,8 @@ extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, extern void mmc_cleanup_queue(struct mmc_queue *); extern void mmc_queue_suspend(struct mmc_queue *); extern void mmc_queue_resume(struct mmc_queue *); - extern unsigned int mmc_queue_map_sg(struct mmc_queue *, struct mmc_queue_req *); -extern void mmc_queue_bounce_pre(struct mmc_queue_req *); -extern void mmc_queue_bounce_post(struct mmc_queue_req *); extern int mmc_access_rpmb(struct mmc_queue *); diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c index 27fb625cbcf3..fbd29f00fca0 100644 --- a/drivers/mmc/host/cavium.c +++ b/drivers/mmc/host/cavium.c @@ -1038,7 +1038,7 @@ int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host) */ mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED | MMC_CAP_ERASE | MMC_CAP_CMD23 | MMC_CAP_POWER_OFF_CARD | - MMC_CAP_3_3V_DDR | MMC_CAP_NO_BOUNCE_BUFF; + MMC_CAP_3_3V_DDR; if (host->use_sg) mmc->max_segs = 16; diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 59ab194cb009..c763b404510f 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -702,11 +702,7 @@ static int pxamci_probe(struct platform_device *pdev) pxamci_init_ocr(host); - /* - * This architecture used to disable bounce buffers through its - * defconfig, now it is done at runtime as a host property. - */ - mmc->caps = MMC_CAP_NO_BOUNCE_BUFF; + mmc->caps = 0; host->cmdat = 0; if (!cpu_is_pxa25x()) { mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f3f2d07feb2a..9a43763a68ad 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -316,7 +316,7 @@ struct mmc_host { #define MMC_CAP_UHS_SDR50 (1 << 18) /* Host supports UHS SDR50 mode */ #define MMC_CAP_UHS_SDR104 (1 << 19) /* Host supports UHS SDR104 mode */ #define MMC_CAP_UHS_DDR50 (1 << 20) /* Host supports UHS DDR50 mode */ -#define MMC_CAP_NO_BOUNCE_BUFF (1 << 21) /* Disable bounce buffers on host */ +/* (1 << 21) is free for reuse */ #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ -- cgit v1.2.3-70-g09d2 From f450f28e70a2378d9d6ded0932fe480055888cfa Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Fri, 22 Sep 2017 13:42:47 -0500 Subject: reset: socfpga: fix for 64-bit compilation The SoCFPGA Stratix10 reset controller has 32-bit registers. Thus, we cannot use BITS_PER_LONG in computing the register and bit offset. Instead, we should be using the width of the hardware register for the calculation. Signed-off-by: Dinh Nguyen Signed-off-by: Philipp Zabel --- drivers/reset/reset-socfpga.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c index c60904ff40b8..3907bbc9c6cf 100644 --- a/drivers/reset/reset-socfpga.c +++ b/drivers/reset/reset-socfpga.c @@ -40,8 +40,9 @@ static int socfpga_reset_assert(struct reset_controller_dev *rcdev, struct socfpga_reset_data *data = container_of(rcdev, struct socfpga_reset_data, rcdev); - int bank = id / BITS_PER_LONG; - int offset = id % BITS_PER_LONG; + int reg_width = sizeof(u32); + int bank = id / (reg_width * BITS_PER_BYTE); + int offset = id % (reg_width * BITS_PER_BYTE); unsigned long flags; u32 reg; @@ -61,8 +62,9 @@ static int socfpga_reset_deassert(struct reset_controller_dev *rcdev, struct socfpga_reset_data, rcdev); - int bank = id / BITS_PER_LONG; - int offset = id % BITS_PER_LONG; + int reg_width = sizeof(u32); + int bank = id / (reg_width * BITS_PER_BYTE); + int offset = id % (reg_width * BITS_PER_BYTE); unsigned long flags; u32 reg; @@ -81,8 +83,9 @@ static int socfpga_reset_status(struct reset_controller_dev *rcdev, { struct socfpga_reset_data *data = container_of(rcdev, struct socfpga_reset_data, rcdev); - int bank = id / BITS_PER_LONG; - int offset = id % BITS_PER_LONG; + int reg_width = sizeof(u32); + int bank = id / (reg_width * BITS_PER_BYTE); + int offset = id % (reg_width * BITS_PER_BYTE); u32 reg; reg = readl(data->membase + (bank * BANK_INCREMENT)); @@ -132,7 +135,7 @@ static int socfpga_reset_probe(struct platform_device *pdev) spin_lock_init(&data->lock); data->rcdev.owner = THIS_MODULE; - data->rcdev.nr_resets = NR_BANKS * BITS_PER_LONG; + data->rcdev.nr_resets = NR_BANKS * (sizeof(u32) * BITS_PER_BYTE); data->rcdev.ops = &socfpga_reset_ops; data->rcdev.of_node = pdev->dev.of_node; -- cgit v1.2.3-70-g09d2 From ca3dcd3ff5b13a31a09a0119dc484b97ec19c4c8 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 2 Oct 2017 14:27:41 +0200 Subject: mmc: meson-gx: make sure the clock is rounded down Using CLK_DIVIDER_ROUND_CLOSEST is unsafe as the mmc clock could be rounded to a rate higher the specified rate. Removing this flag ensure that, if the rate needs to be rounded, it will be rounded down. Fixes: 51c5d8447bd7 ("MMC: meson: initial support for GX platforms") Signed-off-by: Jerome Brunet Reviewed-by: Kevin Hilman Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index c885c2d4b904..421c8719c202 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -531,8 +531,7 @@ static int meson_mmc_clk_init(struct meson_host *host) div->shift = __ffs(CLK_DIV_MASK); div->width = __builtin_popcountl(CLK_DIV_MASK); div->hw.init = &init; - div->flags = (CLK_DIVIDER_ONE_BASED | - CLK_DIVIDER_ROUND_CLOSEST); + div->flags = CLK_DIVIDER_ONE_BASED; clk = devm_clk_register(host->dev, &div->hw); if (WARN_ON(IS_ERR(clk))) -- cgit v1.2.3-70-g09d2 From 3e2b0af411d4bf85bc0fbc385756fd5968adb9fd Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 2 Oct 2017 14:27:42 +0200 Subject: mmc: meson-gx: fix rx phase reset Resetting the phase when POWER_ON is set the set_ios() call means that the phase is reset almost every time the set_ios() is called, while the expected behavior was to reset the phase on a power cycle. This had gone unnoticed until now because in all mode (except hs400) the tuning is done after the last to set_ios(). In such case, the tuning result is used anyway. In HS400, there are a few calls to set_ios() after the tuning is done, overwriting the tuning result. Resetting the phase on POWER_UP instead of POWER_ON solve the problem. Fixes: d341ca88eead ("mmc: meson-gx: rework tuning function") Signed-off-by: Jerome Brunet Reviewed-by: Kevin Hilman Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 421c8719c202..08a55c2e96e1 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -745,6 +745,10 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) case MMC_POWER_UP: if (!IS_ERR(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd); + + /* Reset rx phase */ + clk_set_phase(host->rx_clk, 0); + break; case MMC_POWER_ON: @@ -758,8 +762,6 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->vqmmc_enabled = true; } - /* Reset rx phase */ - clk_set_phase(host->rx_clk, 0); break; } -- cgit v1.2.3-70-g09d2 From 0a44697627d17a66d7dc98f17aeca07ca79c5c20 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 2 Oct 2017 14:27:43 +0200 Subject: mmc: meson-gx: include tx phase in the tuning process It has been reported that some platforms (odroid-c2) may require a different tx phase setting to operate at high speed (hs200 and hs400) To improve the situation, this patch includes tx phase in the tuning process. Fixes: d341ca88eead ("mmc: meson-gx: rework tuning function") Reported-by: Heiner Kallweit Signed-off-by: Jerome Brunet Reviewed-by: Kevin Hilman Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 08a55c2e96e1..85745ef179e2 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -716,6 +716,22 @@ static int meson_mmc_clk_phase_tuning(struct mmc_host *mmc, u32 opcode, static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct meson_host *host = mmc_priv(mmc); + int ret; + + /* + * If this is the initial tuning, try to get a sane Rx starting + * phase before doing the actual tuning. + */ + if (!mmc->doing_retune) { + ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk); + + if (ret) + return ret; + } + + ret = meson_mmc_clk_phase_tuning(mmc, opcode, host->tx_clk); + if (ret) + return ret; return meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk); } @@ -746,8 +762,9 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (!IS_ERR(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd); - /* Reset rx phase */ + /* Reset phases */ clk_set_phase(host->rx_clk, 0); + clk_set_phase(host->tx_clk, 270); break; -- cgit v1.2.3-70-g09d2 From bb16ea1742c8f35a9349b7508dc45d3a922db5f5 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Mon, 2 Oct 2017 16:58:52 +0200 Subject: mmc: sdhci-xenon: Fix clock resource by adding an optional bus clock On Armada 7K/8K we need to explicitly enable the bus clock. The bus clock is optional because not all the SoCs need them but at least for Armada 7K/8K it is actually mandatory. The binding documentation is updating accordingly. Without this patch the kernel hand during boot if the mvpp2.2 network driver was not present in the kernel. Indeed the clock needed by the xenon controller was set by the network driver. Fixes: 3a3748dba881 ("mmc: sdhci-xenon: Add Marvell Xenon SDHC core functionality)" CC: Stable Tested-by: Zhoujie Wu Signed-off-by: Gregory CLEMENT Signed-off-by: Ulf Hansson --- .../bindings/mmc/marvell,xenon-sdhci.txt | 12 ++++++----- drivers/mmc/host/sdhci-xenon.c | 24 ++++++++++++++++++---- drivers/mmc/host/sdhci-xenon.h | 1 + 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt index b878a1e305af..ed1456f5c94d 100644 --- a/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt +++ b/Documentation/devicetree/bindings/mmc/marvell,xenon-sdhci.txt @@ -16,11 +16,13 @@ Required Properties: - clocks: Array of clocks required for SDHC. - Require at least input clock for Xenon IP core. + Require at least input clock for Xenon IP core. For Armada AP806 and + CP110, the AXI clock is also mandatory. - clock-names: Array of names corresponding to clocks property. The input clock for Xenon IP core should be named as "core". + The input clock for the AXI bus must be named as "axi". - reg: * For "marvell,armada-3700-sdhci", two register areas. @@ -106,8 +108,8 @@ Example: compatible = "marvell,armada-ap806-sdhci"; reg = <0xaa0000 0x1000>; interrupts = - clocks = <&emmc_clk>; - clock-names = "core"; + clocks = <&emmc_clk>,<&axi_clk>; + clock-names = "core", "axi"; bus-width = <4>; marvell,xenon-phy-slow-mode; marvell,xenon-tun-count = <11>; @@ -126,8 +128,8 @@ Example: interrupts = vqmmc-supply = <&sd_vqmmc_regulator>; vmmc-supply = <&sd_vmmc_regulator>; - clocks = <&sdclk>; - clock-names = "core"; + clocks = <&sdclk>, <&axi_clk>; + clock-names = "core", "axi"; bus-width = <4>; marvell,xenon-tun-count = <9>; }; diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 2eec2e652c53..0842bbc2d7ad 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -466,6 +466,7 @@ static int xenon_probe(struct platform_device *pdev) { struct sdhci_pltfm_host *pltfm_host; struct sdhci_host *host; + struct xenon_priv *priv; int err; host = sdhci_pltfm_init(pdev, &sdhci_xenon_pdata, @@ -474,6 +475,7 @@ static int xenon_probe(struct platform_device *pdev) return PTR_ERR(host); pltfm_host = sdhci_priv(host); + priv = sdhci_pltfm_priv(pltfm_host); /* * Link Xenon specific mmc_host_ops function, @@ -491,9 +493,20 @@ static int xenon_probe(struct platform_device *pdev) if (err) goto free_pltfm; + priv->axi_clk = devm_clk_get(&pdev->dev, "axi"); + if (IS_ERR(priv->axi_clk)) { + err = PTR_ERR(priv->axi_clk); + if (err == -EPROBE_DEFER) + goto err_clk; + } else { + err = clk_prepare_enable(priv->axi_clk); + if (err) + goto err_clk; + } + err = mmc_of_parse(host->mmc); if (err) - goto err_clk; + goto err_clk_axi; sdhci_get_of_property(pdev); @@ -502,11 +515,11 @@ static int xenon_probe(struct platform_device *pdev) /* Xenon specific dt parse */ err = xenon_probe_dt(pdev); if (err) - goto err_clk; + goto err_clk_axi; err = xenon_sdhc_prepare(host); if (err) - goto err_clk; + goto err_clk_axi; pm_runtime_get_noresume(&pdev->dev); pm_runtime_set_active(&pdev->dev); @@ -527,6 +540,8 @@ remove_sdhc: pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); xenon_sdhc_unprepare(host); +err_clk_axi: + clk_disable_unprepare(priv->axi_clk); err_clk: clk_disable_unprepare(pltfm_host->clk); free_pltfm: @@ -538,6 +553,7 @@ static int xenon_remove(struct platform_device *pdev) { struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); pm_runtime_get_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); @@ -546,7 +562,7 @@ static int xenon_remove(struct platform_device *pdev) sdhci_remove_host(host, 0); xenon_sdhc_unprepare(host); - + clk_disable_unprepare(priv->axi_clk); clk_disable_unprepare(pltfm_host->clk); sdhci_pltfm_free(pdev); diff --git a/drivers/mmc/host/sdhci-xenon.h b/drivers/mmc/host/sdhci-xenon.h index 2bc0510c0769..9994995c7c56 100644 --- a/drivers/mmc/host/sdhci-xenon.h +++ b/drivers/mmc/host/sdhci-xenon.h @@ -83,6 +83,7 @@ struct xenon_priv { unsigned char bus_width; unsigned char timing; unsigned int clock; + struct clk *axi_clk; int phy_type; /* -- cgit v1.2.3-70-g09d2 From 6b9dc4806b28214a4a260517e59439e0ac12a15e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 Oct 2017 12:34:50 +0200 Subject: watchdog/core, powerpc: Replace watchdog_nmi_reconfigure() The recent cleanup of the watchdog code split watchdog_nmi_reconfigure() into two stages. One to stop the NMI and one to restart it after reconfiguration. That was done by adding a boolean 'run' argument to the code, which is functionally correct but not necessarily a piece of art. Replace it by two explicit functions: watchdog_nmi_stop() and watchdog_nmi_start(). Fixes: 6592ad2fcc8f ("watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage") Requested-by: Linus 'Nursing his pet-peeve' Torvalds Signed-off-by: Thomas 'Mopping up garbage' Gleixner Acked-by: Michael Ellerman Cc: Peter Zijlstra Cc: Don Zickus Cc: Benjamin Herrenschmidt Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710021957480.2114@nanos --- arch/powerpc/kernel/watchdog.c | 23 ++++++++++++++--------- include/linux/nmi.h | 3 ++- kernel/watchdog.c | 33 ++++++++++++++++++--------------- 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index dfb067764480..2673ec8bec00 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -355,19 +355,24 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_reconfigure(bool run) +void watchdog_nmi_stop(void) { int cpu; cpus_read_lock(); - if (!run) { - for_each_cpu(cpu, &wd_cpus_enabled) - stop_wd_on_cpu(cpu); - } else { - watchdog_calc_timeouts(); - for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) - start_wd_on_cpu(cpu); - } + for_each_cpu(cpu, &wd_cpus_enabled) + stop_wd_on_cpu(cpu); + cpus_read_unlock(); +} + +void watchdog_nmi_start(void) +{ + int cpu; + + cpus_read_lock(); + watchdog_calc_timeouts(); + for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) + start_wd_on_cpu(cpu); cpus_read_unlock(); } diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 89ba8b23c6fe..0c9ed49fb21a 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -109,7 +109,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } # endif #endif -void watchdog_nmi_reconfigure(bool run); +void watchdog_nmi_stop(void); +void watchdog_nmi_start(void); /** * touch_nmi_watchdog - restart NMI watchdog timeout. diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f6ef163b72cd..6ad6226535d0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -123,24 +123,27 @@ int __weak __init watchdog_nmi_probe(void) } /** - * watchdog_nmi_reconfigure - Optional function to reconfigure NMI watchdogs - * @run: If false stop the watchdogs on all enabled CPUs - * If true start the watchdogs on all enabled CPUs + * watchdog_nmi_stop - Stop the watchdog for reconfiguration * - * The core call order is: - * watchdog_nmi_reconfigure(false); + * The reconfiguration steps are: + * watchdog_nmi_stop(); * update_variables(); - * watchdog_nmi_reconfigure(true); + * watchdog_nmi_start(); + */ +void __weak watchdog_nmi_stop(void) { } + +/** + * watchdog_nmi_start - Start the watchdog after reconfiguration * - * The second call which starts the watchdogs again guarantees that the - * following variables are stable across the call. + * Counterpart to watchdog_nmi_stop(). + * + * The following variables have been updated in update_variables() and + * contain the currently valid configuration: * - watchdog_enabled * - watchdog_thresh * - watchdog_cpumask - * - * After the call the variables can be changed again. */ -void __weak watchdog_nmi_reconfigure(bool run) { } +void __weak watchdog_nmi_start(void) { } /** * lockup_detector_update_enable - Update the sysctl enable bit @@ -551,13 +554,13 @@ static void softlockup_unpark_threads(void) static void softlockup_reconfigure_threads(void) { - watchdog_nmi_reconfigure(false); + watchdog_nmi_stop(); softlockup_park_all_threads(); set_sample_period(); lockup_detector_update_enable(); if (watchdog_enabled && watchdog_thresh) softlockup_unpark_threads(); - watchdog_nmi_reconfigure(true); + watchdog_nmi_start(); } /* @@ -602,9 +605,9 @@ static inline void watchdog_disable_all_cpus(void) { } static inline void softlockup_init_threads(void) { } static void softlockup_reconfigure_threads(void) { - watchdog_nmi_reconfigure(false); + watchdog_nmi_stop(); lockup_detector_update_enable(); - watchdog_nmi_reconfigure(true); + watchdog_nmi_start(); } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ -- cgit v1.2.3-70-g09d2 From e31d6883f21c1cdfe5bc64e28411f8a92b783fde Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Oct 2017 16:37:53 +0200 Subject: watchdog/core, powerpc: Lock cpus across reconfiguration Instead of dropping the cpu hotplug lock after stopping NMI watchdog and threads and reaquiring for restart, the code and the protection rules become more obvious when holding cpu hotplug lock across the full reconfiguration. Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Acked-by: Michael Ellerman Cc: Peter Zijlstra Cc: Don Zickus Cc: Benjamin Herrenschmidt Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710022105570.2114@nanos --- arch/powerpc/kernel/watchdog.c | 4 ---- kernel/smpboot.c | 3 +-- kernel/watchdog.c | 10 +++++++++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 2673ec8bec00..f9b4c6352d24 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -359,21 +359,17 @@ void watchdog_nmi_stop(void) { int cpu; - cpus_read_lock(); for_each_cpu(cpu, &wd_cpus_enabled) stop_wd_on_cpu(cpu); - cpus_read_unlock(); } void watchdog_nmi_start(void) { int cpu; - cpus_read_lock(); watchdog_calc_timeouts(); for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) start_wd_on_cpu(cpu); - cpus_read_unlock(); } /* diff --git a/kernel/smpboot.c b/kernel/smpboot.c index ed7507b69b48..5043e7433f4b 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -351,7 +351,7 @@ void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread static struct cpumask tmp; unsigned int cpu; - get_online_cpus(); + lockdep_assert_cpus_held(); mutex_lock(&smpboot_threads_lock); /* Park threads that were exclusively enabled on the old mask. */ @@ -367,7 +367,6 @@ void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread cpumask_copy(old, new); mutex_unlock(&smpboot_threads_lock); - put_online_cpus(); } static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 6ad6226535d0..fff90fe10007 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -535,7 +535,6 @@ static void softlockup_update_smpboot_threads(void) smpboot_update_cpumask_percpu_thread(&watchdog_threads, &watchdog_allowed_mask); - __lockup_detector_cleanup(); } /* Temporarily park all watchdog threads */ @@ -554,6 +553,7 @@ static void softlockup_unpark_threads(void) static void softlockup_reconfigure_threads(void) { + cpus_read_lock(); watchdog_nmi_stop(); softlockup_park_all_threads(); set_sample_period(); @@ -561,6 +561,12 @@ static void softlockup_reconfigure_threads(void) if (watchdog_enabled && watchdog_thresh) softlockup_unpark_threads(); watchdog_nmi_start(); + cpus_read_unlock(); + /* + * Must be called outside the cpus locked section to prevent + * recursive locking in the perf code. + */ + __lockup_detector_cleanup(); } /* @@ -605,9 +611,11 @@ static inline void watchdog_disable_all_cpus(void) { } static inline void softlockup_init_threads(void) { } static void softlockup_reconfigure_threads(void) { + cpus_read_lock(); watchdog_nmi_stop(); lockup_detector_update_enable(); watchdog_nmi_start(); + cpus_read_unlock(); } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ -- cgit v1.2.3-70-g09d2 From 34ddaa3e5c0096fef52485186c7eb6cf56ddc686 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Oct 2017 16:39:02 +0200 Subject: powerpc/watchdog: Make use of watchdog_nmi_probe() The rework of the core hotplug code triggers the WARN_ON in start_wd_cpu() on powerpc because it is called multiple times for the boot CPU. The first call is via: start_wd_on_cpu+0x80/0x2f0 watchdog_nmi_reconfigure+0x124/0x170 softlockup_reconfigure_threads+0x110/0x130 lockup_detector_init+0xbc/0xe0 kernel_init_freeable+0x18c/0x37c kernel_init+0x2c/0x160 ret_from_kernel_thread+0x5c/0xbc And then again via the CPU hotplug registration: start_wd_on_cpu+0x80/0x2f0 cpuhp_invoke_callback+0x194/0x620 cpuhp_thread_fun+0x7c/0x1b0 smpboot_thread_fn+0x290/0x2a0 kthread+0x168/0x1b0 ret_from_kernel_thread+0x5c/0xbc This can be avoided by setting up the cpu hotplug state with nocalls and move the initialization to the watchdog_nmi_probe() function. That initializes the hotplug callbacks without invoking the callback and the following core initialization function then configures the watchdog for the online CPUs (in this case CPU0) via softlockup_reconfigure_threads(). Reported-and-tested-by: Michael Ellerman Signed-off-by: Thomas Gleixner Acked-by: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org --- arch/powerpc/kernel/watchdog.c | 17 ++++++++--------- include/linux/nmi.h | 1 + kernel/watchdog.c | 5 ++++- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index f9b4c6352d24..c702a8981452 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -373,22 +373,21 @@ void watchdog_nmi_start(void) } /* - * This runs after lockup_detector_init() which sets up watchdog_cpumask. + * Invoked from core watchdog init. */ -static int __init powerpc_watchdog_init(void) +int __init watchdog_nmi_probe(void) { int err; - watchdog_calc_timeouts(); - - err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", - start_wd_on_cpu, stop_wd_on_cpu); - if (err < 0) + err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "powerpc/watchdog:online", + start_wd_on_cpu, stop_wd_on_cpu); + if (err < 0) { pr_warn("Watchdog could not be initialized"); - + return err; + } return 0; } -arch_initcall(powerpc_watchdog_init); static void handle_backtrace_ipi(struct pt_regs *regs) { diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0c9ed49fb21a..27e249ed7c5c 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -111,6 +111,7 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } void watchdog_nmi_stop(void); void watchdog_nmi_start(void); +int watchdog_nmi_probe(void); /** * touch_nmi_watchdog - restart NMI watchdog timeout. diff --git a/kernel/watchdog.c b/kernel/watchdog.c index fff90fe10007..5c6fb7cd9ae8 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -608,7 +608,6 @@ static inline int watchdog_park_threads(void) { return 0; } static inline void watchdog_unpark_threads(void) { } static inline int watchdog_enable_all_cpus(void) { return 0; } static inline void watchdog_disable_all_cpus(void) { } -static inline void softlockup_init_threads(void) { } static void softlockup_reconfigure_threads(void) { cpus_read_lock(); @@ -617,6 +616,10 @@ static void softlockup_reconfigure_threads(void) watchdog_nmi_start(); cpus_read_unlock(); } +static inline void softlockup_init_threads(void) +{ + softlockup_reconfigure_threads(); +} #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ static void __lockup_detector_cleanup(void) -- cgit v1.2.3-70-g09d2 From 5587185ddb4b9f413299dfec0a022ad0212513e8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Oct 2017 10:03:04 +0200 Subject: watchdog/core: Rename some softlockup_* functions The function names made sense up to the point where the watchdog (re)configuration was unified to use softlockup_reconfigure_threads() for all configuration purposes. But that includes scenarios which solely configure the nmi watchdog. Rename softlockup_reconfigure_threads() and softlockup_init_threads() so the function names match the functionality. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Don Zickus --- kernel/watchdog.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 5c6fb7cd9ae8..d241bd99cee1 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -551,7 +551,7 @@ static void softlockup_unpark_threads(void) softlockup_update_smpboot_threads(); } -static void softlockup_reconfigure_threads(void) +static void lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); @@ -570,13 +570,13 @@ static void softlockup_reconfigure_threads(void) } /* - * Create the watchdog thread infrastructure. + * Create the watchdog thread infrastructure and configure the detector(s). * * The threads are not unparked as watchdog_allowed_mask is empty. When * the threads are sucessfully initialized, take the proper locks and * unpark the threads in the watchdog_cpumask if the watchdog is enabled. */ -static __init void softlockup_init_threads(void) +static __init void lockup_detector_setup(void) { int ret; @@ -599,7 +599,7 @@ static __init void softlockup_init_threads(void) mutex_lock(&watchdog_mutex); softlockup_threads_initialized = true; - softlockup_reconfigure_threads(); + lockup_detector_reconfigure(); mutex_unlock(&watchdog_mutex); } @@ -608,7 +608,7 @@ static inline int watchdog_park_threads(void) { return 0; } static inline void watchdog_unpark_threads(void) { } static inline int watchdog_enable_all_cpus(void) { return 0; } static inline void watchdog_disable_all_cpus(void) { } -static void softlockup_reconfigure_threads(void) +static void lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); @@ -616,9 +616,9 @@ static void softlockup_reconfigure_threads(void) watchdog_nmi_start(); cpus_read_unlock(); } -static inline void softlockup_init_threads(void) +static inline void lockup_detector_setup(void) { - softlockup_reconfigure_threads(); + lockup_detector_reconfigure(); } #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ @@ -658,7 +658,7 @@ static void proc_watchdog_update(void) { /* Remove impossible cpus to keep sysctl output clean. */ cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); - softlockup_reconfigure_threads(); + lockup_detector_reconfigure(); } /* @@ -785,5 +785,5 @@ void __init lockup_detector_init(void) if (!watchdog_nmi_probe()) nmi_watchdog_available = true; - softlockup_init_threads(); + lockup_detector_setup(); } -- cgit v1.2.3-70-g09d2 From b42dc0635bf0a6aa59fe4d7c826796ff659908c7 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 26 Sep 2017 09:18:27 +0300 Subject: mei: always use domain runtime pm callbacks. This patch fixes a regression caused by the new changes in the "run wake" handlers. The mei devices that support D0i3 are no longer receiving an interrupt after entering runtime suspend state and will stall. pci_dev_run_wake function now returns "true" for some devices (including mei) for which it used to return "false", arguably incorrectly as "run wake" used to mean that wakeup signals can be generated for a device in the working state of the system, so it could not be enabled or disabled before too. MEI maps runtime suspend/resume to its own defined power gating (PG) states, (D0i3 or other depending on generation), hence we need to go around the native PCI runtime service which eventually brings the device into D3cold/hot state, but the mei devices cannot wake up from D3 unlike from D0i3/PG state, which keeps irq running. To get around PCI device native runtime pm, MEI uses runtime pm domain handlers which take precedence. Cc: #4.13+ Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Acked-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-me.c | 21 +++++++++++---------- drivers/misc/mei/pci-txe.c | 30 +++++++++++------------------- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 4ff40d319676..630757a4b36a 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -226,12 +226,15 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME; /* - * For not wake-able HW runtime pm framework - * can't be used on pci device level. - * Use domain runtime pm callbacks instead. - */ - if (!pci_dev_run_wake(pdev)) - mei_me_set_pm_domain(dev); + * ME maps runtime suspend/resume to D0i states, + * hence we need to go around native PCI runtime service which + * eventually brings the device into D3cold/hot state, + * but the mei device cannot wake up from D3 unlike from D0i3. + * To get around the PCI device native runtime pm, + * ME uses runtime pm domain handlers which take precedence + * over the driver's pm handlers. + */ + mei_me_set_pm_domain(dev); if (mei_pg_is_enabled(dev)) pm_runtime_put_noidle(&pdev->dev); @@ -271,8 +274,7 @@ static void mei_me_shutdown(struct pci_dev *pdev) dev_dbg(&pdev->dev, "shutdown\n"); mei_stop(dev); - if (!pci_dev_run_wake(pdev)) - mei_me_unset_pm_domain(dev); + mei_me_unset_pm_domain(dev); mei_disable_interrupts(dev); free_irq(pdev->irq, dev); @@ -300,8 +302,7 @@ static void mei_me_remove(struct pci_dev *pdev) dev_dbg(&pdev->dev, "stop\n"); mei_stop(dev); - if (!pci_dev_run_wake(pdev)) - mei_me_unset_pm_domain(dev); + mei_me_unset_pm_domain(dev); mei_disable_interrupts(dev); diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c index e38a5f144373..0566f9bfa7de 100644 --- a/drivers/misc/mei/pci-txe.c +++ b/drivers/misc/mei/pci-txe.c @@ -144,12 +144,14 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME; /* - * For not wake-able HW runtime pm framework - * can't be used on pci device level. - * Use domain runtime pm callbacks instead. - */ - if (!pci_dev_run_wake(pdev)) - mei_txe_set_pm_domain(dev); + * TXE maps runtime suspend/resume to own power gating states, + * hence we need to go around native PCI runtime service which + * eventually brings the device into D3cold/hot state. + * But the TXE device cannot wake up from D3 unlike from own + * power gating. To get around PCI device native runtime pm, + * TXE uses runtime pm domain handlers which take precedence. + */ + mei_txe_set_pm_domain(dev); pm_runtime_put_noidle(&pdev->dev); @@ -186,8 +188,7 @@ static void mei_txe_shutdown(struct pci_dev *pdev) dev_dbg(&pdev->dev, "shutdown\n"); mei_stop(dev); - if (!pci_dev_run_wake(pdev)) - mei_txe_unset_pm_domain(dev); + mei_txe_unset_pm_domain(dev); mei_disable_interrupts(dev); free_irq(pdev->irq, dev); @@ -215,8 +216,7 @@ static void mei_txe_remove(struct pci_dev *pdev) mei_stop(dev); - if (!pci_dev_run_wake(pdev)) - mei_txe_unset_pm_domain(dev); + mei_txe_unset_pm_domain(dev); mei_disable_interrupts(dev); free_irq(pdev->irq, dev); @@ -318,15 +318,7 @@ static int mei_txe_pm_runtime_suspend(struct device *device) else ret = -EAGAIN; - /* - * If everything is okay we're about to enter PCI low - * power state (D3) therefor we need to disable the - * interrupts towards host. - * However if device is not wakeable we do not enter - * D-low state and we need to keep the interrupt kicking - */ - if (!ret && pci_dev_run_wake(pdev)) - mei_disable_interrupts(dev); + /* keep irq on we are staying in D0 */ dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret); -- cgit v1.2.3-70-g09d2 From 688cb67839e852740d22cf763e5eafb27d5a6e53 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 24 Sep 2017 11:35:34 +0300 Subject: mei: me: add gemini lake devices id Add Gemini Lake (GLK) device id. Signed-off-by: Tomas Winkler Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index c8307e8b4c16..0ccccbaf530d 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -127,6 +127,8 @@ #define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ #define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ +#define MEI_DEV_ID_GLK 0x319A /* Gemini Lake */ + #define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ #define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 630757a4b36a..78b3172c8e6e 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -93,6 +93,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, -- cgit v1.2.3-70-g09d2 From 192b2d78722ffea188e5ec6ae5d55010dce05a4b Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 29 Sep 2017 21:09:36 -0700 Subject: Drivers: hv: vmbus: Fix bugs in rescind handling This patch addresses the following bugs in the current rescind handling code: 1. Fixes a race condition where we may be invoking hv_process_channel_removal() on an already freed channel. 2. Prevents indefinite wait when rescinding sub-channels by correctly setting the probe_complete state. I would like to thank Dexuan for patiently reviewing earlier versions of this patch and identifying many of the issues fixed here. Greg, please apply this to 4.14-final. Fixes: '54a66265d675 ("Drivers: hv: vmbus: Fix rescind handling")' Signed-off-by: K. Y. Srinivasan Reviewed-by: Dexuan Cui Cc: stable@vger.kernel.org # (4.13 and above) Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 6 +++--- drivers/hv/channel_mgmt.c | 37 ++++++++++++++++++------------------- drivers/hv/vmbus_drv.c | 3 +-- include/linux/hyperv.h | 2 +- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index efd5db743319..894b67ac2cae 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -640,6 +640,7 @@ void vmbus_close(struct vmbus_channel *channel) */ return; } + mutex_lock(&vmbus_connection.channel_mutex); /* * Close all the sub-channels first and then close the * primary channel. @@ -648,16 +649,15 @@ void vmbus_close(struct vmbus_channel *channel) cur_channel = list_entry(cur, struct vmbus_channel, sc_list); vmbus_close_internal(cur_channel); if (cur_channel->rescind) { - mutex_lock(&vmbus_connection.channel_mutex); - hv_process_channel_removal(cur_channel, + hv_process_channel_removal( cur_channel->offermsg.child_relid); - mutex_unlock(&vmbus_connection.channel_mutex); } } /* * Now close the primary. */ vmbus_close_internal(channel); + mutex_unlock(&vmbus_connection.channel_mutex); } EXPORT_SYMBOL_GPL(vmbus_close); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index bcbb031f7263..018d2e0f8ec5 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -159,7 +159,7 @@ static void vmbus_rescind_cleanup(struct vmbus_channel *channel) spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); - + channel->rescind = true; list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) { @@ -381,14 +381,21 @@ static void vmbus_release_relid(u32 relid) true); } -void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) +void hv_process_channel_removal(u32 relid) { unsigned long flags; - struct vmbus_channel *primary_channel; + struct vmbus_channel *primary_channel, *channel; - BUG_ON(!channel->rescind); BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + /* + * Make sure channel is valid as we may have raced. + */ + channel = relid2channel(relid); + if (!channel) + return; + + BUG_ON(!channel->rescind); if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -515,6 +522,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) if (!fnew) { if (channel->sc_creation_callback != NULL) channel->sc_creation_callback(newchannel); + newchannel->probe_done = true; return; } @@ -834,7 +842,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) { struct vmbus_channel_rescind_offer *rescind; struct vmbus_channel *channel; - unsigned long flags; struct device *dev; rescind = (struct vmbus_channel_rescind_offer *)hdr; @@ -873,16 +880,6 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) return; } - spin_lock_irqsave(&channel->lock, flags); - channel->rescind = true; - spin_unlock_irqrestore(&channel->lock, flags); - - /* - * Now that we have posted the rescind state, perform - * rescind related cleanup. - */ - vmbus_rescind_cleanup(channel); - /* * Now wait for offer handling to complete. */ @@ -901,6 +898,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) if (channel->device_obj) { if (channel->chn_rescind_callback) { channel->chn_rescind_callback(channel); + vmbus_rescind_cleanup(channel); return; } /* @@ -909,6 +907,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) */ dev = get_device(&channel->device_obj->device); if (dev) { + vmbus_rescind_cleanup(channel); vmbus_device_unregister(channel->device_obj); put_device(dev); } @@ -921,16 +920,16 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) * 1. Close all sub-channels first * 2. Then close the primary channel. */ + mutex_lock(&vmbus_connection.channel_mutex); + vmbus_rescind_cleanup(channel); if (channel->state == CHANNEL_OPEN_STATE) { /* * The channel is currently not open; * it is safe for us to cleanup the channel. */ - mutex_lock(&vmbus_connection.channel_mutex); - hv_process_channel_removal(channel, - channel->offermsg.child_relid); - mutex_unlock(&vmbus_connection.channel_mutex); + hv_process_channel_removal(rescind->child_relid); } + mutex_unlock(&vmbus_connection.channel_mutex); } } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a9d49f6f6501..937801ac2fe0 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -768,8 +768,7 @@ static void vmbus_device_release(struct device *device) struct vmbus_channel *channel = hv_dev->channel; mutex_lock(&vmbus_connection.channel_mutex); - hv_process_channel_removal(channel, - channel->offermsg.child_relid); + hv_process_channel_removal(channel->offermsg.child_relid); mutex_unlock(&vmbus_connection.channel_mutex); kfree(hv_dev); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c458d7b7ad19..6431087816ba 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1403,7 +1403,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); +void hv_process_channel_removal(u32 relid); void vmbus_setevent(struct vmbus_channel *channel); /* -- cgit v1.2.3-70-g09d2 From 512cf465ee01eb23936a9e6ed0b6414eccb00853 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 29 Sep 2017 15:39:49 -0700 Subject: binder: fix use-after-free in binder_transaction() User-space normally keeps the node alive when creating a transaction since it has a reference to the target. The local strong ref keeps it alive if the sending process dies before the target process processes the transaction. If the source process is malicious or has a reference counting bug, this can fail. In this case, when we attempt to decrement the node in the failure path, the node has already been freed. This is fixed by taking a tmpref on the node while constructing the transaction. To avoid re-acquiring the node lock and inner proc lock to increment the proc's tmpref, a helper is used that does the ref increments on both the node and proc. Signed-off-by: Todd Kjos Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 93 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 27 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index ab34239a76ee..0621a95b8597 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2582,6 +2582,48 @@ static bool binder_proc_transaction(struct binder_transaction *t, return true; } +/** + * binder_get_node_refs_for_txn() - Get required refs on node for txn + * @node: struct binder_node for which to get refs + * @proc: returns @node->proc if valid + * @error: if no @proc then returns BR_DEAD_REPLY + * + * User-space normally keeps the node alive when creating a transaction + * since it has a reference to the target. The local strong ref keeps it + * alive if the sending process dies before the target process processes + * the transaction. If the source process is malicious or has a reference + * counting bug, relying on the local strong ref can fail. + * + * Since user-space can cause the local strong ref to go away, we also take + * a tmpref on the node to ensure it survives while we are constructing + * the transaction. We also need a tmpref on the proc while we are + * constructing the transaction, so we take that here as well. + * + * Return: The target_node with refs taken or NULL if no @node->proc is NULL. + * Also sets @proc if valid. If the @node->proc is NULL indicating that the + * target proc has died, @error is set to BR_DEAD_REPLY + */ +static struct binder_node *binder_get_node_refs_for_txn( + struct binder_node *node, + struct binder_proc **procp, + uint32_t *error) +{ + struct binder_node *target_node = NULL; + + binder_node_inner_lock(node); + if (node->proc) { + target_node = node; + binder_inc_node_nilocked(node, 1, 0, NULL); + binder_inc_node_tmpref_ilocked(node); + node->proc->tmp_ref++; + *procp = node->proc; + } else + *error = BR_DEAD_REPLY; + binder_node_inner_unlock(node); + + return target_node; +} + static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, struct binder_transaction_data *tr, int reply, @@ -2685,43 +2727,35 @@ static void binder_transaction(struct binder_proc *proc, ref = binder_get_ref_olocked(proc, tr->target.handle, true); if (ref) { - binder_inc_node(ref->node, 1, 0, NULL); - target_node = ref->node; - } - binder_proc_unlock(proc); - if (target_node == NULL) { + target_node = binder_get_node_refs_for_txn( + ref->node, &target_proc, + &return_error); + } else { binder_user_error("%d:%d got transaction to invalid handle\n", - proc->pid, thread->pid); + proc->pid, thread->pid); return_error = BR_FAILED_REPLY; - return_error_param = -EINVAL; - return_error_line = __LINE__; - goto err_invalid_target_handle; } + binder_proc_unlock(proc); } else { mutex_lock(&context->context_mgr_node_lock); target_node = context->binder_context_mgr_node; - if (target_node == NULL) { + if (target_node) + target_node = binder_get_node_refs_for_txn( + target_node, &target_proc, + &return_error); + else return_error = BR_DEAD_REPLY; - mutex_unlock(&context->context_mgr_node_lock); - return_error_line = __LINE__; - goto err_no_context_mgr_node; - } - binder_inc_node(target_node, 1, 0, NULL); mutex_unlock(&context->context_mgr_node_lock); } - e->to_node = target_node->debug_id; - binder_node_lock(target_node); - target_proc = target_node->proc; - if (target_proc == NULL) { - binder_node_unlock(target_node); - return_error = BR_DEAD_REPLY; + if (!target_node) { + /* + * return_error is set above + */ + return_error_param = -EINVAL; return_error_line = __LINE__; goto err_dead_binder; } - binder_inner_proc_lock(target_proc); - target_proc->tmp_ref++; - binder_inner_proc_unlock(target_proc); - binder_node_unlock(target_node); + e->to_node = target_node->debug_id; if (security_binder_transaction(proc->tsk, target_proc->tsk) < 0) { return_error = BR_FAILED_REPLY; @@ -3071,6 +3105,8 @@ static void binder_transaction(struct binder_proc *proc, if (target_thread) binder_thread_dec_tmpref(target_thread); binder_proc_dec_tmpref(target_proc); + if (target_node) + binder_dec_node_tmpref(target_node); /* * write barrier to synchronize with initialization * of log entry @@ -3090,6 +3126,8 @@ err_bad_parent: err_copy_data_failed: trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, t->buffer, offp); + if (target_node) + binder_dec_node_tmpref(target_node); target_node = NULL; t->buffer->transaction = NULL; binder_alloc_free_buf(&target_proc->alloc, t->buffer); @@ -3104,13 +3142,14 @@ err_bad_call_stack: err_empty_call_stack: err_dead_binder: err_invalid_target_handle: -err_no_context_mgr_node: if (target_thread) binder_thread_dec_tmpref(target_thread); if (target_proc) binder_proc_dec_tmpref(target_proc); - if (target_node) + if (target_node) { binder_dec_node(target_node, 1, 0); + binder_dec_node_tmpref(target_node); + } binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d transaction failed %d/%d, size %lld-%lld line %d\n", -- cgit v1.2.3-70-g09d2 From 0b62bf862dc93a05fea97b6ca6ffca072e2f30c1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 Oct 2017 20:59:09 +0200 Subject: watchdog/core: Put softlockup_threads_initialized under ifdef guard The variable is unused when the softlockup detector is disabled in Kconfig. Signed-off-by: Thomas Gleixner --- kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d241bd99cee1..6bcb854909c0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -47,7 +47,6 @@ int __read_mostly watchdog_thresh = 10; int __read_mostly nmi_watchdog_available; struct cpumask watchdog_allowed_mask __read_mostly; -static bool softlockup_threads_initialized __read_mostly; struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); @@ -168,6 +167,7 @@ static void lockup_detector_update_enable(void) unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; +static bool softlockup_threads_initialized __read_mostly; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); -- cgit v1.2.3-70-g09d2 From 4edd8121e555acbee63578abeaf73026d055bbb4 Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Wed, 4 Oct 2017 11:42:00 +0200 Subject: ARM: dts: stm32: Fix STMPE1600 binding on stm32429i-eval board To declare gpio interrupt line for STMPE1600, 2 possibilities are offered: -use gpio binding (and then the gpiolib interface inside driver) -use interrupt binding as each gpio-controller are also interrupt controller on stm32f429. In STMPE 1600 node both (gpio and interrupt) bindings are defined. This patch fixes this issue and use only interrupt binding. Fixes: c04b2e72af8d ("ARM: dts: stm32: Enable STMPE1600 gpio expander of STM32F429-EVAL board") Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32429i-eval.dts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/stm32429i-eval.dts b/arch/arm/boot/dts/stm32429i-eval.dts index 97b1c2321ba9..5bdb90b2ae72 100644 --- a/arch/arm/boot/dts/stm32429i-eval.dts +++ b/arch/arm/boot/dts/stm32429i-eval.dts @@ -202,10 +202,8 @@ stmpe1600: stmpe1600@42 { compatible = "st,stmpe1600"; reg = <0x42>; - irq-gpio = <&gpioi 8 0>; - irq-trigger = <3>; interrupts = <8 3>; - interrupt-parent = <&exti>; + interrupt-parent = <&gpioi>; interrupt-controller; wakeup-source; -- cgit v1.2.3-70-g09d2 From 8969f1f8291762c13147c1ba89d46238af01675b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 1 Oct 2017 09:37:35 +0200 Subject: nvme-pci: Use PCI bus address for data/queues in CMB Currently, NVMe PCI host driver is programming CMB dma address as I/O SQs addresses. This results in failures on systems where 1:1 outbound mapping is not used (example Broadcom iProc SOCs) because CMB BAR will be progammed with PCI bus address but NVMe PCI EP will try to access CMB using dma address. To have CMB working on systems without 1:1 outbound mapping, we program PCI bus address for I/O SQs instead of dma address. This approach will work on systems with/without 1:1 outbound mapping. Based on a report and previous patch from Abhishek Shah. Fixes: 8ffaadf7 ("NVMe: Use CMB for the IO SQes if available") Cc: stable@vger.kernel.org Reported-by: Abhishek Shah Tested-by: Abhishek Shah Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cb73bc8cad3b..3f5a04c586ce 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -94,7 +94,7 @@ struct nvme_dev { struct mutex shutdown_lock; bool subsystem; void __iomem *cmb; - dma_addr_t cmb_dma_addr; + pci_bus_addr_t cmb_bus_addr; u64 cmb_size; u32 cmbsz; u32 cmbloc; @@ -1226,7 +1226,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) { unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth), dev->ctrl.page_size); - nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset; + nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset; nvmeq->sq_cmds_io = dev->cmb + offset; } else { nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), @@ -1527,7 +1527,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) resource_size_t bar_size; struct pci_dev *pdev = to_pci_dev(dev->dev); void __iomem *cmb; - dma_addr_t dma_addr; + int bar; dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); if (!(NVME_CMB_SZ(dev->cmbsz))) @@ -1540,7 +1540,8 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); size = szu * NVME_CMB_SZ(dev->cmbsz); offset = szu * NVME_CMB_OFST(dev->cmbloc); - bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc)); + bar = NVME_CMB_BIR(dev->cmbloc); + bar_size = pci_resource_len(pdev, bar); if (offset > bar_size) return NULL; @@ -1553,12 +1554,11 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) if (size > bar_size - offset) size = bar_size - offset; - dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset; - cmb = ioremap_wc(dma_addr, size); + cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size); if (!cmb) return NULL; - dev->cmb_dma_addr = dma_addr; + dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset; dev->cmb_size = size; return cmb; } -- cgit v1.2.3-70-g09d2 From 986e7b7e4991a5d3abab26f97a671512e09e4417 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 29 Sep 2017 11:25:08 +0800 Subject: regulator: axp20x: Fix poly-phase bit offset for AXP803 DCDC5/6 The bit offset used to check if DCDC5 and DCDC6 are tied together in poly-phase output is wrong. It was checking against a reserved bit, which is always false. In reality, neither the reference design layout nor actually produced boards tie these two buck regulators together. But we should still fix it, just in case. Fixes: 1dbe0ccb0631 ("regulator: axp20x-regulator: add support for AXP803") Signed-off-by: Chen-Yu Tsai Tested-by: Maxime Ripard Acked-by: Maxime Ripard Signed-off-by: Mark Brown --- drivers/regulator/axp20x-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index f18b36dd57dd..376a99b7cf5d 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -590,7 +590,7 @@ static bool axp20x_is_polyphase_slave(struct axp20x_dev *axp20x, int id) case AXP803_DCDC3: return !!(reg & BIT(6)); case AXP803_DCDC6: - return !!(reg & BIT(7)); + return !!(reg & BIT(5)); } break; -- cgit v1.2.3-70-g09d2 From 74f1282114acc7d67e25745efe200f020f823c8a Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 4 Oct 2017 11:15:04 +0200 Subject: powerpc/xive: Fix IPI reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When resetting an IPI, hw_ipi should also be set to zero. Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xive/spapr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index f24a70bc6855..d9c4c9366049 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -431,7 +431,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { + if (!xc->hw_ipi) + return; + xive_irq_bitmap_free(xc->hw_ipi); + xc->hw_ipi = 0; } #endif /* CONFIG_SMP */ -- cgit v1.2.3-70-g09d2 From cc56939802fb4c9548be53563387a0700baeec82 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 4 Oct 2017 11:15:05 +0200 Subject: powerpc/xive: Clear XIVE internal structures when a CPU is removed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") introduced support for the XIVE exploitation mode of the P9 interrupt controller on the pseries platform. At that time, support for CPU removal was not complete on PowerVM and CPU hot unplug remained untested. It appears that some cleanups of the XIVE internal structures are required before releasing the CPU, without which the kernel crashes in a RTAS call doing the CPU isolation. These changes fix the crash by deconfiguring the IPI interrupt source and clearing the event queues of the CPU when it is removed. Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xive/common.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index f387318678b9..a3b8d7d1316e 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1402,6 +1402,14 @@ void xive_teardown_cpu(void) if (xive_ops->teardown_cpu) xive_ops->teardown_cpu(cpu, xc); + +#ifdef CONFIG_SMP + /* Get rid of IPI */ + xive_cleanup_cpu_ipi(cpu, xc); +#endif + + /* Disable and free the queues */ + xive_cleanup_cpu_queues(cpu, xc); } void xive_kexec_teardown_cpu(int secondary) -- cgit v1.2.3-70-g09d2 From 7c6a4f3b1641195119ddbb531200f4dc4cecbafa Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 24 Sep 2017 10:30:43 -0700 Subject: powerpc/mm: Call flush_tlb_kernel_range with interrupts enabled flush_tlb_kernel_range() may call smp_call_function_many() which expects interrupts to be enabled. This results in a traceback. WARNING: CPU: 0 PID: 1 at kernel/smp.c:416 smp_call_function_many+0xcc/0x2fc CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.14.0-rc1-00009-g0666f56 #1 task: cf830000 task.stack: cf82e000 NIP: c00a93c8 LR: c00a9634 CTR: 00000001 REGS: cf82fde0 TRAP: 0700 Not tainted (4.14.0-rc1-00009-g0666f56) MSR: 00021000 CR: 24000082 XER: 00000000 GPR00: c00a9634 cf82fe90 cf830000 c050ad3c c0015a54 00000000 00000001 00000001 GPR08: 00000001 00000000 00000000 cf82e000 24000084 00000000 c0003150 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000001 00000000 c0510000 GPR24: 00000000 c0015a54 00000000 c050ad3c c051823c c050ad3c 00000025 00000000 NIP [c00a93c8] smp_call_function_many+0xcc/0x2fc LR [c00a9634] smp_call_function+0x3c/0x50 Call Trace: [cf82fe90] [00000010] 0x10 (unreliable) [cf82fed0] [c00a9634] smp_call_function+0x3c/0x50 [cf82fee0] [c0015d2c] flush_tlb_kernel_range+0x20/0x38 [cf82fef0] [c001524c] mark_initmem_nx+0x154/0x16c [cf82ff20] [c001484c] free_initmem+0x20/0x4c [cf82ff30] [c000316c] kernel_init+0x1c/0x108 [cf82ff40] [c000f3a8] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 7c0803a6 7d808120 38210040 4e800020 3d20c052 812981a0 2f890000 40beffac 3d20c051 8929ac64 2f890000 40beff9c <0fe00000> 4bffff94 7fc3f378 7f64db78 Fixes: 3184cc4b6f6a ("powerpc/mm: Fix kernel RAM protection after freeing ...") Fixes: e611939fc8ec ("powerpc/mm: Ensure change_page_attr() doesn't ...") Cc: Christophe Leroy Signed-off-by: Guenter Roeck Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 65eda1997c3f..f6c7f54c0515 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -361,9 +361,9 @@ static int change_page_attr(struct page *page, int numpages, pgprot_t prot) break; } wmb(); + local_irq_restore(flags); flush_tlb_kernel_range((unsigned long)page_address(start), (unsigned long)page_address(page)); - local_irq_restore(flags); return err; } -- cgit v1.2.3-70-g09d2 From ad670233c9e1d5feb365d870e30083ef1b889177 Mon Sep 17 00:00:00 2001 From: Peng Xu Date: Tue, 3 Oct 2017 23:21:51 +0300 Subject: nl80211: Define policy for packet pattern attributes Define a policy for packet pattern attributes in order to fix a potential read over the end of the buffer during nla_get_u32() of the NL80211_PKTPAT_OFFSET attribute. Note that the data there can always be read due to SKB allocation (with alignment and struct skb_shared_info at the end), but the data might be uninitialized. This could be used to leak some data from uninitialized vmalloc() memory, but most drivers don't allow an offset (so you'd just get -EINVAL if the data is non-zero) or just allow it with a fixed value - 100 or 128 bytes, so anything above that would get -EINVAL. With brcmfmac the limit is 1500 so (at least) one byte could be obtained. Cc: stable@kernel.org Signed-off-by: Peng Xu Signed-off-by: Jouni Malinen [rewrite description based on SKB allocation knowledge] Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 690874293cfc..d396cb61a280 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -549,6 +549,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, }; +/* policy for packet pattern attributes */ +static const struct nla_policy +nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = { + [NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -10532,7 +10540,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) u8 *mask_pat; nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - NULL, info->extack); + nl80211_packet_pattern_policy, + info->extack); err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10781,7 +10790,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + nl80211_packet_pattern_policy, NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; -- cgit v1.2.3-70-g09d2 From 2aaae13a9db7897a007c5d7bb46cacfb37dffacd Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Wed, 4 Oct 2017 15:34:48 +0200 Subject: ARM: dts: stm32: use right pinctrl compatible for stm32f469 Currently, same stm32f429-pinctrl driver is used for stm32f429 and stm32f469. As pin map is different between those 2 MCUs, a stm32f469-pinctrl driver has been recently added. This patch -allows to use stm32f469-pinctrl driver for stm32f469 boards -reworks stm32 devicetree files to fit with stm32f429 / stm32f469 In the same time it fixes an issue when only MACH_STM32F469 flag is selected in menuconfig. Fixes: d28bcd53fa90 ("ARM: stm32: Introduce MACH_STM32F469 flag") Reported-by: Nicolas Pitre Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32429i-eval.dts | 1 + arch/arm/boot/dts/stm32f4-pinctrl.dtsi | 343 +++++++++++++++++++++++++++++++ arch/arm/boot/dts/stm32f429-disco.dts | 1 + arch/arm/boot/dts/stm32f429-pinctrl.dtsi | 95 +++++++++ arch/arm/boot/dts/stm32f429.dtsi | 297 -------------------------- arch/arm/boot/dts/stm32f469-disco.dts | 1 + arch/arm/boot/dts/stm32f469-pinctrl.dtsi | 96 +++++++++ 7 files changed, 537 insertions(+), 297 deletions(-) create mode 100644 arch/arm/boot/dts/stm32f4-pinctrl.dtsi create mode 100644 arch/arm/boot/dts/stm32f429-pinctrl.dtsi create mode 100644 arch/arm/boot/dts/stm32f469-pinctrl.dtsi diff --git a/arch/arm/boot/dts/stm32429i-eval.dts b/arch/arm/boot/dts/stm32429i-eval.dts index 5bdb90b2ae72..293ecb957227 100644 --- a/arch/arm/boot/dts/stm32429i-eval.dts +++ b/arch/arm/boot/dts/stm32429i-eval.dts @@ -47,6 +47,7 @@ /dts-v1/; #include "stm32f429.dtsi" +#include "stm32f429-pinctrl.dtsi" #include #include diff --git a/arch/arm/boot/dts/stm32f4-pinctrl.dtsi b/arch/arm/boot/dts/stm32f4-pinctrl.dtsi new file mode 100644 index 000000000000..7f3560c0211d --- /dev/null +++ b/arch/arm/boot/dts/stm32f4-pinctrl.dtsi @@ -0,0 +1,343 @@ +/* + * Copyright 2017 - Alexandre Torgue + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +/ { + soc { + pinctrl: pin-controller { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x40020000 0x3000>; + interrupt-parent = <&exti>; + st,syscfg = <&syscfg 0x8>; + pins-are-numbered; + + gpioa: gpio@40020000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x0 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOA)>; + st,bank-name = "GPIOA"; + }; + + gpiob: gpio@40020400 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x400 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOB)>; + st,bank-name = "GPIOB"; + }; + + gpioc: gpio@40020800 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x800 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOC)>; + st,bank-name = "GPIOC"; + }; + + gpiod: gpio@40020c00 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0xc00 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOD)>; + st,bank-name = "GPIOD"; + }; + + gpioe: gpio@40021000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x1000 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOE)>; + st,bank-name = "GPIOE"; + }; + + gpiof: gpio@40021400 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x1400 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOF)>; + st,bank-name = "GPIOF"; + }; + + gpiog: gpio@40021800 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x1800 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOG)>; + st,bank-name = "GPIOG"; + }; + + gpioh: gpio@40021c00 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x1c00 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOH)>; + st,bank-name = "GPIOH"; + }; + + gpioi: gpio@40022000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x2000 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOI)>; + st,bank-name = "GPIOI"; + }; + + gpioj: gpio@40022400 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x2400 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOJ)>; + st,bank-name = "GPIOJ"; + }; + + gpiok: gpio@40022800 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x2800 0x400>; + clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOK)>; + st,bank-name = "GPIOK"; + }; + + usart1_pins_a: usart1@0 { + pins1 { + pinmux = ; + bias-disable; + drive-push-pull; + slew-rate = <0>; + }; + pins2 { + pinmux = ; + bias-disable; + }; + }; + + usart3_pins_a: usart3@0 { + pins1 { + pinmux = ; + bias-disable; + drive-push-pull; + slew-rate = <0>; + }; + pins2 { + pinmux = ; + bias-disable; + }; + }; + + usbotg_fs_pins_a: usbotg_fs@0 { + pins { + pinmux = , + , + ; + bias-disable; + drive-push-pull; + slew-rate = <2>; + }; + }; + + usbotg_fs_pins_b: usbotg_fs@1 { + pins { + pinmux = , + , + ; + bias-disable; + drive-push-pull; + slew-rate = <2>; + }; + }; + + usbotg_hs_pins_a: usbotg_hs@0 { + pins { + pinmux = , + , + , + , + , + , + , + , + , + , + , + ; + bias-disable; + drive-push-pull; + slew-rate = <2>; + }; + }; + + ethernet_mii: mii@0 { + pins { + pinmux = , + , + , + , + , + , + , + , + , + , + , + , + , + ; + slew-rate = <2>; + }; + }; + + adc3_in8_pin: adc@200 { + pins { + pinmux = ; + }; + }; + + pwm1_pins: pwm@1 { + pins { + pinmux = , + , + ; + }; + }; + + pwm3_pins: pwm@3 { + pins { + pinmux = , + ; + }; + }; + + i2c1_pins: i2c1@0 { + pins { + pinmux = , + ; + bias-disable; + drive-open-drain; + slew-rate = <3>; + }; + }; + + ltdc_pins: ltdc@0 { + pins { + pinmux = , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + slew-rate = <2>; + }; + }; + + dcmi_pins: dcmi@0 { + pins { + pinmux = , + , + , + , + , + , + , + , + , + , + , + , + , + , + ; + bias-disable; + drive-push-pull; + slew-rate = <3>; + }; + }; + }; + }; +}; diff --git a/arch/arm/boot/dts/stm32f429-disco.dts b/arch/arm/boot/dts/stm32f429-disco.dts index c66d617e4245..5ceb2cf3777f 100644 --- a/arch/arm/boot/dts/stm32f429-disco.dts +++ b/arch/arm/boot/dts/stm32f429-disco.dts @@ -47,6 +47,7 @@ /dts-v1/; #include "stm32f429.dtsi" +#include "stm32f429-pinctrl.dtsi" #include / { diff --git a/arch/arm/boot/dts/stm32f429-pinctrl.dtsi b/arch/arm/boot/dts/stm32f429-pinctrl.dtsi new file mode 100644 index 000000000000..3e7a17d9112e --- /dev/null +++ b/arch/arm/boot/dts/stm32f429-pinctrl.dtsi @@ -0,0 +1,95 @@ +/* + * Copyright 2017 - Alexandre Torgue + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "stm32f4-pinctrl.dtsi" + +/ { + soc { + pinctrl: pin-controller { + compatible = "st,stm32f429-pinctrl"; + + gpioa: gpio@40020000 { + gpio-ranges = <&pinctrl 0 0 16>; + }; + + gpiob: gpio@40020400 { + gpio-ranges = <&pinctrl 0 16 16>; + }; + + gpioc: gpio@40020800 { + gpio-ranges = <&pinctrl 0 32 16>; + }; + + gpiod: gpio@40020c00 { + gpio-ranges = <&pinctrl 0 48 16>; + }; + + gpioe: gpio@40021000 { + gpio-ranges = <&pinctrl 0 64 16>; + }; + + gpiof: gpio@40021400 { + gpio-ranges = <&pinctrl 0 80 16>; + }; + + gpiog: gpio@40021800 { + gpio-ranges = <&pinctrl 0 96 16>; + }; + + gpioh: gpio@40021c00 { + gpio-ranges = <&pinctrl 0 112 16>; + }; + + gpioi: gpio@40022000 { + gpio-ranges = <&pinctrl 0 128 16>; + }; + + gpioj: gpio@40022400 { + gpio-ranges = <&pinctrl 0 144 16>; + }; + + gpiok: gpio@40022800 { + gpio-ranges = <&pinctrl 0 160 8>; + }; + }; + }; +}; diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi index dd7e99b1f43b..5b36eb114ddc 100644 --- a/arch/arm/boot/dts/stm32f429.dtsi +++ b/arch/arm/boot/dts/stm32f429.dtsi @@ -47,7 +47,6 @@ #include "skeleton.dtsi" #include "armv7-m.dtsi" -#include #include #include @@ -591,302 +590,6 @@ status = "disabled"; }; - pinctrl: pin-controller { - #address-cells = <1>; - #size-cells = <1>; - compatible = "st,stm32f429-pinctrl"; - ranges = <0 0x40020000 0x3000>; - interrupt-parent = <&exti>; - st,syscfg = <&syscfg 0x8>; - pins-are-numbered; - - gpioa: gpio@40020000 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x0 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOA)>; - st,bank-name = "GPIOA"; - }; - - gpiob: gpio@40020400 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x400 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOB)>; - st,bank-name = "GPIOB"; - }; - - gpioc: gpio@40020800 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x800 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOC)>; - st,bank-name = "GPIOC"; - }; - - gpiod: gpio@40020c00 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0xc00 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOD)>; - st,bank-name = "GPIOD"; - }; - - gpioe: gpio@40021000 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x1000 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOE)>; - st,bank-name = "GPIOE"; - }; - - gpiof: gpio@40021400 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x1400 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOF)>; - st,bank-name = "GPIOF"; - }; - - gpiog: gpio@40021800 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x1800 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOG)>; - st,bank-name = "GPIOG"; - }; - - gpioh: gpio@40021c00 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x1c00 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOH)>; - st,bank-name = "GPIOH"; - }; - - gpioi: gpio@40022000 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x2000 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOI)>; - st,bank-name = "GPIOI"; - }; - - gpioj: gpio@40022400 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x2400 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOJ)>; - st,bank-name = "GPIOJ"; - }; - - gpiok: gpio@40022800 { - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - reg = <0x2800 0x400>; - clocks = <&rcc 0 STM32F4_AHB1_CLOCK(GPIOK)>; - st,bank-name = "GPIOK"; - }; - - usart1_pins_a: usart1@0 { - pins1 { - pinmux = ; - bias-disable; - drive-push-pull; - slew-rate = <0>; - }; - pins2 { - pinmux = ; - bias-disable; - }; - }; - - usart3_pins_a: usart3@0 { - pins1 { - pinmux = ; - bias-disable; - drive-push-pull; - slew-rate = <0>; - }; - pins2 { - pinmux = ; - bias-disable; - }; - }; - - usbotg_fs_pins_a: usbotg_fs@0 { - pins { - pinmux = , - , - ; - bias-disable; - drive-push-pull; - slew-rate = <2>; - }; - }; - - usbotg_fs_pins_b: usbotg_fs@1 { - pins { - pinmux = , - , - ; - bias-disable; - drive-push-pull; - slew-rate = <2>; - }; - }; - - usbotg_hs_pins_a: usbotg_hs@0 { - pins { - pinmux = , - , - , - , - , - , - , - , - , - , - , - ; - bias-disable; - drive-push-pull; - slew-rate = <2>; - }; - }; - - ethernet_mii: mii@0 { - pins { - pinmux = , - , - , - , - , - , - , - , - , - , - , - , - , - ; - slew-rate = <2>; - }; - }; - - adc3_in8_pin: adc@200 { - pins { - pinmux = ; - }; - }; - - pwm1_pins: pwm@1 { - pins { - pinmux = , - , - ; - }; - }; - - pwm3_pins: pwm@3 { - pins { - pinmux = , - ; - }; - }; - - i2c1_pins: i2c1@0 { - pins { - pinmux = , - ; - bias-disable; - drive-open-drain; - slew-rate = <3>; - }; - }; - - ltdc_pins: ltdc@0 { - pins { - pinmux = , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - , - ; - slew-rate = <2>; - }; - }; - - dcmi_pins: dcmi@0 { - pins { - pinmux = , - , - , - , - , - , - , - , - , - , - , - , - , - , - ; - bias-disable; - drive-push-pull; - slew-rate = <3>; - }; - }; - }; - crc: crc@40023000 { compatible = "st,stm32f4-crc"; reg = <0x40023000 0x400>; diff --git a/arch/arm/boot/dts/stm32f469-disco.dts b/arch/arm/boot/dts/stm32f469-disco.dts index 6ae1f037f3f0..c18acbe4cf4e 100644 --- a/arch/arm/boot/dts/stm32f469-disco.dts +++ b/arch/arm/boot/dts/stm32f469-disco.dts @@ -47,6 +47,7 @@ /dts-v1/; #include "stm32f429.dtsi" +#include "stm32f469-pinctrl.dtsi" / { model = "STMicroelectronics STM32F469i-DISCO board"; diff --git a/arch/arm/boot/dts/stm32f469-pinctrl.dtsi b/arch/arm/boot/dts/stm32f469-pinctrl.dtsi new file mode 100644 index 000000000000..fff542662eea --- /dev/null +++ b/arch/arm/boot/dts/stm32f469-pinctrl.dtsi @@ -0,0 +1,96 @@ +/* + * Copyright 2017 - Alexandre Torgue + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "stm32f4-pinctrl.dtsi" + +/ { + soc { + pinctrl: pin-controller { + compatible = "st,stm32f469-pinctrl"; + + gpioa: gpio@40020000 { + gpio-ranges = <&pinctrl 0 0 16>; + }; + + gpiob: gpio@40020400 { + gpio-ranges = <&pinctrl 0 16 16>; + }; + + gpioc: gpio@40020800 { + gpio-ranges = <&pinctrl 0 32 16>; + }; + + gpiod: gpio@40020c00 { + gpio-ranges = <&pinctrl 0 48 16>; + }; + + gpioe: gpio@40021000 { + gpio-ranges = <&pinctrl 0 64 16>; + }; + + gpiof: gpio@40021400 { + gpio-ranges = <&pinctrl 0 80 16>; + }; + + gpiog: gpio@40021800 { + gpio-ranges = <&pinctrl 0 96 16>; + }; + + gpioh: gpio@40021c00 { + gpio-ranges = <&pinctrl 0 112 16>; + }; + + gpioi: gpio@40022000 { + gpio-ranges = <&pinctrl 0 128 16>; + }; + + gpioj: gpio@40022400 { + gpio-ranges = <&pinctrl 0 144 6>, + <&pinctrl 12 156 4>; + }; + + gpiok: gpio@40022800 { + gpio-ranges = <&pinctrl 3 163 5>; + }; + }; + }; +}; -- cgit v1.2.3-70-g09d2 From 2d8ce70a08fe033c904115d59276ad86adeaa337 Mon Sep 17 00:00:00 2001 From: Goffredo Baroncelli Date: Tue, 3 Oct 2017 19:31:10 +0200 Subject: btrfs: avoid overflow when sector_t is 32 bit Jean-Denis Girard noticed commit c821e7f3 "pass bytes to btrfs_bio_alloc" (https://patchwork.kernel.org/patch/9763081/) introduces a regression on 32 bit machines. When CONFIG_LBDAF is _not_ defined (CONFIG_LBDAF == Support for large (2TB+) block devices and files) sector_t is 32 bit on 32bit machines. In the function submit_extent_page, 'sector' (which is sector_t type) is multiplied by 512 to convert it from sectors to bytes, leading to an overflow when the disk is bigger than 4GB (!). I added a cast to u64 to avoid overflow. Fixes: c821e7f3 ("btrfs: pass bytes to btrfs_bio_alloc") CC: stable@vger.kernel.org # 4.13+ Signed-off-by: Goffredo Baroncelli Tested-by: Jean-Denis Girard Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3738d245518c..8dfa181b1a92 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2801,7 +2801,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, } } - bio = btrfs_bio_alloc(bdev, sector << 9); + bio = btrfs_bio_alloc(bdev, (u64)sector << 9); bio_add_page(bio, page, page_size, offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; -- cgit v1.2.3-70-g09d2 From eab40cf336065e8d765e006b81ff48c5c114b365 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Tue, 3 Oct 2017 12:48:37 +0200 Subject: bsg-lib: fix use-after-free under memory-pressure When under memory-pressure it is possible that the mempool which backs the 'struct request_queue' will make use of up to BLKDEV_MIN_RQ count emergency buffers - in case it can't get a regular allocation. These buffers are preallocated and once they are also used, they are re-supplied with old finished requests from the same request_queue (see mempool_free()). The bug is, when re-supplying the emergency pool, the old requests are not again ran through the callback mempool_t->alloc(), and thus also not through the callback bsg_init_rq(). Thus we skip initialization, and while the sense-buffer still should be good, scsi_request->cmd might have become to be an invalid pointer in the meantime. When the request is initialized in bsg.c, and the user's CDB is larger than BLK_MAX_CDB, bsg will replace it with a custom allocated buffer, which is freed when the user's command is finished, thus it dangles afterwards. When next a command is sent by the user that has a smaller/similar CDB as BLK_MAX_CDB, bsg will assume that scsi_request->cmd is backed by scsi_request->__cmd, will not make a custom allocation, and write into undefined memory. Fix this by splitting bsg_init_rq() into two functions: - bsg_init_rq() is changed to only do the allocation of the sense-buffer, which is used to back the bsg job's reply buffer. This pointer should never change during the lifetime of a scsi_request, so it doesn't need re-initialization. - bsg_initialize_rq() is a new function that makes use of 'struct request_queue's initialize_rq_fn callback (which was introduced in v4.12). This is always called before the request is given out via blk_get_request(). This function does the remaining initialization that was previously done in bsg_init_rq(), and will also do it when the request is taken from the emergency-pool of the backing mempool. Fixes: 50b4d485528d ("bsg-lib: fix kernel panic resulting from missing allocation of reply-buffer") Cc: # 4.11+ Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Benjamin Block Signed-off-by: Jens Axboe --- block/bsg-lib.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/block/bsg-lib.c b/block/bsg-lib.c index dbddff8174e5..15d25ccd51a5 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -207,20 +207,34 @@ static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) struct bsg_job *job = blk_mq_rq_to_pdu(req); struct scsi_request *sreq = &job->sreq; + /* called right after the request is allocated for the request_queue */ + + sreq->sense = kzalloc(SCSI_SENSE_BUFFERSIZE, gfp); + if (!sreq->sense) + return -ENOMEM; + + return 0; +} + +static void bsg_initialize_rq(struct request *req) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + void *sense = sreq->sense; + + /* called right before the request is given to the request_queue user */ + memset(job, 0, sizeof(*job)); scsi_req_init(sreq); + + sreq->sense = sense; sreq->sense_len = SCSI_SENSE_BUFFERSIZE; - sreq->sense = kzalloc(sreq->sense_len, gfp); - if (!sreq->sense) - return -ENOMEM; job->req = req; - job->reply = sreq->sense; + job->reply = sense; job->reply_len = sreq->sense_len; job->dd_data = job + 1; - - return 0; } static void bsg_exit_rq(struct request_queue *q, struct request *req) @@ -251,6 +265,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, q->cmd_size = sizeof(struct bsg_job) + dd_job_size; q->init_rq_fn = bsg_init_rq; q->exit_rq_fn = bsg_exit_rq; + q->initialize_rq_fn = bsg_initialize_rq; q->request_fn = bsg_request_fn; ret = blk_init_allocated_queue(q); -- cgit v1.2.3-70-g09d2 From 69ad59767d094752c23c0fc180a79532fde073d0 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Wed, 4 Oct 2017 11:05:17 +0900 Subject: Btrfs: fix overlap of fs_info::flags values Because the values of BTRFS_FS_EXCL_OP and BTRFS_FS_QUOTA_OVERRIDE overlap, we should change the value. First, BTRFS_FS_EXCL_OP was set to 14. commit 171938e52807 ("btrfs: track exclusive filesystem operation in flags") Next, the value of BTRFS_FS_QUOTA_OVERRIDE was set to 14. commit f29efe292198 ("btrfs: add quota override flag to enable quota override for CAP_SYS_RESOURCE") As a result, the value 14 overlapped, by accident. This problem is solved by defining the value of BTRFS_FS_EXCL_OP as 16, the flags are internal. Fixes: f29efe292198 ("btrfs: add quota override flag to enable quota override for CAP_SYS_RESOURCE") CC: stable@vger.kernel.org # 4.13+ Signed-off-by: Tsutomu Itoh Reviewed-by: David Sterba [ minimize the change, update only BTRFS_FS_EXCL_OP ] Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b7ccfcc01732..aff3248beb90 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -721,7 +721,7 @@ struct btrfs_delayed_root; * Indicate that a whole-filesystem exclusive operation is running * (device replace, resize, device add/delete, balance) */ -#define BTRFS_FS_EXCL_OP 14 +#define BTRFS_FS_EXCL_OP 16 struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; -- cgit v1.2.3-70-g09d2 From 783874b050768d361239e444ba0fa396bb6d463f Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 13 Sep 2017 15:45:56 +0200 Subject: dm crypt: reject sector_size feature if device length is not aligned to it If a crypt mapping uses optional sector_size feature, additional restrictions to mapped device segment size must be applied in constructor, otherwise the device activation will fail later. Fixes: 8f0009a225 ("dm crypt: optionally support larger encryption sector size") Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 75341fdca4b6..96ab46512e1f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2585,6 +2585,10 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar ti->error = "Invalid feature value for sector_size"; return -EINVAL; } + if (ti->len & ((cc->sector_size >> SECTOR_SHIFT) - 1)) { + ti->error = "Device size is not multiple of sector_size feature"; + return -EINVAL; + } cc->sector_shift = __ffs(cc->sector_size) - SECTOR_SHIFT; } else if (!strcasecmp(opt_string, "iv_large_sectors")) set_bit(CRYPT_IV_LARGE_SECTORS, &cc->cipher_flags); -- cgit v1.2.3-70-g09d2 From 5dcbeca615ef12047a5f4097b91030cbf995b1d2 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 19 Sep 2017 12:01:08 +0200 Subject: clk: samsung: exynos4: Enable VPLL and EPLL clocks for suspend/resume cycle Commit 6edfa11cb396 ("clk: samsung: Add enable/disable operation for PLL36XX clocks") added enable/disable operations to PLL clocks. Prior that VPLL and EPPL clocks were always enabled because the enable bit was never touched. Those clocks have to be enabled during suspend/resume cycle, because otherwise board fails to enter sleep mode. This patch enables them unconditionally before entering system suspend state. System restore function will set them to the previous state saved in the register cache done before that unconditional enable. Fixes: 6edfa11cb396 ("clk: samsung: Add enable/disable operation for PLL36XX clocks") CC: stable@vger.kernel.org # v4.13 Signed-off-by: Marek Szyprowski Reviewed-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski Acked-by: Sylwester Nawrocki Signed-off-by: Stephen Boyd --- drivers/clk/samsung/clk-exynos4.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index e40b77583c47..d8d3cb67b402 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -294,6 +294,18 @@ static const struct samsung_clk_reg_dump src_mask_suspend_e4210[] = { #define PLL_ENABLED (1 << 31) #define PLL_LOCKED (1 << 29) +static void exynos4_clk_enable_pll(u32 reg) +{ + u32 pll_con = readl(reg_base + reg); + pll_con |= PLL_ENABLED; + writel(pll_con, reg_base + reg); + + while (!(pll_con & PLL_LOCKED)) { + cpu_relax(); + pll_con = readl(reg_base + reg); + } +} + static void exynos4_clk_wait_for_pll(u32 reg) { u32 pll_con; @@ -315,6 +327,9 @@ static int exynos4_clk_suspend(void) samsung_clk_save(reg_base, exynos4_save_pll, ARRAY_SIZE(exynos4_clk_pll_regs)); + exynos4_clk_enable_pll(EPLL_CON0); + exynos4_clk_enable_pll(VPLL_CON0); + if (exynos4_soc == EXYNOS4210) { samsung_clk_save(reg_base, exynos4_save_soc, ARRAY_SIZE(exynos4210_clk_save)); -- cgit v1.2.3-70-g09d2 From a2b7861bb33b2538420bb5d8554153484d3f961f Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Oct 2017 21:36:51 +0800 Subject: kvm/x86: Avoid async PF preempting the kernel incorrectly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, in PREEMPT_COUNT=n kernel, kvm_async_pf_task_wait() could call schedule() to reschedule in some cases. This could result in accidentally ending the current RCU read-side critical section early, causing random memory corruption in the guest, or otherwise preempting the currently running task inside between preempt_disable and preempt_enable. The difficulty to handle this well is because we don't know whether an async PF delivered in a preemptible section or RCU read-side critical section for PREEMPT_COUNT=n, since preempt_disable()/enable() and rcu_read_lock/unlock() are both no-ops in that case. To cure this, we treat any async PF interrupting a kernel context as one that cannot be preempted, preventing kvm_async_pf_task_wait() from choosing the schedule() path in that case. To do so, a second parameter for kvm_async_pf_task_wait() is introduced, so that we know whether it's called from a context interrupting the kernel, and the parameter is set properly in all the callsites. Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Boqun Feng Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_para.h | 4 ++-- arch/x86/kernel/kvm.c | 14 ++++++++++---- arch/x86/kvm/mmu.c | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index bc62e7cbf1b1..59ad3d132353 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); void __init kvm_guest_init(void); -void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); @@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void) #else /* CONFIG_KVM_GUEST */ #define kvm_guest_init() do {} while (0) -#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wait(T, I) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e675704fa6f7..8bb9594d0761 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, return NULL; } -void kvm_async_pf_task_wait(u32 token) +/* + * @interrupt_kernel: Is this called from a routine which interrupts the kernel + * (other than user space)? + */ +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; @@ -140,8 +144,10 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.halted = is_idle_task(current) || preempt_count() > 1 || - rcu_preempt_depth(); + n.halted = is_idle_task(current) || + (IS_ENABLED(CONFIG_PREEMPT_COUNT) + ? preempt_count() > 1 || rcu_preempt_depth() + : interrupt_kernel); init_swait_queue_head(&n.wq); hlist_add_head(&n.link, &b->list); raw_spin_unlock(&b->lock); @@ -269,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ prev_state = exception_enter(); - kvm_async_pf_task_wait((u32)read_cr2()); + kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs)); exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca30c1eb1d9..106d4a029a8a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3837,7 +3837,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, case KVM_PV_REASON_PAGE_NOT_PRESENT: vcpu->arch.apf.host_apf_reason = 0; local_irq_disable(); - kvm_async_pf_task_wait(fault_address); + kvm_async_pf_task_wait(fault_address, 0); local_irq_enable(); break; case KVM_PV_REASON_PAGE_READY: -- cgit v1.2.3-70-g09d2 From 37f6b42e9c2966b08c7df5cfddc0d73c39cead4a Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 2 Oct 2017 18:28:44 +0100 Subject: ACPI/IORT: Fix PCI ACS enablement commit f6810c15cf97 ("iommu/arm-smmu: Clean up early-probing workarounds") removed kernel code that was allowing to initialize and probe the SMMU devices early (ie earlier than PCI devices, through linker script callback entries) in the boot process because it was not needed any longer in that the SMMU devices/drivers now support deferred probing. Since the SMMUs probe routines are also in charge of requesting global PCI ACS kernel enablement, commit f6810c15cf97 ("iommu/arm-smmu: Clean up early-probing workarounds") also postponed PCI ACS enablement to SMMUs devices probe time, which is too late given that PCI devices needs to detect if PCI ACS is enabled to init the respective capability through the following call path: pci_device_add() -> pci_init_capabilities() -> pci_enable_acs() Add code in the ACPI IORT SMMU platform devices initialization path (that is called before ACPI PCI enumeration) to detect if there exists firmware mappings to map root complexes ids to SMMU ids and if so enable ACS for the system. Fixes: f6810c15cf97 ("iommu/arm-smmu: Clean up early-probing workarounds") Reviewed-by: Robin Murphy Tested-by: Nate Watterson Signed-off-by: Lorenzo Pieralisi Cc: Will Deacon Cc: Hanjun Guo Cc: Sudeep Holla Cc: Zhou Wang Cc: Alex Williamson Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/iort.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 9565d572f8dd..de56394dd161 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1178,12 +1178,44 @@ dev_put: return ret; } +static bool __init iort_enable_acs(struct acpi_iort_node *iort_node) +{ + if (iort_node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) { + struct acpi_iort_node *parent; + struct acpi_iort_id_mapping *map; + int i; + + map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, iort_node, + iort_node->mapping_offset); + + for (i = 0; i < iort_node->mapping_count; i++, map++) { + if (!map->output_reference) + continue; + + parent = ACPI_ADD_PTR(struct acpi_iort_node, + iort_table, map->output_reference); + /* + * If we detect a RC->SMMU mapping, make sure + * we enable ACS on the system. + */ + if ((parent->type == ACPI_IORT_NODE_SMMU) || + (parent->type == ACPI_IORT_NODE_SMMU_V3)) { + pci_request_acs(); + return true; + } + } + } + + return false; +} + static void __init iort_init_platform_devices(void) { struct acpi_iort_node *iort_node, *iort_end; struct acpi_table_iort *iort; struct fwnode_handle *fwnode; int i, ret; + bool acs_enabled = false; /* * iort_table and iort both point to the start of IORT table, but @@ -1203,6 +1235,9 @@ static void __init iort_init_platform_devices(void) return; } + if (!acs_enabled) + acs_enabled = iort_enable_acs(iort_node); + if ((iort_node->type == ACPI_IORT_NODE_SMMU) || (iort_node->type == ACPI_IORT_NODE_SMMU_V3)) { -- cgit v1.2.3-70-g09d2 From b02faed15d86f846b0f23f47b92e0782baa873ed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 3 Oct 2017 18:25:46 +0100 Subject: arm64: Use larger stacks when KASAN is selected AddressSanitizer instrumentation can significantly bloat the stack, and with GCC 7 this can result in stack overflows at boot time in some configurations. We can avoid this by doubling our stack size when KASAN is in use, as is already done on x86 (and has been since KASAN was introduced). Regardless of other patches to decrease KASAN's stack utilization, kernels built with KASAN will always require more stack space than those built without, and we should take this into account. Signed-off-by: Mark Rutland Cc: Will Deacon Cc: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 3585a5e26151..f7c4d2146aed 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -95,16 +95,19 @@ #define KERNEL_END _end /* - * The size of the KASAN shadow region. This should be 1/8th of the - * size of the entire kernel virtual address space. + * KASAN requires 1/8th of the kernel virtual address space for the shadow + * region. KASAN can bloat the stack significantly, so double the (minimum) + * stack size when KASAN is in use. */ #ifdef CONFIG_KASAN #define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3)) +#define KASAN_THREAD_SHIFT 1 #else #define KASAN_SHADOW_SIZE (0) +#define KASAN_THREAD_SHIFT 0 #endif -#define MIN_THREAD_SHIFT 14 +#define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) /* * VMAP'd stacks are allocated at page granularity, so we must ensure that such -- cgit v1.2.3-70-g09d2 From e8fa33a6f6c7688591542db955794b69b8cecc55 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Oct 2017 13:49:12 -0400 Subject: NFSv4/pnfs: Fix an infinite layoutget loop Since we can now use a lock stateid or a delegation stateid, that differs from the context stateid, we need to change the test in nfs4_layoutget_handle_exception() to take this into account. This fixes an infinite layoutget loop in the NFS client whereby it keeps retrying the initial layoutget using the same broken stateid. Fixes: 70d2f7b1ea19b ("pNFS: Use the standard I/O stateid when...") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6c61e2b99635..f90090e8c959 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8399,8 +8399,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, lo = NFS_I(inode)->layout; /* If the open stateid was bad, then recover it. */ if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || - nfs4_stateid_match_other(&lgp->args.stateid, - &lgp->args.ctx->state->stateid)) { + !nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) { spin_unlock(&inode->i_lock); exception->state = lgp->args.ctx->state; exception->stateid = &lgp->args.stateid; -- cgit v1.2.3-70-g09d2 From e19cd282ebedb801e572efae5df2f88d573932ce Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 1 Oct 2017 09:54:35 +0300 Subject: IB/mlx5: Fix label order in error path handling When UAR get_page fails, it needs to continue to cleanup debugfs for congestion control parameters. Labels for error path were incorrectly ordered. This patch fixes to do correct cleanup on debugfs init failure and uar get page failure. Fixes: 4a2da0b8c078 ("IB/mlx5: Add debug control parameters for congestion control") Signed-off-by: Parav Pandit Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d6fbad8f34aa..552f7bd4ecc3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4174,9 +4174,9 @@ err_bfreg: err_uar_page: mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); -err_cnt: - mlx5_ib_cleanup_cong_debugfs(dev); err_cong: + mlx5_ib_cleanup_cong_debugfs(dev); +err_cnt: if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); -- cgit v1.2.3-70-g09d2 From efe63c220e57ce74e240670399912e609ac125d9 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Tue, 3 Oct 2017 14:47:26 +0300 Subject: RDMA/qedr: Parse VLAN ID correctly and ignore the value of zero Rename vlan_id field name to vlan as it contains more than the vlan_id. Mask out non vlan id fields from vlan tag of the QED LL2 RX GSI vlan output. As it is expected to be vlan id only. Ignore vlan_id with value of zero. Fixes: abd49676c707 ("qed: Add RoCE ll2 & GSI support") Signed-off-by: Ram Amrani Signed-off-by: Michal Kalderon Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr.h | 2 +- drivers/infiniband/hw/qedr/qedr_cm.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index b2bb42e2805d..254083b524bd 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -387,7 +387,7 @@ struct qedr_qp { u8 wqe_size; u8 smac[ETH_ALEN]; - u16 vlan_id; + u16 vlan; int rc; } *rqe_wr_id; diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c index 4689e802b332..5ebbe4952386 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -105,7 +105,7 @@ void qedr_ll2_complete_rx_packet(void *cxt, qp->rqe_wr_id[qp->rq.gsi_cons].rc = data->u.data_length_error ? -EINVAL : 0; - qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = data->vlan; + qp->rqe_wr_id[qp->rq.gsi_cons].vlan = data->vlan; /* note: length stands for data length i.e. GRH is excluded */ qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length = data->length.data_length; @@ -694,6 +694,7 @@ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct qedr_cq *cq = get_qedr_cq(ibcq); struct qedr_qp *qp = dev->gsi_qp; unsigned long flags; + u16 vlan_id; int i = 0; spin_lock_irqsave(&cq->cq_lock, flags); @@ -712,9 +713,12 @@ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) wc[i].wc_flags |= IB_WC_GRH | IB_WC_IP_CSUM_OK; ether_addr_copy(wc[i].smac, qp->rqe_wr_id[qp->rq.cons].smac); wc[i].wc_flags |= IB_WC_WITH_SMAC; - if (qp->rqe_wr_id[qp->rq.cons].vlan_id) { + + vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan & + VLAN_VID_MASK; + if (vlan_id) { wc[i].wc_flags |= IB_WC_WITH_VLAN; - wc[i].vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan_id; + wc[i].vlan_id = vlan_id; } qedr_inc_sw_cons(&qp->rq); -- cgit v1.2.3-70-g09d2 From 1736b4c99d1c53abec042d41b702aeabeb65d86a Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Tue, 3 Oct 2017 14:47:27 +0300 Subject: RDMA/qedr: Parse vlan priority as sl Parse the vlan priority from the vlan tag and configure it to the WC's sl field. Fixes: abd49676c707 ("qed: Add RoCE ll2 & GSI support") Signed-off-by: Ram Amrani Signed-off-by: Michal Kalderon Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr_cm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c index 5ebbe4952386..ad8965397cf7 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -719,6 +719,8 @@ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) if (vlan_id) { wc[i].wc_flags |= IB_WC_WITH_VLAN; wc[i].vlan_id = vlan_id; + wc[i].sl = (qp->rqe_wr_id[qp->rq.cons].vlan & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; } qedr_inc_sw_cons(&qp->rq); -- cgit v1.2.3-70-g09d2 From 43bfc24ec1d69853d706cb3ebfdb9088846b9b50 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 3 Oct 2017 11:11:49 -0500 Subject: i40iw: Add missing memory barriers Remove duplicate set_64bit_val call to offset 24. Replace some instances of set_64bit_val with i40iw_insert_wqe_hdr as valid bit needs a write barrier and should be the last write operation for the WQE. Fixes: 786c6adb3a94 ("i40iw: add puda code") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 2 +- drivers/infiniband/hw/i40iw/i40iw_p.h | 2 ++ drivers/infiniband/hw/i40iw/i40iw_puda.c | 11 ++++------- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index d1f5345f04f0..42ca5346777d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -48,7 +48,7 @@ * @wqe: cqp wqe for header * @header: header for the cqp wqe */ -static inline void i40iw_insert_wqe_hdr(u64 *wqe, u64 header) +void i40iw_insert_wqe_hdr(u64 *wqe, u64 header) { wmb(); /* make sure WQE is populated before polarity is set */ set_64bit_val(wqe, 24, header); diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h index e217a1259f57..5498ad01c280 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_p.h +++ b/drivers/infiniband/hw/i40iw/i40iw_p.h @@ -59,6 +59,8 @@ enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp, struct i40iw_fast_reg_stag_info *info, bool post_sq); +void i40iw_insert_wqe_hdr(u64 *wqe, u64 header); + /* HMC/FPM functions */ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_id); diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index c2cab20c4bc5..59f70676f0e0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -123,12 +123,11 @@ static void i40iw_puda_post_recvbuf(struct i40iw_puda_rsrc *rsrc, u32 wqe_idx, get_64bit_val(wqe, 24, &offset24); offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID); - set_64bit_val(wqe, 24, offset24); set_64bit_val(wqe, 0, buf->mem.pa); set_64bit_val(wqe, 8, LS_64(buf->mem.size, I40IWQPSQ_FRAG_LEN)); - set_64bit_val(wqe, 24, offset24); + i40iw_insert_wqe_hdr(wqe, offset24); } /** @@ -409,9 +408,7 @@ enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp, set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN)); set_64bit_val(wqe, 16, header[0]); - /* Ensure all data is written before writing valid bit */ - wmb(); - set_64bit_val(wqe, 24, header[1]); + i40iw_insert_wqe_hdr(wqe, header[1]); i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32); i40iw_qp_post_wr(&qp->qp_uk); @@ -539,7 +536,7 @@ static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct LS_64(2, I40IW_CQPSQ_QP_NEXTIWSTATE) | LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); - set_64bit_val(wqe, 24, header); + i40iw_insert_wqe_hdr(wqe, header); i40iw_debug_buf(cqp->dev, I40IW_DEBUG_PUDA, "PUDA CQE", wqe, 32); i40iw_sc_cqp_post_sq(cqp); @@ -655,7 +652,7 @@ static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) | LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) | LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID); - set_64bit_val(wqe, 24, header); + i40iw_insert_wqe_hdr(wqe, header); i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE", wqe, I40IW_CQP_WQE_SIZE * 8); -- cgit v1.2.3-70-g09d2 From 789f903fd75036f937409a9a1616a5a5e5cc5bae Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 3 Oct 2017 11:11:50 -0500 Subject: i40iw: Fix port number for query QP Port number 0 is an invalid port number. Set it to 1 as there is one port per i40iw device. Fixes: d37498417947 ("i40iw: add files for iwarp interface") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 28b3d02d511b..62be0a41ad0b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -826,12 +826,14 @@ static int i40iw_query_qp(struct ib_qp *ibqp, attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE; attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; + attr->port_num = 1; init_attr->event_handler = iwqp->ibqp.event_handler; init_attr->qp_context = iwqp->ibqp.qp_context; init_attr->send_cq = iwqp->ibqp.send_cq; init_attr->recv_cq = iwqp->ibqp.recv_cq; init_attr->srq = iwqp->ibqp.srq; init_attr->cap = attr->cap; + init_attr->port_num = 1; return 0; } -- cgit v1.2.3-70-g09d2 From 2a8408e5372503fe4df9b5bbb49fb3af0c212c67 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Oct 2017 12:51:58 +0300 Subject: drm/i915/cnl: Reprogram DMC firmware after S3/S4 resume The DMC firmware program memory is lost after S3/S4 system suspend, so we need to reprogram it during resume. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103070 Fixes: cebfcead63de ("drm/i915/DMC/CNL: Load DMC on CNL") Cc: Anusha Srivatsa Cc: Animesh Manna Cc: Rodrigo Vivi Signed-off-by: Imre Deak Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171003095159.711-1-imre.deak@intel.com (cherry picked from commit 57522c4c87de20d8f7ad4e142a3a4334066d55ff) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_runtime_pm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index b66d8e136aa3..b3a087cb0860 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2782,6 +2782,9 @@ static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume /* 6. Enable DBUF */ gen9_dbuf_enable(dev_priv); + + if (resume && dev_priv->csr.dmc_payload) + intel_csr_load_program(dev_priv); } #undef CNL_PROCMON_IDX -- cgit v1.2.3-70-g09d2 From 069d40f5834ad26a58f269225a7e13af17019062 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 3 Oct 2017 12:51:59 +0300 Subject: drm/i915/glk: Fix DMC/DC state idleness calculation According to BSpec GLK like BXT needs to ignore the idle state of cores before starting the DMC firmware's DC state handler. Fixes: dbb28b5c3d3c ("drm/i915/DMC/GLK: Load DMC on GLK") Cc: Anusha Srivatsa Cc: Rodrigo Vivi Signed-off-by: Imre Deak Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171003095159.711-2-imre.deak@intel.com (cherry picked from commit b7208a3f3e52791571df064fb96025ad48edd1bf) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_csr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 965988f79a55..92c1f8e166dc 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -216,7 +216,7 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv) mask = DC_STATE_DEBUG_MASK_MEMORY_UP; - if (IS_BROXTON(dev_priv)) + if (IS_GEN9_LP(dev_priv)) mask |= DC_STATE_DEBUG_MASK_CORES; /* The below bit doesn't need to be cleared ever afterwards */ -- cgit v1.2.3-70-g09d2 From e0a86312874e36033cd94fb977dd603a292875c8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 4 Oct 2017 23:10:59 +0100 Subject: Update James Hogan's email address Update my imgtec.com and personal email address to my kernel.org one in a few places as MIPS will soon no longer be part of Imagination Technologies, and add mappings in .mailcap so get_maintainer.pl reports the right address. Signed-off-by: James Hogan Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ Documentation/ABI/testing/sysfs-power | 2 +- MAINTAINERS | 6 +++--- drivers/i2c/busses/i2c-img-scb.c | 2 +- drivers/media/rc/ir-sharp-decoder.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.mailmap b/.mailmap index 5273cfd70ad6..c7b10caecc4e 100644 --- a/.mailmap +++ b/.mailmap @@ -68,6 +68,8 @@ Jacob Shin James Bottomley James Bottomley James E Wilson +James Hogan +James Hogan James Ketrenos Javi Merino diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 713cab1d5f12..a1d1612f3651 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -127,7 +127,7 @@ Description: What; /sys/power/pm_trace_dev_match Date: October 2010 -Contact: James Hogan +Contact: James Hogan Description: The /sys/power/pm_trace_dev_match file contains the name of the device associated with the last PM event point saved in the RTC diff --git a/MAINTAINERS b/MAINTAINERS index 65b0c88d5ee0..3f05fc6961ad 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6738,7 +6738,7 @@ F: Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt F: drivers/auxdisplay/img-ascii-lcd.c IMGTEC IR DECODER DRIVER -M: James Hogan +M: James Hogan S: Maintained F: drivers/media/rc/img-ir/ @@ -7562,7 +7562,7 @@ F: arch/arm64/include/asm/kvm* F: arch/arm64/kvm/ KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips) -M: James Hogan +M: James Hogan L: linux-mips@linux-mips.org S: Supported F: arch/mips/include/uapi/asm/kvm* @@ -8885,7 +8885,7 @@ F: Documentation/devicetree/bindings/media/meson-ao-cec.txt T: git git://linuxtv.org/media_tree.git METAG ARCHITECTURE -M: James Hogan +M: James Hogan L: linux-metag@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag.git S: Odd Fixes diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c index 84fb35f6837f..eb1d91b986fd 100644 --- a/drivers/i2c/busses/i2c-img-scb.c +++ b/drivers/i2c/busses/i2c-img-scb.c @@ -1459,6 +1459,6 @@ static struct platform_driver img_scb_i2c_driver = { }; module_platform_driver(img_scb_i2c_driver); -MODULE_AUTHOR("James Hogan "); +MODULE_AUTHOR("James Hogan "); MODULE_DESCRIPTION("IMG host I2C driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/media/rc/ir-sharp-decoder.c b/drivers/media/rc/ir-sharp-decoder.c index ed43a4212479..129b558acc92 100644 --- a/drivers/media/rc/ir-sharp-decoder.c +++ b/drivers/media/rc/ir-sharp-decoder.c @@ -245,5 +245,5 @@ module_init(ir_sharp_decode_init); module_exit(ir_sharp_decode_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("James Hogan "); +MODULE_AUTHOR("James Hogan "); MODULE_DESCRIPTION("Sharp IR protocol decoder"); -- cgit v1.2.3-70-g09d2 From e769fcec6bc4bdd1b0e2cf817680148f9c40b1c4 Mon Sep 17 00:00:00 2001 From: Vishakha Narvekar Date: Tue, 3 Oct 2017 16:13:29 -0400 Subject: net: 8021q: skip packets if the vlan is down If the vlan is down, free the packet instead of proceeding with other processing, or counting it as received. If vlan interfaces are used as slaves for bonding, with arp monitoring for connectivity, if the rx counter is seen to be incrementing, then the bond device will not observe that the interface is down. CC: David S. Miller Signed-off-by: Vishakha Narvekar Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index e2ed69850489..0bc31de9071a 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -21,6 +21,12 @@ bool vlan_do_receive(struct sk_buff **skbp) if (unlikely(!skb)) return false; + if (unlikely(!(vlan_dev->flags & IFF_UP))) { + kfree_skb(skb); + *skbp = NULL; + return false; + } + skb->dev = vlan_dev; if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) { /* Our lower layer thinks this is not local, let's make sure. -- cgit v1.2.3-70-g09d2 From 69a330007091ea8a801dd9fcd897ec52f9529586 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Mon, 2 Oct 2017 11:28:35 +0200 Subject: RAS/CEC: Use the right length for "cec_disable" parse_cec_param() compares a string with "cec_disable" using only 7 characters of the 11-character-long string. The proper solution for this would be: #define CEC_DISABLE "cec_disable" strncmp(str, CEC_DISABLE, strlen(CEC_DISABLE)) but when comparing a string against a string constant strncmp() has no advantage over strcmp() because the comparison is guaranteed to be bound by the string constant. So just replace str strncmp() with strcmp(). [ tglx: Made it use strcmp and updated the changelog ] Fixes: 011d82611172 ("RAS: Add a Corrected Errors Collector") Signed-off-by: Nicolas Iooss Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170903075440.30250-1-nicolas.iooss_linux@m4x.org --- drivers/ras/cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index d0e5d6ee882c..e2c1988cd7c0 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -523,7 +523,7 @@ int __init parse_cec_param(char *str) if (*str == '=') str++; - if (!strncmp(str, "cec_disable", 7)) + if (!strcmp(str, "cec_disable")) ce_arr.disabled = 1; else return 0; -- cgit v1.2.3-70-g09d2 From 262e681183ddcdb24d64a2f993e41a226adcec29 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 2 Oct 2017 11:28:36 +0200 Subject: x86/mce: Hide mca_cfg Now that lguest is gone, put it in the internal header which should be used only by MCA/RAS code. Add missing header guards while at it. No functional change. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20171002092836.22971-3-bp@alien8.de --- arch/x86/include/asm/mce.h | 1 - arch/x86/kernel/cpu/mcheck/mce-internal.h | 7 +++++++ arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 181264989db5..8edac1de2e35 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -187,7 +187,6 @@ struct mca_msr_regs { extern struct mce_vendor_flags mce_flags; -extern struct mca_config mca_cfg; extern struct mca_msr_regs msr_ops; enum mce_notifier_prios { diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 098530a93bb7..debb974fd17d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -1,3 +1,6 @@ +#ifndef __X86_MCE_INTERNAL_H__ +#define __X86_MCE_INTERNAL_H__ + #include #include @@ -108,3 +111,7 @@ static inline void mce_work_trigger(void) { } static inline void mce_register_injector_chain(struct notifier_block *nb) { } static inline void mce_unregister_injector_chain(struct notifier_block *nb) { } #endif + +extern struct mca_config mca_cfg; + +#endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 40e28ed77fbf..486f640b02ef 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -28,6 +28,8 @@ #include #include +#include "mce-internal.h" + #define NR_BLOCKS 5 #define THRESHOLD_MAX 0xFFF #define INT_TYPE_APIC 0x00020000 -- cgit v1.2.3-70-g09d2 From 463a9215f3ca7600b5fff6d903913906ae4548a9 Mon Sep 17 00:00:00 2001 From: Pierre-Yves MORDRET Date: Thu, 21 Sep 2017 15:30:09 +0200 Subject: i2c: stm32f7: fix setup structure I2C drive setup structure is not properly allocated. Make it static instead of pointer to store driver data. Fixes: aeb068c5721485 ("i2c: i2c-stm32f7: add driver") Signed-off-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 47c67b0ca896..42ebdb1665e1 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -215,7 +215,7 @@ struct stm32f7_i2c_dev { unsigned int msg_num; unsigned int msg_id; struct stm32f7_i2c_msg f7_msg; - struct stm32f7_i2c_setup *setup; + struct stm32f7_i2c_setup setup; struct stm32f7_i2c_timings timing; }; @@ -537,7 +537,7 @@ static void stm32f7_i2c_hw_config(struct stm32f7_i2c_dev *i2c_dev) writel_relaxed(timing, i2c_dev->base + STM32F7_I2C_TIMINGR); /* Enable I2C */ - if (i2c_dev->setup->analog_filter) + if (i2c_dev->setup.analog_filter) stm32f7_i2c_clr_bits(i2c_dev->base + STM32F7_I2C_CR1, STM32F7_I2C_CR1_ANFOFF); else @@ -887,22 +887,19 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) } setup = of_device_get_match_data(&pdev->dev); - i2c_dev->setup->rise_time = setup->rise_time; - i2c_dev->setup->fall_time = setup->fall_time; - i2c_dev->setup->dnf = setup->dnf; - i2c_dev->setup->analog_filter = setup->analog_filter; + i2c_dev->setup = *setup; ret = device_property_read_u32(i2c_dev->dev, "i2c-scl-rising-time-ns", &rise_time); if (!ret) - i2c_dev->setup->rise_time = rise_time; + i2c_dev->setup.rise_time = rise_time; ret = device_property_read_u32(i2c_dev->dev, "i2c-scl-falling-time-ns", &fall_time); if (!ret) - i2c_dev->setup->fall_time = fall_time; + i2c_dev->setup.fall_time = fall_time; - ret = stm32f7_i2c_setup_timing(i2c_dev, i2c_dev->setup); + ret = stm32f7_i2c_setup_timing(i2c_dev, &i2c_dev->setup); if (ret) goto clk_free; -- cgit v1.2.3-70-g09d2 From cb09d943c70da7c8097006db1dc163b2d99338f6 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 21 Sep 2017 16:23:16 +0300 Subject: i2c: i801: Add support for Intel Cedar Fork Add PCI ID for Intel Cedar Fork PCH. Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/busses/i2c-i801 | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index 0500193434cb..d47702456926 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -36,6 +36,7 @@ Supported adapters: * Intel Gemini Lake (SOC) * Intel Cannon Lake-H (PCH) * Intel Cannon Lake-LP (PCH) + * Intel Cedar Fork (PCH) Datasheets: Publicly available at the Intel website On Intel Patsburg and later chipsets, both the normal host SMBus controller diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index c06dce2c1da7..45a3f3ca29b3 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -131,6 +131,7 @@ config I2C_I801 Gemini Lake (SOC) Cannon Lake-H (PCH) Cannon Lake-LP (PCH) + Cedar Fork (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e114e4e00d29..9e12a53ef7b8 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -68,6 +68,7 @@ * Gemini Lake (SOC) 0x31d4 32 hard yes yes yes * Cannon Lake-H (PCH) 0xa323 32 hard yes yes yes * Cannon Lake-LP (PCH) 0x9da3 32 hard yes yes yes + * Cedar Fork (PCH) 0x18df 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -204,6 +205,7 @@ /* Older devices have their ID defined in */ #define PCI_DEVICE_ID_INTEL_BAYTRAIL_SMBUS 0x0f12 +#define PCI_DEVICE_ID_INTEL_CDF_SMBUS 0x18df #define PCI_DEVICE_ID_INTEL_DNV_SMBUS 0x19df #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 @@ -1025,6 +1027,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BRASWELL_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CDF_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) }, @@ -1513,6 +1516,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS: case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS: case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS: + case PCI_DEVICE_ID_INTEL_CDF_SMBUS: case PCI_DEVICE_ID_INTEL_DNV_SMBUS: case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS: priv->features |= FEATURE_I2C_BLOCK_READ; -- cgit v1.2.3-70-g09d2 From a91aee523fb282f8868ba6b918f781679c3ea301 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 21 Sep 2017 08:24:27 +0200 Subject: i2c: ensure termination of *_device_id tables Make sure (of/i2c/platform)_device_id tables are NULL terminated. Found by coccinelle spatch "misc/of_table.cocci" Signed-off-by: Thomas Meyer Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 22e08ae1704f..25fcc3c1e32b 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -627,6 +627,7 @@ static const struct dev_pm_ops sprd_i2c_pm_ops = { static const struct of_device_id sprd_i2c_of_match[] = { { .compatible = "sprd,sc9860-i2c", }, + {}, }; static struct platform_driver sprd_i2c_driver = { -- cgit v1.2.3-70-g09d2 From 25f2f440989c7079fdd8fccd54592cc077b63ae5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Sep 2017 09:15:39 +0100 Subject: i2c: i2c-stm32f7: make structure stm32f7_setup static const The structure stm32f7_setup is local to the source and does not need to be in global scope, make it static const. Cleans up sparse warning: symbol 'stm32f7_setup' was not declared. Should it be static? Signed-off-by: Colin Ian King Acked-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32f7.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 42ebdb1665e1..d4a6e9c2e9aa 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -265,7 +265,7 @@ static struct stm32f7_i2c_spec i2c_specs[] = { }, }; -struct stm32f7_i2c_setup stm32f7_setup = { +static const struct stm32f7_i2c_setup stm32f7_setup = { .rise_time = STM32F7_I2C_RISE_TIME_DEFAULT, .fall_time = STM32F7_I2C_FALL_TIME_DEFAULT, .dnf = STM32F7_I2C_DNF_DEFAULT, -- cgit v1.2.3-70-g09d2 From f26e60167d8b5b1c67b3efd4cb5672da446bdb0e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 4 Oct 2017 10:39:05 -0500 Subject: x86/kvm: Move kvm_fastop_exception to .fixup section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling the kernel with the '-frecord-gcc-switches' flag, objtool complains: arch/x86/kvm/emulate.o: warning: objtool: .GCC.command.line+0x0: special: can't find new instruction And also the kernel fails to link. The problem is that the 'kvm_fastop_exception' code gets placed into the throwaway '.GCC.command.line' section instead of '.text'. Exception fixup code is conventionally placed in the '.fixup' section, so put it there where it belongs. Reported-and-tested-by: Guenter Roeck Signed-off-by: Josh Poimboeuf Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a36254cbf776..d90cdc77e077 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -425,8 +425,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); #op " %al \n\t" \ FOP_RET -asm(".global kvm_fastop_exception \n" - "kvm_fastop_exception: xor %esi, %esi; ret"); +asm(".pushsection .fixup, \"ax\"\n" + ".global kvm_fastop_exception \n" + "kvm_fastop_exception: xor %esi, %esi; ret\n" + ".popsection"); FOP_START(setcc) FOP_SETCC(seto) -- cgit v1.2.3-70-g09d2 From c1fbc0cf81f1c464f5fda322c1104d4bb1da6711 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 5 Oct 2017 14:42:34 +0530 Subject: perf callchain: Compare dsos (as well) for CCKEY_FUNCTION Two functions from different binaries can have same start address. Thus, comparing only start address in match_chain() leads to inconsistent callchains. Fix this by adding a check for dsos as well. Ex, https://www.spinics.net/lists/linux-perf-users/msg04067.html Reported-by: Alexander Pozdneev Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Krister Johansen Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yao Jin Cc: zhangmengting@huawei.com Link: http://lkml.kernel.org/r/20171005091234.5874-1-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index be09d77cade0..a971caf3759d 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -685,6 +685,8 @@ static enum match_result match_chain(struct callchain_cursor_node *node, { struct symbol *sym = node->sym; u64 left, right; + struct dso *left_dso = NULL; + struct dso *right_dso = NULL; if (callchain_param.key == CCKEY_SRCLINE) { enum match_result match = match_chain_srcline(node, cnode); @@ -696,12 +698,14 @@ static enum match_result match_chain(struct callchain_cursor_node *node, if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) { left = cnode->ms.sym->start; right = sym->start; + left_dso = cnode->ms.map->dso; + right_dso = node->map->dso; } else { left = cnode->ip; right = node->ip; } - if (left == right) { + if (left == right && left_dso == right_dso) { if (node->branch) { cnode->branch_count++; -- cgit v1.2.3-70-g09d2 From 954c736f865d6c0c68ae4263a2f3502ee7c447a3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 18 Sep 2017 22:53:43 +0300 Subject: ovl: fix may_write_real() for overlayfs directories Overlayfs directory file_inode() is the overlay inode whether the real inode is upper or lower. This fixes a regression in xfstest generic/158. Fixes: 7c6893e3c9ab ("ovl: don't allow writing ioctl on lower layer") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 54059b142d6b..3b601f115b6c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -468,7 +468,9 @@ static inline int may_write_real(struct file *file) /* File refers to upper, writable layer? */ upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); - if (upperdentry && file_inode(file) == d_inode(upperdentry)) + if (upperdentry && + (file_inode(file) == d_inode(upperdentry) || + file_inode(file) == d_inode(dentry))) return 0; /* Lower layer: can't write to real file, sorry... */ -- cgit v1.2.3-70-g09d2 From e0082a0f04c432cb6d7128ef60d8e425e45ce025 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 24 Sep 2017 13:01:35 +0300 Subject: ovl: fix error value printed in ovl_lookup_index() Fixes: 359f392ca53e ("ovl: lookup index entry for copy up origin") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index c3addd1114f1..654bea1a5ac9 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -506,6 +506,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); if (IS_ERR(index)) { + err = PTR_ERR(index); pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, -- cgit v1.2.3-70-g09d2 From 9f4ec904dbd4eb1a2db10d5e7dc16eae386fe64d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 24 Sep 2017 17:36:26 +0300 Subject: ovl: fix dput() of ERR_PTR in ovl_cleanup_index() Fixes: caf70cb2ba5d ("ovl: cleanup orphan index entries") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/util.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 117794582f9f..7ae33d225a67 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -430,7 +430,7 @@ void ovl_inuse_unlock(struct dentry *dentry) } } -/* Called must hold OVL_I(inode)->oi_lock */ +/* Caller must hold OVL_I(inode)->lock */ static void ovl_cleanup_index(struct dentry *dentry) { struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode; @@ -469,6 +469,9 @@ static void ovl_cleanup_index(struct dentry *dentry) err = PTR_ERR(index); if (!IS_ERR(index)) err = ovl_cleanup(dir, index); + else + index = NULL; + inode_unlock(dir); if (err) goto fail; -- cgit v1.2.3-70-g09d2 From dc7ab6773e8171e07f16fd0df0c5eea28c899503 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 24 Sep 2017 22:19:10 +0300 Subject: ovl: fix dentry leak in ovl_indexdir_cleanup() index dentry was not released when breaking out of the loop due to index verification error. Fixes: 415543d5c64f ("ovl: cleanup bad and stale index entries on mount") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 62e9b22a2077..0f85ee9c3268 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -988,6 +988,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, struct path *lowerstack, unsigned int numlower) { int err; + struct dentry *index = NULL; struct inode *dir = dentry->d_inode; struct path path = { .mnt = mnt, .dentry = dentry }; LIST_HEAD(list); @@ -1007,8 +1008,6 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, inode_lock_nested(dir, I_MUTEX_PARENT); list_for_each_entry(p, &list, l_node) { - struct dentry *index; - if (p->name[0] == '.') { if (p->len == 1) continue; @@ -1018,6 +1017,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, index = lookup_one_len(p->name, dentry, p->len); if (IS_ERR(index)) { err = PTR_ERR(index); + index = NULL; break; } err = ovl_verify_index(index, lowerstack, numlower); @@ -1029,7 +1029,9 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, break; } dput(index); + index = NULL; } + dput(index); inode_unlock(dir); out: ovl_cache_free(&list); -- cgit v1.2.3-70-g09d2 From 5820dc0888d302ac05f8b91ffdf7e4e53b4fbf53 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 25 Sep 2017 16:39:55 +0300 Subject: ovl: fix missing unlock_rename() in ovl_do_copy_up() Use the ovl_lock_rename_workdir() helper which requires unlock_rename() only on lock success. Fixes: ("fd210b7d67ee ovl: move copy up lock out") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 6 ++---- fs/overlayfs/dir.c | 20 -------------------- fs/overlayfs/overlayfs.h | 1 + fs/overlayfs/util.c | 19 +++++++++++++++++++ 4 files changed, 22 insertions(+), 24 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index aad97b30d5e6..c441f9387a1b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -561,10 +561,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) c->tmpfile = true; err = ovl_copy_up_locked(c); } else { - err = -EIO; - if (lock_rename(c->workdir, c->destdir) != NULL) { - pr_err("overlayfs: failed to lock workdir+upperdir\n"); - } else { + err = ovl_lock_rename_workdir(c->workdir, c->destdir); + if (!err) { err = ovl_copy_up_locked(c); unlock_rename(c->workdir, c->destdir); } diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 3309b1912241..cc961a3bd3bd 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -216,26 +216,6 @@ out_unlock: return err; } -static int ovl_lock_rename_workdir(struct dentry *workdir, - struct dentry *upperdir) -{ - /* Workdir should not be the same as upperdir */ - if (workdir == upperdir) - goto err; - - /* Workdir should not be subdir of upperdir and vice versa */ - if (lock_rename(workdir, upperdir) != NULL) - goto err_unlock; - - return 0; - -err_unlock: - unlock_rename(workdir, upperdir); -err: - pr_err("overlayfs: failed to lock workdir+upperdir\n"); - return -EIO; -} - static struct dentry *ovl_clear_empty(struct dentry *dentry, struct list_head *list) { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d4e8c1a08fb0..c706a6f99928 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -235,6 +235,7 @@ bool ovl_inuse_trylock(struct dentry *dentry); void ovl_inuse_unlock(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry, bool *locked); void ovl_nlink_end(struct dentry *dentry, bool locked); +int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir); static inline bool ovl_is_impuredir(struct dentry *dentry) { diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 7ae33d225a67..b9b239fa5cfd 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -560,3 +560,22 @@ void ovl_nlink_end(struct dentry *dentry, bool locked) mutex_unlock(&OVL_I(d_inode(dentry))->lock); } } + +int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir) +{ + /* Workdir should not be the same as upperdir */ + if (workdir == upperdir) + goto err; + + /* Workdir should not be subdir of upperdir and vice versa */ + if (lock_rename(workdir, upperdir) != NULL) + goto err_unlock; + + return 0; + +err_unlock: + unlock_rename(workdir, upperdir); +err: + pr_err("overlayfs: failed to lock workdir+upperdir\n"); + return -EIO; +} -- cgit v1.2.3-70-g09d2 From 85fdee1eef1a9e48ad5716916677e0c5fbc781e3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 29 Sep 2017 10:21:21 +0300 Subject: ovl: fix regression caused by exclusive upper/work dir protection Enforcing exclusive ownership on upper/work dirs caused a docker regression: https://github.com/moby/moby/issues/34672. Euan spotted the regression and pointed to the offending commit. Vivek has brought the regression to my attention and provided this reproducer: Terminal 1: mount -t overlay -o workdir=work,lowerdir=lower,upperdir=upper none merged/ Terminal 2: unshare -m Terminal 1: umount merged mount -t overlay -o workdir=work,lowerdir=lower,upperdir=upper none merged/ mount: /root/overlay-testing/merged: none already mounted or mount point busy To fix the regression, I replaced the error with an alarming warning. With index feature enabled, mount does fail, but logs a suggestion to override exclusive dir protection by disabling index. Note that index=off mount does take the inuse locks, so a concurrent index=off will issue the warning and a concurrent index=on mount will fail. Documentation was updated to reflect this change. Fixes: 2cac0c00a6cd ("ovl: get exclusive ownership on upper/work dirs") Cc: # v4.13 Reported-by: Euan Kemp Reported-by: Vivek Goyal Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.txt | 5 ++++- fs/overlayfs/ovl_entry.h | 3 +++ fs/overlayfs/super.c | 27 +++++++++++++++++++-------- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 36f528a7fdd6..8caa60734647 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -210,8 +210,11 @@ path as another overlay mount and it may use a lower layer path that is beneath or above the path of another overlay lower layer path. Using an upper layer path and/or a workdir path that are already used by -another overlay mount is not allowed and will fail with EBUSY. Using +another overlay mount is not allowed and may fail with EBUSY. Using partially overlapping paths is not allowed but will not fail with EBUSY. +If files are accessed from two overlayfs mounts which share or overlap the +upper layer and/or workdir path the behavior of the overlay is undefined, +though it will not result in a crash or deadlock. Mounting an overlay using an upper layer path, where the upper layer path was previously used by another mounted overlay in combination with a diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 878a750986dd..25d9b5adcd42 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -37,6 +37,9 @@ struct ovl_fs { bool noxattr; /* sb common to all layers */ struct super_block *same_sb; + /* Did we take the inuse lock? */ + bool upperdir_locked; + bool workdir_locked; }; /* private information held for every overlayfs dentry */ diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index fd5ea4facc62..092d150643c1 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -211,9 +211,10 @@ static void ovl_put_super(struct super_block *sb) dput(ufs->indexdir); dput(ufs->workdir); - ovl_inuse_unlock(ufs->workbasedir); + if (ufs->workdir_locked) + ovl_inuse_unlock(ufs->workbasedir); dput(ufs->workbasedir); - if (ufs->upper_mnt) + if (ufs->upper_mnt && ufs->upperdir_locked) ovl_inuse_unlock(ufs->upper_mnt->mnt_root); mntput(ufs->upper_mnt); for (i = 0; i < ufs->numlower; i++) @@ -881,9 +882,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) goto out_put_upperpath; err = -EBUSY; - if (!ovl_inuse_trylock(upperpath.dentry)) { - pr_err("overlayfs: upperdir is in-use by another mount\n"); + if (ovl_inuse_trylock(upperpath.dentry)) { + ufs->upperdir_locked = true; + } else if (ufs->config.index) { + pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n"); goto out_put_upperpath; + } else { + pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); } err = ovl_mount_dir(ufs->config.workdir, &workpath); @@ -901,9 +906,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } err = -EBUSY; - if (!ovl_inuse_trylock(workpath.dentry)) { - pr_err("overlayfs: workdir is in-use by another mount\n"); + if (ovl_inuse_trylock(workpath.dentry)) { + ufs->workdir_locked = true; + } else if (ufs->config.index) { + pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n"); goto out_put_workpath; + } else { + pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); } ufs->workbasedir = workpath.dentry; @@ -1156,11 +1165,13 @@ out_put_lowerpath: out_free_lowertmp: kfree(lowertmp); out_unlock_workdentry: - ovl_inuse_unlock(workpath.dentry); + if (ufs->workdir_locked) + ovl_inuse_unlock(workpath.dentry); out_put_workpath: path_put(&workpath); out_unlock_upperdentry: - ovl_inuse_unlock(upperpath.dentry); + if (ufs->upperdir_locked) + ovl_inuse_unlock(upperpath.dentry); out_put_upperpath: path_put(&upperpath); out_free_config: -- cgit v1.2.3-70-g09d2 From 3346a6a4e5ba8c040360f753b26938cec31a4bdc Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 2 Oct 2017 16:16:13 -0600 Subject: selftests: x86: sysret_ss_attrs doesn't build on a PIE build sysret_ss_attrs fails to compile leading x86 test run to fail on systems configured to build using PIE by default. Add -no-pie fix it. Relocation might still fail if relocated above 4G. For now this change fixes the build and runs x86 tests. tools/testing/selftests/x86$ make gcc -m64 -o .../tools/testing/selftests/x86/single_step_syscall_64 -O2 -g -std=gnu99 -pthread -Wall single_step_syscall.c -lrt -ldl gcc -m64 -o .../tools/testing/selftests/x86/sysret_ss_attrs_64 -O2 -g -std=gnu99 -pthread -Wall sysret_ss_attrs.c thunks.S -lrt -ldl /usr/bin/ld: /tmp/ccS6pvIh.o: relocation R_X86_64_32S against `.text' can not be used when making a shared object; recompile with -fPIC /usr/bin/ld: final link failed: Nonrepresentable section on output collect2: error: ld returned 1 exit status Makefile:49: recipe for target '.../tools/testing/selftests/x86/sysret_ss_attrs_64' failed make: *** [.../tools/testing/selftests/x86/sysret_ss_attrs_64] Error 1 Suggested-by: Andy Lutomirski Signed-off-by: Shuah Khan --- tools/testing/selftests/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 97f187e2663f..0a74a20ca32b 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -20,7 +20,7 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -CFLAGS := -O2 -g -std=gnu99 -pthread -Wall +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie UNAME_M := $(shell uname -m) CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -- cgit v1.2.3-70-g09d2 From ea344f6a507f8ce92bf1e10f044c6cfc67e4d22b Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 2 Oct 2017 17:02:22 -0600 Subject: selftests: mqueue: fix regression in silencing output from RUN_TESTS Fix fix regression in silencing output from RUN_TESTS introduced by commit <8230b905a6780c6> selftests: mqueue: Use full path to run tests from Makefile Signed-off-by: Shuah Khan --- tools/testing/selftests/mqueue/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 0f5e347b068d..152823b6cb21 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -5,8 +5,8 @@ TEST_GEN_PROGS := mq_open_tests mq_perf_tests include ../lib.mk override define RUN_TESTS - $(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" - $(OUTPUT)//mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" + @$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" + @$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" endef override define EMIT_TESTS -- cgit v1.2.3-70-g09d2 From e42eef4ba38806b18c4a74f0c276fb2e0b548173 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Oct 2017 12:28:18 +0200 Subject: KVM: add X86_LOCAL_APIC dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rework of the posted interrupt handling broke building without support for the local APIC: ERROR: "boot_cpu_physical_apicid" [arch/x86/kvm/kvm-intel.ko] undefined! That configuration is probably not particularly useful anyway, so we can avoid the randconfig failures by adding a Kconfig dependency. Fixes: 8b306e2f3c41 ("KVM: VMX: avoid double list add with VT-d posted interrupts") Signed-off-by: Arnd Bergmann Signed-off-by: Radim Krčmář --- arch/x86/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 3ea624452f93..3c48bc8bf08c 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -23,6 +23,7 @@ config KVM depends on HIGH_RES_TIMERS # for TASKSTATS/TASK_DELAY_ACCT: depends on NET && MULTIUSER + depends on X86_LOCAL_APIC select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES -- cgit v1.2.3-70-g09d2 From ec572b9e81b1df79147c2e6f69458e65cf248598 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Fri, 29 Sep 2017 15:01:10 +0800 Subject: nfsd4: define nfsd4_secinfo_no_name_release() Commit 34b1744c91cc ("nfsd4: define ->op_release for compound ops") defined a couple ->op_release functions and run them if necessary. But there's a problem with that is that it reused nfsd4_secinfo_release() as the op_release of OP_SECINFO_NO_NAME, and caused a leak on struct nfsd4_secinfo_no_name in nfsd4_encode_secinfo_no_name(), because there's no .si_exp field in struct nfsd4_secinfo_no_name. I found this because I was unable to umount an ext4 partition after exporting it via NFS & run fsstress on the nfs mount. A simplified reproducer would be: # mount a local-fs device at /mnt/test, and export it via NFS with # fsid=0 export option (this is required) mount /dev/sda5 /mnt/test echo "/mnt/test *(rw,no_root_squash,fsid=0)" >> /etc/exports service nfs restart # locally mount the nfs export with all default, note that I have # nfsv4.1 configured as the default nfs version, because of the # fsid export option, v4 mount would fail and fall back to v3 mount localhost:/mnt/test /mnt/nfs # try to umount the underlying device, but got EBUSY umount /mnt/nfs service nfs stop umount /mnt/test <=== EBUSY here Fixed it by defining a separate nfsd4_secinfo_no_name_release() function as the op_release method of OP_SECINFO_NO_NAME that releases the correct nfsd4_secinfo_no_name structure. Fixes: 34b1744c91cc ("nfsd4: define ->op_release for compound ops") Signed-off-by: Eryu Guan Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3c69db7d4905..8487486ec496 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -927,6 +927,13 @@ nfsd4_secinfo_release(union nfsd4_op_u *u) exp_put(u->secinfo.si_exp); } +static void +nfsd4_secinfo_no_name_release(union nfsd4_op_u *u) +{ + if (u->secinfo_no_name.sin_exp) + exp_put(u->secinfo_no_name.sin_exp); +} + static __be32 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -2375,7 +2382,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO_NO_NAME] = { .op_func = nfsd4_secinfo_no_name, - .op_release = nfsd4_secinfo_release, + .op_release = nfsd4_secinfo_no_name_release, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", .op_rsize_bop = nfsd4_secinfo_rsize, -- cgit v1.2.3-70-g09d2 From 41dcf197ad5373a7dd0a4b6572aec2e3ec6a0e49 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Mon, 2 Oct 2017 17:17:35 -0500 Subject: dm raid: fix incorrect status output at the end of a "recover" process There are three important fields that indicate the overall health and status of an array: dev_health, sync_ratio, and sync_action. They tell us the condition of the devices in the array, and the degree to which the array is synchronized. This commit fixes a condition that is reported incorrectly. When a member of the array is being rebuilt or a new device is added, the "recover" process is used to synchronize it with the rest of the array. When the process is complete, but the sync thread hasn't yet been reaped, it is possible for the state of MD to be: mddev->recovery = [ MD_RECOVERY_RUNNING MD_RECOVERY_RECOVER MD_RECOVERY_DONE ] curr_resync_completed = (but not MaxSector) and all rdevs to be In_sync. This causes the 'array_in_sync' output parameter that is passed to rs_get_progress() to be computed incorrectly and reported as 'false' -- or not in-sync. This in turn causes the dev_health status characters to be reported as all 'a', rather than the proper 'A'. This can cause erroneous output for several seconds at a time when tools will want to be checking the condition due to events that are raised at the end of a sync process. Fix this by properly calculating the 'array_in_sync' return parameter in rs_get_progress(). Also, remove an unnecessary intermediate 'recovery_cp' variable in rs_get_progress(). Signed-off-by: Jonathan Brassow Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-raid.txt | 1 + drivers/md/dm-raid.c | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 4a0a7469fdd7..32df07e29f68 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -344,3 +344,4 @@ Version History (wrong raid10_copies/raid10_format sequence) 1.11.1 Add raid4/5/6 journal write-back support via journal_mode option 1.12.1 fix for MD deadlock between mddev_suspend() and md_write_start() available +1.13.0 Fix dev_health status at end of "recover" (was 'a', now 'A') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5bfe285ea9d1..43094ea89e37 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3297,11 +3297,10 @@ static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev, static sector_t rs_get_progress(struct raid_set *rs, sector_t resync_max_sectors, bool *array_in_sync) { - sector_t r, recovery_cp, curr_resync_completed; + sector_t r, curr_resync_completed; struct mddev *mddev = &rs->md; curr_resync_completed = mddev->curr_resync_completed ?: mddev->recovery_cp; - recovery_cp = mddev->recovery_cp; *array_in_sync = false; if (rs_is_raid0(rs)) { @@ -3330,9 +3329,11 @@ static sector_t rs_get_progress(struct raid_set *rs, } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) r = curr_resync_completed; else - r = recovery_cp; + r = mddev->recovery_cp; - if (r == MaxSector) { + if ((r == MaxSector) || + (test_bit(MD_RECOVERY_DONE, &mddev->recovery) && + (mddev->curr_resync_completed == resync_max_sectors))) { /* * Sync complete. */ @@ -3892,7 +3893,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 12, 1}, + .version = {1, 13, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, -- cgit v1.2.3-70-g09d2 From 1561b3266ebe029c487a95f92d1a58c03ded84a1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Oct 2017 15:53:47 +0300 Subject: selftests/net: rxtimestamp: Fix an off by one The > should be >= so that we don't write one element beyond the end of the array. Fixes: 16e781224198 ("selftests/net: Add a test to validate behavior of rx timestamps") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- tools/testing/selftests/networking/timestamping/rxtimestamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/networking/timestamping/rxtimestamp.c index 00f286661dcd..dd4162fc0419 100644 --- a/tools/testing/selftests/networking/timestamping/rxtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c @@ -341,7 +341,7 @@ int main(int argc, char **argv) return 0; case 'n': t = atoi(optarg); - if (t > ARRAY_SIZE(test_cases)) + if (t >= ARRAY_SIZE(test_cases)) error(1, 0, "Invalid test case: %d", t); all_tests = false; test_cases[t].enabled = true; -- cgit v1.2.3-70-g09d2 From 53ecde0b9126ff140abe3aefd7f0ec64d6fa36b0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Sep 2017 15:05:51 +1000 Subject: powerpc/powernv: Increase memory block size to 1GB on radix Memory hot unplug on PowerNV radix hosts is broken. Our memory block size is 256MB but since we map the linear region with very large pages, each pte we tear down maps 1GB. A hot unplug of one 256MB memory block results in 768MB of memory getting unintentionally unmapped. At this point we are likely to oops. Fix this by increasing our memory block size to 1GB on PowerNV radix hosts. Fixes: 4b5d62ca17a1 ("powerpc/mm: add radix__remove_section_mapping()") Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/setup.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 897aa1400eb8..bbb73aa0eb8f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -272,7 +272,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static unsigned long pnv_memory_block_size(void) { - return 256UL * 1024 * 1024; + /* + * We map the kernel linear region with 1GB large pages on radix. For + * memory hot unplug to work our memory block size must be at least + * this size. + */ + if (radix_enabled()) + return 1UL * 1024 * 1024 * 1024; + else + return 256UL * 1024 * 1024; } #endif -- cgit v1.2.3-70-g09d2 From baf41bc35f2bdb953da532645fd82009c2d12acf Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 13 Sep 2017 16:46:14 +0300 Subject: iwlwifi: mvm: do not print security error in monitor mode In monitor mode we are not expected to decrypt encrypted packets (not having the keys). Hence we are expected to get an unknown rx security status. Keeping the print in monitor mode causes a print for each captured packet flooding the dmesg. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 7 +++++++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 4 +++- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 +++- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 3bcaa82f59b2..a9ac872226fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1077,6 +1077,7 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm) mvm->vif_count = 0; mvm->rx_ba_sessions = 0; mvm->fwrt.dump.conf = FW_DBG_INVALID; + mvm->monitor_on = false; /* keep statistics ticking */ iwl_mvm_accu_radio_stats(mvm); @@ -1437,6 +1438,9 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, mvm->p2p_device_vif = vif; } + if (vif->type == NL80211_IFTYPE_MONITOR) + mvm->monitor_on = true; + iwl_mvm_vif_dbgfs_register(mvm, vif); goto out_unlock; @@ -1526,6 +1530,9 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw, iwl_mvm_power_update_mac(mvm); iwl_mvm_mac_ctxt_remove(mvm, vif); + if (vif->type == NL80211_IFTYPE_MONITOR) + mvm->monitor_on = false; + out_release: mutex_unlock(&mvm->mutex); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 83303bac0e4b..d75da37a79f3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1015,6 +1015,9 @@ struct iwl_mvm { bool drop_bcn_ap_mode; struct delayed_work cs_tx_unblock_dwork; + + /* does a monitor vif exist (only one can exist hence bool) */ + bool monitor_on; #ifdef CONFIG_ACPI struct iwl_mvm_sar_profile sar_profiles[IWL_MVM_SAR_PROFILE_NUM]; struct iwl_mvm_geo_profile geo_profiles[IWL_NUM_GEO_PROFILES]; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index 184c749766f2..2d14a58cbdd7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -244,7 +244,9 @@ static u32 iwl_mvm_set_mac80211_rx_flag(struct iwl_mvm *mvm, return 0; default: - IWL_ERR(mvm, "Unhandled alg: 0x%x\n", rx_pkt_status); + /* Expected in monitor (not having the keys) */ + if (!mvm->monitor_on) + IWL_ERR(mvm, "Unhandled alg: 0x%x\n", rx_pkt_status); } return 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 77f77bc5d083..248699c2c4bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -277,7 +277,9 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_hdr *hdr, stats->flag |= RX_FLAG_DECRYPTED; return 0; default: - IWL_ERR(mvm, "Unhandled alg: 0x%x\n", status); + /* Expected in monitor (not having the keys) */ + if (!mvm->monitor_on) + IWL_ERR(mvm, "Unhandled alg: 0x%x\n", status); } return 0; -- cgit v1.2.3-70-g09d2 From 1efc3843a4ee1331bc20df685a79b47fa0f547d2 Mon Sep 17 00:00:00 2001 From: Golan Ben Ami Date: Tue, 12 Sep 2017 12:32:25 +0300 Subject: iwlwifi: stop dbgc recording before stopping DMA Today we stop the device and the DMA without stopping the dbgc recording before. This causes host crashes when the DMA rate is high. Stop dbgc recording when clearing the fw debug configuration to fix this. Signed-off-by: Golan Ben Ami Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 7 ++----- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 6afc7a799892..f5dd7d83cd0a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1086,7 +1086,7 @@ void iwl_fw_error_dump_wk(struct work_struct *work) if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { /* stop recording */ - iwl_set_bits_prph(fwrt->trans, MON_BUFF_SAMPLE_CTL, 0x100); + iwl_fw_dbg_stop_recording(fwrt); iwl_fw_error_dump(fwrt); @@ -1104,10 +1104,7 @@ void iwl_fw_error_dump_wk(struct work_struct *work) u32 in_sample = iwl_read_prph(fwrt->trans, DBGC_IN_SAMPLE); u32 out_ctrl = iwl_read_prph(fwrt->trans, DBGC_OUT_CTRL); - /* stop recording */ - iwl_write_prph(fwrt->trans, DBGC_IN_SAMPLE, 0); - udelay(100); - iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, 0); + iwl_fw_dbg_stop_recording(fwrt); /* wait before we collect the data till the DBGC stop */ udelay(500); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 0f810ea89d31..9c889a32fe24 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -68,6 +68,8 @@ #include #include #include "runtime.h" +#include "iwl-prph.h" +#include "iwl-io.h" #include "file.h" #include "error-dump.h" @@ -194,8 +196,21 @@ _iwl_fw_dbg_trigger_simple_stop(struct iwl_fw_runtime *fwrt, iwl_fw_dbg_get_trigger((fwrt)->fw,\ (trig))) +static inline void iwl_fw_dbg_stop_recording(struct iwl_fw_runtime *fwrt) +{ + if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) { + iwl_set_bits_prph(fwrt->trans, MON_BUFF_SAMPLE_CTL, 0x100); + } else { + iwl_write_prph(fwrt->trans, DBGC_IN_SAMPLE, 0); + udelay(100); + iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, 0); + } +} + static inline void iwl_fw_dump_conf_clear(struct iwl_fw_runtime *fwrt) { + iwl_fw_dbg_stop_recording(fwrt); + fwrt->dump.conf = FW_DBG_INVALID; } -- cgit v1.2.3-70-g09d2 From 1442a9a9f2e441b15393c2d89286303b103a57e8 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 18 Sep 2017 14:39:26 +0300 Subject: iwlwifi: mvm: return -ENODATA when reading the temperature with the FW down It seems that libsensors treats -EIO as a special non-recoverable failure when it tries to read the temperature while the firmware is not running. To solve that, change the error code to a milder -ENODATA. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=196941 Fixes: c221daf219b1 ("iwlwifi: mvm: add registration to thermal zone") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 4d907f60bce9..1232f63278eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -631,7 +631,7 @@ static int iwl_mvm_tzone_get_temp(struct thermal_zone_device *device, if (!iwl_mvm_firmware_running(mvm) || mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR) { - ret = -EIO; + ret = -ENODATA; goto out; } -- cgit v1.2.3-70-g09d2 From d8c73e455d7b973d1346bb5632b4a41819b090c9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Sep 2017 11:03:50 +0200 Subject: iwlwifi: nvm-parse: unify channel flags printing The current channel flags printing is very strange and messy, in LAR we sometimes print the channel number and sometimes the frequency, in both we print a calculated value (whether ad-hoc is supported or not) etc. Unify all this to * print the channel number, not the frequency * remove the band print (2.4/5.2 GHz, it's obvious) * remove the calculated Ad-Hoc print Doing all of this also gets the length of the string to a max of 101 characters, which is below the max of 110 for tracing, and thus avoids the warning that came up on certain channels with certain flag combinations. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 98 +++++++++------------- 1 file changed, 39 insertions(+), 59 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 3014beef4873..35638404c24e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -206,8 +206,36 @@ enum iwl_nvm_channel_flags { NVM_CHANNEL_DC_HIGH = BIT(12), }; +static inline void iwl_nvm_print_channel_flags(struct device *dev, u32 level, + int chan, u16 flags) +{ #define CHECK_AND_PRINT_I(x) \ - ((ch_flags & NVM_CHANNEL_##x) ? # x " " : "") + ((flags & NVM_CHANNEL_##x) ? " " #x : "") + + if (!(flags & NVM_CHANNEL_VALID)) { + IWL_DEBUG_DEV(dev, level, "Ch. %d: 0x%x: No traffic\n", + chan, flags); + return; + } + + /* Note: already can print up to 101 characters, 110 is the limit! */ + IWL_DEBUG_DEV(dev, level, + "Ch. %d: 0x%x:%s%s%s%s%s%s%s%s%s%s%s%s\n", + chan, flags, + CHECK_AND_PRINT_I(VALID), + CHECK_AND_PRINT_I(IBSS), + CHECK_AND_PRINT_I(ACTIVE), + CHECK_AND_PRINT_I(RADAR), + CHECK_AND_PRINT_I(INDOOR_ONLY), + CHECK_AND_PRINT_I(GO_CONCURRENT), + CHECK_AND_PRINT_I(UNIFORM), + CHECK_AND_PRINT_I(20MHZ), + CHECK_AND_PRINT_I(40MHZ), + CHECK_AND_PRINT_I(80MHZ), + CHECK_AND_PRINT_I(160MHZ), + CHECK_AND_PRINT_I(DC_HIGH)); +#undef CHECK_AND_PRINT_I +} static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, u16 nvm_flags, const struct iwl_cfg *cfg) @@ -302,12 +330,8 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, * supported, hence we still want to add them to * the list of supported channels to cfg80211. */ - IWL_DEBUG_EEPROM(dev, - "Ch. %d Flags %x [%sGHz] - No traffic\n", - nvm_chan[ch_idx], - ch_flags, - (ch_idx >= num_2ghz_channels) ? - "5.2" : "2.4"); + iwl_nvm_print_channel_flags(dev, IWL_DL_EEPROM, + nvm_chan[ch_idx], ch_flags); continue; } @@ -337,27 +361,10 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, else channel->flags = 0; - IWL_DEBUG_EEPROM(dev, - "Ch. %d [%sGHz] flags 0x%x %s%s%s%s%s%s%s%s%s%s%s%s(%ddBm): Ad-Hoc %ssupported\n", - channel->hw_value, - is_5ghz ? "5.2" : "2.4", - ch_flags, - CHECK_AND_PRINT_I(VALID), - CHECK_AND_PRINT_I(IBSS), - CHECK_AND_PRINT_I(ACTIVE), - CHECK_AND_PRINT_I(RADAR), - CHECK_AND_PRINT_I(INDOOR_ONLY), - CHECK_AND_PRINT_I(GO_CONCURRENT), - CHECK_AND_PRINT_I(UNIFORM), - CHECK_AND_PRINT_I(20MHZ), - CHECK_AND_PRINT_I(40MHZ), - CHECK_AND_PRINT_I(80MHZ), - CHECK_AND_PRINT_I(160MHZ), - CHECK_AND_PRINT_I(DC_HIGH), - channel->max_power, - ((ch_flags & NVM_CHANNEL_IBSS) && - !(ch_flags & NVM_CHANNEL_RADAR)) - ? "" : "not "); + iwl_nvm_print_channel_flags(dev, IWL_DL_EEPROM, + channel->hw_value, ch_flags); + IWL_DEBUG_EEPROM(dev, "Ch. %d: %ddBm\n", + channel->hw_value, channel->max_power); } return n_channels; @@ -873,12 +880,8 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, new_rule = false; if (!(ch_flags & NVM_CHANNEL_VALID)) { - IWL_DEBUG_DEV(dev, IWL_DL_LAR, - "Ch. %d Flags %x [%sGHz] - No traffic\n", - nvm_chan[ch_idx], - ch_flags, - (ch_idx >= NUM_2GHZ_CHANNELS) ? - "5.2" : "2.4"); + iwl_nvm_print_channel_flags(dev, IWL_DL_LAR, + nvm_chan[ch_idx], ch_flags); continue; } @@ -914,31 +917,8 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, prev_center_freq = center_freq; prev_reg_rule_flags = reg_rule_flags; - IWL_DEBUG_DEV(dev, IWL_DL_LAR, - "Ch. %d [%sGHz] %s%s%s%s%s%s%s%s%s%s%s%s(0x%02x)\n", - center_freq, - band == NL80211_BAND_5GHZ ? "5.2" : "2.4", - CHECK_AND_PRINT_I(VALID), - CHECK_AND_PRINT_I(IBSS), - CHECK_AND_PRINT_I(ACTIVE), - CHECK_AND_PRINT_I(RADAR), - CHECK_AND_PRINT_I(INDOOR_ONLY), - CHECK_AND_PRINT_I(GO_CONCURRENT), - CHECK_AND_PRINT_I(UNIFORM), - CHECK_AND_PRINT_I(20MHZ), - CHECK_AND_PRINT_I(40MHZ), - CHECK_AND_PRINT_I(80MHZ), - CHECK_AND_PRINT_I(160MHZ), - CHECK_AND_PRINT_I(DC_HIGH), - ch_flags); - IWL_DEBUG_DEV(dev, IWL_DL_LAR, - "Ch. %d [%sGHz] reg_flags 0x%x: %s\n", - center_freq, - band == NL80211_BAND_5GHZ ? "5.2" : "2.4", - reg_rule_flags, - ((ch_flags & NVM_CHANNEL_ACTIVE) && - !(ch_flags & NVM_CHANNEL_RADAR)) - ? "Ad-Hoc" : ""); + iwl_nvm_print_channel_flags(dev, IWL_DL_LAR, + nvm_chan[ch_idx], ch_flags); } regd->n_reg_rules = valid_rules; -- cgit v1.2.3-70-g09d2 From 44fd09dad5d2b78efbabbbbf623774e561e36cca Mon Sep 17 00:00:00 2001 From: Chaya Rachel Ivgi Date: Mon, 4 Sep 2017 14:40:06 +0300 Subject: iwlwifi: nvm: set the correct offsets to 3168 series The driver currently handles two NVM formats, one for 7000 family and below, and one for 8000 family and above. The 3168 series uses something in between, so currently the driver uses incorrect offsets for it. Fix the incorrect offsets. Fixes: c4836b056d83 ("iwlwifi: Add PCI IDs for the new 3168 series") Signed-off-by: Chaya Rachel Ivgi Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/7000.c | 1 + drivers/net/wireless/intel/iwlwifi/cfg/8000.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 2 +- drivers/net/wireless/intel/iwlwifi/cfg/a000.c | 2 +- .../net/wireless/intel/iwlwifi/fw/api/nvm-reg.h | 2 ++ drivers/net/wireless/intel/iwlwifi/iwl-config.h | 16 +++++++-- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 39 +++++++++++++--------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 21 ++++++++---- 9 files changed, 59 insertions(+), 28 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/7000.c b/drivers/net/wireless/intel/iwlwifi/cfg/7000.c index 45e2efc70d19..ce741beec1fc 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/7000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/7000.c @@ -309,6 +309,7 @@ const struct iwl_cfg iwl3168_2ac_cfg = { .nvm_calib_ver = IWL3168_TX_POWER_VERSION, .pwr_tx_backoffs = iwl7265_pwr_tx_backoffs, .dccm_len = IWL7265_DCCM_LEN, + .nvm_type = IWL_NVM_SDP, }; const struct iwl_cfg iwl7265_2ac_cfg = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c index 2e6c52664cee..c2a5936ccede 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c @@ -164,7 +164,7 @@ static const struct iwl_tt_params iwl8000_tt_params = { .default_nvm_file_C_step = DEFAULT_NVM_FILE_FAMILY_8000C, \ .thermal_params = &iwl8000_tt_params, \ .apmg_not_supported = true, \ - .ext_nvm = true, \ + .nvm_type = IWL_NVM_EXT, \ .dbgc_supported = true #define IWL_DEVICE_8000 \ diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index 2babe0a1f18b..e8b5ff42f5a8 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -148,7 +148,7 @@ static const struct iwl_tt_params iwl9000_tt_params = { .vht_mu_mimo_supported = true, \ .mac_addr_from_csr = true, \ .rf_id = true, \ - .ext_nvm = true, \ + .nvm_type = IWL_NVM_EXT, \ .dbgc_supported = true const struct iwl_cfg iwl9160_2ac_cfg = { diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c index 76ba1f8bc72f..a440140ed8dd 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c @@ -133,7 +133,7 @@ static const struct iwl_ht_params iwl_a000_ht_params = { .use_tfh = true, \ .rf_id = true, \ .gen2 = true, \ - .ext_nvm = true, \ + .nvm_type = IWL_NVM_EXT, \ .dbgc_supported = true const struct iwl_cfg iwla000_2ac_cfg_hr = { diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h index 00bc7a25dece..3fd07bc80f54 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h @@ -108,6 +108,7 @@ enum iwl_nvm_access_target { * @NVM_SECTION_TYPE_REGULATORY: regulatory section * @NVM_SECTION_TYPE_CALIBRATION: calibration section * @NVM_SECTION_TYPE_PRODUCTION: production section + * @NVM_SECTION_TYPE_REGULATORY_SDP: regulatory section used by 3168 series * @NVM_SECTION_TYPE_MAC_OVERRIDE: MAC override section * @NVM_SECTION_TYPE_PHY_SKU: PHY SKU section * @NVM_MAX_NUM_SECTIONS: number of sections @@ -117,6 +118,7 @@ enum iwl_nvm_section_type { NVM_SECTION_TYPE_REGULATORY = 3, NVM_SECTION_TYPE_CALIBRATION = 4, NVM_SECTION_TYPE_PRODUCTION = 5, + NVM_SECTION_TYPE_REGULATORY_SDP = 8, NVM_SECTION_TYPE_MAC_OVERRIDE = 11, NVM_SECTION_TYPE_PHY_SKU = 12, NVM_MAX_NUM_SECTIONS = 13, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 3e057b539d5b..71cb1ecde0f7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -108,6 +108,18 @@ enum iwl_led_mode { IWL_LED_DISABLE, }; +/** + * enum iwl_nvm_type - nvm formats + * @IWL_NVM: the regular format + * @IWL_NVM_EXT: extended NVM format + * @IWL_NVM_SDP: NVM format used by 3168 series + */ +enum iwl_nvm_type { + IWL_NVM, + IWL_NVM_EXT, + IWL_NVM_SDP, +}; + /* * This is the threshold value of plcp error rate per 100mSecs. It is * used to set and check for the validity of plcp_delta. @@ -320,7 +332,7 @@ struct iwl_pwr_tx_backoff { * @integrated: discrete or integrated * @gen2: a000 and on transport operation * @cdb: CDB support - * @ext_nvm: extended NVM format + * @nvm_type: see &enum iwl_nvm_type * * We enable the driver to be backward compatible wrt. hardware features. * API differences in uCode shouldn't be handled here but through TLVs @@ -342,6 +354,7 @@ struct iwl_cfg { const struct iwl_tt_params *thermal_params; enum iwl_device_family device_family; enum iwl_led_mode led_mode; + enum iwl_nvm_type nvm_type; u32 max_data_size; u32 max_inst_size; netdev_features_t features; @@ -369,7 +382,6 @@ struct iwl_cfg { use_tfh:1, gen2:1, cdb:1, - ext_nvm:1, dbgc_supported:1; u8 valid_tx_ant; u8 valid_rx_ant; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 35638404c24e..c3a5d8ccc95e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -77,7 +77,7 @@ #include "iwl-csr.h" /* NVM offsets (in words) definitions */ -enum wkp_nvm_offsets { +enum nvm_offsets { /* NVM HW-Section offset (in words) definitions */ SUBSYSTEM_ID = 0x0A, HW_ADDR = 0x15, @@ -92,7 +92,10 @@ enum wkp_nvm_offsets { /* NVM calibration section offset (in words) definitions */ NVM_CALIB_SECTION = 0x2B8, - XTAL_CALIB = 0x316 - NVM_CALIB_SECTION + XTAL_CALIB = 0x316 - NVM_CALIB_SECTION, + + /* NVM REGULATORY -Section offset (in words) definitions */ + NVM_CHANNELS_SDP = 0, }; enum ext_nvm_offsets { @@ -243,7 +246,7 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, u32 flags = IEEE80211_CHAN_NO_HT40; u32 last_5ghz_ht = LAST_5GHZ_HT; - if (cfg->ext_nvm) + if (cfg->nvm_type == IWL_NVM_EXT) last_5ghz_ht = LAST_5GHZ_HT_FAMILY_8000; if (!is_5ghz && (nvm_flags & NVM_CHANNEL_40MHZ)) { @@ -296,7 +299,7 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, num_2ghz_channels; const u8 *nvm_chan; - if (!cfg->ext_nvm) { + if (cfg->nvm_type != IWL_NVM_EXT) { num_of_ch = IWL_NUM_CHANNELS; nvm_chan = &iwl_nvm_channels[0]; num_2ghz_channels = NUM_2GHZ_CHANNELS; @@ -491,7 +494,7 @@ IWL_EXPORT_SYMBOL(iwl_init_sbands); static int iwl_get_sku(const struct iwl_cfg *cfg, const __le16 *nvm_sw, const __le16 *phy_sku) { - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) return le16_to_cpup(nvm_sw + SKU); return le32_to_cpup((__le32 *)(phy_sku + SKU_FAMILY_8000)); @@ -499,7 +502,7 @@ static int iwl_get_sku(const struct iwl_cfg *cfg, const __le16 *nvm_sw, static int iwl_get_nvm_version(const struct iwl_cfg *cfg, const __le16 *nvm_sw) { - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) return le16_to_cpup(nvm_sw + NVM_VERSION); else return le32_to_cpup((__le32 *)(nvm_sw + @@ -509,7 +512,7 @@ static int iwl_get_nvm_version(const struct iwl_cfg *cfg, const __le16 *nvm_sw) static int iwl_get_radio_cfg(const struct iwl_cfg *cfg, const __le16 *nvm_sw, const __le16 *phy_sku) { - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) return le16_to_cpup(nvm_sw + RADIO_CFG); return le32_to_cpup((__le32 *)(phy_sku + RADIO_CFG_FAMILY_EXT_NVM)); @@ -520,7 +523,7 @@ static int iwl_get_n_hw_addrs(const struct iwl_cfg *cfg, const __le16 *nvm_sw) { int n_hw_addr; - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) return le16_to_cpup(nvm_sw + N_HW_ADDRS); n_hw_addr = le32_to_cpup((__le32 *)(nvm_sw + N_HW_ADDRS_FAMILY_8000)); @@ -532,7 +535,7 @@ static void iwl_set_radio_cfg(const struct iwl_cfg *cfg, struct iwl_nvm_data *data, u32 radio_cfg) { - if (!cfg->ext_nvm) { + if (cfg->nvm_type != IWL_NVM_EXT) { data->radio_cfg_type = NVM_RF_CFG_TYPE_MSK(radio_cfg); data->radio_cfg_step = NVM_RF_CFG_STEP_MSK(radio_cfg); data->radio_cfg_dash = NVM_RF_CFG_DASH_MSK(radio_cfg); @@ -641,7 +644,7 @@ static int iwl_set_hw_address(struct iwl_trans *trans, { if (cfg->mac_addr_from_csr) { iwl_set_hw_address_from_csr(trans, data); - } else if (!cfg->ext_nvm) { + } else if (cfg->nvm_type != IWL_NVM_EXT) { const u8 *hw_addr = (const u8 *)(nvm_hw + HW_ADDR); /* The byte order is little endian 16 bit, meaning 214365 */ @@ -713,7 +716,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, u16 lar_config; const __le16 *ch_section; - if (!cfg->ext_nvm) + if (cfg->nvm_type != IWL_NVM_EXT) data = kzalloc(sizeof(*data) + sizeof(struct ieee80211_channel) * IWL_NUM_CHANNELS, @@ -747,7 +750,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, data->n_hw_addrs = iwl_get_n_hw_addrs(cfg, nvm_sw); - if (!cfg->ext_nvm) { + if (cfg->nvm_type != IWL_NVM_EXT) { /* Checking for required sections */ if (!nvm_calib) { IWL_ERR(trans, @@ -755,11 +758,15 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, kfree(data); return NULL; } + + ch_section = cfg->nvm_type == IWL_NVM_SDP ? + ®ulatory[NVM_CHANNELS_SDP] : + &nvm_sw[NVM_CHANNELS]; + /* in family 8000 Xtal calibration values moved to OTP */ data->xtal_calib[0] = *(nvm_calib + XTAL_CALIB); data->xtal_calib[1] = *(nvm_calib + XTAL_CALIB + 1); lar_enabled = true; - ch_section = &nvm_sw[NVM_CHANNELS]; } else { u16 lar_offset = data->nvm_version < 0xE39 ? NVM_LAR_OFFSET_OLD : @@ -793,7 +800,7 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u8 *nvm_chan, u32 flags = NL80211_RRF_NO_HT40; u32 last_5ghz_ht = LAST_5GHZ_HT; - if (cfg->ext_nvm) + if (cfg->nvm_type == IWL_NVM_EXT) last_5ghz_ht = LAST_5GHZ_HT_FAMILY_8000; if (ch_idx < NUM_2GHZ_CHANNELS && @@ -841,7 +848,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int ch_idx; u16 ch_flags; u32 reg_rule_flags, prev_reg_rule_flags = 0; - const u8 *nvm_chan = cfg->ext_nvm ? + const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ? iwl_ext_nvm_channels : iwl_nvm_channels; struct ieee80211_regdomain *regd; int size_of_regd; @@ -850,7 +857,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int center_freq, prev_center_freq = 0; int valid_rules = 0; bool new_rule; - int max_num_ch = cfg->ext_nvm ? + int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ? IWL_NUM_CHANNELS_EXT : IWL_NUM_CHANNELS; if (WARN_ON_ONCE(num_of_ch > NL80211_MAX_SUPP_REG_RULES)) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index d75da37a79f3..949e63418299 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1162,7 +1162,7 @@ static inline bool iwl_mvm_is_lar_supported(struct iwl_mvm *mvm) * Enable LAR only if it is supported by the FW (TLV) && * enabled in the NVM */ - if (mvm->cfg->ext_nvm) + if (mvm->cfg->nvm_type == IWL_NVM_EXT) return nvm_lar && tlv_lar; else return tlv_lar; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 422aa6be9932..fb25b6f29323 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -295,18 +295,24 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) const __be16 *hw; const __le16 *sw, *calib, *regulatory, *mac_override, *phy_sku; bool lar_enabled; + int regulatory_type; /* Checking for required sections */ - if (!mvm->trans->cfg->ext_nvm) { + if (mvm->trans->cfg->nvm_type != IWL_NVM_EXT) { if (!mvm->nvm_sections[NVM_SECTION_TYPE_SW].data || !mvm->nvm_sections[mvm->cfg->nvm_hw_section_num].data) { IWL_ERR(mvm, "Can't parse empty OTP/NVM sections\n"); return NULL; } } else { + if (mvm->trans->cfg->nvm_type == IWL_NVM_SDP) + regulatory_type = NVM_SECTION_TYPE_REGULATORY_SDP; + else + regulatory_type = NVM_SECTION_TYPE_REGULATORY; + /* SW and REGULATORY sections are mandatory */ if (!mvm->nvm_sections[NVM_SECTION_TYPE_SW].data || - !mvm->nvm_sections[NVM_SECTION_TYPE_REGULATORY].data) { + !mvm->nvm_sections[regulatory_type].data) { IWL_ERR(mvm, "Can't parse empty family 8000 OTP/NVM sections\n"); return NULL; @@ -330,11 +336,14 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) hw = (const __be16 *)sections[mvm->cfg->nvm_hw_section_num].data; sw = (const __le16 *)sections[NVM_SECTION_TYPE_SW].data; calib = (const __le16 *)sections[NVM_SECTION_TYPE_CALIBRATION].data; - regulatory = (const __le16 *)sections[NVM_SECTION_TYPE_REGULATORY].data; mac_override = (const __le16 *)sections[NVM_SECTION_TYPE_MAC_OVERRIDE].data; phy_sku = (const __le16 *)sections[NVM_SECTION_TYPE_PHY_SKU].data; + regulatory = mvm->trans->cfg->nvm_type == IWL_NVM_SDP ? + (const __le16 *)sections[NVM_SECTION_TYPE_REGULATORY_SDP].data : + (const __le16 *)sections[NVM_SECTION_TYPE_REGULATORY].data; + lar_enabled = !iwlwifi_mod_params.lar_disable && fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_LAR_SUPPORT); @@ -394,7 +403,7 @@ int iwl_mvm_read_external_nvm(struct iwl_mvm *mvm) IWL_DEBUG_EEPROM(mvm->trans->dev, "Read from external NVM\n"); /* Maximal size depends on NVM version */ - if (!mvm->trans->cfg->ext_nvm) + if (mvm->trans->cfg->nvm_type != IWL_NVM_EXT) max_section_size = IWL_MAX_NVM_SECTION_SIZE; else max_section_size = IWL_MAX_EXT_NVM_SECTION_SIZE; @@ -465,7 +474,7 @@ int iwl_mvm_read_external_nvm(struct iwl_mvm *mvm) break; } - if (!mvm->trans->cfg->ext_nvm) { + if (mvm->trans->cfg->nvm_type != IWL_NVM_EXT) { section_size = 2 * NVM_WORD1_LEN(le16_to_cpu(file_sec->word1)); section_id = NVM_WORD2_ID(le16_to_cpu(file_sec->word2)); @@ -740,7 +749,7 @@ int iwl_mvm_init_mcc(struct iwl_mvm *mvm) struct ieee80211_regdomain *regd; char mcc[3]; - if (mvm->cfg->ext_nvm) { + if (mvm->cfg->nvm_type == IWL_NVM_EXT) { tlv_lar = fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_LAR_SUPPORT); nvm_lar = mvm->nvm_data->lar_enabled; -- cgit v1.2.3-70-g09d2 From 265e60a170d0a0ecfc2d20490134ed2c48dd45ab Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Thu, 17 Aug 2017 20:42:26 +1000 Subject: powerpc/64s: Use emergency stack for kernel TM Bad Thing program checks When using transactional memory (TM), the CPU can be in one of six states as far as TM is concerned, encoded in the Machine State Register (MSR). Certain state transitions are illegal and if attempted trigger a "TM Bad Thing" type program check exception. If we ever hit one of these exceptions it's treated as a bug, ie. we oops, and kill the process and/or panic, depending on configuration. One case where we can trigger a TM Bad Thing, is when returning to userspace after a system call or interrupt, using RFID. When this happens the CPU first restores the user register state, in particular r1 (the stack pointer) and then attempts to update the MSR. However the MSR update is not allowed and so we take the program check with the user register state, but the kernel MSR. This tricks the exception entry code into thinking we have a bad kernel stack pointer, because the MSR says we're coming from the kernel, but r1 is pointing to userspace. To avoid this we instead always switch to the emergency stack if we take a TM Bad Thing from the kernel. That way none of the user register values are used, other than for printing in the oops message. This is the fix for CVE-2017-1000255. Fixes: 5d176f751ee3 ("powerpc: tm: Enable transactional memory (TM) lazily for userspace") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Cyril Bur [mpe: Rewrite change log & comments, tweak asm slightly] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 48da0f5d2f7f..b82586c53560 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -734,7 +734,29 @@ EXC_REAL(program_check, 0x700, 0x100) EXC_VIRT(program_check, 0x4700, 0x100, 0x700) TRAMP_KVM(PACA_EXGEN, 0x700) EXC_COMMON_BEGIN(program_check_common) - EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) + /* + * It's possible to receive a TM Bad Thing type program check with + * userspace register values (in particular r1), but with SRR1 reporting + * that we came from the kernel. Normally that would confuse the bad + * stack logic, and we would report a bad kernel stack pointer. Instead + * we switch to the emergency stack if we're taking a TM Bad Thing from + * the kernel. + */ + li r10,MSR_PR /* Build a mask of MSR_PR .. */ + oris r10,r10,0x200000@h /* .. and SRR1_PROGTM */ + and r10,r10,r12 /* Mask SRR1 with that. */ + srdi r10,r10,8 /* Shift it so we can compare */ + cmpldi r10,(0x200000 >> 8) /* .. with an immediate. */ + bne 1f /* If != go to normal path. */ + + /* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack */ + andi. r10,r12,MSR_PR; /* Set CR0 correctly for label */ + /* 3 in EXCEPTION_PROLOG_COMMON */ + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + b 3f /* Jump into the macro !! */ +1: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD -- cgit v1.2.3-70-g09d2 From 044215d145a7a8a60ffa8fdc859d110a795fa6ea Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Tue, 22 Aug 2017 17:20:09 -0400 Subject: powerpc/tm: Fix illegal TM state in signal handler Currently it's possible that on returning from the signal handler through the restore_tm_sigcontexts() code path (e.g. from a signal caught due to a `trap` instruction executed in the middle of an HTM block, or a deliberately constructed sigframe) an illegal TM state (like TS=10 TM=0, i.e. "T0") is set in SRR1 and when `rfid` sets implicitly the MSR register from SRR1 register on return to userspace it causes a TM Bad Thing exception. That illegal state can be set (a) by a malicious user that disables the TM bit by tweaking the bits in uc_mcontext before returning from the signal handler or (b) by a sufficient number of context switches occurring such that the load_tm counter overflows and TM is disabled whilst in the signal handler. This commit fixes the illegal TM state by ensuring that TM bit is always enabled before we return from restore_tm_sigcontexts(). A small comment correction is made as well. Fixes: 5d176f751ee3 ("powerpc: tm: Enable transactional memory (TM) lazily for userspace") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Gustavo Romero Signed-off-by: Breno Leitao Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/signal_64.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c83c115858c1..b2c002993d78 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -452,9 +452,20 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, if (MSR_TM_RESV(msr)) return -EINVAL; - /* pull in MSR TM from user context */ + /* pull in MSR TS bits from user context */ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + /* + * Ensure that TM is enabled in regs->msr before we leave the signal + * handler. It could be the case that (a) user disabled the TM bit + * through the manipulation of the MSR bits in uc_mcontext or (b) the + * TM bit was disabled because a sufficient number of context switches + * happened whilst in the signal handler and load_tm overflowed, + * disabling the TM bit. In either case we can end up with an illegal + * TM state leading to a TM Bad Thing when we return to userspace. + */ + regs->msr |= MSR_TM; + /* pull in MSR LE from user context */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); -- cgit v1.2.3-70-g09d2 From e9516c0813aeb89ebd19ec0ed39fbfcd78b6ef3a Mon Sep 17 00:00:00 2001 From: Mark Santaniello Date: Fri, 6 Oct 2017 01:07:22 -0700 Subject: perf script: Add missing separator for "-F ip,brstack" (and brstackoff) Prior to commit 55b9b50811ca ("perf script: Support -F brstack,dso and brstacksym,dso"), we were printing a space before the brstack data. It seems that this space was important. Without it, parsing is difficult. Very sorry for the mistake. Notice here how the "ip" and "brstack" run together: $ perf script -F ip,brstack | head -n 1 22e18c40x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 0x22e195d/0x22e1990/P/-/-/0 0x22e18e9/0x22e1943/P/-/-/0 0x22e1a69/0x22e18c0/P/-/-/0 0x22e19f7/0x22e1a20/P/-/-/0 0x22e1910/0x22e19ee/P/-/-/0 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 0x22e195d/0x22e1990/P/-/-/0 0x22e18e9/0x22e1943/P/-/-/0 0x22e1a69/0x22e18c0/P/-/-/0 0x22e19f7/0x22e1a20/P/-/-/0 0x22e1910/0x22e19ee/P/-/-/0 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 After this diff, sanity is restored: $ perf script -F ip,brstack | head -n 1 22e18c4 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 0x22e195d/0x22e1990/P/-/-/0 0x22e18e9/0x22e1943/P/-/-/0 0x22e1a69/0x22e18c0/P/-/-/0 0x22e19f7/0x22e1a20/P/-/-/0 0x22e1910/0x22e19ee/P/-/-/0 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 0x22e195d/0x22e1990/P/-/-/0 0x22e18e9/0x22e1943/P/-/-/0 0x22e1a69/0x22e18c0/P/-/-/0 0x22e19f7/0x22e1a20/P/-/-/0 0x22e1910/0x22e19ee/P/-/-/0 0x22e19e2/0x22e190b/P/-/-/0 0x22e19a1/0x22e19d0/P/-/-/0 Signed-off-by: Mark Santaniello Cc: Alexander Shishkin Cc: Peter Zijlstra Cc: 4.13+ Fixes: 55b9b50811ca ("perf script: Support -F brstack,dso and brstacksym,dso") Link: http://lkml.kernel.org/r/20171006080722.3442046-1-marksan@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3d4c3b5e1868..0c977b6e0f8b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -586,7 +586,7 @@ static void print_sample_brstack(struct perf_sample *sample, thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); } - printf("0x%"PRIx64, from); + printf(" 0x%"PRIx64, from); if (PRINT_FIELD(DSO)) { printf("("); map__fprintf_dsoname(alf.map, stdout); @@ -681,7 +681,7 @@ static void print_sample_brstackoff(struct perf_sample *sample, if (alt.map && !alt.map->dso->adjust_symbols) to = map__map_ip(alt.map, to); - printf("0x%"PRIx64, from); + printf(" 0x%"PRIx64, from); if (PRINT_FIELD(DSO)) { printf("("); map__fprintf_dsoname(alf.map, stdout); -- cgit v1.2.3-70-g09d2 From 5f9bfe0ef622a7bb9707c22ceb4b6451e1e2cb7b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Oct 2017 17:18:27 +0200 Subject: netfilter: nf_tables: do not dump chain counters if not enabled Chain counters are only enabled on demand since 9f08ea848117, skip them when dumping them via netlink. Fixes: 9f08ea848117 ("netfilter: nf_tables: keep chain counters away from hot path") Reported-by: Johny Mattsson Tested-by: Johny Mattsson Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 34adedcb239e..64e1ee091225 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1048,7 +1048,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) goto nla_put_failure; - if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) + if (basechain->stats && nft_dump_stats(skb, basechain->stats)) goto nla_put_failure; } -- cgit v1.2.3-70-g09d2 From e466af75c074e76107ae1cd5a2823e9c61894ffb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 02:50:07 -0700 Subject: netfilter: x_tables: avoid stack-out-of-bounds read in xt_copy_counters_from_user syzkaller reports an out of bound read in strlcpy(), triggered by xt_copy_counters_from_user() Fix this by using memcpy(), then forcing a zero byte at the last position of the destination, as Florian did for the non COMPAT code. Fixes: d7591f0c41ce ("netfilter: x_tables: introduce and use xt_copy_counters_from_user") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c83a3b5e1c6c..d8571f414208 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) return ERR_PTR(-EFAULT); - strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1); info->num_counters = compat_tmp.num_counters; user += sizeof(compat_tmp); } else @@ -905,9 +905,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, if (copy_from_user(info, user, sizeof(*info)) != 0) return ERR_PTR(-EFAULT); - info->name[sizeof(info->name) - 1] = '\0'; user += sizeof(*info); } + info->name[sizeof(info->name) - 1] = '\0'; size = sizeof(struct xt_counters); size *= info->num_counters; -- cgit v1.2.3-70-g09d2 From c0d8832e78cbfd4a64b7112e34920af4b0b0e60e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 6 Oct 2017 14:16:52 +0100 Subject: arm64: Ensure the instruction emulation is ready for userspace We trap and emulate some instructions (e.g, mrs, deprecated instructions) for the userspace. However the handlers for these are registered as late_initcalls and the userspace could be up and running from the initramfs by that time (with populate_rootfs, which is a rootfs_initcall()). This could cause problems for the early applications ending up in failure like : [ 11.152061] modprobe[93]: undefined instruction: pc=0000ffff8ca48ff4 This patch promotes the specific calls to core_initcalls, which are guaranteed to be completed before we hit userspace. Cc: stable@vger.kernel.org Cc: Dave Martin Cc: Matthias Brugger Cc: James Morse Reported-by: Matwey V. Kornilov Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/armv8_deprecated.c | 2 +- arch/arm64/kernel/cpufeature.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index f0e6d717885b..d06fbe4cd38d 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -649,4 +649,4 @@ static int __init armv8_deprecated_init(void) return 0; } -late_initcall(armv8_deprecated_init); +core_initcall(armv8_deprecated_init); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cd52d365d1f0..21e2c95d24e7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1307,4 +1307,4 @@ static int __init enable_mrs_emulation(void) return 0; } -late_initcall(enable_mrs_emulation); +core_initcall(enable_mrs_emulation); -- cgit v1.2.3-70-g09d2 From ae2e972dae3cea795e9f8f94eb1601213c2d49f0 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 6 Oct 2017 14:16:53 +0100 Subject: arm64: Ensure fpsimd support is ready before userspace is active We register the pm/hotplug callbacks for FPSIMD as late_initcall, which happens after the userspace is active (from initramfs via populate_rootfs, a rootfs_initcall). Make sure we are ready even before the userspace could potentially use it, by promoting to a core_initcall. Cc: Will Deacon Cc: Mark Rutland Cc: Dave Martin Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index f444f374bd7b..5d547deb6996 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -444,4 +444,4 @@ static int __init fpsimd_init(void) return 0; } -late_initcall(fpsimd_init); +core_initcall(fpsimd_init); -- cgit v1.2.3-70-g09d2 From ab8eb7db1dcc03e7ae9eb379884fd701af6b1a38 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 22 Sep 2017 19:49:11 +0300 Subject: ARC: [plat-hsdk]: Add reset controller node to manage ethernet reset DW ethernet controller on HSDK hangs sometimes after SW reset, so add reset node to make possible to reset DW ethernet controller HW. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 9 +++++++++ arch/arc/configs/hsdk_defconfig | 1 + 2 files changed, 10 insertions(+) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index b922f3faf554..8adde1b492f1 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -12,6 +12,7 @@ /dts-v1/; #include +#include / { model = "snps,hsdk"; @@ -102,6 +103,12 @@ ranges = <0x00000000 0xf0000000 0x10000000>; + cgu_rst: reset-controller@8a0 { + compatible = "snps,hsdk-reset"; + #reset-cells = <1>; + reg = <0x8A0 0x4>, <0xFF0 0x4>; + }; + core_clk: core-clk@0 { compatible = "snps,hsdk-core-pll-clock"; reg = <0x00 0x10>, <0x14B8 0x4>; @@ -158,6 +165,8 @@ clocks = <&gmacclk>; clock-names = "stmmaceth"; phy-handle = <&phy0>; + resets = <&cgu_rst HSDK_ETH_RESET>; + reset-names = "stmmaceth"; mdio { #address-cells = <1>; diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 7b8f8faf8a24..15f0f6b5fec1 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -63,6 +63,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set +CONFIG_RESET_HSDK=y CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y -- cgit v1.2.3-70-g09d2 From 6151b8b37b119e8e3a8401b080d532520c95faf4 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 6 Oct 2017 17:05:49 +0200 Subject: ppp: fix race in ppp device destruction ppp_release() tries to ensure that netdevices are unregistered before decrementing the unit refcount and running ppp_destroy_interface(). This is all fine as long as the the device is unregistered by ppp_release(): the unregister_netdevice() call, followed by rtnl_unlock(), guarantee that the unregistration process completes before rtnl_unlock() returns. However, the device may be unregistered by other means (like ppp_nl_dellink()). If this happens right before ppp_release() calling rtnl_lock(), then ppp_release() has to wait for the concurrent unregistration code to release the lock. But rtnl_unlock() releases the lock before completing the device unregistration process. This allows ppp_release() to proceed and eventually call ppp_destroy_interface() before the unregistration process completes. Calling free_netdev() on this partially unregistered device will BUG(): ------------[ cut here ]------------ kernel BUG at net/core/dev.c:8141! invalid opcode: 0000 [#1] SMP CPU: 1 PID: 1557 Comm: pppd Not tainted 4.14.0-rc2+ #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014 Call Trace: ppp_destroy_interface+0xd8/0xe0 [ppp_generic] ppp_disconnect_channel+0xda/0x110 [ppp_generic] ppp_unregister_channel+0x5e/0x110 [ppp_generic] pppox_unbind_sock+0x23/0x30 [pppox] pppoe_connect+0x130/0x440 [pppoe] SYSC_connect+0x98/0x110 ? do_fcntl+0x2c0/0x5d0 SyS_connect+0xe/0x10 entry_SYSCALL_64_fastpath+0x1a/0xa5 RIP: free_netdev+0x107/0x110 RSP: ffffc28a40573d88 ---[ end trace ed294ff0cc40eeff ]--- We could set the ->needs_free_netdev flag on PPP devices and move the ppp_destroy_interface() logic in the ->priv_destructor() callback. But that'd be quite intrusive as we'd first need to unlink from the other channels and units that depend on the device (the ones that used the PPPIOCCONNECT and PPPIOCATTACH ioctls). Instead, we can just let the netdevice hold a reference on its ppp_file. This reference is dropped in ->priv_destructor(), at the very end of the unregistration process, so that neither ppp_release() nor ppp_disconnect_channel() can call ppp_destroy_interface() in the interim. Reported-by: Beniamino Galvani Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index c3f77e3b7819..e365866600ba 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1339,7 +1339,17 @@ ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) static int ppp_dev_init(struct net_device *dev) { + struct ppp *ppp; + netdev_lockdep_set_classes(dev); + + ppp = netdev_priv(dev); + /* Let the netdevice take a reference on the ppp file. This ensures + * that ppp_destroy_interface() won't run before the device gets + * unregistered. + */ + atomic_inc(&ppp->file.refcnt); + return 0; } @@ -1362,6 +1372,15 @@ static void ppp_dev_uninit(struct net_device *dev) wake_up_interruptible(&ppp->file.rwait); } +static void ppp_dev_priv_destructor(struct net_device *dev) +{ + struct ppp *ppp; + + ppp = netdev_priv(dev); + if (atomic_dec_and_test(&ppp->file.refcnt)) + ppp_destroy_interface(ppp); +} + static const struct net_device_ops ppp_netdev_ops = { .ndo_init = ppp_dev_init, .ndo_uninit = ppp_dev_uninit, @@ -1387,6 +1406,7 @@ static void ppp_setup(struct net_device *dev) dev->tx_queue_len = 3; dev->type = ARPHRD_PPP; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->priv_destructor = ppp_dev_priv_destructor; netif_keep_dst(dev); } -- cgit v1.2.3-70-g09d2 From fbce4d97fd4333bcffd00a73b9d98412be630332 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 4 Oct 2017 10:28:56 +0200 Subject: scsi: fixup kernel warning during rmmod() Calling rmmod() on a FC driver will results in warnings like WARNING: CPU: 60 PID: 14640 at fs/sysfs/group.c:237 device_del+0x54/0x240() sysfs group ffffffff81eff140 not found for kobject '3:0:0:3' The problem here is that during scsi_remove_target() we will iterate over all devices, but fail to remove any of those as the call to scsi_device_get() fails the check to module_is_live(). Hence the devices will not be removed at this point, but all intermediate structures like fc rport etc. will be. Later on during scsi_forget_host() the devices are removed for real, but the device parent is already removed and causes this warning. Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Kyle Fortin Tested-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index bf53356f41f0..f796bd61f3f0 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1376,13 +1376,19 @@ static void __scsi_remove_target(struct scsi_target *starget) spin_lock_irqsave(shost->host_lock, flags); restart: list_for_each_entry(sdev, &shost->__devices, siblings) { + /* + * We cannot call scsi_device_get() here, as + * we might've been called from rmmod() causing + * scsi_device_get() to fail the module_is_live() + * check. + */ if (sdev->channel != starget->channel || sdev->id != starget->id || - scsi_device_get(sdev)) + !get_device(&sdev->sdev_gendev)) continue; spin_unlock_irqrestore(shost->host_lock, flags); scsi_remove_device(sdev); - scsi_device_put(sdev); + put_device(&sdev->sdev_gendev); spin_lock_irqsave(shost->host_lock, flags); goto restart; } -- cgit v1.2.3-70-g09d2 From d1b3f51ee1eab3a6db1b09a60e61280c48eb0b01 Mon Sep 17 00:00:00 2001 From: Satish Kharat Date: Thu, 5 Oct 2017 16:41:21 -0700 Subject: scsi: libfc: fix a deadlock in fc_rport_work In places like fc_rport_recv_plogi_req and fcoe_ctlr_vn_add we always take the lport disc_mutex lock before the rports mutex (rp_mutex) lock. Gaurding list_del_rcu(&rdata->peers) with disc.disc_mutex in fc_rport_work is correct but the rp_mutex lock can and should to be dropped before taking that lock else results in a deadlock. Signed-off-by: Satish Kharat Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_rport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 520325867e2b..31d31aad3de1 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -383,11 +383,11 @@ static void fc_rport_work(struct work_struct *work) fc_rport_enter_flogi(rdata); mutex_unlock(&rdata->rp_mutex); } else { + mutex_unlock(&rdata->rp_mutex); FC_RPORT_DBG(rdata, "work delete\n"); mutex_lock(&lport->disc.disc_mutex); list_del_rcu(&rdata->peers); mutex_unlock(&lport->disc.disc_mutex); - mutex_unlock(&rdata->rp_mutex); kref_put(&rdata->kref, fc_rport_destroy); } } else { -- cgit v1.2.3-70-g09d2 From f08f58a2bf68900a84e782b8c7ad701c0654173c Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 6 Oct 2017 15:52:52 +0200 Subject: ARM: dts: bcm283x: Fix console path on RPi3 Contrary to other RPi devices, RPi3 uses uart0 to communicate with the BCM43438 bluetooth controller. uart1 is then used for the console. Today, the console configuration is inherited from the bcm283x dtsi (bootargs) which is not the correct one for the RPi3. This leads to routing issue and confuses the Bluetooth controller with unexpected data. This patch introduces chosen/stdout path to configure console to uart0 on bcm283x family and overwrite it to uart1 in the RPi3 dts. Create serial0/1 aliases referring to uart0 and uart1 paths. Remove unneeded earlyprintk. Fixes: 4188ea2aeb6d ("ARM: bcm283x: Define UART pinmuxing on board level") Signed-off-by: Loic Poulain Tested-by: Stefan Wahren Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt --- arch/arm/boot/dts/bcm2835-rpi-zero-w.dts | 9 +++------ arch/arm/boot/dts/bcm2837-rpi-3-b.dts | 5 +++++ arch/arm/boot/dts/bcm283x.dtsi | 7 ++++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index 82651c3eb682..b8565fc33eea 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -18,12 +18,9 @@ compatible = "raspberrypi,model-zero-w", "brcm,bcm2835"; model = "Raspberry Pi Zero W"; - /* Needed by firmware to properly init UARTs */ - aliases { - uart0 = "/soc/serial@7e201000"; - uart1 = "/soc/serial@7e215040"; - serial0 = "/soc/serial@7e201000"; - serial1 = "/soc/serial@7e215040"; + chosen { + /* 8250 auxiliary UART instead of pl011 */ + stdout-path = "serial1:115200n8"; }; leds { diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts index 20725ca487f3..c71a0d73d2a2 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts @@ -8,6 +8,11 @@ compatible = "raspberrypi,3-model-b", "brcm,bcm2837"; model = "Raspberry Pi 3 Model B"; + chosen { + /* 8250 auxiliary UART instead of pl011 */ + stdout-path = "serial1:115200n8"; + }; + memory { reg = <0 0x40000000>; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 431dcfc900c0..013431e3d7c3 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -20,8 +20,13 @@ #address-cells = <1>; #size-cells = <1>; + aliases { + serial0 = &uart0; + serial1 = &uart1; + }; + chosen { - bootargs = "earlyprintk console=ttyAMA0"; + stdout-path = "serial0:115200n8"; }; thermal-zones { -- cgit v1.2.3-70-g09d2 From 532f419cde077ffe9616c97902af177fbb868b17 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 Sep 2017 11:35:39 +0200 Subject: crypto: stm32 - Try to fix hash padding gcc warns that the length for the extra unaligned data in the hash function may be used unaligned. In theory this could happen if we pass a zero-length sg_list, or if sg_is_last() was never true: In file included from drivers/crypto/stm32/stm32-hash.c:23: drivers/crypto/stm32/stm32-hash.c: In function 'stm32_hash_one_request': include/uapi/linux/kernel.h:12:49: error: 'ncp' may be used uninitialized in this function [-Werror=maybe-uninitialized] #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) Neither of these can happen in practice, so the warning is harmless. However while trying to suppress the warning, I noticed multiple problems with that code: - On big-endian kernels, we byte-swap the data like we do for register accesses, however this is a data stream and almost certainly needs to use a single writesl() instead of series of writel() to give the correct hash. - If the length is not a multiple of four bytes, we skip the last word entirely, since we write the truncated length using stm32_hash_set_nblw(). - If we change the code to round the length up rather than down, the last bytes contain stale data, so it needs some form of padding. This tries to address all four problems, by correctly initializing the length to zero, using endian-safe copy functions, adding zero-padding and passing the padded length. I have done no testing on this patch, so please review carefully and if possible test with an unaligned length and big-endian kernel builds. Fixes: 8a1012d3f2ab ("crypto: stm32 - Support for STM32 HASH module") Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/crypto/stm32/stm32-hash.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index b585ce54a802..4835dd4a9e50 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -553,9 +553,9 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct scatterlist sg[1], *tsg; - int err = 0, len = 0, reg, ncp; + int err = 0, len = 0, reg, ncp = 0; unsigned int i; - const u32 *buffer = (const u32 *)rctx->buffer; + u32 *buffer = (void *)rctx->buffer; rctx->sg = hdev->req->src; rctx->total = hdev->req->nbytes; @@ -620,10 +620,13 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) reg |= HASH_CR_DMAA; stm32_hash_write(hdev, HASH_CR, reg); - for (i = 0; i < DIV_ROUND_UP(ncp, sizeof(u32)); i++) - stm32_hash_write(hdev, HASH_DIN, buffer[i]); - - stm32_hash_set_nblw(hdev, ncp); + if (ncp) { + memset(buffer + ncp, 0, + DIV_ROUND_UP(ncp, sizeof(u32)) - ncp); + writesl(hdev->io_base + HASH_DIN, buffer, + DIV_ROUND_UP(ncp, sizeof(u32))); + } + stm32_hash_set_nblw(hdev, DIV_ROUND_UP(ncp, sizeof(u32))); reg = stm32_hash_read(hdev, HASH_STR); reg |= HASH_STR_DCAL; stm32_hash_write(hdev, HASH_STR, reg); -- cgit v1.2.3-70-g09d2 From 5125e4e867ab888f2d4b443a1ce463adefb370db Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Tue, 26 Sep 2017 08:17:44 +0200 Subject: crypto: xts - Fix an error handling path in 'create()' All error handling paths 'goto err_drop_spawn' except this one. In order to avoid some resources leak, we should do it as well here. Fixes: f1c131b45410 ("crypto: xts - Convert to skcipher") Signed-off-by: Christophe JAILLET Signed-off-by: Herbert Xu --- crypto/xts.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/xts.c b/crypto/xts.c index d86c11a8c882..e31828ed0046 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -554,8 +554,10 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) ctx->name[len - 1] = 0; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, - "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; + "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME) { + err = -ENAMETOOLONG; + goto err_drop_spawn; + } } else goto err_drop_spawn; -- cgit v1.2.3-70-g09d2 From 9039f3ef446e9ffa200200c934f049add9e58426 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 3 Oct 2017 10:25:22 +0800 Subject: crypto: shash - Fix a sleep-in-atomic bug in shash_setkey_unaligned The SCTP program may sleep under a spinlock, and the function call path is: sctp_generate_t3_rtx_event (acquire the spinlock) sctp_do_sm sctp_side_effects sctp_cmd_interpreter sctp_make_init_ack sctp_pack_cookie crypto_shash_setkey shash_setkey_unaligned kmalloc(GFP_KERNEL) For the same reason, the orinoco driver may sleep in interrupt handler, and the function call path is: orinoco_rx_isr_tasklet orinoco_rx orinoco_mic crypto_shash_setkey shash_setkey_unaligned kmalloc(GFP_KERNEL) To fix it, GFP_KERNEL is replaced with GFP_ATOMIC. This bug is found by my static analysis tool and my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Herbert Xu --- crypto/shash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/shash.c b/crypto/shash.c index 5e31c8d776df..8fcecc66741d 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -41,7 +41,7 @@ static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, int err; absize = keylen + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)); - buffer = kmalloc(absize, GFP_KERNEL); + buffer = kmalloc(absize, GFP_ATOMIC); if (!buffer) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 0cabf2af6f5ac3c88cb106c4e06087a5a39b8e1e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 7 Oct 2017 11:29:48 +0800 Subject: crypto: skcipher - Fix crash on zero-length input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The skcipher walk interface doesn't handle zero-length input properly as the old blkcipher walk interface did. This is due to the fact that the length check is done too late. This patch moves the length check forward so that it does the right thing. Fixes: b286d8b1a690 ("crypto: skcipher - Add skcipher walk...") Cc: Reported-by: Stephan Müller Signed-off-by: Herbert Xu --- crypto/skcipher.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 4faa0fd53b0c..d5692e35fab1 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -426,14 +426,9 @@ static int skcipher_copy_iv(struct skcipher_walk *walk) static int skcipher_walk_first(struct skcipher_walk *walk) { - walk->nbytes = 0; - if (WARN_ON_ONCE(in_irq())) return -EDEADLK; - if (unlikely(!walk->total)) - return 0; - walk->buffer = NULL; if (unlikely(((unsigned long)walk->iv & walk->alignmask))) { int err = skcipher_copy_iv(walk); @@ -452,10 +447,15 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + walk->total = req->cryptlen; + walk->nbytes = 0; + + if (unlikely(!walk->total)) + return 0; + scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); - walk->total = req->cryptlen; walk->iv = req->iv; walk->oiv = req->iv; @@ -509,6 +509,11 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, struct crypto_aead *tfm = crypto_aead_reqtfm(req); int err; + walk->nbytes = 0; + + if (unlikely(!walk->total)) + return 0; + walk->flags &= ~SKCIPHER_WALK_PHYS; scatterwalk_start(&walk->in, req->src); -- cgit v1.2.3-70-g09d2 From 028568d84da3cfca49f5f846eeeef01441d70451 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 2 Oct 2017 17:07:28 +0900 Subject: kbuild: revert $(realpath ...) to $(shell cd ... && /bin/pwd) I thought commit 8e9b46679923 ("kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)") was a safe conversion, but it changed the behavior. $(abspath ...) / $(realpath ...) does not expand shell special characters, such as '~'. Here is a simple Makefile example: ---------------->8---------------- $(info /bin/pwd: $(shell cd ~/; /bin/pwd)) $(info abspath: $(abspath ~/)) $(info realpath: $(realpath ~/)) all: @: ---------------->8---------------- $ make /bin/pwd: /home/masahiro abspath: /home/masahiro/workspace/~ realpath: This can be a real problem if 'make O=~/foo' is invoked from another Makefile or primitive shell like dash. This commit partially reverts 8e9b46679923. Fixes: 8e9b46679923 ("kbuild: use $(abspath ...) instead of $(shell cd ... && /bin/pwd)") Reported-by: Julien Grall Signed-off-by: Masahiro Yamada Tested-by: Julien Grall --- Makefile | 4 ++-- tools/power/cpupower/Makefile | 2 +- tools/scripts/Makefile.include | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index cf007a31d575..1b487175f3bc 100644 --- a/Makefile +++ b/Makefile @@ -130,8 +130,8 @@ endif ifneq ($(KBUILD_OUTPUT),) # check that the output directory actually exists saved-output := $(KBUILD_OUTPUT) -$(shell [ -d $(KBUILD_OUTPUT) ] || mkdir -p $(KBUILD_OUTPUT)) -KBUILD_OUTPUT := $(realpath $(KBUILD_OUTPUT)) +KBUILD_OUTPUT := $(shell mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) \ + && /bin/pwd) $(if $(KBUILD_OUTPUT),, \ $(error failed to create output directory "$(saved-output)")) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 4c5a481a850c..d6e1c02ddcfe 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -26,7 +26,7 @@ endif ifneq ($(OUTPUT),) # check that the output directory actually exists -OUTDIR := $(realpath $(OUTPUT)) +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 9dc8f078a83c..1e8b6116ba3c 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,7 +1,7 @@ ifneq ($(O),) ifeq ($(origin O), command line) - ABSOLUTE_O := $(realpath $(O)) - dummy := $(if $(ABSOLUTE_O),,$(error O=$(O) does not exist)) + dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(O) ; pwd) OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) COMMAND_O := O=$(ABSOLUTE_O) ifeq ($(objtree),) @@ -12,7 +12,7 @@ endif # check that the output directory actually exists ifneq ($(OUTPUT),) -OUTDIR := $(realpath $(OUTPUT)) +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif -- cgit v1.2.3-70-g09d2 From 990404188581cba45937ebad24a4b1910a4ec568 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Mon, 25 Sep 2017 20:25:10 +0800 Subject: kbuild: drop unused symverfile in Makefile.modpost Since commit 040fcc819a2e ("kbuild: improved modversioning support for external modules"), symverfile has been replaced with kernelsymfile and modulesymfile. Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 16923ba4b5b1..756d14f0d763 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -97,7 +97,6 @@ vmlinux.o: FORCE $(call cmd,kernel-mod) # Declare generated files as targets for modpost -$(symverfile): __modpost ; $(modules:.ko=.mod.c): __modpost ; -- cgit v1.2.3-70-g09d2 From 2cc3ce24a9874451e6751a062cc2ae5040fd8bee Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Wed, 4 Oct 2017 01:53:26 +0200 Subject: kbuild: Fix optimization level choice default The choice containing the CC_OPTIMIZE_FOR_PERFORMANCE symbol accidentally added a "CONFIG_" prefix when trying to make it the default, selecting an undefined symbol as the default. The mistake is harmless here: Since the default symbol is not visible, the choice falls back on using the visible symbol as the default instead, which is CC_OPTIMIZE_FOR_PERFORMANCE, as intended. A patch that makes Kconfig print a warning in this case has been submitted separately: http://www.spinics.net/lists/linux-kbuild/msg15566.html Signed-off-by: Ulf Magnusson Acked-by: Arnd Bergmann Signed-off-by: Masahiro Yamada --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 78cb2461012e..3c1faaa2af4a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1033,7 +1033,7 @@ endif choice prompt "Compiler optimization level" - default CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE + default CC_OPTIMIZE_FOR_PERFORMANCE config CC_OPTIMIZE_FOR_PERFORMANCE bool "Optimize for performance" -- cgit v1.2.3-70-g09d2 From bbfe63b60aad29bb0ed65a1968519765bd68368a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 6 Oct 2017 17:17:52 -0700 Subject: Makefile: kselftest: fix grammar typo Correct typo in kselftest help text. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1b487175f3bc..d5ed3cec7938 100644 --- a/Makefile +++ b/Makefile @@ -1395,7 +1395,7 @@ help: @echo ' Build, install, and boot kernel before' @echo ' running kselftest on it' @echo ' kselftest-clean - Remove all generated kselftest files' - @echo ' kselftest-merge - Merge all the config dependencies of kselftest to existed' + @echo ' kselftest-merge - Merge all the config dependencies of kselftest to existing' @echo ' .config.' @echo '' @echo 'Userspace tools targets:' -- cgit v1.2.3-70-g09d2 From 80ac93c274411a55ae731f259f75e4ca5e499e8b Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 3 Oct 2017 11:17:05 -0500 Subject: gpio: omap: Fix lost edge interrupts Now acking of edge irqs happens the following way: - omap_gpio_irq_handler - "isr" = read irq status - omap_clear_gpio_irqbank(bank, isr_saved & ~level_mask); ^ clear edge status, so irq can be accepted - loop while "isr" generic_handle_irq() - handle_edge_irq() - desc->irq_data.chip->irq_ack(&desc->irq_data); - omap_gpio_ack_irq() it might be that at this moment edge IRQ was triggered again and it will be cleared and IRQ will be lost. Use handle_simple_irq and clear edge interrupts early without disabling them in omap_gpio_irq_handler to avoid loosing interrupts. [1] https://marc.info/?l=linux-omap&m=149004465313534&w=2 Signed-off-by: Grygorii Strashko Signed-off-by: Ladislav Michl Signed-off-by: Linus Walleij --- drivers/gpio/gpio-omap.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 22d7d4838265..3233b72b6828 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -518,7 +518,13 @@ static int omap_gpio_irq_type(struct irq_data *d, unsigned type) if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) irq_set_handler_locked(d, handle_level_irq); else if (type & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) - irq_set_handler_locked(d, handle_edge_irq); + /* + * Edge IRQs are already cleared/acked in irq_handler and + * not need to be masked, as result handle_edge_irq() + * logic is excessed here and may cause lose of interrupts. + * So just use handle_simple_irq. + */ + irq_set_handler_locked(d, handle_simple_irq); return 0; @@ -678,7 +684,7 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset) static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) { void __iomem *isr_reg = NULL; - u32 isr; + u32 enabled, isr, level_mask; unsigned int bit; struct gpio_bank *bank = gpiobank; unsigned long wa_lock_flags; @@ -691,23 +697,21 @@ static irqreturn_t omap_gpio_irq_handler(int irq, void *gpiobank) pm_runtime_get_sync(bank->chip.parent); while (1) { - u32 isr_saved, level_mask = 0; - u32 enabled; - raw_spin_lock_irqsave(&bank->lock, lock_flags); enabled = omap_get_gpio_irqbank_mask(bank); - isr_saved = isr = readl_relaxed(isr_reg) & enabled; + isr = readl_relaxed(isr_reg) & enabled; if (bank->level_mask) level_mask = bank->level_mask & enabled; + else + level_mask = 0; /* clear edge sensitive interrupts before handler(s) are called so that we don't miss any interrupt occurred while executing them */ - omap_disable_gpio_irqbank(bank, isr_saved & ~level_mask); - omap_clear_gpio_irqbank(bank, isr_saved & ~level_mask); - omap_enable_gpio_irqbank(bank, isr_saved & ~level_mask); + if (isr & ~level_mask) + omap_clear_gpio_irqbank(bank, isr & ~level_mask); raw_spin_unlock_irqrestore(&bank->lock, lock_flags); -- cgit v1.2.3-70-g09d2 From a2d3f3e33853ef52e5f66b41c3e8ee5710aa3305 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 5 Oct 2017 19:03:05 +0200 Subject: ipv6: fix net.ipv6.conf.all.accept_dad behaviour for real Commit 35e015e1f577 ("ipv6: fix net.ipv6.conf.all interface DAD handlers") was intended to affect accept_dad flag handling in such a way that DAD operation and mode on a given interface would be selected according to the maximum value of conf/{all,interface}/accept_dad. However, addrconf_dad_begin() checks for particular cases in which we need to skip DAD, and this check was modified in the wrong way. Namely, it was modified so that, if the accept_dad flag is 0 for the given interface *or* for all interfaces, DAD would be skipped. We have instead to skip DAD if accept_dad is 0 for the given interface *and* for all interfaces. Fixes: 35e015e1f577 ("ipv6: fix net.ipv6.conf.all interface DAD handlers") Acked-by: Stefano Brivio Signed-off-by: Matteo Croce Reported-by: Erik Kline Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 96861c702c06..4a96ebbf8eda 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3820,8 +3820,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) goto out; if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - dev_net(dev)->ipv6.devconf_all->accept_dad < 1 || - idev->cnf.accept_dad < 1 || + (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 && + idev->cnf.accept_dad < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { bump_id = ifp->flags & IFA_F_TENTATIVE; -- cgit v1.2.3-70-g09d2 From 00a534e5ea5c21b95f58cbb2f7918cc9fa82dd47 Mon Sep 17 00:00:00 2001 From: Axel Beckert Date: Thu, 5 Oct 2017 22:00:33 +0200 Subject: doc: Fix typo "8023.ad" in bonding documentation Should be "802.3ad" like everywhere else in the document. Signed-off-by: Axel Beckert Signed-off-by: David S. Miller --- Documentation/networking/bonding.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index 57f52cdce32e..9ba04c0bab8d 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -2387,7 +2387,7 @@ broadcast: Like active-backup, there is not much advantage to this and packet type ID), so in a "gatewayed" configuration, all outgoing traffic will generally use the same device. Incoming traffic may also end up on a single device, but that is - dependent upon the balancing policy of the peer's 8023.ad + dependent upon the balancing policy of the peer's 802.3ad implementation. In a "local" configuration, traffic will be distributed across the devices in the bond. -- cgit v1.2.3-70-g09d2 From 8fe2d6ccd52b086268f2f36e5e2fc0fe3aeffa80 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 5 Oct 2017 16:20:56 -0700 Subject: bpf: fix liveness marking while processing Rx = Ry instruction the verifier does regs[insn->dst_reg] = regs[insn->src_reg] which often clears write mark (when Ry doesn't have it) that was just set by check_reg_arg(Rx) prior to the assignment. That causes mark_reg_read() to keep marking Rx in this block as REG_LIVE_READ (since the logic incorrectly misses that it's screened by the write) and in many of its parents (until lucky write into the same Rx or beginning of the program). That causes is_state_visited() logic to miss many pruning opportunities. Furthermore mark_reg_read() logic propagates the read mark for BPF_REG_FP as well (though it's readonly) which causes harmless but unnecssary work during is_state_visited(). Note that do_propagate_liveness() skips FP correctly, so do the same in mark_reg_read() as well. It saves 0.2 seconds for the test below program before after bpf_lb-DLB_L3.o 2604 2304 bpf_lb-DLB_L4.o 11159 3723 bpf_lb-DUNKNOWN.o 1116 1110 bpf_lxc-DDROP_ALL.o 34566 28004 bpf_lxc-DUNKNOWN.o 53267 39026 bpf_netdev.o 17843 16943 bpf_overlay.o 8672 7929 time ~11 sec ~4 sec Fixes: dc503a8ad984 ("bpf/verifier: track liveness for pruning") Signed-off-by: Alexei Starovoitov Acked-by: Edward Cree Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b914fbe1383e..8b8d6ba39e23 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -653,6 +653,10 @@ static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno) { struct bpf_verifier_state *parent = state->parent; + if (regno == BPF_REG_FP) + /* We don't need to worry about FP liveness because it's read-only */ + return; + while (parent) { /* if read wasn't screened by an earlier write ... */ if (state->regs[regno].live & REG_LIVE_WRITTEN) @@ -2345,6 +2349,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * copy register state to dest reg */ regs[insn->dst_reg] = regs[insn->src_reg]; + regs[insn->dst_reg].live |= REG_LIVE_WRITTEN; } else { /* R1 = (u32) R2 */ if (is_pointer_value(env, insn->src_reg)) { -- cgit v1.2.3-70-g09d2 From 845e405e5e6c9dc9ed10306a4b5bfeaefebc2e84 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 3 Oct 2017 12:00:49 -0500 Subject: pinctrl: cherryview: fix issues caused by dynamic gpio irqs mapping New GPIO IRQs are allocated and mapped dynamically by default when GPIO IRQ infrastructure is used by cherryview-pinctrl driver. This causes issues on some Intel platforms [1][2] with broken BIOS which hardcodes Linux IRQ numbers in their ACPI tables. On such platforms cherryview-pinctrl driver should allocate and map all GPIO IRQs at probe time. Side effect - "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n" can be seen at boot log. NOTE. It still may fail if boot sequence will changed and some interrupt controller will be probed before cherryview-pinctrl which will shift Linux IRQ numbering (expected with CONFIG_SPARCE_IRQ enabled). [1] https://bugzilla.kernel.org/show_bug.cgi?id=194945 [2] https://lkml.org/lkml/2017/9/28/153 Cc: Andy Shevchenko Cc: Chris Gorman Cc: Mika Westerberg Cc: Heikki Krogerus Signed-off-by: Grygorii Strashko Reported-by: Chris Gorman Reported-by: Mika Westerberg Tested-by: Chris Gorman Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 04e929fd0ffe..fadbca907c7c 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1577,6 +1577,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) struct gpio_chip *chip = &pctrl->chip; bool need_valid_mask = !dmi_check_system(chv_no_valid_mask); int ret, i, offset; + int irq_base; *chip = chv_gpio_chip; @@ -1622,7 +1623,18 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); - ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0, + if (!need_valid_mask) { + irq_base = devm_irq_alloc_descs(pctrl->dev, -1, 0, + chip->ngpio, NUMA_NO_NODE); + if (irq_base < 0) { + dev_err(pctrl->dev, "Failed to allocate IRQ numbers\n"); + return irq_base; + } + } else { + irq_base = 0; + } + + ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, irq_base, handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(pctrl->dev, "failed to add IRQ chip\n"); -- cgit v1.2.3-70-g09d2 From 5151b4afb41dd7c5e13a130efcc95326a49da8c6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 7 Oct 2017 16:53:20 -0700 Subject: iio: adc: dln2-adc: fix build error The dln2-adc driver uses interface(s) that are controlled by the IIO_TRIGGERED_BUFFER Kconfig symbol, so the driver needs to select that symbol to prevent the build error. drivers/iio/adc/dln2-adc.o: In function `dln2_adc_probe': dln2-adc.c:(.text+0x528): undefined reference to `devm_iio_triggered_buffer_setup' Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Jonathan Cameron Cc: linux-iio@vger.kernel.org Cc: Jack Andersen Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 57625653fcb6..1d13bf03c758 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -243,6 +243,8 @@ config DA9150_GPADC config DLN2_ADC tristate "Diolan DLN-2 ADC driver support" depends on MFD_DLN2 + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say yes here to build support for Diolan DLN-2 ADC. -- cgit v1.2.3-70-g09d2 From a69518cf0b4cbf02c6bc1239cdeb8750a9eb8077 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 8 Oct 2017 11:53:26 +0200 Subject: mlxsw: spectrum_router: Avoid expensive lookup during route removal In commit fc922bb0dd94 ("mlxsw: spectrum_router: Use one LPM tree for all virtual routers") I increased the scale of supported VRFs by having all of them share the same LPM tree. In order to avoid look-ups for prefix lengths that don't exist, each route removal would trigger an aggregation across all the active virtual routers to see which prefix lengths are in use and which aren't and structure the tree accordingly. With the way the data structures are currently laid out, this is a very expensive operation. When preformed repeatedly - due to the invocation of the abort mechanism - and with enough VRFs, this can result in a hung task. For now, avoid this optimization until it can be properly re-added in net-next. Fixes: fc922bb0dd94 ("mlxsw: spectrum_router: Use one LPM tree for all virtual routers") Signed-off-by: Ido Schimmel Reported-by: David Ahern Tested-by: David Ahern Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 032089efc1a0..c16718d296d3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3505,20 +3505,6 @@ static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib) { - struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; - struct mlxsw_sp_lpm_tree *lpm_tree; - - /* Aggregate prefix lengths across all virtual routers to make - * sure we only have used prefix lengths in the LPM tree. - */ - mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage); - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, - fib->proto); - if (IS_ERR(lpm_tree)) - goto err_tree_get; - mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); - -err_tree_get: if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) return; mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); -- cgit v1.2.3-70-g09d2 From 3d0241d57c7b25bb75ac9d7a62753642264fdbce Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 6 Oct 2017 19:02:35 +0300 Subject: gso: fix payload length when gso_size is zero When gso_size reset to zero for the tail segment in skb_segment(), later in ipv6_gso_segment(), __skb_udp_tunnel_segment() and gre_gso_segment() we will get incorrect results (payload length, pcsum) for that segment. inet_gso_segment() already has a check for gso_size before calculating payload. The issue was found with LTP vxlan & gre tests over ixgbe NIC. Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer") Signed-off-by: Alexey Kodanev Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/gre_offload.c | 2 +- net/ipv4/udp_offload.c | 2 +- net/ipv6/ip6_offload.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 416bb304a281..1859c473b21a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -86,7 +86,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); pcsum = (__sum16 *)(greh + 1); - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { unsigned int partial_adj; /* Adjust checksum to account for the fact that diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 97658bfc1b58..e360d55be555 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -120,7 +120,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index cdb3728faca7..4a87f9428ca5 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - if (gso_partial) + if (gso_partial && skb_is_gso(skb)) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); -- cgit v1.2.3-70-g09d2 From 8a5776a5f49812d29fe4b2d0a2d71675c3facf3f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 Oct 2017 20:53:29 -0700 Subject: Linux 4.14-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cf007a31d575..2835863bdd5a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3-70-g09d2 From 3382605fd8db1ed1fb03f3f1529490133fe3ab08 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Sat, 7 Oct 2017 14:32:49 +0200 Subject: tipc: correct initialization of skb list We change the initialization of the skb transmit buffer queues in the functions tipc_bcast_xmit() and tipc_rcast_xmit() to also initialize their spinlocks. This is needed because we may, during error conditions, need to call skb_queue_purge() on those queues further down the stack. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 7d99029df342..a140dd4a84af 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -233,7 +233,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, struct sk_buff_head xmitq; int rc = 0; - __skb_queue_head_init(&xmitq); + skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (tipc_link_bc_peers(l)) rc = tipc_link_xmit(l, pkts, &xmitq); @@ -263,7 +263,7 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, u32 dst, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); - __skb_queue_head_init(&_pkts); + skb_queue_head_init(&_pkts); list_for_each_entry_safe(n, tmp, &dests->list, list) { dst = n->value; -- cgit v1.2.3-70-g09d2 From a9e2971b8cd3ef469de0112ba15778b5b98ad72e Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Sat, 7 Oct 2017 15:07:20 +0200 Subject: tipc: Unclone message at secondary destination lookup When a bundling message is received, the function tipc_link_input() calls function tipc_msg_extract() to unbundle all inner messages of the bundling message before adding them to input queue. The function tipc_msg_extract() just clones all inner skb for all inner messagges from the bundling skb. This means that the skb headroom of an inner message overlaps with the data part of the preceding message in the bundle. If the message in question is a name addressed message, it may be subject to a secondary destination lookup, and eventually be sent out on one of the interfaces again. But, since what is perceived as headroom by the device driver in reality is the last bytes of the preceding message in the bundle, the latter will be overwritten by the MAC addresses of the L2 header. If the preceding message has not yet been consumed by the user, it will evenually be delivered with corrupted contents. This commit fixes this by uncloning all messages passing through the function tipc_msg_lookup_dest(), hence ensuring that the headroom is always valid when the message is passed on. Signed-off-by: Tung Nguyen Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 121e59a1d0e7..17146c16ee2d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -568,6 +568,14 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg_set_destnode(msg, dnode); msg_set_destport(msg, dport); *err = TIPC_OK; + + if (!skb_cloned(skb)) + return true; + + /* Unclone buffer in case it was bundled */ + if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) + return false; + return true; } -- cgit v1.2.3-70-g09d2 From 2f61929eb10a0cef383295d28b7933c395f82467 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Mon, 25 Sep 2017 11:42:36 +0200 Subject: ARM: dts: at91: at91-sama5d27_som1: fix PHY ID The PHY ID is incorrect. It leads to troubles when resuming from standby or mem power states. Signed-off-by: Ludovic Desroches Fixes: af690fa37e39 ("ARM: dts: at91: at91-sama5d27_som1: add sama5d27 SoM1 support") Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/at91-sama5d27_som1.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91-sama5d27_som1.dtsi b/arch/arm/boot/dts/at91-sama5d27_som1.dtsi index 63a5af898165..cf0087b4c9e1 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1.dtsi +++ b/arch/arm/boot/dts/at91-sama5d27_som1.dtsi @@ -67,8 +67,8 @@ pinctrl-0 = <&pinctrl_macb0_default>; phy-mode = "rmii"; - ethernet-phy@1 { - reg = <0x1>; + ethernet-phy@0 { + reg = <0x0>; interrupt-parent = <&pioA>; interrupts = ; pinctrl-names = "default"; -- cgit v1.2.3-70-g09d2 From bd998c2e0df0469707503023d50d46cf0b10c787 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Oct 2017 11:01:12 +0200 Subject: USB: serial: console: fix use-after-free on disconnect A clean-up patch removing two redundant NULL-checks from the console disconnect handler inadvertently also removed a third check. This could lead to the struct usb_serial being prematurely freed by the console code when a driver accepts but does not register any ports for an interface which also lacks endpoint descriptors. Fixes: 0e517c93dc02 ("USB: serial: console: clean up sanity checks") Cc: stable # 4.11 Reported-by: Andrey Konovalov Acked-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/console.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index fdf89800ebc3..ed8ba3ef5c79 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -265,7 +265,7 @@ static struct console usbcons = { void usb_serial_console_disconnect(struct usb_serial *serial) { - if (serial->port[0] == usbcons_info.port) { + if (serial->port[0] && serial->port[0] == usbcons_info.port) { usb_serial_console_exit(); usb_serial_put(serial); } -- cgit v1.2.3-70-g09d2 From 299d7572e46f98534033a9e65973f13ad1ce9047 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Oct 2017 11:01:13 +0200 Subject: USB: serial: console: fix use-after-free after failed setup Make sure to reset the USB-console port pointer when console setup fails in order to avoid having the struct usb_serial be prematurely freed by the console code when the device is later disconnected. Fixes: 73e487fdb75f ("[PATCH] USB console: fix disconnection issues") Cc: stable # 2.6.18 Acked-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/console.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index ed8ba3ef5c79..43a862a90a77 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -186,6 +186,7 @@ static int usb_console_setup(struct console *co, char *options) tty_kref_put(tty); reset_open_count: port->port.count = 0; + info->port = NULL; usb_autopm_put_interface(serial->interface); error_get_interface: usb_serial_put(serial); -- cgit v1.2.3-70-g09d2 From 49f817d793d1bcc11d721881aac037b996feef5c Mon Sep 17 00:00:00 2001 From: Lin Zhang Date: Fri, 6 Oct 2017 00:44:03 +0800 Subject: netfilter: SYNPROXY: skip non-tcp packet in {ipv4, ipv6}_synproxy_hook In function {ipv4,ipv6}_synproxy_hook we expect a normal tcp packet, but the real server maybe reply an icmp error packet related to the exist tcp conntrack, so we will access wrong tcp data. Fix it by checking for the protocol field and only process tcp traffic. Signed-off-by: Lin Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_SYNPROXY.c | 3 ++- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 811689e523c3..f75fc6b53115 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -330,7 +330,8 @@ static unsigned int ipv4_synproxy_hook(void *priv, if (synproxy == NULL) return NF_ACCEPT; - if (nf_is_loopback_packet(skb)) + if (nf_is_loopback_packet(skb) || + ip_hdr(skb)->protocol != IPPROTO_TCP) return NF_ACCEPT; thoff = ip_hdrlen(skb); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index a5cd43d75393..437af8c95277 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -353,7 +353,7 @@ static unsigned int ipv6_synproxy_hook(void *priv, nexthdr = ipv6_hdr(skb)->nexthdr; thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if (thoff < 0) + if (thoff < 0 || nexthdr != IPPROTO_TCP) return NF_ACCEPT; th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); -- cgit v1.2.3-70-g09d2 From 19e1d4e947cac3b5e08225d15ad7744e691c7376 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Oct 2017 12:41:36 +0200 Subject: genirq: Warn when effective affinity is not updated Emit a one time warning when the effective affinity mask is enabled in Kconfig, but the interrupt chip does not update the mask in its irq_set_affinity() callback, Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710042208400.2406@nanos --- kernel/irq/manage.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d00132b5c325..ef89f7246656 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -168,6 +168,19 @@ void irq_set_thread_affinity(struct irq_desc *desc) set_bit(IRQTF_AFFINITY, &action->thread_flags); } +static void irq_validate_effective_affinity(struct irq_data *data) +{ +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + const struct cpumask *m = irq_data_get_effective_affinity_mask(data); + struct irq_chip *chip = irq_data_get_irq_chip(data); + + if (!cpumask_empty(m)) + return; + pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n", + chip->name, data->irq); +#endif +} + int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { @@ -181,6 +194,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, case IRQ_SET_MASK_OK_DONE: cpumask_copy(desc->irq_common_data.affinity, mask); case IRQ_SET_MASK_OK_NOCOPY: + irq_validate_effective_affinity(data); irq_set_thread_affinity(desc); ret = 0; } -- cgit v1.2.3-70-g09d2 From 60b09c51bb4fb46e2331fdbb39f91520f31d35f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Oct 2017 12:47:24 +0200 Subject: genirq/cpuhotplug: Add sanity check for effective affinity mask The effective affinity mask handling has no safety net when the mask is not updated by the interrupt chip or the mask contains offline CPUs. If that happens the CPU unplug code fails to migrate interrupts. Add sanity checks and emit a warning when the mask contains only offline CPUs. Fixes: 415fcf1a2293 ("genirq/cpuhotplug: Use effective affinity mask") Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Christoph Hellwig Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710042208400.2406@nanos --- kernel/irq/cpuhotplug.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 638eb9c83d9f..9eb09aef0313 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -18,8 +18,34 @@ static inline bool irq_needs_fixup(struct irq_data *d) { const struct cpumask *m = irq_data_get_effective_affinity_mask(d); + unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(smp_processor_id(), m); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + /* + * The cpumask_empty() check is a workaround for interrupt chips, + * which do not implement effective affinity, but the architecture has + * enabled the config switch. Use the general affinity mask instead. + */ + if (cpumask_empty(m)) + m = irq_data_get_affinity_mask(d); + + /* + * Sanity check. If the mask is not empty when excluding the outgoing + * CPU then it must contain at least one online CPU. The outgoing CPU + * has been removed from the online mask already. + */ + if (cpumask_any_but(m, cpu) < nr_cpu_ids && + cpumask_any_and(m, cpu_online_mask) >= nr_cpu_ids) { + /* + * If this happens then there was a missed IRQ fixup at some + * point. Warn about it and enforce fixup. + */ + pr_warn("Eff. affinity %*pbl of IRQ %u contains only offline CPUs after offlining CPU %u\n", + cpumask_pr_args(m), d->irq, cpu); + return true; + } +#endif + return cpumask_test_cpu(cpu, m); } static bool migrate_one_irq(struct irq_desc *desc) -- cgit v1.2.3-70-g09d2 From e43b3b58548051f8809391eb7bec7a27ed3003ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Oct 2017 21:07:38 +0200 Subject: genirq/cpuhotplug: Enforce affinity setting on startup of managed irqs Managed interrupts can end up in a stale state on CPU hotplug. If the interrupt is not targeting a single CPU, i.e. the affinity mask spawns multiple CPUs then the following can happen: After boot: dstate: 0x01601200 IRQD_ACTIVATED IRQD_IRQ_STARTED IRQD_SINGLE_TARGET IRQD_AFFINITY_SET IRQD_AFFINITY_MANAGED node: 0 affinity: 24-31 effectiv: 24 pending: 0 After offlining CPU 31 - 24 dstate: 0x01a31000 IRQD_IRQ_DISABLED IRQD_IRQ_MASKED IRQD_SINGLE_TARGET IRQD_AFFINITY_SET IRQD_AFFINITY_MANAGED IRQD_MANAGED_SHUTDOWN node: 0 affinity: 24-31 effectiv: 24 pending: 0 Now CPU 25 gets onlined again, so it should get the effective interrupt affinity for this interruopt, but due to the x86 interrupt affinity setter restrictions this ends up after restarting the interrupt with: dstate: 0x01601300 IRQD_ACTIVATED IRQD_IRQ_STARTED IRQD_SINGLE_TARGET IRQD_AFFINITY_SET IRQD_SETAFFINITY_PENDING IRQD_AFFINITY_MANAGED node: 0 affinity: 24-31 effectiv: 24 pending: 24-31 So the interrupt is still affine to CPU 24, which was the last CPU to go offline of that affinity set and the move to an online CPU within 24-31, in this case 25, is pending. This mechanism is x86/ia64 specific as those architectures cannot move interrupts from thread context and do this when an interrupt is actually handled. So the move is set to pending. Whats worse is that offlining CPU 25 again results in: dstate: 0x01601300 IRQD_ACTIVATED IRQD_IRQ_STARTED IRQD_SINGLE_TARGET IRQD_AFFINITY_SET IRQD_SETAFFINITY_PENDING IRQD_AFFINITY_MANAGED node: 0 affinity: 24-31 effectiv: 24 pending: 24-31 This means the interrupt has not been shut down, because the outgoing CPU is not in the effective affinity mask, but of course nothing notices that the effective affinity mask is pointing at an offline CPU. In the case of restarting a managed interrupt the move restriction does not apply, so the affinity setting can be made unconditional. This needs to be done _before_ the interrupt is started up as otherwise the condition for moving it from thread context would not longer be fulfilled. With that change applied onlining CPU 25 after offlining 31-24 results in: dstate: 0x01600200 IRQD_ACTIVATED IRQD_IRQ_STARTED IRQD_SINGLE_TARGET IRQD_AFFINITY_MANAGED node: 0 affinity: 24-31 effectiv: 25 pending: And after offlining CPU 25: dstate: 0x01a30000 IRQD_IRQ_DISABLED IRQD_IRQ_MASKED IRQD_SINGLE_TARGET IRQD_AFFINITY_MANAGED IRQD_MANAGED_SHUTDOWN node: 0 affinity: 24-31 effectiv: 25 pending: which is the correct and expected result. Fixes: 761ea388e8c4 ("genirq: Handle managed irqs gracefully in irq_startup()") Reported-by: YASUAKI ISHIMATSU Signed-off-by: Thomas Gleixner Cc: axboe@kernel.dk Cc: linux-scsi@vger.kernel.org Cc: Sumit Saxena Cc: Marc Zyngier Cc: mpe@ellerman.id.au Cc: Shivasharan Srikanteshwara Cc: Kashyap Desai Cc: keith.busch@intel.com Cc: peterz@infradead.org Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1710042208400.2406@nanos --- kernel/irq/chip.c | 2 +- kernel/irq/manage.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6fc89fd93824..5a2ef92c2782 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -265,8 +265,8 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force) irq_setup_affinity(desc); break; case IRQ_STARTUP_MANAGED: + irq_do_set_affinity(d, aff, false); ret = __irq_startup(desc); - irq_set_affinity_locked(d, aff, false); break; case IRQ_STARTUP_ABORT: return 0; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ef89f7246656..4bff6a10ae8e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -188,6 +188,9 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_chip *chip = irq_data_get_irq_chip(data); int ret; + if (!chip || !chip->irq_set_affinity) + return -EINVAL; + ret = chip->irq_set_affinity(data, mask, force); switch (ret) { case IRQ_SET_MASK_OK: -- cgit v1.2.3-70-g09d2 From 924c6b900cfdf376b07bccfd80e62b21914f8a5a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 8 Oct 2017 21:53:05 -0700 Subject: x86/mm/64: Fix reboot interaction with CR4.PCIDE Trying to reboot via real mode fails with PCID on: long mode cannot be exited while CR4.PCIDE is set. (No, I have no idea why, but the SDM and actual CPUs are in agreement here.) The result is a GPF and a hang instead of a reboot. I didn't catch this in testing because neither my computer nor my VM reboots this way. I can trigger it with reboot=bios, though. Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems") Reported-and-tested-by: Steven Rostedt (VMware) Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Link: https://lkml.kernel.org/r/f1e7d965998018450a7a70c2823873686a8b21c0.1507524746.git.luto@kernel.org --- arch/x86/kernel/reboot.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 54180fa6f66f..add33f600531 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -105,6 +105,10 @@ void __noreturn machine_real_restart(unsigned int type) load_cr3(initial_page_table); #else write_cr3(real_mode_header->trampoline_pgd); + + /* Exiting long mode will fail if CR4.PCIDE is set. */ + if (static_cpu_has(X86_FEATURE_PCID)) + cr4_clear_bits(X86_CR4_PCIDE); #endif /* Jump to the identity-mapped low memory code */ -- cgit v1.2.3-70-g09d2 From 6b32c126d33d5cb379bca280ab8acedc1ca978ff Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 5 Oct 2017 20:30:12 +0200 Subject: x86/alternatives: Fix alt_max_short macro to really be a max() The alt_max_short() macro in asm/alternative.h does not work as intended, leading to nasty bugs. E.g. alt_max_short("1", "3") evaluates to 3, but alt_max_short("3", "1") evaluates to 1 -- not exactly the maximum of 1 and 3. In fact, I had to learn it the hard way by crashing my kernel in not so funny ways by attempting to make use of the ALTENATIVE_2 macro with alternatives where the first one was larger than the second one. According to [1] and commit dbe4058a6a44 ("x86/alternatives: Fix ALTERNATIVE_2 padding generation properly") the right handed side should read "-(-(a < b))" not "-(-(a - b))". Fix that, to make the macro work as intended. While at it, fix up the comments regarding the additional "-", too. It's not about gas' usage of s32 but brain dead logic of having a "true" value of -1 for the < operator ... *sigh* Btw., the one in asm/alternative-asm.h is correct. And, apparently, all current users of ALTERNATIVE_2() pass same sized alternatives, avoiding to hit the bug. [1] http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax Reviewed-and-tested-by: Borislav Petkov Fixes: dbe4058a6a44 ("x86/alternatives: Fix ALTERNATIVE_2 padding generation properly") Signed-off-by: Mathias Krause Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1507228213-13095-1-git-send-email-minipli@googlemail.com --- arch/x86/include/asm/alternative-asm.h | 4 +++- arch/x86/include/asm/alternative.h | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e7636bac7372..6c98821fef5e 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -62,8 +62,10 @@ #define new_len2 145f-144f /* - * max without conditionals. Idea adapted from: + * gas compatible max based on the idea from: * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + * + * The additional "-" is needed because gas uses a "true" value of -1. */ #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c096624137ae..ccbe24e697c4 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end) alt_end_marker ":\n" /* - * max without conditionals. Idea adapted from: + * gas compatible max based on the idea from: * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax * - * The additional "-" is needed because gas works with s32s. + * The additional "-" is needed because gas uses a "true" value of -1. */ -#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))" +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))" /* * Pad the second replacement alternative with additional NOPs if it is -- cgit v1.2.3-70-g09d2 From c247487c0dd6fefff6ed0cbcbe66f037721755fb Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 6 Oct 2017 02:04:06 +0800 Subject: ALSA: usb-audio: Add sample rate quirk for Plantronics P610 Like other Plantronics devices, P610 does not support sample rate reading. Apply sample rate quirk to it. BugLink: https://bugs.launchpad.net/bugs/1719853 Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index b8cb57aeec77..9ddaae3784f5 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1138,6 +1138,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x047F, 0xC022): /* Plantronics C310 */ + case USB_ID(0x047F, 0xC02F): /* Plantronics P610 */ case USB_ID(0x047F, 0xC036): /* Plantronics C520-M */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ -- cgit v1.2.3-70-g09d2 From 5803b023881857db32ffefa0d269c90280a67ee0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Oct 2017 10:02:56 +0200 Subject: ALSA: seq: Fix copy_from_user() call inside lock The event handler in the virmidi sequencer code takes a read-lock for the linked list traverse, while it's calling snd_seq_dump_var_event() in the loop. The latter function may expand the user-space data depending on the event type. It eventually invokes copy_from_user(), which might be a potential dead-lock. The sequencer core guarantees that the user-space data is passed only with atomic=0 argument, but snd_virmidi_dev_receive_event() ignores it and always takes read-lock(). For avoiding the problem above, this patch introduces rwsem for non-atomic case, while keeping rwlock for atomic case. Also while we're at it: the superfluous irq flags is dropped in snd_virmidi_input_open(). Reported-by: Jia-Ju Bai Cc: Signed-off-by: Takashi Iwai --- include/sound/seq_virmidi.h | 1 + sound/core/seq/seq_virmidi.c | 27 +++++++++++++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/include/sound/seq_virmidi.h b/include/sound/seq_virmidi.h index a03acd0d398a..695257ae64ac 100644 --- a/include/sound/seq_virmidi.h +++ b/include/sound/seq_virmidi.h @@ -60,6 +60,7 @@ struct snd_virmidi_dev { int port; /* created/attached port */ unsigned int flags; /* SNDRV_VIRMIDI_* */ rwlock_t filelist_lock; + struct rw_semaphore filelist_sem; struct list_head filelist; }; diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 8d93a4021c78..f48a4cd24ffc 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -77,13 +77,17 @@ static void snd_virmidi_init_event(struct snd_virmidi *vmidi, * decode input event and put to read buffer of each opened file */ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, - struct snd_seq_event *ev) + struct snd_seq_event *ev, + bool atomic) { struct snd_virmidi *vmidi; unsigned char msg[4]; int len; - read_lock(&rdev->filelist_lock); + if (atomic) + read_lock(&rdev->filelist_lock); + else + down_read(&rdev->filelist_sem); list_for_each_entry(vmidi, &rdev->filelist, list) { if (!vmidi->trigger) continue; @@ -97,7 +101,10 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, snd_rawmidi_receive(vmidi->substream, msg, len); } } - read_unlock(&rdev->filelist_lock); + if (atomic) + read_unlock(&rdev->filelist_lock); + else + up_read(&rdev->filelist_sem); return 0; } @@ -115,7 +122,7 @@ int snd_virmidi_receive(struct snd_rawmidi *rmidi, struct snd_seq_event *ev) struct snd_virmidi_dev *rdev; rdev = rmidi->private_data; - return snd_virmidi_dev_receive_event(rdev, ev); + return snd_virmidi_dev_receive_event(rdev, ev, true); } #endif /* 0 */ @@ -130,7 +137,7 @@ static int snd_virmidi_event_input(struct snd_seq_event *ev, int direct, rdev = private_data; if (!(rdev->flags & SNDRV_VIRMIDI_USE)) return 0; /* ignored */ - return snd_virmidi_dev_receive_event(rdev, ev); + return snd_virmidi_dev_receive_event(rdev, ev, atomic); } /* @@ -209,7 +216,6 @@ static int snd_virmidi_input_open(struct snd_rawmidi_substream *substream) struct snd_virmidi_dev *rdev = substream->rmidi->private_data; struct snd_rawmidi_runtime *runtime = substream->runtime; struct snd_virmidi *vmidi; - unsigned long flags; vmidi = kzalloc(sizeof(*vmidi), GFP_KERNEL); if (vmidi == NULL) @@ -223,9 +229,11 @@ static int snd_virmidi_input_open(struct snd_rawmidi_substream *substream) vmidi->client = rdev->client; vmidi->port = rdev->port; runtime->private_data = vmidi; - write_lock_irqsave(&rdev->filelist_lock, flags); + down_write(&rdev->filelist_sem); + write_lock_irq(&rdev->filelist_lock); list_add_tail(&vmidi->list, &rdev->filelist); - write_unlock_irqrestore(&rdev->filelist_lock, flags); + write_unlock_irq(&rdev->filelist_lock); + up_write(&rdev->filelist_sem); vmidi->rdev = rdev; return 0; } @@ -264,9 +272,11 @@ static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream) struct snd_virmidi_dev *rdev = substream->rmidi->private_data; struct snd_virmidi *vmidi = substream->runtime->private_data; + down_write(&rdev->filelist_sem); write_lock_irq(&rdev->filelist_lock); list_del(&vmidi->list); write_unlock_irq(&rdev->filelist_lock); + up_write(&rdev->filelist_sem); snd_midi_event_free(vmidi->parser); substream->runtime->private_data = NULL; kfree(vmidi); @@ -520,6 +530,7 @@ int snd_virmidi_new(struct snd_card *card, int device, struct snd_rawmidi **rrmi rdev->rmidi = rmidi; rdev->device = device; rdev->client = -1; + init_rwsem(&rdev->filelist_sem); rwlock_init(&rdev->filelist_lock); INIT_LIST_HEAD(&rdev->filelist); rdev->seq_mode = SNDRV_VIRMIDI_SEQ_DISPATCH; -- cgit v1.2.3-70-g09d2 From 78279127253a6c36ed8829eb2b7bc28ef48d9717 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Mon, 9 Oct 2017 14:46:41 +0800 Subject: drm/atomic: Unref duplicated drm_atomic_state in drm_atomic_helper_resume() Kmemleak reported memory leak after suspend and resume: unreferenced object 0xffffffc0e31d8880 (size 128): comm "bash", pid 181, jiffies 4294763583 (age 24.694s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 20 a2 eb c0 ff ff ff ......... ...... 01 00 00 00 00 00 00 00 80 87 1d e3 c0 ff ff ff ................ backtrace: [] __save_stack_trace+0x48/0x6c [] create_object+0x138/0x254 [] kmemleak_alloc+0x58/0x8c [] kmem_cache_alloc_trace+0x188/0x254 [] drm_atomic_state_alloc+0x3c/0x88 [] drm_atomic_helper_duplicate_state+0x28/0x158 [] drm_atomic_helper_suspend+0x5c/0xf0 Problem here is that we are duplicating the drm_atomic_state in drm_atomic_helper_suspend(), but not unreference it in the resume path. Fixes: 1494276000db ("drm/atomic-helper: Implement subsystem-level suspend/resume") Signed-off-by: Jeffy Chen Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171009064641.15174-1-jeffy.chen@rock-chips.com Fixes: 0853695c3ba4 ("drm: Add reference counting to drm_atomic_state") Cc: # v4.10+ (cherry picked from commit 6d281b1f79e194c02125da29ea77316810261ca8) --- drivers/gpu/drm/drm_atomic_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 4e53aae9a1fb..0028591f3f95 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -2960,6 +2960,7 @@ out: drm_modeset_backoff(&ctx); } + drm_atomic_state_put(state); drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); -- cgit v1.2.3-70-g09d2 From 94c3390ab84a6b449accc7351ffda4a0c17bdb92 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 27 Sep 2017 09:14:58 +0100 Subject: MIPS: bpf: Fix uninitialised target compiler error Compiling ebpf_jit.c with gcc 4.9 results in a (likely spurious) compiler warning, as gcc has detected that the variable "target" may be used uninitialised. Since -Werror is active, this is treated as an error and causes a kernel build failure whenever CONFIG_MIPS_EBPF_JIT is enabled. arch/mips/net/ebpf_jit.c: In function 'build_one_insn': arch/mips/net/ebpf_jit.c:1118:80: error: 'target' may be used uninitialized in this function [-Werror=maybe-uninitialized] emit_instr(ctx, j, target); ^ cc1: all warnings being treated as errors Fix this by initialising "target" to 0. If it really is used uninitialised this would result in a jump to 0 and a detectable run time failure. Signed-off-by: Matt Redfearn Fixes: b6bd53f9c4e8 ("MIPS: Add missing file for eBPF JIT.") Cc: James Hogan Cc: David Daney Cc: David S. Miller Cc: Colin Ian King Cc: Daniel Borkmann Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: # v4.13+ Patchwork: https://patchwork.linux-mips.org/patch/17375/ Signed-off-by: Ralf Baechle --- arch/mips/net/ebpf_jit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 7646891c4e9b..01b7a87ea678 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -667,7 +667,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, { int src, dst, r, td, ts, mem_off, b_off; bool need_swap, did_move, cmp_eq; - unsigned int target; + unsigned int target = 0; u64 t64; s64 t64s; int bpf_op = BPF_OP(insn->code); -- cgit v1.2.3-70-g09d2 From 1b6ad6df8b4fd723d8d98b670b0d7772402e4e34 Mon Sep 17 00:00:00 2001 From: Kelvin Cheung Date: Fri, 6 Oct 2017 21:13:18 +0800 Subject: MIPS: loongson1: set default number of rx and tx queues for stmmac Set the default number of RX and TX queues due to the recent changes of stmmac driver. Otherwise the ethernet will crash once it starts. Signed-off-by: Kelvin Cheung Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17452/ Signed-off-by: Ralf Baechle --- arch/mips/loongson32/common/platform.c | 38 +++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c index 100f23dfa438..ac584c5823d0 100644 --- a/arch/mips/loongson32/common/platform.c +++ b/arch/mips/loongson32/common/platform.c @@ -183,18 +183,20 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv) } static struct plat_stmmacenet_data ls1x_eth0_pdata = { - .bus_id = 0, - .phy_addr = -1, + .bus_id = 0, + .phy_addr = -1, #if defined(CONFIG_LOONGSON1_LS1B) - .interface = PHY_INTERFACE_MODE_MII, + .interface = PHY_INTERFACE_MODE_MII, #elif defined(CONFIG_LOONGSON1_LS1C) - .interface = PHY_INTERFACE_MODE_RMII, + .interface = PHY_INTERFACE_MODE_RMII, #endif - .mdio_bus_data = &ls1x_mdio_bus_data, - .dma_cfg = &ls1x_eth_dma_cfg, - .has_gmac = 1, - .tx_coe = 1, - .init = ls1x_eth_mux_init, + .mdio_bus_data = &ls1x_mdio_bus_data, + .dma_cfg = &ls1x_eth_dma_cfg, + .has_gmac = 1, + .tx_coe = 1, + .rx_queues_to_use = 1, + .tx_queues_to_use = 1, + .init = ls1x_eth_mux_init, }; static struct resource ls1x_eth0_resources[] = { @@ -222,14 +224,16 @@ struct platform_device ls1x_eth0_pdev = { #ifdef CONFIG_LOONGSON1_LS1B static struct plat_stmmacenet_data ls1x_eth1_pdata = { - .bus_id = 1, - .phy_addr = -1, - .interface = PHY_INTERFACE_MODE_MII, - .mdio_bus_data = &ls1x_mdio_bus_data, - .dma_cfg = &ls1x_eth_dma_cfg, - .has_gmac = 1, - .tx_coe = 1, - .init = ls1x_eth_mux_init, + .bus_id = 1, + .phy_addr = -1, + .interface = PHY_INTERFACE_MODE_MII, + .mdio_bus_data = &ls1x_mdio_bus_data, + .dma_cfg = &ls1x_eth_dma_cfg, + .has_gmac = 1, + .tx_coe = 1, + .rx_queues_to_use = 1, + .tx_queues_to_use = 1, + .init = ls1x_eth_mux_init, }; static struct resource ls1x_eth1_resources[] = { -- cgit v1.2.3-70-g09d2 From 98589a0998b8b13c4a8fa1ccb0e62751a019faa5 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 9 Oct 2017 15:27:15 +0300 Subject: netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1' Commit 2c16d6033264 ("netfilter: xt_bpf: support ebpf") introduced support for attaching an eBPF object by an fd, with the 'bpf_mt_check_v1' ABI expecting the '.fd' to be specified upon each IPT_SO_SET_REPLACE call. However this breaks subsequent iptables calls: # iptables -A INPUT -m bpf --object-pinned /sys/fs/bpf/xxx -j ACCEPT # iptables -A INPUT -s 5.6.7.8 -j ACCEPT iptables: Invalid argument. Run `dmesg' for more information. That's because iptables works by loading existing rules using IPT_SO_GET_ENTRIES to userspace, then issuing IPT_SO_SET_REPLACE with the replacement set. However, the loaded 'xt_bpf_info_v1' has an arbitrary '.fd' number (from the initial "iptables -m bpf" invocation) - so when 2nd invocation occurs, userspace passes a bogus fd number, which leads to 'bpf_mt_check_v1' to fail. One suggested solution [1] was to hack iptables userspace, to perform a "entries fixup" immediatley after IPT_SO_GET_ENTRIES, by opening a new, process-local fd per every 'xt_bpf_info_v1' entry seen. However, in [2] both Pablo Neira Ayuso and Willem de Bruijn suggested to depricate the xt_bpf_info_v1 ABI dealing with pinned ebpf objects. This fix changes the XT_BPF_MODE_FD_PINNED behavior to ignore the given '.fd' and instead perform an in-kernel lookup for the bpf object given the provided '.path'. It also defines an alias for the XT_BPF_MODE_FD_PINNED mode, named XT_BPF_MODE_PATH_PINNED, to better reflect the fact that the user is expected to provide the path of the pinned object. Existing XT_BPF_MODE_FD_ELF behavior (non-pinned fd mode) is preserved. References: [1] https://marc.info/?l=netfilter-devel&m=150564724607440&w=2 [2] https://marc.info/?l=netfilter-devel&m=150575727129880&w=2 Reported-by: Rafael Buchbinder Signed-off-by: Shmulik Ladkani Acked-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/bpf.h | 5 +++++ include/uapi/linux/netfilter/xt_bpf.h | 1 + kernel/bpf/inode.c | 1 + net/netfilter/xt_bpf.c | 22 ++++++++++++++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8390859e79e7..f1af7d63d678 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -368,6 +368,11 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } +static inline int bpf_obj_get_user(const char __user *pathname) +{ + return -EOPNOTSUPP; +} + static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key) { diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index b97725af2ac0..da161b56c79e 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -23,6 +23,7 @@ enum xt_bpf_modes { XT_BPF_MODE_FD_PINNED, XT_BPF_MODE_FD_ELF, }; +#define XT_BPF_MODE_PATH_PINNED XT_BPF_MODE_FD_PINNED struct xt_bpf_info_v1 { __u16 mode; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index e833ed914358..be1dde967208 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -363,6 +363,7 @@ out: putname(pname); return ret; } +EXPORT_SYMBOL_GPL(bpf_obj_get_user); static void bpf_evict_inode(struct inode *inode) { diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 38986a95216c..29123934887b 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) return 0; } +static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) +{ + mm_segment_t oldfs = get_fs(); + int retval, fd; + + set_fs(KERNEL_DS); + fd = bpf_obj_get_user(path); + set_fs(oldfs); + if (fd < 0) + return fd; + + retval = __bpf_mt_check_fd(fd, ret); + sys_close(fd); + return retval; +} + static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; @@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par) return __bpf_mt_check_bytecode(info->bpf_program, info->bpf_program_num_elem, &info->filter); - else if (info->mode == XT_BPF_MODE_FD_PINNED || - info->mode == XT_BPF_MODE_FD_ELF) + else if (info->mode == XT_BPF_MODE_FD_ELF) return __bpf_mt_check_fd(info->fd, &info->filter); + else if (info->mode == XT_BPF_MODE_PATH_PINNED) + return __bpf_mt_check_path(info->path, &info->filter); else return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 84c70b2395c652fa0702f338ca4b7f992531ee00 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 15 Jun 2017 16:24:54 +0300 Subject: ARM: dts: at91: sama5d2_xplained: enable ADTRG pin Enable pinctrl for ADTRG pin (PD31) for ADC hardware trigger support. Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Acked-by: Jonathan Cameron Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/at91-sama5d2_xplained.dts | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts index c7e9ccf2bc87..cbc26001247b 100644 --- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts @@ -309,7 +309,7 @@ vddana-supply = <&vdd_3v3_lp_reg>; vref-supply = <&vdd_3v3_lp_reg>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_adc_default>; + pinctrl-0 = <&pinctrl_adc_default &pinctrl_adtrg_default>; status = "okay"; }; @@ -340,6 +340,20 @@ bias-disable; }; + /* + * The ADTRG pin can work on any edge type. + * In here it's being pulled up, so need to + * connect it to ground to get an edge e.g. + * Trigger can be configured on falling, rise + * or any edge, and the pull-up can be changed + * to pull-down or left floating according to + * needs. + */ + pinctrl_adtrg_default: adtrg_default { + pinmux = ; + bias-pull-up; + }; + pinctrl_charger_chglev: charger_chglev { pinmux = ; bias-disable; -- cgit v1.2.3-70-g09d2 From 27d90f46f253ccc7c5447f6fa62505acb1c246fe Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 15 Jun 2017 16:24:56 +0300 Subject: ARM: dts: at91: sama5d2: add ADC hw trigger edge type Added ADTRG edge type property as interrupt edge type value Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Acked-by: Jonathan Cameron Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/sama5d2.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 38d2216c7ead..b1a26b42d190 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -1430,6 +1430,7 @@ atmel,min-sample-rate-hz = <200000>; atmel,max-sample-rate-hz = <20000000>; atmel,startup-time-ms = <4>; + atmel,trigger-edge-type = ; status = "disabled"; }; -- cgit v1.2.3-70-g09d2 From cb02ffc76a53b5ea751b79b8d4f4d180e5868475 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Oct 2017 14:32:15 +0200 Subject: ALSA: line6: Fix missing initialization before error path The error path in podhd_init() tries to clear the pending timer, while the timer object is initialized at the end of init sequence, thus it may hit the uninitialized object, as spotted by syzkaller: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 1 PID: 1845 Comm: kworker/1:2 Not tainted 4.14.0-rc2-42613-g1488251d1a98 #238 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x292/0x395 lib/dump_stack.c:52 register_lock_class+0x6c4/0x1a00 kernel/locking/lockdep.c:769 __lock_acquire+0x27e/0x4550 kernel/locking/lockdep.c:3385 lock_acquire+0x259/0x620 kernel/locking/lockdep.c:4002 del_timer_sync+0x12c/0x280 kernel/time/timer.c:1237 podhd_disconnect+0x8c/0x160 sound/usb/line6/podhd.c:299 line6_probe+0x844/0x1310 sound/usb/line6/driver.c:783 podhd_probe+0x64/0x70 sound/usb/line6/podhd.c:474 .... For addressing it, assure the initializations of timer and work by moving them to the beginning of podhd_init(). Fixes: 790869dacc3d ("ALSA: line6: Add support for POD X3") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Signed-off-by: Takashi Iwai --- sound/usb/line6/podhd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index 956f847a96e4..14ab82ea7e82 100644 --- a/sound/usb/line6/podhd.c +++ b/sound/usb/line6/podhd.c @@ -317,6 +317,9 @@ static int podhd_init(struct usb_line6 *line6, line6->disconnect = podhd_disconnect; + init_timer(&pod->startup_timer); + INIT_WORK(&pod->startup_work, podhd_startup_workqueue); + if (pod->line6.properties->capabilities & LINE6_CAP_CONTROL) { /* claim the data interface */ intf = usb_ifnum_to_if(line6->usbdev, @@ -358,8 +361,6 @@ static int podhd_init(struct usb_line6 *line6, } /* init device and delay registering */ - init_timer(&pod->startup_timer); - INIT_WORK(&pod->startup_work, podhd_startup_workqueue); podhd_startup(pod); return 0; } -- cgit v1.2.3-70-g09d2 From 54a4b2b45817ea2365b40c923c098a26af0c0dbb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Oct 2017 14:26:27 +0200 Subject: ALSA: line6: Fix NULL dereference at podhd_disconnect() When podhd_init() failed with the acquiring a ctrl i/f, the line6 helper still calls the disconnect callback that eventually calls again usb_driver_release_interface() with the NULL intf. Put the proper NULL check before calling it for avoiding an Oops. Fixes: fc90172ba283 ("ALSA: line6: Claim pod x3 usb data interface") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Signed-off-by: Takashi Iwai --- sound/usb/line6/podhd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index 14ab82ea7e82..451007c27743 100644 --- a/sound/usb/line6/podhd.c +++ b/sound/usb/line6/podhd.c @@ -301,7 +301,8 @@ static void podhd_disconnect(struct usb_line6 *line6) intf = usb_ifnum_to_if(line6->usbdev, pod->line6.properties->ctrl_if); - usb_driver_release_interface(&podhd_driver, intf); + if (intf) + usb_driver_release_interface(&podhd_driver, intf); } } -- cgit v1.2.3-70-g09d2 From c95072b3d88fac4be295815f2b67df366c0c297f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Oct 2017 14:51:23 +0200 Subject: ALSA: line6: Fix leftover URB at error-path during probe While line6_probe() may kick off URB for a control MIDI endpoint, the function doesn't clean up it properly at its error path. This results in a leftover URB action that is eventually triggered later and causes an Oops like: general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 0 Comm: swapper/1 Not tainted RIP: 0010:usb_fill_bulk_urb ./include/linux/usb.h:1619 RIP: 0010:line6_start_listen+0x3fe/0x9e0 sound/usb/line6/driver.c:76 Call Trace: line6_data_received+0x1f7/0x470 sound/usb/line6/driver.c:326 __usb_hcd_giveback_urb+0x2e0/0x650 drivers/usb/core/hcd.c:1779 usb_hcd_giveback_urb+0x337/0x420 drivers/usb/core/hcd.c:1845 dummy_timer+0xba9/0x39f0 drivers/usb/gadget/udc/dummy_hcd.c:1965 call_timer_fn+0x2a2/0x940 kernel/time/timer.c:1281 .... Since the whole clean-up procedure is done in line6_disconnect() callback, we can simply call it in the error path instead of open-coding the whole again. It'll fix such an issue automagically. The bug was spotted by syzkaller. Fixes: eedd0e95d355 ("ALSA: line6: Don't forget to call driver's destructor at error path") Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Signed-off-by: Takashi Iwai --- sound/usb/line6/driver.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index 0ff5a7d2e19f..c8f723c3a033 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -779,9 +779,10 @@ int line6_probe(struct usb_interface *interface, return 0; error: - if (line6->disconnect) - line6->disconnect(line6); - snd_card_free(card); + /* we can call disconnect callback here because no close-sync is + * needed yet at this point + */ + line6_disconnect(interface); return ret; } EXPORT_SYMBOL_GPL(line6_probe); -- cgit v1.2.3-70-g09d2 From 09aa97c78a784df2f781ff03b57b7dd6f1339edc Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Mon, 9 Oct 2017 03:00:28 +0530 Subject: skd: Use kmem_cache_free Use kmem_cache_free instead of kfree for freeing the memory previously allocated with kmem_cache_zalloc/kmem_cache_alloc/kmem_cache_node. Signed-off-by: Himanshu Jha Signed-off-by: Jens Axboe --- drivers/block/skd_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 7cedb4295e9d..64d0fc17c174 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -2604,7 +2604,7 @@ static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s, return NULL; *dma_handle = dma_map_single(dev, buf, s->size, dir); if (dma_mapping_error(dev, *dma_handle)) { - kfree(buf); + kmem_cache_free(s, buf); buf = NULL; } return buf; -- cgit v1.2.3-70-g09d2 From 133d68e0ed4d822ccfa276ff7d2d1753477de1a8 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 1 Sep 2017 14:46:50 -0700 Subject: MIPS: Fix cmpxchg on 32b signed ints for 64b kernel with !kernel_uses_llsc Commit 8263db4d7768 ("MIPS: cmpxchg: Implement __cmpxchg() as a function") refactored our implementation of __cmpxchg() to be a function rather than a macro, with the aim of making it easier to read & modify. Unfortunately the commit breaks use of cmpxchg() for signed 32 bit values when we have a 64 bit kernel with kernel_uses_llsc == false, because: - In cmpxchg_local() we cast the old value to the type the pointer points to, and then to an unsigned long. If the pointer points to a signed type smaller than 64 bits then the old value will be sign extended to 64 bits. That is, bits beyond the size of the pointed to type will be set to 1 if the old value is negative. In the case of a signed 32 bit integer with a negative value, bits 63:32 will all be set. - In __cmpxchg_asm() we load the value from memory, ie. dereference the pointer, and store the value as an unsigned integer (__ret) whose size matches the pointer. For a 32 bit cmpxchg() this means we store the value in a u32, because the pointer provided to __cmpxchg_asm() by __cmpxchg() is of type volatile u32 *. - __cmpxchg_asm() then checks whether the value in memory (__ret) matches the provided old value, by comparing the two values. This results in the u32 being promoted to a 64 bit unsigned long to match the old argument - however because both types are unsigned the value is zero extended, which does not match the sign extension performed on the old value in cmpxchg_local() earlier. This mismatch means that unfortunate cmpxchg() calls can incorrectly fail for 64 bit kernels with kernel_uses_llsc == false. This is the case on at least non-SMP Cavium Octeon kernels, which hardcode kernel_uses_llsc in their cpu-feature-overrides.h header. Using a v4.13-rc7 kernel configured using cavium_octeon_defconfig with SMP manually disabled, this presents itself as oddity when we reach userland - for example: can't run '/bin/mount': Text file busy can't run '/bin/mkdir': Text file busy can't run '/bin/mkdir': Text file busy can't run '/bin/mount': Text file busy can't run '/bin/hostname': Text file busy can't run '/etc/init.d/rcS': Text file busy can't run '/sbin/getty': Text file busy can't run '/sbin/getty': Text file busy It appears that some part of the init process, which is in this case buildroot's busybox init, is running successfully. It never manages to reach the login prompt though, and complains about /sbin/getty being busy repeatedly and indefinitely. Fix this by casting the old value provided to __cmpxchg_asm() to an appropriately sized unsigned integer, such that we consistently zero-extend avoiding the mismatch. The __cmpxchg_small() case for 8 & 16 bit values is unaffected because __cmpxchg_small() already masks provided values appropriately. Signed-off-by: Paul Burton Fixes: 8263db4d7768 ("MIPS: cmpxchg: Implement __cmpxchg() as a function") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17226/ Cc: linux-mips@linux-mips.org Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cmpxchg.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 903f3bf48419..7e25c5cc353a 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -155,14 +155,16 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return __cmpxchg_small(ptr, old, new, size); case 4: - return __cmpxchg_asm("ll", "sc", (volatile u32 *)ptr, old, new); + return __cmpxchg_asm("ll", "sc", (volatile u32 *)ptr, + (u32)old, new); case 8: /* lld/scd are only available for MIPS64 */ if (!IS_ENABLED(CONFIG_64BIT)) return __cmpxchg_called_with_bad_pointer(); - return __cmpxchg_asm("lld", "scd", (volatile u64 *)ptr, old, new); + return __cmpxchg_asm("lld", "scd", (volatile u64 *)ptr, + (u64)old, new); default: return __cmpxchg_called_with_bad_pointer(); -- cgit v1.2.3-70-g09d2 From e1270575fb7ee7ed6058d4ad3714a3b28001a295 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sun, 3 Sep 2017 10:24:58 -0700 Subject: MIPS: Fix generic-board-config.sh for builds using O= When configuring the kernel using one of the generic MIPS defconfig targets, the generic-board-config.sh script is used to check requirements listed in board config fragments against a reference config in order to determine which board config fragments to merge into the final config. When specifying O= to configure in a directory other than the kernel source directory, this generic-board-config.sh script is invoked in the directory that we are configuring in (ie. the directory that O equals), and the path to the reference config is relative to the current directory. The script then changes the current directory to the source tree, which unfortunately breaks later access to the reference file since its path is relative to a directory that is no longer the current working directory. This results in configuration failing with errors such as: $ make ARCH=mips O=tmp 32r2_defconfig make[1]: Entering directory '/home/pburton/src/linux/tmp' Using ../arch/mips/configs/generic_defconfig as base Merging ../arch/mips/configs/generic/32r2.config Merging ../arch/mips/configs/generic/eb.config grep: ./.config.32r2_defconfig: No such file or directory grep: ./.config.32r2_defconfig: No such file or directory The base file '.config' does not exist. Exit. make[1]: *** [arch/mips/Makefile:505: 32r2_defconfig] Error 1 make[1]: Leaving directory '/home/pburton/src/linux-ingenic/tmp' make: *** [Makefile:145: sub-make] Error 2 Fix this by avoiding changing the working directory in generic-board-config.sh, instead using full paths to files under $(srctree)/ where necessary. Signed-off-by: Paul Burton Fixes: 27e0d4b05107 ("MIPS: generic: Allow filtering enabled boards by requirements") Cc: linux-mips@linux-mips.org Cc: kbuild test robot Cc: kbuild-all@01.org Patchwork: https://patchwork.linux-mips.org/patch/17231/ Signed-off-by: Ralf Baechle --- arch/mips/tools/generic-board-config.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/mips/tools/generic-board-config.sh b/arch/mips/tools/generic-board-config.sh index 5c4f93687039..654d652d7fa1 100755 --- a/arch/mips/tools/generic-board-config.sh +++ b/arch/mips/tools/generic-board-config.sh @@ -30,8 +30,6 @@ cfg="$4" boards_origin="$5" shift 5 -cd "${srctree}" - # Only print Skipping... lines if the user explicitly specified BOARDS=. In the # general case it only serves to obscure the useful output about what actually # was included. @@ -48,7 +46,7 @@ environment*) esac for board in $@; do - board_cfg="arch/mips/configs/generic/board-${board}.config" + board_cfg="${srctree}/arch/mips/configs/generic/board-${board}.config" if [ ! -f "${board_cfg}" ]; then echo "WARNING: Board config '${board_cfg}' not found" continue @@ -84,7 +82,7 @@ for board in $@; do done || continue # Merge this board config fragment into our final config file - ./scripts/kconfig/merge_config.sh \ + ${srctree}/scripts/kconfig/merge_config.sh \ -m -O ${objtree} ${cfg} ${board_cfg} \ | grep -Ev '^(#|Using)' done -- cgit v1.2.3-70-g09d2 From ca8eb05b5f332a9e1ab3e2ece498d49f4d683470 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 8 Sep 2017 15:12:21 -0700 Subject: MIPS: math-emu: Remove pr_err() calls from fpu_emu() The FPU emulator includes 2 calls to pr_err() which are triggered by invalid instruction encodings for MIPSr6 cmp.cond.fmt instructions. These cases are not kernel errors, merely invalid instructions which are already handled by delivering a SIGILL which will provide notification that something failed in cases where that makes sense. In cases where that SIGILL is somewhat expected & being handled, for example when crashme happens to generate one of the affected bad encodings, the message is printed with no useful context about what triggered it & spams the kernel log for no good reason. Remove the pr_err() calls to make crashme run silently & treat the bad encodings the same way we do others, with a SIGILL & no further kernel log output. Signed-off-by: Paul Burton Fixes: f8c3c6717a71 ("MIPS: math-emu: Add support for the CMP.condn.fmt R6 instruction") Cc: linux-mips@linux-mips.org Cc: stable # v4.3+ Patchwork: https://patchwork.linux-mips.org/patch/17253/ Signed-off-by: Ralf Baechle --- arch/mips/math-emu/cp1emu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 192542dbd972..16d9ef5a78c5 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -2558,7 +2558,6 @@ dcopuop: break; default: /* Reserved R6 ops */ - pr_err("Reserved MIPS R6 CMP.condn.S operation\n"); return SIGILL; } } @@ -2719,7 +2718,6 @@ dcopuop: break; default: /* Reserved R6 ops */ - pr_err("Reserved MIPS R6 CMP.condn.D operation\n"); return SIGILL; } } -- cgit v1.2.3-70-g09d2 From e0f06bba9629987fb3ec1d6928bf17ef689702e8 Mon Sep 17 00:00:00 2001 From: Mark D Rustad Date: Wed, 31 Aug 2016 10:34:28 -0700 Subject: ixgbe: Return error when getting PHY address if PHY access is not supported In cases where PHY register access is not supported, don't mislead a caller into thinking that it is supported by returning a PHY address. Instead, return -EOPNOTSUPP when PHY access is not supported. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d962368d08d0..822cdb4f2c25 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8529,6 +8529,10 @@ static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) return ixgbe_ptp_set_ts_config(adapter, req); case SIOCGHWTSTAMP: return ixgbe_ptp_get_ts_config(adapter, req); + case SIOCGMIIPHY: + if (!adapter->hw.phy.ops.read_reg) + return -EOPNOTSUPP; + /* fall through */ default: return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd); } -- cgit v1.2.3-70-g09d2 From a39221ce969b316d3c3dcf7fcff8c0d8cf223007 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 3 Jul 2017 13:02:55 +0200 Subject: ixgbe: fix masking of bits read from IXGBE_VXLANCTRL register In ixgbe_clear_udp_tunnel_port(), we read the IXGBE_VXLANCTRL register and then try to mask some bits out of the value, using the logical instead of bitwise and operator. Fixes: a21d0822ff69 ("ixgbe: add support for geneve Rx offload") Signed-off-by: Sabrina Dubroca Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 822cdb4f2c25..4d76afd13868 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4881,7 +4881,7 @@ static void ixgbe_clear_udp_tunnel_port(struct ixgbe_adapter *adapter, u32 mask) IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE))) return; - vxlanctrl = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) && ~mask; + vxlanctrl = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) & ~mask; IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, vxlanctrl); if (mask & IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK) -- cgit v1.2.3-70-g09d2 From f4986d250ada29ae0c65c209a9d8f97968ea7eae Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Fri, 18 Aug 2017 14:21:04 +0800 Subject: Revert commit 1a8b6d76dc5b ("net:add one common config...") The new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added to indicate that Relaxed Ordering Attributes (RO) should not be used for Transaction Layer Packets (TLP) targeted toward these affected Root Port, it will clear the bit4 in the PCIe Device Control register, so the PCIe device drivers could query PCIe configuration space to determine if it can send TLPs to Root Port with the Relaxed Ordering Attributes set. With this new flag we don't need the config ARCH_WANT_RELAX_ORDER to control the Relaxed Ordering Attributes for the ixgbe drivers just like the commit 1a8b6d76dc5b ("net:add one common config...") did, so revert this commit. Signed-off-by: Ding Tianhong Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- arch/Kconfig | 3 --- arch/sparc/Kconfig | 1 - drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 2 +- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 1aafb4efbb51..d789a89cb32c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -937,9 +937,6 @@ config STRICT_MODULE_RWX and non-text memory will be made non-executable. This provides protection against certain security exploits (e.g. writing to text) -config ARCH_WANT_RELAX_ORDER - bool - config ARCH_HAS_REFCOUNT bool help diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 0be3828752e5..4e83f950713e 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -44,7 +44,6 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select LOCKDEP_SMALL if LOCKDEP - select ARCH_WANT_RELAX_ORDER config SPARC32 def_bool !64BIT diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 2c19070d2a0b..e8c1788aed1f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -366,7 +366,7 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw) } IXGBE_WRITE_FLUSH(hw); -#ifndef CONFIG_ARCH_WANT_RELAX_ORDER +#ifndef CONFIG_SPARC /* Disable relaxed ordering */ for (i = 0; i < hw->mac.max_tx_queues; i++) { u32 regval; -- cgit v1.2.3-70-g09d2 From 5e0fac63a694918870af9d6eaf716af19e7f5652 Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Fri, 18 Aug 2017 14:21:05 +0800 Subject: net: ixgbe: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag The ixgbe driver use the compile check to determine if it can send TLPs to Root Port with the Relaxed Ordering Attribute set, this is too inconvenient, now the new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING has been added to the kernel and we could check the bit4 in the PCIe Device Control register to determine whether we should use the Relaxed Ordering Attributes or not, so use this new way in the ixgbe driver. Signed-off-by: Ding Tianhong Acked-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c | 22 ---------------------- drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 19 ------------------- 2 files changed, 41 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 523f9d05a810..8a32eb7d47b9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -175,31 +175,9 @@ static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw) **/ static s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) { -#ifndef CONFIG_SPARC - u32 regval; - u32 i; -#endif s32 ret_val; ret_val = ixgbe_start_hw_generic(hw); - -#ifndef CONFIG_SPARC - /* Disable relaxed ordering */ - for (i = 0; ((i < hw->mac.max_tx_queues) && - (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { - regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); - regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval); - } - - for (i = 0; ((i < hw->mac.max_rx_queues) && - (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { - regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); - regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | - IXGBE_DCA_RXCTRL_HEAD_WRO_EN); - IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); - } -#endif if (ret_val) return ret_val; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index e8c1788aed1f..6e6ab6f6875e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -366,25 +366,6 @@ s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw) } IXGBE_WRITE_FLUSH(hw); -#ifndef CONFIG_SPARC - /* Disable relaxed ordering */ - for (i = 0; i < hw->mac.max_tx_queues; i++) { - u32 regval; - - regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i)); - regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval); - } - - for (i = 0; i < hw->mac.max_rx_queues; i++) { - u32 regval; - - regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); - regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | - IXGBE_DCA_RXCTRL_HEAD_WRO_EN); - IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); - } -#endif return 0; } -- cgit v1.2.3-70-g09d2 From 8e679021c5b9465ac5b0d7efd26baab9b10a2dbd Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 7 Sep 2017 10:32:48 -0700 Subject: ixgbe: incorrect XDP ring accounting in ethtool tx_frame param Changing the TX ring parameters with an XDP program attached may cause the XDP queues to be cleared and the TX rings to be incorrectly configured. Fix by doing correct ring accounting in setup call. Fixes: 33fdc82f0883 ("ixgbe: add support for XDP_TX action") Signed-off-by: John Fastabend Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 72c565712a5f..c3e7a8191128 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1048,7 +1048,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev, { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_ring *temp_ring; - int i, err = 0; + int i, j, err = 0; u32 new_rx_count, new_tx_count; if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) @@ -1085,8 +1085,8 @@ static int ixgbe_set_ringparam(struct net_device *netdev, } /* allocate temporary buffer to store rings in */ - i = max_t(int, adapter->num_tx_queues, adapter->num_rx_queues); - i = max_t(int, i, adapter->num_xdp_queues); + i = max_t(int, adapter->num_tx_queues + adapter->num_xdp_queues, + adapter->num_rx_queues); temp_ring = vmalloc(i * sizeof(struct ixgbe_ring)); if (!temp_ring) { @@ -1118,8 +1118,8 @@ static int ixgbe_set_ringparam(struct net_device *netdev, } } - for (i = 0; i < adapter->num_xdp_queues; i++) { - memcpy(&temp_ring[i], adapter->xdp_ring[i], + for (j = 0; j < adapter->num_xdp_queues; j++, i++) { + memcpy(&temp_ring[i], adapter->xdp_ring[j], sizeof(struct ixgbe_ring)); temp_ring[i].count = new_tx_count; @@ -1139,10 +1139,10 @@ static int ixgbe_set_ringparam(struct net_device *netdev, memcpy(adapter->tx_ring[i], &temp_ring[i], sizeof(struct ixgbe_ring)); } - for (i = 0; i < adapter->num_xdp_queues; i++) { - ixgbe_free_tx_resources(adapter->xdp_ring[i]); + for (j = 0; j < adapter->num_xdp_queues; j++, i++) { + ixgbe_free_tx_resources(adapter->xdp_ring[j]); - memcpy(adapter->xdp_ring[i], &temp_ring[i], + memcpy(adapter->xdp_ring[j], &temp_ring[i], sizeof(struct ixgbe_ring)); } -- cgit v1.2.3-70-g09d2 From f7974880cf869ddbd0ba9a8e2ab11dff4a667f96 Mon Sep 17 00:00:00 2001 From: John Einar Reitan Date: Mon, 9 Oct 2017 15:49:36 +0200 Subject: sync_file: Return consistent status in SYNC_IOC_FILE_INFO sync_file_ioctl_fence_info has a race between filling the status of the underlying fences and the overall status of the sync_file. If fence transitions in the time frame between its sync_fill_fence_info and the later dma_fence_is_signaled for the sync_file, the returned information is inconsistent showing non-signaled underlying fences but an overall signaled state. This patch changes sync_file_ioctl_fence_info to track what has been encoded and using that as the overall sync_file status. Tested-by: Vamsidhar Reddy Gaddam Signed-off-by: John Einar Reitan Cc: Sumit Semwal Cc: Gustavo Padovan Cc: dri-devel@lists.freedesktop.org Reviewed-by: Chris Wilson Signed-off-by: Gustavo Padovan Link: https://patchwork.freedesktop.org/patch/msgid/20171009134936.27219-1-john.reitan@arm.com --- drivers/dma-buf/sync_file.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 66fb40d0ebdb..03830634e141 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -383,7 +383,7 @@ err_put_fd: return err; } -static void sync_fill_fence_info(struct dma_fence *fence, +static int sync_fill_fence_info(struct dma_fence *fence, struct sync_fence_info *info) { strlcpy(info->obj_name, fence->ops->get_timeline_name(fence), @@ -399,6 +399,8 @@ static void sync_fill_fence_info(struct dma_fence *fence, test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ? ktime_to_ns(fence->timestamp) : ktime_set(0, 0); + + return info->status; } static long sync_file_ioctl_fence_info(struct sync_file *sync_file, @@ -424,8 +426,12 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, * sync_fence_info and return the actual number of fences on * info->num_fences. */ - if (!info.num_fences) + if (!info.num_fences) { + info.status = dma_fence_is_signaled(sync_file->fence); goto no_fences; + } else { + info.status = 1; + } if (info.num_fences < num_fences) return -EINVAL; @@ -435,8 +441,10 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (!fence_info) return -ENOMEM; - for (i = 0; i < num_fences; i++) - sync_fill_fence_info(fences[i], &fence_info[i]); + for (i = 0; i < num_fences; i++) { + int status = sync_fill_fence_info(fences[i], &fence_info[i]); + info.status = info.status <= 0 ? info.status : status; + } if (copy_to_user(u64_to_user_ptr(info.sync_fence_info), fence_info, size)) { @@ -446,7 +454,6 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, no_fences: sync_file_get_name(sync_file, info.name, sizeof(info.name)); - info.status = dma_fence_is_signaled(sync_file->fence); info.num_fences = num_fences; if (copy_to_user((void __user *)arg, &info, sizeof(info))) -- cgit v1.2.3-70-g09d2 From 62cf27e52b8c9a39066172ca6b6134cb5eaa9450 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 9 Oct 2017 08:39:43 +0200 Subject: ipv6: Fix traffic triggered IPsec connections. A recent patch removed the dst_free() on the allocated dst_entry in ipv6_blackhole_route(). The dst_free() marked the dst_entry as dead and added it to the gc list. I.e. it was setup for a one time usage. As a result we may now have a blackhole route cached at a socket on some IPsec scenarios. This makes the connection unusable. Fix this by marking the dst_entry directly at allocation time as 'dead', so it is used only once. Fixes: 587fea741134 ("ipv6: mark DST_NOGC and remove the operation of dst_free()") Reported-by: Tobias Brunner Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 26cc9f483b6d..a96d5b385d8f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1325,7 +1325,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct dst_entry *new = NULL; rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, - DST_OBSOLETE_NONE, 0); + DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); -- cgit v1.2.3-70-g09d2 From 6c0e7284d89995877740d8a26c3e99a937312a3c Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 9 Oct 2017 08:43:55 +0200 Subject: ipv4: Fix traffic triggered IPsec connections. A recent patch removed the dst_free() on the allocated dst_entry in ipv4_blackhole_route(). The dst_free() marked the dst_entry as dead and added it to the gc list. I.e. it was setup for a one time usage. As a result we may now have a blackhole route cached at a socket on some IPsec scenarios. This makes the connection unusable. Fix this by marking the dst_entry directly at allocation time as 'dead', so it is used only once. Fixes: b838d5e1c5b6 ("ipv4: mark DST_NOGC and remove the operation of dst_free()") Reported-by: Tobias Brunner Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ac6fde5d45f1..3d9f1c2f81c5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2513,7 +2513,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; - rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; -- cgit v1.2.3-70-g09d2 From 41c87425a1ac9b633e0fcc78eb1f19640c8fb5a0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 9 Oct 2017 14:14:51 +0200 Subject: netlink: do not set cb_running if dump's start() errs It turns out that multiple places can call netlink_dump(), which means it's still possible to dereference partially initialized values in dump() that were the result of a faulty returned start(). This fixes the issue by calling start() _before_ setting cb_running to true, so that there's no chance at all of hitting the dump() function through any indirect paths. It also moves the call to start() to be when the mutex is held. This has the nice side effect of serializing invocations to start(), which is likely desirable anyway. It also prevents any possible other races that might come out of this logic. In testing this with several different pieces of tricky code to trigger these issues, this commit fixes all avenues that I'm aware of. Signed-off-by: Jason A. Donenfeld Cc: Johannes Berg Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 94c11cf0459d..f34750691c5c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2266,16 +2266,17 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->min_dump_alloc = control->min_dump_alloc; cb->skb = skb; + if (cb->start) { + ret = cb->start(cb); + if (ret) + goto error_unlock; + } + nlk->cb_running = true; mutex_unlock(nlk->cb_mutex); - ret = 0; - if (cb->start) - ret = cb->start(cb); - - if (!ret) - ret = netlink_dump(sk); + ret = netlink_dump(sk); sock_put(sk); -- cgit v1.2.3-70-g09d2 From 996b44fcef8f216ea0b6b6e74468c5a77b5e341f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 9 Oct 2017 14:52:10 +0200 Subject: udp: fix bcast packet reception The commit bc044e8db796 ("udp: perform source validation for mcast early demux") does not take into account that broadcast packets lands in the same code path and they need different checks for the source address - notably, zero source address are valid for bcast and invalid for mcast. As a result, 2nd and later broadcast packets with 0 source address landing to the same socket are dropped. This breaks dhcp servers. Since we don't have stringent performance requirements for ingress broadcast traffic, fix it by disabling UDP early demux such traffic. Reported-by: Hannes Frederic Sowa Fixes: bc044e8db796 ("udp: perform source validation for mcast early demux") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5676237d2b0f..e45177ceb0ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2240,20 +2240,16 @@ int udp_v4_early_demux(struct sk_buff *skb) iph = ip_hdr(skb); uh = udp_hdr(skb); - if (skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) { + if (skb->pkt_type == PACKET_MULTICAST) { in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) return 0; - /* we are supposed to accept bcast packets */ - if (skb->pkt_type == PACKET_MULTICAST) { - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return 0; - } + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return 0; sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, -- cgit v1.2.3-70-g09d2 From d7ba25bd9ef802ff02414e9105f4222d1795f27a Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Wed, 4 Oct 2017 09:48:26 -0700 Subject: drm/i915/edp: Get the Panel Power Off timestamp after panel is off Kernel stores the time in jiffies at which the eDP panel is turned off. This should be obtained after the panel is off (after the wait_panel_off). When we next attempt to turn the panel on, we use the difference between the timestamp at which we want to turn the panel on and timestamp at which panel was turned off to ensure that this is equal to panel power cycle delay and if not we wait for the remaining time. Not waiting for the panel power cycle delay can cause the panel to not turn on giving rise to AUX timeouts for the attempted AUX transactions. v2: * Separate lines for bugzilla (Jani Nikula) * Suggested by tag (Daniel Vetter) Cc: Daniel Vetter Cc: Jani Nikula Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101518 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101144 Suggested-by: Daniel Vetter Signed-off-by: Manasi Navare Reviewed-by: Daniel Vetter Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1507135706-17147-1-git-send-email-manasi.d.navare@intel.com (cherry picked from commit cbacf02e7796fea02e5c6e46c90ed7cbe9e6f2c0) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 64134947c0aa..c0f8d7e66049 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2307,8 +2307,8 @@ static void edp_panel_off(struct intel_dp *intel_dp) I915_WRITE(pp_ctrl_reg, pp); POSTING_READ(pp_ctrl_reg); - intel_dp->panel_power_off_time = ktime_get_boottime(); wait_panel_off(intel_dp); + intel_dp->panel_power_off_time = ktime_get_boottime(); /* We got a reference when we enabled the VDD. */ intel_display_power_put(dev_priv, intel_dp->aux_power_domain); -- cgit v1.2.3-70-g09d2 From 7313f5a93d2017f789909a7a727a6cab48ea6d20 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 3 Oct 2017 16:37:25 -0700 Subject: drm/i915/edp: Increase the T12 delay quirk to 1300ms For this specific PCI device, the eDP panel requires a higher panel power cycle delay of 1300ms where the minimum spec requirement of panel power cycle delay is 500ms. This fix in combination with correct timestamp at which we get the panel power off time fixes the dP AUX CH timeouts seen on various IGT tests. Fixes: c99a259b4b5192ba ("drm/i915/edp: Add a T12 panel delay quirk to fix DP AUX CH timeouts") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101144 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101518 Cc: Daniel Vetter Cc: Jani Nikula Cc: Ville Syrjala Signed-off-by: Manasi Navare Acked-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1507073845-13420-2-git-send-email-manasi.d.navare@intel.com (cherry picked from commit c02b8fb4073d1b9aa5af909a91b51056b819d946) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index c0f8d7e66049..203198659ab2 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5273,7 +5273,7 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, * seems sufficient to avoid this problem. */ if (dev_priv->quirks & QUIRK_INCREASE_T12_DELAY) { - vbt.t11_t12 = max_t(u16, vbt.t11_t12, 900 * 10); + vbt.t11_t12 = max_t(u16, vbt.t11_t12, 1300 * 10); DRM_DEBUG_KMS("Increasing T12 panel delay as per the quirk to %d\n", vbt.t11_t12); } -- cgit v1.2.3-70-g09d2 From d6a55c63e6adcb58957bbdce2d390088970273da Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 5 Oct 2017 16:15:20 +0200 Subject: drm/i915: Use crtc_state_is_legacy_gamma in intel_color_check crtc_state_is_legacy_gamma also checks for CTM, which was missing from intel_color_check. By using the same condition for commit and check we reduce the chance of mismatches. This was spotted by KASAN while trying to rework kms_color igt test. [ 72.008660] ================================================================== [ 72.009326] BUG: KASAN: slab-out-of-bounds in bdw_load_gamma_lut.isra.3+0x15c/0x360 [i915] [ 72.009519] Read of size 2 at addr ffff880220216e50 by task kms_color/1158 [ 72.009900] CPU: 2 PID: 1158 Comm: kms_color Tainted: G U W 4.14.0-rc3-patser+ #5281 [ 72.009921] Hardware name: GIGABYTE GB-BKi3A-7100/MFLP3AP-00, BIOS F1 07/27/2016 [ 72.009941] Call Trace: [ 72.009968] dump_stack+0xc5/0x151 [ 72.009996] ? _atomic_dec_and_lock+0x10f/0x10f [ 72.010024] ? show_regs_print_info+0x3c/0x3c [ 72.010072] print_address_description+0x7f/0x240 [ 72.010108] kasan_report+0x216/0x370 [ 72.010308] ? bdw_load_gamma_lut.isra.3+0x15c/0x360 [i915] [ 72.010349] __asan_load2+0x74/0x80 [ 72.010552] bdw_load_gamma_lut.isra.3+0x15c/0x360 [i915] [ 72.010772] broadwell_load_luts+0x1f0/0x300 [i915] [ 72.010997] intel_color_load_luts+0x36/0x40 [i915] [ 72.011205] intel_begin_crtc_commit+0xa1/0x310 [i915] [ 72.011283] drm_atomic_helper_commit_planes_on_crtc+0xa6/0x320 [drm_kms_helper] [ 72.011316] ? wait_for_completion_io+0x460/0x460 [ 72.011524] intel_update_crtc+0xe3/0x100 [i915] [ 72.011720] skl_update_crtcs+0x360/0x3f0 [i915] [ 72.011945] ? intel_update_crtcs+0xf0/0xf0 [i915] [ 72.012010] ? drm_atomic_helper_wait_for_dependencies+0x3d9/0x400 [drm_kms_helper] [ 72.012231] intel_atomic_commit_tail+0x8db/0x1500 [i915] [ 72.012273] ? __lock_is_held+0x9c/0xc0 [ 72.012494] ? skl_update_crtcs+0x3f0/0x3f0 [i915] [ 72.012518] ? find_next_bit+0xb/0x10 [ 72.012544] ? cpumask_next+0x1a/0x20 [ 72.012745] ? i915_sw_fence_complete+0x9d/0xe0 [i915] [ 72.012938] ? __i915_sw_fence_complete+0x5d0/0x5d0 [i915] [ 72.013176] intel_atomic_commit+0x528/0x570 [i915] [ 72.013280] ? drm_atomic_get_property+0xc00/0xc00 [drm] [ 72.013466] ? intel_atomic_commit_tail+0x1500/0x1500 [i915] [ 72.013496] ? kmem_cache_alloc_trace+0x266/0x280 [ 72.013714] ? intel_atomic_commit_tail+0x1500/0x1500 [i915] [ 72.013812] drm_atomic_commit+0x77/0x80 [drm] [ 72.013911] set_property_atomic+0x14a/0x210 [drm] [ 72.014015] ? drm_object_property_get_value+0x70/0x70 [drm] [ 72.014080] ? mutex_unlock+0xd/0x10 [ 72.014292] ? intel_atomic_commit_tail+0x1500/0x1500 [i915] [ 72.014379] drm_mode_obj_set_property_ioctl+0x1cf/0x310 [drm] [ 72.014481] ? drm_mode_obj_find_prop_id+0xa0/0xa0 [drm] [ 72.014510] ? lock_release+0x6c0/0x6c0 [ 72.014602] ? drm_is_current_master+0x46/0x60 [drm] [ 72.014706] drm_ioctl_kernel+0x148/0x1d0 [drm] [ 72.014799] ? drm_mode_obj_find_prop_id+0xa0/0xa0 [drm] [ 72.014898] ? drm_ioctl_permit+0x100/0x100 [drm] [ 72.014936] ? kasan_check_write+0x14/0x20 [ 72.015039] drm_ioctl+0x441/0x660 [drm] [ 72.015129] ? drm_mode_obj_find_prop_id+0xa0/0xa0 [drm] [ 72.015235] ? drm_getstats+0x20/0x20 [drm] [ 72.015287] ? ___might_sleep+0x159/0x340 [ 72.015311] ? find_held_lock+0xcf/0xf0 [ 72.015341] ? __schedule_bug+0x110/0x110 [ 72.015405] do_vfs_ioctl+0xa88/0xb10 [ 72.015449] ? ioctl_preallocate+0x1a0/0x1a0 [ 72.015487] ? selinux_capable+0x20/0x20 [ 72.015525] ? rcu_dynticks_momentary_idle+0x40/0x40 [ 72.015607] SyS_ioctl+0x4e/0x80 [ 72.015647] entry_SYSCALL_64_fastpath+0x18/0xad [ 72.015670] RIP: 0033:0x7ff74a3d04d7 [ 72.015691] RSP: 002b:00007ffc594bec08 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 72.015734] RAX: ffffffffffffffda RBX: ffffffff8718f54a RCX: 00007ff74a3d04d7 [ 72.015756] RDX: 00007ffc594bec40 RSI: 00000000c01864ba RDI: 0000000000000003 [ 72.015777] RBP: ffff880211c0ff98 R08: 0000000000000086 R09: 0000000000000000 [ 72.015799] R10: 00007ff74a691b58 R11: 0000000000000246 R12: 0000000000000355 [ 72.015821] R13: 00000000ff00eb00 R14: 0000000000000a00 R15: 00007ff746082000 [ 72.015857] ? trace_hardirqs_off_caller+0xfa/0x110 Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171005141520.23990-1-maarten.lankhorst@linux.intel.com [mlankhorst: s/crtc_state_is_legacy/&_gamma/ (danvet)] Reviewed-by: Daniel Vetter Fixes: 82cf435b3134 ("drm/i915: Implement color management on bdw/skl/bxt/kbl") Cc: # v4.7+ (cherry picked from commit 0c3767b28186c8129f2a2cfec06a93dcd6102391) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_color.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index ff9ecd211abb..b8315bca852b 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -74,7 +74,7 @@ #define I9XX_CSC_COEFF_1_0 \ ((7 << 12) | I9XX_CSC_COEFF_FP(CTM_COEFF_1_0, 8)) -static bool crtc_state_is_legacy(struct drm_crtc_state *state) +static bool crtc_state_is_legacy_gamma(struct drm_crtc_state *state) { return !state->degamma_lut && !state->ctm && @@ -288,7 +288,7 @@ static void cherryview_load_csc_matrix(struct drm_crtc_state *state) } mode = (state->ctm ? CGM_PIPE_MODE_CSC : 0); - if (!crtc_state_is_legacy(state)) { + if (!crtc_state_is_legacy_gamma(state)) { mode |= (state->degamma_lut ? CGM_PIPE_MODE_DEGAMMA : 0) | (state->gamma_lut ? CGM_PIPE_MODE_GAMMA : 0); } @@ -469,7 +469,7 @@ static void broadwell_load_luts(struct drm_crtc_state *state) struct intel_crtc_state *intel_state = to_intel_crtc_state(state); enum pipe pipe = to_intel_crtc(state->crtc)->pipe; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -529,7 +529,7 @@ static void glk_load_luts(struct drm_crtc_state *state) glk_load_degamma_lut(state); - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { haswell_load_luts(state); return; } @@ -551,7 +551,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) uint32_t i, lut_size; uint32_t word0, word1; - if (crtc_state_is_legacy(state)) { + if (crtc_state_is_legacy_gamma(state)) { /* Turn off degamma/gamma on CGM block. */ I915_WRITE(CGM_PIPE_MODE(pipe), (state->ctm ? CGM_PIPE_MODE_CSC : 0)); @@ -632,12 +632,10 @@ int intel_color_check(struct drm_crtc *crtc, return 0; /* - * We also allow no degamma lut and a gamma lut at the legacy + * We also allow no degamma lut/ctm and a gamma lut at the legacy * size (256 entries). */ - if (!crtc_state->degamma_lut && - crtc_state->gamma_lut && - crtc_state->gamma_lut->length == LEGACY_LUT_LENGTH) + if (crtc_state_is_legacy_gamma(crtc_state)) return 0; return -EINVAL; -- cgit v1.2.3-70-g09d2 From de3ded0ae677749b4fc9f59d15b26b9f077340ac Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 2 Oct 2017 11:04:16 +0100 Subject: drm/i915: Silence compiler warning for hsw_power_well_enable() Not all compilers are able to determine that pg is guarded by wait_fuses and so may think that pg is used uninitialized. Reported-by: Geert Uytterhoeven Fixes: b2891eb2531e ("drm/i915/hsw+: Add has_fuses power well attribute") Signed-off-by: Chris Wilson Cc: Imre Deak Cc: Arkadiusz Hiler Link: https://patchwork.freedesktop.org/patch/msgid/20171002100416.25865-1-chris@chris-wilson.co.uk Reviewed-by: Imre Deak (cherry picked from commit 320671f94ada80ff036cc9d5dcd730ba4f3e0f1a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_runtime_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index b3a087cb0860..49577eba8e7e 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -368,7 +368,7 @@ static void hsw_power_well_enable(struct drm_i915_private *dev_priv, { enum i915_power_well_id id = power_well->id; bool wait_fuses = power_well->hsw.has_fuses; - enum skl_power_gate pg; + enum skl_power_gate uninitialized_var(pg); u32 val; if (wait_fuses) { -- cgit v1.2.3-70-g09d2 From b85577b72837ee8d9213e93d2c8b67ef78a47803 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Oct 2017 12:56:17 +0100 Subject: drm/i915: Order two completing nop_submit_request If two nop's (requests in-flight following a wedged device) complete at the same time, the global_seqno value written to the HWSP is undefined as the two threads are not serialized. v2: Use irqsafe spinlock. We expect the callback may be called from inside another irq spinlock, so we can't unconditionally restore irqs. Fixes: ce1135c7de64 ("drm/i915: Complete requests in nop_submit_request") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171006115617.18432-1-chris@chris-wilson.co.uk (cherry picked from commit 8d550824c6f52506754f11cb6be51aa153cc580d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 19404c96eeb1..af289d35b77a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3013,10 +3013,15 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct drm_i915_gem_request *request) { + unsigned long flags; + GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); - i915_gem_request_submit(request); + + spin_lock_irqsave(&request->engine->timeline->lock, flags); + __i915_gem_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); + spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } static void engine_set_wedged(struct intel_engine_cs *engine) -- cgit v1.2.3-70-g09d2 From 7b50f7b24cd6c98541f1af53bddc5b6e861ee8c8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 1 Apr 2016 18:37:25 +0300 Subject: drm/i915: Read timings from the correct transcoder in intel_crtc_mode_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_crtc->config->cpu_transcoder isn't yet filled out when intel_crtc_mode_get() gets called during output probing, so we should not use it there. Instead intel_crtc_mode_get() figures out the correct transcoder on its own, and that's what we should use. If the BIOS boots LVDS on pipe B, intel_crtc_mode_get() would actually end up reading the timings from pipe A instead (since PIPE_A==0), which clearly isn't what we want. It looks to me like this may have been broken by commit eccb140bca67 ("drm/i915: hw state readout&check support for cpu_transcoder") as that one removed the early initialization of cpu_transcoder from intel_crtc_init(). Cc: stable@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: Rob Kramer Cc: Daniel Vetter Reported-by: Rob Kramer Fixes: eccb140bca67 ("drm/i915: hw state readout&check support for cpu_transcoder") References: https://lists.freedesktop.org/archives/dri-devel/2016-April/104142.html Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1459525046-19425-1-git-send-email-ville.syrjala@linux.intel.com (cherry picked from commit e30a154b5262b967b133b06ac40777e651045898) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_display.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 64f7b51ed97c..5c7828c52d12 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10245,13 +10245,10 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder; struct drm_display_mode *mode; struct intel_crtc_state *pipe_config; - int htot = I915_READ(HTOTAL(cpu_transcoder)); - int hsync = I915_READ(HSYNC(cpu_transcoder)); - int vtot = I915_READ(VTOTAL(cpu_transcoder)); - int vsync = I915_READ(VSYNC(cpu_transcoder)); + u32 htot, hsync, vtot, vsync; enum pipe pipe = intel_crtc->pipe; mode = kzalloc(sizeof(*mode), GFP_KERNEL); @@ -10279,6 +10276,13 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, i9xx_crtc_clock_get(intel_crtc, pipe_config); mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier; + + cpu_transcoder = pipe_config->cpu_transcoder; + htot = I915_READ(HTOTAL(cpu_transcoder)); + hsync = I915_READ(HSYNC(cpu_transcoder)); + vtot = I915_READ(VTOTAL(cpu_transcoder)); + vsync = I915_READ(VSYNC(cpu_transcoder)); + mode->hdisplay = (htot & 0xffff) + 1; mode->htotal = ((htot & 0xffff0000) >> 16) + 1; mode->hsync_start = (hsync & 0xffff) + 1; -- cgit v1.2.3-70-g09d2 From 1a2ace56cea2d752b212e198c5e70ff0c0f39b59 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 9 Oct 2017 11:44:53 -0500 Subject: net: thunderx: mark expected switch fall-throughs in nicvf_main() In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Cc: Sunil Goutham Cc: Robert Richter Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 49b80da51ba7..805ab45e9b5a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -565,8 +565,10 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, return true; default: bpf_warn_invalid_xdp_action(action); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(nic->netdev, prog, action); + /* fall through */ case XDP_DROP: /* Check if it's a recycled page, if not * unmap the DMA mapping. -- cgit v1.2.3-70-g09d2 From a528629dfd3b87f206ac0e5637f7e7f958143ab8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 6 Oct 2017 16:44:27 -0700 Subject: ARC: [plat-hsdk] select CONFIG_RESET_HSDK from Kconfig Signed-off-by: Vineet Gupta --- arch/arc/configs/hsdk_defconfig | 1 - arch/arc/plat-hsdk/Kconfig | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 15f0f6b5fec1..7b8f8faf8a24 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -63,7 +63,6 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_RESET_HSDK=y CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index bd08de4be75e..19ab3cf98f0f 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -8,3 +8,4 @@ menuconfig ARC_SOC_HSDK bool "ARC HS Development Kit SOC" select CLK_HSDK + select RESET_HSDK -- cgit v1.2.3-70-g09d2 From 639812a1ed9bf49ae2c026086fbf975339cd1eef Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 9 Oct 2017 13:12:10 -0400 Subject: nbd: don't set the device size until we're connected A user reported a regression with using the normal ioctl interface on newer kernels. This happens because I was setting the device size before the device was actually connected, which caused us to error out and close everything down. This didn't happen on netlink because we hold the device lock the whole time we're setting things up, but we don't do that for the ioctl path. This fixes the problem. Cc: stable@vger.kernel.org Fixes: 29eaadc ("nbd: stop using the bdev everywhere") Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 883dfebd3014..baebbdfd74d5 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -243,7 +243,6 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, struct nbd_config *config = nbd->config; config->blksize = blocksize; config->bytesize = blocksize * nr_blocks; - nbd_size_update(nbd); } static void nbd_complete_rq(struct request *req) @@ -1094,6 +1093,7 @@ static int nbd_start_device(struct nbd_device *nbd) args->index = i; queue_work(recv_workqueue, &args->work); } + nbd_size_update(nbd); return error; } -- cgit v1.2.3-70-g09d2 From 66ec11919a0f96e936bb731fdbc2851316077d26 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 6 Oct 2017 19:38:22 +0100 Subject: perf pmu: Unbreak perf record for arm/arm64 with events with explicit PMU Currently, perf record is broken on arm/arm64 systems when the PMU is specified explicitly as part of the event, e.g. $ ./perf record -e armv8_cortex_a53/cpu_cycles/u true In such cases, perf record fails to open events unless perf_event_paranoid is set to -1, even if the PMU in question supports mode exclusion. Further, even when perf_event_paranoid is toggled, no samples are recorded. This is an unintended side effect of commit: e3ba76deef23064f ("perf tools: Force uncore events to system wide monitoring) ... which assumes that if a PMU has an associated cpu_map, it is an uncore PMU, and forces events for such PMUs to be system-wide. This is not true for arm/arm64 systems, which can have heterogeneous CPUs. To account for this, multiple CPU PMUs are exposed, each with a "cpus" field under sysfs, which the perf tool parses into a cpu_map. ARM PMUs do not have a "cpumask" file, and only have a "cpus" file. For the gory details as to why, see commit: 7e3fcffe95544010 ("perf pmu: Support alternative sysfs cpumask") Given all of this, we can instead identify uncore PMUs by explicitly checking for a "cpumask" file, and restore arm/arm64 PMU support back to a working state. This patch does so, adding a new perf_pmu::is_uncore field, and splitting the existing cpumask parsing so that it can be reused. Signed-off-by: Mark Rutland Tested-by Will Deacon Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: 4.12+ Fixes: e3ba76deef23064f ("perf tools: Force uncore events to system wide monitoring) Link: http://lkml.kernel.org/r/1507315102-5942-1-git-send-email-mark.rutland@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 9 ++++--- tools/perf/util/pmu.c | 56 +++++++++++++++++++++++++++++++----------- tools/perf/util/pmu.h | 1 + 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f6257fb4f08c..39b15968eab1 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -309,10 +309,11 @@ static char *get_config_name(struct list_head *head_terms) static struct perf_evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct cpu_map *cpus, + char *name, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats) { struct perf_evsel *evsel; + struct cpu_map *cpus = pmu ? pmu->cpus : NULL; event_attr_init(attr); @@ -323,7 +324,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->cpus = cpu_map__get(cpus); evsel->own_cpus = cpu_map__get(cpus); - evsel->system_wide = !!cpus; + evsel->system_wide = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) @@ -1233,7 +1234,7 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats); + evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); return evsel ? 0 : -ENOMEM; } @@ -1254,7 +1255,7 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; evsel = __add_event(list, &parse_state->idx, &attr, - get_config_name(head_config), pmu->cpus, + get_config_name(head_config), pmu, &config_terms, auto_merge_stats); if (evsel) { evsel->unit = info.unit; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ac16a9db1fb5..1c4d7b4e4fb5 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -470,17 +470,36 @@ static void pmu_read_sysfs(void) closedir(dir); } +static struct cpu_map *__pmu_cpumask(const char *path) +{ + FILE *file; + struct cpu_map *cpus; + + file = fopen(path, "r"); + if (!file) + return NULL; + + cpus = cpu_map__read(file); + fclose(file); + return cpus; +} + +/* + * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) + * may have a "cpus" file. + */ +#define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" +#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" + static struct cpu_map *pmu_cpumask(const char *name) { - struct stat st; char path[PATH_MAX]; - FILE *file; struct cpu_map *cpus; const char *sysfs = sysfs__mountpoint(); const char *templates[] = { - "%s/bus/event_source/devices/%s/cpumask", - "%s/bus/event_source/devices/%s/cpus", - NULL + CPUS_TEMPLATE_UNCORE, + CPUS_TEMPLATE_CPU, + NULL }; const char **template; @@ -489,20 +508,25 @@ static struct cpu_map *pmu_cpumask(const char *name) for (template = templates; *template; template++) { snprintf(path, PATH_MAX, *template, sysfs, name); - if (stat(path, &st) == 0) - break; + cpus = __pmu_cpumask(path); + if (cpus) + return cpus; } - if (!*template) - return NULL; + return NULL; +} - file = fopen(path, "r"); - if (!file) - return NULL; +static bool pmu_is_uncore(const char *name) +{ + char path[PATH_MAX]; + struct cpu_map *cpus; + const char *sysfs = sysfs__mountpoint(); - cpus = cpu_map__read(file); - fclose(file); - return cpus; + snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); + cpus = __pmu_cpumask(path); + cpu_map__put(cpus); + + return !!cpus; } /* @@ -617,6 +641,8 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->cpus = pmu_cpumask(name); + pmu->is_uncore = pmu_is_uncore(name); + INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); list_splice(&format, &pmu->format); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 389e9729331f..fe0de0502ce2 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -22,6 +22,7 @@ struct perf_pmu { char *name; __u32 type; bool selectable; + bool is_uncore; struct perf_event_attr *default_config; struct cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ -- cgit v1.2.3-70-g09d2 From aa7b4e02b328f0589b6133e72aafb1289f614a79 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Oct 2017 15:55:45 -0300 Subject: tools include uapi bpf.h: Sync kernel ABI header with tooling header Silences the checker: Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h' The 90caccdd8cc0 ("bpf: fix bpf_tail_call() x64 JIT") cset only updated a comment in uapi/bpf.h. Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: David Ahern Cc: David S. Miller Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-rwx2cqbf0x1lwa1krsr6e6hd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 43ab5c402f98..f90860d1f897 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -312,7 +312,7 @@ union bpf_attr { * jump into another BPF program * @ctx: context pointer passed to next program * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY - * @index: index inside array that selects specific program to run + * @index: 32-bit index inside array that selects specific program to run * Return: 0 on success or negative error * * int bpf_clone_redirect(skb, ifindex, flags) -- cgit v1.2.3-70-g09d2 From fdfbad3256918fc5736d68384331d2dbf45ccbd6 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Mon, 9 Oct 2017 14:05:12 +0200 Subject: cdc_ether: flag the u-blox TOBY-L2 and SARA-U2 as wwan The u-blox TOBY-L2 is a LTE Cat 4 module with HSPA+ and 2G fallback. This module allows switching to different USB profiles with the 'AT+UUSBCONF' command, and provides a ECM network interface when the 'AT+UUSBCONF=2' profile is selected. The u-blox SARA-U2 is a HSPA module with 2G fallback. The default USB configuration includes a ECM network interface. Both these modules are controlled via AT commands through one of the TTYs exposed. Connecting these modules may be done just by activating the desired PDP context with 'AT+CGACT=1,' and then running DHCP on the ECM interface. Signed-off-by: Aleksander Morgado Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 29c7e2ec0dcb..52ea80bcd639 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -560,6 +560,7 @@ static const struct driver_info wwan_info = { #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 #define MICROSOFT_VENDOR_ID 0x045e +#define UBLOX_VENDOR_ID 0x1546 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -868,6 +869,18 @@ static const struct usb_device_id products[] = { USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&zte_cdc_info, +}, { + /* U-blox TOBY-L2 */ + USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1143, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { + /* U-blox SARA-U2 */ + USB_DEVICE_AND_INTERFACE_INFO(UBLOX_VENDOR_ID, 0x1104, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), -- cgit v1.2.3-70-g09d2 From 96ca579a1ecc943b75beba58bebb0356f6cc4b51 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 9 Oct 2017 11:36:52 -0700 Subject: waitid(): Add missing access_ok() checks Adds missing access_ok() checks. CVE-2017-5123 Reported-by: Chris Salls Signed-off-by: Kees Cook Acked-by: Al Viro Fixes: 4c48abe91be0 ("waitid(): switch copyout of siginfo to unsafe_put_user()") Cc: stable@kernel.org # 4.13 Signed-off-by: Linus Torvalds --- kernel/exit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/exit.c b/kernel/exit.c index f2cd53e92147..cf28528842bc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1610,6 +1610,9 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, if (!infop) return err; + if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) + goto Efault; + user_access_begin(); unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); @@ -1735,6 +1738,9 @@ COMPAT_SYSCALL_DEFINE5(waitid, if (!infop) return err; + if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) + goto Efault; + user_access_begin(); unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); -- cgit v1.2.3-70-g09d2 From 5ab894aee0f171a682bcd90dd5d1930cb53c55dc Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 9 Oct 2017 16:28:37 +0300 Subject: device property: Track owner device of device property Deletion of subdevice will remove device properties associated to parent when they share the same firmware node after commit 478573c93abd (driver core: Don't leak secondary fwnode on device removal). This was observed with a driver adding subdevice that driver wasn't able to read device properties after rmmod/modprobe cycle. Consider the lifecycle of it: parent device registration ACPI_COMPANION_SET() device_add_properties() pset_copy_set() set_secondary_fwnode(dev, &p->fwnode) device_add() parent probe read device properties ACPI_COMPANION_SET(subdevice, ACPI_COMPANION(parent)) device_add(subdevice) parent remove device_del(subdevice) device_remove_properties() set_secondary_fwnode(dev, NULL); pset_free() Parent device will have its primary firmware node pointing to an ACPI node and secondary firmware node point to device properties. ACPI_COMPANION_SET() call in parent probe will set the subdevice's firmware node to point to the same 'struct fwnode_handle' and the associated secondary firmware node, i.e. the device properties as the parent. When subdevice is deleted in parent remove that will remove those device properties and attempt to read device properties in next parent probe call will fail. Fix this by tracking the owner device of device properties and delete them only when owner device is being deleted. Fixes: 478573c93abd (driver core: Don't leak secondary fwnode on device removal) Cc: 4.9+ # 4.9+ Signed-off-by: Jarkko Nikula Signed-off-by: Rafael J. Wysocki --- drivers/base/property.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index d0b65bbe7e15..21fcc13013a5 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -21,6 +21,7 @@ #include struct property_set { + struct device *dev; struct fwnode_handle fwnode; const struct property_entry *properties; }; @@ -891,6 +892,7 @@ static struct property_set *pset_copy_set(const struct property_set *pset) void device_remove_properties(struct device *dev) { struct fwnode_handle *fwnode; + struct property_set *pset; fwnode = dev_fwnode(dev); if (!fwnode) @@ -900,16 +902,16 @@ void device_remove_properties(struct device *dev) * the pset. If there is no real firmware node (ACPI/DT) primary * will hold the pset. */ - if (is_pset_node(fwnode)) { + pset = to_pset_node(fwnode); + if (pset) { set_primary_fwnode(dev, NULL); - pset_free_set(to_pset_node(fwnode)); } else { - fwnode = fwnode->secondary; - if (!IS_ERR(fwnode) && is_pset_node(fwnode)) { + pset = to_pset_node(fwnode->secondary); + if (pset && dev == pset->dev) set_secondary_fwnode(dev, NULL); - pset_free_set(to_pset_node(fwnode)); - } } + if (pset && dev == pset->dev) + pset_free_set(pset); } EXPORT_SYMBOL_GPL(device_remove_properties); @@ -938,6 +940,7 @@ int device_add_properties(struct device *dev, p->fwnode.ops = &pset_fwnode_ops; set_secondary_fwnode(dev, &p->fwnode); + p->dev = dev; return 0; } EXPORT_SYMBOL_GPL(device_add_properties); -- cgit v1.2.3-70-g09d2 From 2b30297d481ad305134252557768c22391e0fed6 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Mon, 9 Oct 2017 20:51:05 -0700 Subject: Input: synaptics - disable kernel tracking on SMBus devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In certain situations kernel tracking seems to be getting confused and incorrectly reporting the slot of a contact. On example is when the user does a three finger click or tap and then places two fingers on the touchpad in the same area. The kernel tracking code seems to continue to think that there are three contacts on the touchpad and incorrectly alternates the slot of one of the contacts. The result that is the input subsystem reports a stream of button press and release events as the reported slot changes. Kernel tracking was originally enabled to prevent cursor jumps, but it is unclear how much of an issue kernel jumps actually are. This patch simply disabled kernel tracking for now. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1482640 Signed-off-by: Andrew Duggan Tested-by: Kamil Páral Acked-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 5af0b7d200bc..ee5466a374bf 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1709,8 +1709,7 @@ static int synaptics_create_intertouch(struct psmouse *psmouse, .sensor_pdata = { .sensor_type = rmi_sensor_touchpad, .axis_align.flip_y = true, - /* to prevent cursors jumps: */ - .kernel_tracking = true, + .kernel_tracking = false, .topbuttonpad = topbuttonpad, }, .f30_data = { -- cgit v1.2.3-70-g09d2 From 9f1c2674b328a69ab5a9b5a1c52405795ee4163f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Oct 2017 21:44:51 -0700 Subject: net: memcontrol: defer call to mem_cgroup_sk_alloc() Instead of calling mem_cgroup_sk_alloc() from BH context, it is better to call it from inet_csk_accept() in process context. Not only this removes code in mem_cgroup_sk_alloc(), but it also fixes a bug since listener might have been dismantled and css_get() might cause a use-after-free. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Signed-off-by: Eric Dumazet Cc: Johannes Weiner Cc: Tejun Heo Signed-off-by: David S. Miller --- mm/memcontrol.c | 15 --------------- net/core/sock.c | 5 ++++- net/ipv4/inet_connection_sock.c | 1 + 3 files changed, 5 insertions(+), 16 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d5f3a62887cf..661f046ad318 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5828,21 +5828,6 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; - /* - * Socket cloning can throw us here with sk_memcg already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. - */ - if (sk->sk_memcg) { - BUG_ON(mem_cgroup_is_root(sk->sk_memcg)); - css_get(&sk->sk_memcg->css); - return; - } - rcu_read_lock(); memcg = mem_cgroup_from_task(current); if (memcg == root_mem_cgroup) diff --git a/net/core/sock.c b/net/core/sock.c index 23953b741a41..70c6ccbdf49f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1677,6 +1677,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; @@ -1714,7 +1718,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); - mem_cgroup_sk_alloc(newsk); cgroup_sk_alloc(&newsk->sk_cgrp_data); /* diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c039c937ba90..67aec7a10686 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -475,6 +475,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + mem_cgroup_sk_alloc(newsk); out: release_sock(sk); if (req) -- cgit v1.2.3-70-g09d2 From fbb1fb4ad415cb31ce944f65a5ca700aaf73a227 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Oct 2017 21:44:52 -0700 Subject: net: defer call to cgroup_sk_alloc() sk_clone_lock() might run while TCP/DCCP listener already vanished. In order to prevent use after free, it is better to defer cgroup_sk_alloc() to the point we know both parent and child exist, and from process context. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Signed-off-by: Eric Dumazet Cc: Johannes Weiner Cc: Tejun Heo Signed-off-by: David S. Miller --- kernel/cgroup/cgroup.c | 11 ----------- net/core/sock.c | 3 +-- net/ipv4/inet_connection_sock.c | 5 +++++ 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 44857278eb8a..3380a3e49af5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5709,17 +5709,6 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) if (cgroup_sk_alloc_disabled) return; - /* Socket clone path */ - if (skcd->val) { - /* - * We might be cloning a socket which is left in an empty - * cgroup and the cgroup might have already been rmdir'd. - * Don't use cgroup_get_live(). - */ - cgroup_get(sock_cgroup_ptr(skcd)); - return; - } - rcu_read_lock(); while (true) { diff --git a/net/core/sock.c b/net/core/sock.c index 70c6ccbdf49f..4499e3153813 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1680,6 +1680,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; + memset(&newsk->sk_cgrp_data, 0, sizeof(newsk->sk_cgrp_data)); atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; @@ -1718,8 +1719,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); - cgroup_sk_alloc(&newsk->sk_cgrp_data); - /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 67aec7a10686..d32c74507314 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -476,6 +478,9 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) spin_unlock_bh(&queue->fastopenq.lock); } mem_cgroup_sk_alloc(newsk); + cgroup_sk_alloc(&newsk->sk_cgrp_data); + sock_update_classid(&newsk->sk_cgrp_data); + sock_update_netprioidx(&newsk->sk_cgrp_data); out: release_sock(sk); if (req) -- cgit v1.2.3-70-g09d2 From e36a82ee4c514a2f4f8fa30c780ad059282f5d64 Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Wed, 20 Sep 2017 15:49:51 +0530 Subject: powerpc/livepatch: Fix livepatch stack access While running stress test with livepatch module loaded, kernel bug was triggered. cpu 0x5: Vector: 400 (Instruction Access) at [c0000000eb9d3b60] 5:mon> t [c0000000eb9d3de0] c0000000eb9d3e30 (unreliable) [c0000000eb9d3e30] c000000000008ab4 hardware_interrupt_common+0x114/0x120 --- Exception: 501 (Hardware Interrupt) at c000000000053040 livepatch_handler+0x4c/0x74 [c0000000eb9d4120] 0000000057ac6e9d (unreliable) [d0000000089d9f78] 2e0965747962382e SP (965747962342e09) is in userspace When an interrupt occurs during the livepatch_handler execution, it's possible for the livepatch_stack and/or thread_info to be corrupted. eg: Task A Interrupt Handler ========= ================= livepatch_handler: mr r0, r1 ld r1, TI_livepatch_sp(r12) hardware_interrupt_common: do_IRQ+0x8: mflr r0 <- saved stack pointer is overwritten bl _mcount ... std r27,-40(r1) <- overwrite of thread_info() lis r2, STACK_END_MAGIC@h ori r2, r2, STACK_END_MAGIC@l ld r12, -8(r1) Fix the corruption by using r11 register for livepatch stack manipulation, instead of shuffling task stack and livepatch stack into r1 register. Using r11 register also avoids disabling/enabling irq's while setting up the livepatch stack. Signed-off-by: Kamalesh Babulal Reviewed-by: Naveen N. Rao Reviewed-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 45 +++++++++----------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index c98e90b4ea7b..b4e2b7165f79 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -181,34 +181,25 @@ _GLOBAL(ftrace_stub) * - we have no stack frame and can not allocate one * - LR points back to the original caller (in A) * - CTR holds the new NIP in C - * - r0 & r12 are free - * - * r0 can't be used as the base register for a DS-form load or store, so - * we temporarily shuffle r1 (stack pointer) into r0 and then put it back. + * - r0, r11 & r12 are free */ livepatch_handler: CURRENT_THREAD_INFO(r12, r1) - /* Save stack pointer into r0 */ - mr r0, r1 - /* Allocate 3 x 8 bytes */ - ld r1, TI_livepatch_sp(r12) - addi r1, r1, 24 - std r1, TI_livepatch_sp(r12) + ld r11, TI_livepatch_sp(r12) + addi r11, r11, 24 + std r11, TI_livepatch_sp(r12) /* Save toc & real LR on livepatch stack */ - std r2, -24(r1) + std r2, -24(r11) mflr r12 - std r12, -16(r1) + std r12, -16(r11) /* Store stack end marker */ lis r12, STACK_END_MAGIC@h ori r12, r12, STACK_END_MAGIC@l - std r12, -8(r1) - - /* Restore real stack pointer */ - mr r1, r0 + std r12, -8(r11) /* Put ctr in r12 for global entry and branch there */ mfctr r12 @@ -216,36 +207,30 @@ livepatch_handler: /* * Now we are returning from the patched function to the original - * caller A. We are free to use r0 and r12, and we can use r2 until we + * caller A. We are free to use r11, r12 and we can use r2 until we * restore it. */ CURRENT_THREAD_INFO(r12, r1) - /* Save stack pointer into r0 */ - mr r0, r1 - - ld r1, TI_livepatch_sp(r12) + ld r11, TI_livepatch_sp(r12) /* Check stack marker hasn't been trashed */ lis r2, STACK_END_MAGIC@h ori r2, r2, STACK_END_MAGIC@l - ld r12, -8(r1) + ld r12, -8(r11) 1: tdne r12, r2 EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 /* Restore LR & toc from livepatch stack */ - ld r12, -16(r1) + ld r12, -16(r11) mtlr r12 - ld r2, -24(r1) + ld r2, -24(r11) /* Pop livepatch stack frame */ - CURRENT_THREAD_INFO(r12, r0) - subi r1, r1, 24 - std r1, TI_livepatch_sp(r12) - - /* Restore real stack pointer */ - mr r1, r0 + CURRENT_THREAD_INFO(r12, r1) + subi r11, r11, 24 + std r11, TI_livepatch_sp(r12) /* Return to original caller of live patched function */ blr -- cgit v1.2.3-70-g09d2 From eb701ce16a45ed9880897c48f05ee608d77c72e3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 9 Oct 2017 10:24:01 +0300 Subject: mmc: sdhci-pci: Fix default d3_retune for Intel host controllers The default for d3_retune is true, but that was not being set in all cases, which results in eMMC errors because re-tuning has not been done. Fix by initializing d3_retune to true. Signed-off-by: Adrian Hunter Fixes: c959a6b00ff5 ("mmc: sdhci-pci: Don't re-tune with runtime pm for some Intel devices") Cc: stable@vger.kernel.org # v4.12+ Reported-and-tested-by: ojab Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index d0ccc6729fd2..67d787fa3306 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -448,6 +448,8 @@ static void intel_dsm_init(struct intel_host *intel_host, struct device *dev, int err; u32 val; + intel_host->d3_retune = true; + err = __intel_dsm(intel_host, dev, INTEL_DSM_FNS, &intel_host->dsm_fns); if (err) { pr_debug("%s: DSM not supported, error %d\n", -- cgit v1.2.3-70-g09d2 From 8b405d5c5d0996d3d16f70c42744a0500f5b6ec3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Oct 2017 11:13:37 +0200 Subject: locking/lockdep: Fix stacktrace mess There is some complication between check_prevs_add() and check_prev_add() wrt. saving stack traces. The problem is that we want to be frugal with saving stack traces, since it consumes static resources. We'll only know in check_prev_add() if we need the trace, but we can call into it multiple times. So we want to do on-demand and re-use. A further complication is that check_prev_add() can drop graph_lock and mess with our static resources. In any case, the current state; after commit: ce07a9415f26 ("locking/lockdep: Make check_prev_add() able to handle external stack_trace") is that we'll assume the trace contains valid data once check_prev_add() returns '2'. However, as noted by Josh, this is false, check_prev_add() can return '2' before having saved a trace, this then result in the possibility of using uninitialized data. Testing, as reported by Wu, shows a NULL deref. So simplify. Since the graph_lock() thing is a debug path that hasn't really been used in a long while, take it out back and avoid the head-ache. Further initialize the stack_trace to a known 'empty' state; as long as nr_entries == 0, nothing should deref entries. We can then use the 'entries == NULL' test for a valid trace / on-demand saving. Analyzed-by: Josh Poimboeuf Reported-by: Fengguang Wu Signed-off-by: Peter Zijlstra (Intel) Cc: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: ce07a9415f26 ("locking/lockdep: Make check_prev_add() able to handle external stack_trace") Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 48 ++++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 44c8d0d17170..e36e652d996f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1873,10 +1873,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, struct held_lock *next, int distance, struct stack_trace *trace, int (*save)(struct stack_trace *trace)) { + struct lock_list *uninitialized_var(target_entry); struct lock_list *entry; - int ret; struct lock_list this; - struct lock_list *uninitialized_var(target_entry); + int ret; /* * Prove that the new -> dependency would not @@ -1890,8 +1890,17 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, this.class = hlock_class(next); this.parent = NULL; ret = check_noncircular(&this, hlock_class(prev), &target_entry); - if (unlikely(!ret)) + if (unlikely(!ret)) { + if (!trace->entries) { + /* + * If @save fails here, the printing might trigger + * a WARN but because of the !nr_entries it should + * not do bad things. + */ + save(trace); + } return print_circular_bug(&this, target_entry, next, prev, trace); + } else if (unlikely(ret < 0)) return print_bfs_bug(ret); @@ -1938,7 +1947,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, return print_bfs_bug(ret); - if (save && !save(trace)) + if (!trace->entries && !save(trace)) return 0; /* @@ -1958,20 +1967,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, if (!ret) return 0; - /* - * Debugging printouts: - */ - if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { - graph_unlock(); - printk("\n new dependency: "); - print_lock_name(hlock_class(prev)); - printk(KERN_CONT " => "); - print_lock_name(hlock_class(next)); - printk(KERN_CONT "\n"); - dump_stack(); - if (!graph_lock()) - return 0; - } return 2; } @@ -1986,8 +1981,12 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) { int depth = curr->lockdep_depth; struct held_lock *hlock; - struct stack_trace trace; - int (*save)(struct stack_trace *trace) = save_trace; + struct stack_trace trace = { + .nr_entries = 0, + .max_entries = 0, + .entries = NULL, + .skip = 0, + }; /* * Debugging checks. @@ -2018,17 +2017,10 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) */ if (hlock->read != 2 && hlock->check) { int ret = check_prev_add(curr, hlock, next, - distance, &trace, save); + distance, &trace, save_trace); if (!ret) return 0; - /* - * Stop saving stack_trace if save_trace() was - * called at least once: - */ - if (save && ret == 2) - save = NULL; - /* * Stop after the first non-trylock entry, * as non-trylock entries have added their -- cgit v1.2.3-70-g09d2 From c7e2f69d3ed2e56de1f5eaaf37c0f5f91d7adb0a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Oct 2017 10:50:25 +0200 Subject: locking/selftest: Avoid false BUG report The work-around for the expected failure is providing another failure :/ Only when CONFIG_PROVE_LOCKING=y do we increment unexpected_testcase_failures, so only then do we need to decrement, otherwise we'll end up with a negative number and that will again trigger a BUG (printout, not crash). Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d82fed752942 ("locking/lockdep/selftests: Fix mixed read-write ABBA tests") Signed-off-by: Ingo Molnar --- lib/locking-selftest.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index cd0b5c964bd0..2b827b8a1d8c 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -2031,11 +2031,13 @@ void locking_selftest(void) print_testname("mixed read-lock/lock-write ABBA"); pr_cont(" |"); dotest(rlock_ABBA1, FAILURE, LOCKTYPE_RWLOCK); +#ifdef CONFIG_PROVE_LOCKING /* * Lockdep does indeed fail here, but there's nothing we can do about * that now. Don't kill lockdep for it. */ unexpected_testcase_failures--; +#endif pr_cont(" |"); dotest(rwsem_ABBA1, FAILURE, LOCKTYPE_RWSEM); -- cgit v1.2.3-70-g09d2 From df0062b27ebf473b372914a3e3574d93790e2b72 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 3 Oct 2017 15:20:50 +0100 Subject: perf/core: Avoid freeing static PMU contexts when PMU is unregistered Since commit: 1fd7e4169954 ("perf/core: Remove perf_cpu_context::unique_pmu") ... when a PMU is unregistered then its associated ->pmu_cpu_context is unconditionally freed. Whilst this is fine for dynamically allocated context types (i.e. those registered using perf_invalid_context), this causes a problem for sharing of static contexts such as perf_{sw,hw}_context, which are used by multiple built-in PMUs and effectively have a global lifetime. Whilst testing the ARM SPE driver, which must use perf_sw_context to support per-task AUX tracing, unregistering the driver as a result of a module unload resulted in: Unable to handle kernel NULL pointer dereference at virtual address 00000038 Internal error: Oops: 96000004 [#1] PREEMPT SMP Modules linked in: [last unloaded: arm_spe_pmu] PC is at ctx_resched+0x38/0xe8 LR is at perf_event_exec+0x20c/0x278 [...] ctx_resched+0x38/0xe8 perf_event_exec+0x20c/0x278 setup_new_exec+0x88/0x118 load_elf_binary+0x26c/0x109c search_binary_handler+0x90/0x298 do_execveat_common.isra.14+0x540/0x618 SyS_execve+0x38/0x48 since the software context has been freed and the ctx.pmu->pmu_disable_count field has been set to NULL. This patch fixes the problem by avoiding the freeing of static PMU contexts altogether. Whilst the sharing of dynamic contexts is questionable, this actually requires the caller to share their context pointer explicitly and so the burden is on them to manage the object lifetime. Reported-by: Kim Phillips Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 1fd7e4169954 ("perf/core: Remove perf_cpu_context::unique_pmu") Link: http://lkml.kernel.org/r/1507040450-7730-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6bc21e202ae4..243bfc68d0fe 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8955,6 +8955,14 @@ static struct perf_cpu_context __percpu *find_pmu_context(int ctxn) static void free_pmu_context(struct pmu *pmu) { + /* + * Static contexts such as perf_sw_context have a global lifetime + * and may be shared between different PMUs. Avoid freeing them + * when a single PMU is going away. + */ + if (pmu->task_ctx_nr > perf_invalid_context) + return; + mutex_lock(&pmus_lock); free_percpu(pmu->pmu_cpu_context); mutex_unlock(&pmus_lock); -- cgit v1.2.3-70-g09d2 From e6a5203399d19871021c1fa0eb2a08fc63b67e91 Mon Sep 17 00:00:00 2001 From: "leilei.lin" Date: Fri, 29 Sep 2017 13:54:44 +0800 Subject: perf/core: Fix cgroup time when scheduling descendants Update cgroup time when an event is scheduled in by descendants. Reviewed-and-tested-by: Jiri Olsa Signed-off-by: leilei.lin Signed-off-by: Peter Zijlstra (Intel) Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: brendan.d.gregg@gmail.com Cc: yang_oliver@hotmail.com Link: http://lkml.kernel.org/r/CALPjY3mkHiekRkRECzMi9G-bjUQOvOjVBAqxmWkTzc-g+0LwMg@mail.gmail.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 243bfc68d0fe..9d93db81fa36 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -662,7 +662,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) /* * Do not update time when cgroup is not active */ - if (cgrp == event->cgrp) + if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) __update_cgrp_time(event->cgrp); } -- cgit v1.2.3-70-g09d2 From d153b153446f7d8832bb2ebd92309c8a6003b3bb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Sep 2017 11:35:30 +0200 Subject: sched/core: Fix wake_affine() performance regression Eric reported a sysbench regression against commit: 3fed382b46ba ("sched/numa: Implement NUMA node level wake_affine()") Similarly, Rik was looking at the NAS-lu.C benchmark, which regressed against his v3.10 enterprise kernel. PRE (current tip/master): ivb-ep sysbench: 2: [30 secs] transactions: 64110 (2136.94 per sec.) 5: [30 secs] transactions: 143644 (4787.99 per sec.) 10: [30 secs] transactions: 274298 (9142.93 per sec.) 20: [30 secs] transactions: 418683 (13955.45 per sec.) 40: [30 secs] transactions: 320731 (10690.15 per sec.) 80: [30 secs] transactions: 355096 (11834.28 per sec.) hsw-ex NAS: OMP_PROC_BIND/lu.C.x_threads_144_run_1.log: Time in seconds = 18.01 OMP_PROC_BIND/lu.C.x_threads_144_run_2.log: Time in seconds = 17.89 OMP_PROC_BIND/lu.C.x_threads_144_run_3.log: Time in seconds = 17.93 lu.C.x_threads_144_run_1.log: Time in seconds = 434.68 lu.C.x_threads_144_run_2.log: Time in seconds = 405.36 lu.C.x_threads_144_run_3.log: Time in seconds = 433.83 POST (+patch): ivb-ep sysbench: 2: [30 secs] transactions: 64494 (2149.75 per sec.) 5: [30 secs] transactions: 145114 (4836.99 per sec.) 10: [30 secs] transactions: 278311 (9276.69 per sec.) 20: [30 secs] transactions: 437169 (14571.60 per sec.) 40: [30 secs] transactions: 669837 (22326.73 per sec.) 80: [30 secs] transactions: 631739 (21055.88 per sec.) hsw-ex NAS: lu.C.x_threads_144_run_1.log: Time in seconds = 23.36 lu.C.x_threads_144_run_2.log: Time in seconds = 22.96 lu.C.x_threads_144_run_3.log: Time in seconds = 22.52 This patch takes out all the shiny wake_affine() stuff and goes back to utter basics. Between the two CPUs involved with the wakeup (the CPU doing the wakeup and the CPU we ran on previously) pick the CPU we can run on _now_. This restores much of the regressions against the older kernels, but leaves some ground in the overloaded case. The default-enabled WA_WEIGHT (which will be introduced in the next patch) is an attempt to address the overloaded situation. Reported-by: Eric Farman Signed-off-by: Peter Zijlstra (Intel) Cc: Christian Borntraeger Cc: Linus Torvalds Cc: Matthew Rosato Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: jinpuwang@gmail.com Cc: vcaputo@pengaru.com Fixes: 3fed382b46ba ("sched/numa: Implement NUMA node level wake_affine()") Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 8 --- kernel/sched/fair.c | 126 +++++------------------------------------ kernel/sched/features.h | 1 + 3 files changed, 16 insertions(+), 119 deletions(-) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index d7b6dab956ec..7d065abc7a47 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -71,14 +71,6 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; - - /* - * Some variables from the most recent sd_lb_stats for this domain, - * used by wake_affine(). - */ - unsigned long nr_running; - unsigned long load; - unsigned long capacity; }; struct sched_domain { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 70ba32e08a23..28cabed85387 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5356,115 +5356,36 @@ static int wake_wide(struct task_struct *p) return 1; } -struct llc_stats { - unsigned long nr_running; - unsigned long load; - unsigned long capacity; - int has_capacity; -}; - -static bool get_llc_stats(struct llc_stats *stats, int cpu) -{ - struct sched_domain_shared *sds = rcu_dereference(per_cpu(sd_llc_shared, cpu)); - - if (!sds) - return false; - - stats->nr_running = READ_ONCE(sds->nr_running); - stats->load = READ_ONCE(sds->load); - stats->capacity = READ_ONCE(sds->capacity); - stats->has_capacity = stats->nr_running < per_cpu(sd_llc_size, cpu); - - return true; -} - /* - * Can a task be moved from prev_cpu to this_cpu without causing a load - * imbalance that would trigger the load balancer? + * The purpose of wake_affine() is to quickly determine on which CPU we can run + * soonest. For the purpose of speed we only consider the waking and previous + * CPU. * - * Since we're running on 'stale' values, we might in fact create an imbalance - * but recomputing these values is expensive, as that'd mean iteration 2 cache - * domains worth of CPUs. + * wake_affine_idle() - only considers 'now', it check if the waking CPU is (or + * will be) idle. */ + static bool -wake_affine_llc(struct sched_domain *sd, struct task_struct *p, - int this_cpu, int prev_cpu, int sync) +wake_affine_idle(struct sched_domain *sd, struct task_struct *p, + int this_cpu, int prev_cpu, int sync) { - struct llc_stats prev_stats, this_stats; - s64 this_eff_load, prev_eff_load; - unsigned long task_load; - - if (!get_llc_stats(&prev_stats, prev_cpu) || - !get_llc_stats(&this_stats, this_cpu)) - return false; - - /* - * If sync wakeup then subtract the (maximum possible) - * effect of the currently running task from the load - * of the current LLC. - */ - if (sync) { - unsigned long current_load = task_h_load(current); - - /* in this case load hits 0 and this LLC is considered 'idle' */ - if (current_load > this_stats.load) - return true; - - this_stats.load -= current_load; - } - - /* - * The has_capacity stuff is not SMT aware, but by trying to balance - * the nr_running on both ends we try and fill the domain at equal - * rates, thereby first consuming cores before siblings. - */ - - /* if the old cache has capacity, stay there */ - if (prev_stats.has_capacity && prev_stats.nr_running < this_stats.nr_running+1) - return false; - - /* if this cache has capacity, come here */ - if (this_stats.has_capacity && this_stats.nr_running+1 < prev_stats.nr_running) + if (idle_cpu(this_cpu)) return true; - /* - * Check to see if we can move the load without causing too much - * imbalance. - */ - task_load = task_h_load(p); - - this_eff_load = 100; - this_eff_load *= prev_stats.capacity; - - prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2; - prev_eff_load *= this_stats.capacity; - - this_eff_load *= this_stats.load + task_load; - prev_eff_load *= prev_stats.load - task_load; + if (sync && cpu_rq(this_cpu)->nr_running == 1) + return true; - return this_eff_load <= prev_eff_load; + return false; } static int wake_affine(struct sched_domain *sd, struct task_struct *p, int prev_cpu, int sync) { int this_cpu = smp_processor_id(); - bool affine; - - /* - * Default to no affine wakeups; wake_affine() should not effect a task - * placement the load-balancer feels inclined to undo. The conservative - * option is therefore to not move tasks when they wake up. - */ - affine = false; + bool affine = false; - /* - * If the wakeup is across cache domains, try to evaluate if movement - * makes sense, otherwise rely on select_idle_siblings() to do - * placement inside the cache domain. - */ - if (!cpus_share_cache(prev_cpu, this_cpu)) - affine = wake_affine_llc(sd, p, this_cpu, prev_cpu, sync); + if (sched_feat(WA_IDLE) && !affine) + affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync); schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts); if (affine) { @@ -7600,7 +7521,6 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq) */ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds) { - struct sched_domain_shared *shared = env->sd->shared; struct sched_domain *child = env->sd->child; struct sched_group *sg = env->sd->groups; struct sg_lb_stats *local = &sds->local_stat; @@ -7672,22 +7592,6 @@ next_group: if (env->dst_rq->rd->overload != overload) env->dst_rq->rd->overload = overload; } - - if (!shared) - return; - - /* - * Since these are sums over groups they can contain some CPUs - * multiple times for the NUMA domains. - * - * Currently only wake_affine_llc() and find_busiest_group() - * uses these numbers, only the last is affected by this problem. - * - * XXX fix that. - */ - WRITE_ONCE(shared->nr_running, sds->total_running); - WRITE_ONCE(shared->load, sds->total_load); - WRITE_ONCE(shared->capacity, sds->total_capacity); } /** diff --git a/kernel/sched/features.h b/kernel/sched/features.h index d3fb15555291..0a519f8c224d 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -81,3 +81,4 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) SCHED_FEAT(ATTACH_AGE_LOAD, true) +SCHED_FEAT(WA_IDLE, true) -- cgit v1.2.3-70-g09d2 From f2cdd9cc6c97e617b95f430f527a6e3165e1bee8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 Oct 2017 09:23:24 +0200 Subject: sched/core: Address more wake_affine() regressions The trivial wake_affine_idle() implementation is very good for a number of workloads, but it comes apart at the moment there are no idle CPUs left, IOW. the overloaded case. hackbench: NO_WA_WEIGHT WA_WEIGHT hackbench-20 : 7.362717561 seconds 6.450509391 seconds (win) netperf: NO_WA_WEIGHT WA_WEIGHT TCP_SENDFILE-1 : Avg: 54524.6 Avg: 52224.3 TCP_SENDFILE-10 : Avg: 48185.2 Avg: 46504.3 TCP_SENDFILE-20 : Avg: 29031.2 Avg: 28610.3 TCP_SENDFILE-40 : Avg: 9819.72 Avg: 9253.12 TCP_SENDFILE-80 : Avg: 5355.3 Avg: 4687.4 TCP_STREAM-1 : Avg: 41448.3 Avg: 42254 TCP_STREAM-10 : Avg: 24123.2 Avg: 25847.9 TCP_STREAM-20 : Avg: 15834.5 Avg: 18374.4 TCP_STREAM-40 : Avg: 5583.91 Avg: 5599.57 TCP_STREAM-80 : Avg: 2329.66 Avg: 2726.41 TCP_RR-1 : Avg: 80473.5 Avg: 82638.8 TCP_RR-10 : Avg: 72660.5 Avg: 73265.1 TCP_RR-20 : Avg: 52607.1 Avg: 52634.5 TCP_RR-40 : Avg: 57199.2 Avg: 56302.3 TCP_RR-80 : Avg: 25330.3 Avg: 26867.9 UDP_RR-1 : Avg: 108266 Avg: 107844 UDP_RR-10 : Avg: 95480 Avg: 95245.2 UDP_RR-20 : Avg: 68770.8 Avg: 68673.7 UDP_RR-40 : Avg: 76231 Avg: 75419.1 UDP_RR-80 : Avg: 34578.3 Avg: 35639.1 UDP_STREAM-1 : Avg: 64684.3 Avg: 66606 UDP_STREAM-10 : Avg: 52701.2 Avg: 52959.5 UDP_STREAM-20 : Avg: 30376.4 Avg: 29704 UDP_STREAM-40 : Avg: 15685.8 Avg: 15266.5 UDP_STREAM-80 : Avg: 8415.13 Avg: 7388.97 (wins and losses) sysbench: NO_WA_WEIGHT WA_WEIGHT sysbench-mysql-2 : 2135.17 per sec. 2142.51 per sec. sysbench-mysql-5 : 4809.68 per sec. 4800.19 per sec. sysbench-mysql-10 : 9158.59 per sec. 9157.05 per sec. sysbench-mysql-20 : 14570.70 per sec. 14543.55 per sec. sysbench-mysql-40 : 22130.56 per sec. 22184.82 per sec. sysbench-mysql-80 : 20995.56 per sec. 21904.18 per sec. sysbench-psql-2 : 1679.58 per sec. 1705.06 per sec. sysbench-psql-5 : 3797.69 per sec. 3879.93 per sec. sysbench-psql-10 : 7253.22 per sec. 7258.06 per sec. sysbench-psql-20 : 11166.75 per sec. 11220.00 per sec. sysbench-psql-40 : 17277.28 per sec. 17359.78 per sec. sysbench-psql-80 : 17112.44 per sec. 17221.16 per sec. (increase on the top end) tbench: NO_WA_WEIGHT Throughput 685.211 MB/sec 2 clients 2 procs max_latency=0.123 ms Throughput 1596.64 MB/sec 5 clients 5 procs max_latency=0.119 ms Throughput 2985.47 MB/sec 10 clients 10 procs max_latency=0.262 ms Throughput 4521.15 MB/sec 20 clients 20 procs max_latency=0.506 ms Throughput 9438.1 MB/sec 40 clients 40 procs max_latency=2.052 ms Throughput 8210.5 MB/sec 80 clients 80 procs max_latency=8.310 ms WA_WEIGHT Throughput 697.292 MB/sec 2 clients 2 procs max_latency=0.127 ms Throughput 1596.48 MB/sec 5 clients 5 procs max_latency=0.080 ms Throughput 2975.22 MB/sec 10 clients 10 procs max_latency=0.254 ms Throughput 4575.14 MB/sec 20 clients 20 procs max_latency=0.502 ms Throughput 9468.65 MB/sec 40 clients 40 procs max_latency=2.069 ms Throughput 8631.73 MB/sec 80 clients 80 procs max_latency=8.605 ms (increase on the top end) Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Rik van Riel Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 41 +++++++++++++++++++++++++++++++++++++++++ kernel/sched/features.h | 2 ++ 2 files changed, 43 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 28cabed85387..ed2ab474ec93 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5363,6 +5363,10 @@ static int wake_wide(struct task_struct *p) * * wake_affine_idle() - only considers 'now', it check if the waking CPU is (or * will be) idle. + * + * wake_affine_weight() - considers the weight to reflect the average + * scheduling latency of the CPUs. This seems to work + * for the overloaded case. */ static bool @@ -5378,6 +5382,40 @@ wake_affine_idle(struct sched_domain *sd, struct task_struct *p, return false; } +static bool +wake_affine_weight(struct sched_domain *sd, struct task_struct *p, + int this_cpu, int prev_cpu, int sync) +{ + s64 this_eff_load, prev_eff_load; + unsigned long task_load; + + this_eff_load = target_load(this_cpu, sd->wake_idx); + prev_eff_load = source_load(prev_cpu, sd->wake_idx); + + if (sync) { + unsigned long current_load = task_h_load(current); + + if (current_load > this_eff_load) + return true; + + this_eff_load -= current_load; + } + + task_load = task_h_load(p); + + this_eff_load += task_load; + if (sched_feat(WA_BIAS)) + this_eff_load *= 100; + this_eff_load *= capacity_of(prev_cpu); + + prev_eff_load -= task_load; + if (sched_feat(WA_BIAS)) + prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2; + prev_eff_load *= capacity_of(this_cpu); + + return this_eff_load <= prev_eff_load; +} + static int wake_affine(struct sched_domain *sd, struct task_struct *p, int prev_cpu, int sync) { @@ -5387,6 +5425,9 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, if (sched_feat(WA_IDLE) && !affine) affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync); + if (sched_feat(WA_WEIGHT) && !affine) + affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync); + schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts); if (affine) { schedstat_inc(sd->ttwu_move_affine); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 0a519f8c224d..319ed0e8a347 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -82,3 +82,5 @@ SCHED_FEAT(LB_MIN, false) SCHED_FEAT(ATTACH_AGE_LOAD, true) SCHED_FEAT(WA_IDLE, true) +SCHED_FEAT(WA_WEIGHT, true) +SCHED_FEAT(WA_BIAS, true) -- cgit v1.2.3-70-g09d2 From 024c9d2faebdad3fb43fe49ad68e91a36190f1e2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Oct 2017 10:36:53 +0200 Subject: sched/core: Ensure load_balance() respects the active_mask While load_balance() masks the source CPUs against active_mask, it had a hole against the destination CPU. Ensure the destination CPU is also part of the 'domain-mask & active-mask' set. Reported-by: Levin, Alexander (Sasha Levin) Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 77d1dfda0e79 ("sched/topology, cpuset: Avoid spurious/wrong domain rebuilds") Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ed2ab474ec93..d3f3094856fe 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8042,6 +8042,13 @@ static int should_we_balance(struct lb_env *env) struct sched_group *sg = env->sd->groups; int cpu, balance_cpu = -1; + /* + * Ensure the balancing environment is consistent; can happen + * when the softirq triggers 'during' hotplug. + */ + if (!cpumask_test_cpu(env->dst_cpu, env->cpus)) + return 0; + /* * In the newly idle case, we will allow all the cpu's * to do the newly idle load balance. -- cgit v1.2.3-70-g09d2 From 5a866ec0014b2baa4ecbb1eaa19c835482829d08 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 10 Oct 2017 10:43:17 +0200 Subject: spi: a3700: Return correct value on timeout detection When waiting for transfer completion, a3700_spi_wait_completion returns a boolean indicating if a timeout occurred. The function was returning 'true' everytime, failing to detect any timeout. This patch makes it return 'false' when a timeout is reached. Signed-off-by: Maxime Chevallier Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-armada-3700.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 9172cb2d2e7a..568e1c65aa82 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -387,7 +387,8 @@ static bool a3700_spi_wait_completion(struct spi_device *spi) spireg_write(a3700_spi, A3700_SPI_INT_MASK_REG, 0); - return true; + /* Timeout was reached */ + return false; } static bool a3700_spi_transfer_wait(struct spi_device *spi, -- cgit v1.2.3-70-g09d2 From b0490a04e736356e427e227902b17f9927a56caf Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Tue, 10 Oct 2017 12:15:30 +0530 Subject: powerpc/lib/sstep: Fix count leading zeros instructions According to the GCC documentation, the behaviour of __builtin_clz() and __builtin_clzl() is undefined if the value of the input argument is zero. Without handling this special case, these builtins have been used for emulating the following instructions: * Count Leading Zeros Word (cntlzw[.]) * Count Leading Zeros Doubleword (cntlzd[.]) This fixes the emulated behaviour of these instructions by adding an additional check for this special case. Fixes: 3cdfcbfd32b9d ("powerpc: Change analyse_instr so it doesn't modify *regs") Signed-off-by: Sandipan Das Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5e8418c28bd8..f208f560aecd 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1684,11 +1684,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, * Logical instructions */ case 26: /* cntlzw */ - op->val = __builtin_clz((unsigned int) regs->gpr[rd]); + val = (unsigned int) regs->gpr[rd]; + op->val = ( val ? __builtin_clz(val) : 32 ); goto logical_done; #ifdef __powerpc64__ case 58: /* cntlzd */ - op->val = __builtin_clzl(regs->gpr[rd]); + val = regs->gpr[rd]; + op->val = ( val ? __builtin_clzl(val) : 64 ); goto logical_done; #endif case 28: /* and */ -- cgit v1.2.3-70-g09d2 From 6b2c08f989250c54f31b53dba9ace863a1f3fff6 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Wed, 4 Oct 2017 21:04:30 -0300 Subject: powerpc: Don't call lockdep_assert_cpus_held() from arch_update_cpu_topology() It turns out that not all paths calling arch_update_cpu_topology() hold cpu_hotplug_lock, but that's OK because those paths can't race with any concurrent hotplug events. Warnings were reported with the following trace: lockdep_assert_cpus_held arch_update_cpu_topology sched_init_domains sched_init_smp kernel_init_freeable kernel_init ret_from_kernel_thread Which is safe because it's called early in boot when hotplug is not live yet. And also this trace: lockdep_assert_cpus_held arch_update_cpu_topology partition_sched_domains cpuset_update_active_cpus sched_cpu_deactivate cpuhp_invoke_callback cpuhp_down_callbacks cpuhp_thread_fun smpboot_thread_fn kthread ret_from_kernel_thread Which is safe because it's called as part of CPU hotplug, so although we don't hold the CPU hotplug lock, there is another thread driving the CPU hotplug operation which does hold the lock, and there is no race. Thanks to tglx for deciphering it for us. Fixes: 3e401f7a2e51 ("powerpc: Only obtain cpu_hotplug_lock if called by rtasd") Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b95c584ce19d..a51df9ef529d 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1438,7 +1438,6 @@ out: int arch_update_cpu_topology(void) { - lockdep_assert_cpus_held(); return numa_update_cpu_topology(true); } -- cgit v1.2.3-70-g09d2 From 62dd86ac01f9fb6386d7f8c6b389c3ea4582a50a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 9 Oct 2017 20:20:02 -0500 Subject: x86/unwind: Fix dereference of untrusted pointer Tetsuo Handa and Fengguang Wu reported a panic in the unwinder: BUG: unable to handle kernel NULL pointer dereference at 000001f2 IP: update_stack_state+0xd4/0x340 *pde = 00000000 Oops: 0000 [#1] PREEMPT SMP CPU: 0 PID: 18728 Comm: 01-cpu-hotplug Not tainted 4.13.0-rc4-00170-gb09be67 #592 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-20161025_171302-gandalf 04/01/2014 task: bb0b53c0 task.stack: bb3ac000 EIP: update_stack_state+0xd4/0x340 EFLAGS: 00010002 CPU: 0 EAX: 0000a570 EBX: bb3adccb ECX: 0000f401 EDX: 0000a570 ESI: 00000001 EDI: 000001ba EBP: bb3adc6b ESP: bb3adc3f DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 CR0: 80050033 CR2: 000001f2 CR3: 0b3a7000 CR4: 00140690 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: fffe0ff0 DR7: 00000400 Call Trace: ? unwind_next_frame+0xea/0x400 ? __unwind_start+0xf5/0x180 ? __save_stack_trace+0x81/0x160 ? save_stack_trace+0x20/0x30 ? __lock_acquire+0xfa5/0x12f0 ? lock_acquire+0x1c2/0x230 ? tick_periodic+0x3a/0xf0 ? _raw_spin_lock+0x42/0x50 ? tick_periodic+0x3a/0xf0 ? tick_periodic+0x3a/0xf0 ? debug_smp_processor_id+0x12/0x20 ? tick_handle_periodic+0x23/0xc0 ? local_apic_timer_interrupt+0x63/0x70 ? smp_trace_apic_timer_interrupt+0x235/0x6a0 ? trace_apic_timer_interrupt+0x37/0x3c ? strrchr+0x23/0x50 Code: 0f 95 c1 89 c7 89 45 e4 0f b6 c1 89 c6 89 45 dc 8b 04 85 98 cb 74 bc 88 4d e3 89 45 f0 83 c0 01 84 c9 89 04 b5 98 cb 74 bc 74 3b <8b> 47 38 8b 57 34 c6 43 1d 01 25 00 00 02 00 83 e2 03 09 d0 83 EIP: update_stack_state+0xd4/0x340 SS:ESP: 0068:bb3adc3f CR2: 00000000000001f2 ---[ end trace 0d147fd4aba8ff50 ]--- Kernel panic - not syncing: Fatal exception in interrupt On x86-32, after decoding a frame pointer to get a regs address, regs_size() dereferences the regs pointer when it checks regs->cs to see if the regs are user mode. This is dangerous because it's possible that what looks like a decoded frame pointer is actually a corrupt value, and we don't want the unwinder to make things worse. Instead of calling regs_size() on an unsafe pointer, just assume they're kernel regs to start with. Later, once it's safe to access the regs, we can do the user mode check and corresponding safety check for the remaining two regs. Reported-and-tested-by: Tetsuo Handa Reported-and-tested-by: Fengguang Wu Signed-off-by: Josh Poimboeuf Cc: Byungchul Park Cc: LKP Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 5ed8d8bb38c5 ("x86/unwind: Move common code into update_stack_state()") Link: http://lkml.kernel.org/r/7f95b9a6993dec7674b3f3ab3dcd3294f7b9644d.1507597785.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_frame.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index d145a0b1f529..d05637726c10 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -184,6 +184,12 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp) return (struct pt_regs *)(regs & ~0x1); } +#ifdef CONFIG_X86_32 +#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long)) +#else +#define KERNEL_REGS_SIZE (sizeof(struct pt_regs)) +#endif + static bool update_stack_state(struct unwind_state *state, unsigned long *next_bp) { @@ -202,7 +208,7 @@ static bool update_stack_state(struct unwind_state *state, regs = decode_frame_pointer(next_bp); if (regs) { frame = (unsigned long *)regs; - len = regs_size(regs); + len = KERNEL_REGS_SIZE; state->got_irq = true; } else { frame = next_bp; @@ -226,6 +232,14 @@ static bool update_stack_state(struct unwind_state *state, frame < prev_frame_end) return false; + /* + * On 32-bit with user mode regs, make sure the last two regs are safe + * to access: + */ + if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) && + !on_stack(info, frame, len + 2*sizeof(long))) + return false; + /* Move state to the next frame: */ if (regs) { state->regs = regs; -- cgit v1.2.3-70-g09d2 From 5c99b692cfd62f6915ed7ee7ed3c38d65ba3feab Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 9 Oct 2017 20:20:03 -0500 Subject: x86/unwind: Use MSB for frame pointer encoding on 32-bit On x86-32, Tetsuo Handa and Fengguang Wu reported unwinder warnings like: WARNING: kernel stack regs at f60bb9c8 in swapper:1 has bad 'bp' value 0ba00000 And also there were some stack dumps with a bunch of unreliable '?' symbols after an apic_timer_interrupt symbol, meaning the unwinder got confused when it tried to read the regs. The cause of those issues is that, with GCC 4.8 (and possibly older), there are cases where GCC misaligns the stack pointer in a leaf function for no apparent reason: c124a388 : c124a388: 55 push %ebp c124a389: 89 e5 mov %esp,%ebp c124a38b: 57 push %edi c124a38c: 56 push %esi c124a38d: 89 d6 mov %edx,%esi c124a38f: 53 push %ebx c124a390: 31 db xor %ebx,%ebx c124a392: 83 ec 03 sub $0x3,%esp ... c124a3e3: 83 c4 03 add $0x3,%esp c124a3e6: 5b pop %ebx c124a3e7: 5e pop %esi c124a3e8: 5f pop %edi c124a3e9: 5d pop %ebp c124a3ea: c3 ret If an interrupt occurs in such a function, the regs on the stack will be unaligned, which breaks the frame pointer encoding assumption. So on 32-bit, use the MSB instead of the LSB to encode the regs. This isn't an issue on 64-bit, because interrupts align the stack before writing to it. Reported-and-tested-by: Tetsuo Handa Reported-and-tested-by: Fengguang Wu Signed-off-by: Josh Poimboeuf Cc: Byungchul Park Cc: LKP Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/279a26996a482ca716605c7dbc7f2db9d8d91e81.1507597785.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 4 ++-- arch/x86/kernel/unwind_frame.c | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 8a13d468635a..50e0d2bc4528 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -176,7 +176,7 @@ /* * This is a sneaky trick to help the unwinder find pt_regs on the stack. The * frame pointer is replaced with an encoded pointer to pt_regs. The encoding - * is just setting the LSB, which makes it an invalid stack address and is also + * is just clearing the MSB, which makes it an invalid stack address and is also * a signal to the unwinder that it's a pt_regs pointer in disguise. * * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the @@ -185,7 +185,7 @@ .macro ENCODE_FRAME_POINTER #ifdef CONFIG_FRAME_POINTER mov %esp, %ebp - orl $0x1, %ebp + andl $0x7fffffff, %ebp #endif .endm diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index d05637726c10..4949bbc95f75 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -174,6 +174,7 @@ static bool is_last_task_frame(struct unwind_state *state) * This determines if the frame pointer actually contains an encoded pointer to * pt_regs on the stack. See ENCODE_FRAME_POINTER. */ +#ifdef CONFIG_X86_64 static struct pt_regs *decode_frame_pointer(unsigned long *bp) { unsigned long regs = (unsigned long)bp; @@ -183,6 +184,17 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp) return (struct pt_regs *)(regs & ~0x1); } +#else +static struct pt_regs *decode_frame_pointer(unsigned long *bp) +{ + unsigned long regs = (unsigned long)bp; + + if (regs & 0x80000000) + return NULL; + + return (struct pt_regs *)(regs | 0x80000000); +} +#endif #ifdef CONFIG_X86_32 #define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long)) -- cgit v1.2.3-70-g09d2 From 99bd28a49b150e4b938313a63b5532d95ba77885 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 9 Oct 2017 20:20:04 -0500 Subject: x86/unwind: Align stack pointer in unwinder dump When printing the unwinder dump, the stack pointer could be unaligned, for one of two reasons: - stack corruption; or - GCC created an unaligned stack. There's no way for the unwinder to tell the difference between the two, so we have to assume one or the other. GCC unaligned stacks are very rare, and have only been spotted before GCC 5. Presumably, if we're doing an unwinder stack dump, stack corruption is more likely than a GCC unaligned stack. So always align the stack before starting the dump. Reported-and-tested-by: Tetsuo Handa Reported-and-tested-by: Fengguang Wu Signed-off-by: Josh Poimboeuf Cc: Byungchul Park Cc: LKP Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2f540c515946ab09ed267e1a1d6421202a0cce08.1507597785.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_frame.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 4949bbc95f75..81aca077fbb6 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -44,7 +44,8 @@ static void unwind_dump(struct unwind_state *state) state->stack_info.type, state->stack_info.next_sp, state->stack_mask, state->graph_idx); - for (sp = state->orig_sp; sp; sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp; + sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { if (get_stack_info(sp, state->task, &stack_info, &visit_mask)) break; -- cgit v1.2.3-70-g09d2 From d4a2d031dd42f9594107d317e2a9a0c6d73ad46b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 9 Oct 2017 20:20:05 -0500 Subject: x86/unwind: Disable unwinder warnings on 32-bit x86-32 doesn't have stack validation, so in most cases it doesn't make sense to warn about bad frame pointers. Reported-by: Tetsuo Handa Signed-off-by: Josh Poimboeuf Cc: Byungchul Park Cc: Fengguang Wu Cc: LKP Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a69658760800bf281e6353248c23e0fa0acf5230.1507597785.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_frame.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 81aca077fbb6..3dc26f95d46e 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -355,6 +355,13 @@ bad_address: state->regs->sp < (unsigned long)task_pt_regs(state->task)) goto the_end; + /* + * There are some known frame pointer issues on 32-bit. Disable + * unwinder warnings on 32-bit until it gets objtool support. + */ + if (IS_ENABLED(CONFIG_X86_32)) + goto the_end; + if (state->regs) { printk_deferred_once(KERN_WARNING "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", -- cgit v1.2.3-70-g09d2 From 629eb703d3e46aa570c6c91235d38fd09ed8c58b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 9 Oct 2017 18:26:55 +0100 Subject: perf/x86/intel/uncore: Fix memory leaks on allocation failures Currently if an allocation fails then the error return paths don't free up any currently allocated pmus[].boxes and pmus causing a memory leak. Add an error clean up exit path that frees these objects. Detected by CoverityScan, CID#711632 ("Resource Leak") Signed-off-by: Colin Ian King Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Fixes: 087bfbb03269 ("perf/x86: Add generic Intel uncore PMU support") Link: http://lkml.kernel.org/r/20171009172655.6132-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 1c5390f1cf09..d45e06346f14 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -822,7 +822,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) pmus[i].type = type; pmus[i].boxes = kzalloc(size, GFP_KERNEL); if (!pmus[i].boxes) - return -ENOMEM; + goto err; } type->pmus = pmus; @@ -836,7 +836,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) + sizeof(*attr_group), GFP_KERNEL); if (!attr_group) - return -ENOMEM; + goto err; attrs = (struct attribute **)(attr_group + 1); attr_group->name = "events"; @@ -849,7 +849,15 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) } type->pmu_group = &uncore_pmu_attr_group; + return 0; + +err: + for (i = 0; i < type->num_boxes; i++) + kfree(pmus[i].boxes); + kfree(pmus); + + return -ENOMEM; } static int __init -- cgit v1.2.3-70-g09d2 From a3b7424392924e778b608e30ee321f7b10cc94b8 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 6 Oct 2017 17:48:54 +0200 Subject: x86/hyperv: Clear vCPU banks between calls to avoid flushing unneeded vCPUs hv_flush_pcpu_ex structures are not cleared between calls for performance reasons (they're variable size up to PAGE_SIZE each) but we must clear hv_vp_set.bank_contents part of it to avoid flushing unneeded vCPUs. The rest of the structure is formed correctly. To do the clearing in an efficient way stash the maximum possible vCPU number (this may differ from Linux CPU id). Reported-by: Jork Loeser Signed-off-by: Vitaly Kuznetsov Cc: Dexuan Cui Cc: Haiyang Zhang Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: devel@linuxdriverproject.org Link: http://lkml.kernel.org/r/20171006154854.18092-1-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/hyperv/hv_init.c | 5 +++++ arch/x86/hyperv/mmu.c | 17 ++++++++++++----- arch/x86/include/asm/mshyperv.h | 1 + 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 1a8eb550c40f..a5db63f728a2 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -85,6 +85,8 @@ EXPORT_SYMBOL_GPL(hyperv_cs); u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); +u32 hv_max_vp_index; + static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; @@ -93,6 +95,9 @@ static int hv_cpu_init(unsigned int cpu) hv_vp_index[smp_processor_id()] = msr_vp_index; + if (msr_vp_index > hv_max_vp_index) + hv_max_vp_index = msr_vp_index; + return 0; } diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 39e7f6e50919..9502d04c0c95 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -76,6 +76,18 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush, { int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + /* valid_bank_mask can represent up to 64 banks */ + if (hv_max_vp_index / 64 >= 64) + return 0; + + /* + * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex + * structs are not cleared between calls, we risk flushing unneeded + * vCPUs otherwise. + */ + for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) + flush->hv_vp_set.bank_contents[vcpu_bank] = 0; + /* * Some banks may end up being empty but this is acceptable. */ @@ -83,11 +95,6 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush, vcpu = hv_cpu_number_to_vp_number(cpu); vcpu_bank = vcpu / 64; vcpu_offset = vcpu % 64; - - /* valid_bank_mask can represent up to 64 banks */ - if (vcpu_bank >= 64) - return 0; - __set_bit(vcpu_offset, (unsigned long *) &flush->hv_vp_set.bank_contents[vcpu_bank]); if (vcpu_bank >= nr_bank) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 738503e1f80c..530f448fddaf 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -289,6 +289,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, * to this information. */ extern u32 *hv_vp_index; +extern u32 hv_max_vp_index; /** * hv_cpu_number_to_vp_number() - Map CPU to VP. -- cgit v1.2.3-70-g09d2 From 60d73a7c96601434dfdb56d5b9167ff3b850d8d7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 5 Oct 2017 13:39:24 +0200 Subject: x86/hyperv: Don't use percpu areas for pcpu_flush/pcpu_flush_ex structures hv_do_hypercall() does virt_to_phys() translation and with some configs (CONFIG_SLAB) this doesn't work for percpu areas, we pass wrong memory to hypervisor and get #GP. We could use working slow_virt_to_phys() instead but doing so kills the performance. Move pcpu_flush/pcpu_flush_ex structures out of percpu areas and allocate memory on first call. The additional level of indirection gives us a small performance penalty, in future we may consider introducing hypercall functions which avoid virt_to_phys() conversion and cache physical addresses of pcpu_flush/pcpu_flush_ex structures somewhere. Reported-by: Simon Xiao Signed-off-by: Vitaly Kuznetsov Cc: Dexuan Cui Cc: Haiyang Zhang Cc: Jork Loeser Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: devel@linuxdriverproject.org Link: http://lkml.kernel.org/r/20171005113924.28021-1-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/hyperv/mmu.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 9502d04c0c95..f21cebbb5f6c 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -36,9 +36,9 @@ struct hv_flush_pcpu_ex { /* Each gva in gva_list encodes up to 4096 pages to flush */ #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) -static struct hv_flush_pcpu __percpu *pcpu_flush; +static struct hv_flush_pcpu __percpu **pcpu_flush; -static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex; +static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex; /* * Fills in gva_list starting from offset. Returns the number of items added. @@ -109,6 +109,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, const struct flush_tlb_info *info) { int cpu, vcpu, gva_n, max_gvas; + struct hv_flush_pcpu **flush_pcpu; struct hv_flush_pcpu *flush; u64 status = U64_MAX; unsigned long flags; @@ -123,7 +124,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, local_irq_save(flags); - flush = this_cpu_ptr(pcpu_flush); + flush_pcpu = this_cpu_ptr(pcpu_flush); + + if (unlikely(!*flush_pcpu)) + *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + + flush = *flush_pcpu; + + if (unlikely(!flush)) { + local_irq_restore(flags); + goto do_native; + } if (info->mm) { flush->address_space = virt_to_phys(info->mm->pgd); @@ -180,6 +191,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, const struct flush_tlb_info *info) { int nr_bank = 0, max_gvas, gva_n; + struct hv_flush_pcpu_ex **flush_pcpu; struct hv_flush_pcpu_ex *flush; u64 status = U64_MAX; unsigned long flags; @@ -194,7 +206,17 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, local_irq_save(flags); - flush = this_cpu_ptr(pcpu_flush_ex); + flush_pcpu = this_cpu_ptr(pcpu_flush_ex); + + if (unlikely(!*flush_pcpu)) + *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + + flush = *flush_pcpu; + + if (unlikely(!flush)) { + local_irq_restore(flags); + goto do_native; + } if (info->mm) { flush->address_space = virt_to_phys(info->mm->pgd); @@ -273,7 +295,7 @@ void hyper_alloc_mmu(void) return; if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) - pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); + pcpu_flush = alloc_percpu(struct hv_flush_pcpu *); else - pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); + pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *); } -- cgit v1.2.3-70-g09d2 From ab7ff471aa5db670197070760f022622793da7e5 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Cerri Date: Thu, 5 Oct 2017 10:34:29 -0300 Subject: x86/hyperv: Fix hypercalls with extended CPU ranges for TLB flushing Do not consider the fixed size of hv_vp_set when passing the variable header size to hv_do_rep_hypercall(). The Hyper-V hypervisor specification states that for a hypercall with a variable header only the size of the variable portion should be supplied via the input control. For HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX/LIST_EX calls that means the fixed portion of hv_vp_set should not be considered. That fixes random failures of some applications that are unexpectedly killed with SIGBUS or SIGSEGV. Signed-off-by: Marcelo Henrique Cerri Cc: Dexuan Cui Cc: Haiyang Zhang Cc: Jork Loeser Cc: Josh Poulson Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Simon Xiao Cc: Stephen Hemminger Cc: Thomas Gleixner Cc: Vitaly Kuznetsov Cc: devel@linuxdriverproject.org Fixes: 628f54cc6451 ("x86/hyper-v: Support extended CPU ranges for TLB flush hypercalls") Link: http://lkml.kernel.org/r/1507210469-29065-1-git-send-email-marcelo.cerri@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/hyperv/mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index f21cebbb5f6c..9cc9e1c1e2db 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -251,18 +251,18 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; status = hv_do_rep_hypercall( HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, - 0, nr_bank + 2, flush, NULL); + 0, nr_bank, flush, NULL); } else if (info->end && ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { status = hv_do_rep_hypercall( HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX, - 0, nr_bank + 2, flush, NULL); + 0, nr_bank, flush, NULL); } else { gva_n = fill_gva_list(flush->gva_list, nr_bank, info->start, info->end); status = hv_do_rep_hypercall( HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX, - gva_n, nr_bank + 2, flush, NULL); + gva_n, nr_bank, flush, NULL); } local_irq_restore(flags); -- cgit v1.2.3-70-g09d2 From 124751d5e63c823092060074bd0abaae61aaa9c4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 10 Oct 2017 14:10:32 +0200 Subject: ALSA: usb-audio: Kill stray URB at exiting USB-audio driver may leave a stray URB for the mixer interrupt when it exits by some error during probe. This leads to a use-after-free error as spotted by syzkaller like: ================================================================== BUG: KASAN: use-after-free in snd_usb_mixer_interrupt+0x604/0x6f0 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x292/0x395 lib/dump_stack.c:52 print_address_description+0x78/0x280 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 kasan_report+0x23d/0x350 mm/kasan/report.c:409 __asan_report_load8_noabort+0x19/0x20 mm/kasan/report.c:430 snd_usb_mixer_interrupt+0x604/0x6f0 sound/usb/mixer.c:2490 __usb_hcd_giveback_urb+0x2e0/0x650 drivers/usb/core/hcd.c:1779 .... Allocated by task 1484: save_stack_trace+0x1b/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kmem_cache_alloc_trace+0x11e/0x2d0 mm/slub.c:2772 kmalloc ./include/linux/slab.h:493 kzalloc ./include/linux/slab.h:666 snd_usb_create_mixer+0x145/0x1010 sound/usb/mixer.c:2540 create_standard_mixer_quirk+0x58/0x80 sound/usb/quirks.c:516 snd_usb_create_quirk+0x92/0x100 sound/usb/quirks.c:560 create_composite_quirk+0x1c4/0x3e0 sound/usb/quirks.c:59 snd_usb_create_quirk+0x92/0x100 sound/usb/quirks.c:560 usb_audio_probe+0x1040/0x2c10 sound/usb/card.c:618 .... Freed by task 1484: save_stack_trace+0x1b/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 kasan_slab_free+0x72/0xc0 mm/kasan/kasan.c:524 slab_free_hook mm/slub.c:1390 slab_free_freelist_hook mm/slub.c:1412 slab_free mm/slub.c:2988 kfree+0xf6/0x2f0 mm/slub.c:3919 snd_usb_mixer_free+0x11a/0x160 sound/usb/mixer.c:2244 snd_usb_mixer_dev_free+0x36/0x50 sound/usb/mixer.c:2250 __snd_device_free+0x1ff/0x380 sound/core/device.c:91 snd_device_free_all+0x8f/0xe0 sound/core/device.c:244 snd_card_do_free sound/core/init.c:461 release_card_device+0x47/0x170 sound/core/init.c:181 device_release+0x13f/0x210 drivers/base/core.c:814 .... Actually such a URB is killed properly at disconnection when the device gets probed successfully, and what we need is to apply it for the error-path, too. In this patch, we apply snd_usb_mixer_disconnect() at releasing. Also introduce a new flag, disconnected, to struct usb_mixer_interface for not performing the disconnection procedure twice. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 12 ++++++++++-- sound/usb/mixer.h | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 9732edf77f86..91bc8f18791e 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2234,6 +2234,9 @@ static int parse_audio_unit(struct mixer_build *state, int unitid) static void snd_usb_mixer_free(struct usb_mixer_interface *mixer) { + /* kill pending URBs */ + snd_usb_mixer_disconnect(mixer); + kfree(mixer->id_elems); if (mixer->urb) { kfree(mixer->urb->transfer_buffer); @@ -2584,8 +2587,13 @@ _error: void snd_usb_mixer_disconnect(struct usb_mixer_interface *mixer) { - usb_kill_urb(mixer->urb); - usb_kill_urb(mixer->rc_urb); + if (mixer->disconnected) + return; + if (mixer->urb) + usb_kill_urb(mixer->urb); + if (mixer->rc_urb) + usb_kill_urb(mixer->rc_urb); + mixer->disconnected = true; } #ifdef CONFIG_PM diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h index 2b4b067646ab..545d99b09706 100644 --- a/sound/usb/mixer.h +++ b/sound/usb/mixer.h @@ -22,6 +22,8 @@ struct usb_mixer_interface { struct urb *rc_urb; struct usb_ctrlrequest *rc_setup_packet; u8 rc_buffer[6]; + + bool disconnected; }; #define MAX_CHANNELS 16 /* max logical channels */ -- cgit v1.2.3-70-g09d2 From eac779aa509d453a55da0ea4302bdb79c4e0854f Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Sun, 8 Oct 2017 19:58:46 -0700 Subject: xen/vcpu: Use a unified name about cpu hotplug state for pv and pvhvm As xen_cpuhp_setup is called by PV and PVHVM, the name of "x86/xen/hvm_guest" is confusing. Signed-off-by: Zhenzhong Duan Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- arch/x86/xen/enlighten.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0e7ef69e8531..d669e9d89001 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -93,11 +93,11 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), int rc; rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE, - "x86/xen/hvm_guest:prepare", + "x86/xen/guest:prepare", cpu_up_prepare_cb, cpu_dead_cb); if (rc >= 0) { rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "x86/xen/hvm_guest:online", + "x86/xen/guest:online", xen_cpu_up_online, NULL); if (rc < 0) cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE); -- cgit v1.2.3-70-g09d2 From fd19d3b45164466a4adce7cbff448ba9189e1427 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 5 Oct 2017 11:10:22 +0200 Subject: KVM: nVMX: update last_nonleaf_level when initializing nested EPT The function updates context->root_level but didn't call update_last_nonleaf_level so the previous and potentially wrong value was used for page walks. For example, a zero value of last_nonleaf_level would allow a potential out-of-bounds access in arch/x86/mmu/paging_tmpl.h's walk_addr_generic function (CVE-2017-12188). Fixes: 155a97a3d7c78b46cef6f1a973c831bc5a4f82bb Signed-off-by: Ladi Prosek Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 106d4a029a8a..3c25f20115bc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4555,6 +4555,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, update_permission_bitmask(vcpu, context, true); update_pkru_bitmask(vcpu, context, true); + update_last_nonleaf_level(vcpu, context); reset_rsvds_bits_mask_ept(vcpu, context, execonly); reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); } -- cgit v1.2.3-70-g09d2 From 829ee279aed43faa5cb1e4d65c0cad52f2426c53 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 5 Oct 2017 11:10:23 +0200 Subject: KVM: MMU: always terminate page walks at level 1 is_last_gpte() is not equivalent to the pseudo-code given in commit 6bb69c9b69c31 ("KVM: MMU: simplify last_pte_bitmap") because an incorrect value of last_nonleaf_level may override the result even if level == 1. It is critical for is_last_gpte() to return true on level == 1 to terminate page walks. Otherwise memory corruption may occur as level is used as an index to various data structures throughout the page walking code. Even though the actual bug would be wherever the MMU is initialized (as in the previous patch), be defensive and ensure here that is_last_gpte() returns the correct value. This patch is also enough to fix CVE-2017-12188. Fixes: 6bb69c9b69c315200ddc2bc79aee14c0184cf5b2 Cc: stable@vger.kernel.org Cc: Andy Honig Signed-off-by: Ladi Prosek [Panic if walk_addr_generic gets an incorrect level; this is a serious bug and it's not worth a WARN_ON where the recovery path might hide further exploitable issues; suggested by Andrew Honig. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 14 +++++++------- arch/x86/kvm/paging_tmpl.h | 3 ++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3c25f20115bc..7a69cf053711 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3973,13 +3973,6 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) { - /* - * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set - * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means - * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. - */ - gpte |= level - PT_PAGE_TABLE_LEVEL - 1; - /* * The RHS has bit 7 set iff level < mmu->last_nonleaf_level. * If it is clear, there are no large pages at this level, so clear @@ -3987,6 +3980,13 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, */ gpte &= level - mmu->last_nonleaf_level; + /* + * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set + * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means + * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. + */ + gpte |= level - PT_PAGE_TABLE_LEVEL - 1; + return gpte & PT_PAGE_SIZE_MASK; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 86b68dc5a649..f18d1f8d332b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -334,10 +334,11 @@ retry_walk: --walker->level; index = PT_INDEX(addr, walker->level); - table_gfn = gpte_to_gfn(pte); offset = index * sizeof(pt_element_t); pte_gpa = gfn_to_gpa(table_gfn) + offset; + + BUG_ON(walker->level < 1); walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; -- cgit v1.2.3-70-g09d2 From 692b48258dda7c302e777d7d5f4217244478f1f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Oct 2017 08:04:13 -0700 Subject: workqueue: replace pool->manager_arb mutex with a flag Josef reported a HARDIRQ-safe -> HARDIRQ-unsafe lock order detected by lockdep: [ 1270.472259] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected [ 1270.472783] 4.14.0-rc1-xfstests-12888-g76833e8 #110 Not tainted [ 1270.473240] ----------------------------------------------------- [ 1270.473710] kworker/u5:2/5157 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: [ 1270.474239] (&(&lock->wait_lock)->rlock){+.+.}, at: [] __mutex_unlock_slowpath+0xa2/0x280 [ 1270.474994] [ 1270.474994] and this task is already holding: [ 1270.475440] (&pool->lock/1){-.-.}, at: [] worker_thread+0x366/0x3c0 [ 1270.476046] which would create a new lock dependency: [ 1270.476436] (&pool->lock/1){-.-.} -> (&(&lock->wait_lock)->rlock){+.+.} [ 1270.476949] [ 1270.476949] but this new dependency connects a HARDIRQ-irq-safe lock: [ 1270.477553] (&pool->lock/1){-.-.} ... [ 1270.488900] to a HARDIRQ-irq-unsafe lock: [ 1270.489327] (&(&lock->wait_lock)->rlock){+.+.} ... [ 1270.494735] Possible interrupt unsafe locking scenario: [ 1270.494735] [ 1270.495250] CPU0 CPU1 [ 1270.495600] ---- ---- [ 1270.495947] lock(&(&lock->wait_lock)->rlock); [ 1270.496295] local_irq_disable(); [ 1270.496753] lock(&pool->lock/1); [ 1270.497205] lock(&(&lock->wait_lock)->rlock); [ 1270.497744] [ 1270.497948] lock(&pool->lock/1); , which will cause a irq inversion deadlock if the above lock scenario happens. The root cause of this safe -> unsafe lock order is the mutex_unlock(pool->manager_arb) in manage_workers() with pool->lock held. Unlocking mutex while holding an irq spinlock was never safe and this problem has been around forever but it never got noticed because the only time the mutex is usually trylocked while holding irqlock making actual failures very unlikely and lockdep annotation missed the condition until the recent b9c16a0e1f73 ("locking/mutex: Fix lockdep_assert_held() fail"). Using mutex for pool->manager_arb has always been a bit of stretch. It primarily is an mechanism to arbitrate managership between workers which can easily be done with a pool flag. The only reason it became a mutex is that pool destruction path wants to exclude parallel managing operations. This patch replaces the mutex with a new pool flag POOL_MANAGER_ACTIVE and make the destruction path wait for the current manager on a wait queue. v2: Drop unnecessary flag clearing before pool destruction as suggested by Boqun. Signed-off-by: Tejun Heo Reported-by: Josef Bacik Reviewed-by: Lai Jiangshan Cc: Peter Zijlstra Cc: Boqun Feng Cc: stable@vger.kernel.org --- kernel/workqueue.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 64d0edf428f8..a2dccfe1acec 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -68,6 +68,7 @@ enum { * attach_mutex to avoid changing binding state while * worker_attach_to_pool() is in progress. */ + POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ /* worker flags */ @@ -165,7 +166,6 @@ struct worker_pool { /* L: hash of busy workers */ /* see manage_workers() for details on the two manager mutexes */ - struct mutex manager_arb; /* manager arbitration */ struct worker *manager; /* L: purely informational */ struct mutex attach_mutex; /* attach/detach exclusion */ struct list_head workers; /* A: attached workers */ @@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf; static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ +static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ static LIST_HEAD(workqueues); /* PR: list of all workqueues */ static bool workqueue_freezing; /* PL: have wqs started freezing? */ @@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool) /* Do we have too many workers and should some go away? */ static bool too_many_workers(struct worker_pool *pool) { - bool managing = mutex_is_locked(&pool->manager_arb); + bool managing = pool->flags & POOL_MANAGER_ACTIVE; int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ int nr_busy = pool->nr_workers - nr_idle; @@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker) { struct worker_pool *pool = worker->pool; - /* - * Anyone who successfully grabs manager_arb wins the arbitration - * and becomes the manager. mutex_trylock() on pool->manager_arb - * failure while holding pool->lock reliably indicates that someone - * else is managing the pool and the worker which failed trylock - * can proceed to executing work items. This means that anyone - * grabbing manager_arb is responsible for actually performing - * manager duties. If manager_arb is grabbed and released without - * actual management, the pool may stall indefinitely. - */ - if (!mutex_trylock(&pool->manager_arb)) + if (pool->flags & POOL_MANAGER_ACTIVE) return false; + + pool->flags |= POOL_MANAGER_ACTIVE; pool->manager = worker; maybe_create_worker(pool); pool->manager = NULL; - mutex_unlock(&pool->manager_arb); + pool->flags &= ~POOL_MANAGER_ACTIVE; + wake_up(&wq_manager_wait); return true; } @@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool) setup_timer(&pool->mayday_timer, pool_mayday_timeout, (unsigned long)pool); - mutex_init(&pool->manager_arb); mutex_init(&pool->attach_mutex); INIT_LIST_HEAD(&pool->workers); @@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool) hash_del(&pool->hash_node); /* - * Become the manager and destroy all workers. Grabbing - * manager_arb prevents @pool's workers from blocking on - * attach_mutex. + * Become the manager and destroy all workers. This prevents + * @pool's workers from blocking on attach_mutex. We're the last + * manager and @pool gets freed with the flag set. */ - mutex_lock(&pool->manager_arb); - spin_lock_irq(&pool->lock); + wait_event_lock_irq(wq_manager_wait, + !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock); + pool->flags |= POOL_MANAGER_ACTIVE; + while ((worker = first_idle_worker(pool))) destroy_worker(worker); WARN_ON(pool->nr_workers || pool->nr_idle); @@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool) if (pool->detach_completion) wait_for_completion(pool->detach_completion); - mutex_unlock(&pool->manager_arb); - /* shut down the timers */ del_timer_sync(&pool->idle_timer); del_timer_sync(&pool->mayday_timer); -- cgit v1.2.3-70-g09d2 From e836e3211229d7307660239cc957f2ab60e6aa00 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 6 Sep 2017 10:11:38 +0200 Subject: i40e: Fix comment about locking for __i40e_read_nvm_word() Caller needs to acquire the lock. Called functions will not. Fixes: 09f79fd49d94 ("i40e: avoid NVM acquire deadlock during NVM update") Signed-off-by: Stefano Brivio Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_nvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 57505b1df98d..d591b3e6bd7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -298,7 +298,7 @@ static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset, } /** - * __i40e_read_nvm_word - Reads nvm word, assumes called does the locking + * __i40e_read_nvm_word - Reads nvm word, assumes caller does the locking * @hw: pointer to the HW structure * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) * @data: word read from the Shadow RAM -- cgit v1.2.3-70-g09d2 From 2b9478ffc550f17c6cd8c69057234e91150f5972 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 4 Oct 2017 08:44:43 -0700 Subject: i40e: Fix memory leak related filter programming status It looks like we weren't correctly placing the pages from buffers that had been used to return a filter programming status back on the ring. As a result they were being overwritten and tracking of the pages was lost. This change works to correct that by incorporating part of i40e_put_rx_buffer into the programming status handler code. As a result we should now be correctly placing the pages for those buffers on the re-allocation list instead of letting them stay in place. Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status") Reported-by: Anders K. Pedersen Signed-off-by: Alexander Duyck Tested-by: Anders K Pedersen Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 63 ++++++++++++++++------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 1519dfb851d0..2756131495f0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1037,6 +1037,32 @@ reset_latency: return false; } +/** + * i40e_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *old_buff) +{ + struct i40e_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_bi[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; +} + /** * i40e_rx_is_programming_status - check for programming status descriptor * @qw: qword representing status_error_len in CPU ordering @@ -1071,15 +1097,24 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring, union i40e_rx_desc *rx_desc, u64 qw) { - u32 ntc = rx_ring->next_to_clean + 1; + struct i40e_rx_buffer *rx_buffer; + u32 ntc = rx_ring->next_to_clean; u8 id; /* fetch, update, and store next to clean */ + rx_buffer = &rx_ring->rx_bi[ntc++]; ntc = (ntc < rx_ring->count) ? ntc : 0; rx_ring->next_to_clean = ntc; prefetch(I40E_RX_DESC(rx_ring, ntc)); + /* place unused page back on the ring */ + i40e_reuse_rx_page(rx_ring, rx_buffer); + rx_ring->rx_stats.page_reuse_count++; + + /* clear contents of buffer_info */ + rx_buffer->page = NULL; + id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >> I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT; @@ -1638,32 +1673,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, return false; } -/** - * i40e_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *old_buff) -{ - struct i40e_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_bi[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->dma = old_buff->dma; - new_buff->page = old_buff->page; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - /** * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to check -- cgit v1.2.3-70-g09d2 From ac3d79392f8c2728f7600dd32ed88b3a1bfdc1af Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Oct 2017 14:40:42 +0200 Subject: quota: Generate warnings for DQUOT_SPACE_NOFAIL allocations Eryu has reported that since commit 7b9ca4c61bc2 "quota: Reduce contention on dq_data_lock" test generic/233 occasionally fails. This is caused by the fact that since that commit we don't generate warning and set grace time for quota allocations that have DQUOT_SPACE_NOFAIL set (these are for example some metadata allocations in ext4). We need these allocations to behave regularly wrt warning generation and grace time setting so fix the code to return to the original behavior. Reported-and-tested-by: Eryu Guan CC: stable@vger.kernel.org Fixes: 7b9ca4c61bc278b771fb57d6290a31ab1fc7fdac Signed-off-by: Jan Kara --- fs/quota/dquot.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 50b0556a124f..52ad15192e72 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1297,21 +1297,18 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space, spin_lock(&dquot->dq_dqb_lock); if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) - goto add; + goto finish; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace + space + rsv_space; - if (flags & DQUOT_SPACE_NOFAIL) - goto add; - if (dquot->dq_dqb.dqb_bhardlimit && tspace > dquot->dq_dqb.dqb_bhardlimit && !ignore_hardlimit(dquot)) { if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN); ret = -EDQUOT; - goto out; + goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1322,7 +1319,7 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space, if (flags & DQUOT_SPACE_WARN) prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN); ret = -EDQUOT; - goto out; + goto finish; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1338,13 +1335,21 @@ static int dquot_add_space(struct dquot *dquot, qsize_t space, * be always printed */ ret = -EDQUOT; - goto out; + goto finish; } } -add: - dquot->dq_dqb.dqb_rsvspace += rsv_space; - dquot->dq_dqb.dqb_curspace += space; -out: +finish: + /* + * We have to be careful and go through warning generation & grace time + * setting even if DQUOT_SPACE_NOFAIL is set. That's why we check it + * only here... + */ + if (flags & DQUOT_SPACE_NOFAIL) + ret = 0; + if (!ret) { + dquot->dq_dqb.dqb_rsvspace += rsv_space; + dquot->dq_dqb.dqb_curspace += space; + } spin_unlock(&dquot->dq_dqb_lock); return ret; } -- cgit v1.2.3-70-g09d2 From b61907bb42409adf9b3120f741af7c57dd7e3db2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 9 Oct 2017 23:30:02 +0800 Subject: crypto: shash - Fix zero-length shash ahash digest crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shash ahash digest adaptor function may crash if given a zero-length input together with a null SG list. This is because it tries to read the SG list before looking at the length. This patch fixes it by checking the length first. Cc: Reported-by: Stephan Müller Signed-off-by: Herbert Xu Tested-by: Stephan Müller --- crypto/shash.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crypto/shash.c b/crypto/shash.c index 8fcecc66741d..325a14da5827 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -275,12 +275,14 @@ static int shash_async_finup(struct ahash_request *req) int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) { - struct scatterlist *sg = req->src; - unsigned int offset = sg->offset; unsigned int nbytes = req->nbytes; + struct scatterlist *sg; + unsigned int offset; int err; - if (nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset)) { + if (nbytes && + (sg = req->src, offset = sg->offset, + nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset))) { void *data; data = kmap_atomic(sg_page(sg)); -- cgit v1.2.3-70-g09d2 From aba2d9a6385a5cc4f7a7e8eb5788e1ddbc213fc0 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 6 Oct 2017 16:35:40 -0500 Subject: iommu/amd: Do not disable SWIOTLB if SME is active When SME memory encryption is active it will rely on SWIOTLB to handle DMA for devices that cannot support the addressing requirements of having the encryption mask set in the physical address. The IOMMU currently disables SWIOTLB if it is not running in passthrough mode. This is not desired as non-PCI devices attempting DMA may fail. Update the code to check if SME is active and not disable SWIOTLB. Fixes: 2543a786aa25 ("iommu/amd: Allow the AMD IOMMU to work with memory encryption") Signed-off-by: Tom Lendacky Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 51f8215877f5..822679ac90a1 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2773,14 +2773,16 @@ int __init amd_iommu_init_api(void) int __init amd_iommu_init_dma_ops(void) { - swiotlb = iommu_pass_through ? 1 : 0; + swiotlb = (iommu_pass_through || sme_me_mask) ? 1 : 0; iommu_detected = 1; /* * In case we don't initialize SWIOTLB (actually the common case - * when AMD IOMMU is enabled), make sure there are global - * dma_ops set as a fall-back for devices not handled by this - * driver (for example non-PCI devices). + * when AMD IOMMU is enabled and SME is not active), make sure there + * are global dma_ops set as a fall-back for devices not handled by + * this driver (for example non-PCI devices). When SME is active, + * make sure that swiotlb variable remains set so the global dma_ops + * continue to be SWIOTLB. */ if (!swiotlb) dma_ops = &nommu_dma_ops; -- cgit v1.2.3-70-g09d2 From 0a7480bd327afcccd7263be5b485f85943e1e903 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Sep 2017 13:33:45 +0300 Subject: rpmsg: glink: Unlock on error in qcom_glink_request_intent() If qcom_glink_tx() fails, then we need to unlock before returning the error code. Fixes: 27b9c5b66b23 ("rpmsg: glink: Request for intents when unavailable") Acked-by: Sricharan R Signed-off-by: Dan Carpenter Signed-off-by: Bjorn Andersson --- drivers/rpmsg/qcom_glink_native.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index 5a5e927ea50f..fecb1dafa8f3 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -1197,7 +1197,7 @@ static int qcom_glink_request_intent(struct qcom_glink *glink, ret = qcom_glink_tx(glink, &cmd, sizeof(cmd), NULL, 0, true); if (ret) - return ret; + goto unlock; ret = wait_for_completion_timeout(&channel->intent_req_comp, 10 * HZ); if (!ret) { @@ -1207,6 +1207,7 @@ static int qcom_glink_request_intent(struct qcom_glink *glink, ret = channel->intent_req_result ? 0 : -ECANCELED; } +unlock: mutex_unlock(&channel->intent_req_lock); return ret; } -- cgit v1.2.3-70-g09d2 From b775d158530285c9657a1a0628c139b0dfd0d2e5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Sep 2017 13:34:42 +0300 Subject: rpmsg: glink: Fix memory leak in qcom_glink_alloc_intent() We need to free "intent" and "intent->data" on a couple error paths. Fixes: 933b45da5d1d ("rpmsg: glink: Add support for TX intents") Acked-by: Sricharan R Signed-off-by: Dan Carpenter Signed-off-by: Bjorn Andersson --- drivers/rpmsg/qcom_glink_native.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index fecb1dafa8f3..5dcc9bf1c5bc 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -635,19 +635,18 @@ qcom_glink_alloc_intent(struct qcom_glink *glink, unsigned long flags; intent = kzalloc(sizeof(*intent), GFP_KERNEL); - if (!intent) return NULL; intent->data = kzalloc(size, GFP_KERNEL); if (!intent->data) - return NULL; + goto free_intent; spin_lock_irqsave(&channel->intent_lock, flags); ret = idr_alloc_cyclic(&channel->liids, intent, 1, -1, GFP_ATOMIC); if (ret < 0) { spin_unlock_irqrestore(&channel->intent_lock, flags); - return NULL; + goto free_data; } spin_unlock_irqrestore(&channel->intent_lock, flags); @@ -656,6 +655,12 @@ qcom_glink_alloc_intent(struct qcom_glink *glink, intent->reuse = reuseable; return intent; + +free_data: + kfree(intent->data); +free_intent: + kfree(intent); + return NULL; } static void qcom_glink_handle_rx_done(struct qcom_glink *glink, -- cgit v1.2.3-70-g09d2 From 68c2d645ebbd4a636cf93ed56f15912bcf9376bc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Oct 2017 15:58:27 +0300 Subject: remoteproc: imx_rproc: fix a couple off by one bugs The priv->mem[] array has IMX7D_RPROC_MEM_MAX elements so the > should be >= to avoid writing one element beyond the end of the array. Fixes: a0ff4aa6f010 ("remoteproc: imx_rproc: add a NXP/Freescale imx_rproc driver") Signed-off-by: Dan Carpenter Signed-off-by: Bjorn Andersson --- drivers/remoteproc/imx_rproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index 612d91403341..81ba44510b75 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -264,7 +264,7 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, if (!(att->flags & ATT_OWN)) continue; - if (b > IMX7D_RPROC_MEM_MAX) + if (b >= IMX7D_RPROC_MEM_MAX) break; priv->mem[b].cpu_addr = devm_ioremap(&pdev->dev, @@ -296,7 +296,7 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, return err; } - if (b > IMX7D_RPROC_MEM_MAX) + if (b >= IMX7D_RPROC_MEM_MAX) break; priv->mem[b].cpu_addr = devm_ioremap_resource(&pdev->dev, &res); -- cgit v1.2.3-70-g09d2 From ab759b9732fd8a4ae0252bb2087e90d776f74b9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Sep 2017 21:54:17 +0200 Subject: remoteproc: qcom: fix RPMSG_QCOM_GLINK_SMEM dependencies When RPMSG_QCOM_GLINK_SMEM=m and one driver causes the qcom_common.c file to be compiled as built-in, we get a link error: drivers/remoteproc/qcom_common.o: In function `glink_subdev_remove': qcom_common.c:(.text+0x130): undefined reference to `qcom_glink_smem_unregister' qcom_common.c:(.text+0x130): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `qcom_glink_smem_unregister' drivers/remoteproc/qcom_common.o: In function `glink_subdev_probe': qcom_common.c:(.text+0x160): undefined reference to `qcom_glink_smem_register' qcom_common.c:(.text+0x160): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `qcom_glink_smem_register' Out of the three PIL driver instances, QCOM_ADSP_PIL already has a Kconfig dependency to prevent this from happening, but the other two do not. This adds the same dependency there. Fixes: eea07023e6d9 ("remoteproc: qcom: adsp: Allow defining GLINK edge") Signed-off-by: Arnd Bergmann Signed-off-by: Bjorn Andersson --- drivers/remoteproc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index df63e44526ac..bf04479456a0 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -109,6 +109,7 @@ config QCOM_Q6V5_PIL depends on OF && ARCH_QCOM depends on QCOM_SMEM depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n) + depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n select MFD_SYSCON select QCOM_RPROC_COMMON select QCOM_SCM @@ -120,6 +121,7 @@ config QCOM_WCNSS_PIL tristate "Qualcomm WCNSS Peripheral Image Loader" depends on OF && ARCH_QCOM depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n) + depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SMEM select QCOM_MDT_LOADER select QCOM_RPROC_COMMON -- cgit v1.2.3-70-g09d2 From 084f5601c357e4ee59cf0712200d3f5c4710ba40 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 14:26:48 +0100 Subject: seccomp: make function __get_seccomp_filter static The function __get_seccomp_filter is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol '__get_seccomp_filter' was not declared. Should it be static? Signed-off-by: Colin Ian King Fixes: 66a733ea6b61 ("seccomp: fix the usage of get/put_seccomp_filter() in seccomp_get_filter()") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- kernel/seccomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index bb3a38005b9c..0ae832e13b97 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -473,7 +473,7 @@ static long seccomp_attach_filter(unsigned int flags, return 0; } -void __get_seccomp_filter(struct seccomp_filter *filter) +static void __get_seccomp_filter(struct seccomp_filter *filter) { /* Reference count is bounded by the number of total processes. */ refcount_inc(&filter->usage); -- cgit v1.2.3-70-g09d2 From 365ff9df562889501964ab5ee9fb4ce700d1a8c0 Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Mon, 9 Oct 2017 12:41:53 -0700 Subject: wimax/i2400m: Remove VLAIS Convert Variable Length Array in Struct (VLAIS) to valid C by converting local struct definition to use a flexible array. The structure is only used to define a cast of a buffer so the size of the struct is not used to allocate storage. Signed-off-by: Behan Webster Signed-off-by: Mark Charebois Suggested-by: Arnd Bergmann Signed-off-by: Matthias Kaehlcke Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index c9c711dcd0e6..a89b5685e68b 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -652,7 +652,7 @@ static int i2400m_download_chunk(struct i2400m *i2400m, const void *chunk, struct device *dev = i2400m_dev(i2400m); struct { struct i2400m_bootrom_header cmd; - u8 cmd_payload[chunk_len]; + u8 cmd_payload[]; } __packed *buf; struct i2400m_bootrom_header ack; -- cgit v1.2.3-70-g09d2 From c3d64ad4fea66d07e878b248b803ccd12c45e18c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Oct 2017 09:16:22 -0700 Subject: nfp: fix ethtool stats gather retry The while loop fetching 64 bit ethtool statistics may have to retry multiple times, it shouldn't modify the outside state. Fixes: 4c3523623dc0 ("net: add driver for Netronome NFP4000/NFP6000 NIC VFs") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 07969f06df10..dc016dfec64d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -464,7 +464,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) do { start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync); - *data++ = nn->r_vecs[i].rx_pkts; + data[0] = nn->r_vecs[i].rx_pkts; tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; tmp[2] = nn->r_vecs[i].hw_csum_rx_error; @@ -472,14 +472,16 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) do { start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); - *data++ = nn->r_vecs[i].tx_pkts; - *data++ = nn->r_vecs[i].tx_busy; + data[1] = nn->r_vecs[i].tx_pkts; + data[2] = nn->r_vecs[i].tx_busy; tmp[3] = nn->r_vecs[i].hw_csum_tx; tmp[4] = nn->r_vecs[i].hw_csum_tx_inner; tmp[5] = nn->r_vecs[i].tx_gather; tmp[6] = nn->r_vecs[i].tx_lso; } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); + data += 3; + for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++) gathered_stats[j] += tmp[j]; } -- cgit v1.2.3-70-g09d2 From 5f0ca2fb71e28df146f590eebfe32b41171b737f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Oct 2017 09:16:23 -0700 Subject: nfp: handle page allocation failures page_address() does not handle NULL argument gracefully, make sure we NULL-check the page pointer before passing it to page_address(). Fixes: ecd63a0217d5 ("nfp: add XDP support in the driver") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 1c0187f0af51..e118b5f23996 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1180,10 +1180,14 @@ static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) { void *frag; - if (!dp->xdp_prog) + if (!dp->xdp_prog) { frag = netdev_alloc_frag(dp->fl_bufsz); - else - frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD)); + } else { + struct page *page; + + page = alloc_page(GFP_KERNEL | __GFP_COLD); + frag = page ? page_address(page) : NULL; + } if (!frag) { nn_dp_warn(dp, "Failed to alloc receive page frag\n"); return NULL; @@ -1203,10 +1207,14 @@ static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) { void *frag; - if (!dp->xdp_prog) + if (!dp->xdp_prog) { frag = napi_alloc_frag(dp->fl_bufsz); - else - frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD)); + } else { + struct page *page; + + page = alloc_page(GFP_ATOMIC | __GFP_COLD); + frag = page ? page_address(page) : NULL; + } if (!frag) { nn_dp_warn(dp, "Failed to alloc receive page frag\n"); return NULL; -- cgit v1.2.3-70-g09d2 From 8c2b4e3c3725801b57d7b858d216d38f83bdb35d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 9 Oct 2017 12:29:35 +0200 Subject: Revert "PCI: tegra: Do not allocate MSI target memory" This reverts commit d7bd554f27c942e6b8b54100b4044f9be1038edf. It turns out that Tegra20 has a bug in the implementation of the MSI target address register (which is worked around by the existence of the struct tegra_pcie_soc.msi_base_shift parameter) that restricts the MSI target memory to the lower 32 bits of physical memory on that particular generation. The offending patch causes a regression on TrimSlice, which is a Tegra20-based device and has a PCI network interface card. An initial, simpler fix was to change the MSI target address for Tegra20 only, but it was pointed out that the offending commit also prevents the use of 32-bit only MSI capable devices, even on later chips. Technically this was never guaranteed to work with the prior code in the first place because the allocated page could have resided beyond the 4 GiB boundary, but it is still possible that this could've introduced a regression. The proper fix that was settled on is to select a fixed address within the lowest 32 bits of physical address space that is otherwise unused, but testing of that patch has provided mixed results that are not fully understood yet. Given all of the above and the relative urgency to get this fixed in v4.13, revert the offending commit until a universal fix is found. Fixes: d7bd554f27c9 ("PCI: tegra: Do not allocate MSI target memory") Reported-by: Tomasz Maciej Nowak Reported-by: Erik Faye-Lund Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # 4.13.x --- drivers/pci/host/pci-tegra.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 9c40da54f88a..1987fec1f126 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -233,6 +233,7 @@ struct tegra_msi { struct msi_controller chip; DECLARE_BITMAP(used, INT_PCI_MSI_NR); struct irq_domain *domain; + unsigned long pages; struct mutex lock; u64 phys; int irq; @@ -1529,22 +1530,9 @@ static int tegra_pcie_enable_msi(struct tegra_pcie *pcie) goto err; } - /* - * The PCI host bridge on Tegra contains some logic that intercepts - * MSI writes, which means that the MSI target address doesn't have - * to point to actual physical memory. Rather than allocating one 4 - * KiB page of system memory that's never used, we can simply pick - * an arbitrary address within an area reserved for system memory - * in the FPCI address map. - * - * However, in order to avoid confusion, we pick an address that - * doesn't map to physical memory. The FPCI address map reserves a - * 1012 GiB region for system memory and memory-mapped I/O. Since - * none of the Tegra SoCs that contain this PCI host bridge can - * address more than 16 GiB of system memory, the last 4 KiB of - * these 1012 GiB is a good candidate. - */ - msi->phys = 0xfcfffff000; + /* setup AFI/FPCI range */ + msi->pages = __get_free_pages(GFP_KERNEL, 0); + msi->phys = virt_to_phys((void *)msi->pages); afi_writel(pcie, msi->phys >> soc->msi_base_shift, AFI_MSI_FPCI_BAR_ST); afi_writel(pcie, msi->phys, AFI_MSI_AXI_BAR_ST); @@ -1596,6 +1584,8 @@ static int tegra_pcie_disable_msi(struct tegra_pcie *pcie) afi_writel(pcie, 0, AFI_MSI_EN_VEC6); afi_writel(pcie, 0, AFI_MSI_EN_VEC7); + free_pages(msi->pages, 0); + if (msi->irq > 0) free_irq(msi->irq, pcie); -- cgit v1.2.3-70-g09d2 From 407dae1e4415acde2d9f48bb76361893c4653756 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 9 Oct 2017 09:00:49 +0200 Subject: PCI: aardvark: Move to struct pci_host_bridge IRQ mapping functions struct pci_host_bridge gained hooks to map/swizzle IRQs, so that the IRQ mapping can be done automatically by PCI core code through the pci_assign_irq() function instead of resorting to arch-specific implementation callbacks to carry out the same task which force PCI host bridge drivers implementation to implement per-arch kludges to carry out a task that is inherently architecture agnostic. Commit 769b461fc0c0 ("arm64: PCI: Drop DT IRQ allocation from pcibios_alloc_irq()") was assuming all PCI host controller drivers had been converted to use ->map_irq(), but that wasn't the case: pci-aardvark had not been converted. Due to this, it broke the support for legacy PCI interrupts when using the pci-aardvark driver (used on Marvell Armada 3720 platforms). In order to fix this, we make sure the ->map_irq and ->swizzle_irq fields of pci_host_bridge are properly filled in. Fixes: 769b461fc0c0 ("arm64: PCI: Drop DT IRQ allocation from pcibios_alloc_irq()") Signed-off-by: Thomas Petazzoni Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v4.13+ --- drivers/pci/host/pci-aardvark.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/host/pci-aardvark.c b/drivers/pci/host/pci-aardvark.c index 89f4e3d072d7..26ed0c08f209 100644 --- a/drivers/pci/host/pci-aardvark.c +++ b/drivers/pci/host/pci-aardvark.c @@ -935,6 +935,8 @@ static int advk_pcie_probe(struct platform_device *pdev) bridge->sysdata = pcie; bridge->busnr = 0; bridge->ops = &advk_pcie_ops; + bridge->map_irq = of_irq_parse_and_map_pci; + bridge->swizzle_irq = pci_common_swizzle; ret = pci_scan_root_bus_bridge(bridge); if (ret < 0) { -- cgit v1.2.3-70-g09d2 From 899f0429c7d3eed886406cd72182bee3b96aa1f9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 9 Oct 2017 11:13:18 +0200 Subject: direct-io: Prevent NULL pointer access in submit_page_section In the code added to function submit_page_section by commit b1058b981, sdio->bio can currently be NULL when calling dio_bio_submit. This then leads to a NULL pointer access in dio_bio_submit, so check for a NULL bio in submit_page_section before trying to submit it instead. Fixes xfstest generic/250 on gfs2. Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Andreas Gruenbacher Reviewed-by: Jan Kara Signed-off-by: Al Viro --- fs/direct-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 62cf812ed0e5..96415c65bbdc 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -866,7 +866,8 @@ out: */ if (sdio->boundary) { ret = dio_send_cur_page(dio, sdio, map_bh); - dio_bio_submit(dio, sdio); + if (sdio->bio) + dio_bio_submit(dio, sdio); put_page(sdio->cur_page); sdio->cur_page = NULL; } -- cgit v1.2.3-70-g09d2 From 75cb070960ade40fba5de32138390f3c85c90941 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Oct 2017 19:12:32 -0700 Subject: Revert "net: defer call to cgroup_sk_alloc()" This reverts commit fbb1fb4ad415cb31ce944f65a5ca700aaf73a227. This was not the proper fix, lets cleanly revert it, so that following patch can be carried to stable versions. sock_cgroup_ptr() callers do not expect a NULL return value. Signed-off-by: Eric Dumazet Cc: Johannes Weiner Cc: Tejun Heo Signed-off-by: David S. Miller --- kernel/cgroup/cgroup.c | 11 +++++++++++ net/core/sock.c | 3 ++- net/ipv4/inet_connection_sock.c | 5 ----- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3380a3e49af5..44857278eb8a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5709,6 +5709,17 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) if (cgroup_sk_alloc_disabled) return; + /* Socket clone path */ + if (skcd->val) { + /* + * We might be cloning a socket which is left in an empty + * cgroup and the cgroup might have already been rmdir'd. + * Don't use cgroup_get_live(). + */ + cgroup_get(sock_cgroup_ptr(skcd)); + return; + } + rcu_read_lock(); while (true) { diff --git a/net/core/sock.c b/net/core/sock.c index 4499e3153813..70c6ccbdf49f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1680,7 +1680,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; - memset(&newsk->sk_cgrp_data, 0, sizeof(newsk->sk_cgrp_data)); atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; @@ -1719,6 +1718,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + cgroup_sk_alloc(&newsk->sk_cgrp_data); + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d32c74507314..67aec7a10686 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -26,8 +26,6 @@ #include #include #include -#include -#include #ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@ -478,9 +476,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) spin_unlock_bh(&queue->fastopenq.lock); } mem_cgroup_sk_alloc(newsk); - cgroup_sk_alloc(&newsk->sk_cgrp_data); - sock_update_classid(&newsk->sk_cgrp_data); - sock_update_netprioidx(&newsk->sk_cgrp_data); out: release_sock(sk); if (req) -- cgit v1.2.3-70-g09d2 From c0576e3975084d4699b7bfef578613fb8e1144f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Oct 2017 19:12:33 -0700 Subject: net: call cgroup_sk_alloc() earlier in sk_clone_lock() If for some reason, the newly allocated child need to be freed, we will call cgroup_put() (via sk_free_unlock_clone()) while the corresponding cgroup_get() was not yet done, and we will free memory too soon. Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") Signed-off-by: Eric Dumazet Cc: Johannes Weiner Cc: Tejun Heo Signed-off-by: David S. Miller --- net/core/sock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 70c6ccbdf49f..415f441c63b9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1687,6 +1687,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); @@ -1718,8 +1719,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); - cgroup_sk_alloc(&newsk->sk_cgrp_data); - /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) -- cgit v1.2.3-70-g09d2 From 95d78c28b5a85bacbc29b8dba7c04babb9b0d467 Mon Sep 17 00:00:00 2001 From: Vitaly Mayatskikh Date: Fri, 22 Sep 2017 01:18:39 -0400 Subject: fix unbalanced page refcounting in bio_map_user_iov bio_map_user_iov and bio_unmap_user do unbalanced pages refcounting if IO vector has small consecutive buffers belonging to the same page. bio_add_pc_page merges them into one, but the page reference is never dropped. Cc: stable@vger.kernel.org Signed-off-by: Vitaly Mayatskikh Signed-off-by: Al Viro --- block/bio.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/bio.c b/block/bio.c index b38e962fa83e..0d6439e89acb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1383,6 +1383,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, offset = offset_in_page(uaddr); for (j = cur_page; j < page_limit; j++) { unsigned int bytes = PAGE_SIZE - offset; + unsigned short prev_bi_vcnt = bio->bi_vcnt; if (len <= 0) break; @@ -1397,6 +1398,13 @@ struct bio *bio_map_user_iov(struct request_queue *q, bytes) break; + /* + * check if vector was merged with previous + * drop page reference if needed + */ + if (bio->bi_vcnt == prev_bi_vcnt) + put_page(pages[j]); + len -= bytes; offset = 0; } -- cgit v1.2.3-70-g09d2 From 2b04e8f6bbb196cab4b232af0f8d48ff2c7a8058 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2017 15:51:23 -0400 Subject: more bio_map_user_iov() leak fixes we need to take care of failure exit as well - pages already in bio should be dropped by analogue of bio_unmap_pages(), since their refcounts had been bumped only once per reference in bio. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- block/bio.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/block/bio.c b/block/bio.c index 0d6439e89acb..9e9606d26cc6 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1331,6 +1331,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, int ret, offset; struct iov_iter i; struct iovec iov; + struct bio_vec *bvec; iov_for_each(iov, i, *iter) { unsigned long uaddr = (unsigned long) iov.iov_base; @@ -1375,7 +1376,12 @@ struct bio *bio_map_user_iov(struct request_queue *q, ret = get_user_pages_fast(uaddr, local_nr_pages, (iter->type & WRITE) != WRITE, &pages[cur_page]); - if (ret < local_nr_pages) { + if (unlikely(ret < local_nr_pages)) { + for (j = cur_page; j < page_limit; j++) { + if (!pages[j]) + break; + put_page(pages[j]); + } ret = -EFAULT; goto out_unmap; } @@ -1431,10 +1437,8 @@ struct bio *bio_map_user_iov(struct request_queue *q, return bio; out_unmap: - for (j = 0; j < nr_pages; j++) { - if (!pages[j]) - break; - put_page(pages[j]); + bio_for_each_segment_all(bvec, bio, j) { + put_page(bvec->bv_page); } out: kfree(pages); -- cgit v1.2.3-70-g09d2 From 1cfd0ddd82232804e03f3023f6a58b50dfef0574 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 10:21:15 -0400 Subject: bio_copy_user_iov(): don't ignore ->iov_offset Since "block: support large requests in blk_rq_map_user_iov" we started to call it with partially drained iter; that works fine on the write side, but reads create a copy of iter for completion time. And that needs to take the possibility of ->iov_iter != 0 into account... Cc: stable@vger.kernel.org #v4.5+ Signed-off-by: Al Viro --- block/bio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/bio.c b/block/bio.c index 9e9606d26cc6..101c2a9b5481 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1239,8 +1239,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q, */ bmd->is_our_pages = map_data ? 0 : 1; memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs); - iov_iter_init(&bmd->iter, iter->type, bmd->iov, - iter->nr_segs, iter->count); + bmd->iter = *iter; + bmd->iter.iov = bmd->iov; ret = -ENOMEM; bio = bio_kmalloc(gfp_mask, nr_pages); -- cgit v1.2.3-70-g09d2 From 71105998845fb012937332fe2e806d443c09e026 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Oct 2017 11:09:20 +0200 Subject: ALSA: seq: Fix use-after-free at creating a port There is a potential race window opened at creating and deleting a port via ioctl, as spotted by fuzzing. snd_seq_create_port() creates a port object and returns its pointer, but it doesn't take the refcount, thus it can be deleted immediately by another thread. Meanwhile, snd_seq_ioctl_create_port() still calls the function snd_seq_system_client_ev_port_start() with the created port object that is being deleted, and this triggers use-after-free like: BUG: KASAN: use-after-free in snd_seq_ioctl_create_port+0x504/0x630 [snd_seq] at addr ffff8801f2241cb1 ============================================================================= BUG kmalloc-512 (Tainted: G B ): kasan: bad access detected ----------------------------------------------------------------------------- INFO: Allocated in snd_seq_create_port+0x94/0x9b0 [snd_seq] age=1 cpu=3 pid=4511 ___slab_alloc+0x425/0x460 __slab_alloc+0x20/0x40 kmem_cache_alloc_trace+0x150/0x190 snd_seq_create_port+0x94/0x9b0 [snd_seq] snd_seq_ioctl_create_port+0xd1/0x630 [snd_seq] snd_seq_do_ioctl+0x11c/0x190 [snd_seq] snd_seq_ioctl+0x40/0x80 [snd_seq] do_vfs_ioctl+0x54b/0xda0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x16/0x75 INFO: Freed in port_delete+0x136/0x1a0 [snd_seq] age=1 cpu=2 pid=4717 __slab_free+0x204/0x310 kfree+0x15f/0x180 port_delete+0x136/0x1a0 [snd_seq] snd_seq_delete_port+0x235/0x350 [snd_seq] snd_seq_ioctl_delete_port+0xc8/0x180 [snd_seq] snd_seq_do_ioctl+0x11c/0x190 [snd_seq] snd_seq_ioctl+0x40/0x80 [snd_seq] do_vfs_ioctl+0x54b/0xda0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x16/0x75 Call Trace: [] dump_stack+0x63/0x82 [] print_trailer+0xfb/0x160 [] object_err+0x34/0x40 [] kasan_report.part.2+0x223/0x520 [] ? snd_seq_ioctl_create_port+0x504/0x630 [snd_seq] [] __asan_report_load1_noabort+0x2e/0x30 [] snd_seq_ioctl_create_port+0x504/0x630 [snd_seq] [] ? snd_seq_ioctl_delete_port+0x180/0x180 [snd_seq] [] ? taskstats_exit+0xbc0/0xbc0 [] snd_seq_do_ioctl+0x11c/0x190 [snd_seq] [] snd_seq_ioctl+0x40/0x80 [snd_seq] [] ? acct_account_cputime+0x63/0x80 [] do_vfs_ioctl+0x54b/0xda0 ..... We may fix this in a few different ways, and in this patch, it's fixed simply by taking the refcount properly at snd_seq_create_port() and letting the caller unref the object after use. Also, there is another potential use-after-free by sprintf() call in snd_seq_create_port(), and this is moved inside the lock. This fix covers CVE-2017-15265. Reported-and-tested-by: Michael23 Yu Suggested-by: Linus Torvalds Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 6 +++++- sound/core/seq/seq_ports.c | 7 +++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index ea2d0ae85bd3..6c9cba2166d9 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1259,6 +1259,7 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) struct snd_seq_port_info *info = arg; struct snd_seq_client_port *port; struct snd_seq_port_callback *callback; + int port_idx; /* it is not allowed to create the port for an another client */ if (info->addr.client != client->number) @@ -1269,7 +1270,9 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) return -ENOMEM; if (client->type == USER_CLIENT && info->kernel) { - snd_seq_delete_port(client, port->addr.port); + port_idx = port->addr.port; + snd_seq_port_unlock(port); + snd_seq_delete_port(client, port_idx); return -EINVAL; } if (client->type == KERNEL_CLIENT) { @@ -1290,6 +1293,7 @@ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) snd_seq_set_port_info(port, info); snd_seq_system_client_ev_port_start(port->addr.client, port->addr.port); + snd_seq_port_unlock(port); return 0; } diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c index 0a7020c82bfc..d21ece9f8d73 100644 --- a/sound/core/seq/seq_ports.c +++ b/sound/core/seq/seq_ports.c @@ -122,7 +122,9 @@ static void port_subs_info_init(struct snd_seq_port_subs_info *grp) } -/* create a port, port number is returned (-1 on failure) */ +/* create a port, port number is returned (-1 on failure); + * the caller needs to unref the port via snd_seq_port_unlock() appropriately + */ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, int port) { @@ -151,6 +153,7 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, snd_use_lock_init(&new_port->use_lock); port_subs_info_init(&new_port->c_src); port_subs_info_init(&new_port->c_dest); + snd_use_lock_use(&new_port->use_lock); num = port >= 0 ? port : 0; mutex_lock(&client->ports_mutex); @@ -165,9 +168,9 @@ struct snd_seq_client_port *snd_seq_create_port(struct snd_seq_client *client, list_add_tail(&new_port->list, &p->list); client->num_ports++; new_port->addr.port = num; /* store the port number in the port */ + sprintf(new_port->name, "port-%d", num); write_unlock_irqrestore(&client->ports_lock, flags); mutex_unlock(&client->ports_mutex); - sprintf(new_port->name, "port-%d", num); return new_port; } -- cgit v1.2.3-70-g09d2 From 10a7ef33679073d13bf1dd05e3f1b7912f999543 Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 10 Oct 2017 20:59:38 -0700 Subject: ipsec: Fix dst leak in xfrm_bundle_create(). If we cannot find a suitable inner_mode value, we will leak the currently allocated 'xdst'. The fix is to make sure it is linked into the chain before erroring out. Signed-off-by: David S. Miller Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f06253969972..2746b62a8944 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1573,6 +1573,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } + if (!dst_prev) + dst0 = dst1; + else + /* Ref count is taken during xfrm_alloc_dst() + * No need to do dst_clone() on dst1 + */ + dst_prev->child = dst1; + if (xfrm[i]->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(xfrm[i], xfrm_af2proto(family)); @@ -1584,14 +1592,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, } else inner_mode = xfrm[i]->inner_mode; - if (!dst_prev) - dst0 = dst1; - else - /* Ref count is taken during xfrm_alloc_dst() - * No need to do dst_clone() on dst1 - */ - dst_prev->child = dst1; - xdst->route = dst; dst_copy_metrics(dst1, dst); -- cgit v1.2.3-70-g09d2 From cda77556447c782b3c9c068f81ef58136cb487c3 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 10 Oct 2017 15:13:55 +0200 Subject: gpu: ipu-v3: Allow channel burst locking on i.MX6 only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IDMAC_LOCK_EN registers on i.MX51 have a different layout, and on i.MX53 enabling the lock feature causes bursts to get lost. Restrict enabling the burst lock feature to i.MX6. Reported-by: Patrick Brünn Fixes: 790cb4c7c954 ("drm/imx: lock scanout transfers for consecutive bursts") Tested-by: Patrick Brünn Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 6a573d21d3cc..658fa2d3e40c 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -405,6 +405,14 @@ int ipu_idmac_lock_enable(struct ipuv3_channel *channel, int num_bursts) return -EINVAL; } + /* + * IPUv3EX / i.MX51 has a different register layout, and on IPUv3M / + * i.MX53 channel arbitration locking doesn't seem to work properly. + * Allow enabling the lock feature on IPUv3H / i.MX6 only. + */ + if (bursts && ipu->ipu_type != IPUV3H) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(idmac_lock_en_info); i++) { if (channel->num == idmac_lock_en_info[i].chnum) break; -- cgit v1.2.3-70-g09d2 From 263c3b8044f9c9356a34fdb2640b52d27e378f9c Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 24 Mar 2017 18:01:53 +0100 Subject: gpu: ipu-v3: prg: wait for double buffers to be filled on channel startup Wait for both double buffer to be filled when first starting a channel. This makes channel startup a lot more reliable, probably because it allows the internal state machine to settle before the requests from the IPU are coming in. Signed-off-by: Lucas Stach [p.zabel@pengutronix.de: rebased before switch to runtime PM] Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-prg.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c index ecc9ea44dc50..0013ca9f72c8 100644 --- a/drivers/gpu/ipu-v3/ipu-prg.c +++ b/drivers/gpu/ipu-v3/ipu-prg.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -329,6 +330,12 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan, val = IPU_PRG_REG_UPDATE_REG_UPDATE; writel(val, prg->regs + IPU_PRG_REG_UPDATE); + /* wait for both double buffers to be filled */ + readl_poll_timeout(prg->regs + IPU_PRG_STATUS, val, + (val & IPU_PRG_STATUS_BUFFER0_READY(prg_chan)) && + (val & IPU_PRG_STATUS_BUFFER1_READY(prg_chan)), + 5, 1000); + clk_disable_unprepare(prg->clk_ipg); chan->enabled = true; -- cgit v1.2.3-70-g09d2 From 11aff4b4c7c4b7257660ef890920f2ac72911ed0 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 18 Sep 2017 17:45:07 +0200 Subject: gpu: ipu-v3: pre: implement workaround for ERR009624 The PRE has a bug where a software write to the CTRL register can block the setting of the ENABLE bit by the hardware in auto repeat mode. When this happens the PRE will fail to handle new jobs. To work around this software must not write to CTRL register when the PRE store engine is inside the unsafe window, where a hardware update to the ENABLE bit may happen. Signed-off-by: Lucas Stach [p.zabel@pengutronix.de: rebased before PRE tiled prefetch support] Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-pre.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index c35f74c83065..c860a7997cb5 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -73,6 +73,14 @@ #define IPU_PRE_STORE_ENG_CTRL_WR_NUM_BYTES(v) ((v & 0x7) << 1) #define IPU_PRE_STORE_ENG_CTRL_OUTPUT_ACTIVE_BPP(v) ((v & 0x3) << 4) +#define IPU_PRE_STORE_ENG_STATUS 0x120 +#define IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_X_MASK 0xffff +#define IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_X_SHIFT 0 +#define IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_Y_MASK 0x3fff +#define IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_Y_SHIFT 16 +#define IPU_PRE_STORE_ENG_STATUS_STORE_FIFO_FULL (1 << 30) +#define IPU_PRE_STORE_ENG_STATUS_STORE_FIELD (1 << 31) + #define IPU_PRE_STORE_ENG_SIZE 0x130 #define IPU_PRE_STORE_ENG_SIZE_INPUT_WIDTH(v) ((v & 0xffff) << 0) #define IPU_PRE_STORE_ENG_SIZE_INPUT_HEIGHT(v) ((v & 0xffff) << 16) @@ -93,6 +101,7 @@ struct ipu_pre { dma_addr_t buffer_paddr; void *buffer_virt; bool in_use; + unsigned int safe_window_end; }; static DEFINE_MUTEX(ipu_pre_list_mutex); @@ -160,6 +169,9 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, u32 active_bpp = info->cpp[0] >> 1; u32 val; + /* calculate safe window for ctrl register updates */ + pre->safe_window_end = height - 2; + writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF); writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); @@ -199,7 +211,24 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr) { + unsigned long timeout = jiffies + msecs_to_jiffies(5); + unsigned short current_yblock; + u32 val; + writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + + do { + if (time_after(jiffies, timeout)) { + dev_warn(pre->dev, "timeout waiting for PRE safe window\n"); + return; + } + + val = readl(pre->regs + IPU_PRE_STORE_ENG_STATUS); + current_yblock = + (val >> IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_Y_SHIFT) & + IPU_PRE_STORE_ENG_STATUS_STORE_BLOCK_Y_MASK; + } while (current_yblock == 0 || current_yblock >= pre->safe_window_end); + writel(IPU_PRE_CTRL_SDW_UPDATE, pre->regs + IPU_PRE_CTRL_SET); } -- cgit v1.2.3-70-g09d2 From 203f44c475a1a8ace6d30c4c14ab41295081a23f Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 2 Oct 2017 12:22:53 +0100 Subject: usb: phy: tegra: Fix phy suspend for UDC Commit dfebb5f43a78 ("usb: chipidea: Add support for Tegra20/30/114/124") added UDC support for Tegra but with UDC support enabled, is was found that Tegra30, Tegra114 and Tegra124 would hang on entry to suspend. The hang occurred during the suspend of the USB PHY when the Tegra PHY driver attempted to disable the PHY clock. The problem is that before the Tegra PHY driver is suspended, the chipidea driver already disabled the PHY clock and when the Tegra PHY driver suspended, it could not read DEVLC register and caused the device to hang. The Tegra USB PHY driver is used by both the Tegra EHCI driver and now the chipidea UDC driver and so simply removing the disabling of the PHY clock from the USB PHY driver would not work for the Tegra EHCI driver. Fortunately, the status of the USB PHY clock can be read from the USB_SUSP_CTRL register and therefore, to workaround this issue, simply poll the register prior to disabling the clock in USB PHY driver to see if clock gating has already been initiated. Please note that it can take a few uS for the clock to disable and so simply reading this status register once on entry is not sufficient. Similarly when turning on the PHY clock, it is possible that the clock is already enabled or in the process of being enabled, and so check for this when enabling the PHY. Please note that no issues are seen with Tegra20 because it has a slightly different PHY to Tegra30/114/124. Fixes: dfebb5f43a78 ("usb: chipidea: Add support for Tegra20/30/114/124") Reviewed-by: Dmitry Osipenko Tested-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Jon Hunter Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-tegra-usb.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c index 5fe4a5704bde..ccc2bf5274b4 100644 --- a/drivers/usb/phy/phy-tegra-usb.c +++ b/drivers/usb/phy/phy-tegra-usb.c @@ -329,6 +329,14 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy) unsigned long val; void __iomem *base = phy->regs; + /* + * The USB driver may have already initiated the phy clock + * disable so wait to see if the clock turns off and if not + * then proceed with gating the clock. + */ + if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID, 0) == 0) + return; + if (phy->is_legacy_phy) { val = readl(base + USB_SUSP_CTRL); val |= USB_SUSP_SET; @@ -351,6 +359,15 @@ static void utmi_phy_clk_enable(struct tegra_usb_phy *phy) unsigned long val; void __iomem *base = phy->regs; + /* + * The USB driver may have already initiated the phy clock + * enable so wait to see if the clock turns on and if not + * then proceed with ungating the clock. + */ + if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID, + USB_PHY_CLK_VALID) == 0) + return; + if (phy->is_legacy_phy) { val = readl(base + USB_SUSP_CTRL); val |= USB_SUSP_CLR; -- cgit v1.2.3-70-g09d2 From ab219221a5064abfff9f78c323c4a257b16cdb81 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 6 Oct 2017 10:27:44 -0400 Subject: USB: dummy-hcd: Fix deadlock caused by disconnect detection The dummy-hcd driver calls the gadget driver's disconnect callback under the wrong conditions. It should invoke the callback when Vbus power is turned off, but instead it does so when the D+ pullup is turned off. This can cause a deadlock in the composite core when a gadget driver is unregistered: [ 88.361471] ============================================ [ 88.362014] WARNING: possible recursive locking detected [ 88.362580] 4.14.0-rc2+ #9 Not tainted [ 88.363010] -------------------------------------------- [ 88.363561] v4l_id/526 is trying to acquire lock: [ 88.364062] (&(&cdev->lock)->rlock){....}, at: [] composite_disconnect+0x43/0x100 [libcomposite] [ 88.365051] [ 88.365051] but task is already holding lock: [ 88.365826] (&(&cdev->lock)->rlock){....}, at: [] usb_function_deactivate+0x29/0x80 [libcomposite] [ 88.366858] [ 88.366858] other info that might help us debug this: [ 88.368301] Possible unsafe locking scenario: [ 88.368301] [ 88.369304] CPU0 [ 88.369701] ---- [ 88.370101] lock(&(&cdev->lock)->rlock); [ 88.370623] lock(&(&cdev->lock)->rlock); [ 88.371145] [ 88.371145] *** DEADLOCK *** [ 88.371145] [ 88.372211] May be due to missing lock nesting notation [ 88.372211] [ 88.373191] 2 locks held by v4l_id/526: [ 88.373715] #0: (&(&cdev->lock)->rlock){....}, at: [] usb_function_deactivate+0x29/0x80 [libcomposite] [ 88.374814] #1: (&(&dum_hcd->dum->lock)->rlock){....}, at: [] dummy_pullup+0x7d/0xf0 [dummy_hcd] [ 88.376289] [ 88.376289] stack backtrace: [ 88.377726] CPU: 0 PID: 526 Comm: v4l_id Not tainted 4.14.0-rc2+ #9 [ 88.378557] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 88.379504] Call Trace: [ 88.380019] dump_stack+0x86/0xc7 [ 88.380605] __lock_acquire+0x841/0x1120 [ 88.381252] lock_acquire+0xd5/0x1c0 [ 88.381865] ? composite_disconnect+0x43/0x100 [libcomposite] [ 88.382668] _raw_spin_lock_irqsave+0x40/0x54 [ 88.383357] ? composite_disconnect+0x43/0x100 [libcomposite] [ 88.384290] composite_disconnect+0x43/0x100 [libcomposite] [ 88.385490] set_link_state+0x2d4/0x3c0 [dummy_hcd] [ 88.386436] dummy_pullup+0xa7/0xf0 [dummy_hcd] [ 88.387195] usb_gadget_disconnect+0xd8/0x160 [udc_core] [ 88.387990] usb_gadget_deactivate+0xd3/0x160 [udc_core] [ 88.388793] usb_function_deactivate+0x64/0x80 [libcomposite] [ 88.389628] uvc_function_disconnect+0x1e/0x40 [usb_f_uvc] This patch changes the code to test the port-power status bit rather than the port-connect status bit when deciding whether to isue the callback. Signed-off-by: Alan Stern Reported-by: David Tulloh CC: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/dummy_hcd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index b17618a55f1b..f04e91ef9e7c 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -419,6 +419,7 @@ static void set_link_state_by_speed(struct dummy_hcd *dum_hcd) static void set_link_state(struct dummy_hcd *dum_hcd) { struct dummy *dum = dum_hcd->dum; + unsigned int power_bit; dum_hcd->active = 0; if (dum->pullup) @@ -429,17 +430,19 @@ static void set_link_state(struct dummy_hcd *dum_hcd) return; set_link_state_by_speed(dum_hcd); + power_bit = (dummy_hcd_to_hcd(dum_hcd)->speed == HCD_USB3 ? + USB_SS_PORT_STAT_POWER : USB_PORT_STAT_POWER); if ((dum_hcd->port_status & USB_PORT_STAT_ENABLE) == 0 || dum_hcd->active) dum_hcd->resuming = 0; /* Currently !connected or in reset */ - if ((dum_hcd->port_status & USB_PORT_STAT_CONNECTION) == 0 || + if ((dum_hcd->port_status & power_bit) == 0 || (dum_hcd->port_status & USB_PORT_STAT_RESET) != 0) { - unsigned disconnect = USB_PORT_STAT_CONNECTION & + unsigned int disconnect = power_bit & dum_hcd->old_status & (~dum_hcd->port_status); - unsigned reset = USB_PORT_STAT_RESET & + unsigned int reset = USB_PORT_STAT_RESET & (~dum_hcd->old_status) & dum_hcd->port_status; /* Report reset and disconnect events to the driver */ -- cgit v1.2.3-70-g09d2 From 29c7f3e68eec4ae94d85ad7b5dfdafdb8089f513 Mon Sep 17 00:00:00 2001 From: Kazuya Mizuguchi Date: Mon, 2 Oct 2017 14:01:41 +0900 Subject: usb: renesas_usbhs: Fix DMAC sequence for receiving zero-length packet The DREQE bit of the DnFIFOSEL should be set to 1 after the DE bit of USB-DMAC on R-Car SoCs is set to 1 after the USB-DMAC received a zero-length packet. Otherwise, a transfer completion interruption of USB-DMAC doesn't happen. Even if the driver changes the sequence, normal operations (transmit/receive without zero-length packet) will not cause any side-effects. So, this patch fixes the sequence anyway. Signed-off-by: Kazuya Mizuguchi [shimoda: revise the commit log] Fixes: e73a9891b3a1 ("usb: renesas_usbhs: add DMAEngine support") Cc: # v3.1+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/fifo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 68f26904c316..50285b01da92 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -857,9 +857,9 @@ static void xfer_work(struct work_struct *work) fifo->name, usbhs_pipe_number(pipe), pkt->length, pkt->zero); usbhs_pipe_running(pipe, 1); - usbhsf_dma_start(pipe, fifo); usbhs_pipe_set_trans_count_if_bulk(pipe, pkt->trans); dma_async_issue_pending(chan); + usbhsf_dma_start(pipe, fifo); usbhs_pipe_enable(pipe); xfer_work_end: -- cgit v1.2.3-70-g09d2 From cb84f56861eb333af0a5bab475d741b13067c05c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 30 Sep 2017 11:15:29 +0300 Subject: usb: misc: usbtest: Fix overflow in usbtest_do_ioctl() There used to be a test against "if (param->sglen > MAX_SGLEN)" but it was removed during a refactor. It leads to an integer overflow and a stack overflow in test_queue() if we try to create a too large urbs[] array on the stack. There is a second integer overflow in test_queue() as well if "param->iterations" is too high. I don't immediately see that it's harmful but I've added a check to prevent it and silence the static checker warning. Fixes: 18fc4ebdc705 ("usb: misc: usbtest: Remove timeval usage") Acked-by: Deepa Dinamani Signed-off-by: Dan Carpenter Signed-off-by: Felipe Balbi --- drivers/usb/misc/usbtest.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index eee82ca55b7b..113e38bfe0ef 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -1964,6 +1964,9 @@ test_queue(struct usbtest_dev *dev, struct usbtest_param_32 *param, int status = 0; struct urb *urbs[param->sglen]; + if (!param->sglen || param->iterations > UINT_MAX / param->sglen) + return -EINVAL; + memset(&context, 0, sizeof(context)); context.count = param->iterations * param->sglen; context.dev = dev; @@ -2087,6 +2090,8 @@ usbtest_do_ioctl(struct usb_interface *intf, struct usbtest_param_32 *param) if (param->iterations <= 0) return -EINVAL; + if (param->sglen > MAX_SGLEN) + return -EINVAL; /* * Just a bunch of test cases that every HCD is expected to handle. * -- cgit v1.2.3-70-g09d2 From aec17e1e249567e82b26dafbb86de7d07fde8729 Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Sat, 30 Sep 2017 08:55:55 -0700 Subject: usb: gadget: composite: Fix use-after-free in usb_composite_overwrite_options KASAN enabled configuration reports an error BUG: KASAN: use-after-free in usb_composite_overwrite_options+... [libcomposite] at addr ... Read of size 1 by task ... when some driver is un-bound and then bound again. For example, this happens with FunctionFS driver when "ffs-test" test application is run several times in a row. If the driver has empty manufacturer ID string in initial static data, it is then replaced with generated string. After driver unbinding the generated string is freed, but the driver data still keep that pointer. And if the driver is then bound again, that pointer is re-used for string emptiness check. The fix is to clean up the driver string data upon its unbinding to drop the pointer to freed memory. Fixes: cc2683c318a5 ("usb: gadget: Provide a default implementation of default manufacturer string") Cc: stable@vger.kernel.org Signed-off-by: Andrew Gabbasov Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index dd74c99d6ce1..5d061b3d8224 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2026,6 +2026,8 @@ static DEVICE_ATTR_RO(suspended); static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver) { struct usb_composite_dev *cdev = get_gadget_data(gadget); + struct usb_gadget_strings *gstr = cdev->driver->strings[0]; + struct usb_string *dev_str = gstr->strings; /* composite_disconnect() must already have been called * by the underlying peripheral controller driver! @@ -2045,6 +2047,9 @@ static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver) composite_dev_cleanup(cdev); + if (dev_str[USB_GADGET_MANUFACTURER_IDX].s == cdev->def_manufacturer) + dev_str[USB_GADGET_MANUFACTURER_IDX].s = ""; + kfree(cdev->def_manufacturer); kfree(cdev); set_gadget_data(gadget, NULL); -- cgit v1.2.3-70-g09d2 From ff74745e6d3d97a865eda8c1f3fd29c13b79f0cc Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Sat, 30 Sep 2017 08:54:52 -0700 Subject: usb: gadget: configfs: Fix memory leak of interface directory data Kmemleak checking configuration reports a memory leak in usb_os_desc_prepare_interf_dir function when rndis function instance is freed and then allocated again. For example, this happens with FunctionFS driver with RNDIS function enabled when "ffs-test" test application is run several times in a row. The data for intermediate "os_desc" group for interface directories is allocated as a single VLA chunk and (after a change of default groups handling) is not ever freed and actually not stored anywhere besides inside a list of default groups of a parent group. The fix is to make usb_os_desc_prepare_interf_dir function return a pointer to allocated data (as a pointer to the first VLA item) instead of (an unused) integer and to make the caller component (currently the only one is RNDIS function) responsible for storing the pointer and freeing the memory when appropriate. Fixes: 1ae1602de028 ("configfs: switch ->default groups to a linked list") Cc: stable@vger.kernel.org Signed-off-by: Andrew Gabbasov Signed-off-by: Felipe Balbi --- drivers/usb/gadget/configfs.c | 15 ++++++++------- drivers/usb/gadget/configfs.h | 11 ++++++----- drivers/usb/gadget/function/f_rndis.c | 12 ++++++++++-- drivers/usb/gadget/function/u_rndis.h | 1 + 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index a22a892de7b7..aeb9f3c40521 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1143,11 +1143,12 @@ static struct configfs_attribute *interf_grp_attrs[] = { NULL }; -int usb_os_desc_prepare_interf_dir(struct config_group *parent, - int n_interf, - struct usb_os_desc **desc, - char **names, - struct module *owner) +struct config_group *usb_os_desc_prepare_interf_dir( + struct config_group *parent, + int n_interf, + struct usb_os_desc **desc, + char **names, + struct module *owner) { struct config_group *os_desc_group; struct config_item_type *os_desc_type, *interface_type; @@ -1159,7 +1160,7 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, char *vlabuf = kzalloc(vla_group_size(data_chunk), GFP_KERNEL); if (!vlabuf) - return -ENOMEM; + return ERR_PTR(-ENOMEM); os_desc_group = vla_ptr(vlabuf, data_chunk, os_desc_group); os_desc_type = vla_ptr(vlabuf, data_chunk, os_desc_type); @@ -1184,7 +1185,7 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent, configfs_add_default_group(&d->group, os_desc_group); } - return 0; + return os_desc_group; } EXPORT_SYMBOL(usb_os_desc_prepare_interf_dir); diff --git a/drivers/usb/gadget/configfs.h b/drivers/usb/gadget/configfs.h index 36c468c4f5e9..540d5e92ed22 100644 --- a/drivers/usb/gadget/configfs.h +++ b/drivers/usb/gadget/configfs.h @@ -5,11 +5,12 @@ void unregister_gadget_item(struct config_item *item); -int usb_os_desc_prepare_interf_dir(struct config_group *parent, - int n_interf, - struct usb_os_desc **desc, - char **names, - struct module *owner); +struct config_group *usb_os_desc_prepare_interf_dir( + struct config_group *parent, + int n_interf, + struct usb_os_desc **desc, + char **names, + struct module *owner); static inline struct usb_os_desc *to_usb_os_desc(struct config_item *item) { diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index e1d5853ef1e4..c7c5b3ce1d98 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -908,6 +908,7 @@ static void rndis_free_inst(struct usb_function_instance *f) free_netdev(opts->net); } + kfree(opts->rndis_interf_group); /* single VLA chunk */ kfree(opts); } @@ -916,6 +917,7 @@ static struct usb_function_instance *rndis_alloc_inst(void) struct f_rndis_opts *opts; struct usb_os_desc *descs[1]; char *names[1]; + struct config_group *rndis_interf_group; opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) @@ -940,8 +942,14 @@ static struct usb_function_instance *rndis_alloc_inst(void) names[0] = "rndis"; config_group_init_type_name(&opts->func_inst.group, "", &rndis_func_type); - usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, - names, THIS_MODULE); + rndis_interf_group = + usb_os_desc_prepare_interf_dir(&opts->func_inst.group, 1, descs, + names, THIS_MODULE); + if (IS_ERR(rndis_interf_group)) { + rndis_free_inst(&opts->func_inst); + return ERR_CAST(rndis_interf_group); + } + opts->rndis_interf_group = rndis_interf_group; return &opts->func_inst; } diff --git a/drivers/usb/gadget/function/u_rndis.h b/drivers/usb/gadget/function/u_rndis.h index a35ee3c2545d..efdb7ac381d9 100644 --- a/drivers/usb/gadget/function/u_rndis.h +++ b/drivers/usb/gadget/function/u_rndis.h @@ -26,6 +26,7 @@ struct f_rndis_opts { bool bound; bool borrowed_net; + struct config_group *rndis_interf_group; struct usb_os_desc rndis_os_desc; char rndis_ext_compat_id[16]; -- cgit v1.2.3-70-g09d2 From 7c80f9e4a588f1925b07134bb2e3689335f6c6d8 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 29 Sep 2017 10:54:24 -0400 Subject: usb: usbtest: fix NULL pointer dereference If the usbtest driver encounters a device with an IN bulk endpoint but no OUT bulk endpoint, it will try to dereference a NULL pointer (out->desc.bEndpointAddress). The problem can be solved by adding a missing test. Signed-off-by: Alan Stern Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Felipe Balbi --- drivers/usb/misc/usbtest.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 113e38bfe0ef..b3fc602b2e24 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -202,12 +202,13 @@ found: return tmp; } - if (in) { + if (in) dev->in_pipe = usb_rcvbulkpipe(udev, in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); + if (out) dev->out_pipe = usb_sndbulkpipe(udev, out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); - } + if (iso_in) { dev->iso_in = &iso_in->desc; dev->in_iso_pipe = usb_rcvisocpipe(udev, -- cgit v1.2.3-70-g09d2 From ef8daf8eeb5b8ab6bc356656163d19f20fb827ed Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Mon, 2 Oct 2017 11:56:48 -0400 Subject: livepatch: unpatch all klp_objects if klp_module_coming fails When an incoming module is considered for livepatching by klp_module_coming(), it iterates over multiple patches and multiple kernel objects in this order: list_for_each_entry(patch, &klp_patches, list) { klp_for_each_object(patch, obj) { which means that if one of the kernel objects fails to patch, klp_module_coming()'s error path needs to unpatch and cleanup any kernel objects that were already patched by a previous patch. Reported-by: Miroslav Benes Suggested-by: Petr Mladek Signed-off-by: Joe Lawrence Acked-by: Josh Poimboeuf Reviewed-by: Petr Mladek Signed-off-by: Jiri Kosina --- kernel/livepatch/core.c | 60 ++++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index b9628e43c78f..bf8c8fd72589 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -830,6 +830,41 @@ int klp_register_patch(struct klp_patch *patch) } EXPORT_SYMBOL_GPL(klp_register_patch); +/* + * Remove parts of patches that touch a given kernel module. The list of + * patches processed might be limited. When limit is NULL, all patches + * will be handled. + */ +static void klp_cleanup_module_patches_limited(struct module *mod, + struct klp_patch *limit) +{ + struct klp_patch *patch; + struct klp_object *obj; + + list_for_each_entry(patch, &klp_patches, list) { + if (patch == limit) + break; + + klp_for_each_object(patch, obj) { + if (!klp_is_module(obj) || strcmp(obj->name, mod->name)) + continue; + + /* + * Only unpatch the module if the patch is enabled or + * is in transition. + */ + if (patch->enabled || patch == klp_transition_patch) { + pr_notice("reverting patch '%s' on unloading module '%s'\n", + patch->mod->name, obj->mod->name); + klp_unpatch_object(obj); + } + + klp_free_object_loaded(obj); + break; + } + } +} + int klp_module_coming(struct module *mod) { int ret; @@ -894,7 +929,7 @@ err: pr_warn("patch '%s' failed for module '%s', refusing to load module '%s'\n", patch->mod->name, obj->mod->name, obj->mod->name); mod->klp_alive = false; - klp_free_object_loaded(obj); + klp_cleanup_module_patches_limited(mod, patch); mutex_unlock(&klp_mutex); return ret; @@ -902,9 +937,6 @@ err: void klp_module_going(struct module *mod) { - struct klp_patch *patch; - struct klp_object *obj; - if (WARN_ON(mod->state != MODULE_STATE_GOING && mod->state != MODULE_STATE_COMING)) return; @@ -917,25 +949,7 @@ void klp_module_going(struct module *mod) */ mod->klp_alive = false; - list_for_each_entry(patch, &klp_patches, list) { - klp_for_each_object(patch, obj) { - if (!klp_is_module(obj) || strcmp(obj->name, mod->name)) - continue; - - /* - * Only unpatch the module if the patch is enabled or - * is in transition. - */ - if (patch->enabled || patch == klp_transition_patch) { - pr_notice("reverting patch '%s' on unloading module '%s'\n", - patch->mod->name, obj->mod->name); - klp_unpatch_object(obj); - } - - klp_free_object_loaded(obj); - break; - } - } + klp_cleanup_module_patches_limited(mod, NULL); mutex_unlock(&klp_mutex); } -- cgit v1.2.3-70-g09d2 From f043bfc98c193c284e2cd768fefabe18ac2fed9b Mon Sep 17 00:00:00 2001 From: Jaejoong Kim Date: Thu, 28 Sep 2017 19:16:30 +0900 Subject: HID: usbhid: fix out-of-bounds bug The hid descriptor identifies the length and type of subordinate descriptors for a device. If the received hid descriptor is smaller than the size of the struct hid_descriptor, it is possible to cause out-of-bounds. In addition, if bNumDescriptors of the hid descriptor have an incorrect value, this can also cause out-of-bounds while approaching hdesc->desc[n]. So check the size of hid descriptor and bNumDescriptors. BUG: KASAN: slab-out-of-bounds in usbhid_parse+0x9b1/0xa20 Read of size 1 at addr ffff88006c5f8edf by task kworker/1:2/1261 CPU: 1 PID: 1261 Comm: kworker/1:2 Not tainted 4.14.0-rc1-42251-gebb2c2437d80 #169 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: usb_hub_wq hub_event Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x292/0x395 lib/dump_stack.c:52 print_address_description+0x78/0x280 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 kasan_report+0x22f/0x340 mm/kasan/report.c:409 __asan_report_load1_noabort+0x19/0x20 mm/kasan/report.c:427 usbhid_parse+0x9b1/0xa20 drivers/hid/usbhid/hid-core.c:1004 hid_add_device+0x16b/0xb30 drivers/hid/hid-core.c:2944 usbhid_probe+0xc28/0x1100 drivers/hid/usbhid/hid-core.c:1369 usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361 really_probe drivers/base/dd.c:413 driver_probe_device+0x610/0xa00 drivers/base/dd.c:557 __device_attach_driver+0x230/0x290 drivers/base/dd.c:653 bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463 __device_attach+0x26e/0x3d0 drivers/base/dd.c:710 device_initial_probe+0x1f/0x30 drivers/base/dd.c:757 bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523 device_add+0xd0b/0x1660 drivers/base/core.c:1835 usb_set_configuration+0x104e/0x1870 drivers/usb/core/message.c:1932 generic_probe+0x73/0xe0 drivers/usb/core/generic.c:174 usb_probe_device+0xaf/0xe0 drivers/usb/core/driver.c:266 really_probe drivers/base/dd.c:413 driver_probe_device+0x610/0xa00 drivers/base/dd.c:557 __device_attach_driver+0x230/0x290 drivers/base/dd.c:653 bus_for_each_drv+0x161/0x210 drivers/base/bus.c:463 __device_attach+0x26e/0x3d0 drivers/base/dd.c:710 device_initial_probe+0x1f/0x30 drivers/base/dd.c:757 bus_probe_device+0x1eb/0x290 drivers/base/bus.c:523 device_add+0xd0b/0x1660 drivers/base/core.c:1835 usb_new_device+0x7b8/0x1020 drivers/usb/core/hub.c:2457 hub_port_connect drivers/usb/core/hub.c:4903 hub_port_connect_change drivers/usb/core/hub.c:5009 port_event drivers/usb/core/hub.c:5115 hub_event+0x194d/0x3740 drivers/usb/core/hub.c:5195 process_one_work+0xc7f/0x1db0 kernel/workqueue.c:2119 worker_thread+0x221/0x1850 kernel/workqueue.c:2253 kthread+0x3a1/0x470 kernel/kthread.c:231 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Cc: stable@vger.kernel.org Reported-by: Andrey Konovalov Signed-off-by: Jaejoong Kim Tested-by: Andrey Konovalov Acked-by: Alan Stern Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 089bad8a9a21..045b5da9b992 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -975,6 +975,8 @@ static int usbhid_parse(struct hid_device *hid) unsigned int rsize = 0; char *rdesc; int ret, n; + int num_descriptors; + size_t offset = offsetof(struct hid_descriptor, desc); quirks = usbhid_lookup_quirk(le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); @@ -997,10 +999,18 @@ static int usbhid_parse(struct hid_device *hid) return -ENODEV; } + if (hdesc->bLength < sizeof(struct hid_descriptor)) { + dbg_hid("hid descriptor is too short\n"); + return -EINVAL; + } + hid->version = le16_to_cpu(hdesc->bcdHID); hid->country = hdesc->bCountryCode; - for (n = 0; n < hdesc->bNumDescriptors; n++) + num_descriptors = min_t(int, hdesc->bNumDescriptors, + (hdesc->bLength - offset) / sizeof(struct hid_class_descriptor)); + + for (n = 0; n < num_descriptors; n++) if (hdesc->desc[n].bDescriptorType == HID_DT_REPORT) rsize = le16_to_cpu(hdesc->desc[n].wDescriptorLength); -- cgit v1.2.3-70-g09d2 From a0933a456ff83a3b5ffa3a1903e0b8de4a56adf5 Mon Sep 17 00:00:00 2001 From: Alex Manoussakis Date: Thu, 5 Oct 2017 13:41:20 -0400 Subject: HID: hid-elecom: extend to fix descriptor for HUGE trackball In addition to DEFT, Elecom introduced a larger trackball called HUGE, in both wired (M-HT1URBK) and wireless (M-HT1DRBK) versions. It has the same buttons and behavior as the DEFT. This patch adds the two relevant USB IDs to enable operation of the three Fn buttons on the top of the device. Cc: Diego Elio Petteno Signed-off-by: Alex Manoussakis Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + drivers/hid/hid-core.c | 2 ++ drivers/hid/hid-elecom.c | 13 +++++++++---- drivers/hid/hid-ids.h | 2 ++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 0a3117cc29e7..374301fcbc86 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -281,6 +281,7 @@ config HID_ELECOM Support for ELECOM devices: - BM084 Bluetooth Mouse - DEFT Trackball (Wired and wireless) + - HUGE Trackball (Wired and wireless) config HID_ELO tristate "ELO USB 4000/4500 touchscreen" diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 9bc91160819b..330ca983828b 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2032,6 +2032,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_HUGE_WIRED) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_HUGE_WIRELESS) }, #endif #if IS_ENABLED(CONFIG_HID_ELO) { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) }, diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index e2c7465df69f..54aeea57d209 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -3,6 +3,7 @@ * Copyright (c) 2010 Richard Nauber * Copyright (c) 2016 Yuxuan Shui * Copyright (c) 2017 Diego Elio Pettenò + * Copyright (c) 2017 Alex Manoussakis */ /* @@ -32,9 +33,11 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, break; case USB_DEVICE_ID_ELECOM_DEFT_WIRED: case USB_DEVICE_ID_ELECOM_DEFT_WIRELESS: - /* The DEFT trackball has eight buttons, but its descriptor only - * reports five, disabling the three Fn buttons on the top of - * the mouse. + case USB_DEVICE_ID_ELECOM_HUGE_WIRED: + case USB_DEVICE_ID_ELECOM_HUGE_WIRELESS: + /* The DEFT/HUGE trackball has eight buttons, but its descriptor + * only reports five, disabling the three Fn buttons on the top + * of the mouse. * * Apply the following diff to the descriptor: * @@ -62,7 +65,7 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, * End Collection, End Collection, */ if (*rsize == 213 && rdesc[13] == 5 && rdesc[21] == 5) { - hid_info(hdev, "Fixing up Elecom DEFT Fn buttons\n"); + hid_info(hdev, "Fixing up Elecom DEFT/HUGE Fn buttons\n"); rdesc[13] = 8; /* Button/Variable Report Count */ rdesc[21] = 8; /* Button/Variable Usage Maximum */ rdesc[29] = 0; /* Button/Constant Report Count */ @@ -76,6 +79,8 @@ static const struct hid_device_id elecom_devices[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_HUGE_WIRED) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_HUGE_WIRELESS) }, { } }; MODULE_DEVICE_TABLE(hid, elecom_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index a98919199858..be2e005c3c51 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -368,6 +368,8 @@ #define USB_DEVICE_ID_ELECOM_BM084 0x0061 #define USB_DEVICE_ID_ELECOM_DEFT_WIRED 0x00fe #define USB_DEVICE_ID_ELECOM_DEFT_WIRELESS 0x00ff +#define USB_DEVICE_ID_ELECOM_HUGE_WIRED 0x010c +#define USB_DEVICE_ID_ELECOM_HUGE_WIRELESS 0x010d #define USB_VENDOR_ID_DREAM_CHEEKY 0x1d34 #define USB_DEVICE_ID_DREAM_CHEEKY_WN 0x0004 -- cgit v1.2.3-70-g09d2 From 99fee508245825765ff60155fed43f970ff83a8f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Oct 2017 16:39:02 +0200 Subject: ALSA: caiaq: Fix stray URB at probe error path caiaq driver doesn't kill the URB properly at its error path during the probe, which may lead to a use-after-free error later. This patch addresses it. Reported-by: Johan Hovold Reviewed-by: Johan Hovold Cc: Signed-off-by: Takashi Iwai --- sound/usb/caiaq/device.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index 0fb6b1b79261..d8409d9ae55b 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -469,10 +469,12 @@ static int init_card(struct snd_usb_caiaqdev *cdev) err = snd_usb_caiaq_send_command(cdev, EP1_CMD_GET_DEVICE_INFO, NULL, 0); if (err) - return err; + goto err_kill_urb; - if (!wait_event_timeout(cdev->ep1_wait_queue, cdev->spec_received, HZ)) - return -ENODEV; + if (!wait_event_timeout(cdev->ep1_wait_queue, cdev->spec_received, HZ)) { + err = -ENODEV; + goto err_kill_urb; + } usb_string(usb_dev, usb_dev->descriptor.iManufacturer, cdev->vendor_name, CAIAQ_USB_STR_LEN); @@ -507,6 +509,10 @@ static int init_card(struct snd_usb_caiaqdev *cdev) setup_card(cdev); return 0; + + err_kill_urb: + usb_kill_urb(&cdev->ep1_in_urb); + return err; } static int snd_probe(struct usb_interface *intf, -- cgit v1.2.3-70-g09d2 From 67bb8e999e0aeac285d22f0e53c856b9df5282c6 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 10 Oct 2017 14:45:04 -0500 Subject: x86/mm: Disable various instrumentations of mm/mem_encrypt.c and mm/tlb.c Some routines in mem_encrypt.c are called very early in the boot process, e.g. sme_enable(). When CONFIG_KCOV=y is defined the resulting code added to sme_enable() (and others) for KCOV instrumentation results in a kernel crash. Disable the KCOV instrumentation for mem_encrypt.c by adding KCOV_INSTRUMENT_mem_encrypt.o := n to arch/x86/mm/Makefile. In order to avoid other possible early boot issues, model mem_encrypt.c after head64.c in regards to tools. In addition to disabling KCOV as stated above and a previous patch that disables branch profiling, also remove the "-pg" CFLAG if CONFIG_FUNCTION_TRACER is enabled and set KASAN_SANITIZE to "n", each of which are done on a file basis. Reported-by: kernel test robot Signed-off-by: Tom Lendacky Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171010194504.18887.38053.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/mm/Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 72bf8c01c6e3..e1f095884386 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,5 +1,12 @@ -# Kernel does not boot with instrumentation of tlb.c. -KCOV_INSTRUMENT_tlb.o := n +# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c +KCOV_INSTRUMENT_tlb.o := n +KCOV_INSTRUMENT_mem_encrypt.o := n + +KASAN_SANITIZE_mem_encrypt.o := n + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_mem_encrypt.o = -pg +endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o physaddr.o setup_nx.o tlb.o -- cgit v1.2.3-70-g09d2 From 56ae414e9d2718bcbfda9ba3797c39005e2f90fb Mon Sep 17 00:00:00 2001 From: Alexander Levin Date: Mon, 10 Apr 2017 18:46:51 +0000 Subject: 9p: set page uptodate when required in write_end() Commit 77469c3f570 prevented setting the page as uptodate when we wrote the right amount of data, fix that. Fixes: 77469c3f570 ("9p: saner ->write_end() on failing copy into non-uptodate page") Reviewed-by: Jan Kara Signed-off-by: Alexander Levin Signed-off-by: Linus Torvalds --- fs/9p/vfs_addr.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index adaf6f6dd858..e1cbdfdb7c68 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -310,9 +310,13 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping, p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); - if (unlikely(copied < len && !PageUptodate(page))) { - copied = 0; - goto out; + if (!PageUptodate(page)) { + if (unlikely(copied < len)) { + copied = 0; + goto out; + } else if (len == PAGE_SIZE) { + SetPageUptodate(page); + } } /* * No need to use i_size_read() here, the i_size -- cgit v1.2.3-70-g09d2 From ead666000a5fe34bdc82d61838e4df2d416ea15e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 12 Sep 2017 05:58:26 -0400 Subject: media: dvb_frontend: only use kref after initialized As reported by Laurent, when a DVB frontend need to register two drivers (e. g. a tuner and a demod), if the second driver fails to register (for example because it was not compiled), the error handling logic frees the frontend by calling dvb_frontend_detach(). That used to work fine, but changeset 1f862a68df24 ("[media] dvb_frontend: move kref to struct dvb_frontend") added a kref at struct dvb_frontend. So, now, instead of just freeing the data, the error handling do a kref_put(). That works fine only after dvb_register_frontend() succeeds. While it would be possible to add a helper function that would be initializing earlier the kref, that would require changing every single DVB frontend on non-trivial ways, and would make frontends different than other drivers. So, instead of doing that, let's focus on the real issue: only call kref_put() after kref_init(). That's easy to check, as, when the dvb frontend is successfuly registered, it will allocate its own private struct. So, if such struct is allocated, it means that it is safe to use kref_put(). If not, then nobody is using yet the frontend, and it is safe to just deallocate it. Fixes: 1f862a68df24 ("[media] dvb_frontend: move kref to struct dvb_frontend") Reported-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dvb_frontend.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 2fcba1616168..9139d01ba7ed 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -141,22 +141,39 @@ struct dvb_frontend_private { static void dvb_frontend_invoke_release(struct dvb_frontend *fe, void (*release)(struct dvb_frontend *fe)); -static void dvb_frontend_free(struct kref *ref) +static void __dvb_frontend_free(struct dvb_frontend *fe) { - struct dvb_frontend *fe = - container_of(ref, struct dvb_frontend, refcount); struct dvb_frontend_private *fepriv = fe->frontend_priv; + if (!fepriv) + return; + dvb_free_device(fepriv->dvbdev); dvb_frontend_invoke_release(fe, fe->ops.release); kfree(fepriv); + fe->frontend_priv = NULL; +} + +static void dvb_frontend_free(struct kref *ref) +{ + struct dvb_frontend *fe = + container_of(ref, struct dvb_frontend, refcount); + + __dvb_frontend_free(fe); } static void dvb_frontend_put(struct dvb_frontend *fe) { - kref_put(&fe->refcount, dvb_frontend_free); + /* + * Check if the frontend was registered, as otherwise + * kref was not initialized yet. + */ + if (fe->frontend_priv) + kref_put(&fe->refcount, dvb_frontend_free); + else + __dvb_frontend_free(fe); } static void dvb_frontend_get(struct dvb_frontend *fe) -- cgit v1.2.3-70-g09d2 From 753affba96d3608e058cefc4534007661efd8c96 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 11 Oct 2017 20:01:42 +0300 Subject: ARC: [plat-hsdk] Increase SDIO CIU frequency to 50000000Hz With current SDIO CIU clock frequency (12500000Hz) DW MMC controller fails to initialize some SD cards (which don't support slow mode). So increase SDIO CIU frequency from 12500000Hz to 50000000Hz by switching from the default divisor value (div-by-8) to the minimum possible value of the divisor (div-by-2) in HSDK platform code. Reported-by: Vineet Gupta Tested-by: Vineet Gupta Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 11 ++++++----- arch/arc/plat-hsdk/platform.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 8adde1b492f1..8f627c200d60 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -137,14 +137,15 @@ /* * DW sdio controller has external ciu clock divider * controlled via register in SDIO IP. Due to its - * unexpected default value (it should devide by 1 - * but it devides by 8) SDIO IP uses wrong clock and + * unexpected default value (it should divide by 1 + * but it divides by 8) SDIO IP uses wrong clock and * works unstable (see STAR 9001204800) + * We switched to the minimum possible value of the + * divisor (div-by-2) in HSDK platform code. * So add temporary fix and change clock frequency - * from 100000000 to 12500000 Hz until we fix dw sdio - * driver itself. + * to 50000000 Hz until we fix dw sdio driver itself. */ - clock-frequency = <12500000>; + clock-frequency = <50000000>; #clock-cells = <0>; }; diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 744e62e58788..fd0ae5e38639 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -74,6 +74,10 @@ static void __init hsdk_set_cpu_freq_1ghz(void) pr_err("Failed to setup CPU frequency to 1GHz!"); } +#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) +#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) +#define SDIO_UHS_REG_EXT_DIV_2 (2 << 30) + static void __init hsdk_init_early(void) { /* @@ -89,6 +93,12 @@ static void __init hsdk_init_early(void) /* Really apply settings made above */ writel(1, (void __iomem *) CREG_PAE_UPDATE); + /* + * Switch SDIO external ciu clock divider from default div-by-8 to + * minimum possible div-by-2. + */ + iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT); + /* * Setup CPU frequency to 1GHz. * TODO: remove it after smart hsdk pll driver will be introduced. -- cgit v1.2.3-70-g09d2 From 20413e37d71befd02b5846acdaf5e2564dd1c38e Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 9 Oct 2017 11:37:22 -0700 Subject: xfs: Don't log uninitialised fields in inode structures Prevent kmemcheck from throwing warnings about reading uninitialised memory when formatting inodes into the incore log buffer. There are several issues here - we don't always log all the fields in the inode log format item, and we never log the inode the di_next_unlinked field. In the case of the inode log format item, this is exacerbated by the old xfs_inode_log_format structure padding issue. Hence make the padded, 64 bit aligned version of the structure the one we always use for formatting the log and get rid of the 64 bit variant. This means we'll always log the 64-bit version and so recovery only needs to convert from the unpadded 32 bit version from older 32 bit kernels. Signed-Off-By: Dave Chinner Tested-by: Tetsuo Handa Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_log_format.h | 27 +++++---------- fs/xfs/xfs_inode_item.c | 79 ++++++++++++++++++++++-------------------- fs/xfs/xfs_ondisk.h | 2 +- 3 files changed, 50 insertions(+), 58 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 8372e9bcd7b6..71de185735e0 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -270,6 +270,7 @@ typedef struct xfs_inode_log_format { uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ + uint32_t ilf_pad; /* pad for 64 bit boundary */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ @@ -280,29 +281,17 @@ typedef struct xfs_inode_log_format { int32_t ilf_boffset; /* off of inode in buffer */ } xfs_inode_log_format_t; -typedef struct xfs_inode_log_format_32 { - uint16_t ilf_type; /* inode log item type */ - uint16_t ilf_size; /* size of this item */ - uint32_t ilf_fields; /* flags for fields logged */ - uint16_t ilf_asize; /* size of attr d/ext/root */ - uint16_t ilf_dsize; /* size of data/ext/root */ - uint64_t ilf_ino; /* inode number */ - union { - uint32_t ilfu_rdev; /* rdev value for dev inode*/ - uuid_t ilfu_uuid; /* mount point value */ - } ilf_u; - int64_t ilf_blkno; /* blkno of inode buffer */ - int32_t ilf_len; /* len of inode buffer */ - int32_t ilf_boffset; /* off of inode in buffer */ -} __attribute__((packed)) xfs_inode_log_format_32_t; - -typedef struct xfs_inode_log_format_64 { +/* + * Old 32 bit systems will log in this format without the 64 bit + * alignment padding. Recovery will detect this and convert it to the + * correct format. + */ +struct xfs_inode_log_format_32 { uint16_t ilf_type; /* inode log item type */ uint16_t ilf_size; /* size of this item */ uint32_t ilf_fields; /* flags for fields logged */ uint16_t ilf_asize; /* size of attr d/ext/root */ uint16_t ilf_dsize; /* size of data/ext/root */ - uint32_t ilf_pad; /* pad for 64 bit boundary */ uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ @@ -311,7 +300,7 @@ typedef struct xfs_inode_log_format_64 { int64_t ilf_blkno; /* blkno of inode buffer */ int32_t ilf_len; /* len of inode buffer */ int32_t ilf_boffset; /* off of inode in buffer */ -} xfs_inode_log_format_64_t; +} __attribute__((packed)); /* diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index a705f34b58fa..9bbc2d7cc8cb 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -364,6 +364,9 @@ xfs_inode_to_log_dinode( to->di_dmstate = from->di_dmstate; to->di_flags = from->di_flags; + /* log a dummy value to ensure log structure is fully initialised */ + to->di_next_unlinked = NULLAGINO; + if (from->di_version == 3) { to->di_changecount = inode->i_version; to->di_crtime.t_sec = from->di_crtime.t_sec; @@ -404,6 +407,11 @@ xfs_inode_item_format_core( * the second with the on-disk inode structure, and a possible third and/or * fourth with the inode data/extents/b-tree root and inode attributes * data/extents/b-tree root. + * + * Note: Always use the 64 bit inode log format structure so we don't + * leave an uninitialised hole in the format item on 64 bit systems. Log + * recovery on 32 bit systems handles this just fine, so there's no reason + * for not using an initialising the properly padded structure all the time. */ STATIC void xfs_inode_item_format( @@ -412,8 +420,8 @@ xfs_inode_item_format( { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - struct xfs_inode_log_format *ilf; struct xfs_log_iovec *vecp = NULL; + struct xfs_inode_log_format *ilf; ASSERT(ip->i_d.di_version > 1); @@ -425,7 +433,17 @@ xfs_inode_item_format( ilf->ilf_boffset = ip->i_imap.im_boffset; ilf->ilf_fields = XFS_ILOG_CORE; ilf->ilf_size = 2; /* format + core */ - xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); + + /* + * make sure we don't leak uninitialised data into the log in the case + * when we don't log every field in the inode. + */ + ilf->ilf_dsize = 0; + ilf->ilf_asize = 0; + ilf->ilf_pad = 0; + uuid_copy(&ilf->ilf_u.ilfu_uuid, &uuid_null); + + xlog_finish_iovec(lv, vecp, sizeof(*ilf)); xfs_inode_item_format_core(ip, lv, &vecp); xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); @@ -855,44 +873,29 @@ xfs_istale_done( } /* - * convert an xfs_inode_log_format struct from either 32 or 64 bit versions - * (which can have different field alignments) to the native version + * convert an xfs_inode_log_format struct from the old 32 bit version + * (which can have different field alignments) to the native 64 bit version */ int xfs_inode_item_format_convert( - xfs_log_iovec_t *buf, - xfs_inode_log_format_t *in_f) + struct xfs_log_iovec *buf, + struct xfs_inode_log_format *in_f) { - if (buf->i_len == sizeof(xfs_inode_log_format_32_t)) { - xfs_inode_log_format_32_t *in_f32 = buf->i_addr; - - in_f->ilf_type = in_f32->ilf_type; - in_f->ilf_size = in_f32->ilf_size; - in_f->ilf_fields = in_f32->ilf_fields; - in_f->ilf_asize = in_f32->ilf_asize; - in_f->ilf_dsize = in_f32->ilf_dsize; - in_f->ilf_ino = in_f32->ilf_ino; - /* copy biggest field of ilf_u */ - uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); - in_f->ilf_blkno = in_f32->ilf_blkno; - in_f->ilf_len = in_f32->ilf_len; - in_f->ilf_boffset = in_f32->ilf_boffset; - return 0; - } else if (buf->i_len == sizeof(xfs_inode_log_format_64_t)){ - xfs_inode_log_format_64_t *in_f64 = buf->i_addr; - - in_f->ilf_type = in_f64->ilf_type; - in_f->ilf_size = in_f64->ilf_size; - in_f->ilf_fields = in_f64->ilf_fields; - in_f->ilf_asize = in_f64->ilf_asize; - in_f->ilf_dsize = in_f64->ilf_dsize; - in_f->ilf_ino = in_f64->ilf_ino; - /* copy biggest field of ilf_u */ - uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid); - in_f->ilf_blkno = in_f64->ilf_blkno; - in_f->ilf_len = in_f64->ilf_len; - in_f->ilf_boffset = in_f64->ilf_boffset; - return 0; - } - return -EFSCORRUPTED; + struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; + + if (buf->i_len != sizeof(*in_f32)) + return -EFSCORRUPTED; + + in_f->ilf_type = in_f32->ilf_type; + in_f->ilf_size = in_f32->ilf_size; + in_f->ilf_fields = in_f32->ilf_fields; + in_f->ilf_asize = in_f32->ilf_asize; + in_f->ilf_dsize = in_f32->ilf_dsize; + in_f->ilf_ino = in_f32->ilf_ino; + /* copy biggest field of ilf_u */ + uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); + in_f->ilf_blkno = in_f32->ilf_blkno; + in_f->ilf_len = in_f32->ilf_len; + in_f->ilf_boffset = in_f32->ilf_boffset; + return 0; } diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 0c381d71b242..0492436a053f 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -134,7 +134,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28); XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8); XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52); - XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64, 56); + XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20); XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); } -- cgit v1.2.3-70-g09d2 From bb9c2e5433250f5b477035dc478314f8e6dd5e36 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 9 Oct 2017 11:37:22 -0700 Subject: xfs: move more RT specific code under CONFIG_XFS_RT Various utility functions and interfaces that iterate internal devices try to reference the realtime device even when RT support is not compiled into the kernel. Make sure this code is excluded from the CONFIG_XFS_RT=n build, and where appropriate stub functions to return fatal errors if they ever get called when RT support is not present. Signed-Off-By: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 2 ++ fs/xfs/xfs_bmap_util.h | 13 +++++++++++++ fs/xfs/xfs_fsmap.c | 12 ++++++++++++ 3 files changed, 27 insertions(+) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index e9db7fc95b70..6503cfa44262 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -84,6 +84,7 @@ xfs_zero_extent( GFP_NOFS, 0); } +#ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ @@ -190,6 +191,7 @@ xfs_bmap_rtalloc( } return 0; } +#endif /* CONFIG_XFS_RT */ /* * Check if the endoff is outside the last extent. If so the caller will grow diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 0eaa81dc49be..7d330b3c77c3 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -28,7 +28,20 @@ struct xfs_mount; struct xfs_trans; struct xfs_bmalloca; +#ifdef CONFIG_XFS_RT int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); +#else /* !CONFIG_XFS_RT */ +/* + * Attempts to allocate RT extents when RT is disable indicates corruption and + * should trigger a shutdown. + */ +static inline int +xfs_bmap_rtalloc(struct xfs_bmalloca *ap) +{ + return -EFSCORRUPTED; +} +#endif /* CONFIG_XFS_RT */ + int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, int whichfork, int *eof); int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 814ed729881d..560e0b40ac1b 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -521,6 +521,7 @@ __xfs_getfsmap_rtdev( return query_fn(tp, info); } +#ifdef CONFIG_XFS_RT /* Actually query the realtime bitmap. */ STATIC int xfs_getfsmap_rtdev_rtbitmap_query( @@ -561,6 +562,7 @@ xfs_getfsmap_rtdev_rtbitmap( return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query, info); } +#endif /* CONFIG_XFS_RT */ /* Execute a getfsmap query against the regular data device. */ STATIC int @@ -795,7 +797,15 @@ xfs_getfsmap_check_keys( return false; } +/* + * There are only two devices if we didn't configure RT devices at build time. + */ +#ifdef CONFIG_XFS_RT #define XFS_GETFSMAP_DEVS 3 +#else +#define XFS_GETFSMAP_DEVS 2 +#endif /* CONFIG_XFS_RT */ + /* * Get filesystem's extents as described in head, and format for * output. Calls formatter to fill the user's buffer until all @@ -853,10 +863,12 @@ xfs_getfsmap( handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); handlers[1].fn = xfs_getfsmap_logdev; } +#ifdef CONFIG_XFS_RT if (mp->m_rtdev_targp) { handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; } +#endif /* CONFIG_XFS_RT */ xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev), xfs_getfsmap_dev_compare); -- cgit v1.2.3-70-g09d2 From 67f2ffe31d1a683170c2ba0ecc643e42a5fdd397 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 9 Oct 2017 11:37:23 -0700 Subject: xfs: don't change inode mode if ACL update fails If we get ENOSPC half way through setting the ACL, the inode mode can still be changed even though the ACL does not exist. Reorder the operation to only change the mode of the inode if the ACL is set correctly. Whilst this does not fix the problem with crash consistency (that requires attribute addition to be a deferred op) it does prevent ENOSPC and other non-fatal errors setting an xattr to be handled sanely. This fixes xfstests generic/449. Signed-Off-By: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_acl.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 7034e17535de..3354140de07e 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -247,6 +247,8 @@ xfs_set_mode(struct inode *inode, umode_t mode) int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { + umode_t mode; + bool set_mode = false; int error = 0; if (!acl) @@ -257,16 +259,24 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return error; if (type == ACL_TYPE_ACCESS) { - umode_t mode; - error = posix_acl_update_mode(inode, &mode, &acl); if (error) return error; - error = xfs_set_mode(inode, mode); - if (error) - return error; + set_mode = true; } set_acl: - return __xfs_set_acl(inode, acl, type); + error = __xfs_set_acl(inode, acl, type); + if (error) + return error; + + /* + * We set the mode after successfully updating the ACL xattr because the + * xattr update can fail at ENOSPC and we don't want to change the mode + * if the ACL update hasn't been applied. + */ + if (set_mode) + error = xfs_set_mode(inode, mode); + + return error; } -- cgit v1.2.3-70-g09d2 From 749f24f33e87c1706d716c283027595b72a034f3 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Mon, 9 Oct 2017 11:38:54 -0700 Subject: xfs: Fix bool initialization/comparison Bool initializations should use true and false. Bool tests don't need comparisons. Signed-off-by: Thomas Meyer Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 4 ++-- fs/xfs/libxfs/xfs_ialloc.c | 4 ++-- fs/xfs/xfs_file.c | 4 ++-- fs/xfs/xfs_log.c | 2 +- fs/xfs/xfs_mount.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 044a363119be..def32fa1c225 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1477,14 +1477,14 @@ xfs_bmap_isaeof( int is_empty; int error; - bma->aeof = 0; + bma->aeof = false; error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, &is_empty); if (error) return error; if (is_empty) { - bma->aeof = 1; + bma->aeof = true; return 0; } diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 988bb3f31446..dfd643909f85 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1962,7 +1962,7 @@ xfs_difree_inobt( if (!(mp->m_flags & XFS_MOUNT_IKEEP) && rec.ir_free == XFS_INOBT_ALL_FREE && mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) { - xic->deleted = 1; + xic->deleted = true; xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); xic->alloc = xfs_inobt_irec_to_allocmask(&rec); @@ -1989,7 +1989,7 @@ xfs_difree_inobt( xfs_difree_inode_chunk(mp, agno, &rec, dfops); } else { - xic->deleted = 0; + xic->deleted = false; error = xfs_inobt_update(cur, &rec); if (error) { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 309e26c9dddb..56d0e526870c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -764,7 +764,7 @@ xfs_file_fallocate( enum xfs_prealloc_flags flags = 0; uint iolock = XFS_IOLOCK_EXCL; loff_t new_size = 0; - bool do_file_insert = 0; + bool do_file_insert = false; if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -825,7 +825,7 @@ xfs_file_fallocate( error = -EINVAL; goto out_unlock; } - do_file_insert = 1; + do_file_insert = true; } else { flags |= XFS_PREALLOC_SET; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c5107c7bc4bf..dc95a49d62e7 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2515,7 +2515,7 @@ next_lv: if (lv) vecp = lv->lv_iovecp; } - if (record_cnt == 0 && ordered == false) { + if (record_cnt == 0 && !ordered) { if (!lv) return 0; break; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index ea7d4b4e50d0..e9727d0a541a 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -704,7 +704,7 @@ xfs_mountfs( xfs_set_maxicount(mp); /* enable fail_at_unmount as default */ - mp->m_fail_unmount = 1; + mp->m_fail_unmount = true; error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); if (error) -- cgit v1.2.3-70-g09d2 From f35c5e10c6ed6ba52a8dd8573924a80b6a02f03f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 9 Oct 2017 11:38:56 -0700 Subject: xfs: reinit btree pointer on attr tree inactivation walk xfs_attr3_root_inactive() walks the attr fork tree to invalidate the associated blocks. xfs_attr3_node_inactive() recursively descends from internal blocks to leaf blocks, caching block address values along the way to revisit parent blocks, locate the next entry and descend down that branch of the tree. The code that attempts to reread the parent block is unsafe because it assumes that the local xfs_da_node_entry pointer remains valid after an xfs_trans_brelse() and re-read of the parent buffer. Under heavy memory pressure, it is possible that the buffer has been reclaimed and reallocated by the time the parent block is reread. This means that 'btree' can point to an invalid memory address, lead to a random/garbage value for child_fsb and cause the subsequent read of the attr fork to go off the rails and return a NULL buffer for an attr fork offset that is most likely not allocated. Note that this problem can be manufactured by setting XFS_ATTR_BTREE_REF to 0 to prevent LRU caching of attr buffers, creating a file with a multi-level attr fork and removing it to trigger inactivation. To address this problem, reinit the node/btree pointers to the parent buffer after it has been re-read. This ensures btree points to a valid record and allows the walk to proceed. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_attr_inactive.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index ebd66b19fbfc..e3a950ed35a8 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -302,6 +302,8 @@ xfs_attr3_node_inactive( &bp, XFS_ATTR_FORK); if (error) return error; + node = bp->b_addr; + btree = dp->d_ops->node_tree_p(node); child_fsb = be32_to_cpu(btree[i + 1].before); xfs_trans_brelse(*trans, bp); } -- cgit v1.2.3-70-g09d2 From 93e8befc17f6d6ea92b0aee3741ceac8bca4590f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 9 Oct 2017 21:08:06 -0700 Subject: xfs: handle error if xfs_btree_get_bufs fails Jason reported that a corrupted filesystem failed to replay the log with a metadata block out of bounds warning: XFS (dm-2): _xfs_buf_find: Block out of range: block 0x80270fff8, EOFS 0x9c40000 _xfs_buf_find() and xfs_btree_get_bufs() return NULL if that happens, and then when xfs_alloc_fix_freelist() calls xfs_trans_binval() on that NULL bp, we oops with: BUG: unable to handle kernel NULL pointer dereference at 00000000000000f8 We don't handle _xfs_buf_find errors very well, every caller higher up the stack gets to guess at why it failed. But we should at least handle it somehow, so return EFSCORRUPTED here. Reported-by: Jason L Tibbitts III Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 744dcaec34cc..f965ce832bc0 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1584,6 +1584,10 @@ xfs_alloc_ag_vextent_small( bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto error0; + } xfs_trans_binval(args->tp, bp); } args->len = 1; @@ -2141,6 +2145,10 @@ xfs_alloc_fix_freelist( if (error) goto out_agbp_relse; bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); + if (!bp) { + error = -EFSCORRUPTED; + goto out_agbp_relse; + } xfs_trans_binval(tp, bp); } -- cgit v1.2.3-70-g09d2 From eef9ffdf9cd39b2986367bc8395e2772bc1284ba Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 9 Oct 2017 13:33:19 +0200 Subject: scsi: libiscsi: fix shifting of DID_REQUEUE host byte The SCSI host byte should be shifted left by 16 in order to have scsi_decide_disposition() do the right thing (.i.e. requeue the command). Signed-off-by: Johannes Thumshirn Fixes: 661134ad3765 ("[SCSI] libiscsi, bnx2i: make bound ep check common") Cc: Lee Duncan Cc: Hannes Reinecke Cc: Bart Van Assche Cc: Chris Leech Acked-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index c62e8d111fd9..f8dc1601efd5 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1728,7 +1728,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { reason = FAILURE_SESSION_IN_RECOVERY; - sc->result = DID_REQUEUE; + sc->result = DID_REQUEUE << 16; goto fault; } -- cgit v1.2.3-70-g09d2 From ea850f64c2722278f150dc11de2141baeb24211c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 28 Sep 2017 11:21:57 +0300 Subject: drm/i915/bios: parse DDI ports also for CHV for HDMI DDC pin and DP AUX channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While technically CHV isn't DDI, we do look at the VBT based DDI port info for HDMI DDC pin and DP AUX channel. (We call these "alternate", but they're really just something that aren't platform defaults.) In commit e4ab73a13291 ("drm/i915: Respect alternate_ddc_pin for all DDI ports") Ville writes, "IIRC there may be CHV system that might actually need this." I'm not sure why there couldn't be even more platforms that need this, but start conservative, and parse the info for CHV in addition to DDI. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100553 Reported-by: Marek Wilczewski Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/d0815082cb98487618429b62414854137049b888.1506586821.git.jani.nikula@intel.com (cherry picked from commit 348e4058ebf53904e817eec7a1b25327143c2ed2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_bios.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 00c6aee0a9a1..5d4cd3d00564 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1240,7 +1240,7 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, { enum port port; - if (!HAS_DDI(dev_priv)) + if (!HAS_DDI(dev_priv) && !IS_CHERRYVIEW(dev_priv)) return; if (!dev_priv->vbt.child_dev_num) -- cgit v1.2.3-70-g09d2 From 68a39a3e9fe1d6f0dfe59e3f4d6bc6765e01c903 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 11 Oct 2017 10:48:44 +0000 Subject: remoteproc: imx_rproc: fix return value check in imx_rproc_addr_init() In case of error, the function devm_ioremap() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Reviewed-by: Oleksij Rempel Signed-off-by: Wei Yongjun Signed-off-by: Bjorn Andersson --- drivers/remoteproc/imx_rproc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index 81ba44510b75..633268e9d550 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -269,10 +269,9 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, priv->mem[b].cpu_addr = devm_ioremap(&pdev->dev, att->sa, att->size); - if (IS_ERR(priv->mem[b].cpu_addr)) { + if (!priv->mem[b].cpu_addr) { dev_err(dev, "devm_ioremap_resource failed\n"); - err = PTR_ERR(priv->mem[b].cpu_addr); - return err; + return -ENOMEM; } priv->mem[b].sys_addr = att->sa; priv->mem[b].size = att->size; -- cgit v1.2.3-70-g09d2 From a9e170e28636fd577249f39029d59e4e960a42b8 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 10 Oct 2017 12:08:22 -0700 Subject: scsi: qla2xxx: Fix uninitialized work element Fixes following stack trace kernel: Call Trace: kernel: dump_stack+0x63/0x84 kernel: __warn+0xd1/0xf0 kernel: warn_slowpath_null+0x1d/0x20 kernel: __queue_work+0x37a/0x420 kernel: queue_work_on+0x27/0x40 kernel: queue_work+0x14/0x20 [qla2xxx] kernel: schedule_work+0x13/0x20 [qla2xxx] kernel: qla2x00_post_work+0xab/0xb0 [qla2xxx] kernel: qla2x00_post_aen_work+0x3b/0x50 [qla2xxx] kernel: qla2x00_async_event+0x20d/0x15d0 [qla2xxx] kernel: ? lock_timer_base+0x7d/0xa0 kernel: qla24xx_intr_handler+0x1da/0x310 [qla2xxx] kernel: qla2x00_poll+0x36/0x60 [qla2xxx] kernel: qla2x00_mailbox_command+0x659/0xec0 [qla2xxx] kernel: ? proc_create_data+0x7a/0xd0 kernel: qla25xx_init_rsp_que+0x15b/0x240 [qla2xxx] kernel: ? request_irq+0x14/0x20 [qla2xxx] kernel: qla25xx_create_rsp_que+0x256/0x3c0 [qla2xxx] kernel: qla2xxx_create_qpair+0x2af/0x5b0 [qla2xxx] kernel: qla2x00_probe_one+0x1107/0x1c30 [qla2xxx] Fixes: ec7193e26055 ("qla2xxx: Fix delayed response to command for loop mode/direct connect.") Cc: # 4.13 Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5b2437a5ea44..937209805baf 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3175,6 +3175,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->can_queue, base_vha->req, base_vha->mgmt_svr_loop_id, host->sg_tablesize); + INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); + if (ha->mqenable) { bool mq = false; bool startit = false; @@ -3223,7 +3225,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ qla2xxx_wake_dpc(base_vha); - INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); INIT_WORK(&ha->board_disable, qla2x00_disable_board_on_pci_error); if (IS_QLA8031(ha) || IS_MCTP_CAPABLE(ha)) { -- cgit v1.2.3-70-g09d2 From 8d30371fd7c328e192d7ea3108bd71b903631d6a Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 10 Oct 2017 17:31:38 +0200 Subject: scsi: fc: check for rport presence in fc_block_scsi_eh Coverity-scan recently found a possible NULL pointer dereference in fc_block_scsi_eh() as starget_to_rport() either returns the rport for the startget or NULL. While it is rather unlikely to have fc_block_scsi_eh() called without an rport associated it's a good idea to catch potential misuses of the API gracefully. Signed-off-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_fc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index cbd4495d0ff9..8c46a6d536af 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3320,6 +3320,9 @@ int fc_block_scsi_eh(struct scsi_cmnd *cmnd) { struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device)); + if (WARN_ON_ONCE(!rport)) + return FAST_IO_FAIL; + return fc_block_rport(rport); } EXPORT_SYMBOL(fc_block_scsi_eh); -- cgit v1.2.3-70-g09d2 From c343bc2ce2c627b6cef2b09794a4a5b63419a798 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 26 Sep 2017 12:08:27 +0300 Subject: ACPI: properties: Align return codes of __acpi_node_get_property_reference() acpi_fwnode_get_reference_args(), the function implementing ACPI support for fwnode_property_get_reference_args(), returns directly error codes from __acpi_node_get_property_reference(). The latter uses different error codes than the OF implementation. In particular, the OF implementation uses -ENOENT to indicate that the property is not found, a reference entry is empty and there are no more references. Document and align the error codes for property for fwnode_property_get_reference_args() so that they match with of_parse_phandle_with_args(). Fixes: 3e3119d3088f (device property: Introduce fwnode_property_get_reference_args) Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 19 +++++++++---------- drivers/base/property.c | 4 ++++ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 3fb8ff513461..5a8ac5e1081b 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -571,10 +571,9 @@ static int acpi_data_get_property_array(const struct acpi_device_data *data, * } * } * - * Calling this function with index %2 return %-ENOENT and with index %3 - * returns the last entry. If the property does not contain any more values - * %-ENODATA is returned. The NULL entry must be single integer and - * preferably contain value %0. + * Calling this function with index %2 or index %3 return %-ENOENT. If the + * property does not contain any more values %-ENOENT is returned. The NULL + * entry must be single integer and preferably contain value %0. * * Return: %0 on success, negative error code on failure. */ @@ -590,7 +589,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, data = acpi_device_data_of_node(fwnode); if (!data) - return -EINVAL; + return -ENOENT; ret = acpi_data_get_property(data, propname, ACPI_TYPE_ANY, &obj); if (ret) @@ -635,7 +634,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, ret = acpi_bus_get_device(element->reference.handle, &device); if (ret) - return -ENODEV; + return -EINVAL; nargs = 0; element++; @@ -649,11 +648,11 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, else if (type == ACPI_TYPE_LOCAL_REFERENCE) break; else - return -EPROTO; + return -EINVAL; } if (nargs > MAX_ACPI_REFERENCE_ARGS) - return -EPROTO; + return -EINVAL; if (idx == index) { args->adev = device; @@ -670,13 +669,13 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, return -ENOENT; element++; } else { - return -EPROTO; + return -EINVAL; } idx++; } - return -ENODATA; + return -ENOENT; } EXPORT_SYMBOL_GPL(__acpi_node_get_property_reference); diff --git a/drivers/base/property.c b/drivers/base/property.c index 21fcc13013a5..7ed99c1b2a8b 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -683,6 +683,10 @@ EXPORT_SYMBOL_GPL(fwnode_property_match_string); * Caller is responsible to call fwnode_handle_put() on the returned * args->fwnode pointer. * + * Returns: %0 on success + * %-ENOENT when the index is out of bounds, the index has an empty + * reference or the property was not found + * %-EINVAL on parse error */ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, -- cgit v1.2.3-70-g09d2 From 51858a2777f025333c5ac3b3484263bba56461b3 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 11 Oct 2017 11:06:13 +0300 Subject: ACPI: properties: Fix __acpi_node_get_property_reference() return codes Fix more return codes for device property: Align return codes of __acpi_node_get_property_reference(). In particular, what was missed previously: -EPROTO could be returned in certain cases, now -EINVAL; -EINVAL was returned if the property was not found, now -ENOENT; -EINVAL was returned also if the index was higher than the number of entries in a package, now -ENOENT. Reported-by: Hyungwoo Yang Fixes: 3e3119d3088f (device property: Introduce fwnode_property_get_reference_args) Signed-off-by: Sakari Ailus Tested-by: Hyungwoo Yang Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 5a8ac5e1081b..e26ea209b63e 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -593,7 +593,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, ret = acpi_data_get_property(data, propname, ACPI_TYPE_ANY, &obj); if (ret) - return ret; + return ret == -EINVAL ? -ENOENT : -EINVAL; /* * The simplest case is when the value is a single reference. Just @@ -605,7 +605,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, ret = acpi_bus_get_device(obj->reference.handle, &device); if (ret) - return ret; + return ret == -ENODEV ? -EINVAL : ret; args->adev = device; args->nargs = 0; @@ -621,8 +621,10 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, * The index argument is then used to determine which reference * the caller wants (along with the arguments). */ - if (obj->type != ACPI_TYPE_PACKAGE || index >= obj->package.count) - return -EPROTO; + if (obj->type != ACPI_TYPE_PACKAGE) + return -EINVAL; + if (index >= obj->package.count) + return -ENOENT; element = obj->package.elements; end = element + obj->package.count; -- cgit v1.2.3-70-g09d2 From 5aba2ba5030b66a6f8c93049b718556f9aacd7c6 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 10 Oct 2017 17:07:12 +0200 Subject: macsec: fix memory leaks when skb_to_sgvec fails Fixes: cda7ea690350 ("macsec: check return value of skb_to_sgvec always") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 98e4deaa3a6a..5ab1b8849c30 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -742,6 +742,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { + aead_request_free(req); macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(ret); @@ -954,6 +955,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { + aead_request_free(req); kfree_skb(skb); return ERR_PTR(ret); } -- cgit v1.2.3-70-g09d2 From bde135a672bfd1cc41d91c2bbbbd36eb25409b74 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 11 Oct 2017 12:56:52 +0800 Subject: r8169: only enable PCI wakeups when WOL is active rtl_init_one() currently enables PCI wakeups if the ethernet device is found to be WOL-capable. There is no need to do this when rtl8169_set_wol() will correctly enable or disable the same wakeup flag when WOL is activated/deactivated. This works around an ACPI DSDT bug which prevents the Acer laptop models Aspire ES1-533, Aspire ES1-732, PackardBell ENTE69AP and Gateway NE533 from entering S3 suspend - even when no ethernet cable is connected. On these platforms, the DSDT says that GPE08 is a wakeup source for ethernet, but this GPE fires as soon as the system goes into suspend, waking the system up immediately. Having the wakeup normally disabled avoids this issue in the default case. With this change, WOL will continue to be unusable on these platforms (it will instantly wake up if WOL is later enabled by the user) but we do not expect this to be a commonly used feature on these consumer laptops. We have separately determined that WOL works fine without any ACPI GPEs enabled during sleep, so a DSDT fix or override would be possible to make WOL work. Signed-off-by: Daniel Drake Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index e03fcf914690..a3c949ea7d1a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -8491,8 +8491,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl8168_driver_start(tp); } - device_set_wakeup_enable(&pdev->dev, tp->features & RTL_FEATURE_WOL); - if (pci_dev_run_wake(pdev)) pm_runtime_put_noidle(&pdev->dev); -- cgit v1.2.3-70-g09d2 From fdbed19697e1aa0f7cb719c11c67f2da26ea7f47 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 11 Oct 2017 17:07:41 -0700 Subject: ARC: unbork module link errors with !CONFIG_ARC_HAS_LLSC | SYSMAP System.map | Building modules, stage 2. | MODPOST 18 modules |ERROR: "smp_atomic_ops_lock" [drivers/gpu/drm/drm_kms_helper.ko] undefined! |ERROR: "smp_bitops_lock" [drivers/gpu/drm/drm_kms_helper.ko] undefined! |ERROR: "smp_atomic_ops_lock" [drivers/gpu/drm/drm.ko] undefined! | ERROR: "smp_bitops_lock" [drivers/gpu/drm/drm.ko] undefined! |../scripts/Makefile.modpost:91: recipe for target '__modpost' failed Signed-off-by: Vineet Gupta --- arch/arc/kernel/smp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index f46267153ec2..6df9d94a9537 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include #include @@ -30,6 +32,9 @@ #ifndef CONFIG_ARC_HAS_LLSC arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +EXPORT_SYMBOL_GPL(smp_atomic_ops_lock); +EXPORT_SYMBOL_GPL(smp_bitops_lock); #endif struct plat_smp_ops __weak plat_smp_ops; -- cgit v1.2.3-70-g09d2 From 6e9c0075409d4ec1bc63558ee5a93916a6d7d16f Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Wed, 11 Oct 2017 16:54:27 +1100 Subject: net/ncsi: Don't limit vids based on hot_channel Currently we drop any new VLAN ids if there are more than the current (or last used) channel can support. Most importantly this is a problem if no channel has been selected yet, resulting in a segfault. Secondly this does not necessarily reflect the capabilities of any other channels. Instead only drop a new VLAN id if we are already tracking the maximum allowed by the NCSI specification. Per-channel limits are already handled by ncsi_add_filter(), but add a message to set_one_vid() to make it obvious that the channel can not support any more VLAN ids. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/internal.h | 1 + net/ncsi/ncsi-manage.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index af3d636534ef..d30f7bd741d0 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -286,6 +286,7 @@ struct ncsi_dev_priv { struct work_struct work; /* For channel management */ struct packet_type ptype; /* NCSI packet Rx handler */ struct list_head node; /* Form NCSI device list */ +#define NCSI_MAX_VLAN_VIDS 15 struct list_head vlan_vids; /* List of active VLAN IDs */ }; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 3fd3c39e6278..b6a449aa9d4b 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -732,6 +732,10 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, if (index < 0) { netdev_err(ndp->ndev.dev, "Failed to add new VLAN tag, error %d\n", index); + if (index == -ENOSPC) + netdev_err(ndp->ndev.dev, + "Channel %u already has all VLAN filters set\n", + nc->id); return -1; } @@ -1403,7 +1407,6 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { - struct ncsi_channel_filter *ncf; struct ncsi_dev_priv *ndp; unsigned int n_vids = 0; struct vlan_vid *vlan; @@ -1420,7 +1423,6 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) } ndp = TO_NCSI_DEV_PRIV(nd); - ncf = ndp->hot_channel->filters[NCSI_FILTER_VLAN]; /* Add the VLAN id to our internal list */ list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { @@ -1431,12 +1433,11 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) return 0; } } - - if (n_vids >= ncf->total) { - netdev_info(dev, - "NCSI Channel supports up to %u VLAN tags but %u are already set\n", - ncf->total, n_vids); - return -EINVAL; + if (n_vids >= NCSI_MAX_VLAN_VIDS) { + netdev_warn(dev, + "tried to add vlan id %u but NCSI max already registered (%u)\n", + vid, NCSI_MAX_VLAN_VIDS); + return -ENOSPC; } vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From 9d25e3cc83d731ae4eeb017fd07562fde3f80bef Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 9 Oct 2017 13:40:23 +0200 Subject: iommu/exynos: Remove initconst attribute to avoid potential kernel oops Exynos SYSMMU registers standard platform device with sysmmu_of_match table, what means that this table is accessed every time a new platform device is registered in a system. This might happen also after the boot, so the table must not be attributed as initconst to avoid potential kernel oops caused by access to freed memory. Fixes: 6b21a5db3642 ("iommu/exynos: Support for device tree") Signed-off-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index f596fcc32898..25c2c75f5332 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -709,7 +709,7 @@ static const struct dev_pm_ops sysmmu_pm_ops = { pm_runtime_force_resume) }; -static const struct of_device_id sysmmu_of_match[] __initconst = { +static const struct of_device_id sysmmu_of_match[] = { { .compatible = "samsung,exynos-sysmmu", }, { }, }; -- cgit v1.2.3-70-g09d2 From c0368e4db4a3e8a3dce40f3f621c06e14c560d79 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 11 Oct 2017 14:59:22 -0700 Subject: spi: bcm-qspi: Fix use after free in bcm_qspi_probe() in error path There was an inversion in how the error path in bcm_qspi_probe() is done which would make us trip over a KASAN use-after-free report. Turns out that qspi->dev_ids does not get allocated until later in the probe process. Fix this by introducing a new lable: qspi_resource_err which takes care of cleaning up the SPI master instance. Fixes: fa236a7ef240 ("spi: bcm-qspi: Add Broadcom MSPI driver") Signed-off-by: Florian Fainelli Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-bcm-qspi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 6ef6c44f39f5..a172ab299e80 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1250,7 +1250,7 @@ int bcm_qspi_probe(struct platform_device *pdev, goto qspi_probe_err; } } else { - goto qspi_probe_err; + goto qspi_resource_err; } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bspi"); @@ -1272,7 +1272,7 @@ int bcm_qspi_probe(struct platform_device *pdev, qspi->base[CHIP_SELECT] = devm_ioremap_resource(dev, res); if (IS_ERR(qspi->base[CHIP_SELECT])) { ret = PTR_ERR(qspi->base[CHIP_SELECT]); - goto qspi_probe_err; + goto qspi_resource_err; } } @@ -1280,7 +1280,7 @@ int bcm_qspi_probe(struct platform_device *pdev, GFP_KERNEL); if (!qspi->dev_ids) { ret = -ENOMEM; - goto qspi_probe_err; + goto qspi_resource_err; } for (val = 0; val < num_irqs; val++) { @@ -1369,8 +1369,9 @@ qspi_reg_err: bcm_qspi_hw_uninit(qspi); clk_disable_unprepare(qspi->clk); qspi_probe_err: - spi_master_put(master); kfree(qspi->dev_ids); +qspi_resource_err: + spi_master_put(master); return ret; } /* probe function to be called by SoC specific platform driver probe */ -- cgit v1.2.3-70-g09d2 From 195320fd6e9946a0aedeb2fd0e1ac47aa5dc81c4 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 1 Oct 2017 02:06:27 +0100 Subject: ARM: 8700/1: nommu: always reserve address 0 away Some nommu systems have RAM at address 0. When vectors are not located there, the very beginning of memory remains available for dynamic allocations. The memblock allocator explicitly skips the first page but the standard page allocator does not, and while it correctly returns a non-null struct page pointer for that page, page_address() gives 0 which gets confused with NULL (out of memory) by callers despite having plenty of free memory left. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/nommu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 3b8e728cc944..91537d90f5f5 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -344,6 +344,11 @@ void __init arm_mm_memblock_reserve(void) * reserved here. */ #endif + /* + * In any case, always ensure address 0 is never used as many things + * get very confused if 0 is returned as a legitimate address. + */ + memblock_reserve(0, 1); } void __init adjust_lowmem_bounds(void) -- cgit v1.2.3-70-g09d2 From 6042b8c7c08cad7a8bdc0456c619ae941962b40a Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck II Date: Mon, 2 Oct 2017 20:33:35 +0100 Subject: ARM: 8701/1: fix sparse flags for build on 64bit machines By default sparse uses the characteristics of the build machine to infer things like the wordsize. This is fine when doing native builds but for ARM it's, I suspect, very rarely the case and if the build are done on a 64bit machine we get a bunch of warnings like: 'cast truncates bits from constant value (... becomes ...)' Fix this by adding the -m32 flags for sparse. Reported-by: Stephen Boyd Signed-off-by: Luc Van Oostenryck Signed-off-by: Russell King --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 47d3a1ab08d2..817e5cfef83a 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -131,7 +131,7 @@ endif KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float -CHECKFLAGS += -D__arm__ +CHECKFLAGS += -D__arm__ -m32 #Default value head-y := arch/arm/kernel/head$(MMUEXT).o -- cgit v1.2.3-70-g09d2 From ee3eaee6a1dafb7ed7213ec2fad22552b4d58ed1 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 6 Oct 2017 19:39:57 +0100 Subject: ARM: 8704/1: semihosting: use proper instruction on v7m processors The svc instruction doesn't exist on v7m processors. Semihosting ops are invoked with the bkpt instruction instead. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/boot/compressed/debug.S | 4 ++++ arch/arm/kernel/debug.S | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/arm/boot/compressed/debug.S b/arch/arm/boot/compressed/debug.S index 5392ee63338f..8f6e37177de1 100644 --- a/arch/arm/boot/compressed/debug.S +++ b/arch/arm/boot/compressed/debug.S @@ -23,7 +23,11 @@ ENTRY(putc) strb r0, [r1] mov r0, #0x03 @ SYS_WRITEC ARM( svc #0x123456 ) +#ifdef CONFIG_CPU_V7M + THUMB( bkpt #0xab ) +#else THUMB( svc #0xab ) +#endif mov pc, lr .align 2 1: .word _GLOBAL_OFFSET_TABLE_ - . diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index ea9646cc2a0e..0a498cb3fad8 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -115,7 +115,11 @@ ENTRY(printascii) mov r1, r0 mov r0, #0x04 @ SYS_WRITE0 ARM( svc #0x123456 ) +#ifdef CONFIG_CPU_V7M + THUMB( bkpt #0xab ) +#else THUMB( svc #0xab ) +#endif ret lr ENDPROC(printascii) @@ -124,7 +128,11 @@ ENTRY(printch) strb r0, [r1] mov r0, #0x03 @ SYS_WRITEC ARM( svc #0x123456 ) +#ifdef CONFIG_CPU_V7M + THUMB( bkpt #0xab ) +#else THUMB( svc #0xab ) +#endif ret lr ENDPROC(printch) -- cgit v1.2.3-70-g09d2 From 8eb3f87d903168bdbd1222776a6b1e281f50513e Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Tue, 10 Oct 2017 15:01:22 +0800 Subject: KVM: nVMX: fix guest CR4 loading when emulating L2 to L1 exit When KVM emulates an exit from L2 to L1, it loads L1 CR4 into the guest CR4. Before this CR4 loading, the guest CR4 refers to L2 CR4. Because these two CR4's are in different levels of guest, we should vmx_set_cr4() rather than kvm_set_cr4() here. The latter, which is used to handle guest writes to its CR4, checks the guest change to CR4 and may fail if the change is invalid. The failure may cause trouble. Consider we start a L1 guest with non-zero L1 PCID in use, (i.e. L1 CR4.PCIDE == 1 && L1 CR3.PCID != 0) and a L2 guest with L2 PCID disabled, (i.e. L2 CR4.PCIDE == 0) and following events may happen: 1. If kvm_set_cr4() is used in load_vmcs12_host_state() to load L1 CR4 into guest CR4 (in VMCS01) for L2 to L1 exit, it will fail because of PCID check. As a result, the guest CR4 recorded in L0 KVM (i.e. vcpu->arch.cr4) is left to the value of L2 CR4. 2. Later, if L1 attempts to change its CR4, e.g., clearing VMXE bit, kvm_set_cr4() in L0 KVM will think L1 also wants to enable PCID, because the wrong L2 CR4 is used by L0 KVM as L1 CR4. As L1 CR3.PCID != 0, L0 KVM will inject GP to L1 guest. Fixes: 4704d0befb072 ("KVM: nVMX: Exiting from L2 to L1") Cc: qemu-stable@nongnu.org Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a2b804e10c95..95a01609d7ee 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11297,7 +11297,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, /* Same as above - no reason to call set_cr4_guest_host_mask(). */ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); - kvm_set_cr4(vcpu, vmcs12->host_cr4); + vmx_set_cr4(vcpu, vmcs12->host_cr4); nested_ept_uninit_mmu_context(vcpu); -- cgit v1.2.3-70-g09d2 From 8a60aea62100c79fb5d151a1e261f11534c3dbff Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 9 Oct 2017 16:34:01 +0200 Subject: MAINTAINERS: Add Paul Mackerras as maintainer for KVM/powerpc Paul is handling almost all of the powerpc related KVM patches nowadays, so he should be mentioned in the MAINTAINERS file accordingly. Signed-off-by: Thomas Huth Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2d3d750b19c0..6457e5163f93 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7571,7 +7571,7 @@ F: arch/mips/include/asm/kvm* F: arch/mips/kvm/ KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc) -M: Alexander Graf +M: Paul Mackerras L: kvm-ppc@vger.kernel.org W: http://www.linux-kvm.org/ T: git git://github.com/agraf/linux-2.6.git -- cgit v1.2.3-70-g09d2 From 0d923820c6db1644c27c2d0a5af8920fc0f8cd81 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Wed, 4 Oct 2017 12:20:52 +0530 Subject: powerpc/perf: Fix for core/nest imc call trace on cpuhotplug Nest/core pmu units are enabled only when it is used. A reference count is maintained for the events which uses the nest/core pmu units. Currently in *_imc_counters_release function a WARN() is used for notification of any underflow of ref count. The case where event ref count hit a negative value is, when perf session is started, followed by offlining of all cpus in a given core. i.e. in cpuhotplug offline path ppc_core_imc_cpu_offline() function set the ref->count to zero, if the current cpu which is about to offline is the last cpu in a given core and make an OPAL call to disable the engine in that core. And on perf session termination, perf->destroy (core_imc_counters_release) will first decrement the ref->count for this core and based on the ref->count value an opal call is made to disable the core-imc engine. Now, since cpuhotplug path already clears the ref->count for core and disabled the engine, perf->destroy() decrementing again at event termination make it negative which in turn fires the WARN_ON. The same happens for nest units. Add a check to see if the reference count is alreday zero, before decrementing the count, so that the ref count will not hit a negative value. Signed-off-by: Anju T Sudhakar Reviewed-by: Santosh Sivaraj Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ccac86f3463..e3a1f65933b5 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -399,6 +399,20 @@ static void nest_imc_counters_release(struct perf_event *event) /* Take the mutex lock for this node and then decrement the reference count */ mutex_lock(&ref->lock); + if (ref->refc == 0) { + /* + * The scenario where this is true is, when perf session is + * started, followed by offlining of all cpus in a given node. + * + * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline() + * function set the ref->count to zero, if the cpu which is + * about to offline is the last cpu in a given node and make + * an OPAL call to disable the engine in that node. + * + */ + mutex_unlock(&ref->lock); + return; + } ref->refc--; if (ref->refc == 0) { rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, @@ -646,6 +660,20 @@ static void core_imc_counters_release(struct perf_event *event) return; mutex_lock(&ref->lock); + if (ref->refc == 0) { + /* + * The scenario where this is true is, when perf session is + * started, followed by offlining of all cpus in a given core. + * + * In the cpuhotplug offline path, ppc_core_imc_cpu_offline() + * function set the ref->count to zero, if the cpu which is + * about to offline is the last cpu in a given core and make + * an OPAL call to disable the engine in that core. + * + */ + mutex_unlock(&ref->lock); + return; + } ref->refc--; if (ref->refc == 0) { rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, -- cgit v1.2.3-70-g09d2 From cd4f2b30e5ef7d4bde61eb515372d96e8aec1690 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Wed, 11 Oct 2017 18:27:39 +0530 Subject: powerpc/perf: Add ___GFP_NOWARN flag to alloc_pages_node() Stack trace output during a stress test: [ 4.310049] Freeing initrd memory: 22592K [ 4.310646] rtas_flash: no firmware flash support [ 4.313341] cpuhp/64: page allocation failure: order:0, mode:0x14480c0(GFP_KERNEL|__GFP_ZERO|__GFP_THISNODE), nodemask=(null) [ 4.313465] cpuhp/64 cpuset=/ mems_allowed=0 [ 4.313521] CPU: 64 PID: 392 Comm: cpuhp/64 Not tainted 4.11.0-39.el7a.ppc64le #1 [ 4.313588] Call Trace: [ 4.313622] [c000000f1fb1b8e0] [c000000000c09388] dump_stack+0xb0/0xf0 (unreliable) [ 4.313694] [c000000f1fb1b920] [c00000000030ef6c] warn_alloc+0x12c/0x1c0 [ 4.313753] [c000000f1fb1b9c0] [c00000000030ff68] __alloc_pages_nodemask+0xea8/0x1000 [ 4.313823] [c000000f1fb1bbb0] [c000000000113a8c] core_imc_mem_init+0xbc/0x1c0 [ 4.313892] [c000000f1fb1bc00] [c000000000113cdc] ppc_core_imc_cpu_online+0x14c/0x170 [ 4.313962] [c000000f1fb1bc90] [c000000000125758] cpuhp_invoke_callback+0x198/0x5d0 [ 4.314031] [c000000f1fb1bd00] [c00000000012782c] cpuhp_thread_fun+0x8c/0x3d0 [ 4.314101] [c000000f1fb1bd60] [c0000000001678d0] smpboot_thread_fn+0x290/0x2a0 [ 4.314169] [c000000f1fb1bdc0] [c00000000015ee78] kthread+0x168/0x1b0 [ 4.314229] [c000000f1fb1be30] [c00000000000b368] ret_from_kernel_thread+0x5c/0x74 [ 4.314313] Mem-Info: [ 4.314356] active_anon:0 inactive_anon:0 isolated_anon:0 core_imc_mem_init() at system boot use alloc_pages_node() to get memory and alloc_pages_node() throws this stack dump when tried to allocate memory from a node which has no memory behind it. Add a ___GFP_NOWARN flag in allocation request as a fix. Signed-off-by: Anju T Sudhakar Reported-by: Michael Ellerman Reported-by: Venkat R.B Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index e3a1f65933b5..39a0203fd8a5 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -537,8 +537,8 @@ static int core_imc_mem_init(int cpu, int size) /* We need only vbase for core counters */ mem_info->vbase = page_address(alloc_pages_node(phys_id, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, - get_order(size))); + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | + __GFP_NOWARN, get_order(size))); if (!mem_info->vbase) return -ENOMEM; @@ -791,8 +791,8 @@ static int thread_imc_mem_alloc(int cpu_id, int size) * free the memory in cpu offline path. */ local_mem = page_address(alloc_pages_node(phys_id, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, - get_order(size))); + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | + __GFP_NOWARN, get_order(size))); if (!local_mem) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 2bbbd96357ce76cc45ec722c00f654aa7b189112 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Mon, 28 Aug 2017 17:25:16 +0200 Subject: bus: mbus: fix window size calculation for 4GB windows At least the Armada XP SoC supports 4GB on a single DRAM window. Because the size register values contain the actual size - 1, the MSB is set in that case. For example, the SDRAM window's control register's value is 0xffffffe1 for 4GB (bits 31 to 24 contain the size). The MBUS driver reads back each window's size from registers and calculates the actual size as (control_reg | ~DDR_SIZE_MASK) + 1, which overflows for 32 bit values, resulting in other miscalculations further on (a bad RAM window for the CESA crypto engine calculated by mvebu_mbus_setup_cpu_target_nooverlap() in my case). This patch changes the type in 'struct mbus_dram_window' from u32 to u64, which allows us to keep using the same register calculation code in most MBUS-using drivers (which calculate ->size - 1 again). Fixes: fddddb52a6c4 ("bus: introduce an Marvell EBU MBus driver") CC: stable@vger.kernel.org Signed-off-by: Jan Luebbe Signed-off-by: Gregory CLEMENT --- drivers/bus/mvebu-mbus.c | 2 +- include/linux/mbus.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index c7f396903184..70db4d5638a6 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -720,7 +720,7 @@ mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus) if (mbus->hw_io_coherency) w->mbus_attr |= ATTR_HW_COHERENCY; w->base = base & DDR_BASE_CS_LOW_MASK; - w->size = (size | ~DDR_SIZE_MASK) + 1; + w->size = (u64)(size | ~DDR_SIZE_MASK) + 1; } } mvebu_mbus_dram_info.num_cs = cs; diff --git a/include/linux/mbus.h b/include/linux/mbus.h index 0d3f14fd2621..4773145246ed 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -31,8 +31,8 @@ struct mbus_dram_target_info struct mbus_dram_window { u8 cs_index; u8 mbus_attr; - u32 base; - u32 size; + u64 base; + u64 size; } cs[4]; }; -- cgit v1.2.3-70-g09d2 From 27b94b4f1386c3a8181f5a0277434a32e24e7dd7 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 1 Sep 2017 09:22:56 +0200 Subject: drm/amdgpu: fix placement flags in amdgpu_ttm_bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we lose the NO_EVICT flag and can try to evict pinned BOs. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 7ef6c28a34d9..bc746131987f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -834,7 +834,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) placement.busy_placement = &placements; placements.fpfn = 0; placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; - placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + placements.flags = bo->mem.placement | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp, true, false); if (unlikely(r)) -- cgit v1.2.3-70-g09d2 From 13923d0865ca96312197962522e88bc0aedccd74 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:37:49 -0700 Subject: KEYS: encrypted: fix dereference of NULL user_key_payload A key of type "encrypted" references a "master key" which is used to encrypt and decrypt the encrypted key's payload. However, when we accessed the master key's payload, we failed to handle the case where the master key has been revoked, which sets the payload pointer to NULL. Note that request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we acquire its semaphore. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. This was an issue for master keys of type "user" only. Master keys can also be of type "trusted", but those cannot be revoked. Fixes: 7e70cb497850 ("keys: add new key-type encrypted") Reviewed-by: James Morris Cc: [v2.6.38+] Cc: Mimi Zohar Cc: David Safford Signed-off-by: Eric Biggers Signed-off-by: David Howells --- security/keys/encrypted-keys/encrypted.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 69855ba0d3b3..535db141f4da 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -309,6 +309,13 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k down_read(&ukey->sem); upayload = user_key_payload_locked(ukey); + if (!upayload) { + /* key was revoked before we acquired its semaphore */ + up_read(&ukey->sem); + key_put(ukey); + ukey = ERR_PTR(-EKEYREVOKED); + goto error; + } *master_key = upayload->data; *master_keylen = upayload->datalen; error: -- cgit v1.2.3-70-g09d2 From aa3c2ba1c3a7c25d0440a8ac3ddd266c0f43b7b7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 12 Oct 2017 08:37:45 -0400 Subject: drm/msm/mdp5: add missing max size for 8x74 v1 This should have same max width as v2. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index c2bdad88447e..824067d2d427 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -83,6 +83,8 @@ const struct mdp5_cfg_hw msm8x74v1_config = { .caps = MDP_LM_CAP_WB }, }, .nb_stages = 5, + .max_width = 2048, + .max_height = 0xFFFF, }, .dspp = { .count = 3, -- cgit v1.2.3-70-g09d2 From f44001e2637138d9d506efe8da67011f8170e860 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 2 Oct 2017 10:28:37 -0400 Subject: drm/msm: use proper memory barriers for updating tail/head Fixes intermittent corruption of cmdstream dump. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_rd.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 0366b8092f97..ec56794ad039 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -111,10 +111,14 @@ static void rd_write(struct msm_rd_state *rd, const void *buf, int sz) wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0); + /* Note that smp_load_acquire() is not strictly required + * as CIRC_SPACE_TO_END() does not access the tail more + * than once. + */ n = min(sz, circ_space_to_end(&rd->fifo)); memcpy(fptr, ptr, n); - fifo->head = (fifo->head + n) & (BUF_SZ - 1); + smp_store_release(&fifo->head, (fifo->head + n) & (BUF_SZ - 1)); sz -= n; ptr += n; @@ -145,13 +149,17 @@ static ssize_t rd_read(struct file *file, char __user *buf, if (ret) goto out; + /* Note that smp_load_acquire() is not strictly required + * as CIRC_CNT_TO_END() does not access the head more than + * once. + */ n = min_t(int, sz, circ_count_to_end(&rd->fifo)); if (copy_to_user(buf, fptr, n)) { ret = -EFAULT; goto out; } - fifo->tail = (fifo->tail + n) & (BUF_SZ - 1); + smp_store_release(&fifo->tail, (fifo->tail + n) & (BUF_SZ - 1)); *ppos += n; wake_up_all(&rd->fifo_event); -- cgit v1.2.3-70-g09d2 From c9811d0fa55929b182f62e0ee49b71b0bea6a936 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 11 Oct 2017 11:36:56 +0000 Subject: drm/msm: fix return value check in _msm_gem_kernel_new() In case of error, the function msm_gem_get_vaddr() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: 8223286d62e2 ("drm/msm: Add a helper function for in-kernel buffer allocations") Signed-off-by: Wei Yongjun Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index f15821a0d900..0b338fbf97ce 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1045,10 +1045,10 @@ static void *_msm_gem_kernel_new(struct drm_device *dev, uint32_t size, } vaddr = msm_gem_get_vaddr(obj); - if (!vaddr) { + if (IS_ERR(vaddr)) { msm_gem_put_iova(obj, aspace); drm_gem_object_unreference(obj); - return ERR_PTR(-ENOMEM); + return ERR_CAST(vaddr); } if (bo) -- cgit v1.2.3-70-g09d2 From cc6afe2240298049585e86b1ade85efc8a7f225d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Oct 2017 12:12:57 +0200 Subject: x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on hypervisors Commit 594a30fb1242 ("x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on CPUs without the feature", 2017-08-30) was also about silencing the warning on VirtualBox; however, KVM does expose the TSC deadline timer, and it's virtualized so that it is immune from CPU errata. Therefore, booting 4.13 with "-cpu Haswell" shows this in the logs: [ 0.000000] [Firmware Bug]: TSC_DEADLINE disabled due to Errata; please update microcode to version: 0xb2 (or later) Even if you had a hypervisor that does _not_ virtualize the TSC deadline and rather exposes the hardware one, it should be the hypervisors task to update microcode and possibly hide the flag from CPUID. So just hide the message when running on _any_ hypervisor, not just those that do not support the TSC deadline timer. The older check still makes sense, so keep it. Fixes: bd9240a18e ("x86/apic: Add TSC_DEADLINE quirk due to errata") Signed-off-by: Paolo Bonzini Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Hans de Goede Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1507630377-54471-1-git-send-email-pbonzini@redhat.com --- arch/x86/kernel/apic/apic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d705c769f77d..50109eae8cd7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -600,7 +600,8 @@ static void apic_check_deadline_errata(void) const struct x86_cpu_id *m; u32 rev; - if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) || + boot_cpu_has(X86_FEATURE_HYPERVISOR)) return; m = x86_match_cpu(deadline_match); -- cgit v1.2.3-70-g09d2 From 616dd5872e52493863b0202632703eebd51243dc Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 11 Oct 2017 17:16:04 -0400 Subject: x86/apic: Update TSC_DEADLINE quirk with additional SKX stepping SKX stepping-3 fixed the TSC_DEADLINE issue in a different ucode version number than stepping-4. Linux needs to know this stepping-3 specific version number to also enable the TSC_DEADLINE on stepping-3. The steppings and ucode versions are documented in the SKX BIOS update: https://downloadmirror.intel.com/26978/eng/ReleaseNotes_R00.01.0004.txt Signed-off-by: Len Brown Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Link: https://lkml.kernel.org/r/60f2bbf7cf617e212b522e663f84225bfebc50e5.1507756305.git.len.brown@intel.com --- arch/x86/kernel/apic/apic.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 50109eae8cd7..ff891772c9f8 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -573,11 +573,21 @@ static u32 bdx_deadline_rev(void) return ~0U; } +static u32 skx_deadline_rev(void) +{ + switch (boot_cpu_data.x86_mask) { + case 0x03: return 0x01000136; + case 0x04: return 0x02000014; + } + + return ~0U; +} + static const struct x86_cpu_id deadline_match[] = { DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev), - DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X, 0x02000014), + DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20), -- cgit v1.2.3-70-g09d2 From d124b2c53c7bee6569d2a2d0b18b4a1afde00134 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:40:00 -0700 Subject: FS-Cache: fix dereference of NULL user_key_payload When the file /proc/fs/fscache/objects (available with CONFIG_FSCACHE_OBJECT_LIST=y) is opened, we request a user key with description "fscache:objlist", then access its payload. However, a revoked key has a NULL payload, and we failed to check for this. request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we access its payload. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. Fixes: 4fbf4291aa15 ("FS-Cache: Allow the current state of all objects to be dumped") Reviewed-by: James Morris Cc: [v2.6.32+] Signed-off-by: Eric Biggers Signed-off-by: David Howells --- fs/fscache/object-list.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index b5ab06fabc60..0438d4cd91ef 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -331,6 +331,13 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) rcu_read_lock(); confkey = user_key_payload_rcu(key); + if (!confkey) { + /* key was revoked */ + rcu_read_unlock(); + key_put(key); + goto no_config; + } + buf = confkey->data; for (len = confkey->datalen - 1; len >= 0; len--) { -- cgit v1.2.3-70-g09d2 From 192cabd6a296cbc57b3d8c05c4c89d87fc102506 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:43:20 -0700 Subject: lib/digsig: fix dereference of NULL user_key_payload digsig_verify() requests a user key, then accesses its payload. However, a revoked key has a NULL payload, and we failed to check for this. request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we acquire its semaphore. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. Fixes: 051dbb918c7f ("crypto: digital signature verification support") Reviewed-by: James Morris Cc: [v3.3+] Cc: Dmitry Kasatkin Signed-off-by: Eric Biggers Signed-off-by: David Howells --- lib/digsig.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/digsig.c b/lib/digsig.c index 03d7c63837ae..6ba6fcd92dd1 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -87,6 +87,12 @@ static int digsig_verify_rsa(struct key *key, down_read(&key->sem); ukp = user_key_payload_locked(key); + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + err = -EKEYREVOKED; + goto err1; + } + if (ukp->datalen < sizeof(*pkh)) goto err1; -- cgit v1.2.3-70-g09d2 From d60b5b7854c3d135b869f74fb93eaf63cbb1991a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:46:18 -0700 Subject: fscrypt: fix dereference of NULL user_key_payload When an fscrypt-encrypted file is opened, we request the file's master key from the keyrings service as a logon key, then access its payload. However, a revoked key has a NULL payload, and we failed to check for this. request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we acquire its semaphore. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. Fixes: 88bd6ccdcdd6 ("ext4 crypto: add encryption key management facilities") Reviewed-by: James Morris Cc: [v4.1+] Signed-off-by: Eric Biggers Signed-off-by: David Howells --- fs/crypto/keyinfo.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 018c588c7ac3..8e704d12a1cf 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -109,6 +109,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info, goto out; } ukp = user_key_payload_locked(keyring_key); + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + res = -EKEYREVOKED; + goto out; + } if (ukp->datalen != sizeof(struct fscrypt_key)) { res = -EINVAL; goto out; -- cgit v1.2.3-70-g09d2 From f66665c09ab489a11ca490d6a82df57cfc1bea3e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:51:27 -0700 Subject: ecryptfs: fix dereference of NULL user_key_payload In eCryptfs, we failed to verify that the authentication token keys are not revoked before dereferencing their payloads, which is problematic because the payload of a revoked key is NULL. request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we acquire the key semaphore. Fix it by updating ecryptfs_get_key_payload_data() to return -EKEYREVOKED if the key payload is NULL. For completeness we check this for "encrypted" keys as well as "user" keys, although encrypted keys cannot be revoked currently. Alternatively we could use key_validate(), but since we'll also need to fix ecryptfs_get_key_payload_data() to validate the payload length, it seems appropriate to just check the payload pointer. Fixes: 237fead61998 ("[PATCH] ecryptfs: fs/Makefile and fs/Kconfig") Reviewed-by: James Morris Cc: [v2.6.19+] Cc: Michael Halcrow Signed-off-by: Eric Biggers Signed-off-by: David Howells --- fs/ecryptfs/ecryptfs_kernel.h | 24 +++++++++++++++++------- fs/ecryptfs/keystore.c | 9 ++++++++- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 9c351bf757b2..3fbc0ff79699 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -84,11 +84,16 @@ struct ecryptfs_page_crypt_context { static inline struct ecryptfs_auth_tok * ecryptfs_get_encrypted_key_payload_data(struct key *key) { - if (key->type == &key_type_encrypted) - return (struct ecryptfs_auth_tok *) - (&((struct encrypted_key_payload *)key->payload.data[0])->payload_data); - else + struct encrypted_key_payload *payload; + + if (key->type != &key_type_encrypted) return NULL; + + payload = key->payload.data[0]; + if (!payload) + return ERR_PTR(-EKEYREVOKED); + + return (struct ecryptfs_auth_tok *)payload->payload_data; } static inline struct key *ecryptfs_get_encrypted_key(char *sig) @@ -114,12 +119,17 @@ static inline struct ecryptfs_auth_tok * ecryptfs_get_key_payload_data(struct key *key) { struct ecryptfs_auth_tok *auth_tok; + struct user_key_payload *ukp; auth_tok = ecryptfs_get_encrypted_key_payload_data(key); - if (!auth_tok) - return (struct ecryptfs_auth_tok *)user_key_payload_locked(key)->data; - else + if (auth_tok) return auth_tok; + + ukp = user_key_payload_locked(key); + if (!ukp) + return ERR_PTR(-EKEYREVOKED); + + return (struct ecryptfs_auth_tok *)ukp->data; } #define ECRYPTFS_MAX_KEYSET_SIZE 1024 diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 3cf1546dca82..fa218cd64f74 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -459,7 +459,8 @@ out: * @auth_tok_key: key containing the authentication token * @auth_tok: authentication token * - * Returns zero on valid auth tok; -EINVAL otherwise + * Returns zero on valid auth tok; -EINVAL if the payload is invalid; or + * -EKEYREVOKED if the key was revoked before we acquired its semaphore. */ static int ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key, @@ -468,6 +469,12 @@ ecryptfs_verify_auth_tok_from_key(struct key *auth_tok_key, int rc = 0; (*auth_tok) = ecryptfs_get_key_payload_data(auth_tok_key); + if (IS_ERR(*auth_tok)) { + rc = PTR_ERR(*auth_tok); + *auth_tok = NULL; + goto out; + } + if (ecryptfs_verify_version((*auth_tok)->version)) { printk(KERN_ERR "Data structure version mismatch. Userspace " "tools must match eCryptfs kernel module with major " -- cgit v1.2.3-70-g09d2 From a18a0ea0096833ecb52053b183fcf9709f7bafd8 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 6 Oct 2017 16:27:06 +0530 Subject: drm/msm/dsi: Use correct pm_runtime_put variant during host_init The DSI runtime PM suspend/resume callbacks check whether msm_host->cfg_hnd is non-NULL before trying to enable the bus clocks. This is done to accommodate early calls to these functions that may happen before the bus clocks are even initialized. Calling pm_runtime_put_autosuspend() in dsi_host_init() can result in racy behaviour since msm_host->cfg_hnd is set very soon after. If the suspend callback happens too late, we end up trying to disable clocks that were never enabled, resulting in a bunch of WARN_ON splats. Use pm_runtime_put_sync() so that the suspend callback is called immediately. Reported-by: Nicolas Dechesne Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index dbb31a014419..deaf869374ea 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -248,7 +248,7 @@ disable_clks: clk_disable_unprepare(ahb_clk); disable_gdsc: regulator_disable(gdsc_reg); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); put_clk: clk_put(ahb_clk); put_gdsc: -- cgit v1.2.3-70-g09d2 From 9e4621531e2af230611c28c67306a31e1a09f76a Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 6 Oct 2017 16:27:07 +0530 Subject: drm/msm/mdp5: Remove extra pm_runtime_put call in mdp5_crtc_cursor_set() While converting mdp5_enable/disable() calls to pm_runtime_get/put() API, an extra call to pm_runtime_put_autosuspend() crept in mdp5_crtc_cursor_set(). This results in calling the suspend handler twice, and therefore clk_disables twice, which isn't a nice thing to do. Fixes: d68fe15b1878 (drm/msm/mdp5: Use runtime PM get/put API instead ...) Reported-by: Stanimir Varbanov Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 6fcb58ab718c..440977677001 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -804,8 +804,6 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); - pm_runtime_put_autosuspend(&pdev->dev); - set_cursor: ret = mdp5_ctl_set_cursor(ctl, pipeline, 0, cursor_enable); if (ret) { -- cgit v1.2.3-70-g09d2 From db179e0d0d1003f10b798e072524be6bcdae5053 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 26 Sep 2017 15:25:10 +0900 Subject: of: do not leak console options Do not strdup() console options. It seems that the only reason for it to be strdup()-ed was a compilation warning: printk, UART and console drivers, for some reason, expect char pointer instead of const char pointer. So we can just pass `of_stdout_options', but need to cast it to char pointer. A better fix would be to change printk, console drivers and UART to accept const char `options'; but that will take time - there are lots of drivers to update. The patch also fixes a possible memory leak: add_preferred_console() can fail, but we don't kfree() options. Signed-off-by: Sergey Senozhatsky Reviewed-by: Petr Mladek Signed-off-by: Rob Herring --- drivers/of/base.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 260d33c0f26c..63897531cd75 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1781,8 +1781,12 @@ bool of_console_check(struct device_node *dn, char *name, int index) { if (!dn || dn != of_stdout || console_set_on_cmdline) return false; - return !add_preferred_console(name, index, - kstrdup(of_stdout_options, GFP_KERNEL)); + + /* + * XXX: cast `options' to char pointer to suppress complication + * warnings: printk, UART and console drivers expect char pointer. + */ + return !add_preferred_console(name, index, (char *)of_stdout_options); } EXPORT_SYMBOL_GPL(of_console_check); -- cgit v1.2.3-70-g09d2 From 22f8cc6e33731678e7687a18ffd0f578131edb4c Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Tue, 26 Sep 2017 18:40:00 +1000 Subject: drivers: of: increase MAX_RESERVED_REGIONS to 32 There are two types of memory reservations firmware can ask the kernel to make in the device tree: static and dynamic. See Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt If you have greater than 16 entries in /reserved-memory (as we do on POWER9 systems) you would get this scary looking error message: [ 0.000000] OF: reserved mem: not enough space all defined regions. This is harmless if all your reservations are static (which with OPAL on POWER9, they are). It is not harmless if you have any dynamic reservations after the 16th. In the first pass over the fdt to find reservations, the child nodes of /reserved-memory are added to a static array in of_reserved_mem.c so that memory can be reserved in a 2nd pass. The array has 16 entries. This is why, on my dual socket POWER9 system, I get that error 4 times with 20 static reservations. We don't have a problem on ppc though, as in arch/powerpc/kernel/prom.c we look at the new style /reserved-ranges property to do reservations, and this logic was introduced in 0962e8004e974 (well before any powernv system shipped). A Google search shows up no occurances of that exact error message, so we're probably safe in that no machine that people use has memory not being reserved when it should be. The simple fix is to bump the length of the array to 32 which "should be enough for everyone(TM)". The simple fix of not recording static allocations in the array would cause problems for devices with "memory-region" properties. A more future-proof fix is likely possible, although more invasive and this simple fix is perfectly suitable in the meantime while a more future-proof fix is developed. Signed-off-by: Stewart Smith Tested-by: Mauricio Faria de Oliveira Signed-off-by: Rob Herring --- drivers/of/of_reserved_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index d507c3569a88..32771c2ced7b 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -25,7 +25,7 @@ #include #include -#define MAX_RESERVED_REGIONS 16 +#define MAX_RESERVED_REGIONS 32 static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS]; static int reserved_mem_count; -- cgit v1.2.3-70-g09d2 From 3314c6bdd26880e0dfbcb0cb85a1b36d185ce47c Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Tue, 22 Aug 2017 02:19:12 +0200 Subject: device property: preserve usecount for node passed to of_fwnode_graph_get_port_parent() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using CONFIG_OF_DYNAMIC=y uncovered an imbalance in the usecount of the node being passed to of_fwnode_graph_get_port_parent(). Preserve the usecount by using of_get_parent() instead of of_get_next_parent() which don't decrement the usecount of the node passed to it. Fixes: 3b27d00e7b6d7c88 ("device property: Move fwnode graph ops to firmware specific locations") Signed-off-by: Niklas Söderlund Acked-by: Sakari Ailus Signed-off-by: Rob Herring --- drivers/of/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index fbb72116e9d4..264c355ba1ff 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -954,7 +954,7 @@ of_fwnode_graph_get_port_parent(struct fwnode_handle *fwnode) struct device_node *np; /* Get the parent of the port */ - np = of_get_next_parent(to_of_node(fwnode)); + np = of_get_parent(to_of_node(fwnode)); if (!np) return NULL; -- cgit v1.2.3-70-g09d2 From 6bd6ae2dfc7e091059fd8a650579bb1efc9b4b9f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 24 Aug 2017 14:24:29 -0400 Subject: drm/msm: fix error path cleanup If we fail to attach iommu, gpu->aspace could be IS_ERR().. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index ffbff27600e0..6a887032c66a 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -718,7 +718,8 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) msm_gem_put_iova(gpu->rb->bo, gpu->aspace); msm_ringbuffer_destroy(gpu->rb); } - if (gpu->aspace) { + + if (!IS_ERR_OR_NULL(gpu->aspace)) { gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, NULL, 0); msm_gem_address_space_put(gpu->aspace); -- cgit v1.2.3-70-g09d2 From 06451a3d1d777141dedfa947649cbb0c594ac3af Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 12 Sep 2017 14:23:05 -0400 Subject: drm/msm: fix _NO_IMPLICIT fencing case We need to call reservation_object_reserve_shared() in both cases, but this wasn't happening in the _NO_IMPLICIT submit case. Fixes: f0a42bb ("drm/msm: submit support for in-fences") Reported-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 11 ----------- drivers/gpu/drm/msm/msm_gem_submit.c | 24 ++++++++++++++++++------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 0b338fbf97ce..ea5bb0e1632c 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -610,17 +610,6 @@ int msm_gem_sync_object(struct drm_gem_object *obj, struct dma_fence *fence; int i, ret; - if (!exclusive) { - /* NOTE: _reserve_shared() must happen before _add_shared_fence(), - * which makes this a slightly strange place to call it. OTOH this - * is a convenient can-fail point to hook it in. (And similar to - * how etnaviv and nouveau handle this.) - */ - ret = reservation_object_reserve_shared(msm_obj->resv); - if (ret) - return ret; - } - fobj = reservation_object_get_list(msm_obj->resv); if (!fobj || (fobj->shared_count == 0)) { fence = reservation_object_get_excl(msm_obj->resv); diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 5d0a75d4b249..93535cac0676 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -221,7 +221,7 @@ fail: return ret; } -static int submit_fence_sync(struct msm_gem_submit *submit) +static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) { int i, ret = 0; @@ -229,6 +229,20 @@ static int submit_fence_sync(struct msm_gem_submit *submit) struct msm_gem_object *msm_obj = submit->bos[i].obj; bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE; + if (!write) { + /* NOTE: _reserve_shared() must happen before + * _add_shared_fence(), which makes this a slightly + * strange place to call it. OTOH this is a + * convenient can-fail point to hook it in. + */ + ret = reservation_object_reserve_shared(msm_obj->resv); + if (ret) + return ret; + } + + if (no_implicit) + continue; + ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write); if (ret) break; @@ -451,11 +465,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out; - if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) { - ret = submit_fence_sync(submit); - if (ret) - goto out; - } + ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT)); + if (ret) + goto out; ret = submit_pin_objects(submit); if (ret) -- cgit v1.2.3-70-g09d2 From 2aab9c3ca47dd4fcc19a8743c6e4d348640dd3fa Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 12 Oct 2017 14:22:04 +1100 Subject: scripts: fix faddr2line to work on last symbol If faddr2line is given a function name which is the last one listed by "nm -n", it will fail because it never finds the next symbol. So teach the awk script to catch that possibility, and use 'size' to provide the end point of the last function. Signed-off-by: NeilBrown Signed-off-by: Linus Torvalds --- scripts/faddr2line | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 29df825d375c..2f6ce802397d 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -103,11 +103,12 @@ __faddr2line() { # Go through each of the object's symbols which match the func name. # In rare cases there might be duplicates. + file_end=$(size -Ax $objfile | awk '$1 == ".text" {print $2}') while read symbol; do local fields=($symbol) local sym_base=0x${fields[0]} local sym_type=${fields[1]} - local sym_end=0x${fields[3]} + local sym_end=${fields[3]} # calculate the size local sym_size=$(($sym_end - $sym_base)) @@ -157,7 +158,7 @@ __faddr2line() { addr2line -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;" DONE=1 - done < <(nm -n $objfile | awk -v fn=$func '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, $1 }') + done < <(nm -n $objfile | awk -v fn=$func -v end=$file_end '$3 == fn { found=1; line=$0; start=$1; next } found == 1 { found=0; print line, "0x"$1 } END {if (found == 1) print line, end; }') } [[ $# -lt 2 ]] && usage -- cgit v1.2.3-70-g09d2 From 12ed3772b7e11b53a58ecbd8ce258271fb148cc6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 11 Oct 2017 20:10:31 -0700 Subject: ip: update policy routing config help The kernel config help for policy routing was still pointing at an ancient document from 2000 that refers to Linux 2.1. Update it to point to something that is at least occasionally updated. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 91a2557942fa..f48fe6fc7e8c 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -70,11 +70,9 @@ config IP_MULTIPLE_TABLES address into account. Furthermore, the TOS (Type-Of-Service) field of the packet can be used for routing decisions as well. - If you are interested in this, please see the preliminary - documentation at - and . - You will need supporting software from - . + If you need more information, see the Linux Advanced + Routing and Traffic Control documentation at + If unsure, say N. -- cgit v1.2.3-70-g09d2 From 0d8ba16278ec30a262d931875018abee332f926f Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Fri, 13 Oct 2017 11:29:41 +0530 Subject: powerpc/perf: Fix IMC initialization crash Panic observed with latest firmware, and upstream kernel: NIP init_imc_pmu+0x8c/0xcf0 LR init_imc_pmu+0x2f8/0xcf0 Call Trace: init_imc_pmu+0x2c8/0xcf0 (unreliable) opal_imc_counters_probe+0x300/0x400 platform_drv_probe+0x64/0x110 driver_probe_device+0x3d8/0x580 __driver_attach+0x14c/0x1a0 bus_for_each_dev+0x8c/0xf0 driver_attach+0x34/0x50 bus_add_driver+0x298/0x350 driver_register+0x9c/0x180 __platform_driver_register+0x5c/0x70 opal_imc_driver_init+0x2c/0x40 do_one_initcall+0x64/0x1d0 kernel_init_freeable+0x280/0x374 kernel_init+0x24/0x160 ret_from_kernel_thread+0x5c/0x74 While registering nest imc at init, cpu-hotplug callback nest_pmu_cpumask_init() makes an OPAL call to stop the engine. And if the OPAL call fails, imc_common_cpuhp_mem_free() is invoked to cleanup memory and cpuhotplug setup. But when cleaning up the attribute group, we are dereferencing the attribute element array without checking whether the backing element is not NULL. This causes the kernel panic. Add a check for the backing element prior to dereferencing the attribute element, to handle the failing case gracefully. Signed-off-by: Anju T Sudhakar Reported-by: Pridhiviraj Paidipeddi [mpe: Trim change log] Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 39a0203fd8a5..88126245881b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1176,7 +1176,8 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) } /* Only free the attr_groups which are dynamically allocated */ - kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); + if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); return; -- cgit v1.2.3-70-g09d2 From 4a40aedec653bb9e22c01ef4fe0a66278b1a666f Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 3 Oct 2017 15:20:27 +0100 Subject: DT: arm,gic-v3: Update the ITS size in the examples Currently, the examples are using 2MB for the ITS size. Per the specification (section 8.18 in ARM IHI 0069D), the ITS address map is 128KB. Update the examples to match the specification. Signed-off-by: Julien Grall Signed-off-by: Marc Zyngier --- .../devicetree/bindings/interrupt-controller/arm,gic-v3.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt index 4c29cdab0ea5..5eb108e180fa 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt @@ -99,7 +99,7 @@ Examples: compatible = "arm,gic-v3-its"; msi-controller; #msi-cells = <1>; - reg = <0x0 0x2c200000 0 0x200000>; + reg = <0x0 0x2c200000 0 0x20000>; }; }; @@ -124,14 +124,14 @@ Examples: compatible = "arm,gic-v3-its"; msi-controller; #msi-cells = <1>; - reg = <0x0 0x2c200000 0 0x200000>; + reg = <0x0 0x2c200000 0 0x20000>; }; gic-its@2c400000 { compatible = "arm,gic-v3-its"; msi-controller; #msi-cells = <1>; - reg = <0x0 0x2c400000 0 0x200000>; + reg = <0x0 0x2c400000 0 0x20000>; }; ppi-partitions { -- cgit v1.2.3-70-g09d2 From c427a475b6bc9d3304cca04acdec53464f71f24c Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Sat, 23 Sep 2017 13:50:19 -0500 Subject: irqchip/gic-v3-its: Fix the incorrect BUG_ON in its_init_vpe_domain() The driver probe path hits 'BUG_ON(entries != vpe_proxy.dev->nr_ites)' on systems where it has VLPI capability, doesn't support direct LPI feature and boot with a single CPU. Relax the BUG_ON() condition to fix the issue. Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index e8d89343d613..e3a59f43def8 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2851,7 +2851,7 @@ static int its_init_vpe_domain(void) return -ENOMEM; } - BUG_ON(entries != vpe_proxy.dev->nr_ites); + BUG_ON(entries > vpe_proxy.dev->nr_ites); raw_spin_lock_init(&vpe_proxy.lock); vpe_proxy.next_victim = 0; -- cgit v1.2.3-70-g09d2 From 32bd44dc19de012e22f1fdebd2606b5fb86d54c5 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Sat, 7 Oct 2017 15:43:48 -0500 Subject: irqchip/gic-v3-its: Fix the incorrect parsing of VCPU table size The VCPU table consists of vPE entries, and its size provides the number of VPEs supported by GICv4 hardware. Unfortunately the maximum size of the VPE table is not discoverable like Device table. All VLPI commands limits the number of bits to 16 to hold VPEID, which is index into VCPU table. Don't apply DEVID bits for VCPU table instead assume maximum bits to 16. ITS log messages on QDF2400 without fix: allocated 524288 Devices (indirect, esz 8, psz 64K, shr 1) allocated 8192 Interrupt Collections (flat, esz 8, psz 64K, shr 1) Virtual CPUs Table too large, reduce ids 32->26 Virtual CPUs too large, reduce ITS pages 8192->256 allocated 2097152 Virtual CPUs (flat, esz 8, psz 64K, shr 1) ITS log messages on QDF2400 with fix: allocated 524288 Devices (indirect, esz 8, psz 64K, shr 1) allocated 8192 Interrupt Collections (flat, esz 8, psz 64K, shr 1) allocated 65536 Virtual CPUs (flat, esz 8, psz 64K, shr 1) Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index e3a59f43def8..991cf33750c6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -107,6 +107,10 @@ struct its_node { #define ITS_ITT_ALIGN SZ_256 +/* The maximum number of VPEID bits supported by VLPI commands */ +#define ITS_MAX_VPEID_BITS (16) +#define ITS_MAX_VPEID (1 << (ITS_MAX_VPEID_BITS)) + /* Convert page order to size in bytes */ #define PAGE_ORDER_TO_SIZE(o) (PAGE_SIZE << (o)) @@ -1582,13 +1586,12 @@ retry_baser: static bool its_parse_indirect_baser(struct its_node *its, struct its_baser *baser, - u32 psz, u32 *order) + u32 psz, u32 *order, u32 ids) { u64 tmp = its_read_baser(its, baser); u64 type = GITS_BASER_TYPE(tmp); u64 esz = GITS_BASER_ENTRY_SIZE(tmp); u64 val = GITS_BASER_InnerShareable | GITS_BASER_RaWaWb; - u32 ids = its->device_ids; u32 new_order = *order; bool indirect = false; @@ -1680,9 +1683,13 @@ static int its_alloc_tables(struct its_node *its) continue; case GITS_BASER_TYPE_DEVICE: + indirect = its_parse_indirect_baser(its, baser, + psz, &order, + its->device_ids); case GITS_BASER_TYPE_VCPU: indirect = its_parse_indirect_baser(its, baser, - psz, &order); + psz, &order, + ITS_MAX_VPEID_BITS); break; } @@ -2551,7 +2558,7 @@ static struct irq_chip its_vpe_irq_chip = { static int its_vpe_id_alloc(void) { - return ida_simple_get(&its_vpeid_ida, 0, 1 << 16, GFP_KERNEL); + return ida_simple_get(&its_vpeid_ida, 0, ITS_MAX_VPEID, GFP_KERNEL); } static void its_vpe_id_free(u16 id) -- cgit v1.2.3-70-g09d2 From 30ae9610d275f8f03f5bf7612ce71d8af6fc400b Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 9 Oct 2017 11:46:55 -0500 Subject: irqchip/gic-v3-its: Add missing changes to support 52bit physical address The current ITS driver works fine as long as normal memory and GICR regions are located within the lower 48bit (>=0 && <2^48) physical address space. Some of the registers GICR_PEND/PROP, GICR_VPEND/VPROP and GITS_CBASER are handled properly but not all when configuring the hardware with 52bit physical address. This patch does the following changes to support 52bit PA. -Handle 52bit PA in GITS_BASERn. -Fix ITT_addr width to 52bits, bits[51:8]. -Fix RDbase width to 52bits, bits[51:16]. -Fix VPT_addr width to 52bits, bits[51:16]. Definition of the GITS_BASERn register when ITS PageSize is 64KB: -Bits[47:16] of the register provide bits[47:16] of the table PA. -Bits[15:12] of the register provide bits[51:48] of the table PA. -Bits[15:00] of the base physical address are 0. Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 26 +++++++++++++++++++++----- include/linux/irqchip/arm-gic-v3.h | 2 ++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 991cf33750c6..e88395605e32 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -312,7 +312,7 @@ static void its_encode_size(struct its_cmd_block *cmd, u8 size) static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) { - its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 50, 8); + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); } static void its_encode_valid(struct its_cmd_block *cmd, int valid) @@ -322,7 +322,7 @@ static void its_encode_valid(struct its_cmd_block *cmd, int valid) static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) { - its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 50, 16); + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16); } static void its_encode_collection(struct its_cmd_block *cmd, u16 col) @@ -362,7 +362,7 @@ static void its_encode_its_list(struct its_cmd_block *cmd, u16 its_list) static void its_encode_vpt_addr(struct its_cmd_block *cmd, u64 vpt_pa) { - its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 50, 16); + its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 51, 16); } static void its_encode_vpt_size(struct its_cmd_block *cmd, u8 vpt_size) @@ -1482,9 +1482,9 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, u64 val = its_read_baser(its, baser); u64 esz = GITS_BASER_ENTRY_SIZE(val); u64 type = GITS_BASER_TYPE(val); + u64 baser_phys, tmp; u32 alloc_pages; void *base; - u64 tmp; retry_alloc_baser: alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); @@ -1500,8 +1500,24 @@ retry_alloc_baser: if (!base) return -ENOMEM; + baser_phys = virt_to_phys(base); + + /* Check if the physical address of the memory is above 48bits */ + if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && (baser_phys >> 48)) { + + /* 52bit PA is supported only when PageSize=64K */ + if (psz != SZ_64K) { + pr_err("ITS: no 52bit PA support when psz=%d\n", psz); + free_pages((unsigned long)base, order); + return -ENXIO; + } + + /* Convert 52bit PA to 48bit field */ + baser_phys = GITS_BASER_PHYS_52_to_48(baser_phys); + } + retry_baser: - val = (virt_to_phys(base) | + val = (baser_phys | (type << GITS_BASER_TYPE_SHIFT) | ((esz - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | ((alloc_pages - 1) << GITS_BASER_PAGES_SHIFT) | diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1ea576c8126f..14b74f22d43c 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -372,6 +372,8 @@ #define GITS_BASER_ENTRY_SIZE_SHIFT (48) #define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) #define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) +#define GITS_BASER_PHYS_52_to_48(phys) \ + (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) #define GITS_BASER_SHAREABILITY_SHIFT (10) #define GITS_BASER_InnerShareable \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) -- cgit v1.2.3-70-g09d2 From 20608924cc2e6bdeaf6f58ccbe9ddfe12dbfa082 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 4 Oct 2017 14:26:26 +0200 Subject: genirq: generic chip: Add irq_gc_mask_disable_and_ack_set() The irq_gc_mask_disable_reg_and_ack() function name implies that it provides the combined functions of irq_gc_mask_disable_reg() and irq_gc_ack(). However, the implementation does not actually do that since it writes the mask instead of the disable register. It also does not maintain the mask cache which makes it inappropriate to use with other masking functions. In addition, commit 659fb32d1b67 ("genirq: replace irq_gc_ack() with {set,clr}_bit variants (fwd)") effectively renamed irq_gc_ack() to irq_gc_ack_set_bit() so this function probably should have also been renamed at that time. The generic chip code currently provides three functions for use with the irq_mask member of the irq_chip structure and two functions for use with the irq_ack member of the irq_chip structure. These functions could be combined into six functions for use with the irq_mask_ack member of the irq_chip structure. However, since only one of the combinations is currently used, only the function irq_gc_mask_disable_and_ack_set() is added by this commit. The '_reg' and '_bit' portions of the base function name were left out of the new combined function name in an attempt to keep the function name length manageable with the 80 character source code line length while still allowing the distinct aspects of each combination to be captured by the name. If other combinations are desired in the future please add them to the irq generic chip library at that time. Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- include/linux/irq.h | 1 + kernel/irq/generic-chip.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index d4728bf6a537..494d328f7051 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1010,6 +1010,7 @@ void irq_gc_unmask_enable_reg(struct irq_data *d); void irq_gc_ack_set_bit(struct irq_data *d); void irq_gc_ack_clr_bit(struct irq_data *d); void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); +void irq_gc_mask_disable_and_ack_set(struct irq_data *d); void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 5270a54b9fa4..ec5fe9a0cb05 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -150,6 +150,31 @@ void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) irq_gc_unlock(gc); } +/** + * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt + * @d: irq_data + * + * This generic implementation of the irq_mask_ack method is for chips + * with separate enable/disable registers instead of a single mask + * register and where a pending interrupt is acknowledged by setting a + * bit. + * + * Note: This is the only permutation currently used. Similar generic + * functions should be added here if other permutations are required. + */ +void irq_gc_mask_disable_and_ack_set(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = d->mask; + + irq_gc_lock(gc); + irq_reg_writel(gc, mask, ct->regs.disable); + *ct->mask_cache &= ~mask; + irq_reg_writel(gc, mask, ct->regs.ack); + irq_gc_unlock(gc); +} + /** * irq_gc_eoi - EOI interrupt * @d: irq_data -- cgit v1.2.3-70-g09d2 From 16150904d8ba7b93b51d97bcfc671951b7f3dc02 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 4 Oct 2017 14:27:20 +0200 Subject: irqchip/tango: Use irq_gc_mask_disable_and_ack_set The only usage of the irq_gc_mask_disable_reg_and_ack() function is by the Tango irqchip driver. This usage is replaced by the irq_gc_mask_disable_and_ack_set() function since it provides the intended functionality. Fixes: 4bba66899ac6 ("irqchip/tango: Add support for Sigma Designs SMP86xx/SMP87xx interrupt controller") Acked-by: Mans Rullgard Acked-by: Marc Gonzalez Signed-off-by: Florian Fainelli Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-tango.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-tango.c b/drivers/irqchip/irq-tango.c index bdbb5c0ff7fe..0c085303a583 100644 --- a/drivers/irqchip/irq-tango.c +++ b/drivers/irqchip/irq-tango.c @@ -141,7 +141,7 @@ static void __init tangox_irq_init_chip(struct irq_chip_generic *gc, for (i = 0; i < 2; i++) { ct[i].chip.irq_ack = irq_gc_ack_set_bit; ct[i].chip.irq_mask = irq_gc_mask_disable_reg; - ct[i].chip.irq_mask_ack = irq_gc_mask_disable_reg_and_ack; + ct[i].chip.irq_mask_ack = irq_gc_mask_disable_and_ack_set; ct[i].chip.irq_unmask = irq_gc_unmask_enable_reg; ct[i].chip.irq_set_type = tangox_irq_set_type; ct[i].chip.name = gc->domain->name; -- cgit v1.2.3-70-g09d2 From 0d08af35f16a0cc418ad2afde3bc5f70ace82705 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 4 Oct 2017 14:28:17 +0200 Subject: genirq: generic chip: remove irq_gc_mask_disable_reg_and_ack() Any usage of the irq_gc_mask_disable_reg_and_ack() function has been replaced with the desired functionality. The incorrect and ambiguously named function is removed here to prevent accidental misuse. Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- include/linux/irq.h | 1 - kernel/irq/generic-chip.c | 16 ---------------- 2 files changed, 17 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 494d328f7051..5ad10948ea95 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1009,7 +1009,6 @@ void irq_gc_mask_clr_bit(struct irq_data *d); void irq_gc_unmask_enable_reg(struct irq_data *d); void irq_gc_ack_set_bit(struct irq_data *d); void irq_gc_ack_clr_bit(struct irq_data *d); -void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); void irq_gc_mask_disable_and_ack_set(struct irq_data *d); void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index ec5fe9a0cb05..c26c5bb6b491 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -134,22 +134,6 @@ void irq_gc_ack_clr_bit(struct irq_data *d) irq_gc_unlock(gc); } -/** - * irq_gc_mask_disable_reg_and_ack - Mask and ack pending interrupt - * @d: irq_data - */ -void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - struct irq_chip_type *ct = irq_data_get_chip_type(d); - u32 mask = d->mask; - - irq_gc_lock(gc); - irq_reg_writel(gc, mask, ct->regs.mask); - irq_reg_writel(gc, mask, ct->regs.ack); - irq_gc_unlock(gc); -} - /** * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt * @d: irq_data -- cgit v1.2.3-70-g09d2 From ce76353f169a6471542d999baf3d29b121dce9c0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 13 Oct 2017 14:32:37 +0200 Subject: iommu/amd: Finish TLB flush in amd_iommu_unmap() The function only sends the flush command to the IOMMU(s), but does not wait for its completion when it returns. Fix that. Fixes: 601367d76bd1 ('x86/amd-iommu: Remove iommu_flush_domain function') Cc: stable@vger.kernel.org # >= 2.6.33 Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 822679ac90a1..8e8874d23717 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3048,6 +3048,7 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, mutex_unlock(&domain->api_lock); domain_flush_tlb_pde(domain); + domain_flush_complete(domain); return unmap_size; } -- cgit v1.2.3-70-g09d2 From e7ad97938eaccb5a9ff4534167b1abafb507935c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 12 Oct 2017 11:48:31 +0200 Subject: liquidio: fix timespec64_to_ns typo While experimenting with changes to the timekeeping code, I ran into a build error in the liquidio driver: drivers/net/ethernet/cavium/liquidio/lio_main.c: In function 'liquidio_ptp_settime': drivers/net/ethernet/cavium/liquidio/lio_main.c:1850:22: error: passing argument 1 of 'timespec_to_ns' from incompatible pointer type [-Werror=incompatible-pointer-types] The driver had a type mismatch since it was first merged, but this never caused problems because it is only built on 64-bit architectures that define timespec and timespec64 to the same type. If we ever want to compile-test the driver on 32-bit or change the way that 64-bit timespec64 is defined, we need to fix it, so let's just do it now. Fixes: f21fb3ed364b ("Add support of Cavium Liquidio ethernet adapters") Signed-off-by: Arnd Bergmann Acked-by: Felix Manlunas Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index e7f54948173f..5b19826a7e16 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1847,7 +1847,7 @@ static int liquidio_ptp_settime(struct ptp_clock_info *ptp, struct lio *lio = container_of(ptp, struct lio, ptp_info); struct octeon_device *oct = (struct octeon_device *)lio->oct_dev; - ns = timespec_to_ns(ts); + ns = timespec64_to_ns(ts); spin_lock_irqsave(&lio->ptp_lock, flags); lio_pci_writeq(oct, ns, CN6XXX_MIO_PTP_CLOCK_HI); -- cgit v1.2.3-70-g09d2 From b9849860675f925da0380f4ea76c3f5041909737 Mon Sep 17 00:00:00 2001 From: Emiliano Ingrassia Date: Thu, 12 Oct 2017 11:00:47 +0200 Subject: net: stmmac: dwmac_lib: fix interchanged sleep/timeout values in DMA reset function The DMA reset timeout, used in read_poll_timeout, is ten times shorter than the sleep time. This patch fixes these values interchanging them, as it was before the read_poll_timeout introduction. Fixes: 8a70aeca80c2 ("net: stmmac: Use readl_poll_timeout") Signed-off-by: Emiliano Ingrassia Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 67af0bdd7f10..7516ca210855 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -34,7 +34,7 @@ int dwmac_dma_reset(void __iomem *ioaddr) err = readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, !(value & DMA_BUS_MODE_SFT_RESET), - 100000, 10000); + 10000, 100000); if (err) return -EBUSY; -- cgit v1.2.3-70-g09d2 From c6ebcedbab7ca78984959386012a17b21183e1a3 Mon Sep 17 00:00:00 2001 From: Pontus Andersson Date: Mon, 2 Oct 2017 14:45:19 +0200 Subject: i2c: ismt: Separate I2C block read from SMBus block read Commit b6c159a9cb69 ("i2c: ismt: Don't duplicate the receive length for block reads") broke I2C block reads. It aimed to fix normal SMBus block read, but changed the correct behavior of I2C block read in the process. According to Documentation/i2c/smbus-protocol, one vital difference between normal SMBus block read and I2C block read is that there is no byte count prefixed in the data sent on the wire: SMBus Block Read: i2c_smbus_read_block_data() S Addr Wr [A] Comm [A] S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P I2C Block Read: i2c_smbus_read_i2c_block_data() S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P Therefore the two transaction types need to be processed differently in the driver by copying of the dma_buffer as done previously for the I2C_SMBUS_I2C_BLOCK_DATA case. Fixes: b6c159a9cb69 ("i2c: ismt: Don't duplicate the receive length for block reads") Signed-off-by: Pontus Andersson Tested-by: Stephen Douthit Cc: stable@vger.kernel.org Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ismt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 22ffcb73c185..b51adffa4841 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -340,12 +340,15 @@ static int ismt_process_desc(const struct ismt_desc *desc, data->word = dma_buffer[0] | (dma_buffer[1] << 8); break; case I2C_SMBUS_BLOCK_DATA: - case I2C_SMBUS_I2C_BLOCK_DATA: if (desc->rxbytes != dma_buffer[0] + 1) return -EMSGSIZE; memcpy(data->block, dma_buffer, desc->rxbytes); break; + case I2C_SMBUS_I2C_BLOCK_DATA: + memcpy(&data->block[1], dma_buffer, desc->rxbytes); + data->block[0] = desc->rxbytes; + break; } return 0; } -- cgit v1.2.3-70-g09d2 From df0a2fdab0068f7452bf0a97ea9ba0ad69d49a1f Mon Sep 17 00:00:00 2001 From: Wei Jinhua Date: Wed, 11 Oct 2017 15:57:20 +0800 Subject: i2c: imx: use IRQF_SHARED mode to request IRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some SoC share one irq number between I2C controllers. For example, on the LS2088 board, I2C 1 and I2C 2 share one irq number. In this case, only one I2C controller can register successfully, and others will fail. Signed-off-by: Wei Jinhua Reviewed-by: Jiang Biao Acked-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 54a47b40546f..e5c8b3d5df77 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1100,7 +1100,7 @@ static int i2c_imx_probe(struct platform_device *pdev) } /* Request IRQ */ - ret = devm_request_irq(&pdev->dev, irq, i2c_imx_isr, 0, + ret = devm_request_irq(&pdev->dev, irq, i2c_imx_isr, IRQF_SHARED, pdev->name, i2c_imx); if (ret) { dev_err(&pdev->dev, "can't claim irq %d\n", irq); -- cgit v1.2.3-70-g09d2 From eba523b468a1e30384b6e8c1a9419163f325086e Mon Sep 17 00:00:00 2001 From: Clemens Gruber Date: Mon, 9 Oct 2017 21:26:14 +0200 Subject: i2c: imx: fix misleading bus recovery debug message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The arguments for SDA and SCL were swapped. Fix it. Signed-off-by: Clemens Gruber Acked-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index e5c8b3d5df77..f96830ffd9f1 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1021,7 +1021,7 @@ static int i2c_imx_init_recovery_info(struct imx_i2c_struct *i2c_imx, } dev_dbg(&pdev->dev, "using scl-gpio %d and sda-gpio %d for recovery\n", - rinfo->sda_gpio, rinfo->scl_gpio); + rinfo->scl_gpio, rinfo->sda_gpio); rinfo->prepare_recovery = i2c_imx_prepare_recovery; rinfo->unprepare_recovery = i2c_imx_unprepare_recovery; -- cgit v1.2.3-70-g09d2 From 0fe16195f89173652cf111d7b384941b00c5aabd Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 15 Jul 2017 16:51:26 -0700 Subject: i2c: piix4: Fix SMBus port selection for AMD Family 17h chips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AMD Family 17h uses the KERNCZ SMBus controller. While its documentation is not publicly available, it is documented in the BIOS and Kernel Developer’s Guide for AMD Family 15h Models 60h-6Fh Processors. On this SMBus controller, the port select register is at PMx register 0x02, bit 4:3 (PMx00 register bit 20:19). Without this patch, the 4 SMBus channels on AMD Family 17h chips are mirrored and report the same chips on all channels. Signed-off-by: Guenter Roeck Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-piix4.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 0ecdb47a23ab..01f767ee4546 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -94,6 +94,12 @@ #define SB800_PIIX4_PORT_IDX_ALT 0x2e #define SB800_PIIX4_PORT_IDX_SEL 0x2f #define SB800_PIIX4_PORT_IDX_MASK 0x06 +#define SB800_PIIX4_PORT_IDX_SHIFT 1 + +/* On kerncz, SmBus0Sel is at bit 20:19 of PMx00 DecodeEn */ +#define SB800_PIIX4_PORT_IDX_KERNCZ 0x02 +#define SB800_PIIX4_PORT_IDX_MASK_KERNCZ 0x18 +#define SB800_PIIX4_PORT_IDX_SHIFT_KERNCZ 3 /* insmod parameters */ @@ -149,6 +155,8 @@ static const struct dmi_system_id piix4_dmi_ibm[] = { */ static DEFINE_MUTEX(piix4_mutex_sb800); static u8 piix4_port_sel_sb800; +static u8 piix4_port_mask_sb800; +static u8 piix4_port_shift_sb800; static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = { " port 0", " port 2", " port 3", " port 4" }; @@ -347,7 +355,19 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, /* Find which register is used for port selection */ if (PIIX4_dev->vendor == PCI_VENDOR_ID_AMD) { - piix4_port_sel_sb800 = SB800_PIIX4_PORT_IDX_ALT; + switch (PIIX4_dev->device) { + case PCI_DEVICE_ID_AMD_KERNCZ_SMBUS: + piix4_port_sel_sb800 = SB800_PIIX4_PORT_IDX_KERNCZ; + piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK_KERNCZ; + piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT_KERNCZ; + break; + case PCI_DEVICE_ID_AMD_HUDSON2_SMBUS: + default: + piix4_port_sel_sb800 = SB800_PIIX4_PORT_IDX_ALT; + piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK; + piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; + break; + } } else { mutex_lock(&piix4_mutex_sb800); outb_p(SB800_PIIX4_PORT_IDX_SEL, SB800_PIIX4_SMB_IDX); @@ -355,6 +375,8 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev, piix4_port_sel_sb800 = (port_sel & 0x01) ? SB800_PIIX4_PORT_IDX_ALT : SB800_PIIX4_PORT_IDX; + piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK; + piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT; mutex_unlock(&piix4_mutex_sb800); } @@ -616,8 +638,8 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); port = adapdata->port; - if ((smba_en_lo & SB800_PIIX4_PORT_IDX_MASK) != port) - outb_p((smba_en_lo & ~SB800_PIIX4_PORT_IDX_MASK) | port, + if ((smba_en_lo & piix4_port_mask_sb800) != port) + outb_p((smba_en_lo & ~piix4_port_mask_sb800) | port, SB800_PIIX4_SMB_IDX + 1); retval = piix4_access(adap, addr, flags, read_write, @@ -706,7 +728,7 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba, adapdata->smba = smba; adapdata->sb800_main = sb800_main; - adapdata->port = port << 1; + adapdata->port = port << piix4_port_shift_sb800; /* set up the sysfs linkage to our parent device */ adap->dev.parent = &dev->dev; -- cgit v1.2.3-70-g09d2 From 88fa2dfb075a20c3464e3d303c57dd8ced9e8309 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 10 Oct 2017 18:11:15 +0200 Subject: i2c: piix4: Disable completely the IMC during SMBUS_BLOCK_DATA SMBUS_BLOCK_DATA transactions might fail due to a race condition with the IMC (Integrated Micro Controller), even when the IMC semaphore is used. This bug has been reported and confirmed by AMD, who suggested as a solution an IMC firmware upgrade (obtained via BIOS update) and disabling the IMC during SMBUS_BLOCK_DATA transactions. Even without the IMC upgrade, the SMBUS is much more stable with this patch. Tested on a Bettong-alike board. Signed-off-by: Ricardo Ribalda Delgado Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-piix4.c | 132 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 126 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 01f767ee4546..174579d32e5f 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -85,6 +85,9 @@ /* SB800 constants */ #define SB800_PIIX4_SMB_IDX 0xcd6 +#define KERNCZ_IMC_IDX 0x3e +#define KERNCZ_IMC_DATA 0x3f + /* * SB800 port is selected by bits 2:1 of the smb_en register (0x2c) * or the smb_sel register (0x2e), depending on bit 0 of register 0x2f. @@ -167,6 +170,7 @@ struct i2c_piix4_adapdata { /* SB800 */ bool sb800_main; + bool notify_imc; u8 port; /* Port number, shifted */ }; @@ -594,6 +598,67 @@ static s32 piix4_access(struct i2c_adapter * adap, u16 addr, return 0; } +static uint8_t piix4_imc_read(uint8_t idx) +{ + outb_p(idx, KERNCZ_IMC_IDX); + return inb_p(KERNCZ_IMC_DATA); +} + +static void piix4_imc_write(uint8_t idx, uint8_t value) +{ + outb_p(idx, KERNCZ_IMC_IDX); + outb_p(value, KERNCZ_IMC_DATA); +} + +static int piix4_imc_sleep(void) +{ + int timeout = MAX_TIMEOUT; + + if (!request_muxed_region(KERNCZ_IMC_IDX, 2, "smbus_kerncz_imc")) + return -EBUSY; + + /* clear response register */ + piix4_imc_write(0x82, 0x00); + /* request ownership flag */ + piix4_imc_write(0x83, 0xB4); + /* kick off IMC Mailbox command 96 */ + piix4_imc_write(0x80, 0x96); + + while (timeout--) { + if (piix4_imc_read(0x82) == 0xfa) { + release_region(KERNCZ_IMC_IDX, 2); + return 0; + } + usleep_range(1000, 2000); + } + + release_region(KERNCZ_IMC_IDX, 2); + return -ETIMEDOUT; +} + +static void piix4_imc_wakeup(void) +{ + int timeout = MAX_TIMEOUT; + + if (!request_muxed_region(KERNCZ_IMC_IDX, 2, "smbus_kerncz_imc")) + return; + + /* clear response register */ + piix4_imc_write(0x82, 0x00); + /* release ownership flag */ + piix4_imc_write(0x83, 0xB5); + /* kick off IMC Mailbox command 96 */ + piix4_imc_write(0x80, 0x96); + + while (timeout--) { + if (piix4_imc_read(0x82) == 0xfa) + break; + usleep_range(1000, 2000); + } + + release_region(KERNCZ_IMC_IDX, 2); +} + /* * Handles access to multiple SMBus ports on the SB800. * The port is selected by bits 2:1 of the smb_en register (0x2c). @@ -634,6 +699,41 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, return -EBUSY; } + /* + * Notify the IMC (Integrated Micro Controller) if required. + * Among other responsibilities, the IMC is in charge of monitoring + * the System fans and temperature sensors, and act accordingly. + * All this is done through SMBus and can/will collide + * with our transactions if they are long (BLOCK_DATA). + * Therefore we need to request the ownership flag during those + * transactions. + */ + if ((size == I2C_SMBUS_BLOCK_DATA) && adapdata->notify_imc) { + int ret; + + ret = piix4_imc_sleep(); + switch (ret) { + case -EBUSY: + dev_warn(&adap->dev, + "IMC base address index region 0x%x already in use.\n", + KERNCZ_IMC_IDX); + break; + case -ETIMEDOUT: + dev_warn(&adap->dev, + "Failed to communicate with the IMC.\n"); + break; + default: + break; + } + + /* If IMC communication fails do not retry */ + if (ret) { + dev_warn(&adap->dev, + "Continuing without IMC notification.\n"); + adapdata->notify_imc = false; + } + } + outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX); smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1); @@ -650,6 +750,9 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr, /* Release the semaphore */ outb_p(smbslvcnt | 0x20, SMBSLVCNT); + if ((size == I2C_SMBUS_BLOCK_DATA) && adapdata->notify_imc) + piix4_imc_wakeup(); + mutex_unlock(&piix4_mutex_sb800); return retval; @@ -701,7 +804,7 @@ static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS]; static struct i2c_adapter *piix4_aux_adapter; static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba, - bool sb800_main, u8 port, + bool sb800_main, u8 port, bool notify_imc, const char *name, struct i2c_adapter **padap) { struct i2c_adapter *adap; @@ -729,6 +832,7 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba, adapdata->smba = smba; adapdata->sb800_main = sb800_main; adapdata->port = port << piix4_port_shift_sb800; + adapdata->notify_imc = notify_imc; /* set up the sysfs linkage to our parent device */ adap->dev.parent = &dev->dev; @@ -750,14 +854,15 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba, return 0; } -static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba) +static int piix4_add_adapters_sb800(struct pci_dev *dev, unsigned short smba, + bool notify_imc) { struct i2c_piix4_adapdata *adapdata; int port; int retval; for (port = 0; port < PIIX4_MAX_ADAPTERS; port++) { - retval = piix4_add_adapter(dev, smba, true, port, + retval = piix4_add_adapter(dev, smba, true, port, notify_imc, piix4_main_port_names_sb800[port], &piix4_main_adapters[port]); if (retval < 0) @@ -791,6 +896,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id) dev->device == PCI_DEVICE_ID_ATI_SBX00_SMBUS && dev->revision >= 0x40) || dev->vendor == PCI_VENDOR_ID_AMD) { + bool notify_imc = false; is_sb800 = true; if (!request_region(SB800_PIIX4_SMB_IDX, 2, "smba_idx")) { @@ -800,6 +906,20 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id) return -EBUSY; } + if (dev->vendor == PCI_VENDOR_ID_AMD && + dev->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS) { + u8 imc; + + /* + * Detect if IMC is active or not, this method is + * described on coreboot's AMD IMC notes + */ + pci_bus_read_config_byte(dev->bus, PCI_DEVFN(0x14, 3), + 0x40, &imc); + if (imc & 0x80) + notify_imc = true; + } + /* base address location etc changed in SB800 */ retval = piix4_setup_sb800(dev, id, 0); if (retval < 0) { @@ -811,7 +931,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id) * Try to register multiplexed main SMBus adapter, * give up if we can't */ - retval = piix4_add_adapters_sb800(dev, retval); + retval = piix4_add_adapters_sb800(dev, retval, notify_imc); if (retval < 0) { release_region(SB800_PIIX4_SMB_IDX, 2); return retval; @@ -822,7 +942,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id) return retval; /* Try to register main SMBus adapter, give up if we can't */ - retval = piix4_add_adapter(dev, retval, false, 0, "", + retval = piix4_add_adapter(dev, retval, false, 0, false, "", &piix4_main_adapters[0]); if (retval < 0) return retval; @@ -849,7 +969,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id) if (retval > 0) { /* Try to add the aux adapter if it exists, * piix4_add_adapter will clean up if this fails */ - piix4_add_adapter(dev, retval, false, 0, + piix4_add_adapter(dev, retval, false, 0, false, is_sb800 ? piix4_aux_port_name_sb800 : "", &piix4_aux_adapter); } -- cgit v1.2.3-70-g09d2 From 70b01dfd765dd2196d51f33a49df23954416f34a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 13 Oct 2017 10:37:28 +0200 Subject: perf hists: Fix crash in perf_hpp__reset_output_field() Du Changbin reported crash [1] when calling perf_hpp__reset_output_field() after unregistering field via perf_hpp__column_unregister(). This ends up in calling following list_del* sequence on the same format: perf_hpp__column_unregister: list_del(&format->list); perf_hpp__reset_output_field: list_del_init(&fmt->list); where the later list_del_init might touch already freed formats. Fixing this by replacing list_del() with list_del_init() in perf_hpp__column_unregister(). [1] http://marc.info/?l=linux-kernel&m=149059595826019&w=2 Reported-by: Changbin Du Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20171013083736.15037-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index ddb2c6fbdf91..6ee6b36bbc76 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -532,7 +532,7 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { - list_del(&format->list); + list_del_init(&format->list); } void perf_hpp__cancel_cumulate(void) -- cgit v1.2.3-70-g09d2 From d0e35234f647631ddfa5fa8c8ec66c9bc698f0ab Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 13 Oct 2017 10:37:29 +0200 Subject: perf hists: Add extra integrity checks to fmt_free() Make sure the struct perf_hpp_fmt is properly unhooked before we free it. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20171013083736.15037-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 6ee6b36bbc76..db79017a6e56 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -606,6 +606,13 @@ next: static void fmt_free(struct perf_hpp_fmt *fmt) { + /* + * At this point fmt should be completely + * unhooked, if not it's a bug. + */ + BUG_ON(!list_empty(&fmt->list)); + BUG_ON(!list_empty(&fmt->sort_list)); + if (fmt->free) fmt->free(fmt); } -- cgit v1.2.3-70-g09d2 From 29479bfe83bafb8aa37f36ca132ee8349d11da0c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 13 Oct 2017 10:37:35 +0200 Subject: perf tools: Check wether the eBPF file exists in event parsing Adding the check wether the eBPF file exists, to consider it as eBPF input file. This way we can differentiate eBPF events from events that end up with same suffix as eBPF file. Before: $ perf stat -e 'cpu/uops_executed.core/' true bpf: builtin compilation failed: -95, try external compiler WARNING: unable to get correct kernel building directory. Hint: Set correct kbuild directory using 'kbuild-dir' option in [llvm] section of ~/.perfconfig or set it to "" to suppress kbuild detection. event syntax error: 'cpu/uops_executed.core/' \___ Failed to load cpu/uops_executed.c from source: 'version' section incorrect or lost After: $ perf stat -e 'cpu/uops_executed.core/' true Performance counter stats for 'true': 181,533 cpu/uops_executed.core/:u 0.002795447 seconds time elapsed If user makes type in the eBPF file, we prioritize the event syntax and show following warning: $ perf stat -e 'krava.c//' true event syntax error: 'krava.c//' \___ Cannot find PMU `krava.c'. Missing kernel support? Reported-and-Tested-by: Andi Kleen Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Changbin Du Cc: David Ahern Cc: Jin Yao Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20171013083736.15037-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index c42edeac451f..dcfdafdc2f1c 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -8,6 +8,9 @@ %{ #include +#include +#include +#include #include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" @@ -53,9 +56,8 @@ static int str(yyscan_t scanner, int token) return token; } -static bool isbpf(yyscan_t scanner) +static bool isbpf_suffix(char *text) { - char *text = parse_events_get_text(scanner); int len = strlen(text); if (len < 2) @@ -68,6 +70,17 @@ static bool isbpf(yyscan_t scanner) return false; } +static bool isbpf(yyscan_t scanner) +{ + char *text = parse_events_get_text(scanner); + struct stat st; + + if (!isbpf_suffix(text)) + return false; + + return stat(text, &st) == 0; +} + /* * This function is called when the parser gets two kind of input: * -- cgit v1.2.3-70-g09d2 From e20d103b6c37038ca27409f746f0b3351bcd0c44 Mon Sep 17 00:00:00 2001 From: Mark Hairgrove Date: Fri, 13 Oct 2017 15:57:30 -0700 Subject: mm/migrate: fix indexing bug (off by one) and avoid out of bound access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Index was incremented before last use and thus the second array could dereference to an invalid address (not mentioning the fact that it did not properly clear the entry we intended to clear). Link: http://lkml.kernel.org/r/1506973525-16491-1-git-send-email-jglisse@redhat.com Fixes: 8315ada7f095bf ("mm/migrate: allow migrate_vma() to alloc new page on empty entry") Signed-off-by: Mark Hairgrove Signed-off-by: Jérôme Glisse Cc: Reza Arbab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 6954c1435833..e00814ca390e 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2146,8 +2146,9 @@ static int migrate_vma_collect_hole(unsigned long start, unsigned long addr; for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE; + migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE; migrate->dst[migrate->npages] = 0; + migrate->npages++; migrate->cpages++; } -- cgit v1.2.3-70-g09d2 From cc3fa84045694c2fd7ccf6ce84dee5cba372a7d3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 13 Oct 2017 15:57:33 -0700 Subject: lib/Kconfig.debug: kernel hacking menu: runtime testing: keep tests together Expand the "Runtime testing" menu by including more entries inside it instead of after it. This is just Kconfig symbol movement. This causes the (arch-independent) Runtime tests to be presented (listed) all in one place instead of in multiple places. Link: http://lkml.kernel.org/r/c194e5c4-2042-bf94-a2d8-7aa13756e257@infradead.org Signed-off-by: Randy Dunlap Acked-by: Kees Cook Cc: Dave Hansen Cc: "Luis R. Rodriguez" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 143 +++++++++++++++++++++++++++--------------------------- 1 file changed, 71 insertions(+), 72 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2689b7c50c52..c1e720a22c71 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1590,6 +1590,54 @@ config LATENCYTOP source kernel/trace/Kconfig +config PROVIDE_OHCI1394_DMA_INIT + bool "Remote debugging over FireWire early on boot" + depends on PCI && X86 + help + If you want to debug problems which hang or crash the kernel early + on boot and the crashing machine has a FireWire port, you can use + this feature to remotely access the memory of the crashed machine + over FireWire. This employs remote DMA as part of the OHCI1394 + specification which is now the standard for FireWire controllers. + + With remote DMA, you can monitor the printk buffer remotely using + firescope and access all memory below 4GB using fireproxy from gdb. + Even controlling a kernel debugger is possible using remote DMA. + + Usage: + + If ohci1394_dma=early is used as boot parameter, it will initialize + all OHCI1394 controllers which are found in the PCI config space. + + As all changes to the FireWire bus such as enabling and disabling + devices cause a bus reset and thereby disable remote DMA for all + devices, be sure to have the cable plugged and FireWire enabled on + the debugging host before booting the debug target for debugging. + + This code (~1k) is freed after boot. By then, the firewire stack + in charge of the OHCI-1394 controllers should be used instead. + + See Documentation/debugging-via-ohci1394.txt for more information. + +config DMA_API_DEBUG + bool "Enable debugging of DMA-API usage" + depends on HAVE_DMA_API_DEBUG + help + Enable this option to debug the use of the DMA API by device drivers. + With this option you will be able to detect common bugs in device + drivers like double-freeing of DMA mappings or freeing mappings that + were never allocated. + + This also attempts to catch cases where a page owned by DMA is + accessed by the cpu in a way that could cause data corruption. For + example, this enables cow_user_page() to check that the source page is + not undergoing DMA. + + This option causes a performance degradation. Use only if you want to + debug device drivers and dma interactions. + + If unsure, say N. + menu "Runtime Testing" config LKDTM @@ -1749,56 +1797,6 @@ config TEST_PARMAN If unsure, say N. -endmenu # runtime tests - -config PROVIDE_OHCI1394_DMA_INIT - bool "Remote debugging over FireWire early on boot" - depends on PCI && X86 - help - If you want to debug problems which hang or crash the kernel early - on boot and the crashing machine has a FireWire port, you can use - this feature to remotely access the memory of the crashed machine - over FireWire. This employs remote DMA as part of the OHCI1394 - specification which is now the standard for FireWire controllers. - - With remote DMA, you can monitor the printk buffer remotely using - firescope and access all memory below 4GB using fireproxy from gdb. - Even controlling a kernel debugger is possible using remote DMA. - - Usage: - - If ohci1394_dma=early is used as boot parameter, it will initialize - all OHCI1394 controllers which are found in the PCI config space. - - As all changes to the FireWire bus such as enabling and disabling - devices cause a bus reset and thereby disable remote DMA for all - devices, be sure to have the cable plugged and FireWire enabled on - the debugging host before booting the debug target for debugging. - - This code (~1k) is freed after boot. By then, the firewire stack - in charge of the OHCI-1394 controllers should be used instead. - - See Documentation/debugging-via-ohci1394.txt for more information. - -config DMA_API_DEBUG - bool "Enable debugging of DMA-API usage" - depends on HAVE_DMA_API_DEBUG - help - Enable this option to debug the use of the DMA API by device drivers. - With this option you will be able to detect common bugs in device - drivers like double-freeing of DMA mappings or freeing mappings that - were never allocated. - - This also attempts to catch cases where a page owned by DMA is - accessed by the cpu in a way that could cause data corruption. For - example, this enables cow_user_page() to check that the source page is - not undergoing DMA. - - This option causes a performance degradation. Use only if you want to - debug device drivers and dma interactions. - - If unsure, say N. - config TEST_LKM tristate "Test module loading with 'hello world' module" default n @@ -1873,18 +1871,6 @@ config TEST_UDELAY If unsure, say N. -config MEMTEST - bool "Memtest" - depends on HAVE_MEMBLOCK - ---help--- - This option adds a kernel parameter 'memtest', which allows memtest - to be set. - memtest=0, mean disabled; -- default - memtest=1, mean do 1 test pattern; - ... - memtest=17, mean do 17 test patterns. - If you are unsure how to answer this question, answer N. - config TEST_STATIC_KEYS tristate "Test static keys" default n @@ -1894,16 +1880,6 @@ config TEST_STATIC_KEYS If unsure, say N. -config BUG_ON_DATA_CORRUPTION - bool "Trigger a BUG when data corruption is detected" - select DEBUG_LIST - help - Select this option if the kernel should BUG when it encounters - data corruption in kernel memory structures when they get checked - for validity. - - If unsure, say N. - config TEST_KMOD tristate "kmod stress tester" default n @@ -1941,6 +1917,29 @@ config TEST_DEBUG_VIRTUAL If unsure, say N. +endmenu # runtime tests + +config MEMTEST + bool "Memtest" + depends on HAVE_MEMBLOCK + ---help--- + This option adds a kernel parameter 'memtest', which allows memtest + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=17, mean do 17 test patterns. + If you are unsure how to answer this question, answer N. + +config BUG_ON_DATA_CORRUPTION + bool "Trigger a BUG when data corruption is detected" + select DEBUG_LIST + help + Select this option if the kernel should BUG when it encounters + data corruption in kernel memory structures when they get checked + for validity. + + If unsure, say N. source "samples/Kconfig" -- cgit v1.2.3-70-g09d2 From c02c30093254189a6ef55fed415a4ffb55a74cdf Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 13 Oct 2017 15:57:37 -0700 Subject: mm/madvise.c: add description for MADV_WIPEONFORK and MADV_KEEPONFORK mm/madvise.c has a brief description about all MADV_ flags. Add a description for the newly added MADV_WIPEONFORK and MADV_KEEPONFORK. Although man page has the similar information, but it'd better to keep the consistent with other flags. Link: http://lkml.kernel.org/r/1506117328-88228-1-git-send-email-yang.s@alibaba-inc.com Signed-off-by: Yang Shi Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 25bade36e9ca..fd70d6aabc3e 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -757,6 +757,9 @@ madvise_behavior_valid(int behavior) * MADV_DONTFORK - omit this area from child's address space when forking: * typically, to avoid COWing pages pinned by get_user_pages(). * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. + * MADV_WIPEONFORK - present the child process with zero-filled memory in this + * range after a fork. + * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK * MADV_HWPOISON - trigger memory error handler as if the given memory range * were corrupted by unrecoverable hardware memory failure. * MADV_SOFT_OFFLINE - try to soft-offline the given range of memory. @@ -777,7 +780,9 @@ madvise_behavior_valid(int behavior) * zero - success * -EINVAL - start + len < 0, start is not page-aligned, * "behavior" is not a valid value, or application - * is attempting to release locked or shared pages. + * is attempting to release locked or shared pages, + * or the specified address range includes file, Huge TLB, + * MAP_SHARED or VMPFNMAP range. * -ENOMEM - addresses in the specified range are not currently * mapped, or are outside the AS of the process. * -EIO - an I/O error occurred while paging in data. -- cgit v1.2.3-70-g09d2 From 8a1ac5dc7be09883051b1bf89a5e57d7ad850fa5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Oct 2017 15:57:40 -0700 Subject: include/linux/of.h: provide of_n_{addr,size}_cells wrappers for !CONFIG_OF The pci-rcar driver is enabled for compile tests, and this has shown that the driver cannot build without CONFIG_OF, following the inclusion of commit f8f2fe7355fb ("PCI: rcar: Use new OF interrupt mapping when possible"): drivers/pci/host/pcie-rcar.c: In function 'pci_dma_range_parser_init': drivers/pci/host/pcie-rcar.c:1039:2: error: implicit declaration of function 'of_n_addr_cells' [-Werror=implicit-function-declaration] parser->pna = of_n_addr_cells(node); ^ As pointed out by Ben Dooks and Geert Uytterhoeven, this is actually supposed to build fine, which we can achieve if we make the declaration of of_irq_parse_and_map_pci conditional on CONFIG_OF and provide an empty inline function otherwise, as we do for a lot of other of interfaces. This lets us build the rcar_pci driver again without CONFIG_OF for build testing. All platforms using this driver select OF, so this doesn't change anything for the users. [akpm@linux-foundation.org: be consistent with surrounding code] Link: http://lkml.kernel.org/r/20170911200805.3363318-1-arnd@arndb.de Fixes: c25da4778803 ("PCI: rcar: Add Renesas R-Car PCIe driver") Signed-off-by: Arnd Bergmann Reviewed-by: Frank Rowand Acked-by: Geert Uytterhoeven Cc: Bjorn Helgaas Cc: Magnus Damm Cc: Ben Dooks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/of.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/of.h b/include/linux/of.h index cfc34117fc92..b240ed69dc96 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -734,6 +734,16 @@ static inline struct device_node *of_get_cpu_node(int cpu, return NULL; } +static inline int of_n_addr_cells(struct device_node *np) +{ + return 0; + +} +static inline int of_n_size_cells(struct device_node *np) +{ + return 0; +} + static inline int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { -- cgit v1.2.3-70-g09d2 From de55c8b251974247edda38e952da8e8dd71683ec Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Oct 2017 15:57:43 -0700 Subject: mm/mempolicy: fix NUMA_INTERLEAVE_HIT counter Commit 3a321d2a3dde ("mm: change the call sites of numa statistics items") separated NUMA counters from zone counters, but the NUMA_INTERLEAVE_HIT call site wasn't updated to use the new interface. So alloc_page_interleave() actually increments NR_ZONE_INACTIVE_FILE instead of NUMA_INTERLEAVE_HIT. Fix this by using __inc_numa_state() interface to increment NUMA_INTERLEAVE_HIT. Link: http://lkml.kernel.org/r/20171003191003.8573-1-aryabinin@virtuozzo.com Fixes: 3a321d2a3dde ("mm: change the call sites of numa statistics items") Signed-off-by: Andrey Ryabinin Acked-by: Mel Gorman Cc: Kemi Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 006ba625c0b8..a2af6d58a68f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1920,8 +1920,11 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, struct page *page; page = __alloc_pages(gfp, order, nid); - if (page && page_to_nid(page) == nid) - inc_zone_page_state(page, NUMA_INTERLEAVE_HIT); + if (page && page_to_nid(page) == nid) { + preempt_disable(); + __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT); + preempt_enable(); + } return page; } -- cgit v1.2.3-70-g09d2 From af0db981f35ea99b00a0b249bf0bedef8cf972e8 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 13 Oct 2017 15:57:47 -0700 Subject: mm: remove unnecessary WARN_ONCE in page_vma_mapped_walk(). A non present pmd entry can appear after pmd_lock is taken in page_vma_mapped_walk(), even if THP migration is not enabled. The WARN_ONCE is unnecessary. Link: http://lkml.kernel.org/r/20171003142606.12324-1-zi.yan@sent.com Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path") Signed-off-by: Zi Yan Reported-by: Abdul Haleem Tested-by: Abdul Haleem Acked-by: Kirill A. Shutemov Cc: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_vma_mapped.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 6a03946469a9..eb462e7db0a9 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -167,8 +167,7 @@ restart: return not_found(pvmw); return true; } - } else - WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!"); + } return not_found(pvmw); } else { /* THP pmd was split under us: handle on pte level */ -- cgit v1.2.3-70-g09d2 From 064f0e9302af4f4ab5e9dca03a5a77d6bebfd35e Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Fri, 13 Oct 2017 15:57:50 -0700 Subject: mm: only display online cpus of the numa node When I execute numactl -H (which reads /sys/devices/system/node/nodeX/cpumap and displays cpumask_of_node for each node), I get different result on X86 and arm64. For each numa node, the former only displayed online CPUs, and the latter displayed all possible CPUs. Unfortunately, both Linux documentation and numactl manual have not described it clear. I sent a mail to ask for help, and Michal Hocko replied that he preferred to print online cpus because it doesn't really make much sense to bind anything on offline nodes. Will said: "I suspect the vast majority (if not all) code that reads this file was developed for x86, so having the same behaviour for arm64 sounds like something we should do ASAP before people try to special case with things like #ifdef __aarch64__. I'd rather have this in 4.14 if possible." Link: http://lkml.kernel.org/r/1506678805-15392-2-git-send-email-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Will Deacon Cc: Greg Kroah-Hartman Cc: Tianhong Ding Cc: Hanjun Guo Cc: Libin Cc: Kefeng Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index 3855902f2c5b..aae2402f3791 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -27,13 +27,21 @@ static struct bus_type node_subsys = { static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf) { + ssize_t n; + cpumask_var_t mask; struct node *node_dev = to_node(dev); - const struct cpumask *mask = cpumask_of_node(node_dev->dev.id); /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); - return cpumap_print_to_pagebuf(list, buf, mask); + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return 0; + + cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); + n = cpumap_print_to_pagebuf(list, buf, mask); + free_cpumask_var(mask); + + return n; } static inline ssize_t node_read_cpumask(struct device *dev, -- cgit v1.2.3-70-g09d2 From 7ddd8faf4399ab4f4edad5604eab35f8a87caf02 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 13 Oct 2017 15:57:54 -0700 Subject: userfaultfd: selftest: exercise -EEXIST only in background transfer I was stress testing some backports and with high load, after some time, the latest version of the selftest showed some false positive in connection with the uffdio_copy_retry. This seems to fix it while still exercising -EEXIST in the background transfer once in a while. The fork child will quit after the last UFFDIO_COPY is run, so a repeated UFFDIO_COPY may not return -EEXIST. This change restricts the -EEXIST stress to the background transfer where the memory can't go away from under it. Also updated uffdio_zeropage, so the interface is consistent. Link: http://lkml.kernel.org/r/20171004171541.1495-2-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Cc: Pavel Emelyanov Cc: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index a2c53a3d223d..de2f9ec8a87f 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -397,7 +397,7 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, } } -static int copy_page(int ufd, unsigned long offset) +static int __copy_page(int ufd, unsigned long offset, bool retry) { struct uffdio_copy uffdio_copy; @@ -418,7 +418,7 @@ static int copy_page(int ufd, unsigned long offset) fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", uffdio_copy.copy), exit(1); } else { - if (test_uffdio_copy_eexist) { + if (test_uffdio_copy_eexist && retry) { test_uffdio_copy_eexist = false; retry_copy_page(ufd, &uffdio_copy, offset); } @@ -427,6 +427,16 @@ static int copy_page(int ufd, unsigned long offset) return 0; } +static int copy_page_retry(int ufd, unsigned long offset) +{ + return __copy_page(ufd, offset, true); +} + +static int copy_page(int ufd, unsigned long offset) +{ + return __copy_page(ufd, offset, false); +} + static void *uffd_poll_thread(void *arg) { unsigned long cpu = (unsigned long) arg; @@ -544,7 +554,7 @@ static void *background_thread(void *arg) for (page_nr = cpu * nr_pages_per_cpu; page_nr < (cpu+1) * nr_pages_per_cpu; page_nr++) - copy_page(uffd, page_nr * page_size); + copy_page_retry(uffd, page_nr * page_size); return NULL; } @@ -779,7 +789,7 @@ static void retry_uffdio_zeropage(int ufd, } } -static int uffdio_zeropage(int ufd, unsigned long offset) +static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) { struct uffdio_zeropage uffdio_zeropage; int ret; @@ -814,7 +824,7 @@ static int uffdio_zeropage(int ufd, unsigned long offset) fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", uffdio_zeropage.zeropage), exit(1); } else { - if (test_uffdio_zeropage_eexist) { + if (test_uffdio_zeropage_eexist && retry) { test_uffdio_zeropage_eexist = false; retry_uffdio_zeropage(ufd, &uffdio_zeropage, offset); @@ -830,6 +840,11 @@ static int uffdio_zeropage(int ufd, unsigned long offset) return 0; } +static int uffdio_zeropage(int ufd, unsigned long offset) +{ + return __uffdio_zeropage(ufd, offset, false); +} + /* exercise UFFDIO_ZEROPAGE */ static int userfaultfd_zeropage_test(void) { -- cgit v1.2.3-70-g09d2 From 51962a9d437f0d580c04cd2c4abc2bd417200da2 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 13 Oct 2017 15:57:58 -0700 Subject: scripts/kallsyms.c: ignore symbol type 'n' gcc on aarch64 may emit synbols of type 'n' if the kernel is built with '-frecord-gcc-switches'. In most cases, those symbols are reported with nm as 000000000000000e n $d and with objdump as 0000000000000000 l d .GCC.command.line 0000000000000000 .GCC.command.line 000000000000000e l .GCC.command.line 0000000000000000 $d Those symbols are detected in is_arm_mapping_symbol() and ignored. However, if "--prefix-symbols=" is configured as well, the situation is different. For example, in efi/libstub, arm64 images are built with '--prefix-alloc-sections=.init --prefix-symbols=__efistub_'. In combination with '-frecord-gcc-switches', the symbols are now reported by nm as: 000000000000000e n __efistub_$d and by objdump as: 0000000000000000 l d .GCC.command.line 0000000000000000 .GCC.command.line 000000000000000e l .GCC.command.line 0000000000000000 __efistub_$d Those symbols are no longer ignored and included in the base address calculation. This results in a base address of 000000000000000e, which in turn causes kallsyms to abort with kallsyms failure: relative symbol value 0xffffff900800a000 out of range in relative mode The problem is seen in little endian arm64 builds with CONFIG_EFI enabled and with '-frecord-gcc-switches' set in KCFLAGS. Explicitly ignore symbols of type 'n' since those are clearly debug symbols. Link: http://lkml.kernel.org/r/1507136063-3139-1-git-send-email-linux@roeck-us.net Signed-off-by: Guenter Roeck Acked-by: Ard Biesheuvel Cc: Josh Poimboeuf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/kallsyms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 5d554419170b..9ee9bf7fd1a2 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -158,7 +158,7 @@ static int read_symbol(FILE *in, struct sym_entry *s) else if (str[0] == '$') return -1; /* exclude debugging symbols */ - else if (stype == 'N') + else if (stype == 'N' || stype == 'n') return -1; /* include the type field in the symbol name, so that it gets -- cgit v1.2.3-70-g09d2 From ef4650144e76ae361fe4b8c9a0afcd53074cd520 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 13 Oct 2017 15:58:01 -0700 Subject: mm/cma.c: take __GFP_NOWARN into account in cma_alloc() cma_alloc() unconditionally prints an INFO message when the CMA allocation fails. Make this message conditional on the non-presence of __GFP_NOWARN in gfp_mask. This patch aims at removing INFO messages that are displayed when the VC4 driver tries to allocate buffer objects. From the driver perspective an allocation failure is acceptable, and the driver can possibly do something to make following allocation succeed (like flushing the VC4 internal cache). Link: http://lkml.kernel.org/r/20171004125447.15195-1-boris.brezillon@free-electrons.com Signed-off-by: Boris Brezillon Acked-by: Laura Abbott Cc: Jaewon Kim Cc: David Airlie Cc: Daniel Vetter Cc: Eric Anholt Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/cma.c b/mm/cma.c index c0da318c020e..022e52bd8370 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -460,7 +460,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, trace_cma_alloc(pfn, page, count, align); - if (ret) { + if (ret && !(gfp_mask & __GFP_NOWARN)) { pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n", __func__, count, ret); cma_debug_show_areas(cma); -- cgit v1.2.3-70-g09d2 From b8c8a338f75e052d9fa2fed851259320af412e3f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 13 Oct 2017 15:58:05 -0700 Subject: Revert "vmalloc: back off when the current task is killed" This reverts commits 5d17a73a2ebe ("vmalloc: back off when the current task is killed") and 171012f56127 ("mm: don't warn when vmalloc() fails due to a fatal signal"). Commit 5d17a73a2ebe ("vmalloc: back off when the current task is killed") made all vmalloc allocations from a signal-killed task fail. We have seen crashes in the tty driver from this, where a killed task exiting tries to switch back to N_TTY, fails n_tty_open because of the vmalloc failing, and later crashes when dereferencing tty->disc_data. Arguably, relying on a vmalloc() call to succeed in order to properly exit a task is not the most robust way of doing things. There will be a follow-up patch to the tty code to fall back to the N_NULL ldisc. But the justification to make that vmalloc() call fail like this isn't convincing, either. The patch mentions an OOM victim exhausting the memory reserves and thus deadlocking the machine. But the OOM killer is only one, improbable source of fatal signals. It doesn't make sense to fail allocations preemptively with plenty of memory in most cases. The patch doesn't mention real-life instances where vmalloc sites would exhaust memory, which makes it sound more like a theoretical issue to begin with. But just in case, the OOM access to memory reserves has been restricted on the allocator side in cd04ae1e2dc8 ("mm, oom: do not rely on TIF_MEMDIE for memory reserves access"), which should take care of any theoretical concerns on that front. Revert this patch, and the follow-up that suppresses the allocation warnings when we fail the allocations due to a signal. Link: http://lkml.kernel.org/r/20171004185906.GB2136@cmpxchg.org Fixes: 171012f56127 ("mm: don't warn when vmalloc() fails due to a fatal signal") Signed-off-by: Johannes Weiner Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Alan Cox Cc: Christoph Hellwig Cc: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8a43db6284eb..673942094328 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1695,11 +1695,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, for (i = 0; i < area->nr_pages; i++) { struct page *page; - if (fatal_signal_pending(current)) { - area->nr_pages = i; - goto fail_no_warn; - } - if (node == NUMA_NO_NODE) page = alloc_page(alloc_mask|highmem_mask); else @@ -1723,7 +1718,6 @@ fail: warn_alloc(gfp_mask, NULL, "vmalloc: allocation failure, allocated %ld of %ld bytes", (area->nr_pages*PAGE_SIZE), area->size); -fail_no_warn: vfree(area->addr); return NULL; } -- cgit v1.2.3-70-g09d2 From e65c62b1375cbff69fa925787bcdae4b27bffb48 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 13 Oct 2017 15:58:08 -0700 Subject: tty: fall back to N_NULL if switching to N_TTY fails during hangup We have seen NULL-pointer dereference crashes in tty->disc_data when the N_TTY fallback driver failed to open during hangup. The immediate cause of this open to fail has been addressed in the preceding patch to vmalloc(), but this code could be more robust. As Alan pointed out in commit 8a8dabf2dd68 ("tty: handle the case where we cannot restore a line discipline"), the N_TTY driver, historically the safe fallback that could never fail, can indeed fail, but the surrounding code is not prepared to handle this. To avoid crashes he added a new N_NULL driver to take N_TTY's place as the last resort. Hook that fallback up to the hangup path. Update tty_ldisc_reinit() to reflect the reality that n_tty_open can indeed fail. Link: http://lkml.kernel.org/r/20171004185959.GC2136@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Alan Cox Cc: Christoph Hellwig Cc: Dmitry Vyukov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/tty/tty_ldisc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 2fe216b276e2..84a8ac2a779f 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -694,10 +694,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { - if (!WARN_ON(disc == N_TTY)) { - tty_ldisc_put(tty->ldisc); - tty->ldisc = NULL; - } + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; } return retval; } @@ -752,8 +750,9 @@ void tty_ldisc_hangup(struct tty_struct *tty, bool reinit) if (tty->ldisc) { if (reinit) { - if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0) - tty_ldisc_reinit(tty, N_TTY); + if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0 && + tty_ldisc_reinit(tty, N_TTY) < 0) + WARN_ON(tty_ldisc_reinit(tty, N_NULL) < 0); } else tty_ldisc_kill(tty); } -- cgit v1.2.3-70-g09d2 From e8c97af0c1f23d6ffedcaa3918861f2595e1db62 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 13 Oct 2017 15:58:11 -0700 Subject: linux/kernel.h: add/correct kernel-doc notation Add kernel-doc notation for some macros. Correct kernel-doc comments & typos for a few macros. Link: http://lkml.kernel.org/r/76fa1403-1511-be4c-e9c4-456b43edfad3@infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 90 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 16 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 0ad4c3044cf9..91189bb0c818 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -44,6 +44,12 @@ #define STACK_MAGIC 0xdeadbeef +/** + * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. + */ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) /* @a is a power of 2 value */ @@ -57,6 +63,10 @@ #define READ 0 #define WRITE 1 +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) #define u64_to_user_ptr(x) ( \ @@ -76,7 +86,15 @@ #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) +/** + * FIELD_SIZEOF - get the size of a struct's field + * @t: the target struct + * @f: the target struct's field + * Return: the size of @f in the struct definition without having a + * declared instance of @t. + */ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) + #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP #define DIV_ROUND_DOWN_ULL(ll, d) \ @@ -107,7 +125,7 @@ /* * Divide positive or negative dividend by positive or negative divisor * and round to closest integer. Result is undefined for negative - * divisors if he dividend variable type is unsigned and for negative + * divisors if the dividend variable type is unsigned and for negative * dividends if the divisor variable type is unsigned. */ #define DIV_ROUND_CLOSEST(x, divisor)( \ @@ -247,13 +265,13 @@ extern int _cond_resched(void); * @ep_ro: right open interval endpoint * * Perform a "reciprocal multiplication" in order to "scale" a value into - * range [0, ep_ro), where the upper interval endpoint is right-open. + * range [0, @ep_ro), where the upper interval endpoint is right-open. * This is useful, e.g. for accessing a index of an array containing - * ep_ro elements, for example. Think of it as sort of modulus, only that + * @ep_ro elements, for example. Think of it as sort of modulus, only that * the result isn't that of modulo. ;) Note that if initial input is a * small value, then result will return 0. * - * Return: a result based on val in interval [0, ep_ro). + * Return: a result based on @val in interval [0, @ep_ro). */ static inline u32 reciprocal_scale(u32 val, u32 ep_ro) { @@ -618,8 +636,8 @@ do { \ * trace_printk - printf formatting in the ftrace buffer * @fmt: the printf format for printing * - * Note: __trace_printk is an internal function for trace_printk and - * the @ip is passed in via the trace_printk macro. + * Note: __trace_printk is an internal function for trace_printk() and + * the @ip is passed in via the trace_printk() macro. * * This function allows a kernel developer to debug fast path sections * that printk is not appropriate for. By scattering in various @@ -629,7 +647,7 @@ do { \ * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_printks scattered around in * your code. (Extra memory is used for special buffers that are - * allocated when trace_printk() is used) + * allocated when trace_printk() is used.) * * A little optization trick is done here. If there's only one * argument, there's no need to scan the string for printf formats. @@ -681,7 +699,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); * the @ip is passed in via the trace_puts macro. * * This is similar to trace_printk() but is made for those really fast - * paths that a developer wants the least amount of "Heisenbug" affects, + * paths that a developer wants the least amount of "Heisenbug" effects, * where the processing of the print format is still too much. * * This function allows a kernel developer to debug fast path sections @@ -692,7 +710,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_puts scattered around in * your code. (Extra memory is used for special buffers that are - * allocated when trace_puts() is used) + * allocated when trace_puts() is used.) * * Returns: 0 if nothing was written, positive # if string was. * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) @@ -771,6 +789,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } t2 min2 = (y); \ (void) (&min1 == &min2); \ min1 < min2 ? min1 : min2; }) + +/** + * min - return minimum of two values of the same or compatible types + * @x: first value + * @y: second value + */ #define min(x, y) \ __min(typeof(x), typeof(y), \ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ @@ -781,12 +805,31 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } t2 max2 = (y); \ (void) (&max1 == &max2); \ max1 > max2 ? max1 : max2; }) + +/** + * max - return maximum of two values of the same or compatible types + * @x: first value + * @y: second value + */ #define max(x, y) \ __max(typeof(x), typeof(y), \ __UNIQUE_ID(max1_), __UNIQUE_ID(max2_), \ x, y) +/** + * min3 - return minimum of three values + * @x: first value + * @y: second value + * @z: third value + */ #define min3(x, y, z) min((typeof(x))min(x, y), z) + +/** + * max3 - return maximum of three values + * @x: first value + * @y: second value + * @z: third value + */ #define max3(x, y, z) max((typeof(x))max(x, y), z) /** @@ -805,8 +848,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @lo: lowest allowable value * @hi: highest allowable value * - * This macro does strict typechecking of lo/hi to make sure they are of the - * same type as val. See the unnecessary pointer comparisons. + * This macro does strict typechecking of @lo/@hi to make sure they are of the + * same type as @val. See the unnecessary pointer comparisons. */ #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) @@ -816,11 +859,24 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * * Or not use min/max/clamp at all, of course. */ + +/** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ #define min_t(type, x, y) \ __min(type, type, \ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ x, y) +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ #define max_t(type, x, y) \ __max(type, type, \ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ @@ -834,7 +890,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of type - * 'type' to make all the comparisons. + * @type to make all the comparisons. */ #define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) @@ -845,15 +901,17 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of whatever - * type the input argument 'val' is. This is useful when val is an unsigned - * type and min and max are literals that will otherwise be assigned a signed + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed * integer type. */ #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) -/* - * swap - swap value of @a and @b +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value */ #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) -- cgit v1.2.3-70-g09d2 From f892760aa66a2d657deaf59538fb69433036767c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 13 Oct 2017 15:58:15 -0700 Subject: fs/mpage.c: fix mpage_writepage() for pages with buffers When using FAT on a block device which supports rw_page, we can hit BUG_ON(!PageLocked(page)) in try_to_free_buffers(). This is because we call clean_buffers() after unlocking the page we've written. Introduce a new clean_page_buffers() which cleans all buffers associated with a page and call it from within bdev_write_page(). [akpm@linux-foundation.org: s/PAGE_SIZE/~0U/ per Linus and Matthew] Link: http://lkml.kernel.org/r/20171006211541.GA7409@bombadil.infradead.org Signed-off-by: Matthew Wilcox Reported-by: Toshi Kani Reported-by: OGAWA Hirofumi Tested-by: Toshi Kani Acked-by: Johannes Thumshirn Cc: Ross Zwisler Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 6 ++++-- fs/mpage.c | 14 +++++++++++--- include/linux/buffer_head.h | 1 + 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 93d088ffc05c..789f55e851ae 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -716,10 +716,12 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true); - if (result) + if (result) { end_page_writeback(page); - else + } else { + clean_page_buffers(page); unlock_page(page); + } blk_queue_exit(bdev->bd_queue); return result; } diff --git a/fs/mpage.c b/fs/mpage.c index 37bb77c1302c..c991faec70b9 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -468,6 +468,16 @@ static void clean_buffers(struct page *page, unsigned first_unmapped) try_to_free_buffers(page); } +/* + * For situations where we want to clean all buffers attached to a page. + * We don't need to calculate how many buffers are attached to the page, + * we just need to specify a number larger than the maximum number of buffers. + */ +void clean_page_buffers(struct page *page) +{ + clean_buffers(page, ~0U); +} + static int __mpage_writepage(struct page *page, struct writeback_control *wbc, void *data) { @@ -605,10 +615,8 @@ alloc_new: if (bio == NULL) { if (first_unmapped == blocks_per_page) { if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9), - page, wbc)) { - clean_buffers(page, first_unmapped); + page, wbc)) goto out; - } } bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c8dae555eccf..446b24cac67d 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -232,6 +232,7 @@ int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); +void clean_page_buffers(struct page *page); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t *, loff_t *); -- cgit v1.2.3-70-g09d2 From 7e86600606cef21beec725039d70377fb364f881 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Fri, 13 Oct 2017 15:58:18 -0700 Subject: fs/binfmt_misc.c: node could be NULL when evicting inode inode->i_private is assigned by a Node pointer only after registering a new binary format, so it could be NULL if inode was created by bm_fill_super() (or iput() was called by the error path in bm_register_write()), and this could result in NULL pointer dereference when evicting such an inode. e.g. mount binfmt_misc filesystem then umount it immediately: mount -t binfmt_misc binfmt_misc /proc/sys/fs/binfmt_misc umount /proc/sys/fs/binfmt_misc will result in BUG: unable to handle kernel NULL pointer dereference at 0000000000000013 IP: bm_evict_inode+0x16/0x40 [binfmt_misc] ... Call Trace: evict+0xd3/0x1a0 iput+0x17d/0x1d0 dentry_unlink_inode+0xb9/0xf0 __dentry_kill+0xc7/0x170 shrink_dentry_list+0x122/0x280 shrink_dcache_parent+0x39/0x90 do_one_tree+0x12/0x40 shrink_dcache_for_umount+0x2d/0x90 generic_shutdown_super+0x1f/0x120 kill_litter_super+0x29/0x40 deactivate_locked_super+0x43/0x70 deactivate_super+0x45/0x60 cleanup_mnt+0x3f/0x70 __cleanup_mnt+0x12/0x20 task_work_run+0x86/0xa0 exit_to_usermode_loop+0x6d/0x99 syscall_return_slowpath+0xba/0xf0 entry_SYSCALL_64_fastpath+0xa3/0xa Fix it by making sure Node (e) is not NULL. Link: http://lkml.kernel.org/r/20171010100642.31786-1-eguan@redhat.com Fixes: 83f918274e4b ("exec: binfmt_misc: shift filp_close(interp_file) from kill_node() to bm_evict_inode()") Signed-off-by: Eryu Guan Acked-by: Oleg Nesterov Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 2a46762def31..a7c5a9861bef 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -596,7 +596,7 @@ static void bm_evict_inode(struct inode *inode) { Node *e = inode->i_private; - if (e->flags & MISC_FMT_OPEN_FILE) + if (e && e->flags & MISC_FMT_OPEN_FILE) filp_close(e->interp_file, NULL); clear_inode(inode); -- cgit v1.2.3-70-g09d2 From ca182551857cc2c1e6a2b7f1e72090a137a15008 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 13 Oct 2017 15:58:22 -0700 Subject: kmemleak: clear stale pointers from task stacks Kmemleak considers any pointers on task stacks as references. This patch clears newly allocated and reused vmap stacks. Link: http://lkml.kernel.org/r/150728990124.744199.8403409836394318684.stgit@buzz Signed-off-by: Konstantin Khlebnikov Acked-by: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/thread_info.h | 2 +- kernel/fork.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 905d769d8ddc..5f7eeab990fe 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -42,7 +42,7 @@ enum { #define THREAD_ALIGN THREAD_SIZE #endif -#ifdef CONFIG_DEBUG_STACK_USAGE +#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ __GFP_ZERO) #else diff --git a/kernel/fork.c b/kernel/fork.c index e702cb9ffbd8..07cc743698d3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -215,6 +215,10 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; +#ifdef CONFIG_DEBUG_KMEMLEAK + /* Clear stale pointers from reused stack. */ + memset(s->addr, 0, THREAD_SIZE); +#endif tsk->stack_vm_area = s; return s->addr; } -- cgit v1.2.3-70-g09d2 From a7b100953aa33a5bbdc3e5e7f2241b9c0704606e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Oct 2017 15:58:25 -0700 Subject: mm: page_vma_mapped: ensure pmd is loaded with READ_ONCE outside of lock Loading the pmd without holding the pmd_lock exposes us to races with concurrent updaters of the page tables but, worse still, it also allows the compiler to cache the pmd value in a register and reuse it later on, even if we've performed a READ_ONCE in between and seen a more recent value. In the case of page_vma_mapped_walk, this leads to the following crash when the pmd loaded for the initial pmd_trans_huge check is all zeroes and a subsequent valid table entry is loaded by check_pmd. We then proceed into map_pte, but the compiler re-uses the zero entry inside pte_offset_map, resulting in a junk pointer being installed in pvmw->pte: PC is at check_pte+0x20/0x170 LR is at page_vma_mapped_walk+0x2e0/0x540 [...] Process doio (pid: 2463, stack limit = 0xffff00000f2e8000) Call trace: check_pte+0x20/0x170 page_vma_mapped_walk+0x2e0/0x540 page_mkclean_one+0xac/0x278 rmap_walk_file+0xf0/0x238 rmap_walk+0x64/0xa0 page_mkclean+0x90/0xa8 clear_page_dirty_for_io+0x84/0x2a8 mpage_submit_page+0x34/0x98 mpage_process_page_bufs+0x164/0x170 mpage_prepare_extent_to_map+0x134/0x2b8 ext4_writepages+0x484/0xe30 do_writepages+0x44/0xe8 __filemap_fdatawrite_range+0xbc/0x110 file_write_and_wait_range+0x48/0xd8 ext4_sync_file+0x80/0x4b8 vfs_fsync_range+0x64/0xc0 SyS_msync+0x194/0x1e8 This patch fixes the problem by ensuring that READ_ONCE is used before the initial checks on the pmd, and this value is subsequently used when checking whether or not the pmd is present. pmd_check is removed and the pmd_present check is inlined directly. Link: http://lkml.kernel.org/r/1507222630-5839-1-git-send-email-will.deacon@arm.com Fixes: f27176cfc363 ("mm: convert page_mkclean_one() to use page_vma_mapped_walk()") Signed-off-by: Will Deacon Tested-by: Yury Norov Tested-by: Richard Ruigrok Acked-by: Kirill A. Shutemov Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_vma_mapped.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index eb462e7db0a9..53afbb919a1c 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -6,17 +6,6 @@ #include "internal.h" -static inline bool check_pmd(struct page_vma_mapped_walk *pvmw) -{ - pmd_t pmde; - /* - * Make sure we don't re-load pmd between present and !trans_huge check. - * We need a consistent view. - */ - pmde = READ_ONCE(*pvmw->pmd); - return pmd_present(pmde) && !pmd_trans_huge(pmde); -} - static inline bool not_found(struct page_vma_mapped_walk *pvmw) { page_vma_mapped_walk_done(pvmw); @@ -116,6 +105,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) pgd_t *pgd; p4d_t *p4d; pud_t *pud; + pmd_t pmde; /* The only possible pmd mapping has been handled on last iteration */ if (pvmw->pmd && !pvmw->pte) @@ -148,7 +138,13 @@ restart: if (!pud_present(*pud)) return false; pvmw->pmd = pmd_offset(pud, pvmw->address); - if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) { + /* + * Make sure the pmd value isn't cached in a register by the + * compiler and used as a stale value after we've observed a + * subsequent update. + */ + pmde = READ_ONCE(*pvmw->pmd); + if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); if (likely(pmd_trans_huge(*pvmw->pmd))) { if (pvmw->flags & PVMW_MIGRATION) @@ -174,9 +170,8 @@ restart: spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } - } else { - if (!check_pmd(pvmw)) - return false; + } else if (!pmd_present(pmde)) { + return false; } if (!map_pte(pvmw)) goto next_pte; -- cgit v1.2.3-70-g09d2 From 61b639723be5a9fc4812d5d85cb769589afa5a38 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 13 Oct 2017 15:58:29 -0700 Subject: mm, swap: use page-cluster as max window of VMA based swap readahead When the VMA based swap readahead was introduced, a new knob /sys/kernel/mm/swap/vma_ra_max_order was added as the max window of VMA swap readahead. This is to make it possible to use different max window for VMA based readahead and original physical readahead. But Minchan Kim pointed out that this will cause a regression because setting page-cluster sysctl to zero cannot disable swap readahead with the change. To fix the regression, the page-cluster sysctl is used as the max window of both the VMA based swap readahead and original physical swap readahead. If more fine grained control is needed in the future, more knobs can be added as the subordinate knobs of the page-cluster sysctl. The vma_ra_max_order knob is deleted. Because the knob was introduced in v4.14-rc1, and this patch is targeting being merged before v4.14 releasing, there should be no existing users of this newly added ABI. Link: http://lkml.kernel.org/r/20171011070847.16003-1-ying.huang@intel.com Fixes: ec560175c0b6fce ("mm, swap: VMA based swap readahead") Signed-off-by: "Huang, Ying" Reported-by: Minchan Kim Acked-by: Minchan Kim Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Rik van Riel Cc: Shaohua Li Cc: Hugh Dickins Cc: Fengguang Wu Cc: Tim Chen Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-kernel-mm-swap | 10 ------- mm/swap_state.c | 41 +++++--------------------- 2 files changed, 7 insertions(+), 44 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-swap b/Documentation/ABI/testing/sysfs-kernel-mm-swap index 587db52084c7..94672016c268 100644 --- a/Documentation/ABI/testing/sysfs-kernel-mm-swap +++ b/Documentation/ABI/testing/sysfs-kernel-mm-swap @@ -14,13 +14,3 @@ Description: Enable/disable VMA based swap readahead. still used for tmpfs etc. other users. If set to false, the global swap readahead algorithm will be used for all swappable pages. - -What: /sys/kernel/mm/swap/vma_ra_max_order -Date: August 2017 -Contact: Linux memory management mailing list -Description: The max readahead size in order for VMA based swap readahead - - VMA based swap readahead algorithm will readahead at - most 1 << max_order pages for each readahead. The - real readahead size for each readahead will be scaled - according to the estimation algorithm. diff --git a/mm/swap_state.c b/mm/swap_state.c index ed91091d1e68..05b6803f0cce 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -39,10 +39,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES]; static unsigned int nr_swapper_spaces[MAX_SWAPFILES]; bool swap_vma_readahead = true; -#define SWAP_RA_MAX_ORDER_DEFAULT 3 - -static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT; - #define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) #define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) #define SWAP_RA_HITS_MAX SWAP_RA_HITS_MASK @@ -664,6 +660,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, pte_t *tpte; #endif + max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), + SWAP_RA_ORDER_CEILING); + if (max_win == 1) { + swap_ra->win = 1; + return NULL; + } + faddr = vmf->address; entry = pte_to_swp_entry(vmf->orig_pte); if ((unlikely(non_swap_entry(entry)))) @@ -672,12 +675,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, if (page) return page; - max_win = 1 << READ_ONCE(swap_ra_max_order); - if (max_win == 1) { - swap_ra->win = 1; - return NULL; - } - fpfn = PFN_DOWN(faddr); swap_ra_info = GET_SWAP_RA_VAL(vma); pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info)); @@ -786,32 +783,8 @@ static struct kobj_attribute vma_ra_enabled_attr = __ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show, vma_ra_enabled_store); -static ssize_t vma_ra_max_order_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", swap_ra_max_order); -} -static ssize_t vma_ra_max_order_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - int err, v; - - err = kstrtoint(buf, 10, &v); - if (err || v > SWAP_RA_ORDER_CEILING || v <= 0) - return -EINVAL; - - swap_ra_max_order = v; - - return count; -} -static struct kobj_attribute vma_ra_max_order_attr = - __ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show, - vma_ra_max_order_store); - static struct attribute *swap_attrs[] = { &vma_ra_enabled_attr.attr, - &vma_ra_max_order_attr.attr, NULL, }; -- cgit v1.2.3-70-g09d2 From ac64115a66c18c01745bbd3c47a36b124e5fd8c0 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Thu, 14 Sep 2017 23:56:25 +0200 Subject: KVM: PPC: Fix oops when checking KVM_CAP_PPC_HTM The following program causes a kernel oops: #include #include #include #include #include main() { int fd = open("/dev/kvm", O_RDWR); ioctl(fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_HTM); } This happens because when using the global KVM fd with KVM_CHECK_EXTENSION, kvm_vm_ioctl_check_extension() gets called with a NULL kvm argument, which gets dereferenced in is_kvmppc_hv_enabled(). Spotted while reading the code. Let's use the hv_enabled fallback variable, like everywhere else in this function. Fixes: 23528bb21ee2 ("KVM: PPC: Introduce KVM_CAP_PPC_HTM") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Greg Kurz Reviewed-by: David Gibson Reviewed-by: Thomas Huth Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3480faaf1ef8..ee279c7f4802 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -644,8 +644,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; #endif case KVM_CAP_PPC_HTM: - r = cpu_has_feature(CPU_FTR_TM_COMP) && - is_kvmppc_hv_enabled(kvm); + r = cpu_has_feature(CPU_FTR_TM_COMP) && hv_enabled; break; default: r = 0; -- cgit v1.2.3-70-g09d2 From 2cde3716321ec64a1faeaf567bd94100c7b4160f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 10 Oct 2017 20:18:28 +1000 Subject: KVM: PPC: Book3S HV: POWER9 more doorbell fixes - Add another case where msgsync is required. - Required barrier sequence for global doorbells is msgsync ; lwsync When msgsnd is used for IPIs to other cores, msgsync must be executed by the target to order stores performed on the source before its msgsnd (provided the source executes the appropriate sync). Fixes: 1704a81ccebc ("KVM: PPC: Book3S HV: Use msgsnd for IPIs to other cores on POWER9") Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index ec69fa45d5a2..c700bedccaab 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1310,6 +1310,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) bne 3f BEGIN_FTR_SECTION PPC_MSGSYNC + lwsync END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 @@ -2788,6 +2789,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) PPC_MSGCLR(6) /* see if it's a host IPI */ li r3, 1 +BEGIN_FTR_SECTION + PPC_MSGSYNC + lwsync +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 bnelr -- cgit v1.2.3-70-g09d2 From 8f6a9f0d0604817f7c8d4376fd51718f1bf192ee Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 11 Oct 2017 16:00:34 +1100 Subject: KVM: PPC: Book3S: Protect kvmppc_gpa_to_ua() with SRCU kvmppc_gpa_to_ua() accesses KVM memory slot array via srcu_dereference_check() and this produces warnings from RCU like below. This extends the existing srcu_read_lock/unlock to cover that kvmppc_gpa_to_ua() as well. We did not hit this before as this lock is not needed for the realmode handlers and hash guests would use the realmode path all the time; however the radix guests are always redirected to the virtual mode handlers and hence the warning. [ 68.253798] ./include/linux/kvm_host.h:575 suspicious rcu_dereference_check() usage! [ 68.253799] other info that might help us debug this: [ 68.253802] rcu_scheduler_active = 2, debug_locks = 1 [ 68.253804] 1 lock held by qemu-system-ppc/6413: [ 68.253806] #0: (&vcpu->mutex){+.+.}, at: [] vcpu_load+0x3c/0xc0 [kvm] [ 68.253826] stack backtrace: [ 68.253830] CPU: 92 PID: 6413 Comm: qemu-system-ppc Tainted: G W 4.14.0-rc3-00553-g432dcba58e9c-dirty #72 [ 68.253833] Call Trace: [ 68.253839] [c000000fd3d9f790] [c000000000b7fcc8] dump_stack+0xe8/0x160 (unreliable) [ 68.253845] [c000000fd3d9f7d0] [c0000000001924c0] lockdep_rcu_suspicious+0x110/0x180 [ 68.253851] [c000000fd3d9f850] [c0000000000e825c] kvmppc_gpa_to_ua+0x26c/0x2b0 [ 68.253858] [c000000fd3d9f8b0] [c00800000e3e1984] kvmppc_h_put_tce+0x12c/0x2a0 [kvm] Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 8f2da8bba737..4dffa611376d 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -478,28 +478,30 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; dir = iommu_tce_direction(tce); + + idx = srcu_read_lock(&vcpu->kvm->srcu); + if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, - tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) - return H_PARAMETER; + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) { + ret = H_PARAMETER; + goto unlock_exit; + } entry = ioba >> stt->page_shift; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { - if (dir == DMA_NONE) { + if (dir == DMA_NONE) ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stit->tbl, entry); - } else { - idx = srcu_read_lock(&vcpu->kvm->srcu); + else ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, entry, ua, dir); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - } if (ret == H_SUCCESS) continue; if (ret == H_TOO_HARD) - return ret; + goto unlock_exit; WARN_ON_ONCE(1); kvmppc_clear_tce(stit->tbl, entry); @@ -507,7 +509,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, kvmppc_tce_put(stt, entry, tce); - return H_SUCCESS; +unlock_exit: + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return ret; } EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); -- cgit v1.2.3-70-g09d2 From b956575bed91ecfb136a8300742ecbbf451471ab Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 9 Oct 2017 09:50:49 -0700 Subject: x86/mm: Flush more aggressively in lazy TLB mode Since commit: 94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking") x86's lazy TLB mode has been all the way lazy: when running a kernel thread (including the idle thread), the kernel keeps using the last user mm's page tables without attempting to maintain user TLB coherence at all. From a pure semantic perspective, this is fine -- kernel threads won't attempt to access user pages, so having stale TLB entries doesn't matter. Unfortunately, I forgot about a subtlety. By skipping TLB flushes, we also allow any paging-structure caches that may exist on the CPU to become incoherent. This means that we can have a paging-structure cache entry that references a freed page table, and the CPU is within its rights to do a speculative page walk starting at the freed page table. I can imagine this causing two different problems: - A speculative page walk starting from a bogus page table could read IO addresses. I haven't seen any reports of this causing problems. - A speculative page walk that involves a bogus page table can install garbage in the TLB. Such garbage would always be at a user VA, but some AMD CPUs have logic that triggers a machine check when it notices these bogus entries. I've seen a couple reports of this. Boris further explains the failure mode: > It is actually more of an optimization which assumes that paging-structure > entries are in WB DRAM: > > "TlbCacheDis: cacheable memory disable. Read-write. 0=Enables > performance optimization that assumes PML4, PDP, PDE, and PTE entries > are in cacheable WB-DRAM; memory type checks may be bypassed, and > addresses outside of WB-DRAM may result in undefined behavior or NB > protocol errors. 1=Disables performance optimization and allows PML4, > PDP, PDE and PTE entries to be in any memory type. Operating systems > that maintain page tables in memory types other than WB- DRAM must set > TlbCacheDis to insure proper operation." > > The MCE generated is an NB protocol error to signal that > > "Link: A specific coherent-only packet from a CPU was issued to an > IO link. This may be caused by software which addresses page table > structures in a memory type other than cacheable WB-DRAM without > properly configuring MSRC001_0015[TlbCacheDis]. This may occur, for > example, when page table structure addresses are above top of memory. In > such cases, the NB will generate an MCE if it sees a mismatch between > the memory operation generated by the core and the link type." > > I'm assuming coherent-only packets don't go out on IO links, thus the > error. To fix this, reinstate TLB coherence in lazy mode. With this patch applied, we do it in one of two ways: - If we have PCID, we simply switch back to init_mm's page tables when we enter a kernel thread -- this seems to be quite cheap except for the cost of serializing the CPU. - If we don't have PCID, then we set a flag and switch to init_mm the first time we would otherwise need to flush the TLB. The /sys/kernel/debug/x86/tlb_use_lazy_mode debug switch can be changed to override the default mode for benchmarking. In theory, we could optimize this better by only flushing the TLB in lazy CPUs when a page table is freed. Doing that would require auditing the mm code to make sure that all page table freeing goes through tlb_remove_page() as well as reworking some data structures to implement the improved flush logic. Reported-by: Markus Trippelsdorf Reported-by: Adam Borowski Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Borkmann Cc: Eric Biggers Cc: Johannes Hirte Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Nadav Amit Cc: Peter Zijlstra Cc: Rik van Riel Cc: Roman Kagan Cc: Thomas Gleixner Fixes: 94b1b03b519b ("x86/mm: Rework lazy TLB mode and TLB freshness tracking") Link: http://lkml.kernel.org/r/20171009170231.fkpraqokz6e4zeco@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 8 +- arch/x86/include/asm/tlbflush.h | 24 ++++++ arch/x86/mm/tlb.c | 153 +++++++++++++++++++++++++++---------- 3 files changed, 136 insertions(+), 49 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index c120b5db178a..3c856a15b98e 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) DEBUG_LOCKS_WARN_ON(preemptible()); } -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ - int cpu = smp_processor_id(); - - if (cpumask_test_cpu(cpu, mm_cpumask(mm))) - cpumask_clear_cpu(cpu, mm_cpumask(mm)); -} +void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 4893abf7f74f..d362161d3291 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -82,6 +82,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) #define __flush_tlb_single(addr) __native_flush_tlb_single(addr) #endif +/* + * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point + * to init_mm when we switch to a kernel thread (e.g. the idle thread). If + * it's false, then we immediately switch CR3 when entering a kernel thread. + */ +DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode); + /* * 6 because 6 should be plenty and struct tlb_state will fit in * two cache lines. @@ -104,6 +111,23 @@ struct tlb_state { u16 loaded_mm_asid; u16 next_asid; + /* + * We can be in one of several states: + * + * - Actively using an mm. Our CPU's bit will be set in + * mm_cpumask(loaded_mm) and is_lazy == false; + * + * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit + * will not be set in mm_cpumask(&init_mm) and is_lazy == false. + * + * - Lazily using a real mm. loaded_mm != &init_mm, our bit + * is set in mm_cpumask(loaded_mm), but is_lazy == true. + * We're heuristically guessing that the CR3 load we + * skipped more than makes up for the overhead added by + * lazy mode. + */ + bool is_lazy; + /* * Access to this CR4 shadow and to H/W CR4 is protected by * disabling interrupts when modifying either one. diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 49d9778376d7..658bf0090565 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -30,6 +30,8 @@ atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); +DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode); + static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, u16 *new_asid, bool *need_flush) { @@ -80,7 +82,7 @@ void leave_mm(int cpu) return; /* Warn if we're not lazy. */ - WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))); + WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy)); switch_mm(NULL, &init_mm, NULL); } @@ -142,45 +144,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, __flush_tlb_all(); } #endif + this_cpu_write(cpu_tlbstate.is_lazy, false); if (real_prev == next) { VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != next->context.ctx_id); - if (cpumask_test_cpu(cpu, mm_cpumask(next))) { - /* - * There's nothing to do: we weren't lazy, and we - * aren't changing our mm. We don't need to flush - * anything, nor do we need to update CR3, CR4, or - * LDTR. - */ - return; - } - - /* Resume remote flushes and then read tlb_gen. */ - cpumask_set_cpu(cpu, mm_cpumask(next)); - next_tlb_gen = atomic64_read(&next->context.tlb_gen); - - if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) < - next_tlb_gen) { - /* - * Ideally, we'd have a flush_tlb() variant that - * takes the known CR3 value as input. This would - * be faster on Xen PV and on hypothetical CPUs - * on which INVPCID is fast. - */ - this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen, - next_tlb_gen); - write_cr3(build_cr3(next, prev_asid)); - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, - TLB_FLUSH_ALL); - } - /* - * We just exited lazy mode, which means that CR4 and/or LDTR - * may be stale. (Changes to the required CR4 and LDTR states - * are not reflected in tlb_gen.) + * We don't currently support having a real mm loaded without + * our cpu set in mm_cpumask(). We have all the bookkeeping + * in place to figure out whether we would need to flush + * if our cpu were cleared in mm_cpumask(), but we don't + * currently use it. */ + if (WARN_ON_ONCE(real_prev != &init_mm && + !cpumask_test_cpu(cpu, mm_cpumask(next)))) + cpumask_set_cpu(cpu, mm_cpumask(next)); + + return; } else { u16 new_asid; bool need_flush; @@ -199,10 +180,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } /* Stop remote flushes for the previous mm */ - if (cpumask_test_cpu(cpu, mm_cpumask(real_prev))) - cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); - - VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next))); + VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) && + real_prev != &init_mm); + cpumask_clear_cpu(cpu, mm_cpumask(real_prev)); /* * Start remote flushes and then read tlb_gen. @@ -232,6 +212,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, switch_ldt(real_prev, next); } +/* + * enter_lazy_tlb() is a hint from the scheduler that we are entering a + * kernel thread or other context without an mm. Acceptable implementations + * include doing nothing whatsoever, switching to init_mm, or various clever + * lazy tricks to try to minimize TLB flushes. + * + * The scheduler reserves the right to call enter_lazy_tlb() several times + * in a row. It will notify us that we're going back to a real mm by + * calling switch_mm_irqs_off(). + */ +void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ + if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) + return; + + if (static_branch_unlikely(&tlb_use_lazy_mode)) { + /* + * There's a significant optimization that may be possible + * here. We have accurate enough TLB flush tracking that we + * don't need to maintain coherence of TLB per se when we're + * lazy. We do, however, need to maintain coherence of + * paging-structure caches. We could, in principle, leave our + * old mm loaded and only switch to init_mm when + * tlb_remove_page() happens. + */ + this_cpu_write(cpu_tlbstate.is_lazy, true); + } else { + switch_mm(NULL, &init_mm, NULL); + } +} + /* * Call this when reinitializing a CPU. It fixes the following potential * problems: @@ -303,16 +314,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f, /* This code cannot presently handle being reentered. */ VM_WARN_ON(!irqs_disabled()); + if (unlikely(loaded_mm == &init_mm)) + return; + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) != loaded_mm->context.ctx_id); - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) { + if (this_cpu_read(cpu_tlbstate.is_lazy)) { /* - * We're in lazy mode -- don't flush. We can get here on - * remote flushes due to races and on local flushes if a - * kernel thread coincidentally flushes the mm it's lazily - * still using. + * We're in lazy mode. We need to at least flush our + * paging-structure cache to avoid speculatively reading + * garbage into our TLB. Since switching to init_mm is barely + * slower than a minimal flush, just switch to init_mm. */ + switch_mm_irqs_off(NULL, &init_mm, NULL); return; } @@ -611,3 +626,57 @@ static int __init create_tlb_single_page_flush_ceiling(void) return 0; } late_initcall(create_tlb_single_page_flush_ceiling); + +static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buf[2]; + + buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0'; + buf[1] = '\n'; + + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t tlblazy_write_file(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + bool val; + + if (kstrtobool_from_user(user_buf, count, &val)) + return -EINVAL; + + if (val) + static_branch_enable(&tlb_use_lazy_mode); + else + static_branch_disable(&tlb_use_lazy_mode); + + return count; +} + +static const struct file_operations fops_tlblazy = { + .read = tlblazy_read_file, + .write = tlblazy_write_file, + .llseek = default_llseek, +}; + +static int __init init_tlb_use_lazy_mode(void) +{ + if (boot_cpu_has(X86_FEATURE_PCID)) { + /* + * Heuristic: with PCID on, switching to and from + * init_mm is reasonably fast, but remote flush IPIs + * as expensive as ever, so turn off lazy TLB mode. + * + * We can't do this in setup_pcid() because static keys + * haven't been initialized yet, and it would blow up + * badly. + */ + static_branch_disable(&tlb_use_lazy_mode); + } + + debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR, + arch_debugfs_dir, NULL, &fops_tlblazy); + return 0; +} +late_initcall(init_tlb_use_lazy_mode); -- cgit v1.2.3-70-g09d2 From b483cf3bc249d7af706390efa63d6671e80d1c09 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Oct 2017 09:26:59 +0200 Subject: locking/lockdep: Disable cross-release features for now Johan Hovold reported a big lockdep slowdown on his system, caused by lockdep: > I had noticed that the BeagleBone Black boot time appeared to have > increased significantly with 4.14 and yesterday I finally had time to > investigate it. > > Boot time (from "Linux version" to login prompt) had in fact doubled > since 4.13 where it took 17 seconds (with my current config) compared to > the 35 seconds I now see with 4.14-rc4. > > I quick bisect pointed to lockdep and specifically the following commit: > > 28a903f63ec0 ("locking/lockdep: Handle non(or multi)-acquisition of a crosslock") Because the final v4.14 release is close, disable the cross-release lockdep features for now. Bisected-by: Johan Hovold Debugged-by: Johan Hovold Reported-by: Johan Hovold Cc: Arnd Bergmann Cc: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Lindgren Cc: kernel-team@lge.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mm@kvack.org Cc: linux-omap@vger.kernel.org Link: http://lkml.kernel.org/r/20171014072659.f2yr6mhm5ha3eou7@gmail.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2689b7c50c52..e2705843c524 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1092,8 +1092,8 @@ config PROVE_LOCKING select DEBUG_MUTEXES select DEBUG_RT_MUTEXES if RT_MUTEXES select DEBUG_LOCK_ALLOC - select LOCKDEP_CROSSRELEASE - select LOCKDEP_COMPLETIONS + select LOCKDEP_CROSSRELEASE if BROKEN + select LOCKDEP_COMPLETIONS if BROKEN select TRACE_IRQFLAGS default n help -- cgit v1.2.3-70-g09d2 From 1f161f67a272cc4f29f27934dd3f74cb657eb5c4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 12 Oct 2017 13:23:16 +0200 Subject: x86/microcode: Do the family check first On CPUs like AMD's Geode, for example, we shouldn't even try to load microcode because they do not support the modern microcode loading interface. However, we do the family check *after* the other checks whether the loader has been disabled on the command line or whether we're running in a guest. So move the family checks first in order to exit early if we're being loaded on an unsupported family. Reported-and-tested-by: Sven Glodowski Signed-off-by: Borislav Petkov Cc: # 4.11.. Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://bugzilla.suse.com/show_bug.cgi?id=1061396 Link: http://lkml.kernel.org/r/20171012112316.977-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/microcode/core.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 86e8f0b2537b..c4fa4a85d4cb 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -122,9 +122,6 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (!have_cpuid_p()) - return *res; - /* * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not * completely accurate as xen pv guests don't see that CPUID bit set but @@ -166,24 +163,36 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name) void __init load_ucode_bsp(void) { unsigned int cpuid_1_eax; + bool intel = true; - if (check_loader_disabled_bsp()) + if (!have_cpuid_p()) return; cpuid_1_eax = native_cpuid_eax(1); switch (x86_cpuid_vendor()) { case X86_VENDOR_INTEL: - if (x86_family(cpuid_1_eax) >= 6) - load_ucode_intel_bsp(); + if (x86_family(cpuid_1_eax) < 6) + return; break; + case X86_VENDOR_AMD: - if (x86_family(cpuid_1_eax) >= 0x10) - load_ucode_amd_bsp(cpuid_1_eax); + if (x86_family(cpuid_1_eax) < 0x10) + return; + intel = false; break; + default: - break; + return; } + + if (check_loader_disabled_bsp()) + return; + + if (intel) + load_ucode_intel_bsp(); + else + load_ucode_amd_bsp(cpuid_1_eax); } static bool check_loader_disabled_ap(void) -- cgit v1.2.3-70-g09d2 From 1c86c9dd82f859b474474a7fee0d5195da2c9c1d Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 9 Oct 2017 11:43:44 -0700 Subject: ARM: dts: imx7d: Invert legacy PCI irq mapping According to i.MX7D reference manual (Rev. 0.1, table 7-1, page 1221) legacy PCI interrupt mapping is as follows: - PCIE INT A is IRQ 122 - PCIE INT B is IRQ 123 - PCIE INT C is IRQ 124 - PCIE INT D is IRQ 125 Invert the mapping information in corresponding DT node to reflect that. Cc: yurovsky@gmail.com Cc: Fabio Estevam Cc: Rob Herring Cc: Mark Rutland Signed-off-by: Andrey Smirnov Fixes: a816d5750edf ("ARM: dts: imx7d: Add node for PCIe controller") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index f46814a7ea44..4d308d17f040 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -144,10 +144,10 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &intc GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &intc GIC_SPI 124 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &intc GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_PCIE_CTRL_ROOT_CLK>, <&clks IMX7D_PLL_ENET_MAIN_100M_CLK>, <&clks IMX7D_PCIE_PHY_ROOT_CLK>; -- cgit v1.2.3-70-g09d2 From ca4c302398963c0cae29bc168e44cf91e40ff0d3 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Wed, 11 Oct 2017 14:21:14 +0300 Subject: iio: adc: at91-sama5d2_adc: fix probe error on missing trigger property This fix allows platforms to probe correctly even if the trigger edge property is missing. The hardware trigger will no longer be registered in the sybsystem Preserves backwards compatibility with the support that was in the driver before the hardware trigger. https://storage.kernelci.org/mainline/master/v4.14-rc2-255-g74d83ec2b734/arm/sama5_defconfig/lab-free-electrons/boot-at91-sama5d2_xplained.txt Signed-off-by: Eugen Hristev Fixes: 5e1a1da0f ("iio: adc: at91-sama5d2_adc: add hw trigger and buffer support") Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 45 ++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index bc5b38e3a147..a70ef7fec95f 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -225,6 +225,7 @@ struct at91_adc_trigger { char *name; unsigned int trgmod_value; unsigned int edge_type; + bool hw_trig; }; struct at91_adc_state { @@ -254,16 +255,25 @@ static const struct at91_adc_trigger at91_adc_trigger_list[] = { .name = "external_rising", .trgmod_value = AT91_SAMA5D2_TRGR_TRGMOD_EXT_TRIG_RISE, .edge_type = IRQ_TYPE_EDGE_RISING, + .hw_trig = true, }, { .name = "external_falling", .trgmod_value = AT91_SAMA5D2_TRGR_TRGMOD_EXT_TRIG_FALL, .edge_type = IRQ_TYPE_EDGE_FALLING, + .hw_trig = true, }, { .name = "external_any", .trgmod_value = AT91_SAMA5D2_TRGR_TRGMOD_EXT_TRIG_ANY, .edge_type = IRQ_TYPE_EDGE_BOTH, + .hw_trig = true, + }, + { + .name = "software", + .trgmod_value = AT91_SAMA5D2_TRGR_TRGMOD_NO_TRIGGER, + .edge_type = IRQ_TYPE_NONE, + .hw_trig = false, }, }; @@ -597,7 +607,7 @@ static int at91_adc_probe(struct platform_device *pdev) struct at91_adc_state *st; struct resource *res; int ret, i; - u32 edge_type; + u32 edge_type = IRQ_TYPE_NONE; indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*st)); if (!indio_dev) @@ -641,14 +651,14 @@ static int at91_adc_probe(struct platform_device *pdev) ret = of_property_read_u32(pdev->dev.of_node, "atmel,trigger-edge-type", &edge_type); if (ret) { - dev_err(&pdev->dev, - "invalid or missing value for atmel,trigger-edge-type\n"); - return ret; + dev_dbg(&pdev->dev, + "atmel,trigger-edge-type not specified, only software trigger available\n"); } st->selected_trig = NULL; - for (i = 0; i < AT91_SAMA5D2_HW_TRIG_CNT; i++) + /* find the right trigger, or no trigger at all */ + for (i = 0; i < AT91_SAMA5D2_HW_TRIG_CNT + 1; i++) if (at91_adc_trigger_list[i].edge_type == edge_type) { st->selected_trig = &at91_adc_trigger_list[i]; break; @@ -717,24 +727,27 @@ static int at91_adc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, indio_dev); - ret = at91_adc_buffer_init(indio_dev); - if (ret < 0) { - dev_err(&pdev->dev, "couldn't initialize the buffer.\n"); - goto per_clk_disable_unprepare; - } + if (st->selected_trig->hw_trig) { + ret = at91_adc_buffer_init(indio_dev); + if (ret < 0) { + dev_err(&pdev->dev, "couldn't initialize the buffer.\n"); + goto per_clk_disable_unprepare; + } - ret = at91_adc_trigger_init(indio_dev); - if (ret < 0) { - dev_err(&pdev->dev, "couldn't setup the triggers.\n"); - goto per_clk_disable_unprepare; + ret = at91_adc_trigger_init(indio_dev); + if (ret < 0) { + dev_err(&pdev->dev, "couldn't setup the triggers.\n"); + goto per_clk_disable_unprepare; + } } ret = iio_device_register(indio_dev); if (ret < 0) goto per_clk_disable_unprepare; - dev_info(&pdev->dev, "setting up trigger as %s\n", - st->selected_trig->name); + if (st->selected_trig->hw_trig) + dev_info(&pdev->dev, "setting up trigger as %s\n", + st->selected_trig->name); dev_info(&pdev->dev, "version: %x\n", readl_relaxed(st->base + AT91_SAMA5D2_VERSION)); -- cgit v1.2.3-70-g09d2 From 09001b03f722be96827bf8df5ba4d48b7ec0cc30 Mon Sep 17 00:00:00 2001 From: Wenhua Shi Date: Sat, 14 Oct 2017 18:51:36 +0200 Subject: net: fix typo in skbuff.c Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 16982de649b9..e62476beee95 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1896,7 +1896,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) } /* If we need update frag list, we are in troubles. - * Certainly, it possible to add an offset to skb data, + * Certainly, it is possible to add an offset to skb data, * but taking into account that pulling is expected to * be very rare operation, it is worth to fight against * further bloating skb head and crucify ourselves here instead. -- cgit v1.2.3-70-g09d2 From 5903f594935a3841137c86b9d5b75143a5b7121c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 13 Oct 2017 19:22:35 +0200 Subject: l2tp: check ps->sock before running pppol2tp_session_ioctl() When pppol2tp_session_ioctl() is called by pppol2tp_tunnel_ioctl(), the session may be unconnected. That is, it was created by pppol2tp_session_create() and hasn't been connected with pppol2tp_connect(). In this case, ps->sock is NULL, so we need to check for this case in order to avoid dereferencing a NULL pointer. Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index bc6e8bfc5be4..f50452b919d5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -988,6 +988,9 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session, session->name, cmd, arg); sk = ps->sock; + if (!sk) + return -EBADR; + sock_hold(sk); switch (cmd) { -- cgit v1.2.3-70-g09d2 From 3efc93c2bc243f940beb3324f67aa14e223abdd1 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 13 Oct 2017 13:39:22 -0400 Subject: net: dsa: mv88e6060: fix switch MAC address The 88E6060 Ethernet switch always transmits the multicast bit of the switch MAC address as a zero. It re-uses the corresponding bit 8 of the register "Switch MAC Address Register Bytes 0 & 1" for "DiffAddr". If the "DiffAddr" bit is 0, then all ports transmit the same source address. If it is set to 1, then bit 2:0 are used for the port number. The mv88e6060 driver is currently wrongly shifting the MAC address byte 0 by 9. To fix this, shift it by 8 as usual and clear its bit 0. Signed-off-by: Vivien Didelot Reviewed-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6060.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index dce7fa57eb55..f123ed57630d 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -214,8 +214,14 @@ static int mv88e6060_setup(struct dsa_switch *ds) static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr) { - /* Use the same MAC Address as FD Pause frames for all ports */ - REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 9) | addr[1]); + u16 val = addr[0] << 8 | addr[1]; + + /* The multicast bit is always transmitted as a zero, so the switch uses + * bit 8 for "DiffAddr", where 0 means all ports transmit the same SA. + */ + val &= 0xfeff; + + REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, val); REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]); REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]); -- cgit v1.2.3-70-g09d2 From c213eae8d3cd4c026f348ce4fd64f4754b3acf2b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 13 Oct 2017 21:09:29 -0400 Subject: bnxt_en: Improve VF/PF link change logic. Link status query firmware messages originating from the VFs are forwarded to the PF. The driver handles these interactions in a workqueue for the VF and PF. The VF driver waits for the response from the PF in the workqueue. If the PF and VF driver are running on the same host and the work for both PF and VF are queued on the same workqueue, the VF driver may not get the response if the PF work item is queued behind it on the same workqueue. This will lead to the VF link query message timing out. To prevent this, we create a private workqueue for PFs instead of using the common workqueue. The VF query and PF response will never be on the same workqueue. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 66 +++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index aacec8bc19d5..7906153c5c05 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -214,6 +214,8 @@ static const u16 bnxt_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE, }; +static struct workqueue_struct *bnxt_pf_wq; + static bool bnxt_vf_pciid(enum board_idx idx) { return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF); @@ -1024,12 +1026,28 @@ static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_napi *bnapi, return 0; } +static void bnxt_queue_sp_work(struct bnxt *bp) +{ + if (BNXT_PF(bp)) + queue_work(bnxt_pf_wq, &bp->sp_task); + else + schedule_work(&bp->sp_task); +} + +static void bnxt_cancel_sp_work(struct bnxt *bp) +{ + if (BNXT_PF(bp)) + flush_workqueue(bnxt_pf_wq); + else + cancel_work_sync(&bp->sp_task); +} + static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { if (!rxr->bnapi->in_reset) { rxr->bnapi->in_reset = true; set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } rxr->rx_next_cons = 0xffff; } @@ -1717,7 +1735,7 @@ static int bnxt_async_event_process(struct bnxt *bp, default: goto async_event_process_exit; } - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); async_event_process_exit: bnxt_ulp_async_events(bp, cmpl); return 0; @@ -1751,7 +1769,7 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp) set_bit(vf_id - bp->pf.first_vf_id, bp->pf.vf_event_bmap); set_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); break; case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT: @@ -6647,7 +6665,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) vnic->rx_mask = mask; set_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } } @@ -6920,7 +6938,7 @@ static void bnxt_tx_timeout(struct net_device *dev) netdev_err(bp->dev, "TX timeout detected, starting reset task!\n"); set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -6952,7 +6970,7 @@ static void bnxt_timer(unsigned long data) if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS) && bp->stats_coal_ticks) { set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } bnxt_restart_timer: mod_timer(&bp->timer, jiffies + bp->current_interval); @@ -7433,7 +7451,7 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, spin_unlock_bh(&bp->ntp_fltr_lock); set_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); return new_fltr->sw_id; @@ -7516,7 +7534,7 @@ static void bnxt_udp_tunnel_add(struct net_device *dev, if (bp->vxlan_port_cnt == 1) { bp->vxlan_port = ti->port; set_bit(BNXT_VXLAN_ADD_PORT_SP_EVENT, &bp->sp_event); - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } break; case UDP_TUNNEL_TYPE_GENEVE: @@ -7533,7 +7551,7 @@ static void bnxt_udp_tunnel_add(struct net_device *dev, return; } - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } static void bnxt_udp_tunnel_del(struct net_device *dev, @@ -7572,7 +7590,7 @@ static void bnxt_udp_tunnel_del(struct net_device *dev, return; } - schedule_work(&bp->sp_task); + bnxt_queue_sp_work(bp); } static int bnxt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, @@ -7720,7 +7738,7 @@ static void bnxt_remove_one(struct pci_dev *pdev) pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); bnxt_shutdown_tc(bp); - cancel_work_sync(&bp->sp_task); + bnxt_cancel_sp_work(bp); bp->sp_event = 0; bnxt_clear_int_mode(bp); @@ -8138,8 +8156,17 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) else device_set_wakeup_capable(&pdev->dev, false); - if (BNXT_PF(bp)) + if (BNXT_PF(bp)) { + if (!bnxt_pf_wq) { + bnxt_pf_wq = + create_singlethread_workqueue("bnxt_pf_wq"); + if (!bnxt_pf_wq) { + dev_err(&pdev->dev, "Unable to create workqueue.\n"); + goto init_err_pci_clean; + } + } bnxt_init_tc(bp); + } rc = register_netdev(dev); if (rc) @@ -8375,4 +8402,17 @@ static struct pci_driver bnxt_pci_driver = { #endif }; -module_pci_driver(bnxt_pci_driver); +static int __init bnxt_init(void) +{ + return pci_register_driver(&bnxt_pci_driver); +} + +static void __exit bnxt_exit(void) +{ + pci_unregister_driver(&bnxt_pci_driver); + if (bnxt_pf_wq) + destroy_workqueue(bnxt_pf_wq); +} + +module_init(bnxt_init); +module_exit(bnxt_exit); -- cgit v1.2.3-70-g09d2 From e2dc9b6e38fa3919e63d6d7905da70ca41cbf908 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 13 Oct 2017 21:09:30 -0400 Subject: bnxt_en: Don't use rtnl lock to protect link change logic in workqueue. As a further improvement to the PF/VF link change logic, use a private mutex instead of the rtnl lock to protect link change logic. With the new mutex, we don't have to take the rtnl lock in the workqueue when we have to handle link related functions. If the VF and PF drivers are running on the same host and both take the rtnl lock and one is waiting for the other, it will cause timeout. This patch fixes these timeouts. Fixes: 90c694bb7181 ("bnxt_en: Fix RTNL lock usage on bnxt_update_link().") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 25 ++++++++++++----------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 ++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++++ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7906153c5c05..3f596de2abe3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6345,7 +6345,9 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) } if (link_re_init) { + mutex_lock(&bp->link_lock); rc = bnxt_update_phy_setting(bp); + mutex_unlock(&bp->link_lock); if (rc) netdev_warn(bp->dev, "failed to update phy settings\n"); } @@ -7043,30 +7045,28 @@ static void bnxt_sp_task(struct work_struct *work) if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event)) bnxt_hwrm_port_qstats(bp); - /* These functions below will clear BNXT_STATE_IN_SP_TASK. They - * must be the last functions to be called before exiting. - */ if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) { - int rc = 0; + int rc; + mutex_lock(&bp->link_lock); if (test_and_clear_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, &bp->sp_event)) bnxt_hwrm_phy_qcaps(bp); - bnxt_rtnl_lock_sp(bp); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - rc = bnxt_update_link(bp, true); - bnxt_rtnl_unlock_sp(bp); + rc = bnxt_update_link(bp, true); + mutex_unlock(&bp->link_lock); if (rc) netdev_err(bp->dev, "SP task can't update link (rc: %x)\n", rc); } if (test_and_clear_bit(BNXT_HWRM_PORT_MODULE_SP_EVENT, &bp->sp_event)) { - bnxt_rtnl_lock_sp(bp); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_get_port_module_status(bp); - bnxt_rtnl_unlock_sp(bp); + mutex_lock(&bp->link_lock); + bnxt_get_port_module_status(bp); + mutex_unlock(&bp->link_lock); } + /* These functions below will clear BNXT_STATE_IN_SP_TASK. They + * must be the last functions to be called before exiting. + */ if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) bnxt_reset(bp, false); @@ -7766,6 +7766,7 @@ static int bnxt_probe_phy(struct bnxt *bp) rc); return rc; } + mutex_init(&bp->link_lock); rc = bnxt_update_link(bp, false); if (rc) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 7b888d4b2b55..d2925c04709a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1290,6 +1290,10 @@ struct bnxt { unsigned long *ntp_fltr_bmap; int ntp_fltr_count; + /* To protect link related settings during link changes and + * ethtool settings changes. + */ + struct mutex link_lock; struct bnxt_link_info link_info; struct ethtool_eee eee; u32 lpi_tmr_lo; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8eff05a3e0e4..b2cbc970b497 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1052,6 +1052,7 @@ static int bnxt_get_link_ksettings(struct net_device *dev, u32 ethtool_speed; ethtool_link_ksettings_zero_link_mode(lk_ksettings, supported); + mutex_lock(&bp->link_lock); bnxt_fw_to_ethtool_support_spds(link_info, lk_ksettings); ethtool_link_ksettings_zero_link_mode(lk_ksettings, advertising); @@ -1099,6 +1100,7 @@ static int bnxt_get_link_ksettings(struct net_device *dev, base->port = PORT_FIBRE; } base->phy_address = link_info->phy_addr; + mutex_unlock(&bp->link_lock); return 0; } @@ -1190,6 +1192,7 @@ static int bnxt_set_link_ksettings(struct net_device *dev, if (!BNXT_SINGLE_PF(bp)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); if (base->autoneg == AUTONEG_ENABLE) { BNXT_ETHTOOL_TO_FW_SPDS(fw_advertising, lk_ksettings, advertising); @@ -1234,6 +1237,7 @@ static int bnxt_set_link_ksettings(struct net_device *dev, rc = bnxt_hwrm_set_link_setting(bp, set_pause, false); set_setting_exit: + mutex_unlock(&bp->link_lock); return rc; } -- cgit v1.2.3-70-g09d2 From 7ab0760f5178169c4c218852f51646ea90817d7c Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 13 Oct 2017 21:09:31 -0400 Subject: bnxt_en: Fix VF PCIe link speed and width logic. PCIE PCIE_EP_REG_LINK_STATUS_CONTROL register is only defined in PF config space, so we must read it from the PF. Fixes: 90c4f788f6c0 ("bnxt_en: Report PCIe link speed and width during driver load") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3f596de2abe3..4ffa0b1e565a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7965,7 +7965,7 @@ static void bnxt_parse_log_pcie_link(struct bnxt *bp) enum pcie_link_width width = PCIE_LNK_WIDTH_UNKNOWN; enum pci_bus_speed speed = PCI_SPEED_UNKNOWN; - if (pcie_get_minimum_link(bp->pdev, &speed, &width) || + if (pcie_get_minimum_link(pci_physfn(bp->pdev), &speed, &width) || speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) netdev_info(bp->dev, "Failed to determine PCIe Link Info\n"); else -- cgit v1.2.3-70-g09d2 From 021570793d8cd86cb62ac038c535f4450586b454 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 13 Oct 2017 21:09:32 -0400 Subject: bnxt_en: Fix VF resource checking. In bnxt_sriov_enable(), we calculate to see if we have enough hardware resources to enable the requested number of VFs. The logic to check for minimum completion rings and statistics contexts is missing. Add the required checks so that VF configuration won't fail. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index d37925a8a65b..5ee18660bc33 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -502,6 +502,7 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) int rc = 0, vfs_supported; int min_rx_rings, min_tx_rings, min_rss_ctxs; int tx_ok = 0, rx_ok = 0, rss_ok = 0; + int avail_cp, avail_stat; /* Check if we can enable requested num of vf's. At a mininum * we require 1 RX 1 TX rings for each VF. In this minimum conf @@ -509,6 +510,10 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) */ vfs_supported = *num_vfs; + avail_cp = bp->pf.max_cp_rings - bp->cp_nr_rings; + avail_stat = bp->pf.max_stat_ctxs - bp->num_stat_ctxs; + avail_cp = min_t(int, avail_cp, avail_stat); + while (vfs_supported) { min_rx_rings = vfs_supported; min_tx_rings = vfs_supported; @@ -523,10 +528,12 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) min_rx_rings) rx_ok = 1; } - if (bp->pf.max_vnics - bp->nr_vnics < min_rx_rings) + if (bp->pf.max_vnics - bp->nr_vnics < min_rx_rings || + avail_cp < min_rx_rings) rx_ok = 0; - if (bp->pf.max_tx_rings - bp->tx_nr_rings >= min_tx_rings) + if (bp->pf.max_tx_rings - bp->tx_nr_rings >= min_tx_rings && + avail_cp >= min_tx_rings) tx_ok = 1; if (bp->pf.max_rsscos_ctxs - bp->rsscos_nr_ctxs >= min_rss_ctxs) -- cgit v1.2.3-70-g09d2 From cc72f3b1feb4fd38d33ab7a013d5ab95041cb8ba Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 13 Oct 2017 21:09:33 -0400 Subject: bnxt_en: Fix possible corrupted NVRAM parameters from firmware response. In bnxt_find_nvram_item(), it is copying firmware response data after releasing the mutex. This can cause the firmware response data to be corrupted if the next firmware response overwrites the response buffer. The rare problem shows up when running ethtool -i repeatedly. Fix it by calling the new variant _hwrm_send_message_silent() that requires the caller to take the mutex and to release it after the response data has been copied. Fixes: 3ebf6f0a09a2 ("bnxt_en: Add installed-package version reporting via Ethtool GDRVINFO") Reported-by: Sarveswara Rao Mygapula Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 +++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4ffa0b1e565a..dc5de275352a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3466,6 +3466,12 @@ int _hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout) return bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, false); } +int _hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 msg_len, + int timeout) +{ + return bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, true); +} + int hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout) { int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d2925c04709a..c911e69ff25f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1362,6 +1362,7 @@ void bnxt_set_ring_params(struct bnxt *); int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); void bnxt_hwrm_cmd_hdr_init(struct bnxt *, void *, u16, u16, u16); int _hwrm_send_message(struct bnxt *, void *, u32, int); +int _hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 len, int timeout); int hwrm_send_message(struct bnxt *, void *, u32, int); int hwrm_send_message_silent(struct bnxt *, void *, u32, int); int bnxt_hwrm_func_rgtr_async_events(struct bnxt *bp, unsigned long *bmap, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index b2cbc970b497..3cbe771b3352 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1809,7 +1809,8 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, req.dir_ordinal = cpu_to_le16(ordinal); req.dir_ext = cpu_to_le16(ext); req.opt_ordinal = NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ; - rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc == 0) { if (index) *index = le16_to_cpu(output->dir_idx); @@ -1818,6 +1819,7 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, if (data_length) *data_length = le32_to_cpu(output->dir_data_length); } + mutex_unlock(&bp->hwrm_cmd_lock); return rc; } -- cgit v1.2.3-70-g09d2 From 5b1e1a9ce06fd94b563d6c3dd896589231995d89 Mon Sep 17 00:00:00 2001 From: Sankar Patchineelam Date: Fri, 13 Oct 2017 21:09:34 -0400 Subject: bnxt_en: Fix possible corruption in DCB parameters from firmware. hwrm_send_message() is replaced with _hwrm_send_message(), and hwrm_cmd_lock mutex lock is grabbed for the whole period of firmware call until the firmware DCB parameters have been copied. This will prevent possible corruption of the firmware data. Fixes: 7df4ae9fe855 ("bnxt_en: Implement DCBNL to support host-based DCBX.") Signed-off-by: Sankar Patchineelam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index aa1f3a2c7a78..fed37cd9ae1d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -50,7 +50,9 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PRI2COS_QCFG, -1, -1); req.flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN); - rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) { u8 *pri2cos = &resp->pri0_cos_queue_id; int i, j; @@ -66,6 +68,7 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets) } } } + mutex_unlock(&bp->hwrm_cmd_lock); return rc; } @@ -119,9 +122,13 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets) int rc, i; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_QCFG, -1, -1); - rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - if (rc) + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) { + mutex_unlock(&bp->hwrm_cmd_lock); return rc; + } data = &resp->queue_id0 + offsetof(struct bnxt_cos2bw_cfg, queue_id); for (i = 0; i < bp->max_tc; i++, data += sizeof(cos2bw) - 4) { @@ -143,6 +150,7 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets) } } } + mutex_unlock(&bp->hwrm_cmd_lock); return 0; } @@ -240,12 +248,17 @@ static int bnxt_hwrm_queue_pfc_qcfg(struct bnxt *bp, struct ieee_pfc *pfc) int rc; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_QCFG, -1, -1); - rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - if (rc) + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) { + mutex_unlock(&bp->hwrm_cmd_lock); return rc; + } pri_mask = le32_to_cpu(resp->flags); pfc->pfc_en = pri_mask; + mutex_unlock(&bp->hwrm_cmd_lock); return 0; } -- cgit v1.2.3-70-g09d2 From ad98dd1a75ac6a8b68cd2f7bf4676b65734f2a43 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 16 Oct 2017 08:37:54 +1100 Subject: KVM: PPC: Book3S HV: Add more barriers in XIVE load/unload code On POWER9 systems, we push the VCPU context onto the XIVE (eXternal Interrupt Virtualization Engine) hardware when entering a guest, and pull the context off the XIVE when exiting the guest. The push is done with cache-inhibited stores, and the pull with cache-inhibited loads. Testing has revealed that it is possible (though very rare) for the stores to get reordered with the loads so that we end up with the guest VCPU context still loaded on the XIVE after we have exited the guest. When that happens, it is possible for the same VCPU context to then get loaded on another CPU, which causes the machine to checkstop. To fix this, we add I/O barrier instructions (eieio) before and after the push and pull operations. As partial compensation for the potential slowdown caused by the extra barriers, we remove the eieio instructions between the two stores in the push operation, and between the two loads in the pull operation. (The architecture requires loads to cache-inhibited, guarded storage to be kept in order, and requires stores to cache-inhibited, guarded storage likewise to be kept in order, but allows such loads and stores to be reordered with respect to each other.) Reported-by: Carol L Soto Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c700bedccaab..42639fba89e8 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -989,13 +989,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) beq no_xive ld r11, VCPU_XIVE_SAVED_STATE(r4) li r9, TM_QW1_OS - stdcix r11,r9,r10 eieio + stdcix r11,r9,r10 lwz r11, VCPU_XIVE_CAM_WORD(r4) li r9, TM_QW1_OS + TM_WORD2 stwcix r11,r9,r10 li r9, 1 stw r9, VCPU_XIVE_PUSHED(r4) + eieio no_xive: #endif /* CONFIG_KVM_XICS */ @@ -1401,8 +1402,8 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ cmpldi cr0, r10, 0 beq 1f /* First load to pull the context, we ignore the value */ - lwzx r11, r7, r10 eieio + lwzx r11, r7, r10 /* Second load to recover the context state (Words 0 and 1) */ ldx r11, r6, r10 b 3f @@ -1410,8 +1411,8 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ cmpldi cr0, r10, 0 beq 1f /* First load to pull the context, we ignore the value */ - lwzcix r11, r7, r10 eieio + lwzcix r11, r7, r10 /* Second load to recover the context state (Words 0 and 1) */ ldcix r11, r6, r10 3: std r11, VCPU_XIVE_SAVED_STATE(r9) @@ -1421,6 +1422,7 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ stw r10, VCPU_XIVE_PUSHED(r9) stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9) stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9) + eieio 1: #endif /* CONFIG_KVM_XICS */ /* Save more register state */ -- cgit v1.2.3-70-g09d2 From 0a51897bfac9886d36e986d009df0317582b19a2 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 9 Oct 2017 10:43:53 +0200 Subject: drm/exynos: Fix potential NULL pointer dereference in suspend/resume paths The patch 6e8edf8a7d8d: "drm/exynos: Fix suspend/resume support" introduced a new code in suspend/resume paths. However it unconditionally dereference drm_dev pointer, which might be NULL if suspend/resume happens before Exynos DRM driver components bind. This patch fixes this issue. Reported-by: Dan Carpenter Fixes: 6e8edf8a7d8d "drm/exynos: Fix suspend/resume support" Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index e651a58c18cf..aa770bb0153c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -168,11 +168,13 @@ static struct drm_driver exynos_drm_driver = { static int exynos_drm_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct exynos_drm_private *private = drm_dev->dev_private; + struct exynos_drm_private *private; if (pm_runtime_suspended(dev) || !drm_dev) return 0; + private = drm_dev->dev_private; + drm_kms_helper_poll_disable(drm_dev); exynos_drm_fbdev_suspend(drm_dev); private->suspend_state = drm_atomic_helper_suspend(drm_dev); @@ -188,11 +190,12 @@ static int exynos_drm_suspend(struct device *dev) static int exynos_drm_resume(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct exynos_drm_private *private = drm_dev->dev_private; + struct exynos_drm_private *private; if (pm_runtime_suspended(dev) || !drm_dev) return 0; + private = drm_dev->dev_private; drm_atomic_helper_resume(drm_dev, private->suspend_state); exynos_drm_fbdev_resume(drm_dev); drm_kms_helper_poll_enable(drm_dev); -- cgit v1.2.3-70-g09d2 From 238604ca0b708319e089e22545bcda39afb5faa8 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 9 Oct 2017 10:44:01 +0200 Subject: drm/exynos: Clear drvdata after component unbind When components are unbound, DRM driver is unregistered and freed, so clear drvdata to avoid potential use-after-free issue in suspend/resume paths. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index aa770bb0153c..82b72425a42f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -430,6 +430,7 @@ static void exynos_drm_unbind(struct device *dev) kfree(drm->dev_private); drm->dev_private = NULL; + dev_set_drvdata(dev, NULL); drm_dev_unref(drm); } -- cgit v1.2.3-70-g09d2 From 33d930e59a98fa10a0db9f56c7fa2f21a4aef9b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Oct 2017 21:01:12 -0400 Subject: Linux 4.14-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5bf6fa4d62d8..46bfb0ed2257 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3-70-g09d2 From b7662eef14caf4f582d453d45395825b5a8f594c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 4 Oct 2017 14:46:17 +0200 Subject: s390/cputime: fix guest/irq/softirq times after CPU hotplug On CPU hotplug some cpu stats contain bogus values: $ cat /proc/stat cpu 0 0 49 1280 0 0 0 3 0 0 cpu0 0 0 49 618 0 0 0 3 0 0 cpu1 0 0 0 662 0 0 0 0 0 0 [...] $ echo 0 > /sys/devices/system/cpu/cpu1/online $ echo 1 > /sys/devices/system/cpu/cpu1/online $ cat /proc/stat cpu 0 0 49 3200 0 450359962737 450359962737 3 0 0 cpu0 0 0 49 1956 0 0 0 3 0 0 cpu1 0 0 0 1244 0 450359962737 450359962737 0 0 0 [...] pcpu_attach_task() needs the same assignments as vtime_task_switch. Signed-off-by: Christian Borntraeger Fixes: b7394a5f4ce9 ("sched/cputime, s390: Implement delayed accounting of system time") Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1cee6753d47a..495ff6959dec 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -293,7 +293,10 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) lc->lpp = LPP_MAGIC; lc->current_pid = tsk->pid; lc->user_timer = tsk->thread.user_timer; + lc->guest_timer = tsk->thread.guest_timer; lc->system_timer = tsk->thread.system_timer; + lc->hardirq_timer = tsk->thread.hardirq_timer; + lc->softirq_timer = tsk->thread.softirq_timer; lc->steal_timer = 0; } -- cgit v1.2.3-70-g09d2 From 0015a978a254c528f52ce17bbeba612be0d75e22 Mon Sep 17 00:00:00 2001 From: Dimitri John Ledkov Date: Thu, 12 Oct 2017 11:15:09 +0100 Subject: s390: fix zfcpdump-config zipl from s390-tools generates root=/dev/ram0 kernel cmdline for zfcpdump, thus BLK_DEV_RAM is required. zfcpdump initrd mounts DEBUG_FS, thus is also required. Bug-Ubuntu: https://launchpad.net/bugs/1722735 Bug-Ubuntu: https://launchpad.net/bugs/1719290 Signed-off-by: Dimitri John Ledkov Signed-off-by: Martin Schwidefsky --- arch/s390/configs/zfcpdump_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index afa46a7406ea..04e042edbab7 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -27,6 +27,7 @@ CONFIG_NET=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y # CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_BLK_DEV_RAM=y # CONFIG_BLK_DEV_XPRAM is not set # CONFIG_DCSSBLK is not set # CONFIG_DASD is not set @@ -59,6 +60,7 @@ CONFIG_CONFIGFS_FS=y # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set -- cgit v1.2.3-70-g09d2 From e6fc454b7794fc45c27364c7896b8f03094635ee Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 15 Oct 2017 17:02:03 +0100 Subject: x86/cpu/intel_cacheinfo: Remove redundant assignment to 'this_leaf' The 'this_leaf' variable is assigned a value that is never read and it is updated a little later with a newer value, hence we can remove the redundant assignment. Cleans up the following Clang warning: Value stored to 'this_leaf' is never read Signed-off-by: Colin Ian King Reviewed-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20171015160203.12332-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 24f749324c0f..9990a71e311f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -831,7 +831,6 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { unsigned int apicid, nshared, first, last; - this_leaf = this_cpu_ci->info_list + index; nshared = base->eax.split.num_threads_sharing + 1; apicid = cpu_data(cpu).apicid; first = apicid - (apicid % nshared); -- cgit v1.2.3-70-g09d2 From 31dc3f819bac28a0990b36510197560258ab7421 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Oct 2017 14:50:46 +0200 Subject: USB: serial: metro-usb: add MS7820 device id Add device-id entry for (Honeywell) Metrologic MS7820 bar code scanner. The device has two interfaces (in this mode?); a vendor-specific interface with two interrupt endpoints and a second HID interface, which we do not bind to. Reported-by: Ladislav Dobrovsky Tested-by: Ladislav Dobrovsky Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/metro-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/metro-usb.c b/drivers/usb/serial/metro-usb.c index cc84da8dbb84..14511d6a7d44 100644 --- a/drivers/usb/serial/metro-usb.c +++ b/drivers/usb/serial/metro-usb.c @@ -45,6 +45,7 @@ struct metrousb_private { static const struct usb_device_id id_table[] = { { USB_DEVICE(FOCUS_VENDOR_ID, FOCUS_PRODUCT_ID_BI) }, { USB_DEVICE(FOCUS_VENDOR_ID, FOCUS_PRODUCT_ID_UNI) }, + { USB_DEVICE_INTERFACE_CLASS(0x0c2e, 0x0730, 0xff) }, /* MS7820 */ { }, /* Terminating entry. */ }; MODULE_DEVICE_TABLE(usb, id_table); -- cgit v1.2.3-70-g09d2 From 9ed95129ffcabbde564b40ffbbf9c26e8702d858 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 4 Oct 2017 16:17:55 +0200 Subject: Documentation: Add a file explaining the Linux kernel license enforcement policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a short document describing the views of how the Linux kernel community feels about enforcing the license of the kernel. Acked-by: Al Viro Acked-by: Alex Elder (Linaro) Acked-by: Andrea Arcangeli Acked-by: Andy Gross Acked-by: Aneesh Kumar K.V Acked-by: Anna Schumaker Acked-by: Ard Biesheuvel Acked-by: Arnd Bergmann Acked-by: Arvind Yadav Acked-by: Bart Van Assche Acked-by: Bhumika Goyal Acked-by: Bjorn Andersson Acked-by: Borislav Petkov Acked-by: Christian Borntraeger Acked-by: Christian König Acked-by: Christophe JAILLET Acked-by: Chuck Lever Acked-by: Colin Ian King Acked-by: Daniel Borkmann Acked-by: Daniel Lezcano Acked-by: Daniel Vetter Acked-by: Darrick J. Wong (Oracle) Acked-by: Darrick J. Wong Acked-by: David Kershner Acked-by: David S. Miller Acked-by: Dmitry Torokhov Acked-by: Doug Ledford Acked-by: Fabio Estevam Acked-by: Felipe Balbi Acked-by: Florian Westphal Acked-by: Geert Uytterhoeven Acked-by: Guenter Roeck Acked-by: Hannes Reinecke Acked-by: Hans de Goede Acked-by: Heiko Carstens Acked-by: Heiko Stuebner Acked-by: Heiner Kallweit Acked-by: Ingo Molnar Acked-by: Ivan Safonov Acked-by: Jaegeuk Kim Acked-by: Jan Kara (SUSE) Acked-by: Javier Martinez Canillas Acked-by: Jeff Kirsher Acked-by: Jens Axboe Acked-by: Jes Sorensen Acked-by: Jiri Kosina Acked-by: Jiri Pirko Acked-by: Joe Perches Acked-by: Joerg Roedel (SUSE) Acked-by: Johan Hovold Acked-by: Josh Poimboeuf Acked-by: Juergen Gross Acked-by: Julia Lawall Acked-by: K. Y. Srinivasan Acked-by: Khalid Aziz Acked-by: Krzysztof Kozlowski Acked-by: Kuninori Morimoto Acked-by: Larry Finger Acked-by: Laura Abbott Acked-by: Lee Jones Acked-by: Leon Romanovsky Acked-by: Linus Walleij (Linaro) Acked-by: Lv Zheng Acked-by: Martin K. Petersen (Oracle) Acked-by: Masahiro Yamada Acked-by: Masami Hiramatsu Acked-by: Mel Gorman Acked-by: Michael S. Tsirkin Acked-by: Michal Hocko Acked-by: Mike Marshall Acked-by: Namhyung Kim Acked-by: Neil Armstrong Acked-by: Olof Johansson Acked-by: Pablo Neira Ayuso Acked-by: Paolo Bonzini Acked-by: Paul Burton Acked-by: Paul E. McKenney Acked-by: Peter Zijlstra Acked-by: Rafael J. Wysocki Acked-by: Ralf Baechle Acked-by: Richard Weinberger Acked-by: Rik van Riel Acked-by: Rob Clark Acked-by: Rob Herring Acked-by: Sebastian Reichel (Collabora) Acked-by: Shawn Guo Acked-by: Shuah Khan Acked-by: Simon Horman Acked-by: Srinivas Kandagatla Acked-by: Steven Rostedt (VMware) Acked-by: Sven Eckelmann Acked-by: Takashi Iwai (SUSE) Acked-by: Tejun Heo Acked-by: Thierry Reding Acked-by: Tony Luck Acked-by: Ulf Hansson Acked-by: Vinod Koul Acked-by: Viresh Kumar Acked-by: Vivien Didelot Acked-by: Wei Yongjun Acked-by: Xin Long Signed-off-by: Greg Kroah-Hartman --- Documentation/process/index.rst | 1 + .../process/kernel-enforcement-statement.rst | 147 +++++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 Documentation/process/kernel-enforcement-statement.rst diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index 82fc399fcd33..61e43cc3ed17 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -25,6 +25,7 @@ Below are the essential guides that every developer should read. submitting-patches coding-style email-clients + kernel-enforcement-statement Other guides to the community that are of interest to most developers are: diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst new file mode 100644 index 000000000000..1e23d4227337 --- /dev/null +++ b/Documentation/process/kernel-enforcement-statement.rst @@ -0,0 +1,147 @@ +Linux Kernel Enforcement Statement +---------------------------------- + +As developers of the Linux kernel, we have a keen interest in how our software +is used and how the license for our software is enforced. Compliance with the +reciprocal sharing obligations of GPL-2.0 is critical to the long-term +sustainability of our software and community. + +Although there is a right to enforce the separate copyright interests in the +contributions made to our community, we share an interest in ensuring that +individual enforcement actions are conducted in a manner that benefits our +community and do not have an unintended negative impact on the health and +growth of our software ecosystem. In order to deter unhelpful enforcement +actions, we agree that it is in the best interests of our development +community to undertake the following commitment to users of the Linux kernel +on behalf of ourselves and any successors to our copyright interests: + + Notwithstanding the termination provisions of the GPL-2.0, we agree that + it is in the best interests of our development community to adopt the + following provisions of GPL-3.0 as additional permissions under our + license with respect to any non-defensive assertion of rights under the + license. + + However, if you cease all violation of this License, then your license + from a particular copyright holder is reinstated (a) provisionally, + unless and until the copyright holder explicitly and finally + terminates your license, and (b) permanently, if the copyright holder + fails to notify you of the violation by some reasonable means prior to + 60 days after the cessation. + + Moreover, your license from a particular copyright holder is + reinstated permanently if the copyright holder notifies you of the + violation by some reasonable means, this is the first time you have + received notice of violation of this License (for any work) from that + copyright holder, and you cure the violation prior to 30 days after + your receipt of the notice. + +Our intent in providing these assurances is to encourage more use of the +software. We want companies and individuals to use, modify and distribute +this software. We want to work with users in an open and transparent way to +eliminate any uncertainty about our expectations regarding compliance or +enforcement that might limit adoption of our software. We view legal action +as a last resort, to be initiated only when other community efforts have +failed to resolve the problem. + +Finally, once a non-compliance issue is resolved, we hope the user will feel +welcome to join us in our efforts on this project. Working together, we will +be stronger. + +Except where noted below, we speak only for ourselves, and not for any company +we might work for today, have in the past, or will in the future. + + - Bjorn Andersson (Linaro) + - Andrea Arcangeli (Red Hat) + - Neil Armstrong + - Jens Axboe + - Pablo Neira Ayuso + - Khalid Aziz + - Ralf Baechle + - Felipe Balbi + - Arnd Bergmann + - Ard Biesheuvel + - Paolo Bonzini (Red Hat) + - Christian Borntraeger + - Mark Brown (Linaro) + - Paul Burton + - Javier Martinez Canillas + - Rob Clark + - Jonathan Corbet + - Vivien Didelot (Savoir-faire Linux) + - Hans de Goede (Red Hat) + - Mel Gorman (SUSE) + - Sven Eckelmann + - Alex Elder (Linaro) + - Fabio Estevam + - Larry Finger + - Bhumika Goyal + - Andy Gross + - Juergen Gross + - Shawn Guo + - Ulf Hansson + - Tejun Heo + - Rob Herring + - Masami Hiramatsu + - Michal Hocko + - Simon Horman + - Johan Hovold (Hovold Consulting AB) + - Christophe JAILLET + - Olof Johansson + - Lee Jones (Linaro) + - Heiner Kallweit + - Srinivas Kandagatla + - Jan Kara + - Shuah Khan (Samsung) + - David Kershner + - Jaegeuk Kim + - Namhyung Kim + - Colin Ian King + - Jeff Kirsher + - Greg Kroah-Hartman (Linux Foundation) + - Christian König + - Vinod Koul + - Krzysztof Kozlowski + - Viresh Kumar + - Aneesh Kumar K.V + - Julia Lawall + - Doug Ledford (Red Hat) + - Chuck Lever (Oracle) + - Daniel Lezcano + - Shaohua Li + - Xin Long (Red Hat) + - Tony Luck + - Mike Marshall + - Chris Mason + - Paul E. McKenney + - David S. Miller + - Ingo Molnar + - Kuninori Morimoto + - Borislav Petkov + - Jiri Pirko + - Josh Poimboeuf + - Sebastian Reichel (Collabora) + - Guenter Roeck + - Joerg Roedel + - Leon Romanovsky + - Steven Rostedt (VMware) + - Ivan Safonov + - Ivan Safonov + - Anna Schumaker + - Jes Sorensen + - K.Y. Srinivasan + - Heiko Stuebner + - Jiri Kosina (SUSE) + - Dmitry Torokhov + - Linus Torvalds + - Thierry Reding + - Rik van Riel + - Geert Uytterhoeven (Glider bvba) + - Daniel Vetter + - Linus Walleij + - Richard Weinberger + - Dan Williams + - Rafael J. Wysocki + - Arvind Yadav + - Masahiro Yamada + - Wei Yongjun + - Lv Zheng -- cgit v1.2.3-70-g09d2 From fdf7cb4185b60c68e1a75e61691c4afdc15dea0e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Sep 2017 14:54:54 +0200 Subject: mac80211: accept key reinstall without changing anything When a key is reinstalled we can reset the replay counters etc. which can lead to nonce reuse and/or replay detection being impossible, breaking security properties, as described in the "KRACK attacks". In particular, CVE-2017-13080 applies to GTK rekeying that happened in firmware while the host is in D3, with the second part of the attack being done after the host wakes up. In this case, the wpa_supplicant mitigation isn't sufficient since wpa_supplicant doesn't know the GTK material. In case this happens, simply silently accept the new key coming from userspace but don't take any action on it since it's the same key; this keeps the PN replay counters intact. Signed-off-by: Johannes Berg --- net/mac80211/key.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a98fc2b5e0dc..ae995c8480db 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2008 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -620,9 +620,6 @@ int ieee80211_key_link(struct ieee80211_key *key, pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; idx = key->conf.keyidx; - key->local = sdata->local; - key->sdata = sdata; - key->sta = sta; mutex_lock(&sdata->local->key_mtx); @@ -633,6 +630,21 @@ int ieee80211_key_link(struct ieee80211_key *key, else old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]); + /* + * Silently accept key re-installation without really installing the + * new version of the key to avoid nonce reuse or replay issues. + */ + if (old_key && key->conf.keylen == old_key->conf.keylen && + !memcmp(key->conf.key, old_key->conf.key, key->conf.keylen)) { + ieee80211_key_free_unused(key); + ret = 0; + goto out; + } + + key->local = sdata->local; + key->sdata = sdata; + key->sta = sta; + increment_tailroom_need_count(sdata); ieee80211_key_replace(sdata, sta, pairwise, old_key, key); @@ -648,6 +660,7 @@ int ieee80211_key_link(struct ieee80211_key *key, ret = 0; } + out: mutex_unlock(&sdata->local->key_mtx); return ret; -- cgit v1.2.3-70-g09d2 From 9bb201a5d5acc733943e8af7151cceab9d976a69 Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Sun, 15 Oct 2017 12:41:32 +0300 Subject: ALSA: usb-audio: Add native DSD support for Pro-Ject Pre Box S2 Digital Add native DSD support quirk for Pro-Ject Pre Box S2 Digital USB id 2772:0230. Signed-off-by: Jussi Laako Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 9ddaae3784f5..4f5f18f22974 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1354,6 +1354,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */ case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */ case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */ + case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */ if (fp->altsetting == 2) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; -- cgit v1.2.3-70-g09d2 From 4c625a974fb81724e60966b677e47fcba782c950 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Oct 2017 14:08:55 -0400 Subject: SUNRPC: fix a list corruption issue in xprt_release() We remove the request from the receive list before we call xprt_wait_on_pinned_rqst(), and so we need to use list_del_init(). Otherwise, we will see list corruption when xprt_complete_rqst() is called. Reported-by: Emre Celebi Fixes: ce7c252a8c741 ("SUNRPC: Add a separate spinlock to protect...") Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e741ec2b4d8e..1a39ad14c42f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1333,7 +1333,7 @@ void xprt_release(struct rpc_task *task) rpc_count_iostats(task, task->tk_client->cl_metrics); spin_lock(&xprt->recv_lock); if (!list_empty(&req->rq_list)) { - list_del(&req->rq_list); + list_del_init(&req->rq_list); xprt_wait_on_pinned_rqst(req); } spin_unlock(&xprt->recv_lock); -- cgit v1.2.3-70-g09d2 From 8bc9481f43103da77d75a532d9eae55790b9eea6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Oct 2017 10:47:39 -0300 Subject: perf tools: Add long time reviewers to MAINTAINERS Jiri and Namhyung have long contributed a lot of code and time reviewing patches to tools/, so lets make that reflected in the MAINTAINERS file to encourage patch submitters to add them to the CC list, speeding up the process of tools/perf/ patch processing. Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-onicopw68bg6kn56lnybfpns@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index cc42c838ab4f..f2056624c672 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10559,6 +10559,8 @@ M: Peter Zijlstra M: Ingo Molnar M: Arnaldo Carvalho de Melo R: Alexander Shishkin +R: Jiri Olsa +R: Namhyung Kim L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core S: Supported -- cgit v1.2.3-70-g09d2 From ba3ee00683bc2dad4c14fba805c2241ae23acff9 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 22 Sep 2017 10:00:09 +0800 Subject: drm/i915/gvt: Fix GPU hang after reusing vGPU instance across different guest OS We have implemented delayed ring mmio switch mechanism to reduce unnecessary mmio switch. While the vGPU is being destroyed or detached from VM, we need to force the ring switch to host context. The later deadline is missed. Then it got a chance that word load from VM2 might execute under the ring context of VM1 which was attached to a same vGPU instance. Finally, the GPU is hang. This patch guarantee the two deadline are performed. v2: Remove unused variable 'scheduler' Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 436377da41ba..03532dfc0cd5 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -308,20 +308,8 @@ static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu) static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu) { - struct intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler; - int ring_id; - kfree(vgpu->sched_data); vgpu->sched_data = NULL; - - spin_lock_bh(&scheduler->mmio_context_lock); - for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { - if (scheduler->engine_owner[ring_id] == vgpu) { - intel_gvt_switch_mmio(vgpu, NULL, ring_id); - scheduler->engine_owner[ring_id] = NULL; - } - } - spin_unlock_bh(&scheduler->mmio_context_lock); } static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) @@ -388,6 +376,7 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) { struct intel_gvt_workload_scheduler *scheduler = &vgpu->gvt->scheduler; + int ring_id; gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id); @@ -401,4 +390,13 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) scheduler->need_reschedule = true; scheduler->current_vgpu = NULL; } + + spin_lock_bh(&scheduler->mmio_context_lock); + for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { + if (scheduler->engine_owner[ring_id] == vgpu) { + intel_gvt_switch_mmio(vgpu, NULL, ring_id); + scheduler->engine_owner[ring_id] = NULL; + } + } + spin_unlock_bh(&scheduler->mmio_context_lock); } -- cgit v1.2.3-70-g09d2 From 9c48c0965b97e14ddcf75490a754e84e05aaa062 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Oct 2017 12:12:16 +0200 Subject: x86/idt: Initialize early IDT before cr4_init_shadow() Moving the early IDT setup out of assembly code breaks the boot on first generation 486 systems. The reason is that the call of idt_setup_early_handler, which sets up the early handlers was added after the call to cr4_init_shadow(). cr4_init_shadow() tries to read CR4 which is not available on those systems. The accessor function uses a extable fixup to handle the resulting fault. As the IDT is not set up yet, the cr4 read exception causes an instantaneous reboot for obvious reasons. Call idt_setup_early_handler() before cr4_init_shadow() so IDT is set up before the first exception hits. Fixes: 87e81786b13b ("x86/idt: Move early IDT setup out of 32-bit asm") Reported-and-tested-by: Matthew Whitehead Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1710161210290.1973@nanos --- arch/x86/kernel/head32.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index cf2ce063f65a..2902ca4d5993 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -30,10 +30,11 @@ static void __init i386_default_early_setup(void) asmlinkage __visible void __init i386_start_kernel(void) { - cr4_init_shadow(); - + /* Make sure IDT is set up before any exception happens */ idt_setup_early_handler(); + cr4_init_shadow(); + sanitize_boot_params(&boot_params); x86_early_init_platform_quirks(); -- cgit v1.2.3-70-g09d2 From 793d7dbe6d82a50b9d14bf992b9eaacb70a11ce6 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 13 Oct 2017 09:47:45 -0700 Subject: xfs: cancel dirty pages on invalidation Recently we've had warnings arise from the vm handing us pages without bufferheads attached to them. This should not ever occur in XFS, but we don't defend against it properly if it does. The only place where we remove bufferheads from a page is in xfs_vm_releasepage(), but we can't tell the difference here between "page is dirty so don't release" and "page is dirty but is being invalidated so release it". In some places that are invalidating pages ask for pages to be released and follow up afterward calling ->releasepage by checking whether the page was dirty and then aborting the invalidation. This is a possible vector for releasing buffers from a page but then leaving it in the mapping, so we really do need to avoid dirty pages in xfs_vm_releasepage(). To differentiate between invalidated pages and normal pages, we need to clear the page dirty flag when invalidating the pages. This can be done through xfs_vm_invalidatepage(), and will result xfs_vm_releasepage() seeing the page as clean which matches the bufferhead state on the page after calling block_invalidatepage(). Hence we can re-add the page dirty check in xfs_vm_releasepage to catch the case where we might be releasing a page that is actually dirty and so should not have the bufferheads on it removed. This will remove one possible vector of "dirty page with no bufferheads" and so help narrow down the search for the root cause of that problem. Signed-Off-By: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f18e5932aec4..067284d84d9e 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -735,6 +735,14 @@ xfs_vm_invalidatepage( { trace_xfs_invalidatepage(page->mapping->host, page, offset, length); + + /* + * If we are invalidating the entire page, clear the dirty state from it + * so that we can check for attempts to release dirty cached pages in + * xfs_vm_releasepage(). + */ + if (offset == 0 && length >= PAGE_SIZE) + cancel_dirty_page(page); block_invalidatepage(page, offset, length); } @@ -1190,25 +1198,27 @@ xfs_vm_releasepage( * mm accommodates an old ext3 case where clean pages might not have had * the dirty bit cleared. Thus, it can send actual dirty pages to * ->releasepage() via shrink_active_list(). Conversely, - * block_invalidatepage() can send pages that are still marked dirty - * but otherwise have invalidated buffers. + * block_invalidatepage() can send pages that are still marked dirty but + * otherwise have invalidated buffers. * * We want to release the latter to avoid unnecessary buildup of the - * LRU, skip the former and warn if we've left any lingering - * delalloc/unwritten buffers on clean pages. Skip pages with delalloc - * or unwritten buffers and warn if the page is not dirty. Otherwise - * try to release the buffers. + * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages + * that are entirely invalidated and need to be released. Hence the + * only time we should get dirty pages here is through + * shrink_active_list() and so we can simply skip those now. + * + * warn if we've left any lingering delalloc/unwritten buffers on clean + * or invalidated pages we are about to release. */ + if (PageDirty(page)) + return 0; + xfs_count_page_state(page, &delalloc, &unwritten); - if (delalloc) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(delalloc)) return 0; - } - if (unwritten) { - WARN_ON_ONCE(!PageDirty(page)); + if (WARN_ON_ONCE(unwritten)) return 0; - } return try_to_free_buffers(page); } -- cgit v1.2.3-70-g09d2 From 5e25c269e17de4c5a23ce886cda612b01365a944 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Fri, 13 Oct 2017 09:47:46 -0700 Subject: fs: invalidate page cache after end_io() in dio completion Commit 332391a9935d ("fs: Fix page cache inconsistency when mixing buffered and AIO DIO") moved page cache invalidation from iomap_dio_rw() to iomap_dio_complete() for iomap based direct write path, but before the dio->end_io() call, and it re-introdued the bug fixed by commit c771c14baa33 ("iomap: invalidate page caches should be after iomap_dio_complete() in direct write"). I found this because fstests generic/418 started failing on XFS with v4.14-rc3 kernel, which is the regression test for this specific bug. So similarly, fix it by moving dio->end_io() (which does the unwritten extent conversion) before page cache invalidation, to make sure next buffer read reads the final real allocations not unwritten extents. I also add some comments about why should end_io() go first in case we get it wrong again in the future. Note that, there's no such problem in the non-iomap based direct write path, because we didn't remove the page cache invalidation after the ->direct_IO() in generic_file_direct_write() call, but I decided to fix dio_complete() too so we don't leave a landmine there, also be consistent with iomap_dio_complete(). Fixes: 332391a9935d ("fs: Fix page cache inconsistency when mixing buffered and AIO DIO") Signed-off-by: Eryu Guan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Lukas Czerner --- fs/direct-io.c | 20 ++++++++++++-------- fs/iomap.c | 41 ++++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 96415c65bbdc..19ac3fe57deb 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -259,12 +259,24 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) if (ret == 0) ret = transferred; + if (dio->end_io) { + // XXX: ki_pos?? + err = dio->end_io(dio->iocb, offset, ret, dio->private); + if (err) + ret = err; + } + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source * of the write was an mmap'ed region of the file we're writing. Either * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. */ if (ret > 0 && dio->op == REQ_OP_WRITE && dio->inode->i_mapping->nrpages) { @@ -274,14 +286,6 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) WARN_ON_ONCE(err); } - if (dio->end_io) { - - // XXX: ki_pos?? - err = dio->end_io(dio->iocb, offset, ret, dio->private); - if (err) - ret = err; - } - if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); diff --git a/fs/iomap.c b/fs/iomap.c index be61cf742b5e..d4801f8dd4fd 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) { struct kiocb *iocb = dio->iocb; struct inode *inode = file_inode(iocb->ki_filp); + loff_t offset = iocb->ki_pos; ssize_t ret; - /* - * Try again to invalidate clean pages which might have been cached by - * non-direct readahead, or faulted in by get_user_pages() if the source - * of the write was an mmap'ed region of the file we're writing. Either - * one is a pretty crazy thing to do, so we don't support it 100%. If - * this invalidation fails, tough, the write still worked... - */ - if (!dio->error && - (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { - ret = invalidate_inode_pages2_range(inode->i_mapping, - iocb->ki_pos >> PAGE_SHIFT, - (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } - if (dio->end_io) { ret = dio->end_io(iocb, dio->error ? dio->error : dio->size, @@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) if (likely(!ret)) { ret = dio->size; /* check for short read */ - if (iocb->ki_pos + ret > dio->i_size && + if (offset + ret > dio->i_size && !(dio->flags & IOMAP_DIO_WRITE)) - ret = dio->i_size - iocb->ki_pos; + ret = dio->i_size - offset; iocb->ki_pos += ret; } + /* + * Try again to invalidate clean pages which might have been cached by + * non-direct readahead, or faulted in by get_user_pages() if the source + * of the write was an mmap'ed region of the file we're writing. Either + * one is a pretty crazy thing to do, so we don't support it 100%. If + * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. + */ + if (!dio->error && + (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { + int err; + err = invalidate_inode_pages2_range(inode->i_mapping, + offset >> PAGE_SHIFT, + (offset + dio->size - 1) >> PAGE_SHIFT); + WARN_ON_ONCE(err); + } + inode_dio_end(file_inode(iocb->ki_filp)); kfree(dio); -- cgit v1.2.3-70-g09d2 From 40214d128e07dd21bb07a8ed6a7fe2f911281ab2 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 13 Oct 2017 09:47:46 -0700 Subject: xfs: trim writepage mapping to within eof The writeback rework in commit fbcc02561359 ("xfs: Introduce writeback context for writepages") introduced a subtle change in behavior with regard to the block mapping used across the ->writepages() sequence. The previous xfs_cluster_write() code would only flush pages up to EOF at the time of the writepage, thus ensuring that any pages due to file-extending writes would be handled on a separate cycle and with a new, updated block mapping. The updated code establishes a block mapping in xfs_writepage_map() that could extend beyond EOF if the file has post-eof preallocation. Because we now use the generic writeback infrastructure and pass the cached mapping to each writepage call, there is no implicit EOF limit in place. If eofblocks trimming occurs during ->writepages(), any post-eof portion of the cached mapping becomes invalid. The eofblocks code has no means to serialize against writeback because there are no pages associated with post-eof blocks. Therefore if an eofblocks trim occurs and is followed by a file-extending buffered write, not only has the mapping become invalid, but we could end up writing a page to disk based on the invalid mapping. Consider the following sequence of events: - A buffered write creates a delalloc extent and post-eof speculative preallocation. - Writeback starts and on the first writepage cycle, the delalloc extent is converted to real blocks (including the post-eof blocks) and the mapping is cached. - The file is closed and xfs_release() trims post-eof blocks. The cached writeback mapping is now invalid. - Another buffered write appends the file with a delalloc extent. - The concurrent writeback cycle picks up the just written page because the writeback range end is LLONG_MAX. xfs_writepage_map() attributes it to the (now invalid) cached mapping and writes the data to an incorrect location on disk (and where the file offset is still backed by a delalloc extent). This problem is reproduced by xfstests test generic/464, which triggers racing writes, appends, open/closes and writeback requests. To address this problem, trim the mapping used during writeback to within EOF when the mapping is validated. This ensures the mapping is revalidated for any pages encountered beyond EOF as of the time the current mapping was cached or last validated. Reported-by: Eryu Guan Diagnosed-by: Eryu Guan Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 11 +++++++++++ fs/xfs/libxfs/xfs_bmap.h | 1 + fs/xfs/xfs_aops.c | 13 +++++++++++++ 3 files changed, 25 insertions(+) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index def32fa1c225..89263797cf32 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3852,6 +3852,17 @@ xfs_trim_extent( } } +/* trim extent to within eof */ +void +xfs_trim_extent_eof( + struct xfs_bmbt_irec *irec, + struct xfs_inode *ip) + +{ + xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount, + i_size_read(VFS_I(ip)))); +} + /* * Trim the returned map to the required bounds */ diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 851982a5dfbc..502e0d8fb4ff 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); +void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 067284d84d9e..a3eeaba156c5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -446,6 +446,19 @@ xfs_imap_valid( { offset >>= inode->i_blkbits; + /* + * We have to make sure the cached mapping is within EOF to protect + * against eofblocks trimming on file release leaving us with a stale + * mapping. Otherwise, a page for a subsequent file extending buffered + * write could get picked up by this writeback cycle and written to the + * wrong blocks. + * + * Note that what we really want here is a generic mapping invalidation + * mechanism to protect us from arbitrary extent modifying contexts, not + * just eofblocks. + */ + xfs_trim_extent_eof(imap, XFS_I(inode)); + return offset >= imap->br_startoff && offset < imap->br_startoff + imap->br_blockcount; } -- cgit v1.2.3-70-g09d2 From 785545c8982604fe3ba79d16409e83993be77d5e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Oct 2017 09:47:47 -0700 Subject: xfs: move two more RT specific functions into CONFIG_XFS_RT The last cleanup introduced two harmless warnings: fs/xfs/xfs_fsmap.c:480:1: warning: '__xfs_getfsmap_rtdev' defined but not used fs/xfs/xfs_fsmap.c:372:1: warning: 'xfs_getfsmap_rtdev_rtbitmap_helper' defined but not used This moves those two functions as well. Fixes: bb9c2e543325 ("xfs: move more RT specific code under CONFIG_XFS_RT") Signed-off-by: Arnd Bergmann Reviewed-by: Brian Foster Acked-by: Geert Uytterhoeven Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_fsmap.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 560e0b40ac1b..43cfc07996a4 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper( return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); } -/* Transform a rtbitmap "record" into a fsmap */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap_helper( - struct xfs_trans *tp, - struct xfs_rtalloc_rec *rec, - void *priv) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_getfsmap_info *info = priv; - struct xfs_rmap_irec irec; - xfs_daddr_t rec_daddr; - - rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); - - irec.rm_startblock = rec->ar_startblock; - irec.rm_blockcount = rec->ar_blockcount; - irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ - irec.rm_offset = 0; - irec.rm_flags = 0; - - return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); -} - /* Transform a bnobt irec into a fsmap */ STATIC int xfs_getfsmap_datadev_bnobt_helper( @@ -475,6 +452,30 @@ xfs_getfsmap_logdev( return xfs_getfsmap_helper(tp, info, &rmap, 0); } +#ifdef CONFIG_XFS_RT +/* Transform a rtbitmap "record" into a fsmap */ +STATIC int +xfs_getfsmap_rtdev_rtbitmap_helper( + struct xfs_trans *tp, + struct xfs_rtalloc_rec *rec, + void *priv) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; + xfs_daddr_t rec_daddr; + + rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); + + irec.rm_startblock = rec->ar_startblock; + irec.rm_blockcount = rec->ar_blockcount; + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); +} + /* Execute a getfsmap query against the realtime device. */ STATIC int __xfs_getfsmap_rtdev( @@ -521,7 +522,6 @@ __xfs_getfsmap_rtdev( return query_fn(tp, info); } -#ifdef CONFIG_XFS_RT /* Actually query the realtime bitmap. */ STATIC int xfs_getfsmap_rtdev_rtbitmap_query( -- cgit v1.2.3-70-g09d2 From 8a212589fe0e45f26c549dfa271a157ca8eea1ac Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:41 +0800 Subject: rtnetlink: bring NETDEV_CHANGEMTU event process back in rtnetlink_event Commit 085e1a65f04f ("rtnetlink: Do not generate notifications for MTU events") tried to fix the redundant notifications issue when ip link set mtu by removing NETDEV_CHANGEMTU event process in rtnetlink_event. But it also resulted in no notification generated when dev's mtu is changed via other methods, like: 'ifconfig eth1 mtu 1400' or 'echo 1400 > /sys/class/net/eth1/mtu' It would cause users not to be notified by this change. This patch is to fix it by bringing NETDEV_CHANGEMTU event back into rtnetlink_event, and the redundant notifications issue will be fixed in the later patch 'rtnetlink: check DO_SETLINK_NOTIFY correctly in do_setlink'. Fixes: 085e1a65f04f ("rtnetlink: Do not generate notifications for MTU events") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d4bcdcc68e92..72053ed7c891 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4279,6 +4279,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi switch (event) { case NETDEV_REBOOT: + case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: -- cgit v1.2.3-70-g09d2 From ebdcf0450b020748c2dab6bfe44a5ac3c5159fb0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:42 +0800 Subject: rtnetlink: bring NETDEV_CHANGE_TX_QUEUE_LEN event process back in rtnetlink_event The same fix for changing mtu in the patch 'rtnetlink: bring NETDEV_CHANGEMTU event process back in rtnetlink_event' is needed for changing tx_queue_len. Note that the redundant notifications issue for tx_queue_len will be fixed in the later patch 'rtnetlink: do not send notification for tx_queue_len in do_setlink'. Fixes: 27b3b551d8a7 ("rtnetlink: Do not generate notifications for NETDEV_CHANGE_TX_QUEUE_LEN event") Signed-off-by: Xin Long Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 72053ed7c891..bf473604f33d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4287,6 +4287,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_NOTIFY_PEERS: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: + case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), GFP_KERNEL); break; -- cgit v1.2.3-70-g09d2 From e6e6659446c87057aede26a39d9f16b19001716f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:43 +0800 Subject: rtnetlink: bring NETDEV_POST_TYPE_CHANGE event process back in rtnetlink_event As I said in patch 'rtnetlink: bring NETDEV_CHANGEMTU event process back in rtnetlink_event', removing NETDEV_POST_TYPE_CHANGE event was not the right fix for the redundant notifications issue. So bring this event process back to rtnetlink_event and the old redundant notifications issue would be fixed in the later patch 'rtnetlink: check DO_SETLINK_NOTIFY correctly in do_setlink'. Fixes: aef091ae58aa ("rtnetlink: Do not generate notifications for POST_TYPE_CHANGE event") Signed-off-by: Xin Long Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bf473604f33d..8e44fd597f46 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4284,6 +4284,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: case NETDEV_BONDING_FAILOVER: + case NETDEV_POST_TYPE_CHANGE: case NETDEV_NOTIFY_PEERS: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: -- cgit v1.2.3-70-g09d2 From dc709f375743ebf5c9326cc9b946f6f09a34ac44 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:44 +0800 Subject: rtnetlink: bring NETDEV_CHANGEUPPER event process back in rtnetlink_event libteam needs this event notification in userspace when dev's master dev has been changed. After this, the redundant notifications issue would be fixed in the later patch 'rtnetlink: check DO_SETLINK_NOTIFY correctly in do_setlink'. Fixes: b6b36eb23a46 ("rtnetlink: Do not generate notifications for NETDEV_CHANGEUPPER event") Signed-off-by: Xin Long Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8e44fd597f46..ab98c1c8b6f3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4286,6 +4286,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_BONDING_FAILOVER: case NETDEV_POST_TYPE_CHANGE: case NETDEV_NOTIFY_PEERS: + case NETDEV_CHANGEUPPER: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: case NETDEV_CHANGE_TX_QUEUE_LEN: -- cgit v1.2.3-70-g09d2 From 64ff90cc2e6f42596d7a0c37e41dc95292bb63b1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:45 +0800 Subject: rtnetlink: check DO_SETLINK_NOTIFY correctly in do_setlink The check 'status & DO_SETLINK_NOTIFY' in do_setlink doesn't really work after status & DO_SETLINK_MODIFIED, as: DO_SETLINK_MODIFIED 0x1 DO_SETLINK_NOTIFY 0x3 Considering that notifications are suppposed to be sent only when status have the flag DO_SETLINK_NOTIFY, the right check would be: (status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY This would avoid lots of duplicated notifications when setting some properties of a link. Fixes: ba9989069f4e ("rtnl/do_setlink(): notify when a netdev is modified") Signed-off-by: Xin Long Acked-by: David Ahern Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ab98c1c8b6f3..3e98fb557598 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2248,7 +2248,7 @@ static int do_setlink(const struct sk_buff *skb, errout: if (status & DO_SETLINK_MODIFIED) { - if (status & DO_SETLINK_NOTIFY) + if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) netdev_state_change(dev); if (err < 0) -- cgit v1.2.3-70-g09d2 From 2d7f669b42a97022c8c2b6cd86f3990be5fcd1bc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Oct 2017 18:13:46 +0800 Subject: rtnetlink: do not set notification for tx_queue_len in do_setlink NETDEV_CHANGE_TX_QUEUE_LEN event process in rtnetlink_event would send a notification for userspace and tx_queue_len's setting in do_setlink would trigger NETDEV_CHANGE_TX_QUEUE_LEN. So it shouldn't set DO_SETLINK_NOTIFY status for this change to send a notification any more. Signed-off-by: Xin Long Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3e98fb557598..a6bcf86ce471 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2093,7 +2093,7 @@ static int do_setlink(const struct sk_buff *skb, dev->tx_queue_len = orig_len; goto errout; } - status |= DO_SETLINK_NOTIFY; + status |= DO_SETLINK_MODIFIED; } } -- cgit v1.2.3-70-g09d2 From 2459b4c635858094df78abb9ca87d99f89fe8ca5 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 11 Oct 2017 16:24:48 +0200 Subject: net: enable interface alias removal via rtnl IFLA_IFALIAS is defined as NLA_STRING. It means that the minimal length of the attribute is 1 ("\0"). However, to remove an alias, the attribute length must be 0 (see dev_set_alias()). Let's define the type to NLA_BINARY to allow 0-length string, so that the alias can be removed. Example: $ ip l s dummy0 alias foo $ ip l l dev dummy0 5: dummy0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether ae:20:30:4f:a7:f3 brd ff:ff:ff:ff:ff:ff alias foo Before the patch: $ ip l s dummy0 alias "" RTNETLINK answers: Numerical result out of range After the patch: $ ip l s dummy0 alias "" $ ip l l dev dummy0 5: dummy0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether ae:20:30:4f:a7:f3 brd ff:ff:ff:ff:ff:ff CC: Oliver Hartkopp CC: Stephen Hemminger Fixes: 96ca4a2cc145 ("net: remove ifalias on empty given alias") Reported-by: Julien FLoret Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a6bcf86ce471..5ace48926b19 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1483,7 +1483,10 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_NET_NS_FD] = { .type = NLA_U32 }, - [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, + /* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to + * allow 0-length string (needed to remove an alias). + */ + [IFLA_IFALIAS] = { .type = NLA_BINARY, .len = IFALIASZ - 1 }, [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, -- cgit v1.2.3-70-g09d2 From e1043a4bb9fce6cfc7d55c5767e429a18ac8c4eb Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Mon, 16 Oct 2017 15:20:32 +0200 Subject: xen-netfront, xen-netback: Use correct minimum MTU values RFC791 specifies the minimum MTU to be 68, while xen-net{front|back} drivers use a minimum value of 0. When set MTU to 0~67 with xen_net{front|back} driver, the network will become unreachable immediately, the guest can no longer be pinged. xen_net{front|back} should not allow the user to set this value which causes network problems. Reported-by: Chen Shi Signed-off-by: Mohammed Gamal Acked-by: Wei Liu Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/net/xen-netback/interface.c | 2 +- drivers/net/xen-netfront.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index ee8ed9da00ad..4491ca5aee90 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -486,7 +486,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, dev->tx_queue_len = XENVIF_QUEUE_LENGTH; - dev->min_mtu = 0; + dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_MAX_MTU - VLAN_ETH_HLEN; /* diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 523387e71a80..8b8689c6d887 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1316,7 +1316,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netdev->features |= netdev->hw_features; netdev->ethtool_ops = &xennet_ethtool_ops; - netdev->min_mtu = 0; + netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = XEN_NETIF_MAX_TX_SIZE; SET_NETDEV_DEV(netdev, &dev->dev); -- cgit v1.2.3-70-g09d2 From 0ad646c81b2182f7fa67ec0c8c825e0ee165696d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Oct 2017 11:58:53 -0700 Subject: tun: call dev_get_valid_name() before register_netdevice() register_netdevice() could fail early when we have an invalid dev name, in which case ->ndo_uninit() is not called. For tun device, this is a problem because a timer etc. are already initialized and it expects ->ndo_uninit() to clean them up. We could move these initializations into a ->ndo_init() so that register_netdevice() knows better, however this is still complicated due to the logic in tun_detach(). Therefore, I choose to just call dev_get_valid_name() before register_netdevice(), which is quicker and much easier to audit. And for this specific case, it is already enough. Fixes: 96442e42429e ("tuntap: choose the txq based on rxq") Reported-by: Dmitry Alexeev Cc: Jason Wang Cc: "Michael S. Tsirkin" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 3 +++ include/linux/netdevice.h | 3 +++ net/core/dev.c | 6 +++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 5ce580f413b9..e21bf90b819f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2027,6 +2027,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!dev) return -ENOMEM; + err = dev_get_valid_name(net, dev, name); + if (err) + goto err_free_dev; dev_net_set(dev, net); dev->rtnl_link_ops = &tun_link_ops; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f535779d9dc1..2eaac7d75af4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3694,6 +3694,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); +int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name); + #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) diff --git a/net/core/dev.c b/net/core/dev.c index 588b473194a8..11596a302a26 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1147,9 +1147,8 @@ static int dev_alloc_name_ns(struct net *net, return ret; } -static int dev_get_valid_name(struct net *net, - struct net_device *dev, - const char *name) +int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name) { BUG_ON(!net); @@ -1165,6 +1164,7 @@ static int dev_get_valid_name(struct net *net, return 0; } +EXPORT_SYMBOL(dev_get_valid_name); /** * dev_change_name - change name of a device -- cgit v1.2.3-70-g09d2 From 226584aedd94acd61ffa51fb69bcf6b3309a7b8f Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 16 Oct 2017 12:27:58 +0200 Subject: spi: fix IDR collision on systems with both fixed and dynamic SPI bus numbers On systems where some controllers get a dynamic ID assigned and some have a fixed number from DT, the current implemention might run into an IDR collision if the dynamic controllers gets probed first and get an IDR number, which is later requested by the controller with the fixed numbering. When this happens the fixed controller will fail to register with the SPI core. Fix this by skipping all known alias numbers when assigning the dynamic IDs. Fixes: 9b61e302210e (spi: Pick spi bus number from Linux idr or spi alias) Signed-off-by: Lucas Stach Signed-off-by: Mark Brown --- drivers/spi/spi.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 6e65524cbfd9..e8b5a5e21b2e 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -45,7 +45,6 @@ #define CREATE_TRACE_POINTS #include -#define SPI_DYN_FIRST_BUS_NUM 0 static DEFINE_IDR(spi_master_idr); @@ -2086,7 +2085,7 @@ int spi_register_controller(struct spi_controller *ctlr) struct device *dev = ctlr->dev.parent; struct boardinfo *bi; int status = -ENODEV; - int id; + int id, first_dynamic; if (!dev) return -ENODEV; @@ -2116,9 +2115,15 @@ int spi_register_controller(struct spi_controller *ctlr) } } if (ctlr->bus_num < 0) { + first_dynamic = of_alias_get_highest_id("spi"); + if (first_dynamic < 0) + first_dynamic = 0; + else + first_dynamic++; + mutex_lock(&board_lock); - id = idr_alloc(&spi_master_idr, ctlr, SPI_DYN_FIRST_BUS_NUM, 0, - GFP_KERNEL); + id = idr_alloc(&spi_master_idr, ctlr, first_dynamic, + 0, GFP_KERNEL); mutex_unlock(&board_lock); if (WARN(id < 0, "couldn't get idr")) return id; -- cgit v1.2.3-70-g09d2 From c019b5166e11faaf9ed3b64316ed338eaa19de60 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 16 Oct 2017 12:19:48 +0300 Subject: net/sched: cls_flower: Set egress_dev mark when calling into the HW driver Commit 7091d8c '(net/sched: cls_flower: Add offload support using egress Hardware device') made sure (when fl_hw_replace_filter is called) to put the egress_dev mark on persisent structure instance. Hence, following calls into the HW driver for stats and deletion will note it and act accordingly. With commit de4784ca030f this property is lost and hence when called, the HW driver failes to operate (stats, delete) on the offloaded flow. Fix it by setting the egress_dev flag whenever the ingress device is different from the hw device since this is exactly the condition under which we're calling into the HW driver through the egress port net-device. Fixes: de4784ca030f ('net: sched: get rid of struct tc_to_netdev') Signed-off-by: Or Gerlitz Signed-off-by: Roi Dayan Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d230cb4c8094..b480d7c792ba 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -234,6 +234,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) tc_cls_common_offload_init(&cls_flower.common, tp); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; + cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); } @@ -289,6 +290,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.exts = &f->exts; + cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev; dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower); -- cgit v1.2.3-70-g09d2 From 823038ca030e9f8283518b1e6a5a6879edcbe057 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 16 Oct 2017 19:43:15 +0800 Subject: dev_ioctl: add missing NETDEV_CHANGE_TX_QUEUE_LEN event notification When changing dev tx_queue_len via netlink or net-sysfs, a NETDEV_CHANGE_TX_QUEUE_LEN event notification will be called. But dev_ioctl missed this event notification, which could cause no userspace notification would be sent. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/core/dev_ioctl.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 709a4e6fb447..f9c7a88cd981 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -303,7 +303,18 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; + if (dev->tx_queue_len ^ ifr->ifr_qlen) { + unsigned int orig_len = dev->tx_queue_len; + + dev->tx_queue_len = ifr->ifr_qlen; + err = call_netdevice_notifiers( + NETDEV_CHANGE_TX_QUEUE_LEN, dev); + err = notifier_to_errno(err); + if (err) { + dev->tx_queue_len = orig_len; + return err; + } + } return 0; case SIOCSIFNAME: -- cgit v1.2.3-70-g09d2 From 99b169d3c2052717a9a56b2c8aab0cabd96f0598 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Oct 2017 13:57:24 +0100 Subject: drm/i915: Fix eviction when the GGTT is idle but full In the full-ppgtt world, we can fill the GGTT full of context objects. These context objects are currently implicitly tracked by the requests that pin them i.e. they are only unpinned when the request is completed and retired, but we do not have the link from the vma to the request (anymore). In order to unpin those contexts, we have to issue another request and wait upon the switch to the kernel context. The bug during eviction was that we assumed that a full GGTT meant we would have requests on the GGTT timeline, and so we missed situations where those requests where merely in flight (and when even they have not yet been submitted to hw yet). The fix employed here is to change the already-is-idle test to no look at the execution timeline, but count the outstanding requests and then check that we have switched to the kernel context. Erring on the side of overkill here just means that we stall a little longer than may be strictly required, but we only expect to hit this path in extreme corner cases where returning an erroneous error is worse than the delay. v2: Logical inversion when swapping over branches. Fixes: 80b204bce8f2 ("drm/i915: Enable multiple timelines") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171012125726.14736-1-chris@chris-wilson.co.uk (cherry picked from commit 55b4f1ce2f23692c57205b9974fba61baa4b9321) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem_evict.c | 63 ++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4df039ef2ce3..e161d383b526 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -33,21 +33,20 @@ #include "intel_drv.h" #include "i915_trace.h" -static bool ggtt_is_idle(struct drm_i915_private *dev_priv) +static bool ggtt_is_idle(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct intel_engine_cs *engine; + enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) { - struct intel_timeline *tl; + if (i915->gt.active_requests) + return false; - tl = &ggtt->base.timeline.engine[engine->id]; - if (i915_gem_active_isset(&tl->last_request)) - return false; - } + for_each_engine(engine, i915, id) { + if (engine->last_retired_context != i915->kernel_context) + return false; + } - return true; + return true; } static int ggtt_flush(struct drm_i915_private *i915) @@ -157,7 +156,8 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, cache_level, start, end, mode); - /* Retire before we search the active list. Although we have + /* + * Retire before we search the active list. Although we have * reasonable accuracy in our retirement lists, we may have * a stray pin (preventing eviction) that can only be resolved by * retiring. @@ -182,7 +182,8 @@ search_again: BUG_ON(ret); } - /* Can we unpin some objects such as idle hw contents, + /* + * Can we unpin some objects such as idle hw contents, * or pending flips? But since only the GGTT has global entries * such as scanouts, rinbuffers and contexts, we can skip the * purge when inspecting per-process local address spaces. @@ -190,19 +191,33 @@ search_again: if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK) return -ENOSPC; - if (ggtt_is_idle(dev_priv)) { - /* If we still have pending pageflip completions, drop - * back to userspace to give our workqueues time to - * acquire our locks and unpin the old scanouts. - */ - return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC; - } + /* + * Not everything in the GGTT is tracked via VMA using + * i915_vma_move_to_active(), otherwise we could evict as required + * with minimal stalling. Instead we are forced to idle the GPU and + * explicitly retire outstanding requests which will then remove + * the pinning for active objects such as contexts and ring, + * enabling us to evict them on the next iteration. + * + * To ensure that all user contexts are evictable, we perform + * a switch to the perma-pinned kernel context. This all also gives + * us a termination condition, when the last retired context is + * the kernel's there is no more we can evict. + */ + if (!ggtt_is_idle(dev_priv)) { + ret = ggtt_flush(dev_priv); + if (ret) + return ret; - ret = ggtt_flush(dev_priv); - if (ret) - return ret; + goto search_again; + } - goto search_again; + /* + * If we still have pending pageflip completions, drop + * back to userspace to give our workqueues time to + * acquire our locks and unpin the old scanouts. + */ + return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC; found: /* drm_mm doesn't allow any other other operations while -- cgit v1.2.3-70-g09d2 From fbe776cc3a753618877f7ce87a28ae3480743348 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 13 Oct 2017 16:47:35 +0100 Subject: drm/i915: Use bdw_ddi_translations_fdi for Broadwell MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compiler warns: drivers/gpu/drm/i915/intel_ddi.c:118:35: warning: ‘bdw_ddi_translations_fdi’ defined but not used Lo and behold, if we look at intel_ddi_get_buf_trans_fdi(), it uses hsw_ddi_translations_fdi[] for both Haswell and *Broadwell* Fixes: 7d1c42e679f9 ("drm/i915: Refactor code to select the DDI buf translation table") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: David Weinehall Cc: Jani Nikula Cc: # v4.12+ Link: https://patchwork.freedesktop.org/patch/msgid/20171013154735.27163-1-chris@chris-wilson.co.uk Reviewed-by: Jani Nikula Reviewed-by: Ville Syrjälä (cherry picked from commit 1210d3889077653b90b0bfd2cc54e19f4766e4e6) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_ddi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 476681d5940c..d4135e0ee723 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -664,8 +664,8 @@ intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, int *n_entries) { if (IS_BROADWELL(dev_priv)) { - *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); - return hsw_ddi_translations_fdi; + *n_entries = ARRAY_SIZE(bdw_ddi_translations_fdi); + return bdw_ddi_translations_fdi; } else if (IS_HASWELL(dev_priv)) { *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); return hsw_ddi_translations_fdi; -- cgit v1.2.3-70-g09d2 From 41e64c1ac73bbc2380d7b85357a4b693043a5ba8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 3 Oct 2017 15:08:58 -0700 Subject: drm/i915/cnl: Fix PLL mapping. On PLL Enable sequence we need to "Configure DPCLKA_CFGCR0 to turn on the clock for the DDI and map the DPLL to the DDI" So we first do the map and then we unset DDI_CLK_OFF to turn the clock on. We do this in 2 separated steps. However, on this second step where we should only unset the off bit we are also unmapping the ddi from the pll. So we end up using the pll 0 for almost everything. Consequently breaking cases with more than one display. Fixes: 555e38d27317 ("drm/i915/cnl: DDI - PLL mapping") Cc: Paulo Zanoni Cc: Manasi Navare Cc: Kahola, Mika Signed-off-by: Rodrigo Vivi Reviewed-by: James Ausmus Link: https://patchwork.freedesktop.org/patch/msgid/20171003220859.21352-2-rodrigo.vivi@intel.com (cherry picked from commit 87145d95c3d8297fb74762bd92e022d7f5cc250c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_ddi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index d4135e0ee723..5e5fe03b638c 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2102,8 +2102,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, * register writes. */ val = I915_READ(DPCLKA_CFGCR0); - val &= ~(DPCLKA_CFGCR0_DDI_CLK_OFF(port) | - DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port)); + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); I915_WRITE(DPCLKA_CFGCR0, val); } else if (IS_GEN9_BC(dev_priv)) { /* DDI -> PLL mapping */ -- cgit v1.2.3-70-g09d2 From 038daf5556a486cefc7a239ca4528003a5a4ef00 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 3 Oct 2017 15:08:59 -0700 Subject: drm/i915/cnl: Fix PLL initialization for HDMI. HDMI Mode selection on CNL is on CFGCR0 for that PLL, not on in a global CTRL1 as it was on SKL. The original patch addressed this difference, but leaving behind this single entry here. So we were checking the wrong bits during the PLL initialization and consequently avoiding the CFGCR1 setup during HDMI initialization. Luckly when only HDMI was in use BIOS had already setup this for us. But the dual display with hot plug were messed up. Fixes: a927c927de34 ("drm/i915/cnl: Initialize PLLs") Cc: Paulo Zanoni Cc: Manasi Navare Cc: Kahola, Mika Signed-off-by: Rodrigo Vivi Reviewed-by: James Ausmus Reviewed-by: Manasi Navare Link: https://patchwork.freedesktop.org/patch/msgid/20171003220859.21352-3-rodrigo.vivi@intel.com (cherry picked from commit 614ee07acfbb55f2debfc3223ffae97fee17ed14) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index a2a3d93d67bd..df808a94c511 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1996,7 +1996,7 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, /* 3. Configure DPLL_CFGCR0 */ /* Avoid touch CFGCR1 if HDMI mode is not enabled */ - if (pll->state.hw_state.cfgcr0 & DPLL_CTRL1_HDMI_MODE(pll->id)) { + if (pll->state.hw_state.cfgcr0 & DPLL_CFGCR0_HDMI_MODE) { val = pll->state.hw_state.cfgcr1; I915_WRITE(CNL_DPLL_CFGCR1(pll->id), val); /* 4. Reab back to ensure writes completed */ -- cgit v1.2.3-70-g09d2 From ab31fd0ce65ec93828b617123792c1bb7c6dcc42 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Fri, 13 Oct 2017 15:40:07 +0200 Subject: scsi: zfcp: fix erp_action use-before-initialize in REC action trace v4.10 commit 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN recovery") extended accessing parent pointer fields of struct zfcp_erp_action for tracing. If an erp_action has never been enqueued before, these parent pointer fields are uninitialized and NULL. Examples are zfcp objects freshly added to the parent object's children list, before enqueueing their first recovery subsequently. In zfcp_erp_try_rport_unblock(), we iterate such list. Accessing erp_action fields can cause a NULL pointer dereference. Since the kernel can read from lowcore on s390, it does not immediately cause a kernel page fault. Instead it can cause hangs on trying to acquire the wrong erp_action->adapter->dbf->rec_lock in zfcp_dbf_rec_action_lvl() ^bogus^ while holding already other locks with IRQs disabled. Real life example from attaching lots of LUNs in parallel on many CPUs: crash> bt 17723 PID: 17723 TASK: ... CPU: 25 COMMAND: "zfcperp0.0.1800" LOWCORE INFO: -psw : 0x0404300180000000 0x000000000038e424 -function : _raw_spin_lock_wait_flags at 38e424 ... #0 [fdde8fc90] zfcp_dbf_rec_action_lvl at 3e0004e9862 [zfcp] #1 [fdde8fce8] zfcp_erp_try_rport_unblock at 3e0004dfddc [zfcp] #2 [fdde8fd38] zfcp_erp_strategy at 3e0004e0234 [zfcp] #3 [fdde8fda8] zfcp_erp_thread at 3e0004e0a12 [zfcp] #4 [fdde8fe60] kthread at 173550 #5 [fdde8feb8] kernel_thread_starter at 10add2 zfcp_adapter zfcp_port zfcp_unit
, 0x404040d600000000 scsi_device NULL, returning early! zfcp_scsi_dev.status = 0x40000000 0x40000000 ZFCP_STATUS_COMMON_RUNNING crash> zfcp_unit
struct zfcp_unit { erp_action = { adapter = 0x0, port = 0x0, unit = 0x0, }, } zfcp_erp_action is always fully embedded into its container object. Such container object is never moved in its object tree (only add or delete). Hence, erp_action parent pointers can never change. To fix the issue, initialize the erp_action parent pointers before adding the erp_action container to any list and thus before it becomes accessible from outside of its initializing function. In order to also close the time window between zfcp_erp_setup_act() memsetting the entire erp_action to zero and setting the parent pointers again, drop the memset and instead explicitly initialize individually all erp_action fields except for parent pointers. To be extra careful not to introduce any other unintended side effect, even keep zeroing the erp_action fields for list and timer. Also double-check with WARN_ON_ONCE that erp_action parent pointers never change, so we get to know when we would deviate from previous behavior. Signed-off-by: Steffen Maier Fixes: 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN recovery") Cc: #2.6.32+ Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_aux.c | 5 +++++ drivers/s390/scsi/zfcp_erp.c | 18 +++++++++++------- drivers/s390/scsi/zfcp_scsi.c | 5 +++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index 82ac331d9125..84752152d41f 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -357,6 +357,8 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device) adapter->next_port_scan = jiffies; + adapter->erp_action.adapter = adapter; + if (zfcp_qdio_setup(adapter)) goto failed; @@ -513,6 +515,9 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn, port->dev.groups = zfcp_port_attr_groups; port->dev.release = zfcp_port_release; + port->erp_action.adapter = adapter; + port->erp_action.port = port; + if (dev_set_name(&port->dev, "0x%016llx", (unsigned long long)wwpn)) { kfree(port); goto err_out; diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 37408f5f81ce..ec2532ee1822 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -193,9 +193,8 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &zfcp_sdev->status); erp_action = &zfcp_sdev->erp_action; - memset(erp_action, 0, sizeof(struct zfcp_erp_action)); - erp_action->port = port; - erp_action->sdev = sdev; + WARN_ON_ONCE(erp_action->port != port); + WARN_ON_ONCE(erp_action->sdev != sdev); if (!(atomic_read(&zfcp_sdev->status) & ZFCP_STATUS_COMMON_RUNNING)) act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY; @@ -208,8 +207,8 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, zfcp_erp_action_dismiss_port(port); atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status); erp_action = &port->erp_action; - memset(erp_action, 0, sizeof(struct zfcp_erp_action)); - erp_action->port = port; + WARN_ON_ONCE(erp_action->port != port); + WARN_ON_ONCE(erp_action->sdev != NULL); if (!(atomic_read(&port->status) & ZFCP_STATUS_COMMON_RUNNING)) act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY; break; @@ -219,7 +218,8 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, zfcp_erp_action_dismiss_adapter(adapter); atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status); erp_action = &adapter->erp_action; - memset(erp_action, 0, sizeof(struct zfcp_erp_action)); + WARN_ON_ONCE(erp_action->port != NULL); + WARN_ON_ONCE(erp_action->sdev != NULL); if (!(atomic_read(&adapter->status) & ZFCP_STATUS_COMMON_RUNNING)) act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY; @@ -229,7 +229,11 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status, return NULL; } - erp_action->adapter = adapter; + WARN_ON_ONCE(erp_action->adapter != adapter); + memset(&erp_action->list, 0, sizeof(erp_action->list)); + memset(&erp_action->timer, 0, sizeof(erp_action->timer)); + erp_action->step = ZFCP_ERP_STEP_UNINITIALIZED; + erp_action->fsf_req_id = 0; erp_action->action = need; erp_action->status = act_status; diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index ec3ddd1d31d5..6cf8732627e0 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -115,10 +115,15 @@ static int zfcp_scsi_slave_alloc(struct scsi_device *sdev) struct zfcp_unit *unit; int npiv = adapter->connection_features & FSF_FEATURE_NPIV_MODE; + zfcp_sdev->erp_action.adapter = adapter; + zfcp_sdev->erp_action.sdev = sdev; + port = zfcp_get_port_by_wwpn(adapter, rport->port_name); if (!port) return -ENXIO; + zfcp_sdev->erp_action.port = port; + unit = zfcp_unit_find(port, zfcp_scsi_dev_lun(sdev)); if (unit) put_device(&unit->dev); -- cgit v1.2.3-70-g09d2 From 1010f21ecf8ac43be676d498742de18fa6c20987 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Mon, 16 Oct 2017 11:26:05 -0700 Subject: scsi: qla2xxx: Initialize Work element before requesting IRQs commit a9e170e28636 ("scsi: qla2xxx: Fix uninitialized work element") moved initializiation of work element earlier in the probe to fix call stack. However, it still leaves a window where interrupt can be generated before work element is initialized. Fix that window by initializing work element before we are requesting IRQs. [mkp: fixed typos] Fixes: a9e170e28636 ("scsi: qla2xxx: Fix uninitialized work element") Cc: # 4.13 Signed-off-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 937209805baf..3bd956d3bc5d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3061,6 +3061,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->max_cmd_len, host->max_channel, host->max_lun, host->transportt, sht->vendor_id); + INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); + /* Set up the irqs */ ret = qla2x00_request_irqs(ha, rsp); if (ret) @@ -3175,8 +3177,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) host->can_queue, base_vha->req, base_vha->mgmt_svr_loop_id, host->sg_tablesize); - INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); - if (ha->mqenable) { bool mq = false; bool startit = false; -- cgit v1.2.3-70-g09d2 From c99dfd20f295b2b8c46da5185c0889493ba1f291 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Mon, 16 Oct 2017 20:28:02 +0100 Subject: scsi: hpsa: Fix configured_logical_drive_count·check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check whether configured_logical_drive_count is less than 255. Previous check was always evaluating to true as this variable is defined as u8. Signed-off-by: Christos Gkekas Acked-by: Don Brace --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 9abe81021484..4ed3d26ffdde 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -4091,7 +4091,7 @@ static int hpsa_set_local_logical_count(struct ctlr_info *h, memset(id_ctlr, 0, sizeof(*id_ctlr)); rc = hpsa_bmic_id_controller(h, id_ctlr, sizeof(*id_ctlr)); if (!rc) - if (id_ctlr->configured_logical_drive_count < 256) + if (id_ctlr->configured_logical_drive_count < 255) *nlocals = id_ctlr->configured_logical_drive_count; else *nlocals = le16_to_cpu( -- cgit v1.2.3-70-g09d2 From 45348de2c8a7a1e64c5be27b22c9786b4152dd41 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Mon, 16 Oct 2017 17:22:31 -0700 Subject: scsi: aacraid: Fix controller initialization failure This is a fix to an issue where the driver sends its periodic WELLNESS command to the controller after the driver shut it down.This causes the controller to crash. The window where this can happen is small, but it can be hit at around 4 hours of constant resets. Cc: Fixes: fbd185986eba (aacraid: Fix AIF triggered IOP_RESET) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/comminit.c | 8 +++++--- drivers/scsi/aacraid/linit.c | 7 ++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 97d269f16888..1bc623ad3faf 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -302,9 +302,11 @@ int aac_send_shutdown(struct aac_dev * dev) return -ENOMEM; aac_fib_init(fibctx); - mutex_lock(&dev->ioctl_mutex); - dev->adapter_shutdown = 1; - mutex_unlock(&dev->ioctl_mutex); + if (!dev->adapter_shutdown) { + mutex_lock(&dev->ioctl_mutex); + dev->adapter_shutdown = 1; + mutex_unlock(&dev->ioctl_mutex); + } cmd = (struct aac_close *) fib_data(fibctx); cmd->command = cpu_to_le32(VM_CloseAll); diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 62beb2596466..c9252b138c1f 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1551,8 +1551,9 @@ static void __aac_shutdown(struct aac_dev * aac) { int i; + mutex_lock(&aac->ioctl_mutex); aac->adapter_shutdown = 1; - aac_send_shutdown(aac); + mutex_unlock(&aac->ioctl_mutex); if (aac->aif_thread) { int i; @@ -1565,7 +1566,11 @@ static void __aac_shutdown(struct aac_dev * aac) } kthread_stop(aac->thread); } + + aac_send_shutdown(aac); + aac_adapter_disable_int(aac); + if (aac_is_src(aac)) { if (aac->max_msix > 1) { for (i = 0; i < aac->max_msix; i++) { -- cgit v1.2.3-70-g09d2 From 917086ff231f614e6705927d8fe3eb6aa74b21bf Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Sun, 8 Oct 2017 00:28:21 -0400 Subject: vfs: fix mounting a filesystem with i_version The mount i_version flag is not enabled in the new sb_flags. This patch adds the missing SB_I_VERSION flag. Fixes: e462ec5 "VFS: Differentiate mount flags (MS_*) from internal superblock flags" Signed-off-by: Mimi Zohar Signed-off-by: Al Viro --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 3b601f115b6c..d18deb4c410b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2825,7 +2825,8 @@ long do_mount(const char *dev_name, const char __user *dir_name, SB_MANDLOCK | SB_DIRSYNC | SB_SILENT | - SB_POSIXACL); + SB_POSIXACL | + SB_I_VERSION); if (flags & MS_REMOUNT) retval = do_remount(&path, flags, sb_flags, mnt_flags, -- cgit v1.2.3-70-g09d2 From ea7d0d69426cab6747ed311c53f4142eb48b9454 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 6 Oct 2017 17:45:27 +0300 Subject: xhci: Identify USB 3.1 capable hosts by their port protocol capability Many USB 3.1 capable hosts never updated the Serial Bus Release Number (SBRN) register to USB 3.1 from USB 3.0 xhci driver identified USB 3.1 capable hosts based on this SBRN register, which according to specs "contains the release of the Universal Serial Bus Specification with which this Universal Serial Bus Host Controller module is compliant." but still in october 2017 gives USB 3.0 as the only possible option. Make an additional check for USB 3.1 support and enable it if the xHCI supported protocol capablity lists USB 3.1 capable ports. Cc: # v4.6+ Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index ee198ea47f49..51535ba2bcd4 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4805,7 +4805,8 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) */ hcd->has_tt = 1; } else { - if (xhci->sbrn == 0x31) { + /* Some 3.1 hosts return sbrn 0x30, can't rely on sbrn alone */ + if (xhci->sbrn == 0x31 || xhci->usb3_rhub.min_rev >= 1) { xhci_info(xhci, "Host supports USB 3.1 Enhanced SuperSpeed\n"); hcd->speed = HCD_USB31; hcd->self.root_hub->speed = USB_SPEED_SUPER_PLUS; -- cgit v1.2.3-70-g09d2 From d1aad52cf8b3f95dfe9b5b64da66343306ddf73b Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Fri, 6 Oct 2017 17:45:28 +0300 Subject: xhci: Cleanup current_cmd in xhci_cleanup_command_queue() KASAN reported use-after-free bug when xhci host controller died: [ 176.952537] BUG: KASAN: use-after-free in xhci_handle_command_timeout+0x68/0x224 [ 176.960846] Write of size 4 at addr ffffffc0cbb01608 by task kworker/3:3/1680 ... [ 177.180644] Freed by task 0: [ 177.183882] kasan_slab_free+0x90/0x15c [ 177.188194] kfree+0x114/0x28c [ 177.191630] xhci_cleanup_command_queue+0xc8/0xf8 [ 177.196916] xhci_hc_died+0x84/0x358 Problem here is that when the cmd_timer fired, it would try to access current_cmd while the command queue is already freed by xhci_hc_died(). Cleanup current_cmd in xhci_cleanup_command_queue() to avoid that. Fixes: d9f11ba9f107 ("xhci: Rework how we handle unresponsive or hoptlug removed hosts") Cc: # v4.12+ Signed-off-by: Jeffy Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index a9443651ce0f..48ae15afa59e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1309,6 +1309,7 @@ static void xhci_complete_del_and_free_cmd(struct xhci_command *cmd, u32 status) void xhci_cleanup_command_queue(struct xhci_hcd *xhci) { struct xhci_command *cur_cmd, *tmp_cmd; + xhci->current_cmd = NULL; list_for_each_entry_safe(cur_cmd, tmp_cmd, &xhci->cmd_list, cmd_list) xhci_complete_del_and_free_cmd(cur_cmd, COMP_COMMAND_ABORTED); } -- cgit v1.2.3-70-g09d2 From 810a624bd1b64b13ddcc2eb5c1880526a750a870 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 6 Oct 2017 17:45:29 +0300 Subject: usb: xhci: Reset halted endpoint if trb is noop When a URB is cancled, xhci driver turns the untransferred trbs into no-ops. If an endpoint stalls on a no-op trb that belongs to the cancelled URB, the event handler won't reset the endpoint. Hence, it will stay halted. Link: http://marc.info/?l=linux-usb&m=149582598330127&w=2 Cc: Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 48ae15afa59e..82c746e2d85c 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2580,15 +2580,21 @@ static int handle_tx_event(struct xhci_hcd *xhci, (struct xhci_generic_trb *) ep_trb); /* - * No-op TRB should not trigger interrupts. - * If ep_trb is a no-op TRB, it means the - * corresponding TD has been cancelled. Just ignore - * the TD. + * No-op TRB could trigger interrupts in a case where + * a URB was killed and a STALL_ERROR happens right + * after the endpoint ring stopped. Reset the halted + * endpoint. Otherwise, the endpoint remains stalled + * indefinitely. */ if (trb_is_noop(ep_trb)) { - xhci_dbg(xhci, - "ep_trb is a no-op TRB. Skip it for slot %u ep %u\n", - slot_id, ep_index); + if (trb_comp_code == COMP_STALL_ERROR || + xhci_requires_manual_halt_cleanup(xhci, ep_ctx, + trb_comp_code)) + xhci_cleanup_halted_endpoint(xhci, slot_id, + ep_index, + ep_ring->stream_id, + td, ep_trb, + EP_HARD_RESET); goto cleanup; } -- cgit v1.2.3-70-g09d2 From b3207c65dfafae27e7c492cb9188c0dc0eeaf3fd Mon Sep 17 00:00:00 2001 From: Mayank Rana Date: Fri, 6 Oct 2017 17:45:30 +0300 Subject: usb: xhci: Handle error condition in xhci_stop_device() xhci_stop_device() calls xhci_queue_stop_endpoint() multiple times without checking the return value. xhci_queue_stop_endpoint() can return error if the HC is already halted or unable to queue commands. This can cause a deadlock condition as xhci_stop_device() would end up waiting indefinitely for a completion for the command that didn't get queued. Fix this by checking the return value and bailing out of xhci_stop_device() in case of error. This patch happens to fix potential memory leaks of the allocated command structures as well. Fixes: c311e391a7ef ("xhci: rework command timeout and cancellation,") Cc: Signed-off-by: Mayank Rana Signed-off-by: Jack Pham Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index da9158f171cb..a2336deb5e36 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -420,14 +420,25 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) GFP_NOWAIT); if (!command) { spin_unlock_irqrestore(&xhci->lock, flags); - xhci_free_command(xhci, cmd); - return -ENOMEM; + ret = -ENOMEM; + goto cmd_cleanup; + } + + ret = xhci_queue_stop_endpoint(xhci, command, slot_id, + i, suspend); + if (ret) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_free_command(xhci, command); + goto cmd_cleanup; } - xhci_queue_stop_endpoint(xhci, command, slot_id, i, - suspend); } } - xhci_queue_stop_endpoint(xhci, cmd, slot_id, 0, suspend); + ret = xhci_queue_stop_endpoint(xhci, cmd, slot_id, 0, suspend); + if (ret) { + spin_unlock_irqrestore(&xhci->lock, flags); + goto cmd_cleanup; + } + xhci_ring_cmd_db(xhci); spin_unlock_irqrestore(&xhci->lock, flags); @@ -439,6 +450,8 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) xhci_warn(xhci, "Timeout while waiting for stop endpoint command\n"); ret = -ETIME; } + +cmd_cleanup: xhci_free_command(xhci, cmd); return ret; } -- cgit v1.2.3-70-g09d2 From 845d584f41eac3475c21e4a7d5e88d0f6e410cf7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 16 Oct 2017 16:21:19 +0200 Subject: USB: devio: Revert "USB: devio: Don't corrupt user memory" Taking the uurb->buffer_length userspace passes in as a maximum for the actual urbs transfer_buffer_length causes 2 serious issues: 1) It breaks isochronous support for all userspace apps using libusb, as existing libusb versions pass in 0 for uurb->buffer_length, relying on the kernel using the lenghts of the usbdevfs_iso_packet_desc descriptors passed in added together as buffer length. This for example causes redirection of USB audio and Webcam's into virtual machines using qemu-kvm to no longer work. This is a userspace ABI break and as such must be reverted. Note that the original commit does not protect other users / the kernels memory, it only stops the userspace process making the call from shooting itself in the foot. 2) It may cause the kernel to program host controllers to DMA over random memory. Just as the devio code used to only look at the iso_packet_desc lenghts, the host drivers do the same, relying on the submitter of the urbs to make sure the entire buffer is large enough and not checking transfer_buffer_length. But the "USB: devio: Don't corrupt user memory" commit now takes the userspace provided uurb->buffer_length for the buffer-size while copying over the user-provided iso_packet_desc lengths 1:1, allowing the user to specify a small buffer size while programming the host controller to dma a lot more data. (Atleast the ohci, uhci, xhci and fhci drivers do not check transfer_buffer_length for isoc transfers.) This reverts commit fa1ed74eb1c2 ("USB: devio: Don't corrupt user memory") fixing both these issues. Cc: Dan Carpenter Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 4664e543cf2f..e9326f31db8d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1576,11 +1576,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb totlen += isopkt[u].length; } u *= sizeof(struct usb_iso_packet_descriptor); - if (totlen <= uurb->buffer_length) - uurb->buffer_length = totlen; - else - WARN_ONCE(1, "uurb->buffer_length is too short %d vs %d", - totlen, uurb->buffer_length); + uurb->buffer_length = totlen; break; default: -- cgit v1.2.3-70-g09d2 From 765fb2f181cad669f2beb87842a05d8071f2be85 Mon Sep 17 00:00:00 2001 From: Maksim Salau Date: Wed, 11 Oct 2017 11:10:52 +0300 Subject: usb: cdc_acm: Add quirk for Elatec TWN3 Elatec TWN3 has the union descriptor on data interface. This results in failure to bind the device to the driver with the following log: usb 1-1.2: new full speed USB device using streamplug-ehci and address 4 usb 1-1.2: New USB device found, idVendor=09d8, idProduct=0320 usb 1-1.2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 usb 1-1.2: Product: RFID Device (COM) usb 1-1.2: Manufacturer: OEM cdc_acm 1-1.2:1.0: Zero length descriptor references cdc_acm: probe of 1-1.2:1.0 failed with error -22 Adding the NO_UNION_NORMAL quirk for the device fixes the issue. `lsusb -v` of the device: Bus 001 Device 003: ID 09d8:0320 Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 2 Communications bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 32 idVendor 0x09d8 idProduct 0x0320 bcdDevice 3.00 iManufacturer 1 OEM iProduct 2 RFID Device (COM) iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 67 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 250mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 2 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 0 CDC Header: bcdCDC 1.10 CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 1 CDC ACM: bmCapabilities 0x06 sends break line coding and serial state CDC Union: bMasterInterface 0 bSlaveInterface 1 Device Status: 0x0000 (Bus Powered) Signed-off-by: Maksim Salau Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 5e056064259c..18c923a4c16e 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1832,6 +1832,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0xfff0, 0x0100), /* DATECS FP-2000 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, + { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ + .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ + }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, -- cgit v1.2.3-70-g09d2 From 4f190e0b9de89c4c917c3ffb3799e9d00fc534ac Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 9 Oct 2017 22:46:07 -0500 Subject: USB: musb: fix session-bit runtime-PM quirk The current session-bit quirk implementation does not prevent the retry counter from underflowing, something which could break runtime PM and keep the device active for a very long time (about 2^32 seconds) after a disconnect. This notably breaks the B-device timeout case, but could potentially cause problems also when the controller is operating as an A-device. Fixes: 2bff3916fda9 ("usb: musb: Fix PM for hub disconnect") Cc: stable # 4.9 Cc: Tony Lindgren Signed-off-by: Johan Hovold Tested-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 029692053dd3..07b8c7152e3d 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1861,22 +1861,22 @@ static void musb_pm_runtime_check_session(struct musb *musb) MUSB_DEVCTL_HR; switch (devctl & ~s) { case MUSB_QUIRK_B_INVALID_VBUS_91: - if (musb->quirk_retries--) { + if (musb->quirk_retries) { musb_dbg(musb, "Poll devctl on invalid vbus, assume no session"); schedule_delayed_work(&musb->irq_work, msecs_to_jiffies(1000)); - + musb->quirk_retries--; return; } /* fall through */ case MUSB_QUIRK_A_DISCONNECT_19: - if (musb->quirk_retries--) { + if (musb->quirk_retries) { musb_dbg(musb, "Poll devctl on possible host mode disconnect"); schedule_delayed_work(&musb->irq_work, msecs_to_jiffies(1000)); - + musb->quirk_retries--; return; } if (!musb->session) -- cgit v1.2.3-70-g09d2 From 0c3aae9bd59978fb8c3557d7883380bef0f2cfa1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 9 Oct 2017 22:46:08 -0500 Subject: USB: musb: fix late external abort on suspend The musb delayed irq work was never flushed on suspend, something which since 4.9 can lead to an external abort if the work is scheduled after the grandparent's clock has been disabled: PM: Suspending system (mem) PM: suspend of devices complete after 125.224 msecs PM: suspend devices took 0.132 seconds PM: late suspend of devices complete after 7.423 msecs PM: noirq suspend of devices complete after 7.083 msecs suspend debug: Waiting for 5 second(s). Unhandled fault: external abort on non-linefetch (0x1008) at 0xd0262c60 ... [] (musb_default_readb) from [] (musb_irq_work+0x48/0x220) [] (musb_irq_work) from [] (process_one_work+0x1f4/0x758) [] (process_one_work) from [] (worker_thread+0x54/0x514) [] (worker_thread) from [] (kthread+0x128/0x158) [] (kthread) from [] (ret_from_fork+0x14/0x24) Commit 2bff3916fda9 ("usb: musb: Fix PM for hub disconnect") started scheduling musb_irq_work with a delay of up to a second and with retries thereby making this easy to trigger, for example, by suspending shortly after a disconnect. Note that we set a flag to prevent the irq work from rescheduling itself during suspend and instead process a disconnect immediately. This takes care of the case where we are disconnected shortly before suspending. However, when in host mode, a disconnect while suspended will still go unnoticed and thus prevent the controller from runtime suspending upon resume as the session bit is always set. This will need to be addressed separately. Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support") Fixes: 467d5c980709 ("usb: musb: Implement session bit based runtime PM for musb-core") Fixes: 2bff3916fda9 ("usb: musb: Fix PM for hub disconnect") Cc: stable # 4.9 Cc: Felipe Balbi Cc: Tony Lindgren Signed-off-by: Johan Hovold Tested-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 11 +++++++++-- drivers/usb/musb/musb_core.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 07b8c7152e3d..1ced3af75b05 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1861,7 +1861,7 @@ static void musb_pm_runtime_check_session(struct musb *musb) MUSB_DEVCTL_HR; switch (devctl & ~s) { case MUSB_QUIRK_B_INVALID_VBUS_91: - if (musb->quirk_retries) { + if (musb->quirk_retries && !musb->flush_irq_work) { musb_dbg(musb, "Poll devctl on invalid vbus, assume no session"); schedule_delayed_work(&musb->irq_work, @@ -1871,7 +1871,7 @@ static void musb_pm_runtime_check_session(struct musb *musb) } /* fall through */ case MUSB_QUIRK_A_DISCONNECT_19: - if (musb->quirk_retries) { + if (musb->quirk_retries && !musb->flush_irq_work) { musb_dbg(musb, "Poll devctl on possible host mode disconnect"); schedule_delayed_work(&musb->irq_work, @@ -2681,8 +2681,15 @@ static int musb_suspend(struct device *dev) musb_platform_disable(musb); musb_disable_interrupts(musb); + + musb->flush_irq_work = true; + while (flush_delayed_work(&musb->irq_work)) + ; + musb->flush_irq_work = false; + if (!(musb->io.quirks & MUSB_PRESERVE_SESSION)) musb_writeb(musb->mregs, MUSB_DEVCTL, 0); + WARN_ON(!list_empty(&musb->pending_list)); spin_lock_irqsave(&musb->lock, flags); diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index c748f4ac1154..20f4614178d9 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -428,6 +428,8 @@ struct musb { unsigned test_mode:1; unsigned softconnect:1; + unsigned flush_irq_work:1; + u8 address; u8 test_mode_nr; u16 ackpend; /* ep0 */ -- cgit v1.2.3-70-g09d2 From bfa53e0e366b98185fadb03f7916d1538cb90ebd Mon Sep 17 00:00:00 2001 From: Alexandre Bailon Date: Mon, 9 Oct 2017 22:46:09 -0500 Subject: usb: musb: musb_cppi41: Fix the address of teardown and autoreq registers The DA8xx and DSPS platforms don't use the same address for few registers. On Da8xx, this is causing some issues (e.g. teardown that doesn't work). Configure the address of the register during the init and use them instead of constants. Cc: stable@vger.kernel.org # v4.12+ Reported-by: nsekhar@ti.com Signed-off-by: Alexandre Bailon Tested-by: Sekhar Nori Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_cppi41.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index ba255280a624..d66416a27146 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -26,6 +26,9 @@ #define MUSB_DMA_NUM_CHANNELS 15 +#define DA8XX_USB_AUTOREQ 0x14 +#define DA8XX_USB_TEARDOWN 0x1c + struct cppi41_dma_controller { struct dma_controller controller; struct cppi41_dma_channel rx_channel[MUSB_DMA_NUM_CHANNELS]; @@ -35,6 +38,9 @@ struct cppi41_dma_controller { u32 rx_mode; u32 tx_mode; u32 auto_req; + + u32 tdown_reg; + u32 autoreq_reg; }; static void save_rx_toggle(struct cppi41_dma_channel *cppi41_channel) @@ -364,8 +370,8 @@ static void cppi41_set_autoreq_mode(struct cppi41_dma_channel *cppi41_channel, if (new_mode == old_mode) return; controller->auto_req = new_mode; - musb_writel(controller->controller.musb->ctrl_base, USB_CTRL_AUTOREQ, - new_mode); + musb_writel(controller->controller.musb->ctrl_base, + controller->autoreq_reg, new_mode); } static bool cppi41_configure_channel(struct dma_channel *channel, @@ -581,12 +587,13 @@ static int cppi41_dma_channel_abort(struct dma_channel *channel) do { if (is_tx) - musb_writel(musb->ctrl_base, USB_TDOWN, tdbit); + musb_writel(musb->ctrl_base, controller->tdown_reg, + tdbit); ret = dmaengine_terminate_all(cppi41_channel->dc); } while (ret == -EAGAIN); if (is_tx) { - musb_writel(musb->ctrl_base, USB_TDOWN, tdbit); + musb_writel(musb->ctrl_base, controller->tdown_reg, tdbit); csr = musb_readw(epio, MUSB_TXCSR); if (csr & MUSB_TXCSR_TXPKTRDY) { @@ -727,6 +734,14 @@ cppi41_dma_controller_create(struct musb *musb, void __iomem *base) controller->controller.is_compatible = cppi41_is_compatible; controller->controller.musb = musb; + if (musb->io.quirks & MUSB_DA8XX) { + controller->tdown_reg = DA8XX_USB_TEARDOWN; + controller->autoreq_reg = DA8XX_USB_AUTOREQ; + } else { + controller->tdown_reg = USB_TDOWN; + controller->autoreq_reg = USB_CTRL_AUTOREQ; + } + ret = cppi41_dma_controller_start(controller); if (ret) goto plat_get_fail; -- cgit v1.2.3-70-g09d2 From e10c5b0c773efb8643ee89d387d310584ca30830 Mon Sep 17 00:00:00 2001 From: Alexandre Bailon Date: Mon, 9 Oct 2017 22:46:10 -0500 Subject: usb: musb: musb_cppi41: Fix cppi41_set_dma_mode() for DA8xx The way to configure the DMA mode on DA8xx is different from DSPS. Add a new function to configure DMA mode on DA8xx and use a callback to call the right function based on the platform. Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alexandre Bailon Tested-by: Sekhar Nori Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_cppi41.c | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index d66416a27146..b2b1306c01cf 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -26,6 +26,7 @@ #define MUSB_DMA_NUM_CHANNELS 15 +#define DA8XX_USB_MODE 0x10 #define DA8XX_USB_AUTOREQ 0x14 #define DA8XX_USB_TEARDOWN 0x1c @@ -41,6 +42,9 @@ struct cppi41_dma_controller { u32 tdown_reg; u32 autoreq_reg; + + void (*set_dma_mode)(struct cppi41_dma_channel *cppi41_channel, + unsigned int mode); }; static void save_rx_toggle(struct cppi41_dma_channel *cppi41_channel) @@ -355,6 +359,32 @@ static void cppi41_set_dma_mode(struct cppi41_dma_channel *cppi41_channel, } } +static void da8xx_set_dma_mode(struct cppi41_dma_channel *cppi41_channel, + unsigned int mode) +{ + struct cppi41_dma_controller *controller = cppi41_channel->controller; + struct musb *musb = controller->controller.musb; + unsigned int shift; + u32 port; + u32 new_mode; + u32 old_mode; + + old_mode = controller->tx_mode; + port = cppi41_channel->port_num; + + shift = (port - 1) * 4; + if (!cppi41_channel->is_tx) + shift += 16; + new_mode = old_mode & ~(3 << shift); + new_mode |= mode << shift; + + if (new_mode == old_mode) + return; + controller->tx_mode = new_mode; + musb_writel(musb->ctrl_base, DA8XX_USB_MODE, new_mode); +} + + static void cppi41_set_autoreq_mode(struct cppi41_dma_channel *cppi41_channel, unsigned mode) { @@ -379,6 +409,7 @@ static bool cppi41_configure_channel(struct dma_channel *channel, dma_addr_t dma_addr, u32 len) { struct cppi41_dma_channel *cppi41_channel = channel->private_data; + struct cppi41_dma_controller *controller = cppi41_channel->controller; struct dma_chan *dc = cppi41_channel->dc; struct dma_async_tx_descriptor *dma_desc; enum dma_transfer_direction direction; @@ -404,7 +435,7 @@ static bool cppi41_configure_channel(struct dma_channel *channel, musb_writel(musb->ctrl_base, RNDIS_REG(cppi41_channel->port_num), len); /* gen rndis */ - cppi41_set_dma_mode(cppi41_channel, + controller->set_dma_mode(cppi41_channel, EP_MODE_DMA_GEN_RNDIS); /* auto req */ @@ -413,14 +444,15 @@ static bool cppi41_configure_channel(struct dma_channel *channel, } else { musb_writel(musb->ctrl_base, RNDIS_REG(cppi41_channel->port_num), 0); - cppi41_set_dma_mode(cppi41_channel, + controller->set_dma_mode(cppi41_channel, EP_MODE_DMA_TRANSPARENT); cppi41_set_autoreq_mode(cppi41_channel, EP_MODE_AUTOREQ_NONE); } } else { /* fallback mode */ - cppi41_set_dma_mode(cppi41_channel, EP_MODE_DMA_TRANSPARENT); + controller->set_dma_mode(cppi41_channel, + EP_MODE_DMA_TRANSPARENT); cppi41_set_autoreq_mode(cppi41_channel, EP_MODE_AUTOREQ_NONE); len = min_t(u32, packet_sz, len); } @@ -737,9 +769,11 @@ cppi41_dma_controller_create(struct musb *musb, void __iomem *base) if (musb->io.quirks & MUSB_DA8XX) { controller->tdown_reg = DA8XX_USB_TEARDOWN; controller->autoreq_reg = DA8XX_USB_AUTOREQ; + controller->set_dma_mode = da8xx_set_dma_mode; } else { controller->tdown_reg = USB_TDOWN; controller->autoreq_reg = USB_CTRL_AUTOREQ; + controller->set_dma_mode = cppi41_set_dma_mode; } ret = cppi41_dma_controller_start(controller); -- cgit v1.2.3-70-g09d2 From 297d7fe9e439473800ab1f2f853b4b5f8c888500 Mon Sep 17 00:00:00 2001 From: Alexandre Bailon Date: Mon, 9 Oct 2017 22:46:11 -0500 Subject: usb: musb: musb_cppi41: Configure the number of channels for DA8xx Currently, the number of channels is set to 15 but in the case of DA8xx, the number of channels is 4. Update the driver to configure the number of channels at runtime. Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alexandre Bailon Tested-by: Sekhar Nori Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_cppi41.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index b2b1306c01cf..1ec0a4947b6b 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -30,10 +30,12 @@ #define DA8XX_USB_AUTOREQ 0x14 #define DA8XX_USB_TEARDOWN 0x1c +#define DA8XX_DMA_NUM_CHANNELS 4 + struct cppi41_dma_controller { struct dma_controller controller; - struct cppi41_dma_channel rx_channel[MUSB_DMA_NUM_CHANNELS]; - struct cppi41_dma_channel tx_channel[MUSB_DMA_NUM_CHANNELS]; + struct cppi41_dma_channel *rx_channel; + struct cppi41_dma_channel *tx_channel; struct hrtimer early_tx; struct list_head early_tx_list; u32 rx_mode; @@ -45,6 +47,7 @@ struct cppi41_dma_controller { void (*set_dma_mode)(struct cppi41_dma_channel *cppi41_channel, unsigned int mode); + u8 num_channels; }; static void save_rx_toggle(struct cppi41_dma_channel *cppi41_channel) @@ -483,7 +486,7 @@ static struct dma_channel *cppi41_dma_channel_allocate(struct dma_controller *c, struct cppi41_dma_channel *cppi41_channel = NULL; u8 ch_num = hw_ep->epnum - 1; - if (ch_num >= MUSB_DMA_NUM_CHANNELS) + if (ch_num >= controller->num_channels) return NULL; if (is_tx) @@ -643,7 +646,7 @@ static void cppi41_release_all_dma_chans(struct cppi41_dma_controller *ctrl) struct dma_chan *dc; int i; - for (i = 0; i < MUSB_DMA_NUM_CHANNELS; i++) { + for (i = 0; i < ctrl->num_channels; i++) { dc = ctrl->tx_channel[i].dc; if (dc) dma_release_channel(dc); @@ -695,7 +698,7 @@ static int cppi41_dma_controller_start(struct cppi41_dma_controller *controller) goto err; ret = -EINVAL; - if (port > MUSB_DMA_NUM_CHANNELS || !port) + if (port > controller->num_channels || !port) goto err; if (is_tx) cppi41_channel = &controller->tx_channel[port - 1]; @@ -736,6 +739,8 @@ void cppi41_dma_controller_destroy(struct dma_controller *c) hrtimer_cancel(&controller->early_tx); cppi41_dma_controller_stop(controller); + kfree(controller->rx_channel); + kfree(controller->tx_channel); kfree(controller); } EXPORT_SYMBOL_GPL(cppi41_dma_controller_destroy); @@ -744,6 +749,7 @@ struct dma_controller * cppi41_dma_controller_create(struct musb *musb, void __iomem *base) { struct cppi41_dma_controller *controller; + int channel_size; int ret = 0; if (!musb->controller->parent->of_node) { @@ -770,18 +776,33 @@ cppi41_dma_controller_create(struct musb *musb, void __iomem *base) controller->tdown_reg = DA8XX_USB_TEARDOWN; controller->autoreq_reg = DA8XX_USB_AUTOREQ; controller->set_dma_mode = da8xx_set_dma_mode; + controller->num_channels = DA8XX_DMA_NUM_CHANNELS; } else { controller->tdown_reg = USB_TDOWN; controller->autoreq_reg = USB_CTRL_AUTOREQ; controller->set_dma_mode = cppi41_set_dma_mode; + controller->num_channels = MUSB_DMA_NUM_CHANNELS; } + channel_size = controller->num_channels * + sizeof(struct cppi41_dma_channel); + controller->rx_channel = kzalloc(channel_size, GFP_KERNEL); + if (!controller->rx_channel) + goto rx_channel_alloc_fail; + controller->tx_channel = kzalloc(channel_size, GFP_KERNEL); + if (!controller->tx_channel) + goto tx_channel_alloc_fail; + ret = cppi41_dma_controller_start(controller); if (ret) goto plat_get_fail; return &controller->controller; plat_get_fail: + kfree(controller->tx_channel); +tx_channel_alloc_fail: + kfree(controller->rx_channel); +rx_channel_alloc_fail: kfree(controller); kzalloc_fail: if (ret == -EPROBE_DEFER) -- cgit v1.2.3-70-g09d2 From 445ef61543da3db5b699f87fb0aa4f227165f6ed Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 9 Oct 2017 22:46:12 -0500 Subject: usb: musb: Check for host-mode using is_host_active() on reset interrupt The sunxi musb has a bug where sometimes it will generate a babble error on device disconnect instead of a disconnect IRQ. When this happens the musb controller switches from host mode to device mode (it clears MUSB_DEVCTL_HM/MUSB_DEVCTL_SESSION and sets MUSB_DEVCTL_BDEVICE) and gets stuck in this state. The babble error is misdetected as a bus reset because MUSB_DEVCTL_HM was cleared. To fix this, use is_host_active() rather than (devctl & MUSB_DEVCTL_HM) to detect babble error so that sunxi musb babble recovery can handle it by restoring the mode. This information is provided by the driver logic and does not rely on register contents. Cc: stable@vger.kernel.org # v4.1+ Signed-off-by: Jonathan Liu Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 1ced3af75b05..ff5a1a8989d5 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -906,7 +906,7 @@ b_host: */ if (int_usb & MUSB_INTR_RESET) { handled = IRQ_HANDLED; - if (devctl & MUSB_DEVCTL_HM) { + if (is_host_active(musb)) { /* * When BABBLE happens what we can depends on which * platform MUSB is running, because some platforms @@ -916,9 +916,7 @@ b_host: * drop the session. */ dev_err(musb->controller, "Babble\n"); - - if (is_host_active(musb)) - musb_recover_from_babble(musb); + musb_recover_from_babble(musb); } else { musb_dbg(musb, "BUS RESET as %s", usb_otg_state_string(musb->xceiv->otg->state)); -- cgit v1.2.3-70-g09d2 From 6ed05c68cbcae42cd52b8e53b66952bfa9c002ce Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 9 Oct 2017 22:46:13 -0500 Subject: usb: musb: sunxi: Explicitly release USB PHY on exit This fixes a kernel oops when unloading the driver due to usb_put_phy being called after usb_phy_generic_unregister when the device is detached. Calling usb_phy_generic_unregister causes x->dev->driver to be NULL in usb_put_phy and results in a NULL pointer dereference. Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Jonathan Liu Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/sunxi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index c9a09b5bb6e5..dc353e24d53c 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -297,6 +297,8 @@ static int sunxi_musb_exit(struct musb *musb) if (test_bit(SUNXI_MUSB_FL_HAS_SRAM, &glue->flags)) sunxi_sram_release(musb->controller->parent); + devm_usb_put_phy(glue->dev, glue->xceiv); + return 0; } -- cgit v1.2.3-70-g09d2 From 2811501e6d8f5747d08f8e25b9ecf472d0dc4c7d Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 3 Oct 2017 11:16:43 +0300 Subject: usb: quirks: add quirk for WORLDE MINI MIDI keyboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This keyboard doesn't implement Get String descriptors properly even though string indexes are valid. What happens is that when requesting for the String descriptor, the device disconnects and reconnects. Without this quirk, this loop will continue forever. Cc: Alan Stern Reported-by: Владимир Мартьянов Cc: stable Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 82806e311202..a6aaf2f193a4 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -221,6 +221,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Corsair Strafe RGB */ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT }, + /* MIDI keyboard WORLDE MINI */ + { USB_DEVICE(0x1c75, 0x0204), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Acer C120 LED Projector */ { USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM }, -- cgit v1.2.3-70-g09d2 From 3f50f614d61f91ad30b1947c429d1f235493a7f9 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Sat, 14 Oct 2017 00:10:12 +0900 Subject: perf record: Fix documentation for a inexistent option '-l' 'perf record' had a '-l' option that meant "scale counter values" a very long time ago, but it currently belongs to 'perf stat' as '-c'. So remove it. I found this problem in the below case. $ perf record -e cycles -l sleep 3 Error: unknown switch `l Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1507907412-19813-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e397453e5a46..63526f4416ea 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -8,8 +8,8 @@ perf-record - Run a command and record its profile into perf.data SYNOPSIS -------- [verse] -'perf record' [-e | --event=EVENT] [-l] [-a] -'perf record' [-e | --event=EVENT] [-l] [-a] -- [] +'perf record' [-e | --event=EVENT] [-a] +'perf record' [-e | --event=EVENT] [-a] -- [] DESCRIPTION ----------- -- cgit v1.2.3-70-g09d2 From 7f0cd23615040b9e53bb4980c986b721cba08bbc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 17 Oct 2017 22:29:00 +0900 Subject: perf buildid-list: Fix crash when processing PERF_RECORD_NAMESPACE Thomas reported that 'perf buildid-list' gets a SEGFAULT due to NULL pointer deref when he ran it on a data with namespace events. It was because the buildid_id__mark_dso_hit_ops lacks the namespace event handler and perf_too__fill_default() didn't set it. Program received signal SIGSEGV, Segmentation fault. 0x0000000000000000 in ?? () Missing separate debuginfos, use: dnf debuginfo-install audit-libs-2.7.7-1.fc25.s390x bzip2-libs-1.0.6-21.fc25.s390x elfutils-libelf-0.169-1.fc25.s390x +elfutils-libs-0.169-1.fc25.s390x libcap-ng-0.7.8-1.fc25.s390x numactl-libs-2.0.11-2.ibm.fc25.s390x openssl-libs-1.1.0e-1.1.ibm.fc25.s390x perl-libs-5.24.1-386.fc25.s390x +python-libs-2.7.13-2.fc25.s390x slang-2.3.0-7.fc25.s390x xz-libs-5.2.3-2.fc25.s390x zlib-1.2.8-10.fc25.s390x (gdb) where #0 0x0000000000000000 in ?? () #1 0x00000000010fad6a in machines__deliver_event (machines=, machines@entry=0x2c6fd18, evlist=, event=event@entry=0x3fffdf00470, sample=0x3ffffffe880, sample@entry=0x3ffffffe888, tool=tool@entry=0x1312968 , file_offset=1136) at util/session.c:1287 #2 0x00000000010fbf4e in perf_session__deliver_event (file_offset=1136, tool=0x1312968 , sample=0x3ffffffe888, event=0x3fffdf00470, session=0x2c6fc30) at util/session.c:1340 #3 perf_session__process_event (session=0x2c6fc30, session@entry=0x0, event=event@entry=0x3fffdf00470, file_offset=file_offset@entry=1136) at util/session.c:1522 #4 0x00000000010fddde in __perf_session__process_events (file_size=11880, data_size=, data_offset=, session=0x0) at util/session.c:1899 #5 perf_session__process_events (session=0x0, session@entry=0x2c6fc30) at util/session.c:1953 #6 0x000000000103b2ac in perf_session__list_build_ids (with_hits=, force=) at builtin-buildid-list.c:83 #7 cmd_buildid_list (argc=, argv=) at builtin-buildid-list.c:115 #8 0x00000000010a026c in run_builtin (p=0x1311f78 , argc=argc@entry=2, argv=argv@entry=0x3fffffff3c0) at perf.c:296 #9 0x000000000102bc00 in handle_internal_command (argv=, argc=2) at perf.c:348 #10 run_argv (argcp=, argv=) at perf.c:392 #11 main (argc=, argv=0x3fffffff3c0) at perf.c:536 (gdb) Fix it by adding a stub event handler for namespace event. Committer testing: Further clarifying, plain using 'perf buildid-list' will not end up in a SEGFAULT when processing a perf.data file with namespace info: # perf record -a --namespaces sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.024 MB perf.data (1058 samples) ] # perf buildid-list | wc -l 38 # perf buildid-list | head -5 e2a171c7b905826fc8494f0711ba76ab6abbd604 /lib/modules/4.14.0-rc3+/build/vmlinux 874840a02d8f8a31cedd605d0b8653145472ced3 /lib/modules/4.14.0-rc3+/kernel/arch/x86/kvm/kvm-intel.ko ea7223776730cd8a22f320040aae4d54312984bc /lib/modules/4.14.0-rc3+/kernel/drivers/gpu/drm/i915/i915.ko 5961535e6732a8edb7f22b3f148bb2fa2e0be4b9 /lib/modules/4.14.0-rc3+/kernel/drivers/gpu/drm/drm.ko f045f54aa78cf1931cc893f78b6cbc52c72a8cb1 /usr/lib64/libc-2.25.so # It is only when one asks for checking what of those entries actually had samples, i.e. when we use either -H or --with-hits, that we will process all the PERF_RECORD_ events, and since tools/perf/builtin-buildid-list.c neither explicitely set a perf_tool.namespaces() callback nor the default stub was set that we end up, when processing a PERF_RECORD_NAMESPACE record, causing a SEGFAULT: # perf buildid-list -H Segmentation fault (core dumped) ^C # Reported-and-Tested-by: Thomas-Mich Richter Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Hari Bathini Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Thomas-Mich Richter Fixes: f3b3614a284d ("perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info") Link: http://lkml.kernel.org/r/20171017132900.11043-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a7ebd9fe8e40..76ab0709a20c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -374,6 +374,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->mmap2 = process_event_stub; if (tool->comm == NULL) tool->comm = process_event_stub; + if (tool->namespaces == NULL) + tool->namespaces = process_event_stub; if (tool->fork == NULL) tool->fork = process_event_stub; if (tool->exit == NULL) -- cgit v1.2.3-70-g09d2 From 671632a019853744bc6c0b2ed8bcfdbcea371848 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 17 Oct 2017 08:22:25 -0500 Subject: MAINTAINERS: fix git tree url for musb module Commit 3b2435192fe91 ("MAINTAINERS: drop OMAP USB and MUSB maintainership") switched the maintainer for musb module, but didn't update the git tree location. Delete the git tree information, since the current maintainer doesn't have a public tree. Reported-by: Jonathan Liu Signed-off-by: Bin Liu Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a74227ad082e..1f33ae356003 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9213,7 +9213,6 @@ F: include/linux/isicom.h MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER M: Bin Liu L: linux-usb@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git S: Maintained F: drivers/usb/musb/ -- cgit v1.2.3-70-g09d2 From ffe51f0142a291a957eebb9687cafb15f2b3fc14 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 17 Oct 2017 08:43:09 -0600 Subject: fs: Avoid invalidation in interrupt context in dio_complete() Currently we try to defer completion of async DIO to the process context in case there are any mapped pages associated with the inode so that we can invalidate the pages when the IO completes. However the check is racy and the pages can be mapped afterwards. If this happens we might end up calling invalidate_inode_pages2_range() in dio_complete() in interrupt context which could sleep. This can be reproduced by generic/451. Fix this by passing the information whether we can or can't invalidate to the dio_complete(). Thanks Eryu Guan for reporting this and Jan Kara for suggesting a fix. Fixes: 332391a9935d ("fs: Fix page cache inconsistency when mixing buffered and AIO DIO") Reported-by: Eryu Guan Reviewed-by: Jan Kara Tested-by: Eryu Guan Signed-off-by: Lukas Czerner Signed-off-by: Jens Axboe --- fs/direct-io.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 62cf812ed0e5..8106a8dddfab 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -44,6 +44,12 @@ */ #define DIO_PAGES 64 +/* + * Flags for dio_complete() + */ +#define DIO_COMPLETE_ASYNC 0x01 /* This is async IO */ +#define DIO_COMPLETE_INVALIDATE 0x02 /* Can invalidate pages */ + /* * This code generally works in units of "dio_blocks". A dio_block is * somewhere between the hard sector size and the filesystem block size. it @@ -225,7 +231,7 @@ static inline struct page *dio_get_page(struct dio *dio, * filesystems can use it to hold additional state between get_block calls and * dio_complete. */ -static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) +static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) { loff_t offset = dio->iocb->ki_pos; ssize_t transferred = 0; @@ -266,7 +272,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... */ - if (ret > 0 && dio->op == REQ_OP_WRITE && + if (flags & DIO_COMPLETE_INVALIDATE && + ret > 0 && dio->op == REQ_OP_WRITE && dio->inode->i_mapping->nrpages) { err = invalidate_inode_pages2_range(dio->inode->i_mapping, offset >> PAGE_SHIFT, @@ -285,7 +292,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); - if (is_async) { + if (flags & DIO_COMPLETE_ASYNC) { /* * generic_write_sync expects ki_pos to have been updated * already, but the submission path only does this for @@ -306,7 +313,7 @@ static void dio_aio_complete_work(struct work_struct *work) { struct dio *dio = container_of(work, struct dio, complete_work); - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE); } static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio); @@ -348,7 +355,7 @@ static void dio_bio_end_aio(struct bio *bio) queue_work(dio->inode->i_sb->s_dio_done_wq, &dio->complete_work); } else { - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC); } } } @@ -1359,7 +1366,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio_await_completion(dio); if (drop_refcount(dio) == 0) { - retval = dio_complete(dio, retval, false); + retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE); } else BUG_ON(retval != -EIOCBQUEUED); -- cgit v1.2.3-70-g09d2 From b31ce3041787b61f2dad39d2dcda5c4a81d10e2b Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Tue, 17 Oct 2017 11:38:04 +0800 Subject: arm64: dts: rockchip: correct vqmmc voltage for rk3399 platforms The vcc_sd or vcc_sdio used for IO voltage for sdmmc and sdio interface on rk3399 platform have a limitation that it can't be larger than 3.0v, otherwise it has a potential risk for the chip. Correct all of them. Fixes: 171582e00db1 ("arm64: dts: rockchip: add support for firefly-rk3399 board") Fixes: 2c66fc34e945 ("arm64: dts: rockchip: add RK3399-Q7 (Puma) SoM") Fixes: 8164a84cca12 ("arm64: dts: rockchip: Add support for rk3399 sapphire SOM") Cc: stable@vger.kernel.org Signed-off-by: Shawn Lin Tested-by: Klaus Goger Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-firefly.dts | 4 ++-- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 4 ++-- arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts index 7fd4bfcaa38e..fef82274a39d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts @@ -371,10 +371,10 @@ regulator-always-on; regulator-boot-on; regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3300000>; + regulator-max-microvolt = <3000000>; regulator-state-mem { regulator-on-in-suspend; - regulator-suspend-microvolt = <3300000>; + regulator-suspend-microvolt = <3000000>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 53ff3d191a1d..910628d18add 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -325,12 +325,12 @@ vcc_sd: LDO_REG4 { regulator-name = "vcc_sd"; regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3300000>; + regulator-max-microvolt = <3000000>; regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; - regulator-suspend-microvolt = <3300000>; + regulator-suspend-microvolt = <3000000>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi index 6c30bb02210d..0f873c897d0d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi @@ -315,10 +315,10 @@ regulator-always-on; regulator-boot-on; regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3300000>; + regulator-max-microvolt = <3000000>; regulator-state-mem { regulator-on-in-suspend; - regulator-suspend-microvolt = <3300000>; + regulator-suspend-microvolt = <3000000>; }; }; -- cgit v1.2.3-70-g09d2 From b521102d935d0501f5d95242ade2988f3f069c77 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Oct 2017 10:56:01 +0200 Subject: arm64: dts: rockchip: fix typo in iommu nodes The latest dtc warns about an extraneous cell in the interrupt property of two of the iommu device nodes: Warning (interrupts_property): interrupts size is (16), expected multiple of 12 in /iommu@ff373f00 Warning (interrupts_property): interrupts size is (16), expected multiple of 12 in /iommu@ff900800 This removes the typo. Fixes: cede4c79de28 ("arm64: dts: rockchip: add rk3368 iommu nodes") Fixes: 49c82f2b7c5d ("arm64: dts: rockchip: add rk3328 iommu nodes") Signed-off-by: Arnd Bergmann Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3368.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 6d615cb6e64d..41d61840fb99 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -582,7 +582,7 @@ vop_mmu: iommu@ff373f00 { compatible = "rockchip,iommu"; reg = <0x0 0xff373f00 0x0 0x100>; - interrupts = ; + interrupts = ; interrupt-names = "vop_mmu"; #iommu-cells = <0>; status = "disabled"; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index 19fbaa5e7bdd..1070c8264c13 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -740,7 +740,7 @@ iep_mmu: iommu@ff900800 { compatible = "rockchip,iommu"; reg = <0x0 0xff900800 0x0 0x100>; - interrupts = ; + interrupts = ; interrupt-names = "iep_mmu"; #iommu-cells = <0>; status = "disabled"; -- cgit v1.2.3-70-g09d2 From 6575257c60e1a26a5319ccf2b5ce5b6449001017 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 17 Oct 2017 14:55:24 -0400 Subject: tracing/samples: Fix creation and deletion of simple_thread_fn creation Commit 7496946a8 ("tracing: Add samples of DECLARE_EVENT_CLASS() and DEFINE_EVENT()") added template examples for all the events. It created a DEFINE_EVENT_FN() example which reused the foo_bar_reg and foo_bar_unreg functions. Enabling both the TRACE_EVENT_FN() and DEFINE_EVENT_FN() example trace events caused the foo_bar_reg to be called twice, creating the test thread twice. The foo_bar_unreg would remove it only once, even if it was called multiple times, leaving a thread existing when the module is unloaded, causing an oops. Add a ref count and allow foo_bar_reg() and foo_bar_unreg() be called by multiple trace events. Cc: stable@vger.kernel.org Fixes: 7496946a8 ("tracing: Add samples of DECLARE_EVENT_CLASS() and DEFINE_EVENT()") Signed-off-by: Steven Rostedt (VMware) --- samples/trace_events/trace-events-sample.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index bc7fcf010a5b..446beb7ac48d 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -78,29 +78,37 @@ static int simple_thread_fn(void *arg) } static DEFINE_MUTEX(thread_mutex); +static bool simple_thread_cnt; int foo_bar_reg(void) { + mutex_lock(&thread_mutex); + if (simple_thread_cnt++) + goto out; + pr_info("Starting thread for foo_bar_fn\n"); /* * We shouldn't be able to start a trace when the module is * unloading (there's other locks to prevent that). But * for consistency sake, we still take the thread_mutex. */ - mutex_lock(&thread_mutex); simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn"); + out: mutex_unlock(&thread_mutex); return 0; } void foo_bar_unreg(void) { - pr_info("Killing thread for foo_bar_fn\n"); - /* protect against module unloading */ mutex_lock(&thread_mutex); + if (--simple_thread_cnt) + goto out; + + pr_info("Killing thread for foo_bar_fn\n"); if (simple_tsk_fn) kthread_stop(simple_tsk_fn); simple_tsk_fn = NULL; + out: mutex_unlock(&thread_mutex); } -- cgit v1.2.3-70-g09d2 From 883b3b6567bfc8b5da7b3f0cec80513af111d2f5 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 16 Oct 2017 14:06:14 -0700 Subject: i2c: omap: Fix error handling for clk_get() Otherwise we can get the following if the fck alias is missing: Unable to handle kernel paging request at virtual address fffffffe ... PC is at clk_get_rate+0x8/0x10 LR is at omap_i2c_probe+0x278/0x6ec ... [] (clk_get_rate) from [] (omap_i2c_probe+0x278/0x6ec) [] (omap_i2c_probe) from [] (platform_drv_probe+0x50/0xb0) [] (platform_drv_probe) from [] (driver_probe_device+0x264/0x2ec) [] (driver_probe_device) from [] (bus_for_each_drv+0x70/0xb8) [] (bus_for_each_drv) from [] (__device_attach+0xcc/0x13c) [] (__device_attach) from [] (bus_probe_device+0x88/0x90) [] (bus_probe_device) from [] (deferred_probe_work_func+0x4c/0x14c) Signed-off-by: Tony Lindgren Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-omap.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 1ebb5e947e0b..23c2ea2baedc 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -360,6 +360,7 @@ static int omap_i2c_init(struct omap_i2c_dev *omap) unsigned long fclk_rate = 12000000; unsigned long internal_clk = 0; struct clk *fclk; + int error; if (omap->rev >= OMAP_I2C_REV_ON_3430_3530) { /* @@ -378,6 +379,13 @@ static int omap_i2c_init(struct omap_i2c_dev *omap) * do this bit unconditionally. */ fclk = clk_get(omap->dev, "fck"); + if (IS_ERR(fclk)) { + error = PTR_ERR(fclk); + dev_err(omap->dev, "could not get fck: %i\n", error); + + return error; + } + fclk_rate = clk_get_rate(fclk); clk_put(fclk); @@ -410,6 +418,12 @@ static int omap_i2c_init(struct omap_i2c_dev *omap) else internal_clk = 4000; fclk = clk_get(omap->dev, "fck"); + if (IS_ERR(fclk)) { + error = PTR_ERR(fclk); + dev_err(omap->dev, "could not get fck: %i\n", error); + + return error; + } fclk_rate = clk_get_rate(fclk) / 1000; clk_put(fclk); -- cgit v1.2.3-70-g09d2 From c97cc7dbce2fe6f46e137f4b040f915a0181ee85 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 17 Oct 2017 16:00:02 -0400 Subject: Revert "tools/power turbostat: stop migrating, unless '-m'" This reverts commit c91fc8519d87715a3a173475ea3778794c139996. That change caused a C6 and PC6 residency regression on large idle systems. Users also complained about new output indicating jitter: turbostat: cpu6 jitter 3794 9142 Signed-off-by: Len Brown Cc: 4.13+ # v4.13+ Signed-off-by: Rafael J. Wysocki --- tools/power/x86/turbostat/turbostat.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dafba2c1e7d..bd9c6b31a504 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -92,7 +92,6 @@ unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; -int do_migrate; double discover_bclk(unsigned int family, unsigned int model); unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ @@ -303,9 +302,6 @@ int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg int cpu_migrate(int cpu) { - if (!do_migrate) - return 0; - CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) @@ -5007,7 +5003,6 @@ void cmdline(int argc, char **argv) {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, {"list", no_argument, 0, 'l'}, - {"migrate", no_argument, 0, 'm'}, {"out", required_argument, 0, 'o'}, {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, @@ -5019,7 +5014,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:Jmo:qST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -5062,9 +5057,6 @@ void cmdline(int argc, char **argv) list_header_only++; quiet++; break; - case 'm': - do_migrate = 1; - break; case 'o': outf = fopen_or_die(optarg, "w"); break; -- cgit v1.2.3-70-g09d2 From 587c3c9f286cee5c9cac38d28c8ae1875f4ec85b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 15 Oct 2017 18:16:33 +0100 Subject: scsi: sg: Re-fix off by one in sg_fill_request_table() Commit 109bade9c625 ("scsi: sg: use standard lists for sg_requests") introduced an off-by-one error in sg_ioctl(), which was fixed by commit bd46fc406b30 ("scsi: sg: off by one in sg_ioctl()"). Unfortunately commit 4759df905a47 ("scsi: sg: factor out sg_fill_request_table()") moved that code, and reintroduced the bug (perhaps due to a botched rebase). Fix it again. Fixes: 4759df905a47 ("scsi: sg: factor out sg_fill_request_table()") Cc: stable@vger.kernel.org Signed-off-by: Ben Hutchings Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 0419c2298eab..aa28874e8fb9 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -837,7 +837,7 @@ sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) val = 0; list_for_each_entry(srp, &sfp->rq_list, entry) { - if (val > SG_MAX_QUEUE) + if (val >= SG_MAX_QUEUE) break; rinfo[val].req_state = srp->done + 1; rinfo[val].problem = -- cgit v1.2.3-70-g09d2 From 8009d506a1dd00cf436b0c4cca0dcec130580a21 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 18 Oct 2017 00:45:49 +0100 Subject: ALSA: seq: Enable 'use' locking in all configurations The 'use' locking macros are no-ops if neither SMP or SND_DEBUG is enabled. This might once have been OK in non-preemptible configurations, but even in that case snd_seq_read() may sleep while relying on a 'use' lock. So always use the proper implementations. Cc: stable@vger.kernel.org Signed-off-by: Ben Hutchings Signed-off-by: Takashi Iwai --- sound/core/seq/seq_lock.c | 4 ---- sound/core/seq/seq_lock.h | 12 ------------ 2 files changed, 16 deletions(-) diff --git a/sound/core/seq/seq_lock.c b/sound/core/seq/seq_lock.c index 0ff7926a5a69..cda64b489e42 100644 --- a/sound/core/seq/seq_lock.c +++ b/sound/core/seq/seq_lock.c @@ -23,8 +23,6 @@ #include #include "seq_lock.h" -#if defined(CONFIG_SMP) || defined(CONFIG_SND_DEBUG) - /* wait until all locks are released */ void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line) { @@ -41,5 +39,3 @@ void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line) } } EXPORT_SYMBOL(snd_use_lock_sync_helper); - -#endif diff --git a/sound/core/seq/seq_lock.h b/sound/core/seq/seq_lock.h index 54044bc2c9ef..ac38031c370e 100644 --- a/sound/core/seq/seq_lock.h +++ b/sound/core/seq/seq_lock.h @@ -3,8 +3,6 @@ #include -#if defined(CONFIG_SMP) || defined(CONFIG_SND_DEBUG) - typedef atomic_t snd_use_lock_t; /* initialize lock */ @@ -20,14 +18,4 @@ typedef atomic_t snd_use_lock_t; void snd_use_lock_sync_helper(snd_use_lock_t *lock, const char *file, int line); #define snd_use_lock_sync(lockp) snd_use_lock_sync_helper(lockp, __BASE_FILE__, __LINE__) -#else /* SMP || CONFIG_SND_DEBUG */ - -typedef spinlock_t snd_use_lock_t; /* dummy */ -#define snd_use_lock_init(lockp) /**/ -#define snd_use_lock_use(lockp) /**/ -#define snd_use_lock_free(lockp) /**/ -#define snd_use_lock_sync(lockp) /**/ - -#endif /* SMP || CONFIG_SND_DEBUG */ - #endif /* __SND_SEQ_LOCK_H */ -- cgit v1.2.3-70-g09d2 From 2bdd713b92a9cade239d3c7d15205a09f556624d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 17 Oct 2017 20:32:07 +0200 Subject: mac80211: use constant time comparison with keys Otherwise we risk leaking information via timing side channel. Fixes: fdf7cb4185b6 ("mac80211: accept key reinstall without changing anything") Signed-off-by: Jason A. Donenfeld Signed-off-by: Johannes Berg --- net/mac80211/key.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index ae995c8480db..035d16fe926e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" @@ -635,7 +636,7 @@ int ieee80211_key_link(struct ieee80211_key *key, * new version of the key to avoid nonce reuse or replay issues. */ if (old_key && key->conf.keylen == old_key->conf.keylen && - !memcmp(key->conf.key, old_key->conf.key, key->conf.keylen)) { + !crypto_memneq(key->conf.key, old_key->conf.key, key->conf.keylen)) { ieee80211_key_free_unused(key); ret = 0; goto out; -- cgit v1.2.3-70-g09d2 From 51e13359cd5ea34acc62c90627603352956380af Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Oct 2017 21:56:20 +0200 Subject: cfg80211: fix connect/disconnect edge cases If we try to connect while already connected/connecting, but this fails, we set ssid_len=0 but leave current_bss hanging, leading to errors. Check all of this better, first of all ensuring that we can't try to connect to a different SSID while connected/ing; ensure that prev_bssid is set for re-association attempts even in the case of the driver supporting the connect() method, and don't reset ssid_len in the failure cases. While at it, also reset ssid_len while disconnecting unless we were connected and expect a disconnected event, and warn on a successful connection without ssid_len being set. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/wireless/sme.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 0a49b88070d0..b6533ecbf5b1 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -522,11 +522,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, return -EOPNOTSUPP; if (wdev->current_bss) { - if (!prev_bssid) - return -EALREADY; - if (prev_bssid && - !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) - return -ENOTCONN; cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; @@ -1063,11 +1058,35 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (WARN_ON(wdev->connect_keys)) { - kzfree(wdev->connect_keys); - wdev->connect_keys = NULL; + /* + * If we have an ssid_len, we're trying to connect or are + * already connected, so reject a new SSID unless it's the + * same (which is the case for re-association.) + */ + if (wdev->ssid_len && + (wdev->ssid_len != connect->ssid_len || + memcmp(wdev->ssid, connect->ssid, wdev->ssid_len))) + return -EALREADY; + + /* + * If connected, reject (re-)association unless prev_bssid + * matches the current BSSID. + */ + if (wdev->current_bss) { + if (!prev_bssid) + return -EALREADY; + if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) + return -ENOTCONN; } + /* + * Reject if we're in the process of connecting with WEP, + * this case isn't very interesting and trying to handle + * it would make the code much more complex. + */ + if (wdev->connect_keys) + return -EINPROGRESS; + cfg80211_oper_and_ht_capa(&connect->ht_capa_mask, rdev->wiphy.ht_capa_mod_mask); @@ -1118,7 +1137,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, if (err) { wdev->connect_keys = NULL; - wdev->ssid_len = 0; + /* + * This could be reassoc getting refused, don't clear + * ssid_len in that case. + */ + if (!wdev->current_bss) + wdev->ssid_len = 0; return err; } @@ -1145,6 +1169,14 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, else if (wdev->ssid_len) err = rdev_disconnect(rdev, dev, reason); + /* + * Clear ssid_len unless we actually were fully connected, + * in which case cfg80211_disconnected() will take care of + * this later. + */ + if (!wdev->current_bss) + wdev->ssid_len = 0; + return err; } -- cgit v1.2.3-70-g09d2 From e5f5ce37a7918ed7406c52987c7cc8b670ed5e14 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Oct 2017 09:36:51 +0200 Subject: mac80211: validate user rate mask before configuring driver Ben reported that when the user rate mask is rejected for not matching any basic rate, the driver had already been configured. This is clearly an oversight in my original change, fix this by doing the validation before calling the driver. Reported-by: Ben Greear Fixes: e8e4f5280ddd ("mac80211: reject/clear user rate mask if not usable") Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a354f1939e49..fb15d3b97cb2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2727,12 +2727,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; - if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { - ret = drv_set_bitrate_mask(local, sdata, mask); - if (ret) - return ret; - } - /* * If active validate the setting and reject it if it doesn't leave * at least one basic rate usable, since we really have to be able @@ -2748,6 +2742,12 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return -EINVAL; } + if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { + ret = drv_set_bitrate_mask(local, sdata, mask); + if (ret) + return ret; + } + for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; -- cgit v1.2.3-70-g09d2 From 0bfe649fbb1337400065fa47679b381b2ac845f0 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Mon, 16 Oct 2017 17:05:57 +0200 Subject: fq_impl: Properly enforce memory limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fq structure would fail to properly enforce the memory limit in the case where the packet being enqueued was bigger than the packet being removed to bring the memory usage down. So keep dropping packets until the memory usage is back below the limit. Also, fix the statistics for memory limit violations. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- include/net/fq_impl.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 4e6131cd3f43..ac1a2317941e 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -146,6 +146,7 @@ static void fq_tin_enqueue(struct fq *fq, fq_flow_get_default_t get_default_func) { struct fq_flow *flow; + bool oom; lockdep_assert_held(&fq->lock); @@ -167,8 +168,8 @@ static void fq_tin_enqueue(struct fq *fq, } __skb_queue_tail(&flow->queue, skb); - - if (fq->backlog > fq->limit || fq->memory_usage > fq->memory_limit) { + oom = (fq->memory_usage > fq->memory_limit); + while (fq->backlog > fq->limit || oom) { flow = list_first_entry_or_null(&fq->backlogs, struct fq_flow, backlogchain); @@ -183,8 +184,10 @@ static void fq_tin_enqueue(struct fq *fq, flow->tin->overlimit++; fq->overlimit++; - if (fq->memory_usage > fq->memory_limit) + if (oom) { fq->overmemory++; + oom = (fq->memory_usage > fq->memory_limit); + } } } -- cgit v1.2.3-70-g09d2 From 3cd18d1981731d5f74b8e437009124ac99905d14 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Oct 2017 12:27:00 +0200 Subject: security/keys: BIG_KEY requires CONFIG_CRYPTO The recent rework introduced a possible randconfig build failure when CONFIG_CRYPTO configured to only allow modules: security/keys/big_key.o: In function `big_key_crypt': big_key.c:(.text+0x29f): undefined reference to `crypto_aead_setkey' security/keys/big_key.o: In function `big_key_init': big_key.c:(.init.text+0x1a): undefined reference to `crypto_alloc_aead' big_key.c:(.init.text+0x45): undefined reference to `crypto_aead_setauthsize' big_key.c:(.init.text+0x77): undefined reference to `crypto_destroy_tfm' crypto/gcm.o: In function `gcm_hash_crypt_remain_continue': gcm.c:(.text+0x167): undefined reference to `crypto_ahash_finup' crypto/gcm.o: In function `crypto_gcm_exit_tfm': gcm.c:(.text+0x847): undefined reference to `crypto_destroy_tfm' When we 'select CRYPTO' like the other users, we always get a configuration that builds. Fixes: 428490e38b2e ("security/keys: rewrite all of big_key crypto") Signed-off-by: Arnd Bergmann Signed-off-by: David Howells --- security/keys/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/keys/Kconfig b/security/keys/Kconfig index 91eafada3164..6462e6654ccf 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -45,6 +45,7 @@ config BIG_KEYS bool "Large payload keys" depends on KEYS depends on TMPFS + select CRYPTO select CRYPTO_AES select CRYPTO_GCM help -- cgit v1.2.3-70-g09d2 From 6a6d2a77addce6bc26ccb53df34478b1593f0fbf Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Wed, 4 Oct 2017 19:18:22 +0800 Subject: KEYS: Fix the wrong index when checking the existence of second id Fix the wrong index number when checking the existence of second id in function of finding asymmetric key. The id_1 is the second id that the index in array must be 1 but not 0. Fixes: 9eb029893ad5 (KEYS: Generalise x509_request_asymmetric_key()) Cc: David Howells Cc: Herbert Xu Cc: "David S. Miller" Signed-off-by: Chun-Yi Lee Signed-off-by: David Howells --- crypto/asymmetric_keys/asymmetric_type.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index e4b0ed386bc8..a597f5c5a222 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -105,7 +105,7 @@ struct key *find_asymmetric_key(struct key *keyring, if (id_0 && id_1) { const struct asymmetric_key_ids *kids = asymmetric_key_ids(key); - if (!kids->id[0]) { + if (!kids->id[1]) { pr_debug("First ID matches, but second is missing\n"); goto reject; } -- cgit v1.2.3-70-g09d2 From b3811d36a3e7e7e8ed660bf01151496cf99cf9ed Mon Sep 17 00:00:00 2001 From: Chun-Yi Lee Date: Wed, 4 Oct 2017 16:45:09 +0800 Subject: KEYS: checking the input id parameters before finding asymmetric key For finding asymmetric key, the input id_0 and id_1 parameters can not be NULL at the same time. This patch adds the BUG_ON checking for id_0 and id_1. Cc: David Howells Cc: Herbert Xu Cc: "David S. Miller" Signed-off-by: Chun-Yi Lee Signed-off-by: David Howells --- crypto/asymmetric_keys/asymmetric_type.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c index a597f5c5a222..39aecad286fe 100644 --- a/crypto/asymmetric_keys/asymmetric_type.c +++ b/crypto/asymmetric_keys/asymmetric_type.c @@ -57,6 +57,8 @@ struct key *find_asymmetric_key(struct key *keyring, char *req, *p; int len; + BUG_ON(!id_0 && !id_1); + if (id_0) { lookup = id_0->data; len = id_0->len; -- cgit v1.2.3-70-g09d2 From 363b02dab09b3226f3bd1420dad9c72b79a42a76 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Oct 2017 16:43:25 +0100 Subject: KEYS: Fix race between updating and finding a negative key Consolidate KEY_FLAG_INSTANTIATED, KEY_FLAG_NEGATIVE and the rejection error into one field such that: (1) The instantiation state can be modified/read atomically. (2) The error can be accessed atomically with the state. (3) The error isn't stored unioned with the payload pointers. This deals with the problem that the state is spread over three different objects (two bits and a separate variable) and reading or updating them atomically isn't practical, given that not only can uninstantiated keys change into instantiated or rejected keys, but rejected keys can also turn into instantiated keys - and someone accessing the key might not be using any locking. The main side effect of this problem is that what was held in the payload may change, depending on the state. For instance, you might observe the key to be in the rejected state. You then read the cached error, but if the key semaphore wasn't locked, the key might've become instantiated between the two reads - and you might now have something in hand that isn't actually an error code. The state is now KEY_IS_UNINSTANTIATED, KEY_IS_POSITIVE or a negative error code if the key is negatively instantiated. The key_is_instantiated() function is replaced with key_is_positive() to avoid confusion as negative keys are also 'instantiated'. Additionally, barriering is included: (1) Order payload-set before state-set during instantiation. (2) Order state-read before payload-read when using the key. Further separate barriering is necessary if RCU is being used to access the payload content after reading the payload pointers. Fixes: 146aa8b1453b ("KEYS: Merge the type-specific data with the payload data") Cc: stable@vger.kernel.org # v4.4+ Reported-by: Eric Biggers Signed-off-by: David Howells Reviewed-by: Eric Biggers --- include/linux/key.h | 47 ++++++++++++++++++++------------ net/dns_resolver/dns_key.c | 2 +- security/keys/big_key.c | 4 +-- security/keys/encrypted-keys/encrypted.c | 2 +- security/keys/gc.c | 8 +++--- security/keys/key.c | 31 +++++++++++++-------- security/keys/keyctl.c | 9 +++--- security/keys/keyring.c | 10 +++---- security/keys/proc.c | 7 +++-- security/keys/process_keys.c | 2 +- security/keys/request_key.c | 7 ++--- security/keys/request_key_auth.c | 2 +- security/keys/trusted.c | 2 +- security/keys/user_defined.c | 4 +-- 14 files changed, 80 insertions(+), 57 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index e315e16b6ff8..8a15cabe928d 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -138,6 +138,11 @@ struct key_restriction { struct key_type *keytype; }; +enum key_state { + KEY_IS_UNINSTANTIATED, + KEY_IS_POSITIVE, /* Positively instantiated */ +}; + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -169,6 +174,7 @@ struct key { * - may not match RCU dereferenced payload * - payload should contain own length */ + short state; /* Key state (+) or rejection error (-) */ #ifdef KEY_DEBUGGING unsigned magic; @@ -176,18 +182,16 @@ struct key { #endif unsigned long flags; /* status flags (change with bitops) */ -#define KEY_FLAG_INSTANTIATED 0 /* set if key has been instantiated */ -#define KEY_FLAG_DEAD 1 /* set if key type has been deleted */ -#define KEY_FLAG_REVOKED 2 /* set if key had been revoked */ -#define KEY_FLAG_IN_QUOTA 3 /* set if key consumes quota */ -#define KEY_FLAG_USER_CONSTRUCT 4 /* set if key is being constructed in userspace */ -#define KEY_FLAG_NEGATIVE 5 /* set if key is negative */ -#define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ -#define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ -#define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */ -#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ -#define KEY_FLAG_KEEP 10 /* set if key should not be removed */ -#define KEY_FLAG_UID_KEYRING 11 /* set if key is a user or user session keyring */ +#define KEY_FLAG_DEAD 0 /* set if key type has been deleted */ +#define KEY_FLAG_REVOKED 1 /* set if key had been revoked */ +#define KEY_FLAG_IN_QUOTA 2 /* set if key consumes quota */ +#define KEY_FLAG_USER_CONSTRUCT 3 /* set if key is being constructed in userspace */ +#define KEY_FLAG_ROOT_CAN_CLEAR 4 /* set if key can be cleared by root without permission */ +#define KEY_FLAG_INVALIDATED 5 /* set if key has been invalidated */ +#define KEY_FLAG_BUILTIN 6 /* set if key is built in to the kernel */ +#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ +#define KEY_FLAG_KEEP 8 /* set if key should not be removed */ +#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -213,7 +217,6 @@ struct key { struct list_head name_link; struct assoc_array keys; }; - int reject_error; }; /* This is set on a keyring to restrict the addition of a link to a key @@ -353,17 +356,27 @@ extern void key_set_timeout(struct key *, unsigned); #define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */ #define KEY_NEED_ALL 0x3f /* All the above permissions */ +static inline short key_read_state(const struct key *key) +{ + /* Barrier versus mark_key_instantiated(). */ + return smp_load_acquire(&key->state); +} + /** - * key_is_instantiated - Determine if a key has been positively instantiated + * key_is_positive - Determine if a key has been positively instantiated * @key: The key to check. * * Return true if the specified key has been positively instantiated, false * otherwise. */ -static inline bool key_is_instantiated(const struct key *key) +static inline bool key_is_positive(const struct key *key) +{ + return key_read_state(key) == KEY_IS_POSITIVE; +} + +static inline bool key_is_negative(const struct key *key) { - return test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && - !test_bit(KEY_FLAG_NEGATIVE, &key->flags); + return key_read_state(key) < 0; } #define dereference_key_rcu(KEY) \ diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 8737412c7b27..e1d4d898a007 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data) static void dns_resolver_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - if (key_is_instantiated(key)) { + if (key_is_positive(key)) { int err = PTR_ERR(key->payload.data[dns_key_error]); if (err) diff --git a/security/keys/big_key.c b/security/keys/big_key.c index e607830b6154..929e14978c42 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -247,7 +247,7 @@ void big_key_revoke(struct key *key) /* clear the quota */ key_payload_reserve(key, 0); - if (key_is_instantiated(key) && + if (key_is_positive(key) && (size_t)key->payload.data[big_key_len] > BIG_KEY_FILE_THRESHOLD) vfs_truncate(path, 0); } @@ -279,7 +279,7 @@ void big_key_describe(const struct key *key, struct seq_file *m) seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, ": %zu [%s]", datalen, datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 535db141f4da..d92cbf9687c3 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -854,7 +854,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) size_t datalen = prep->datalen; int ret = 0; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_negative(key)) return -ENOKEY; if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; diff --git a/security/keys/gc.c b/security/keys/gc.c index 87cb260e4890..f01d48cb3de1 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -129,15 +129,15 @@ static noinline void key_gc_unused_keys(struct list_head *keys) while (!list_empty(keys)) { struct key *key = list_entry(keys->next, struct key, graveyard_link); + short state = key->state; + list_del(&key->graveyard_link); kdebug("- %u", key->serial); key_check(key); /* Throw away the key data if the key is instantiated */ - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && - !test_bit(KEY_FLAG_NEGATIVE, &key->flags) && - key->type->destroy) + if (state == KEY_IS_POSITIVE && key->type->destroy) key->type->destroy(key); security_key_free(key); @@ -151,7 +151,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys) } atomic_dec(&key->user->nkeys); - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) + if (state != KEY_IS_UNINSTANTIATED) atomic_dec(&key->user->nikeys); key_user_put(key->user); diff --git a/security/keys/key.c b/security/keys/key.c index eb914a838840..9385e7cc710f 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -401,6 +401,18 @@ int key_payload_reserve(struct key *key, size_t datalen) } EXPORT_SYMBOL(key_payload_reserve); +/* + * Change the key state to being instantiated. + */ +static void mark_key_instantiated(struct key *key, int reject_error) +{ + /* Commit the payload before setting the state; barrier versus + * key_read_state(). + */ + smp_store_release(&key->state, + (reject_error < 0) ? reject_error : KEY_IS_POSITIVE); +} + /* * Instantiate a key and link it into the target keyring atomically. Must be * called with the target keyring's semaphore writelocked. The target key's @@ -424,14 +436,14 @@ static int __key_instantiate_and_link(struct key *key, mutex_lock(&key_construction_mutex); /* can't instantiate twice */ - if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state == KEY_IS_UNINSTANTIATED) { /* instantiate the key */ ret = key->type->instantiate(key, prep); if (ret == 0) { /* mark the key as being instantiated */ atomic_inc(&key->user->nikeys); - set_bit(KEY_FLAG_INSTANTIATED, &key->flags); + mark_key_instantiated(key, 0); if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) awaken = 1; @@ -577,13 +589,10 @@ int key_reject_and_link(struct key *key, mutex_lock(&key_construction_mutex); /* can't instantiate twice */ - if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state == KEY_IS_UNINSTANTIATED) { /* mark the key as being negatively instantiated */ atomic_inc(&key->user->nikeys); - key->reject_error = -error; - smp_wmb(); - set_bit(KEY_FLAG_NEGATIVE, &key->flags); - set_bit(KEY_FLAG_INSTANTIATED, &key->flags); + mark_key_instantiated(key, -error); now = current_kernel_time(); key->expiry = now.tv_sec + timeout; key_schedule_gc(key->expiry + key_gc_delay); @@ -752,8 +761,8 @@ static inline key_ref_t __key_update(key_ref_t key_ref, ret = key->type->update(key, prep); if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + /* Updating a negative key positively instantiates it */ + mark_key_instantiated(key, 0); up_write(&key->sem); @@ -986,8 +995,8 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) ret = key->type->update(key, &prep); if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + /* Updating a negative key positively instantiates it */ + mark_key_instantiated(key, 0); up_write(&key->sem); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 365ff85d7e27..76d22f726ae4 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -766,10 +766,9 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) key = key_ref_to_ptr(key_ref); - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { - ret = -ENOKEY; - goto error2; - } + ret = key_read_state(key); + if (ret < 0) + goto error2; /* Negatively instantiated */ /* see if we can read it directly */ ret = key_permission(key_ref, KEY_NEED_READ); @@ -901,7 +900,7 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) atomic_dec(&key->user->nkeys); atomic_inc(&newowner->nkeys); - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state != KEY_IS_UNINSTANTIATED) { atomic_dec(&key->user->nikeys); atomic_inc(&newowner->nikeys); } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 4fa82a8a9c0e..06173b091a74 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -414,7 +414,7 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m) else seq_puts(m, "[anon]"); - if (key_is_instantiated(keyring)) { + if (key_is_positive(keyring)) { if (keyring->keys.nr_leaves_on_tree != 0) seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); else @@ -553,7 +553,8 @@ static int keyring_search_iterator(const void *object, void *iterator_data) { struct keyring_search_context *ctx = iterator_data; const struct key *key = keyring_ptr_to_key(object); - unsigned long kflags = key->flags; + unsigned long kflags = READ_ONCE(key->flags); + short state = READ_ONCE(key->state); kenter("{%d}", key->serial); @@ -597,9 +598,8 @@ static int keyring_search_iterator(const void *object, void *iterator_data) if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { /* we set a different error code if we pass a negative key */ - if (kflags & (1 << KEY_FLAG_NEGATIVE)) { - smp_rmb(); - ctx->result = ERR_PTR(key->reject_error); + if (state < 0) { + ctx->result = ERR_PTR(state); kleave(" = %d [neg]", ctx->skipped_ret); goto skipped; } diff --git a/security/keys/proc.c b/security/keys/proc.c index de834309d100..4089ce1f7757 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -182,6 +182,7 @@ static int proc_keys_show(struct seq_file *m, void *v) unsigned long timo; key_ref_t key_ref, skey_ref; char xbuf[16]; + short state; int rc; struct keyring_search_context ctx = { @@ -236,17 +237,19 @@ static int proc_keys_show(struct seq_file *m, void *v) sprintf(xbuf, "%luw", timo / (60*60*24*7)); } + state = key_read_state(key); + #define showflag(KEY, LETTER, FLAG) \ (test_bit(FLAG, &(KEY)->flags) ? LETTER : '-') seq_printf(m, "%08x %c%c%c%c%c%c%c %5d %4s %08x %5d %5d %-9.9s ", key->serial, - showflag(key, 'I', KEY_FLAG_INSTANTIATED), + state != KEY_IS_UNINSTANTIATED ? 'I' : '-', showflag(key, 'R', KEY_FLAG_REVOKED), showflag(key, 'D', KEY_FLAG_DEAD), showflag(key, 'Q', KEY_FLAG_IN_QUOTA), showflag(key, 'U', KEY_FLAG_USER_CONSTRUCT), - showflag(key, 'N', KEY_FLAG_NEGATIVE), + state < 0 ? 'N' : '-', showflag(key, 'i', KEY_FLAG_INVALIDATED), refcount_read(&key->usage), xbuf, diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 293d3598153b..740affd65ee9 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -730,7 +730,7 @@ try_again: ret = -EIO; if (!(lflags & KEY_LOOKUP_PARTIAL) && - !test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) + key_read_state(key) == KEY_IS_UNINSTANTIATED) goto invalid_key; /* check the permissions */ diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 63e63a42db3c..e8036cd0ad54 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -595,10 +595,9 @@ int wait_for_key_construction(struct key *key, bool intr) intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (ret) return -ERESTARTSYS; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { - smp_rmb(); - return key->reject_error; - } + ret = key_read_state(key); + if (ret < 0) + return ret; return key_validate(key); } EXPORT_SYMBOL(wait_for_key_construction); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 6ebf1af8fce9..424e1d90412e 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -73,7 +73,7 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } diff --git a/security/keys/trusted.c b/security/keys/trusted.c index ddfaebf60fc8..bd85315cbfeb 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1066,7 +1066,7 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) char *datablob; int ret = 0; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_negative(key)) return -ENOKEY; p = key->payload.data[0]; if (!p->migratable) diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 3d8c68eba516..9f558bedba23 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -114,7 +114,7 @@ int user_update(struct key *key, struct key_preparsed_payload *prep) /* attach the new data, displacing the old */ key->expiry = prep->expiry; - if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_positive(key)) zap = dereference_key_locked(key); rcu_assign_keypointer(key, prep->payload.data[0]); prep->payload.data[0] = NULL; @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(user_destroy); void user_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, ": %u", key->datalen); } -- cgit v1.2.3-70-g09d2 From 60ff5b2f547af3828aebafd54daded44cfb0807a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 12 Oct 2017 16:00:41 +0100 Subject: KEYS: don't let add_key() update an uninstantiated key Currently, when passed a key that already exists, add_key() will call the key's ->update() method if such exists. But this is heavily broken in the case where the key is uninstantiated because it doesn't call __key_instantiate_and_link(). Consequently, it doesn't do most of the things that are supposed to happen when the key is instantiated, such as setting the instantiation state, clearing KEY_FLAG_USER_CONSTRUCT and awakening tasks waiting on it, and incrementing key->user->nikeys. It also never takes key_construction_mutex, which means that ->instantiate() can run concurrently with ->update() on the same key. In the case of the "user" and "logon" key types this causes a memory leak, at best. Maybe even worse, the ->update() methods of the "encrypted" and "trusted" key types actually just dereference a NULL pointer when passed an uninstantiated key. Change key_create_or_update() to wait interruptibly for the key to finish construction before continuing. This patch only affects *uninstantiated* keys. For now we still allow a negatively instantiated key to be updated (thereby positively instantiating it), although that's broken too (the next patch fixes it) and I'm not sure that anyone actually uses that functionality either. Here is a simple reproducer for the bug using the "encrypted" key type (requires CONFIG_ENCRYPTED_KEYS=y), though as noted above the bug pertained to more than just the "encrypted" key type: #include #include #include int main(void) { int ringid = keyctl_join_session_keyring(NULL); if (fork()) { for (;;) { const char payload[] = "update user:foo 32"; usleep(rand() % 10000); add_key("encrypted", "desc", payload, sizeof(payload), ringid); keyctl_clear(ringid); } } else { for (;;) request_key("encrypted", "desc", "callout_info", ringid); } } It causes: BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: encrypted_update+0xb0/0x170 PGD 7a178067 P4D 7a178067 PUD 77269067 PMD 0 PREEMPT SMP CPU: 0 PID: 340 Comm: reproduce Tainted: G D 4.14.0-rc1-00025-g428490e38b2e #796 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8a467a39a340 task.stack: ffffb15c40770000 RIP: 0010:encrypted_update+0xb0/0x170 RSP: 0018:ffffb15c40773de8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8a467a275b00 RCX: 0000000000000000 RDX: 0000000000000005 RSI: ffff8a467a275b14 RDI: ffffffffb742f303 RBP: ffffb15c40773e20 R08: 0000000000000000 R09: ffff8a467a275b17 R10: 0000000000000020 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff8a4677057180 R15: ffff8a467a275b0f FS: 00007f5d7fb08700(0000) GS:ffff8a467f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000077262005 CR4: 00000000001606f0 Call Trace: key_create_or_update+0x2bc/0x460 SyS_add_key+0x10c/0x1d0 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x7f5d7f211259 RSP: 002b:00007ffed03904c8 EFLAGS: 00000246 ORIG_RAX: 00000000000000f8 RAX: ffffffffffffffda RBX: 000000003b2a7955 RCX: 00007f5d7f211259 RDX: 00000000004009e4 RSI: 00000000004009ff RDI: 0000000000400a04 RBP: 0000000068db8bad R08: 000000003b2a7955 R09: 0000000000000004 R10: 000000000000001a R11: 0000000000000246 R12: 0000000000400868 R13: 00007ffed03905d0 R14: 0000000000000000 R15: 0000000000000000 Code: 77 28 e8 64 34 1f 00 45 31 c0 31 c9 48 8d 55 c8 48 89 df 48 8d 75 d0 e8 ff f9 ff ff 85 c0 41 89 c4 0f 88 84 00 00 00 4c 8b 7d c8 <49> 8b 75 18 4c 89 ff e8 24 f8 ff ff 85 c0 41 89 c4 78 6d 49 8b RIP: encrypted_update+0xb0/0x170 RSP: ffffb15c40773de8 CR2: 0000000000000018 Cc: # v2.6.12+ Reported-by: Eric Biggers Signed-off-by: David Howells cc: Eric Biggers --- security/keys/key.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/security/keys/key.c b/security/keys/key.c index 9385e7cc710f..83bf4b4afd49 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -945,6 +945,16 @@ error: */ __key_link_end(keyring, &index_key, edit); + key = key_ref_to_ptr(key_ref); + if (test_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) { + ret = wait_for_key_construction(key, true); + if (ret < 0) { + key_ref_put(key_ref); + key_ref = ERR_PTR(ret); + goto error_free_prep; + } + } + key_ref = __key_update(key_ref, &prep); goto error_free_prep; } -- cgit v1.2.3-70-g09d2 From 1823d475a5eeaa0f52789b1b7e2d31a592ae92ea Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Sep 2017 12:50:44 -0700 Subject: KEYS: load key flags and expiry time atomically in key_validate() In key_validate(), load the flags and expiry time once atomically, since these can change concurrently if key_validate() is called without the key semaphore held. And we don't want to get inconsistent results if a variable is referenced multiple times. For example, key->expiry was referenced in both 'if (key->expiry)' and in 'if (now.tv_sec >= key->expiry)', making it theoretically possible to see a spurious EKEYEXPIRED while the expiration time was being removed, i.e. set to 0. Signed-off-by: Eric Biggers Signed-off-by: David Howells --- security/keys/permission.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/security/keys/permission.c b/security/keys/permission.c index 732cc0beffdf..a72b4dd70c8a 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -88,7 +88,8 @@ EXPORT_SYMBOL(key_task_permission); */ int key_validate(const struct key *key) { - unsigned long flags = key->flags; + unsigned long flags = READ_ONCE(key->flags); + time_t expiry = READ_ONCE(key->expiry); if (flags & (1 << KEY_FLAG_INVALIDATED)) return -ENOKEY; @@ -99,9 +100,9 @@ int key_validate(const struct key *key) return -EKEYREVOKED; /* check it hasn't expired */ - if (key->expiry) { + if (expiry) { struct timespec now = current_kernel_time(); - if (now.tv_sec >= key->expiry) + if (now.tv_sec >= expiry) return -EKEYEXPIRED; } -- cgit v1.2.3-70-g09d2 From 9d6c8711b6a751a694bcfaf49fb557b82092ee46 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Sep 2017 12:50:45 -0700 Subject: KEYS: Load key expiry time atomically in keyring_search_iterator() Similar to the case for key_validate(), we should load the key ->expiry once atomically in keyring_search_iterator(), since it can be changed concurrently with the flags whenever the key semaphore isn't held. Signed-off-by: Eric Biggers Signed-off-by: David Howells --- security/keys/keyring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 06173b091a74..a7e51f793867 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -566,6 +566,8 @@ static int keyring_search_iterator(const void *object, void *iterator_data) /* skip invalidated, revoked and expired keys */ if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { + time_t expiry = READ_ONCE(key->expiry); + if (kflags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED))) { ctx->result = ERR_PTR(-EKEYREVOKED); @@ -573,7 +575,7 @@ static int keyring_search_iterator(const void *object, void *iterator_data) goto skipped; } - if (key->expiry && ctx->now.tv_sec >= key->expiry) { + if (expiry && ctx->now.tv_sec >= expiry) { if (!(ctx->flags & KEYRING_SEARCH_SKIP_EXPIRED)) ctx->result = ERR_PTR(-EKEYEXPIRED); kleave(" = %d [expire]", ctx->skipped_ret); -- cgit v1.2.3-70-g09d2 From ab5c69f01313c80df948e4f768efe616258f85f4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Sep 2017 12:50:46 -0700 Subject: KEYS: load key flags and expiry time atomically in proc_keys_show() In proc_keys_show(), the key semaphore is not held, so the key ->flags and ->expiry can be changed concurrently. We therefore should read them atomically just once. Signed-off-by: Eric Biggers Signed-off-by: David Howells --- security/keys/proc.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/security/keys/proc.c b/security/keys/proc.c index 4089ce1f7757..6d1fcbba1e09 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -179,7 +179,9 @@ static int proc_keys_show(struct seq_file *m, void *v) struct rb_node *_p = v; struct key *key = rb_entry(_p, struct key, serial_node); struct timespec now; + time_t expiry; unsigned long timo; + unsigned long flags; key_ref_t key_ref, skey_ref; char xbuf[16]; short state; @@ -218,12 +220,13 @@ static int proc_keys_show(struct seq_file *m, void *v) rcu_read_lock(); /* come up with a suitable timeout value */ - if (key->expiry == 0) { + expiry = READ_ONCE(key->expiry); + if (expiry == 0) { memcpy(xbuf, "perm", 5); - } else if (now.tv_sec >= key->expiry) { + } else if (now.tv_sec >= expiry) { memcpy(xbuf, "expd", 5); } else { - timo = key->expiry - now.tv_sec; + timo = expiry - now.tv_sec; if (timo < 60) sprintf(xbuf, "%lus", timo); @@ -239,18 +242,19 @@ static int proc_keys_show(struct seq_file *m, void *v) state = key_read_state(key); -#define showflag(KEY, LETTER, FLAG) \ - (test_bit(FLAG, &(KEY)->flags) ? LETTER : '-') +#define showflag(FLAGS, LETTER, FLAG) \ + ((FLAGS & (1 << FLAG)) ? LETTER : '-') + flags = READ_ONCE(key->flags); seq_printf(m, "%08x %c%c%c%c%c%c%c %5d %4s %08x %5d %5d %-9.9s ", key->serial, state != KEY_IS_UNINSTANTIATED ? 'I' : '-', - showflag(key, 'R', KEY_FLAG_REVOKED), - showflag(key, 'D', KEY_FLAG_DEAD), - showflag(key, 'Q', KEY_FLAG_IN_QUOTA), - showflag(key, 'U', KEY_FLAG_USER_CONSTRUCT), + showflag(flags, 'R', KEY_FLAG_REVOKED), + showflag(flags, 'D', KEY_FLAG_DEAD), + showflag(flags, 'Q', KEY_FLAG_IN_QUOTA), + showflag(flags, 'U', KEY_FLAG_USER_CONSTRUCT), state < 0 ? 'N' : '-', - showflag(key, 'i', KEY_FLAG_INVALIDATED), + showflag(flags, 'i', KEY_FLAG_INVALIDATED), refcount_read(&key->usage), xbuf, key->perm, -- cgit v1.2.3-70-g09d2 From 68a1fdbbf8bd3378325e45c19e167a165f9ffc3a Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 8 Oct 2017 20:02:32 +0200 Subject: pkcs7: Prevent NULL pointer dereference, since sinfo is not always set. The ASN.1 parser does not necessarily set the sinfo field, this patch prevents a NULL pointer dereference on broken input. Fixes: 99db44350672 ("PKCS#7: Appropriately restrict authenticated attributes and content type") Signed-off-by: Eric Sesterhenn Signed-off-by: David Howells cc: stable@vger.kernel.org # 4.3+ --- crypto/asymmetric_keys/pkcs7_parser.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index af4cd8649117..d140d8bb2c96 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -88,6 +88,9 @@ static int pkcs7_check_authattrs(struct pkcs7_message *msg) bool want = false; sinfo = msg->signed_infos; + if (!sinfo) + goto inconsistent; + if (sinfo->authattrs) { want = true; msg->have_authattrs = true; -- cgit v1.2.3-70-g09d2 From d824c7a8e88a7162d14782e73a6a6c867a266500 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Fri, 13 Oct 2017 19:37:31 +0300 Subject: regulator: rn5t618: Do not index regulator_desc arrays by id The regulator_desc arrays in this driver are indexed by RN5T618_* constants and some elements can be missing. This causes probe failures on older models: rn5t618-regulator rn5t618-regulator: failed to register (null) regulator rn5t618-regulator: probe of rn5t618-regulator failed with error -22 Fix this by making the arrays flat. This also saves a little memory because the regulator_desc arrays become smaller. Signed-off-by: Leonard Crestez Fixes: 83b2a3c2ab24 ("regulator: rn5t618: add RC5T619 PMIC support") Signed-off-by: Mark Brown --- drivers/regulator/rn5t618-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/rn5t618-regulator.c b/drivers/regulator/rn5t618-regulator.c index ef2be56460fe..790a4a73ea2c 100644 --- a/drivers/regulator/rn5t618-regulator.c +++ b/drivers/regulator/rn5t618-regulator.c @@ -29,7 +29,7 @@ static const struct regulator_ops rn5t618_reg_ops = { }; #define REG(rid, ereg, emask, vreg, vmask, min, max, step) \ - [RN5T618_##rid] = { \ + { \ .name = #rid, \ .of_match = of_match_ptr(#rid), \ .regulators_node = of_match_ptr("regulators"), \ -- cgit v1.2.3-70-g09d2 From 098a0a62c1554f5a3813ef1b8539563214ada8f6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 17 Oct 2017 16:38:55 +0200 Subject: ALSA: hda: Abort capability probe at invalid register read The loop in snd_hdac_bus_parse_capabilities() may go to nirvana when it hits an invalid register value read: BUG: unable to handle kernel paging request at ffffad5dc41f3fff IP: pci_azx_readl+0x5/0x10 [snd_hda_intel] Call Trace: snd_hdac_bus_parse_capabilities+0x3c/0x1f0 [snd_hda_core] azx_probe_continue+0x7d5/0x940 [snd_hda_intel] ..... This happened on a new Intel machine, and we need to check the value and abort the loop accordingly. [Note: the fixes tag below indicates only the commit where this patch can be applied; the original problem was introduced even before that commit] Fixes: 6720b38420a0 ("ALSA: hda - move bus_parse_capabilities to core") Cc: Acked-by: Vinod Koul Signed-off-by: Takashi Iwai --- sound/hda/hdac_controller.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 978dc1801b3a..f6d2985b2520 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -284,6 +284,11 @@ int snd_hdac_bus_parse_capabilities(struct hdac_bus *bus) dev_dbg(bus->dev, "HDA capability ID: 0x%x\n", (cur_cap & AZX_CAP_HDR_ID_MASK) >> AZX_CAP_HDR_ID_OFF); + if (cur_cap == -1) { + dev_dbg(bus->dev, "Invalid capability reg read\n"); + break; + } + switch ((cur_cap & AZX_CAP_HDR_ID_MASK) >> AZX_CAP_HDR_ID_OFF) { case AZX_ML_CAP_ID: dev_dbg(bus->dev, "Found ML capability\n"); -- cgit v1.2.3-70-g09d2 From 6bf88a343db2b3c160edf9b82a74966b31cc80bd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 17 Oct 2017 11:58:17 +0200 Subject: ALSA: hda: Remove superfluous '-' added by printk conversion While converting the error messages to the standard macros in the commit 4e76a8833fac ("ALSA: hda - Replace with standard printk"), a superfluous '-' slipped in the code mistakenly. Its influence is almost negligible, merely shows a dB value as negative integer instead of positive integer (or vice versa) in the rare error message. So let's kill this embarrassing byte to show more correct value. Fixes: 4e76a8833fac ("ALSA: hda - Replace with standard printk") Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 3db26c451837..b6cf9684c2ec 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1824,7 +1824,7 @@ static int get_kctl_0dB_offset(struct hda_codec *codec, return -1; if (*step_to_check && *step_to_check != step) { codec_err(codec, "Mismatching dB step for vmaster slave (%d!=%d)\n", -- *step_to_check, step); + *step_to_check, step); return -1; } *step_to_check = step; -- cgit v1.2.3-70-g09d2 From a91d66129fb9bcead12af3ed2008d6ddbf179509 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Oct 2017 11:39:28 +0200 Subject: ALSA: hda - Fix incorrect TLV callback check introduced during set_fs() removal The commit 99b5c5bb9a54 ("ALSA: hda - Remove the use of set_fs()") converted the get_kctl_0dB_offset() call for killing set_fs() usage in HD-audio codec code. The conversion assumed that the TLV callback used in HD-audio code is only snd_hda_mixer_amp() and applies the TLV calculation locally. Although this assumption is correct, and all slave kctls are actually with that callback, the current code is still utterly buggy; it doesn't hit this condition and falls back to the next check. It's because the function gets called after adding slave kctls to vmaster. By assigning a slave kctl, the slave kctl object is faked inside vmaster code, and the whole kctl ops are overridden. Thus the callback op points to a different value from what we've assumed. More badly, as reported by the KERNEXEC and UDEREF features of PaX, the code flow turns into the unexpected pitfall. The next fallback check is SNDRV_CTL_ELEM_ACCESS_TLV_READ access bit, and this always hits for each kctl with TLV. Then it evaluates the callback function pointer wrongly as if it were a TLV array. Although currently its side-effect is fairly limited, this incorrect reference may lead to an unpleasant result. For addressing the regression, this patch introduces a new helper to vmaster code, snd_ctl_apply_vmaster_slaves(). This works similarly like the existing map_slaves() in hda_codec.c: it loops over the slave list of the given master, and applies the given function to each slave. Then the initializer function receives the right kctl object and we can compare the correct pointer instead of the faked one. Also, for catching the similar breakage in future, give an error message when the unexpected TLV callback is found and bail out immediately. Fixes: 99b5c5bb9a54 ("ALSA: hda - Remove the use of set_fs()") Reported-by: PaX Team Cc: # v4.13 Signed-off-by: Takashi Iwai --- include/sound/control.h | 3 ++ sound/core/vmaster.c | 31 +++++++++++++++ sound/pci/hda/hda_codec.c | 97 +++++++++++++++++++++++++++-------------------- 3 files changed, 89 insertions(+), 42 deletions(-) diff --git a/include/sound/control.h b/include/sound/control.h index bd7246de58e7..a1f1152bc687 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -248,6 +248,9 @@ int snd_ctl_add_vmaster_hook(struct snd_kcontrol *kctl, void *private_data); void snd_ctl_sync_vmaster(struct snd_kcontrol *kctl, bool hook_only); #define snd_ctl_sync_vmaster_hook(kctl) snd_ctl_sync_vmaster(kctl, true) +int snd_ctl_apply_vmaster_slaves(struct snd_kcontrol *kctl, + int (*func)(struct snd_kcontrol *, void *), + void *arg); /* * Helper functions for jack-detection controls diff --git a/sound/core/vmaster.c b/sound/core/vmaster.c index 6c58e6f73a01..e43af18d4383 100644 --- a/sound/core/vmaster.c +++ b/sound/core/vmaster.c @@ -484,3 +484,34 @@ void snd_ctl_sync_vmaster(struct snd_kcontrol *kcontrol, bool hook_only) master->hook(master->hook_private_data, master->val); } EXPORT_SYMBOL_GPL(snd_ctl_sync_vmaster); + +/** + * snd_ctl_apply_vmaster_slaves - Apply function to each vmaster slave + * @kctl: vmaster kctl element + * @func: function to apply + * @arg: optional function argument + * + * Apply the function @func to each slave kctl of the given vmaster kctl. + * Returns 0 if successful, or a negative error code. + */ +int snd_ctl_apply_vmaster_slaves(struct snd_kcontrol *kctl, + int (*func)(struct snd_kcontrol *, void *), + void *arg) +{ + struct link_master *master; + struct link_slave *slave; + int err; + + master = snd_kcontrol_chip(kctl); + err = master_init(master); + if (err < 0) + return err; + list_for_each_entry(slave, &master->slaves, list) { + err = func(&slave->slave, arg); + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(snd_ctl_apply_vmaster_slaves); diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index b6cf9684c2ec..a0989d231fd0 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1803,36 +1803,6 @@ static int check_slave_present(struct hda_codec *codec, return 1; } -/* guess the value corresponding to 0dB */ -static int get_kctl_0dB_offset(struct hda_codec *codec, - struct snd_kcontrol *kctl, int *step_to_check) -{ - int _tlv[4]; - const int *tlv = NULL; - int val = -1; - - if ((kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) && - kctl->tlv.c == snd_hda_mixer_amp_tlv) { - get_ctl_amp_tlv(kctl, _tlv); - tlv = _tlv; - } else if (kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_TLV_READ) - tlv = kctl->tlv.p; - if (tlv && tlv[0] == SNDRV_CTL_TLVT_DB_SCALE) { - int step = tlv[3]; - step &= ~TLV_DB_SCALE_MUTE; - if (!step) - return -1; - if (*step_to_check && *step_to_check != step) { - codec_err(codec, "Mismatching dB step for vmaster slave (%d!=%d)\n", - *step_to_check, step); - return -1; - } - *step_to_check = step; - val = -tlv[2] / step; - } - return val; -} - /* call kctl->put with the given value(s) */ static int put_kctl_with_value(struct snd_kcontrol *kctl, int val) { @@ -1847,19 +1817,58 @@ static int put_kctl_with_value(struct snd_kcontrol *kctl, int val) return 0; } -/* initialize the slave volume with 0dB */ -static int init_slave_0dB(struct hda_codec *codec, - void *data, struct snd_kcontrol *slave) +struct slave_init_arg { + struct hda_codec *codec; + int step; +}; + +/* initialize the slave volume with 0dB via snd_ctl_apply_vmaster_slaves() */ +static int init_slave_0dB(struct snd_kcontrol *kctl, void *_arg) { - int offset = get_kctl_0dB_offset(codec, slave, data); - if (offset > 0) - put_kctl_with_value(slave, offset); + struct slave_init_arg *arg = _arg; + int _tlv[4]; + const int *tlv = NULL; + int step; + int val; + + if (kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK) { + if (kctl->tlv.c != snd_hda_mixer_amp_tlv) { + codec_err(arg->codec, + "Unexpected TLV callback for slave %s:%d\n", + kctl->id.name, kctl->id.index); + return 0; /* ignore */ + } + get_ctl_amp_tlv(kctl, _tlv); + tlv = _tlv; + } else if (kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_TLV_READ) + tlv = kctl->tlv.p; + + if (!tlv || tlv[0] != SNDRV_CTL_TLVT_DB_SCALE) + return 0; + + step = tlv[3]; + step &= ~TLV_DB_SCALE_MUTE; + if (!step) + return 0; + if (arg->step && arg->step != step) { + codec_err(arg->codec, + "Mismatching dB step for vmaster slave (%d!=%d)\n", + arg->step, step); + return 0; + } + + arg->step = step; + val = -tlv[2] / step; + if (val > 0) { + put_kctl_with_value(kctl, val); + return val; + } + return 0; } -/* unmute the slave */ -static int init_slave_unmute(struct hda_codec *codec, - void *data, struct snd_kcontrol *slave) +/* unmute the slave via snd_ctl_apply_vmaster_slaves() */ +static int init_slave_unmute(struct snd_kcontrol *slave, void *_arg) { return put_kctl_with_value(slave, 1); } @@ -1919,9 +1928,13 @@ int __snd_hda_add_vmaster(struct hda_codec *codec, char *name, /* init with master mute & zero volume */ put_kctl_with_value(kctl, 0); if (init_slave_vol) { - int step = 0; - map_slaves(codec, slaves, suffix, - tlv ? init_slave_0dB : init_slave_unmute, &step); + struct slave_init_arg arg = { + .codec = codec, + .step = 0, + }; + snd_ctl_apply_vmaster_slaves(kctl, + tlv ? init_slave_0dB : init_slave_unmute, + &arg); } if (ctl_ret) -- cgit v1.2.3-70-g09d2 From d965465b60bad79d0b067f1009ba80ae76a6561a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Oct 2017 16:28:28 +0200 Subject: mlxsw: core: Fix possible deadlock When an EMAD is transmitted, a timeout work item is scheduled with a delay of 200ms, so that another EMAD will be retried until a maximum of five retries. In certain situations, it's possible for the function waiting on the EMAD to be associated with a work item that is queued on the same workqueue (`mlxsw_core`) as the timeout work item. This results in flushing a work item on the same workqueue. According to commit e159489baa71 ("workqueue: relax lockdep annotation on flush_work()") the above may lead to a deadlock in case the workqueue has only one worker active or if the system in under memory pressure and the rescue worker is in use. The latter explains the very rare and random nature of the lockdep splats we have been seeing: [ 52.730240] ============================================ [ 52.736179] WARNING: possible recursive locking detected [ 52.742119] 4.14.0-rc3jiri+ #4 Not tainted [ 52.746697] -------------------------------------------- [ 52.752635] kworker/1:3/599 is trying to acquire lock: [ 52.758378] (mlxsw_core_driver_name){+.+.}, at: [] flush_work+0x3a4/0x5e0 [ 52.767837] but task is already holding lock: [ 52.774360] (mlxsw_core_driver_name){+.+.}, at: [] process_one_work+0x7d4/0x12f0 [ 52.784495] other info that might help us debug this: [ 52.791794] Possible unsafe locking scenario: [ 52.798413] CPU0 [ 52.801144] ---- [ 52.803875] lock(mlxsw_core_driver_name); [ 52.808556] lock(mlxsw_core_driver_name); [ 52.813236] *** DEADLOCK *** [ 52.819857] May be due to missing lock nesting notation [ 52.827450] 3 locks held by kworker/1:3/599: [ 52.832221] #0: (mlxsw_core_driver_name){+.+.}, at: [] process_one_work+0x7d4/0x12f0 [ 52.842846] #1: ((&(&bridge->fdb_notify.dw)->work)){+.+.}, at: [] process_one_work+0x7d4/0x12f0 [ 52.854537] #2: (rtnl_mutex){+.+.}, at: [] rtnl_lock+0x17/0x20 [ 52.863021] stack backtrace: [ 52.867890] CPU: 1 PID: 599 Comm: kworker/1:3 Not tainted 4.14.0-rc3jiri+ #4 [ 52.875773] Hardware name: Mellanox Technologies Ltd. "MSN2100-CB2F"/"SA001017", BIOS 5.6.5 06/07/2016 [ 52.886267] Workqueue: mlxsw_core mlxsw_sp_fdb_notify_work [mlxsw_spectrum] [ 52.894060] Call Trace: [ 52.909122] __lock_acquire+0xf6f/0x2a10 [ 53.025412] lock_acquire+0x158/0x440 [ 53.047557] flush_work+0x3c4/0x5e0 [ 53.087571] __cancel_work_timer+0x3ca/0x5e0 [ 53.177051] cancel_delayed_work_sync+0x13/0x20 [ 53.182142] mlxsw_reg_trans_bulk_wait+0x12d/0x7a0 [mlxsw_core] [ 53.194571] mlxsw_core_reg_access+0x586/0x990 [mlxsw_core] [ 53.225365] mlxsw_reg_query+0x10/0x20 [mlxsw_core] [ 53.230882] mlxsw_sp_fdb_notify_work+0x2a3/0x9d0 [mlxsw_spectrum] [ 53.237801] process_one_work+0x8f1/0x12f0 [ 53.321804] worker_thread+0x1fd/0x10c0 [ 53.435158] kthread+0x28e/0x370 [ 53.448703] ret_from_fork+0x2a/0x40 [ 53.453017] mlxsw_spectrum 0000:01:00.0: EMAD retries (2/5) (tid=bf4549b100000774) [ 53.453119] mlxsw_spectrum 0000:01:00.0: EMAD retries (5/5) (tid=bf4549b100000770) [ 53.453132] mlxsw_spectrum 0000:01:00.0: EMAD reg access failed (tid=bf4549b100000770,reg_id=200b(sfn),type=query,status=0(operation performed)) [ 53.453143] mlxsw_spectrum 0000:01:00.0: Failed to get FDB notifications Fix this by creating another workqueue for EMAD timeouts, thereby preventing the situation of a work item trying to flush a work item queued on the same workqueue. Fixes: caf7297e7ab5f ("mlxsw: core: Introduce support for asynchronous EMAD register access") Signed-off-by: Ido Schimmel Reported-by: Jiri Pirko Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 9d5e7cf288be..f3315bc874ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -96,6 +96,7 @@ struct mlxsw_core { const struct mlxsw_bus *bus; void *bus_priv; const struct mlxsw_bus_info *bus_info; + struct workqueue_struct *emad_wq; struct list_head rx_listener_list; struct list_head event_listener_list; struct { @@ -465,7 +466,7 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) { unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); - mlxsw_core_schedule_dw(&trans->timeout_dw, timeout); + queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, @@ -587,12 +588,18 @@ static const struct mlxsw_listener mlxsw_emad_rx_listener = static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) { + struct workqueue_struct *emad_wq; u64 tid; int err; if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; + emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + if (!emad_wq) + return -ENOMEM; + mlxsw_core->emad_wq = emad_wq; + /* Set the upper 32 bits of the transaction ID field to a random * number. This allows us to discard EMADs addressed to other * devices. @@ -619,6 +626,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) err_emad_trap_set: mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core); + destroy_workqueue(mlxsw_core->emad_wq); return err; } @@ -631,6 +639,7 @@ static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core) mlxsw_core->emad.use_emad = false; mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener, mlxsw_core); + destroy_workqueue(mlxsw_core->emad_wq); } static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, -- cgit v1.2.3-70-g09d2 From 2de09681e4ce8b1caa79d2e4482b72d8ef41c550 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 16 Oct 2017 10:02:11 -0500 Subject: ibmvnic: Fix calculation of number of TX header descriptors This patch correctly sets the number of additional header descriptors that will be sent in an indirect SCRQ entry. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cb8182f4fdfa..c66abd476023 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1093,11 +1093,12 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, * places them in a descriptor array, scrq_arr */ -static void create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, - union sub_crq *scrq_arr) +static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, + union sub_crq *scrq_arr) { union sub_crq hdr_desc; int tmp_len = len; + int num_descs = 0; u8 *data, *cur; int tmp; @@ -1126,7 +1127,10 @@ static void create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, tmp_len -= tmp; *scrq_arr = hdr_desc; scrq_arr++; + num_descs++; } + + return num_descs; } /** @@ -1144,16 +1148,12 @@ static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff, int *num_entries, u8 hdr_field) { int hdr_len[3] = {0, 0, 0}; - int tot_len, len; + int tot_len; u8 *hdr_data = txbuff->hdr_data; tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len, txbuff->hdr_data); - len = tot_len; - len -= 24; - if (len > 0) - num_entries += len % 29 ? len / 29 + 1 : len / 29; - create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, + *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, txbuff->indir_arr + 1); } -- cgit v1.2.3-70-g09d2 From 48044eb490be71c203e14dd89e8bae87209eab52 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 16 Oct 2017 17:09:53 +0200 Subject: netlink: fix netlink_ack() extack race It seems that it's possible to toggle NETLINK_F_EXT_ACK through setsockopt() while another thread/CPU is building a message inside netlink_ack(), which could then trigger the WARN_ON()s I added since if it goes from being turned off to being turned on between allocating and filling the message, the skb could end up being too small. Avoid this whole situation by storing the value of this flag in a separate variable and using that throughout the function instead. Fixes: 2d4bc93368f5 ("netlink: extended ACK reporting") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f34750691c5c..b93148e8e9fb 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2307,6 +2307,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, size_t tlvlen = 0; struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); unsigned int flags = 0; + bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK; /* Error messages get the original request appened, unless the user * requests to cap the error message, and get extra error data if @@ -2317,7 +2318,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, payload += nlmsg_len(nlh); else flags |= NLM_F_CAPPED; - if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (nlk_has_extack && extack) { if (extack->_msg) tlvlen += nla_total_size(strlen(extack->_msg) + 1); if (extack->bad_attr) @@ -2326,8 +2327,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, } else { flags |= NLM_F_CAPPED; - if (nlk->flags & NETLINK_F_EXT_ACK && - extack && extack->cookie_len) + if (nlk_has_extack && extack && extack->cookie_len) tlvlen += nla_total_size(extack->cookie_len); } @@ -2355,7 +2355,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, errmsg->error = err; memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); - if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (nlk_has_extack && extack) { if (err) { if (extack->_msg) WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, -- cgit v1.2.3-70-g09d2 From 3d8bba9535ac6e79453c769dd0c8ea852a51ad60 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 18 Oct 2017 23:11:18 +0800 Subject: perf xyarray: Fix wrong processing when closing evsel fd In current xyarray code, xyarray__max_x() returns max_y, and xyarray__max_y() returns max_x. It's confusing and for code logic it looks not correct. Error happens when closing evsel fd. Let's see this scenario: 1. Allocate an fd (pseudo-code) perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); } xyarray__new(int xlen, int ylen, size_t entry_size) { size_t row_size = ylen * entry_size; struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size); xy->entry_size = entry_size; xy->row_size = row_size; xy->entries = xlen * ylen; xy->max_x = xlen; xy->max_y = ylen; ...... } So max_x is ncpus, max_y is nthreads and row_size = nthreads * 4. 2. Use perf syscall and get the fd int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], group_fd, flags); FD(evsel, cpu, thread) = fd; } } static inline void *xyarray__entry(struct xyarray *xy, int x, int y) { return &xy->contents[x * xy->row_size + y * xy->entry_size]; } These codes don't have issues. The issue happens in the closing of fd. 3. Close fd. void perf_evsel__close_fd(struct perf_evsel *evsel) { int cpu, thread; for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { close(FD(evsel, cpu, thread)); FD(evsel, cpu, thread) = -1; } } Since xyarray__max_x() returns max_y (nthreads) and xyarry__max_y() returns max_x (ncpus), so above code is actually to be: for (cpu = 0; cpu < nthreads; cpu++) for (thread = 0; thread < ncpus; ++thread) { close(FD(evsel, cpu, thread)); FD(evsel, cpu, thread) = -1; } It's not correct! This change is introduced by "475fb533fb7d" ("perf evsel: Fix buffer overflow while freeing events") This fix is to let xyarray__max_x() return max_x (ncpus) and let xyarry__max_y() return max_y (nthreads) Committer note: This was also fixed by Ravi Bangoria, who provided the same patch, noticing the problem with 'perf record': I see 'perf record -p ' crashes with following log: *** Error in `./perf': free(): invalid next size (normal): 0x000000000298b340 *** ======= Backtrace: ========= /lib/x86_64-linux-gnu/libc.so.6(+0x777e5)[0x7f7fd85c87e5] /lib/x86_64-linux-gnu/libc.so.6(+0x8037a)[0x7f7fd85d137a] /lib/x86_64-linux-gnu/libc.so.6(cfree+0x4c)[0x7f7fd85d553c] ./perf(perf_evsel__close+0xb4)[0x4b7614] ./perf(perf_evlist__delete+0x100)[0x4ab180] ./perf(cmd_record+0x1d9)[0x43a5a9] ./perf[0x49aa2f] ./perf(main+0x631)[0x427841] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f7fd8571830] ./perf(_start+0x29)[0x427a59] Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Cc: Ravi Bangoria Fixes: d74be4767367 ("perf xyarray: Save max_x, max_y") Link: http://lkml.kernel.org/r/1508339478-26674-1-git-send-email-yao.jin@linux.intel.com Link: http://lkml.kernel.org/r/1508327446-15302-1-git-send-email-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/xyarray.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h index 4ba726c90870..54af60462130 100644 --- a/tools/perf/util/xyarray.h +++ b/tools/perf/util/xyarray.h @@ -23,12 +23,12 @@ static inline void *xyarray__entry(struct xyarray *xy, int x, int y) static inline int xyarray__max_y(struct xyarray *xy) { - return xy->max_x; + return xy->max_y; } static inline int xyarray__max_x(struct xyarray *xy) { - return xy->max_y; + return xy->max_x; } #endif /* _PERF_XYARRAY_H_ */ -- cgit v1.2.3-70-g09d2 From 74f8e22c153f4464060a0c2e4cfd1d6e51af2109 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 18 Oct 2017 16:34:09 +0800 Subject: perf test shell trace+probe_libc_inet_pton.sh: Be compatible with Debian/Ubuntu In debian/ubuntu, libc.so is located at a different place, /lib/x86_64-linux-gnu/libc-2.23.so, so it outputs like this when testing: PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.040 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.040/0.040/0.040/0.000 ms 0.000 probe_libc:inet_pton:(7f0e2db741c0)) __GI___inet_pton (/lib/x86_64-linux-gnu/libc-2.23.so) getaddrinfo (/lib/x86_64-linux-gnu/libc-2.23.so) [0xffffa9d40f34ff4d] (/bin/ping) Fix up the libc path to make sure this test works in more OSes. Committer testing: When this test fails one can use 'perf test -v', i.e. in verbose mode, where it'll show the expected backtrace, so, after applying this test: On Fedora 26: # perf test -v ping 62: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 23322 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.058 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.058/0.058/0.058/0.000 ms 0.000 probe_libc:inet_pton:(7fe344310d80)) __GI___inet_pton (/usr/lib64/libc-2.25.so) getaddrinfo (/usr/lib64/libc-2.25.so) _init (/usr/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok # Signed-off-by: Li Zhijian Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Kim Phillips Cc: Li Zhijian Cc: Peter Zijlstra Cc: Philip Li Link: http://lkml.kernel.org/r/1508315649-18836-1-git-send-email-lizhijian@cn.fujitsu.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_libc_inet_pton.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 462fc755092e..7a84d73324e3 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -10,6 +10,9 @@ . $(dirname $0)/lib/probe.sh +ld=$(realpath /lib64/ld*.so.* | uniq) +libc=$(echo $ld | sed 's/ld/libc/g') + trace_libc_inet_pton_backtrace() { idx=0 expected[0]="PING.*bytes" @@ -18,8 +21,8 @@ trace_libc_inet_pton_backtrace() { expected[3]=".*packets transmitted.*" expected[4]="rtt min.*" expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" - expected[6]=".*inet_pton[[:space:]]\(/usr/lib.*/libc-[0-9]+\.[0-9]+\.so\)$" - expected[7]="getaddrinfo[[:space:]]\(/usr/lib.*/libc-[0-9]+\.[0-9]+\.so\)$" + expected[6]=".*inet_pton[[:space:]]\($libc\)$" + expected[7]="getaddrinfo[[:space:]]\($libc\)$" expected[8]=".*\(.*/bin/ping.*\)$" perf trace --no-syscalls -e probe_libc:inet_pton/max-stack=3/ ping -6 -c 1 ::1 2>&1 | grep -v ^$ | while read line ; do @@ -35,7 +38,7 @@ trace_libc_inet_pton_backtrace() { } skip_if_no_perf_probe && \ -perf probe -q /lib64/libc-*.so inet_pton && \ +perf probe -q $libc inet_pton && \ trace_libc_inet_pton_backtrace err=$? rm -f ${file} -- cgit v1.2.3-70-g09d2 From 28e33f9d78eefe98ea86673ab31e988b37a9a738 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 11:16:55 -0700 Subject: bpf: disallow arithmetic operations on context pointer Commit f1174f77b50c ("bpf/verifier: rework value tracking") removed the crafty selection of which pointer types are allowed to be modified. This is OK for most pointer types since adjust_ptr_min_max_vals() will catch operations on immutable pointers. One exception is PTR_TO_CTX which is now allowed to be offseted freely. The intent of aforementioned commit was to allow context access via modified registers. The offset passed to ->is_valid_access() verifier callback has been adjusted by the value of the variable offset. What is missing, however, is taking the variable offset into account when the context register is used. Or in terms of the code adding the offset to the value passed to the ->convert_ctx_access() callback. This leads to the following eBPF user code: r1 += 68 r0 = *(u32 *)(r1 + 8) exit being translated to this in kernel space: 0: (07) r1 += 68 1: (61) r0 = *(u32 *)(r1 +180) 2: (95) exit Offset 8 is corresponding to 180 in the kernel, but offset 76 is valid too. Verifier will "accept" access to offset 68+8=76 but then "convert" access to offset 8 as 180. Effective access to offset 248 is beyond the kernel context. (This is a __sk_buff example on a debug-heavy kernel - packet mark is 8 -> 180, 76 would be data.) Dereferencing the modified context pointer is not as easy as dereferencing other types, because we have to translate the access to reading a field in kernel structures which is usually at a different offset and often of a different size. To allow modifying the pointer we would have to make sure that given eBPF instruction will always access the same field or the fields accessed are "compatible" in terms of offset and size... Disallow dereferencing modified context pointers and add to selftests the test case described here. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Edward Cree Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 8 ++++++-- tools/testing/selftests/bpf/test_verifier.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8b8d6ba39e23..20f3889c006e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn /* ctx accesses must be at a fixed offset, so that we can * determine what type of data were returned. */ - if (!tnum_is_const(reg->var_off)) { + if (reg->off) { + verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n", + regno, reg->off, off - reg->off); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); @@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn tn_buf, off, size); return -EACCES; } - off += reg->var_off.value; err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 26f3250bdcd2..3c7d3a45a3c5 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6645,6 +6645,20 @@ static struct bpf_test tests[] = { .errstr = "BPF_END uses reserved fields", .result = REJECT, }, + { + "arithmetic ops make PTR_TO_CTX unusable", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct __sk_buff, data) - + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3-70-g09d2 From c97d96b4e612c7dc1b37d7afc61b598a9a25994d Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Sun, 24 Sep 2017 15:20:49 +0100 Subject: staging: bcm2835-audio: Fix memory corruption The previous commit (0adbfd46) fixed a memory leak but also freed a block in the success case, causing a stale pointer to be used with potentially fatal results. Only free the vchi_instance block in the case that vchi_connect fails; once connected, the instance is retained for subsequent connections. Simplifying the code by removing a bunch of gotos and returning errors directly. Signed-off-by: Phil Elwell Fixes: 0adbfd4694c2 ("staging: bcm2835-audio: fix memory leak in bcm2835_audio_open_connection()") Cc: stable # 4.12+ Tested-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman --- .../vc04_services/bcm2835-audio/bcm2835-vchiq.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c index 5f3d8f2339e3..4be864dbd41c 100644 --- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c +++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c @@ -390,8 +390,7 @@ static int bcm2835_audio_open_connection(struct bcm2835_alsa_stream *alsa_stream __func__, instance); instance->alsa_stream = alsa_stream; alsa_stream->instance = instance; - ret = 0; // xxx todo -1; - goto err_free_mem; + return 0; } /* Initialize and create a VCHI connection */ @@ -401,16 +400,15 @@ static int bcm2835_audio_open_connection(struct bcm2835_alsa_stream *alsa_stream LOG_ERR("%s: failed to initialise VCHI instance (ret=%d)\n", __func__, ret); - ret = -EIO; - goto err_free_mem; + return -EIO; } ret = vchi_connect(NULL, 0, vchi_instance); if (ret) { LOG_ERR("%s: failed to connect VCHI instance (ret=%d)\n", __func__, ret); - ret = -EIO; - goto err_free_mem; + kfree(vchi_instance); + return -EIO; } initted = 1; } @@ -421,19 +419,16 @@ static int bcm2835_audio_open_connection(struct bcm2835_alsa_stream *alsa_stream if (IS_ERR(instance)) { LOG_ERR("%s: failed to initialize audio service\n", __func__); - ret = PTR_ERR(instance); - goto err_free_mem; + /* vchi_instance is retained for use the next time. */ + return PTR_ERR(instance); } instance->alsa_stream = alsa_stream; alsa_stream->instance = instance; LOG_DBG(" success !\n"); - ret = 0; -err_free_mem: - kfree(vchi_instance); - return ret; + return 0; } int bcm2835_audio_open(struct bcm2835_alsa_stream *alsa_stream) -- cgit v1.2.3-70-g09d2 From 723f2828a98c8ca19842042f418fb30dd8cfc0f7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 Oct 2017 13:12:25 +0200 Subject: x86/microcode/intel: Disable late loading on model 79 Blacklist Broadwell X model 79 for late loading due to an erratum. Signed-off-by: Borislav Petkov Acked-by: Tony Luck Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171018111225.25635-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/microcode/intel.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 8f7a9bbad514..7dbcb7adf797 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -918,6 +919,18 @@ static int get_ucode_fw(void *to, const void *from, size_t n) return 0; } +static bool is_blacklisted(unsigned int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { + pr_err_once("late loading on model 79 is disabled.\n"); + return true; + } + + return false; +} + static enum ucode_state request_microcode_fw(int cpu, struct device *device, bool refresh_fw) { @@ -926,6 +939,9 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, const struct firmware *firmware; enum ucode_state ret; + if (is_blacklisted(cpu)) + return UCODE_NFOUND; + sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, c->x86_model, c->x86_mask); @@ -950,6 +966,9 @@ static int get_ucode_user(void *to, const void *from, size_t n) static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { + if (is_blacklisted(cpu)) + return UCODE_NFOUND; + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } -- cgit v1.2.3-70-g09d2 From e8b9b0cc8269c85d8167aaee024bfcbb4976c031 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 14 Oct 2017 09:59:49 -0700 Subject: x86/mm/64: Remove the last VM_BUG_ON() from the TLB code Let's avoid hard-to-diagnose crashes in the future. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/f423bbc97864089fbdeb813f1ea126c6eaed844a.1508000261.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 658bf0090565..7db23f9f804e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -147,8 +147,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, this_cpu_write(cpu_tlbstate.is_lazy, false); if (real_prev == next) { - VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != - next->context.ctx_id); + VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != + next->context.ctx_id); /* * We don't currently support having a real mm loaded without -- cgit v1.2.3-70-g09d2 From 4e57b94664fef55aa71cac33b4632fdfdd52b695 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 14 Oct 2017 09:59:50 -0700 Subject: x86/mm: Tidy up "x86/mm: Flush more aggressively in lazy TLB mode" Due to timezones, commit: b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode") was an outdated patch that well tested and fixed the bug but didn't address Borislav's review comments. Tidy it up: - The name "tlb_use_lazy_mode()" was highly confusing. Change it to "tlb_defer_switch_to_init_mm()", which describes what it actually means. - Move the static_branch crap into a helper. - Improve comments. Actually removing the debugfs option is in the next patch. Reported-by: Borislav Petkov Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode") Link: http://lkml.kernel.org/r/154ef95428d4592596b6e98b0af1d2747d6cfbf8.1508000261.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 7 ++++++- arch/x86/mm/tlb.c | 30 ++++++++++++++++++------------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d362161d3291..0d4a1bb7e303 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -87,7 +87,12 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) * to init_mm when we switch to a kernel thread (e.g. the idle thread). If * it's false, then we immediately switch CR3 when entering a kernel thread. */ -DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode); +DECLARE_STATIC_KEY_TRUE(__tlb_defer_switch_to_init_mm); + +static inline bool tlb_defer_switch_to_init_mm(void) +{ + return static_branch_unlikely(&__tlb_defer_switch_to_init_mm); +} /* * 6 because 6 should be plenty and struct tlb_state will fit in diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7db23f9f804e..5ee3b59baa85 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -30,7 +30,7 @@ atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); -DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode); +DEFINE_STATIC_KEY_TRUE(__tlb_defer_switch_to_init_mm); static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, u16 *new_asid, bool *need_flush) @@ -213,6 +213,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, } /* + * Please ignore the name of this function. It should be called + * switch_to_kernel_thread(). + * * enter_lazy_tlb() is a hint from the scheduler that we are entering a * kernel thread or other context without an mm. Acceptable implementations * include doing nothing whatsoever, switching to init_mm, or various clever @@ -227,7 +230,7 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) return; - if (static_branch_unlikely(&tlb_use_lazy_mode)) { + if (tlb_defer_switch_to_init_mm()) { /* * There's a significant optimization that may be possible * here. We have accurate enough TLB flush tracking that we @@ -632,7 +635,8 @@ static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf, { char buf[2]; - buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0'; + buf[0] = static_branch_likely(&__tlb_defer_switch_to_init_mm) + ? '1' : '0'; buf[1] = '\n'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); @@ -647,9 +651,9 @@ static ssize_t tlblazy_write_file(struct file *file, return -EINVAL; if (val) - static_branch_enable(&tlb_use_lazy_mode); + static_branch_enable(&__tlb_defer_switch_to_init_mm); else - static_branch_disable(&tlb_use_lazy_mode); + static_branch_disable(&__tlb_defer_switch_to_init_mm); return count; } @@ -660,23 +664,25 @@ static const struct file_operations fops_tlblazy = { .llseek = default_llseek, }; -static int __init init_tlb_use_lazy_mode(void) +static int __init init_tlblazy(void) { if (boot_cpu_has(X86_FEATURE_PCID)) { /* - * Heuristic: with PCID on, switching to and from - * init_mm is reasonably fast, but remote flush IPIs - * as expensive as ever, so turn off lazy TLB mode. + * If we have PCID, then switching to init_mm is reasonably + * fast. If we don't have PCID, then switching to init_mm is + * quite slow, so we default to trying to defer it in the + * hopes that we can avoid it entirely. The latter approach + * runs the risk of receiving otherwise unnecessary IPIs. * * We can't do this in setup_pcid() because static keys * haven't been initialized yet, and it would blow up * badly. */ - static_branch_disable(&tlb_use_lazy_mode); + static_branch_disable(&__tlb_defer_switch_to_init_mm); } - debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR, + debugfs_create_file("tlb_defer_switch_to_init_mm", S_IRUSR | S_IWUSR, arch_debugfs_dir, NULL, &fops_tlblazy); return 0; } -late_initcall(init_tlb_use_lazy_mode); +late_initcall(init_tlblazy); -- cgit v1.2.3-70-g09d2 From 7ac7f2c315ef76437f5119df354d334448534fb5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 14 Oct 2017 09:59:51 -0700 Subject: x86/mm: Remove debug/x86/tlb_defer_switch_to_init_mm Borislav thinks that we don't need this knob in a released kernel. Get rid of it. Requested-by: Borislav Petkov Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode") Link: http://lkml.kernel.org/r/1fa72431924e81e86c164ff7881bf9240d1f1a6c.1508000261.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 20 ++++++++------ arch/x86/mm/tlb.c | 58 ----------------------------------------- 2 files changed, 12 insertions(+), 66 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0d4a1bb7e303..c4aed0de565e 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -82,16 +82,20 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) #define __flush_tlb_single(addr) __native_flush_tlb_single(addr) #endif -/* - * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point - * to init_mm when we switch to a kernel thread (e.g. the idle thread). If - * it's false, then we immediately switch CR3 when entering a kernel thread. - */ -DECLARE_STATIC_KEY_TRUE(__tlb_defer_switch_to_init_mm); - static inline bool tlb_defer_switch_to_init_mm(void) { - return static_branch_unlikely(&__tlb_defer_switch_to_init_mm); + /* + * If we have PCID, then switching to init_mm is reasonably + * fast. If we don't have PCID, then switching to init_mm is + * quite slow, so we try to defer it in the hopes that we can + * avoid it entirely. The latter approach runs the risk of + * receiving otherwise unnecessary IPIs. + * + * This choice is just a heuristic. The tlb code can handle this + * function returning true or false regardless of whether we have + * PCID. + */ + return !static_cpu_has(X86_FEATURE_PCID); } /* diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5ee3b59baa85..0f3d0cea4d00 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -30,7 +30,6 @@ atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); -DEFINE_STATIC_KEY_TRUE(__tlb_defer_switch_to_init_mm); static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, u16 *new_asid, bool *need_flush) @@ -629,60 +628,3 @@ static int __init create_tlb_single_page_flush_ceiling(void) return 0; } late_initcall(create_tlb_single_page_flush_ceiling); - -static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - char buf[2]; - - buf[0] = static_branch_likely(&__tlb_defer_switch_to_init_mm) - ? '1' : '0'; - buf[1] = '\n'; - - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t tlblazy_write_file(struct file *file, - const char __user *user_buf, size_t count, loff_t *ppos) -{ - bool val; - - if (kstrtobool_from_user(user_buf, count, &val)) - return -EINVAL; - - if (val) - static_branch_enable(&__tlb_defer_switch_to_init_mm); - else - static_branch_disable(&__tlb_defer_switch_to_init_mm); - - return count; -} - -static const struct file_operations fops_tlblazy = { - .read = tlblazy_read_file, - .write = tlblazy_write_file, - .llseek = default_llseek, -}; - -static int __init init_tlblazy(void) -{ - if (boot_cpu_has(X86_FEATURE_PCID)) { - /* - * If we have PCID, then switching to init_mm is reasonably - * fast. If we don't have PCID, then switching to init_mm is - * quite slow, so we default to trying to defer it in the - * hopes that we can avoid it entirely. The latter approach - * runs the risk of receiving otherwise unnecessary IPIs. - * - * We can't do this in setup_pcid() because static keys - * haven't been initialized yet, and it would blow up - * badly. - */ - static_branch_disable(&__tlb_defer_switch_to_init_mm); - } - - debugfs_create_file("tlb_defer_switch_to_init_mm", S_IRUSR | S_IWUSR, - arch_debugfs_dir, NULL, &fops_tlblazy); - return 0; -} -late_initcall(init_tlblazy); -- cgit v1.2.3-70-g09d2 From ca8d7822054287352c41ff38f656e68fef959732 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Oct 2017 21:27:32 +0100 Subject: drm/i915: Report -EFAULT before pwrite fast path into shmemfs When pwriting into shmemfs, the fast path pagecache_write does not notice when it is writing to beyond the end of the truncated shmemfs inode. Report -EFAULT directly when we try to use pwrite into the !I915_MADV_WILLNEED object. Fixes: 7c55e2c5772d ("drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl") Testcase: igt/gem_madvise/dontneed-before-pwrite Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171016202732.25459-1-chris@chris-wilson.co.uk (cherry picked from commit a6d65e451cc4e7127698384868a4447ee7be7d16) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index af289d35b77a..32e857dc507c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2657,6 +2657,9 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, if (READ_ONCE(obj->mm.pages)) return -ENODEV; + if (obj->mm.madv != I915_MADV_WILLNEED) + return -EFAULT; + /* Before the pages are instantiated the object is treated as being * in the CPU domain. The pages will be clflushed as required before * use, and we can freely write into the pages directly. If userspace -- cgit v1.2.3-70-g09d2 From dd00ed9eff1e1819922f91da965f0e57e6a94216 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Tue, 17 Oct 2017 13:25:45 -0700 Subject: drm/i915: Use a mask when applying WaProgramL3SqcReg1Default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we are blasting other bits in GEN8_L3SQCREG1 that might be important (although we probably aren't at the moment because 0 seems to be the default for all the other bits). v2: Extra parentheses (Michel) Fixes: 050fc46 ("drm/i915:bxt: implement WaProgramL3SqcReg1DefaultForPerf") Fixes: 450174f ("drm/i915/chv: Tune L3 SQC credits based on actual latencies") Signed-off-by: Oscar Mateo Cc: Chris Wilson Cc: Mika Kuoppala Cc: Ville Syrjälä Cc: Imre Deak Reviewed-by: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/1508271945-14961-1-git-send-email-oscar.mateo@intel.com Signed-off-by: Chris Wilson (cherry picked from commit 930a784d02339be437fec07b3bb7213bde0ed53b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 9 ++++++--- drivers/gpu/drm/i915/intel_pm.c | 9 ++++++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ed7cd9ee2c2a..c9bcc6c45012 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6998,6 +6998,7 @@ enum { */ #define L3_GENERAL_PRIO_CREDITS(x) (((x) >> 1) << 19) #define L3_HIGH_PRIO_CREDITS(x) (((x) >> 1) << 14) +#define L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14)) #define GEN7_L3CNTLREG1 _MMIO(0xB01C) #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 9ab596941372..3c2d9cf22ed5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1048,9 +1048,12 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) } /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { + u32 val = I915_READ(GEN8_L3SQCREG1); + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); + I915_WRITE(GEN8_L3SQCREG1, val); + } /* WaToEnableHwFixForPushConstHWBug:bxt */ if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ed662937ec3c..0a09f8ff6aff 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8245,14 +8245,17 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, int high_prio_credits) { u32 misccpctl; + u32 val; /* WaTempDisableDOPClkGating:bdw */ misccpctl = I915_READ(GEN7_MISCCPCTL); I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); - I915_WRITE(GEN8_L3SQCREG1, - L3_GENERAL_PRIO_CREDITS(general_prio_credits) | - L3_HIGH_PRIO_CREDITS(high_prio_credits)); + val = I915_READ(GEN8_L3SQCREG1); + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits); + val |= L3_HIGH_PRIO_CREDITS(high_prio_credits); + I915_WRITE(GEN8_L3SQCREG1, val); /* * Wait at least 100 clocks before re-enabling clock gating. -- cgit v1.2.3-70-g09d2 From 06e2290844fa408d3295ac03a1647f0798518ebe Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 25 Sep 2017 20:11:58 -0500 Subject: Fix encryption labels and lengths for SMB3.1.1 SMB3.1.1 is most secure and recent dialect. Fixup labels and lengths for sMB3.1.1 signing and encryption. Signed-off-by: Steve French CC: Stable --- fs/cifs/cifsglob.h | 8 ++++++-- fs/cifs/smb2transport.c | 26 ++++++++++++++------------ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index de5b2e1fcce5..e185b2853eab 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -661,7 +661,9 @@ struct TCP_Server_Info { #endif unsigned int max_read; unsigned int max_write; - __u8 preauth_hash[512]; +#ifdef CONFIG_CIFS_SMB311 + __u8 preauth_sha_hash[64]; /* save initital negprot hash */ +#endif /* 3.1.1 */ struct delayed_work reconnect; /* reconnect workqueue job */ struct mutex reconnect_mutex; /* prevent simultaneous reconnects */ unsigned long echo_interval; @@ -849,7 +851,9 @@ struct cifs_ses { __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; - __u8 preauth_hash[512]; +#ifdef CONFIG_CIFS_SMB311 + __u8 preauth_sha_hash[64]; +#endif /* 3.1.1 */ }; static inline bool diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 67367cf1f8cd..99493946e2f9 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -390,6 +390,7 @@ generate_smb30signingkey(struct cifs_ses *ses) return generate_smb3signingkey(ses, &triplet); } +#ifdef CONFIG_CIFS_SMB311 int generate_smb311signingkey(struct cifs_ses *ses) @@ -398,25 +399,26 @@ generate_smb311signingkey(struct cifs_ses *ses) struct derivation *d; d = &triplet.signing; - d->label.iov_base = "SMB2AESCMAC"; - d->label.iov_len = 12; - d->context.iov_base = "SmbSign"; - d->context.iov_len = 8; + d->label.iov_base = "SMBSigningKey"; + d->label.iov_len = 14; + d->context.iov_base = ses->preauth_sha_hash; + d->context.iov_len = 64; d = &triplet.encryption; - d->label.iov_base = "SMB2AESCCM"; - d->label.iov_len = 11; - d->context.iov_base = "ServerIn "; - d->context.iov_len = 10; + d->label.iov_base = "SMBC2SCipherKey"; + d->label.iov_len = 16; + d->context.iov_base = ses->preauth_sha_hash; + d->context.iov_len = 64; d = &triplet.decryption; - d->label.iov_base = "SMB2AESCCM"; - d->label.iov_len = 11; - d->context.iov_base = "ServerOut"; - d->context.iov_len = 10; + d->label.iov_base = "SMBS2CCipherKey"; + d->label.iov_len = 16; + d->context.iov_base = ses->preauth_sha_hash; + d->context.iov_len = 64; return generate_smb3signingkey(ses, &triplet); } +#endif /* 311 */ int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) -- cgit v1.2.3-70-g09d2 From 7cb3def44ce834e5ea462fd900505af4cd68b3d5 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 28 Sep 2017 09:39:58 +1000 Subject: cifs: handle large EA requests more gracefully in smb2+ Update reading the EA using increasingly larger buffer sizes until the response will fit in the buffer, or we exceed the (arbitrary) maximum set to 64kb. Without this change, a user is able to add more and more EAs using setfattr until the point where the total space of all EAs exceed 2kb at which point the user can no longer list the EAs at all and getfattr will abort with an error. The same issue still exists for EAs in SMB1. Signed-off-by: Ronnie Sahlberg Reported-by: Xiaoli Feng Signed-off-by: Steve French --- fs/cifs/smb2maperror.c | 2 +- fs/cifs/smb2ops.c | 31 +++++++++++++++++++++++++------ fs/cifs/smb2pdu.c | 6 +++--- fs/cifs/smb2pdu.h | 3 ++- fs/cifs/smb2proto.h | 1 + 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 7ca9808a0daa..62c88dfed57b 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_DATATYPE_MISALIGNMENT, -EIO, "STATUS_DATATYPE_MISALIGNMENT"}, {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, - {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, + {STATUS_BUFFER_OVERFLOW, -E2BIG, "STATUS_BUFFER_OVERFLOW"}, {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"}, {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0dafdbae1f8c..bdb963d0ba32 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -522,6 +522,7 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_fid fid; struct smb2_file_full_ea_info *smb2_data; + int ea_buf_size = SMB2_MIN_EA_BUF; utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) @@ -541,14 +542,32 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, return rc; } - smb2_data = kzalloc(SMB2_MAX_EA_BUF, GFP_KERNEL); - if (smb2_data == NULL) { - SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); - return -ENOMEM; + while (1) { + smb2_data = kzalloc(ea_buf_size, GFP_KERNEL); + if (smb2_data == NULL) { + SMB2_close(xid, tcon, fid.persistent_fid, + fid.volatile_fid); + return -ENOMEM; + } + + rc = SMB2_query_eas(xid, tcon, fid.persistent_fid, + fid.volatile_fid, + ea_buf_size, smb2_data); + + if (rc != -E2BIG) + break; + + kfree(smb2_data); + ea_buf_size <<= 1; + + if (ea_buf_size > SMB2_MAX_EA_BUF) { + cifs_dbg(VFS, "EA size is too large\n"); + SMB2_close(xid, tcon, fid.persistent_fid, + fid.volatile_fid); + return -ENOMEM; + } } - rc = SMB2_query_eas(xid, tcon, fid.persistent_fid, fid.volatile_fid, - smb2_data); SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); if (!rc) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 6f0e6343c15e..ba3865b338d8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2233,12 +2233,12 @@ qinf_exit: } int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct smb2_file_full_ea_info *data) + u64 persistent_fid, u64 volatile_fid, + int ea_buf_size, struct smb2_file_full_ea_info *data) { return query_info(xid, tcon, persistent_fid, volatile_fid, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0, - SMB2_MAX_EA_BUF, + ea_buf_size, sizeof(struct smb2_file_full_ea_info), (void **)&data, NULL); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 6c9653a130c8..4c155b95b558 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1178,7 +1178,8 @@ struct smb2_file_link_info { /* encoding of request for level 11 */ char FileName[0]; /* Name to be assigned to new link */ } __packed; /* level 11 Set */ -#define SMB2_MAX_EA_BUF 2048 +#define SMB2_MIN_EA_BUF 2048 +#define SMB2_MAX_EA_BUF 65536 struct smb2_file_full_ea_info { /* encoding of response for level 15 */ __le32 next_entry_offset; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 003217099ef3..e9ab5227e7a8 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -134,6 +134,7 @@ extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, + int ea_buf_size, struct smb2_file_full_ea_info *data); extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, -- cgit v1.2.3-70-g09d2 From 4572f0539c731c588cdce9575cb7d60ef23a521d Mon Sep 17 00:00:00 2001 From: Long Li Date: Sun, 1 Oct 2017 19:30:24 -0700 Subject: CIFS: SMBD: Fix the definition for SMB2_CHANNEL_RDMA_V1_INVALIDATE The channel value for requesting server remote invalidating local memory registration should be 0x00000002 Signed-off-by: Long Li Signed-off-by: Steve French --- fs/cifs/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4c155b95b558..c2ec934be968 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -832,7 +832,7 @@ struct smb2_flush_rsp { /* Channel field for read and write: exactly one of following flags can be set*/ #define SMB2_CHANNEL_NONE 0x00000000 #define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */ -#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000001 /* SMB3.02 or later */ +#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000002 /* SMB3.02 or later */ /* SMB2 read request without RFC1001 length at the beginning */ struct smb2_read_plain_req { -- cgit v1.2.3-70-g09d2 From c94501279bb191ccf204f496e9576ce036f81bcd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 12 Oct 2017 13:08:48 -0400 Subject: Revert "drm/amdgpu: discard commands of killed processes" This causes instability in piglit. It's fixed in drm-next with: 515c6faf85970af529953ec137b4b6fcb3272e25 1650c14b459ff9c85767746f1ef795a780653128 214a91e6bfabaa6cbfa692df8732000aab050795 29d253553559dba919315be847f4f2cce29edd42 79867462634836ee5c39a2cdf624719feeb189bd This reverts commit 6af0883ed9770cf9b0a4f224c91481484cd1b025. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 97c94f9683fa..38cea6fb25a8 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -205,32 +205,17 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity) { struct amd_sched_rq *rq = entity->rq; - int r; if (!amd_sched_entity_is_initialized(sched, entity)) return; + /** * The client will not queue more IBs during this fini, consume existing - * queued IBs or discard them on SIGKILL + * queued IBs */ - if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL) - r = -ERESTARTSYS; - else - r = wait_event_killable(sched->job_scheduled, - amd_sched_entity_is_idle(entity)); - amd_sched_rq_remove_entity(rq, entity); - if (r) { - struct amd_sched_job *job; + wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); - /* Park the kernel for a moment to make sure it isn't processing - * our enity. - */ - kthread_park(sched->thread); - kthread_unpark(sched->thread); - while (kfifo_out(&entity->job_queue, &job, sizeof(job))) - sched->ops->free_job(job); - - } + amd_sched_rq_remove_entity(rq, entity); kfifo_free(&entity->job_queue); } -- cgit v1.2.3-70-g09d2 From 1c0edc3633b56000e18d82fc241e3995ca18a69e Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 18 Oct 2017 12:49:38 -0400 Subject: USB: core: fix out-of-bounds access bug in usb_get_bos_descriptor() Andrey used the syzkaller fuzzer to find an out-of-bounds memory access in usb_get_bos_descriptor(). The code wasn't checking that the next usb_dev_cap_header structure could fit into the remaining buffer space. This patch fixes the error and also reduces the bNumDeviceCaps field in the header to match the actual number of capabilities found, in cases where there are fewer than expected. Reported-by: Andrey Konovalov Signed-off-by: Alan Stern Tested-by: Andrey Konovalov CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 68b54bd88d1e..883549ee946c 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -960,10 +960,12 @@ int usb_get_bos_descriptor(struct usb_device *dev) for (i = 0; i < num; i++) { buffer += length; cap = (struct usb_dev_cap_header *)buffer; - length = cap->bLength; - if (total_len < length) + if (total_len < sizeof(*cap) || total_len < cap->bLength) { + dev->bos->desc->bNumDeviceCaps = i; break; + } + length = cap->bLength; total_len -= length; if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { -- cgit v1.2.3-70-g09d2 From 8a82dbf19129dde9e6fc9ab25a00dbc7569abe6a Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 9 Oct 2017 13:39:44 -0700 Subject: nvme-fc: fix iowait hang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing iowait head initialization. Fix irqsave vs irq: wait_event_lock_irq() doesn't do irq save/restore Fixes: 36715cf4b366 ("nvme_fc: replace ioabort msleep loop with completion”) Cc: # 4.13 Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Tested-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index af075e998944..8182b1999f49 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2545,10 +2545,10 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) nvme_fc_abort_aen_ops(ctrl); /* wait for all io that had to be aborted */ - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irq(&ctrl->lock); wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); ctrl->flags &= ~FCCTRL_TERMIO; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irq(&ctrl->lock); nvme_fc_term_aen_ops(ctrl); @@ -2760,6 +2760,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->rport = rport; ctrl->dev = lport->dev; ctrl->cnum = idx; + init_waitqueue_head(&ctrl->ioabort_wait); get_device(ctrl->dev); kref_init(&ctrl->ref); -- cgit v1.2.3-70-g09d2 From 17c4dc6eb7e1b2fb1ce6a52467e3be635224606e Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 9 Oct 2017 16:39:22 -0700 Subject: nvme-fc: retry initial controller connections 3 times Currently, if a frame is lost of command fails as part of initial association create for a new controller, the new controller connection request will immediately fail. Add in an immediate 3 retry loop before giving up. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8182b1999f49..be49d0f79381 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2734,7 +2734,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, { struct nvme_fc_ctrl *ctrl; unsigned long flags; - int ret, idx; + int ret, idx, retry; if (!(rport->remoteport.port_role & (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { @@ -2826,9 +2826,37 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - ret = nvme_fc_create_association(ctrl); + /* + * It's possible that transactions used to create the association + * may fail. Examples: CreateAssociation LS or CreateIOConnection + * LS gets dropped/corrupted/fails; or a frame gets dropped or a + * command times out for one of the actions to init the controller + * (Connect, Get/Set_Property, Set_Features, etc). Many of these + * transport errors (frame drop, LS failure) inherently must kill + * the association. The transport is coded so that any command used + * to create the association (prior to a LIVE state transition + * while NEW or RECONNECTING) will fail if it completes in error or + * times out. + * + * As such: as the connect request was mostly likely due to a + * udev event that discovered the remote port, meaning there is + * not an admin or script there to restart if the connect + * request fails, retry the initial connection creation up to + * three times before giving up and declaring failure. + */ + for (retry = 0; retry < 3; retry++) { + ret = nvme_fc_create_association(ctrl); + if (!ret) + break; + } + if (ret) { + /* couldn't schedule retry - fail out */ + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); + ctrl->ctrl.opts = NULL; + /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); -- cgit v1.2.3-70-g09d2 From 4813766325374af6ed0b66879ba6a0bbb05c83b6 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Sat, 23 Sep 2017 13:10:33 -0700 Subject: drm/nouveau/fbcon: fix oops without fbdev emulation This is similar to an earlier commit 52dfcc5ccfbb ("drm/nouveau: fix for disabled fbdev emulation"), but protects all occurrences of helper.fbdev in the source. I see oops in nouveau_fbcon_accel_save_disable() called from nouveau_fbcon_set_suspend_work() on Linux 3.13 when CONFIG_DRM_FBDEV_EMULATION option is disabled. Signed-off-by: Pavel Roskin Reviewed-by: Daniel Vetter Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index f7707849bb53..2b12d82aac15 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -223,7 +223,7 @@ void nouveau_fbcon_accel_save_disable(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->saved_flags = drm->fbcon->helper.fbdev->flags; drm->fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; } @@ -233,7 +233,7 @@ void nouveau_fbcon_accel_restore(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->helper.fbdev->flags = drm->fbcon->saved_flags; } } @@ -245,7 +245,8 @@ nouveau_fbcon_accel_fini(struct drm_device *dev) struct nouveau_fbdev *fbcon = drm->fbcon; if (fbcon && drm->channel) { console_lock(); - fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; + if (fbcon->helper.fbdev) + fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; console_unlock(); nouveau_channel_idle(drm->channel); nvif_object_fini(&fbcon->twod); -- cgit v1.2.3-70-g09d2 From 357fdad075abb976ee43f2dd450c40dcef4b7e2d Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 18 Oct 2017 13:56:26 -0700 Subject: Convert fs/*/* to SB_I_VERSION [AV: in addition to the fix in previous commit] Signed-off-by: Matthew Garrett Cc: David Howells Cc: Alexander Viro Reviewed-by: David Howells Signed-off-by: Al Viro --- fs/btrfs/super.c | 2 +- fs/ext4/super.c | 4 ++-- fs/fuse/inode.c | 2 +- fs/xfs/xfs_super.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 35a128acfbd1..161694b66038 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1135,7 +1135,7 @@ static int btrfs_fill_super(struct super_block *sb, #ifdef CONFIG_BTRFS_FS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= SB_I_VERSION; sb->s_iflags |= SB_I_CGROUPWB; err = super_setup_bdi(sb); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b104096fce9e..b0915b734a38 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1677,7 +1677,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; return 1; case Opt_i_version: - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= SB_I_VERSION; return 1; case Opt_lazytime: sb->s_flags |= MS_LAZYTIME; @@ -2060,7 +2060,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); - if (sb->s_flags & MS_I_VERSION) + if (sb->s_flags & SB_I_VERSION) SEQ_OPTS_PUTS("i_version"); if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 65c88379a3a1..94a745acaef8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1059,7 +1059,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_flags & MS_MANDLOCK) goto err; - sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); + sb->s_flags &= ~(MS_NOSEC | SB_I_VERSION); if (!parse_fuse_opt(data, &d, is_bdev)) goto err; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 584cf2d573ba..f663022353c0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1637,7 +1637,7 @@ xfs_fs_fill_super( /* version 5 superblocks support inode version counters. */ if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= SB_I_VERSION; if (mp->m_flags & XFS_MOUNT_DAX) { xfs_warn(mp, -- cgit v1.2.3-70-g09d2 From cf5dd48907bebaefdb43a8ca079be77e8da2cb20 Mon Sep 17 00:00:00 2001 From: Jeff Lance Date: Wed, 18 Oct 2017 17:25:52 -0700 Subject: Input: ti_am335x_tsc - fix incorrect step config for 5 wire touchscreen Step config setting for 5 wire touchscreen is incorrect for Y coordinates. It was broken while we moved to DT. If you look close at the offending commit bb76dc09ddfc ("input: ti_am33x_tsc: Order of TSC wires, made configurable"), the change was: - STEPCONFIG_XNP | STEPCONFIG_YPN; + ts_dev->bit_xn | ts_dev->bit_yp; while bit_xn = STEPCONFIG_XNN and bit_yp = STEPCONFIG_YNN. Not quite the same. Fixes: bb76dc09ddfc ("input: ti_am33x_tsc: Order of TSC wires, made configurable") Signed-off-by: Jeff Lance [vigneshr@ti.com: Rebase to v4.14-rc1] Signed-off-by: Vignesh R Reviewed-by: Michael Nazzareno Trimarchi Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ti_am335x_tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c index 7953381d939a..f1043ae71dcc 100644 --- a/drivers/input/touchscreen/ti_am335x_tsc.c +++ b/drivers/input/touchscreen/ti_am335x_tsc.c @@ -161,7 +161,7 @@ static void titsc_step_config(struct titsc *ts_dev) break; case 5: config |= ts_dev->bit_xp | STEPCONFIG_INP_AN4 | - ts_dev->bit_xn | ts_dev->bit_yp; + STEPCONFIG_XNP | STEPCONFIG_YPN; break; case 8: config |= ts_dev->bit_yp | STEPCONFIG_INP(ts_dev->inp_xp); -- cgit v1.2.3-70-g09d2 From c9bfb2f0031a2de664147ebbfb90f95bb12fdf79 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 18 Oct 2017 17:28:36 -0700 Subject: Input: stmfts - fix setting ABS_MT_POSITION_* maximum size The commit 78bcac7b2ae1e ("Input: add support for the STMicroelectronics FingerTip touchscreen) used the 'touchscreen_parse_properties()' helper function in order to get the value of common properties. But, commit 78bcac7b2ae1e didn't set the capability of ABS_MT_POSITION_* before calling touchscreen_parse_properties(). In result, the max_x and max_y of 'struct touchscreen_properties' were not set. Fixes: 78bcac7b2ae1e ("Input: add support for the STMicroelectronics FingerTip touchscreen") Cc: stable@vger.kernel.org Signed-off-by: Chanwoo Choi Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/stmfts.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c index 157fdb4bb2e8..8c6c6178ec12 100644 --- a/drivers/input/touchscreen/stmfts.c +++ b/drivers/input/touchscreen/stmfts.c @@ -663,12 +663,10 @@ static int stmfts_probe(struct i2c_client *client, sdata->input->open = stmfts_input_open; sdata->input->close = stmfts_input_close; + input_set_capability(sdata->input, EV_ABS, ABS_MT_POSITION_X); + input_set_capability(sdata->input, EV_ABS, ABS_MT_POSITION_Y); touchscreen_parse_properties(sdata->input, true, &sdata->prop); - input_set_abs_params(sdata->input, ABS_MT_POSITION_X, 0, - sdata->prop.max_x, 0, 0); - input_set_abs_params(sdata->input, ABS_MT_POSITION_Y, 0, - sdata->prop.max_y, 0, 0); input_set_abs_params(sdata->input, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); input_set_abs_params(sdata->input, ABS_MT_TOUCH_MINOR, 0, 255, 0, 0); input_set_abs_params(sdata->input, ABS_MT_ORIENTATION, 0, 255, 0, 0); -- cgit v1.2.3-70-g09d2 From 76ba89c76f2c74e208d93a9e7c698e39eeb3b85c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 4 Sep 2017 18:50:05 +0100 Subject: commoncap: move assignment of fs_ns to avoid null pointer dereference The pointer fs_ns is assigned from inode->i_ib->s_user_ns before a null pointer check on inode, hence if inode is actually null we will get a null pointer dereference on this assignment. Fix this by only dereferencing inode after the null pointer check on inode. Detected by CoverityScan CID#1455328 ("Dereference before null check") Fixes: 8db6c34f1dbc ("Introduce v3 namespaced file capabilities") Signed-off-by: Colin Ian King Cc: stable@vger.kernel.org Acked-by: Serge Hallyn Signed-off-by: James Morris --- security/commoncap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/commoncap.c b/security/commoncap.c index c25e0d27537f..fc46f5b85251 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -585,13 +585,14 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data struct vfs_ns_cap_data data, *nscaps = &data; struct vfs_cap_data *caps = (struct vfs_cap_data *) &data; kuid_t rootkuid; - struct user_namespace *fs_ns = inode->i_sb->s_user_ns; + struct user_namespace *fs_ns; memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); if (!inode) return -ENODATA; + fs_ns = inode->i_sb->s_user_ns; size = __vfs_getxattr((struct dentry *)dentry, inode, XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ); if (size == -ENODATA || size == -EOPNOTSUPP) -- cgit v1.2.3-70-g09d2 From 374b3bf8e8b519f61eb9775888074c6e46b3bf0c Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 30 Sep 2017 17:24:23 -0400 Subject: parisc: Fix double-word compare and exchange in LWS code on 32-bit kernels As discussed on the debian-hppa list, double-wordcompare and exchange operations fail on 32-bit kernels. Looking at the code, I realized that the ",ma" completer does the wrong thing in the "ldw,ma 4(%r26), %r29" instruction. This increments %r26 and causes the following store to write to the wrong location. Note by Helge Deller: The patch applies cleanly to stable kernel series if this upstream commit is merged in advance: f4125cfdb300 ("parisc: Avoid trashing sr2 and sr3 in LWS code"). Signed-off-by: John David Anglin Tested-by: Christoph Biedl Fixes: 89206491201c ("parisc: Implement new LWS CAS supporting 64 bit operations.") Cc: stable@vger.kernel.org # 3.13+ Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 23de307c3052..41e60a9c7db2 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -742,7 +742,7 @@ lws_compare_and_swap_2: 10: ldd 0(%r25), %r25 11: ldd 0(%r24), %r24 #else - /* Load new value into r22/r23 - high/low */ + /* Load old value into r22/r23 - high/low */ 10: ldw 0(%r25), %r22 11: ldw 4(%r25), %r23 /* Load new value into fr4 for atomic store later */ @@ -834,11 +834,11 @@ cas2_action: copy %r0, %r28 #else /* Compare first word */ -19: ldw,ma 0(%r26), %r29 +19: ldw 0(%r26), %r29 sub,= %r29, %r22, %r0 b,n cas2_end /* Compare second word */ -20: ldw,ma 4(%r26), %r29 +20: ldw 4(%r26), %r29 sub,= %r29, %r23, %r0 b,n cas2_end /* Perform the store */ -- cgit v1.2.3-70-g09d2 From 2aae008ca6b3f5e978bea7aff265def4f2282f90 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 24 Sep 2017 10:28:09 -0700 Subject: parisc: Export __cmpxchg_u64 unconditionally __cmpxchg_u64 is built and used outside CONFIG_64BIT and thus needs to be exported. This fixes the following build error seen when building parisc:allmodconfig. ERROR: "__cmpxchg_u64" [drivers/net/ethernet/intel/i40e/i40e.ko] undefined! Signed-off-by: Guenter Roeck Signed-off-by: Helge Deller --- arch/parisc/kernel/parisc_ksyms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index c6d6272a934f..7baa2265d439 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -35,12 +35,12 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(__xchg8); EXPORT_SYMBOL(__xchg32); EXPORT_SYMBOL(__cmpxchg_u32); +EXPORT_SYMBOL(__cmpxchg_u64); #ifdef CONFIG_SMP EXPORT_SYMBOL(__atomic_hash); #endif #ifdef CONFIG_64BIT EXPORT_SYMBOL(__xchg64); -EXPORT_SYMBOL(__cmpxchg_u64); #endif #include -- cgit v1.2.3-70-g09d2 From f9cf2a64912d67c9cf49c316a0a0ada0ea7ed1da Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 18 Oct 2017 14:33:59 -0700 Subject: nvmet: synchronize sqhd update In testing target io in read write mix, we did indeed get into cases where sqhd didn't update properly and slowly missed enough updates to shutdown the queue. Protect the updating sqhd by using cmpxchg, and for that turn the sqhd field into a u32 so that cmpxchg works on it for all architectures. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 15 ++++++++++++--- drivers/nvme/target/nvmet.h | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 1b208beeef50..645ba7eee35d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -387,12 +387,21 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) static void __nvmet_req_complete(struct nvmet_req *req, u16 status) { + u32 old_sqhd, new_sqhd; + u16 sqhd; + if (status) nvmet_set_status(req, status); - if (req->sq->size) - req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size; - req->rsp->sq_head = cpu_to_le16(req->sq->sqhd); + if (req->sq->size) { + do { + old_sqhd = req->sq->sqhd; + new_sqhd = (old_sqhd + 1) % req->sq->size; + } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != + old_sqhd); + } + sqhd = req->sq->sqhd & 0x0000FFFF; + req->rsp->sq_head = cpu_to_le16(sqhd); req->rsp->sq_id = cpu_to_le16(req->sq->qid); req->rsp->command_id = req->cmd->common.command_id; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7b8e20adf760..87e429bfcd8a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -74,7 +74,7 @@ struct nvmet_sq { struct percpu_ref ref; u16 qid; u16 size; - u16 sqhd; + u32 sqhd; struct completion free_done; struct completion confirm_done; }; -- cgit v1.2.3-70-g09d2 From 8642b31ba9eef8a01845146a26682d4869e62513 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 18 Oct 2017 22:25:00 +0200 Subject: parisc: Fix detection of nonsynchronous cr16 cycle counters For CPUs which have an unknown or invalid CPU location (physical location) assume that their cycle counters aren't syncronized across CPUs. Signed-off-by: Helge Deller Fixes: c8c3735997a3 ("parisc: Enhance detection of synchronous cr16 clocksources") Cc: stable@vger.kernel.org # 4.13+ Signed-off-by: Helge Deller --- arch/parisc/kernel/time.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 2d956aa0a38a..8c0105a49839 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -253,7 +253,10 @@ static int __init init_cr16_clocksource(void) cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; for_each_online_cpu(cpu) { - if (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc) + if (cpu == 0) + continue; + if ((cpu0_loc != 0) && + (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) continue; clocksource_cr16.name = "cr16_unstable"; -- cgit v1.2.3-70-g09d2 From 1ac7db63333db1eeff901bfd6bbcd502b4634fa4 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 17 Oct 2017 16:07:33 +0300 Subject: usb: hub: Allow reset retry for USB2 devices on connect bounce If the connect status change is set during reset signaling, but the status remains connected just retry port reset. This solves an issue with connecting a 90W HP Thunderbolt 3 dock with a Lenovo Carbon x1 (5th generation) which causes a 30min loop of a high speed device being re-discovererd before usb ports starts working. [...] [ 389.023845] usb 3-1: new high-speed USB device number 55 using xhci_hcd [ 389.491841] usb 3-1: new high-speed USB device number 56 using xhci_hcd [ 389.959928] usb 3-1: new high-speed USB device number 57 using xhci_hcd [...] This is caused by a high speed device that doesn't successfully go to the enabled state after the second port reset. Instead the connection bounces (connected, with connect status change), bailing out completely from enumeration just to restart from scratch. Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1716332 Cc: Stable Signed-off-by: Mathias Nyman Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b5c733613823..e9ce6bb0b22d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2710,13 +2710,16 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, if (!(portstatus & USB_PORT_STAT_CONNECTION)) return -ENOTCONN; - /* bomb out completely if the connection bounced. A USB 3.0 - * connection may bounce if multiple warm resets were issued, + /* Retry if connect change is set but status is still connected. + * A USB 3.0 connection may bounce if multiple warm resets were issued, * but the device may have successfully re-connected. Ignore it. */ if (!hub_is_superspeed(hub->hdev) && - (portchange & USB_PORT_STAT_C_CONNECTION)) - return -ENOTCONN; + (portchange & USB_PORT_STAT_C_CONNECTION)) { + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_CONNECTION); + return -EAGAIN; + } if (!(portstatus & USB_PORT_STAT_ENABLE)) return -EBUSY; -- cgit v1.2.3-70-g09d2 From 6afb10267c1692ada3a2903e31ea339917ad3ac0 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Mon, 2 Oct 2017 12:00:54 +0800 Subject: pinctrl/amd: fix masking of GPIO interrupts On Asus laptop models X505BA, X505BP, X542BA and X542BP, the i2c-hid touchpad (using a GPIO for interrupts) becomes unresponsive after a few minutes of usage, or after placing two fingers on the touchpad, which seems to have the effect of queuing up a large amount of input data to be transferred. When the touchpad is in unresponsive state, we observed that the GPIO level-triggered interrupt is still at it's active level, however the pinctrl-amd driver is not receiving/dispatching more interrupts at this point. After the initial interrupt arrives, amd_gpio_irq_mask() is called however we then see amd_gpio_irq_handler() being called repeatedly for the same irq; the interrupt mask is not taking effect because of the following sequence of events: - amd_gpio_irq_handler fires, reads and caches pin reg - amd_gpio_irq_handler calls generic_handle_irq() - During IRQ handling, amd_gpio_irq_mask() is called and modifies pin reg - amd_gpio_irq_handler clears interrupt by writing cached value The stale cached value written at the final stage undoes the masking. Fix this by re-reading the register before clearing the interrupt. I also spotted that the interrupt-clearing code can race against amd_gpio_irq_mask() / amd_gpio_irq_unmask(), so add locking there. Presumably this race was leading to the loss of interrupts. After these changes, the touchpad appears to be working fine. Signed-off-by: Daniel Drake Acked-by: Shah, Nehal-bakulchandra Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 3f6b34febbf1..433af328d981 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -534,8 +534,16 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) continue; irq = irq_find_mapping(gc->irqdomain, irqnr + i); generic_handle_irq(irq); - /* Clear interrupt */ + + /* Clear interrupt. + * We must read the pin register again, in case the + * value was changed while executing + * generic_handle_irq() above. + */ + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + regval = readl(regs + i); writel(regval, regs + i); + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); ret = IRQ_HANDLED; } } -- cgit v1.2.3-70-g09d2 From 59861701076b0cfac0b5eaa67a380fff6ab85b9e Mon Sep 17 00:00:00 2001 From: Dmitry Mastykin Date: Wed, 18 Oct 2017 17:21:02 +0300 Subject: pinctrl: mcp23s08: fix interrupt handling regression interrupt handling was broken with conversion to using regmap caching. cached_gpio value was updated by boolean status instead of gpio reading. Fixes: 8f38910ba4f6 ("pinctrl: mcp23s08: switch to regmap caching") Tested-by: Phil Reid Signed-off-by: Dmitry Mastykin Reviewed-by: Sebastian Reichel Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-mcp23s08.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-mcp23s08.c b/drivers/pinctrl/pinctrl-mcp23s08.c index 3e40d4245512..9c950bbf07ba 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08.c +++ b/drivers/pinctrl/pinctrl-mcp23s08.c @@ -407,10 +407,10 @@ static int mcp23s08_get(struct gpio_chip *chip, unsigned offset) ret = mcp_read(mcp, MCP_GPIO, &status); if (ret < 0) status = 0; - else + else { + mcp->cached_gpio = status; status = !!(status & (1 << offset)); - - mcp->cached_gpio = status; + } mutex_unlock(&mcp->lock); return status; -- cgit v1.2.3-70-g09d2 From ad2302345d59d29232cb668baaae9e840925d153 Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 06:59:15 +0000 Subject: can: flexcan: fix state transition regression Update state upon any interrupt to report correct state transitions in case the flexcan core enabled the broken error state quirk fix. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 13f0f219d8aa..df4bfb83024c 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -765,8 +765,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) flexcan_write(reg_esr & FLEXCAN_ESR_ALL_INT, ®s->esr); } - /* state change interrupt */ - if (reg_esr & FLEXCAN_ESR_ERR_STATE) + /* state change interrupt or broken error state quirk fix is enabled */ + if ((reg_esr & FLEXCAN_ESR_ERR_STATE) || + (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_ERR_STATE)) flexcan_irq_state(dev, reg_esr); /* bus error IRQ - handle if bus error reporting is activated */ -- cgit v1.2.3-70-g09d2 From 2f8639b24b4f4f9dd6cf7c1f2aea90e2fcbcc451 Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:01:23 +0000 Subject: can: flexcan: rename legacy error state quirk Rename FLEXCAN_QUIRK_BROKEN_ERR_STATE to FLEXCAN_QUIRK_BROKEN_WERR_STATE for better description of the missing [TR]WRN_INT quirk. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index df4bfb83024c..e163c55e737b 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -193,7 +193,7 @@ * * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. */ -#define FLEXCAN_QUIRK_BROKEN_ERR_STATE BIT(1) /* [TR]WRN_INT not connected */ +#define FLEXCAN_QUIRK_BROKEN_WERR_STATE BIT(1) /* [TR]WRN_INT not connected */ #define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2) /* Disable RX FIFO Global mask */ #define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */ #define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ @@ -281,7 +281,7 @@ struct flexcan_priv { }; static const struct flexcan_devtype_data fsl_p1010_devtype_data = { - .quirks = FLEXCAN_QUIRK_BROKEN_ERR_STATE, + .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE, }; static const struct flexcan_devtype_data fsl_imx28_devtype_data; @@ -767,7 +767,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) /* state change interrupt or broken error state quirk fix is enabled */ if ((reg_esr & FLEXCAN_ESR_ERR_STATE) || - (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_ERR_STATE)) + (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE)) flexcan_irq_state(dev, reg_esr); /* bus error IRQ - handle if bus error reporting is activated */ @@ -888,7 +888,7 @@ static int flexcan_chip_start(struct net_device *dev) * on most Flexcan cores, too. Otherwise we don't get * any error warning or passive interrupts. */ - if (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_ERR_STATE || + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE || priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) reg_ctrl |= FLEXCAN_CTRL_ERR_MSK; else -- cgit v1.2.3-70-g09d2 From da49a8075c00b4d98ef069a0ee201177a8b79ead Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:03:58 +0000 Subject: can: flexcan: implement error passive state quirk Add FLEXCAN_QUIRK_BROKEN_PERR_STATE for better description of the missing error passive interrupt quirk. Error interrupt flooding may happen if the broken error state quirk fix is enabled. For example, in case there is singled out node on the bus and the node sends a frame, then error interrupt flooding happens and will not stop because the node cannot go to bus off. The flooding will stop after another node connected to the bus again. If high bitrate configured on the low end system, then the flooding may causes performance issue, hence, this patch mitigates this by: 1. disable error interrupt upon error passive state transition 2. re-enable error interrupt upon error warning state transition 3. disable/enable error interrupt upon error active state transition depends on FLEXCAN_QUIRK_BROKEN_WERR_STATE In this way, the driver is still able to report correct state transitions without additional latency. When there are bus problems, flooding of error interrupts is limited to the number of frames required to change state from error warning to error passive if the core has [TR]WRN_INT connected (FLEXCAN_QUIRK_BROKEN_WERR_STATE is not enabled), otherwise, the flooding is limited to the number of frames required to change state from error active to error passive. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 75 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 9 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index e163c55e737b..c83a09fa4166 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -182,14 +182,14 @@ /* FLEXCAN hardware feature flags * * Below is some version info we got: - * SOC Version IP-Version Glitch- [TR]WRN_INT Memory err RTR re- - * Filter? connected? detection ception in MB - * MX25 FlexCAN2 03.00.00.00 no no no no - * MX28 FlexCAN2 03.00.04.00 yes yes no no - * MX35 FlexCAN2 03.00.00.00 no no no no - * MX53 FlexCAN2 03.00.00.00 yes no no no - * MX6s FlexCAN3 10.00.12.00 yes yes no yes - * VF610 FlexCAN3 ? no yes yes yes? + * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR re- + * Filter? connected? Passive detection ception in MB + * MX25 FlexCAN2 03.00.00.00 no no ? no no + * MX28 FlexCAN2 03.00.04.00 yes yes no no no + * MX35 FlexCAN2 03.00.00.00 no no ? no no + * MX53 FlexCAN2 03.00.00.00 yes no no no no + * MX6s FlexCAN3 10.00.12.00 yes yes no no yes + * VF610 FlexCAN3 ? no yes ? yes yes? * * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected. */ @@ -198,6 +198,7 @@ #define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */ #define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ #define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */ +#define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6) /* No interrupt for error passive */ /* Structure of the message buffer */ struct flexcan_mb { @@ -335,6 +336,22 @@ static inline void flexcan_write(u32 val, void __iomem *addr) } #endif +static inline void flexcan_error_irq_enable(const struct flexcan_priv *priv) +{ + struct flexcan_regs __iomem *regs = priv->regs; + u32 reg_ctrl = (priv->reg_ctrl_default | FLEXCAN_CTRL_ERR_MSK); + + flexcan_write(reg_ctrl, ®s->ctrl); +} + +static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv) +{ + struct flexcan_regs __iomem *regs = priv->regs; + u32 reg_ctrl = (priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_MSK); + + flexcan_write(reg_ctrl, ®s->ctrl); +} + static inline int flexcan_transceiver_enable(const struct flexcan_priv *priv) { if (!priv->reg_xceiver) @@ -713,6 +730,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) struct flexcan_regs __iomem *regs = priv->regs; irqreturn_t handled = IRQ_NONE; u32 reg_iflag1, reg_esr; + enum can_state last_state = priv->can.state; reg_iflag1 = flexcan_read(®s->iflag1); @@ -767,7 +785,8 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) /* state change interrupt or broken error state quirk fix is enabled */ if ((reg_esr & FLEXCAN_ESR_ERR_STATE) || - (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_WERR_STATE)) + (priv->devtype_data->quirks & (FLEXCAN_QUIRK_BROKEN_WERR_STATE | + FLEXCAN_QUIRK_BROKEN_PERR_STATE))) flexcan_irq_state(dev, reg_esr); /* bus error IRQ - handle if bus error reporting is activated */ @@ -775,6 +794,44 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)) flexcan_irq_bus_err(dev, reg_esr); + /* availability of error interrupt among state transitions in case + * bus error reporting is de-activated and + * FLEXCAN_QUIRK_BROKEN_PERR_STATE is enabled: + * +--------------------------------------------------------------+ + * | +----------------------------------------------+ [stopped / | + * | | | sleeping] -+ + * +-+-> active <-> warning <-> passive -> bus off -+ + * ___________^^^^^^^^^^^^_______________________________ + * disabled(1) enabled disabled + * + * (1): enabled if FLEXCAN_QUIRK_BROKEN_WERR_STATE is enabled + */ + if ((last_state != priv->can.state) && + (priv->devtype_data->quirks & FLEXCAN_QUIRK_BROKEN_PERR_STATE) && + !(priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)) { + switch (priv->can.state) { + case CAN_STATE_ERROR_ACTIVE: + if (priv->devtype_data->quirks & + FLEXCAN_QUIRK_BROKEN_WERR_STATE) + flexcan_error_irq_enable(priv); + else + flexcan_error_irq_disable(priv); + break; + + case CAN_STATE_ERROR_WARNING: + flexcan_error_irq_enable(priv); + break; + + case CAN_STATE_ERROR_PASSIVE: + case CAN_STATE_BUS_OFF: + flexcan_error_irq_disable(priv); + break; + + default: + break; + } + } + return handled; } -- cgit v1.2.3-70-g09d2 From cf9c04677f2bf599b44511963039ec6e25583feb Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:05:50 +0000 Subject: can: flexcan: fix i.MX6 state transition issue Enable FLEXCAN_QUIRK_BROKEN_PERR_STATE for i.MX6 to report correct state transitions. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index c83a09fa4166..d6ad12744ff1 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -289,7 +289,7 @@ static const struct flexcan_devtype_data fsl_imx28_devtype_data; static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | - FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, + FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE, }; static const struct flexcan_devtype_data fsl_vf610_devtype_data = { -- cgit v1.2.3-70-g09d2 From 083c5571290a2d4308b75f1a59cf376b6e907808 Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:08:23 +0000 Subject: can: flexcan: fix i.MX28 state transition issue Enable FLEXCAN_QUIRK_BROKEN_PERR_STATE for i.MX28 to report correct state transitions, especially to error passive. Signed-off-by: Wolfgang Grandegger Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index d6ad12744ff1..ed544c44848f 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -285,7 +285,9 @@ static const struct flexcan_devtype_data fsl_p1010_devtype_data = { .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE, }; -static const struct flexcan_devtype_data fsl_imx28_devtype_data; +static const struct flexcan_devtype_data fsl_imx28_devtype_data = { + .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE, +}; static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | -- cgit v1.2.3-70-g09d2 From fb5b91d61bebc24686ffc379138fd67808b1a1e6 Mon Sep 17 00:00:00 2001 From: "ZHU Yi (ST-FIR/ENG1-Zhu)" Date: Fri, 15 Sep 2017 07:09:37 +0000 Subject: can: flexcan: fix p1010 state transition issue Enable FLEXCAN_QUIRK_BROKEN_WERR_STATE and FLEXCAN_QUIRK_BROKEN_PERR_STATE for p1010 to report correct state transitions. Signed-off-by: Zhu Yi Signed-off-by: Mark Jonas Acked-by: Wolfgang Grandegger Cc: linux-stable # >= v4.11 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ed544c44848f..a13a4896a8bd 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -282,7 +282,8 @@ struct flexcan_priv { }; static const struct flexcan_devtype_data fsl_p1010_devtype_data = { - .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE, + .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | + FLEXCAN_QUIRK_BROKEN_PERR_STATE, }; static const struct flexcan_devtype_data fsl_imx28_devtype_data = { -- cgit v1.2.3-70-g09d2 From 62c04647c6f44fa3d5d0c077133da0aa1cbbc34c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Sep 2017 16:02:35 +0100 Subject: can: bcm: check for null sk before deferencing it via the call to sock_net The assignment of net via call sock_net will dereference sk. This is performed before a sanity null check on sk, so there could be a potential null dereference on the sock_net call if sk is null. Fix this by assigning net after the sk null check. Also replace the sk == NULL with the more usual !sk idiom. Detected by CoverityScan CID#1431862 ("Dereference before null check") Fixes: 384317ef4187 ("can: network namespace support for CAN_BCM protocol") Signed-off-by: Colin Ian King Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 47a8748d953a..13690334efa3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1493,13 +1493,14 @@ static int bcm_init(struct sock *sk) static int bcm_release(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); + struct net *net; struct bcm_sock *bo; struct bcm_op *op, *next; - if (sk == NULL) + if (!sk) return 0; + net = sock_net(sk); bo = bcm_sk(sk); /* remove bcm_ops, timer, rx_unregister(), etc. */ -- cgit v1.2.3-70-g09d2 From cae1d5b78fb4874086170ad07921bca59ea2e893 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 17 Oct 2017 07:18:35 +0200 Subject: can: af_can: do not access proto_tab directly use rcu_access_pointer instead "proto_tab" is a RCU protected array, when directly accessing the array, sparse throws these warnings: CHECK /srv/work/frogger/socketcan/linux/net/can/af_can.c net/can/af_can.c:115:14: error: incompatible types in comparison expression (different address spaces) net/can/af_can.c:795:17: error: incompatible types in comparison expression (different address spaces) net/can/af_can.c:816:9: error: incompatible types in comparison expression (different address spaces) This patch fixes the problem by using rcu_access_pointer() and annotating "proto_tab" array as __rcu. Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 88edac0f3e36..eb1ad74b40f4 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -78,7 +78,7 @@ MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)"); static struct kmem_cache *rcv_cache __read_mostly; /* table of registered CAN protocols */ -static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; +static const struct can_proto __rcu *proto_tab[CAN_NPROTO] __read_mostly; static DEFINE_MUTEX(proto_tab_lock); static atomic_t skbcounter = ATOMIC_INIT(0); @@ -788,7 +788,7 @@ int can_proto_register(const struct can_proto *cp) mutex_lock(&proto_tab_lock); - if (proto_tab[proto]) { + if (rcu_access_pointer(proto_tab[proto])) { pr_err("can: protocol %d already registered\n", proto); err = -EBUSY; } else @@ -812,7 +812,7 @@ void can_proto_unregister(const struct can_proto *cp) int proto = cp->protocol; mutex_lock(&proto_tab_lock); - BUG_ON(proto_tab[proto] != cp); + BUG_ON(rcu_access_pointer(proto_tab[proto]) != cp); RCU_INIT_POINTER(proto_tab[proto], NULL); mutex_unlock(&proto_tab_lock); -- cgit v1.2.3-70-g09d2 From 5a606223c6b5b7560da253ed52e62c67fa18e29b Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 29 Jul 2017 11:51:01 +0200 Subject: can: af_can: can_pernet_init(): add missing error handling for kzalloc returning NULL This patch adds the missing check and error handling for out-of-memory situations, when kzalloc cannot allocate memory. Fixes: cb5635a36776 ("can: complete initial namespace support") Acked-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index eb1ad74b40f4..ecd5c703d11e 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -875,9 +875,14 @@ static int can_pernet_init(struct net *net) spin_lock_init(&net->can.can_rcvlists_lock); net->can.can_rx_alldev_list = kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL); - + if (!net->can.can_rx_alldev_list) + goto out; net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL); + if (!net->can.can_stats) + goto out_free_alldev_list; net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL); + if (!net->can.can_pstats) + goto out_free_can_stats; if (IS_ENABLED(CONFIG_PROC_FS)) { /* the statistics are updated every second (timer triggered) */ @@ -892,6 +897,13 @@ static int can_pernet_init(struct net *net) } return 0; + + out_free_can_stats: + kfree(net->can.can_stats); + out_free_alldev_list: + kfree(net->can.can_rx_alldev_list); + out: + return -ENOMEM; } static void can_pernet_exit(struct net *net) -- cgit v1.2.3-70-g09d2 From 72d92e865d1560723e1957ee3f393688c49ca5bf Mon Sep 17 00:00:00 2001 From: Stefan Mätje Date: Wed, 18 Oct 2017 13:25:17 +0200 Subject: can: esd_usb2: Fix can_dlc value for received RTR, frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dlc member of the struct rx_msg contains also the ESD_RTR flag to mark received RTR frames. Without the fix the can_dlc value for received RTR frames would always be set to 8 by get_can_dlc() instead of the received value. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Signed-off-by: Stefan Mätje Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index be928ce62d32..9fdb0f0bfa06 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -333,7 +333,7 @@ static void esd_usb2_rx_can_msg(struct esd_usb2_net_priv *priv, } cf->can_id = id & ESD_IDMASK; - cf->can_dlc = get_can_dlc(msg->msg.rx.dlc); + cf->can_dlc = get_can_dlc(msg->msg.rx.dlc & ~ESD_RTR); if (id & ESD_EXTID) cf->can_id |= CAN_EFF_FLAG; -- cgit v1.2.3-70-g09d2 From 97819f943063b622eca44d3644067c190dc75039 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Thu, 14 Sep 2017 18:37:14 +0200 Subject: can: gs_usb: fix busy loop if no more TX context is available If sending messages with no cable connected, it quickly happens that there is no more TX context available. Then "gs_can_start_xmit()" returns with "NETDEV_TX_BUSY" and the upper layer does retry immediately keeping the CPU busy. To fix that issue, I moved "atomic_dec(&dev->active_tx_urbs)" from "gs_usb_xmit_callback()" to the TX done handling in "gs_usb_receive_bulk_callback()". Renaming "active_tx_urbs" to "active_tx_contexts" and moving it into "gs_[alloc|free]_tx_context()" would also make sense. Signed-off-by: Wolfgang Grandegger Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index afcc1312dbaf..68ac3e88a8ce 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -375,6 +375,8 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) gs_free_tx_context(txc); + atomic_dec(&dev->active_tx_urbs); + netif_wake_queue(netdev); } @@ -463,14 +465,6 @@ static void gs_usb_xmit_callback(struct urb *urb) urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); - - atomic_dec(&dev->active_tx_urbs); - - if (!netif_device_present(netdev)) - return; - - if (netif_queue_stopped(netdev)) - netif_wake_queue(netdev); } static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, -- cgit v1.2.3-70-g09d2 From cd7aea1875c54c69a54a333b75e9d8732503f273 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:03 +0000 Subject: net: ena: reduce the severity of some printouts Decrease log level of checksum errors as these messages can be triggered remotely by bad packets. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index f7dc22f65d9f..7040b9052747 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -966,7 +966,7 @@ static inline void ena_rx_checksum(struct ena_ring *rx_ring, u64_stats_update_begin(&rx_ring->syncp); rx_ring->rx_stats.bad_csum++; u64_stats_update_end(&rx_ring->syncp); - netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, "RX IPv4 header checksum error\n"); return; } @@ -979,7 +979,7 @@ static inline void ena_rx_checksum(struct ena_ring *rx_ring, u64_stats_update_begin(&rx_ring->syncp); rx_ring->rx_stats.bad_csum++; u64_stats_update_end(&rx_ring->syncp); - netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, "RX L4 checksum error\n"); skb->ip_summed = CHECKSUM_NONE; return; -- cgit v1.2.3-70-g09d2 From 411838e7b41c52cf4afa51929cec54c2162472ff Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:04 +0000 Subject: net: ena: fix rare kernel crash when bar memory remap fails This failure is rare and only found on testing where deliberately fail devm_ioremap() [ 451.170464] ena 0000:04:00.0: failed to remap regs bar 451.170549] Workqueue: pciehp-1 pciehp_power_thread [ 451.170551] task: ffff88085a5f2d00 task.stack: ffffc9000756c000 [ 451.170552] RIP: 0010:devm_iounmap+0x2d/0x40 [ 451.170553] RSP: 0018:ffffc9000756fac0 EFLAGS: 00010282 [ 451.170554] RAX: 00000000fffffffe RBX: 0000000000000000 RCX: 0000000000000000 [ 451.170555] RDX: ffffffff813a7e00 RSI: 0000000000000282 RDI: 0000000000000282 [ 451.170556] RBP: ffffc9000756fac8 R08: 00000000fffffffe R09: 00000000000009b7 [ 451.170557] R10: 0000000000000005 R11: 00000000000009b6 R12: ffff880856c9d0a0 [ 451.170558] R13: ffffc9000f5c90c0 R14: ffff880856c9d0a0 R15: 0000000000000028 [ 451.170559] FS: 0000000000000000(0000) GS:ffff88085f400000(0000) knlGS:0000000000000000 [ 451.170560] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 451.170561] CR2: 00007f169038b000 CR3: 0000000001c09000 CR4: 00000000003406f0 [ 451.170562] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 451.170562] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 451.170563] Call Trace: [ 451.170572] ena_release_bars.isra.48+0x34/0x60 [ena] [ 451.170574] ena_probe+0x144/0xd90 [ena] [ 451.170579] ? ida_simple_get+0x98/0x100 [ 451.170585] ? kernfs_next_descendant_post+0x40/0x50 [ 451.170591] local_pci_probe+0x45/0xa0 [ 451.170592] pci_device_probe+0x157/0x180 [ 451.170599] driver_probe_device+0x2a8/0x460 [ 451.170600] __device_attach_driver+0x7e/0xe0 [ 451.170602] ? driver_allows_async_probing+0x30/0x30 [ 451.170603] bus_for_each_drv+0x68/0xb0 [ 451.170605] __device_attach+0xdd/0x160 [ 451.170607] device_attach+0x10/0x20 [ 451.170610] pci_bus_add_device+0x4f/0xa0 [ 451.170611] pci_bus_add_devices+0x39/0x70 [ 451.170613] pciehp_configure_device+0x96/0x120 [ 451.170614] pciehp_enable_slot+0x1b3/0x290 [ 451.170616] pciehp_power_thread+0x3b/0xb0 [ 451.170622] process_one_work+0x149/0x360 [ 451.170623] worker_thread+0x4d/0x3c0 [ 451.170626] kthread+0x109/0x140 [ 451.170627] ? rescuer_thread+0x380/0x380 [ 451.170628] ? kthread_park+0x60/0x60 [ 451.170632] ret_from_fork+0x25/0x30 Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 7040b9052747..c6bd5e24005d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3064,7 +3064,8 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) if (ena_dev->mem_bar) devm_iounmap(&pdev->dev, ena_dev->mem_bar); - devm_iounmap(&pdev->dev, ena_dev->reg_bar); + if (ena_dev->reg_bar) + devm_iounmap(&pdev->dev, ena_dev->reg_bar); release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; pci_release_selected_regions(pdev, release_bars); -- cgit v1.2.3-70-g09d2 From a59df396768a7e37c6ddafeb9666a30c8ac07854 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Tue, 17 Oct 2017 07:33:05 +0000 Subject: net: ena: fix wrong max Tx/Rx queues on ethtool ethtool ena_get_channels() expose the max number of queues as the max number of queues ENA supports (128 queues) and not the actual number of created queues. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index b1212debc2e1..967020fb26ee 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -742,8 +742,8 @@ static void ena_get_channels(struct net_device *netdev, { struct ena_adapter *adapter = netdev_priv(netdev); - channels->max_rx = ENA_MAX_NUM_IO_QUEUES; - channels->max_tx = ENA_MAX_NUM_IO_QUEUES; + channels->max_rx = adapter->num_queues; + channels->max_tx = adapter->num_queues; channels->max_other = 0; channels->max_combined = 0; channels->rx_count = adapter->num_queues; -- cgit v1.2.3-70-g09d2 From 0ea7eeec24be5f04ae80d68f5b1ea3a11f49de2f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Oct 2017 16:55:52 +0200 Subject: mm, percpu: add support for __GFP_NOWARN flag Add an option for pcpu_alloc() to support __GFP_NOWARN flag. Currently, we always throw a warning when size or alignment is unsupported (and also dump stack on failed allocation requests). The warning itself is harmless since we return NULL anyway for any failed request, which callers are required to handle anyway. However, it becomes harmful when panic_on_warn is set. The rationale for the WARN() in pcpu_alloc() is that it can be tracked when larger than supported allocation requests are made such that allocations limits can be tweaked if warranted. This makes sense for in-kernel users, however, there are users of pcpu allocator where allocation size is derived from user space requests, e.g. when creating BPF maps. In these cases, the requests should fail gracefully without throwing a splat. The current work-around was to check allocation size against the upper limit of PCPU_MIN_UNIT_SIZE from call-sites for bailing out prior to a call to pcpu_alloc() in order to avoid throwing the WARN(). This is bad in multiple ways since PCPU_MIN_UNIT_SIZE is an implementation detail, and having the checks on call-sites only complicates the code for no good reason. Thus, lets fix it generically by supporting the __GFP_NOWARN flag that users can then use with calling the __alloc_percpu_gfp() helper instead. Signed-off-by: Daniel Borkmann Cc: Tejun Heo Cc: Mark Rutland Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- mm/percpu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index aa121cef76de..a0e0c82c1e4c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1329,7 +1329,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * @gfp: allocation flags * * Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't - * contain %GFP_KERNEL, the allocation is atomic. + * contain %GFP_KERNEL, the allocation is atomic. If @gfp has __GFP_NOWARN + * then no warning will be triggered on invalid or failed allocation + * requests. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. @@ -1337,10 +1339,11 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, gfp_t gfp) { + bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; + bool do_warn = !(gfp & __GFP_NOWARN); static int warn_limit = 10; struct pcpu_chunk *chunk; const char *err; - bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; int slot, off, cpu, ret; unsigned long flags; void __percpu *ptr; @@ -1361,7 +1364,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE || !is_power_of_2(align))) { - WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n", + WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n", size, align); return NULL; } @@ -1482,7 +1485,7 @@ fail_unlock: fail: trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); - if (!is_atomic && warn_limit) { + if (!is_atomic && do_warn && warn_limit) { pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", size, align, is_atomic, err); dump_stack(); @@ -1507,7 +1510,9 @@ fail: * * Allocate zero-filled percpu area of @size bytes aligned at @align. If * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can - * be called from any context but is a lot more likely to fail. + * be called from any context but is a lot more likely to fail. If @gfp + * has __GFP_NOWARN then no warning will be triggered on invalid or failed + * allocation requests. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. -- cgit v1.2.3-70-g09d2 From 82f8dd28bd3abe181b7a66ea4ea132134d37a400 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Oct 2017 16:55:53 +0200 Subject: bpf: fix splat for illegal devmap percpu allocation It was reported that syzkaller was able to trigger a splat on devmap percpu allocation due to illegal/unsupported allocation request size passed to __alloc_percpu(): [ 70.094249] illegal size (32776) or align (8) for percpu allocation [ 70.094256] ------------[ cut here ]------------ [ 70.094259] WARNING: CPU: 3 PID: 3451 at mm/percpu.c:1365 pcpu_alloc+0x96/0x630 [...] [ 70.094325] Call Trace: [ 70.094328] __alloc_percpu_gfp+0x12/0x20 [ 70.094330] dev_map_alloc+0x134/0x1e0 [ 70.094331] SyS_bpf+0x9bc/0x1610 [ 70.094333] ? selinux_task_setrlimit+0x5a/0x60 [ 70.094334] ? security_task_setrlimit+0x43/0x60 [ 70.094336] entry_SYSCALL_64_fastpath+0x1a/0xa5 This was due to too large max_entries for the map such that we surpassed the upper limit of PCPU_MIN_UNIT_SIZE. It's fine to fail naturally here, so switch to __alloc_percpu_gfp() and pass __GFP_NOWARN instead. Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map") Reported-by: Mark Rutland Reported-by: Shankara Pailoor Reported-by: Richard Weinberger Signed-off-by: Daniel Borkmann Cc: John Fastabend Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/devmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index e093d9a2c4dd..920428d84da2 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -111,8 +111,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) err = -ENOMEM; /* A per cpu bitfield with a bit per possible net device */ - dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr), - __alignof__(unsigned long)); + dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr), + __alignof__(unsigned long), + GFP_KERNEL | __GFP_NOWARN); if (!dtab->flush_needed) goto free_dtab; -- cgit v1.2.3-70-g09d2 From bc6d5031b43a2291de638ab9304320b4cae61689 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Oct 2017 16:55:54 +0200 Subject: bpf: do not test for PCPU_MIN_UNIT_SIZE before percpu allocations PCPU_MIN_UNIT_SIZE is an implementation detail of the percpu allocator. Given we support __GFP_NOWARN now, lets just let the allocation request fail naturally instead. The two call sites from BPF mistakenly assumed __GFP_NOWARN would work, so no changes needed to their actual __alloc_percpu_gfp() calls which use the flag already. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/arraymap.c | 2 +- kernel/bpf/hashtab.c | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 98c0f00c3f5e..e2636737b69b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); if (array_size >= U32_MAX - PAGE_SIZE || - elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { + bpf_array_alloc_percpu(array)) { bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 431126f31ea3..6533f08d1238 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) */ goto free_htab; - if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE) - /* make sure the size for pcpu_alloc() is reasonable */ - goto free_htab; - htab->elem_size = sizeof(struct htab_elem) + round_up(htab->map.key_size, 8); if (percpu) -- cgit v1.2.3-70-g09d2 From df80cd9b28b9ebaa284a41df611dbf3a2d05ca74 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 17 Oct 2017 23:26:10 +0800 Subject: sctp: do not peel off an assoc from one netns to another one Now when peeling off an association to the sock in another netns, all transports in this assoc are not to be rehashed and keep use the old key in hashtable. As a transport uses sk->net as the hash key to insert into hashtable, it would miss removing these transports from hashtable due to the new netns when closing the sock and all transports are being freeed, then later an use-after-free issue could be caused when looking up an asoc and dereferencing those transports. This is a very old issue since very beginning, ChunYu found it with syzkaller fuzz testing with this series: socket$inet6_sctp() bind$inet6() sendto$inet6() unshare(0x40000000) getsockopt$inet_sctp6_SCTP_GET_ASSOC_ID_LIST() getsockopt$inet_sctp6_SCTP_SOCKOPT_PEELOFF() This patch is to block this call when peeling one assoc off from one netns to another one, so that the netns of all transport would not go out-sync with the key in hashtable. Note that this patch didn't fix it by rehashing transports, as it's difficult to handle the situation when the tuple is already in use in the new netns. Besides, no one would like to peel off one assoc to another netns, considering ipaddrs, ifaces, etc. are usually different. Reported-by: ChunYu Wang Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d4730ada7f32..17841ab30798 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4906,6 +4906,10 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) struct socket *sock; int err = 0; + /* Do not peel off from one netns to another one. */ + if (!net_eq(current->nsproxy->net_ns, sock_net(sk))) + return -EINVAL; + if (!asoc) return -EINVAL; -- cgit v1.2.3-70-g09d2 From b3885bd6edb41b91a0e3976469f72ae31bfb8d95 Mon Sep 17 00:00:00 2001 From: Hirofumi Nakagawa Date: Tue, 26 Sep 2017 03:09:53 +0900 Subject: ovl: add NULL check in ovl_alloc_inode This was detected by fault injection test Signed-off-by: Hirofumi Nakagawa Fixes: 13cf199d0088 ("ovl: allocate an ovl_inode struct") Cc: # v4.13 --- fs/overlayfs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 092d150643c1..f5738e96a052 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -174,6 +174,9 @@ static struct inode *ovl_alloc_inode(struct super_block *sb) { struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); + if (!oi) + return NULL; + oi->cache = NULL; oi->redirect = NULL; oi->version = 0; -- cgit v1.2.3-70-g09d2 From 0ce5cdc9d79277e55c3d80bf7d2b1adea2752078 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Sep 2017 23:45:18 +0300 Subject: ovl: Return -ENOMEM if an allocation fails ovl_lookup() The error code is missing here so it means we return ERR_PTR(0) or NULL. The other error paths all return an error code so this probably should as well. Fixes: 02b69b284cd7 ("ovl: lookup redirects") Signed-off-by: Dan Carpenter Reviewed-by: Chandan Rajendra Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 654bea1a5ac9..e08164156cfe 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -635,6 +635,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, } if (d.redirect) { + err = -ENOMEM; upperredirect = kstrdup(d.redirect, GFP_KERNEL); if (!upperredirect) goto out_put_upper; -- cgit v1.2.3-70-g09d2 From bd9f07590a17f3158b51fb869dca723f1f606bdc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 19 Oct 2017 16:00:30 +0300 Subject: nvme-rdma: Fix possible double free in reconnect flow The fact that we free the async event buffer in nvme_rdma_destroy_admin_queue can cause us to free it more than once because this happens in every reconnect attempt since commit 31fdf1840170. we rely on the queue state flags DELETING to avoid this for other resources. A more complete fix is to not destroy the admin/io queues unconditionally on every reconnect attempt, but its a bit more extensive and will go in the next release. Fixes: 31fdf1840170 ("nvme-rdma: reuse configure/destroy_admin_queue") Reported-by: Yi Zhang Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 92a03ff5fb4d..4dbee893a047 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -571,6 +571,12 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; + if (nvme_rdma_queue_idx(queue) == 0) { + nvme_rdma_free_qe(queue->device->dev, + &queue->ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); + } + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); } @@ -739,8 +745,6 @@ out: static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, - sizeof(struct nvme_command), DMA_TO_DEVICE); nvme_rdma_stop_queue(&ctrl->queues[0]); if (remove) { blk_cleanup_queue(ctrl->ctrl.admin_q); -- cgit v1.2.3-70-g09d2 From f04b9cc87b5fc466b1b7231ba7b078e885956c5b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 19 Oct 2017 18:10:53 +0300 Subject: nvme-rdma: Fix error status return in tagset allocation failure We should make sure to escelate allocation failures to prevent a use-after-free in nvmf_create_ctrl. Fixes: b28a308ee777 ("nvme-rdma: move tagset allocation to a dedicated routine") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4dbee893a047..87bac27ec64b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -769,8 +769,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (new) { ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); - if (IS_ERR(ctrl->ctrl.admin_tagset)) + if (IS_ERR(ctrl->ctrl.admin_tagset)) { + error = PTR_ERR(ctrl->ctrl.admin_tagset); goto out_free_queue; + } ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { @@ -850,8 +852,10 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) if (new) { ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false); - if (IS_ERR(ctrl->ctrl.tagset)) + if (IS_ERR(ctrl->ctrl.tagset)) { + ret = PTR_ERR(ctrl->ctrl.tagset); goto out_free_io_queues; + } ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); if (IS_ERR(ctrl->ctrl.connect_q)) { -- cgit v1.2.3-70-g09d2 From 8633e4f2e94a4d12cfb413fedfe4c072a6b99a79 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 16 Oct 2017 16:26:07 +0200 Subject: ARM: dts: fix PCLK name on Gemini and MOXA ART These platforms provide a clock to their watchdog, in each case this is the peripheral clock (PCLK), so explicitly name the clock in the device tree. Take this opportunity to add the "faraday,ftwdt010" compatible as fallback to the watchdog IP blocks. Cc: Jonas Jensen Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/gemini.dtsi | 3 ++- arch/arm/boot/dts/moxart.dtsi | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/gemini.dtsi b/arch/arm/boot/dts/gemini.dtsi index c68e8d430234..f0d178c77153 100644 --- a/arch/arm/boot/dts/gemini.dtsi +++ b/arch/arm/boot/dts/gemini.dtsi @@ -145,11 +145,12 @@ }; watchdog@41000000 { - compatible = "cortina,gemini-watchdog"; + compatible = "cortina,gemini-watchdog", "faraday,ftwdt010"; reg = <0x41000000 0x1000>; interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; resets = <&syscon GEMINI_RESET_WDOG>; clocks = <&syscon GEMINI_CLK_APB>; + clock-names = "PCLK"; }; uart0: serial@42000000 { diff --git a/arch/arm/boot/dts/moxart.dtsi b/arch/arm/boot/dts/moxart.dtsi index 1f4c795d3f72..da7b3237bfe9 100644 --- a/arch/arm/boot/dts/moxart.dtsi +++ b/arch/arm/boot/dts/moxart.dtsi @@ -87,9 +87,10 @@ }; watchdog: watchdog@98500000 { - compatible = "moxa,moxart-watchdog"; + compatible = "moxa,moxart-watchdog", "faraday,ftwdt010"; reg = <0x98500000 0x10>; clocks = <&clk_apb>; + clock-names = "PCLK"; }; sdhci: sdhci@98e00000 { -- cgit v1.2.3-70-g09d2 From 72ecd793865f91c3cdeb06884f230f38f434c67c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 6 Oct 2017 06:20:25 +0200 Subject: ARM: ux500: Fix regression while init PM domains The commit afece3ab9a36 ("PM / Domains: Add time accounting to various genpd states") causes a boot regression for ux500. The problem occurs when the ux500 machine code calls pm_genpd_init(), which since the above change triggers a call to ktime_get(). More precisely, because ux500 initializes PM domains in the init_IRQ() phase of the boot, timekeeping has not yet been initialized. Fix the problem by moving the initialization of the PM domains to after timekeeping has been initialized. Fixes: afece3ab9a36 ("PM / Domains: Add time accounting to various genpd..") Cc: Thara Gopinath Cc: "Rafael J. Wysocki" Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/mach-ux500/cpu-db8500.c | 4 ++++ arch/arm/mach-ux500/pm.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 71a34e8c345a..57058ac46f49 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -32,6 +32,7 @@ #include #include "db8500-regs.h" +#include "pm_domains.h" static int __init ux500_l2x0_unlock(void) { @@ -157,6 +158,9 @@ static const struct of_device_id u8500_local_bus_nodes[] = { static void __init u8500_init_machine(void) { + /* Initialize ux500 power domains */ + ux500_pm_domains_init(); + /* automatically probe child nodes of dbx5x0 devices */ if (of_machine_is_compatible("st-ericsson,u8540")) of_platform_populate(NULL, u8500_local_bus_nodes, diff --git a/arch/arm/mach-ux500/pm.c b/arch/arm/mach-ux500/pm.c index a970e7fcba9e..f6c33a0c1c61 100644 --- a/arch/arm/mach-ux500/pm.c +++ b/arch/arm/mach-ux500/pm.c @@ -19,7 +19,6 @@ #include #include "db8500-regs.h" -#include "pm_domains.h" /* ARM WFI Standby signal register */ #define PRCM_ARM_WFI_STANDBY (prcmu_base + 0x130) @@ -203,7 +202,4 @@ void __init ux500_pm_init(u32 phy_base, u32 size) /* Set up ux500 suspend callbacks. */ suspend_set_ops(UX500_SUSPEND_OPS); - - /* Initialize ux500 power domains */ - ux500_pm_domains_init(); } -- cgit v1.2.3-70-g09d2 From 528fd3547bad0bdd31c8f987e5bd00c83df8af39 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Oct 2017 12:13:10 -0400 Subject: SUNRPC: Destroy transport from the system workqueue The transport may need to flush transport connect and receive tasks that are running on rpciod. In order to do so safely, we need to ensure that the caller of cancel_work_sync() etc is not itself running on rpciod. Do so by running the destroy task from the system workqueue. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1a39ad14c42f..898485e3ece4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1445,6 +1445,23 @@ out: return xprt; } +static void xprt_destroy_cb(struct work_struct *work) +{ + struct rpc_xprt *xprt = + container_of(work, struct rpc_xprt, task_cleanup); + + rpc_xprt_debugfs_unregister(xprt); + rpc_destroy_wait_queue(&xprt->binding); + rpc_destroy_wait_queue(&xprt->pending); + rpc_destroy_wait_queue(&xprt->sending); + rpc_destroy_wait_queue(&xprt->backlog); + kfree(xprt->servername); + /* + * Tear down transport state and free the rpc_xprt + */ + xprt->ops->destroy(xprt); +} + /** * xprt_destroy - destroy an RPC transport, killing off all requests. * @xprt: transport to destroy @@ -1454,22 +1471,19 @@ static void xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); - /* Exclude transport connect/disconnect handlers */ + /* + * Exclude transport connect/disconnect handlers and autoclose + */ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); del_timer_sync(&xprt->timer); - rpc_xprt_debugfs_unregister(xprt); - rpc_destroy_wait_queue(&xprt->binding); - rpc_destroy_wait_queue(&xprt->pending); - rpc_destroy_wait_queue(&xprt->sending); - rpc_destroy_wait_queue(&xprt->backlog); - cancel_work_sync(&xprt->task_cleanup); - kfree(xprt->servername); /* - * Tear down transport state and free the rpc_xprt + * Destroy sockets etc from the system workqueue so they can + * safely flush receive work running on rpciod. */ - xprt->ops->destroy(xprt); + INIT_WORK(&xprt->task_cleanup, xprt_destroy_cb); + schedule_work(&xprt->task_cleanup); } static void xprt_destroy_kref(struct kref *kref) -- cgit v1.2.3-70-g09d2 From 8f75bc3377fa6f2af16383cc8346abd81909353f Mon Sep 17 00:00:00 2001 From: Damien Riegel Date: Thu, 19 Oct 2017 15:34:55 -0700 Subject: Input: tca8418 - enable interrupt after it has been requested Currently, enabling keypad interrupts is one of the first operations done on the keypad, even before the interrupt is requested, so there is a small time window where the keypad can fire interrupts but the driver is not yet ready to handle them. It's fine for level interrupts because they will be handled anyway, but not so much for edge ones. This commit modifies and moves the function in charge of configuring the keypad. Enabling interrupts is now the last thing done on the keypad, and after the interrupt has been requested by the driver. Writing to the config register was also used to determine if the device was indeed present on the bus or not, this has been replaced by reading the lock/event count register to keep the same functionality. Signed-off-by: Damien Riegel Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/tca8418_keypad.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/input/keyboard/tca8418_keypad.c b/drivers/input/keyboard/tca8418_keypad.c index e37e335e406f..6da607d3b811 100644 --- a/drivers/input/keyboard/tca8418_keypad.c +++ b/drivers/input/keyboard/tca8418_keypad.c @@ -234,14 +234,7 @@ static irqreturn_t tca8418_irq_handler(int irq, void *dev_id) static int tca8418_configure(struct tca8418_keypad *keypad_data, u32 rows, u32 cols) { - int reg, error; - - /* Write config register, if this fails assume device not present */ - error = tca8418_write_byte(keypad_data, REG_CFG, - CFG_INT_CFG | CFG_OVR_FLOW_IEN | CFG_KE_IEN); - if (error < 0) - return -ENODEV; - + int reg, error = 0; /* Assemble a mask for row and column registers */ reg = ~(~0 << rows); @@ -257,6 +250,12 @@ static int tca8418_configure(struct tca8418_keypad *keypad_data, error |= tca8418_write_byte(keypad_data, REG_DEBOUNCE_DIS2, reg >> 8); error |= tca8418_write_byte(keypad_data, REG_DEBOUNCE_DIS3, reg >> 16); + if (error) + return error; + + error = tca8418_write_byte(keypad_data, REG_CFG, + CFG_INT_CFG | CFG_OVR_FLOW_IEN | CFG_KE_IEN); + return error; } @@ -268,6 +267,7 @@ static int tca8418_keypad_probe(struct i2c_client *client, struct input_dev *input; u32 rows = 0, cols = 0; int error, row_shift, max_keys; + u8 reg; /* Check i2c driver capabilities */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE)) { @@ -301,10 +301,10 @@ static int tca8418_keypad_probe(struct i2c_client *client, keypad_data->client = client; keypad_data->row_shift = row_shift; - /* Initialize the chip or fail if chip isn't present */ - error = tca8418_configure(keypad_data, rows, cols); - if (error < 0) - return error; + /* Read key lock register, if this fails assume device not present */ + error = tca8418_read_byte(keypad_data, REG_KEY_LCK_EC, ®); + if (error) + return -ENODEV; /* Configure input device */ input = devm_input_allocate_device(dev); @@ -340,6 +340,11 @@ static int tca8418_keypad_probe(struct i2c_client *client, return error; } + /* Initialize the chip */ + error = tca8418_configure(keypad_data, rows, cols); + if (error < 0) + return error; + error = input_register_device(input); if (error) { dev_err(dev, "Unable to register input device, error: %d\n", -- cgit v1.2.3-70-g09d2 From 481c209fa016a9e594427a306718cdf48ceeb1c6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 19 Oct 2017 15:38:50 -0700 Subject: Input: axp20x-pek - fix module not auto-loading for axp221 pek Now that we have a platform_device_id table and multiple supported ids we should be using MODULE_DEVICE_TABLE instead of MODULE_ALIAS. This fixes a regression on Bay and Cherry Trail devices, where the power button is now enumerated as an "axp221-pek" and it was impossible to wakeup these devices from suspend since the module did not load. Fixes: c3cc94470bd3 ("Input: axp20x-pek - add support for AXP221 PEK") Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/misc/axp20x-pek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index 6cee5adc3b5c..debeeaeb8812 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -403,6 +403,7 @@ static const struct platform_device_id axp_pek_id_match[] = { }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(platform, axp_pek_id_match); static struct platform_driver axp20x_pek_driver = { .probe = axp20x_pek_probe, @@ -417,4 +418,3 @@ module_platform_driver(axp20x_pek_driver); MODULE_DESCRIPTION("axp20x Power Button"); MODULE_AUTHOR("Carlo Caione "); MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:axp20x-pek"); -- cgit v1.2.3-70-g09d2 From 9b5db7aab4d6b66f84f5e147c87eff4fe8b48651 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 13 Oct 2017 11:04:48 -0700 Subject: Input: goodix - poll the 'buffer status' bit before reading data The Goodix panel triggers an interrupt on touch events. However, its registers will contain the valid values a short time after the interrupt, and not when it's raised. At that moment, the 'buffer status' bit is set. Previously, if the 'buffer status' bit was not set when the registers were read, the data was discarded and no input event was emitted, causing "finger down" or "finger up" events to be missed sometimes. This went unnoticed until v4.9, as the DesignWare I2C driver commonly used with this driver had enough latency for that bug to never trigger until commit 2702ea7dbec5 ("i2c: designware: wait for disable/enable only if necessary"). Now, in the IRQ handler we will poll (with a timeout) the 'buffer status' bit and process the data of the panel as soon as this bit gets set. Note that the Goodix panel will send a few spurious interrupts after the 'finger up' event, in which the 'buffer status' bit will never be set. Cc: Bastien Nocera Cc: russianneuromancer@ya.ru Signed-off-by: Paul Cercueil [hdegoede@redhat.com: Change poll loop to use jiffies, add comment about typical poll time] Signed-off-by: Hans de Goede [dtor: rearranged control flow a bit to avoid explicit goto and double check] Reviewed-by: Bastien Nocera Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 67 +++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 32d2762448aa..b3bbad7d2282 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -72,6 +72,9 @@ struct goodix_ts_data { #define GOODIX_REG_CONFIG_DATA 0x8047 #define GOODIX_REG_ID 0x8140 +#define GOODIX_BUFFER_STATUS_READY BIT(7) +#define GOODIX_BUFFER_STATUS_TIMEOUT 20 + #define RESOLUTION_LOC 1 #define MAX_CONTACTS_LOC 5 #define TRIGGER_LOC 6 @@ -195,35 +198,53 @@ static int goodix_get_cfg_len(u16 id) static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) { + unsigned long max_timeout; int touch_num; int error; - error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR, data, - GOODIX_CONTACT_SIZE + 1); - if (error) { - dev_err(&ts->client->dev, "I2C transfer error: %d\n", error); - return error; - } + /* + * The 'buffer status' bit, which indicates that the data is valid, is + * not set as soon as the interrupt is raised, but slightly after. + * This takes around 10 ms to happen, so we poll for 20 ms. + */ + max_timeout = jiffies + msecs_to_jiffies(GOODIX_BUFFER_STATUS_TIMEOUT); + do { + error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR, + data, GOODIX_CONTACT_SIZE + 1); + if (error) { + dev_err(&ts->client->dev, "I2C transfer error: %d\n", + error); + return error; + } - if (!(data[0] & 0x80)) - return -EAGAIN; + if (data[0] & GOODIX_BUFFER_STATUS_READY) { + touch_num = data[0] & 0x0f; + if (touch_num > ts->max_touch_num) + return -EPROTO; + + if (touch_num > 1) { + data += 1 + GOODIX_CONTACT_SIZE; + error = goodix_i2c_read(ts->client, + GOODIX_READ_COOR_ADDR + + 1 + GOODIX_CONTACT_SIZE, + data, + GOODIX_CONTACT_SIZE * + (touch_num - 1)); + if (error) + return error; + } + + return touch_num; + } - touch_num = data[0] & 0x0f; - if (touch_num > ts->max_touch_num) - return -EPROTO; - - if (touch_num > 1) { - data += 1 + GOODIX_CONTACT_SIZE; - error = goodix_i2c_read(ts->client, - GOODIX_READ_COOR_ADDR + - 1 + GOODIX_CONTACT_SIZE, - data, - GOODIX_CONTACT_SIZE * (touch_num - 1)); - if (error) - return error; - } + usleep_range(1000, 2000); /* Poll every 1 - 2 ms */ + } while (time_before(jiffies, max_timeout)); - return touch_num; + /* + * The Goodix panel will send spurious interrupts after a + * 'finger up' event, which will always cause a timeout. + */ + return 0; } static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data) -- cgit v1.2.3-70-g09d2 From 55dfce873dca46df00304c44a568d7933bffff89 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 9 Oct 2017 11:09:33 -0700 Subject: Input: factor out and export input_device_id matching code Factor out and export input_match_device_id() so that modules may use it. It will be needed by joydev to blacklist accelerometers in composite devices. Tested-by: Roderick Colenbrander Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 83 +++++++++++++++++++++++---------------------------- include/linux/input.h | 3 ++ 2 files changed, 41 insertions(+), 45 deletions(-) diff --git a/drivers/input/input.c b/drivers/input/input.c index d268fdc23c64..02e6ea7955fe 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -933,58 +933,51 @@ int input_set_keycode(struct input_dev *dev, } EXPORT_SYMBOL(input_set_keycode); +bool input_match_device_id(const struct input_dev *dev, + const struct input_device_id *id) +{ + if (id->flags & INPUT_DEVICE_ID_MATCH_BUS) + if (id->bustype != dev->id.bustype) + return false; + + if (id->flags & INPUT_DEVICE_ID_MATCH_VENDOR) + if (id->vendor != dev->id.vendor) + return false; + + if (id->flags & INPUT_DEVICE_ID_MATCH_PRODUCT) + if (id->product != dev->id.product) + return false; + + if (id->flags & INPUT_DEVICE_ID_MATCH_VERSION) + if (id->version != dev->id.version) + return false; + + if (!bitmap_subset(id->evbit, dev->evbit, EV_MAX) || + !bitmap_subset(id->keybit, dev->keybit, KEY_MAX) || + !bitmap_subset(id->relbit, dev->relbit, REL_MAX) || + !bitmap_subset(id->absbit, dev->absbit, ABS_MAX) || + !bitmap_subset(id->mscbit, dev->mscbit, MSC_MAX) || + !bitmap_subset(id->ledbit, dev->ledbit, LED_MAX) || + !bitmap_subset(id->sndbit, dev->sndbit, SND_MAX) || + !bitmap_subset(id->ffbit, dev->ffbit, FF_MAX) || + !bitmap_subset(id->swbit, dev->swbit, SW_MAX)) { + return false; + } + + return true; +} +EXPORT_SYMBOL(input_match_device_id); + static const struct input_device_id *input_match_device(struct input_handler *handler, struct input_dev *dev) { const struct input_device_id *id; for (id = handler->id_table; id->flags || id->driver_info; id++) { - - if (id->flags & INPUT_DEVICE_ID_MATCH_BUS) - if (id->bustype != dev->id.bustype) - continue; - - if (id->flags & INPUT_DEVICE_ID_MATCH_VENDOR) - if (id->vendor != dev->id.vendor) - continue; - - if (id->flags & INPUT_DEVICE_ID_MATCH_PRODUCT) - if (id->product != dev->id.product) - continue; - - if (id->flags & INPUT_DEVICE_ID_MATCH_VERSION) - if (id->version != dev->id.version) - continue; - - if (!bitmap_subset(id->evbit, dev->evbit, EV_MAX)) - continue; - - if (!bitmap_subset(id->keybit, dev->keybit, KEY_MAX)) - continue; - - if (!bitmap_subset(id->relbit, dev->relbit, REL_MAX)) - continue; - - if (!bitmap_subset(id->absbit, dev->absbit, ABS_MAX)) - continue; - - if (!bitmap_subset(id->mscbit, dev->mscbit, MSC_MAX)) - continue; - - if (!bitmap_subset(id->ledbit, dev->ledbit, LED_MAX)) - continue; - - if (!bitmap_subset(id->sndbit, dev->sndbit, SND_MAX)) - continue; - - if (!bitmap_subset(id->ffbit, dev->ffbit, FF_MAX)) - continue; - - if (!bitmap_subset(id->swbit, dev->swbit, SW_MAX)) - continue; - - if (!handler->match || handler->match(handler, dev)) + if (input_match_device_id(dev, id) && + (!handler->match || handler->match(handler, dev))) { return id; + } } return NULL; diff --git a/include/linux/input.h b/include/linux/input.h index fb5e23c7ed98..2a44650e449d 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -469,6 +469,9 @@ int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, const struct input_keymap_entry *ke); +bool input_match_device_id(const struct input_dev *dev, + const struct input_device_id *id); + void input_enable_softrepeat(struct input_dev *dev, int delay, int period); extern struct class input_class; -- cgit v1.2.3-70-g09d2 From 8724ecb072293f109a6f5dc93be8a98bf61fe14f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 9 Oct 2017 12:01:14 -0700 Subject: Input: allow matching device IDs on property bits Let's allow matching input devices on their property bits, both in-kernel and when generating module aliases. Tested-by: Roderick Colenbrander Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 3 ++- include/linux/input.h | 4 ++++ include/linux/mod_devicetable.h | 3 +++ scripts/mod/devicetable-offsets.c | 1 + scripts/mod/file2alias.c | 6 +++++- 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/input/input.c b/drivers/input/input.c index 02e6ea7955fe..762bfb9487dc 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -960,7 +960,8 @@ bool input_match_device_id(const struct input_dev *dev, !bitmap_subset(id->ledbit, dev->ledbit, LED_MAX) || !bitmap_subset(id->sndbit, dev->sndbit, SND_MAX) || !bitmap_subset(id->ffbit, dev->ffbit, FF_MAX) || - !bitmap_subset(id->swbit, dev->swbit, SW_MAX)) { + !bitmap_subset(id->swbit, dev->swbit, SW_MAX) || + !bitmap_subset(id->propbit, dev->propbit, INPUT_PROP_MAX)) { return false; } diff --git a/include/linux/input.h b/include/linux/input.h index 2a44650e449d..7c7516eb7d76 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -234,6 +234,10 @@ struct input_dev { #error "SW_MAX and INPUT_DEVICE_ID_SW_MAX do not match" #endif +#if INPUT_PROP_MAX != INPUT_DEVICE_ID_PROP_MAX +#error "INPUT_PROP_MAX and INPUT_DEVICE_ID_PROP_MAX do not match" +#endif + #define INPUT_DEVICE_ID_MATCH_DEVICE \ (INPUT_DEVICE_ID_MATCH_BUS | INPUT_DEVICE_ID_MATCH_VENDOR | INPUT_DEVICE_ID_MATCH_PRODUCT) #define INPUT_DEVICE_ID_MATCH_DEVICE_AND_VERSION \ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 3f74ef2281e8..72f0b7f19c59 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -293,6 +293,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_SND_MAX 0x07 #define INPUT_DEVICE_ID_FF_MAX 0x7f #define INPUT_DEVICE_ID_SW_MAX 0x0f +#define INPUT_DEVICE_ID_PROP_MAX 0x1f #define INPUT_DEVICE_ID_MATCH_BUS 1 #define INPUT_DEVICE_ID_MATCH_VENDOR 2 @@ -308,6 +309,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_MATCH_SNDBIT 0x0400 #define INPUT_DEVICE_ID_MATCH_FFBIT 0x0800 #define INPUT_DEVICE_ID_MATCH_SWBIT 0x1000 +#define INPUT_DEVICE_ID_MATCH_PROPBIT 0x2000 struct input_device_id { @@ -327,6 +329,7 @@ struct input_device_id { kernel_ulong_t sndbit[INPUT_DEVICE_ID_SND_MAX / BITS_PER_LONG + 1]; kernel_ulong_t ffbit[INPUT_DEVICE_ID_FF_MAX / BITS_PER_LONG + 1]; kernel_ulong_t swbit[INPUT_DEVICE_ID_SW_MAX / BITS_PER_LONG + 1]; + kernel_ulong_t propbit[INPUT_DEVICE_ID_PROP_MAX / BITS_PER_LONG + 1]; kernel_ulong_t driver_info; }; diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e4d90e50f6fe..812657ab5aa3 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -105,6 +105,7 @@ int main(void) DEVID_FIELD(input_device_id, sndbit); DEVID_FIELD(input_device_id, ffbit); DEVID_FIELD(input_device_id, swbit); + DEVID_FIELD(input_device_id, propbit); DEVID(eisa_device_id); DEVID_FIELD(eisa_device_id, sig); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 29d6699d5a06..bc25898f6df0 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -761,7 +761,7 @@ static void do_input(char *alias, sprintf(alias + strlen(alias), "%X,*", i); } -/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. */ +/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwXprX where X is comma-separated %02X. */ static int do_input_entry(const char *filename, void *symval, char *alias) { @@ -779,6 +779,7 @@ static int do_input_entry(const char *filename, void *symval, DEF_FIELD_ADDR(symval, input_device_id, sndbit); DEF_FIELD_ADDR(symval, input_device_id, ffbit); DEF_FIELD_ADDR(symval, input_device_id, swbit); + DEF_FIELD_ADDR(symval, input_device_id, propbit); sprintf(alias, "input:"); @@ -816,6 +817,9 @@ static int do_input_entry(const char *filename, void *symval, sprintf(alias + strlen(alias), "w*"); if (flags & INPUT_DEVICE_ID_MATCH_SWBIT) do_input(alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX); + sprintf(alias + strlen(alias), "pr*"); + if (flags & INPUT_DEVICE_ID_MATCH_PROPBIT) + do_input(alias, *propbit, 0, INPUT_DEVICE_ID_PROP_MAX); return 1; } ADD_TO_DEVTABLE("input", input_device_id, do_input_entry); -- cgit v1.2.3-70-g09d2 From 20ac95d52a28f55472a54cc751eeec49fd445cb1 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Mon, 9 Oct 2017 12:02:03 -0700 Subject: Input: joydev - blacklist ds3/ds4/udraw motion sensors Introduce a device table used for blacklisting devices. We currently blacklist the motion sensor subdevice of THQ Udraw and Sony ds3/ds4. Signed-off-by: Roderick Colenbrander [dtor: siwtched to blacklist built on input_device_id and using input_match_device_id()] Signed-off-by: Dmitry Torokhov --- drivers/input/joydev.c | 70 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index 29d677c714d2..7b29a8944039 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -747,6 +747,68 @@ static void joydev_cleanup(struct joydev *joydev) input_close_device(handle); } +/* + * These codes are copied from from hid-ids.h, unfortunately there is no common + * usb_ids/bt_ids.h header. + */ +#define USB_VENDOR_ID_SONY 0x054c +#define USB_DEVICE_ID_SONY_PS3_CONTROLLER 0x0268 +#define USB_DEVICE_ID_SONY_PS4_CONTROLLER 0x05c4 +#define USB_DEVICE_ID_SONY_PS4_CONTROLLER_2 0x09cc +#define USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE 0x0ba0 + +#define USB_VENDOR_ID_THQ 0x20d6 +#define USB_DEVICE_ID_THQ_PS3_UDRAW 0xcb17 + +#define ACCEL_DEV(vnd, prd) \ + { \ + .flags = INPUT_DEVICE_ID_MATCH_VENDOR | \ + INPUT_DEVICE_ID_MATCH_PRODUCT | \ + INPUT_DEVICE_ID_MATCH_PROPBIT, \ + .vendor = (vnd), \ + .product = (prd), \ + .propbit = { BIT_MASK(INPUT_PROP_ACCELEROMETER) }, \ + } + +static const struct input_device_id joydev_blacklist[] = { + /* Avoid touchpads and touchscreens */ + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) }, + }, + /* Avoid tablets, digitisers and similar devices */ + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | + INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(BTN_DIGI)] = BIT_MASK(BTN_DIGI) }, + }, + /* Disable accelerometers on composite devices */ + ACCEL_DEV(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER), + ACCEL_DEV(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER), + ACCEL_DEV(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_2), + ACCEL_DEV(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE), + ACCEL_DEV(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW), + { /* sentinel */ } +}; + +static bool joydev_dev_is_blacklisted(struct input_dev *dev) +{ + const struct input_device_id *id; + + for (id = joydev_blacklist; id->flags; id++) { + if (input_match_device_id(dev, id)) { + dev_dbg(&dev->dev, + "joydev: blacklisting '%s'\n", dev->name); + return true; + } + } + + return false; +} + static bool joydev_dev_is_absolute_mouse(struct input_dev *dev) { DECLARE_BITMAP(jd_scratch, KEY_CNT); @@ -807,12 +869,8 @@ static bool joydev_dev_is_absolute_mouse(struct input_dev *dev) static bool joydev_match(struct input_handler *handler, struct input_dev *dev) { - /* Avoid touchpads and touchscreens */ - if (test_bit(EV_KEY, dev->evbit) && test_bit(BTN_TOUCH, dev->keybit)) - return false; - - /* Avoid tablets, digitisers and similar devices */ - if (test_bit(EV_KEY, dev->evbit) && test_bit(BTN_DIGI, dev->keybit)) + /* Disable blacklisted devices */ + if (joydev_dev_is_blacklisted(dev)) return false; /* Avoid absolute mice */ -- cgit v1.2.3-70-g09d2 From ea04efee7635c9120d015dcdeeeb6988130cb67a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 7 Oct 2017 11:07:47 -0700 Subject: Input: ims-psu - check if CDC union descriptor is sane Before trying to use CDC union descriptor, try to validate whether that it is sane by checking that intf->altsetting->extra is big enough and that descriptor bLength is not too big and not too small. Reported-by: Andrey Konovalov Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ims-pcu.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index 6bf82ea8c918..ae473123583b 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1635,13 +1635,25 @@ ims_pcu_get_cdc_union_desc(struct usb_interface *intf) return NULL; } - while (buflen > 0) { + while (buflen >= sizeof(*union_desc)) { union_desc = (struct usb_cdc_union_desc *)buf; + if (union_desc->bLength > buflen) { + dev_err(&intf->dev, "Too large descriptor\n"); + return NULL; + } + if (union_desc->bDescriptorType == USB_DT_CS_INTERFACE && union_desc->bDescriptorSubType == USB_CDC_UNION_TYPE) { dev_dbg(&intf->dev, "Found union header\n"); - return union_desc; + + if (union_desc->bLength >= sizeof(*union_desc)) + return union_desc; + + dev_err(&intf->dev, + "Union descriptor to short (%d vs %zd\n)", + union_desc->bLength, sizeof(*union_desc)); + return NULL; } buflen -= union_desc->bLength; -- cgit v1.2.3-70-g09d2 From a961e40917fb14614d368d8bc9782ca4d6a8cd11 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 19 Oct 2017 13:30:15 -0400 Subject: membarrier: Provide register expedited private command This introduces a "register private expedited" membarrier command which allows eventual removal of important memory barrier constraints on the scheduler fast-paths. It changes how the "private expedited" membarrier command (new to 4.14) is used from user-space. This new command allows processes to register their intent to use the private expedited command. This affects how the expedited private command introduced in 4.14-rc is meant to be used, and should be merged before 4.14 final. Processes are now required to register before using MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM. This fixes a problem that arose when designing requested extensions to sys_membarrier() to allow JITs to efficiently flush old code from instruction caches. Several potential algorithms are much less painful if the user register intent to use this functionality early on, for example, before the process spawns the second thread. Registering at this time removes the need to interrupt each and every thread in that process at the first expedited sys_membarrier() system call. Signed-off-by: Mathieu Desnoyers Acked-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Alexander Viro Signed-off-by: Linus Torvalds --- fs/exec.c | 1 + include/linux/mm_types.h | 3 +++ include/linux/sched/mm.h | 16 ++++++++++++++++ include/uapi/linux/membarrier.h | 23 ++++++++++++++++------- kernel/sched/membarrier.c | 34 ++++++++++++++++++++++++++++++---- 5 files changed, 66 insertions(+), 11 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 5470d3c1892a..3e14ba25f678 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; + membarrier_execve(current); acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 46f4ecf5479a..1861ea8dba77 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -445,6 +445,9 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ +#ifdef CONFIG_MEMBARRIER + atomic_t membarrier_state; +#endif #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index ae53e413fb13..ab9bf7b73954 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC) | flags; } +#ifdef CONFIG_MEMBARRIER +enum { + MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), + MEMBARRIER_STATE_SWITCH_MM = (1U << 1), +}; + +static inline void membarrier_execve(struct task_struct *t) +{ + atomic_set(&t->mm->membarrier_state, 0); +} +#else +static inline void membarrier_execve(struct task_struct *t) +{ +} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h index 6d47b3249d8a..4e01ad7ffe98 100644 --- a/include/uapi/linux/membarrier.h +++ b/include/uapi/linux/membarrier.h @@ -52,21 +52,30 @@ * (non-running threads are de facto in such a * state). This only covers threads from the * same processes as the caller thread. This - * command returns 0. The "expedited" commands - * complete faster than the non-expedited ones, - * they never block, but have the downside of - * causing extra overhead. + * command returns 0 on success. The + * "expedited" commands complete faster than + * the non-expedited ones, they never block, + * but have the downside of causing extra + * overhead. A process needs to register its + * intent to use the private expedited command + * prior to using it, otherwise this command + * returns -EPERM. + * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + * Register the process intent to use + * MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always + * returns 0. * * Command to be passed to the membarrier system call. The commands need to * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to * the value 0. */ enum membarrier_cmd { - MEMBARRIER_CMD_QUERY = 0, - MEMBARRIER_CMD_SHARED = (1 << 0), + MEMBARRIER_CMD_QUERY = 0, + MEMBARRIER_CMD_SHARED = (1 << 0), /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */ /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */ - MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_PRIVATE_EXPEDITED = (1 << 3), + MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED = (1 << 4), }; #endif /* _UAPI_LINUX_MEMBARRIER_H */ diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index a92fddc22747..dd7908743dab 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "sched.h" /* for cpu_rq(). */ @@ -26,21 +27,26 @@ * except MEMBARRIER_CMD_QUERY. */ #define MEMBARRIER_CMD_BITMASK \ - (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED) + (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED) static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ } -static void membarrier_private_expedited(void) +static int membarrier_private_expedited(void) { int cpu; bool fallback = false; cpumask_var_t tmpmask; + if (!(atomic_read(¤t->mm->membarrier_state) + & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)) + return -EPERM; + if (num_online_cpus() == 1) - return; + return 0; /* * Matches memory barriers around rq->curr modification in @@ -94,6 +100,24 @@ static void membarrier_private_expedited(void) * rq->curr modification in scheduler. */ smp_mb(); /* exit from system call is not a mb */ + return 0; +} + +static void membarrier_register_private_expedited(void) +{ + struct task_struct *p = current; + struct mm_struct *mm = p->mm; + + /* + * We need to consider threads belonging to different thread + * groups, which use the same mm. (CLONE_VM but not + * CLONE_THREAD). + */ + if (atomic_read(&mm->membarrier_state) + & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY) + return; + atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, + &mm->membarrier_state); } /** @@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) synchronize_sched(); return 0; case MEMBARRIER_CMD_PRIVATE_EXPEDITED: - membarrier_private_expedited(); + return membarrier_private_expedited(); + case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED: + membarrier_register_private_expedited(); return 0; default: return -EINVAL; -- cgit v1.2.3-70-g09d2 From 533966c8ad9ec779d81179ea6a182055066c62a3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Oct 2017 14:26:20 -0700 Subject: doc: Fix RCU's docbook options Commit 764f80798b95 ("doc: Add RCU files to docbook-generation files") added :external: options for RCU source files in the file Documentation/core-api/kernel-api.rst. However, this now means nothing, so this commit removes them. Reported-by: Randy Dunlap Reported-by: Akira Yokosawa Signed-off-by: Paul E. McKenney Signed-off-by: Linus Torvalds --- Documentation/core-api/kernel-api.rst | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index 8282099e0cbf..5da10184d908 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -352,44 +352,30 @@ Read-Copy Update (RCU) ---------------------- .. kernel-doc:: include/linux/rcupdate.h - :external: .. kernel-doc:: include/linux/rcupdate_wait.h - :external: .. kernel-doc:: include/linux/rcutree.h - :external: .. kernel-doc:: kernel/rcu/tree.c - :external: .. kernel-doc:: kernel/rcu/tree_plugin.h - :external: .. kernel-doc:: kernel/rcu/tree_exp.h - :external: .. kernel-doc:: kernel/rcu/update.c - :external: .. kernel-doc:: include/linux/srcu.h - :external: .. kernel-doc:: kernel/rcu/srcutree.c - :external: .. kernel-doc:: include/linux/rculist_bl.h - :external: .. kernel-doc:: include/linux/rculist.h - :external: .. kernel-doc:: include/linux/rculist_nulls.h - :external: .. kernel-doc:: include/linux/rcu_sync.h - :external: .. kernel-doc:: kernel/rcu/sync.c - :external: -- cgit v1.2.3-70-g09d2 From 27fdb35fe99011d86bcc54f62fe84712c53f4d05 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Oct 2017 14:26:21 -0700 Subject: doc: Fix various RCU docbook comment-header problems Because many of RCU's files have not been included into docbook, a number of errors have accumulated. This commit fixes them. Signed-off-by: Paul E. McKenney Signed-off-by: Linus Torvalds --- include/linux/rculist.h | 2 +- include/linux/rcupdate.h | 22 ++++++++++++++-------- include/linux/srcu.h | 1 + kernel/rcu/srcutree.c | 2 +- kernel/rcu/sync.c | 9 ++++++--- kernel/rcu/tree.c | 18 ++++++++++-------- 6 files changed, 33 insertions(+), 21 deletions(-) diff --git a/include/linux/rculist.h b/include/linux/rculist.h index b1fd8bf85fdc..2bea1d5e9930 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -276,7 +276,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, #define list_entry_rcu(ptr, type, member) \ container_of(lockless_dereference(ptr), type, member) -/** +/* * Where are list_empty_rcu() and list_first_entry_rcu()? * * Implementing those functions following their counterparts list_empty() and diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index de50d8a4cf41..1a9f70d44af9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -523,7 +523,7 @@ static inline void rcu_preempt_sleep_check(void) { } * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the READ_ONCE(). This * is useful in cases where update-side locks prevent the value of the - * pointer from changing. Please note that this primitive does -not- + * pointer from changing. Please note that this primitive does *not* * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. @@ -568,7 +568,7 @@ static inline void rcu_preempt_sleep_check(void) { } * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. In C11, it would map to * kill_dependency(). It could be used as follows: - * + * `` * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); @@ -579,6 +579,7 @@ static inline void rcu_preempt_sleep_check(void) { } * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); + *`` */ #define rcu_pointer_handoff(p) (p) @@ -778,18 +779,21 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_INIT_POINTER() - initialize an RCU protected pointer + * @p: The pointer to be initialized. + * @v: The value to initialized the pointer to. * * Initialize an RCU-protected pointer in special cases where readers * do not need ordering constraints on the CPU or the compiler. These * special cases are: * - * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- + * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer *or* * 2. The caller has taken whatever steps are required to prevent - * RCU readers from concurrently accessing this pointer -or- + * RCU readers from concurrently accessing this pointer *or* * 3. The referenced data structure has already been exposed to - * readers either at compile time or via rcu_assign_pointer() -and- - * a. You have not made -any- reader-visible changes to - * this structure since then -or- + * readers either at compile time or via rcu_assign_pointer() *and* + * + * a. You have not made *any* reader-visible changes to + * this structure since then *or* * b. It is OK for readers accessing this structure from its * new location to see the old state of the structure. (For * example, the changes were to statistical counters or to @@ -805,7 +809,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * by a single external-to-structure RCU-protected pointer, then you may * use RCU_INIT_POINTER() to initialize the internal RCU-protected * pointers, but you must use rcu_assign_pointer() to initialize the - * external-to-structure pointer -after- you have completely initialized + * external-to-structure pointer *after* you have completely initialized * the reader-accessible portions of the linked structure. * * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no @@ -819,6 +823,8 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer + * @p: The pointer to be initialized. + * @v: The value to initialized the pointer to. * * GCC-style initialization for an RCU-protected pointer in a structure field. */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 39af9bc0f653..62be8966e837 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -78,6 +78,7 @@ void synchronize_srcu(struct srcu_struct *sp); /** * srcu_read_lock_held - might we be in SRCU read-side critical section? + * @sp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 729a8706751d..6d5880089ff6 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -854,7 +854,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, /** * call_srcu() - Queue a callback for invocation after an SRCU grace period * @sp: srcu_struct in queue the callback - * @head: structure to be used for queueing the SRCU callback. + * @rhp: structure to be used for queueing the SRCU callback. * @func: function to be invoked after the SRCU grace period * * The callback function will be invoked some time after a full SRCU diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 50d1861f7759..3f943efcf61c 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -85,6 +85,9 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) } /** + * rcu_sync_enter_start - Force readers onto slow path for multiple updates + * @rsp: Pointer to rcu_sync structure to use for synchronization + * * Must be called after rcu_sync_init() and before first use. * * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}() @@ -142,7 +145,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) /** * rcu_sync_func() - Callback function managing reader access to fastpath - * @rsp: Pointer to rcu_sync structure to use for synchronization + * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization * * This function is passed to one of the call_rcu() functions by * rcu_sync_exit(), so that it is invoked after a grace period following the @@ -158,9 +161,9 @@ void rcu_sync_enter(struct rcu_sync *rsp) * rcu_sync_exit(). Otherwise, set all state back to idle so that readers * can again use their fastpaths. */ -static void rcu_sync_func(struct rcu_head *rcu) +static void rcu_sync_func(struct rcu_head *rhp) { - struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head); + struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head); unsigned long flags; BUG_ON(rsp->gp_state != GP_PASSED); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b0ad62b0e7b8..3e3650e94ae6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3097,9 +3097,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, * read-side critical sections have completed. call_rcu_sched() assumes * that the read-side critical sections end on enabling of preemption * or on voluntary preemption. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR - * - anything that disables preemption. + * RCU read-side critical sections are delimited by: + * + * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR + * - anything that disables preemption. * * These may be nested. * @@ -3124,11 +3125,12 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); * handler. This means that read-side critical sections in process * context must not be interrupted by softirqs. This interface is to be * used when most of the read-side critical sections are in softirq context. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. - * OR - * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. - * These may be nested. + * RCU read-side critical sections are delimited by: + * + * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR + * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. + * + * These may be nested. * * See the description of call_rcu() for more detailed information on * memory ordering guarantees. -- cgit v1.2.3-70-g09d2 From c5709d37693b72761d866cb1cd556093a6607c80 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Mon, 16 Oct 2017 08:13:53 +0200 Subject: dmaengine: altera: Use IRQ-safe spinlock calls in the error paths as well The patch edf10919 [dmaengine: altera: fix spinlock usage] missed to change 2 occurrences of spin_unlock_bh() to spin_unlock_irqrestore(). This patch fixes this by moving to the IRQ-safe call in the error paths as well. Fixes: edf10919 (dmaengine: altera: fix spinlock usage) Signed-off-by: Stefan Roese Reviewed-by: Sylvain Lesne [add fixes tag and fix typo in log] Signed-off-by: Vinod Koul --- drivers/dma/altera-msgdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c index 339186f25a2a..55f9c62ee54b 100644 --- a/drivers/dma/altera-msgdma.c +++ b/drivers/dma/altera-msgdma.c @@ -344,7 +344,7 @@ msgdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst, spin_lock_irqsave(&mdev->lock, irqflags); if (desc_cnt > mdev->desc_free_cnt) { - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, irqflags); dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); return NULL; } @@ -407,7 +407,7 @@ msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, spin_lock_irqsave(&mdev->lock, irqflags); if (desc_cnt > mdev->desc_free_cnt) { - spin_unlock_bh(&mdev->lock); + spin_unlock_irqrestore(&mdev->lock, irqflags); dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev); return NULL; } -- cgit v1.2.3-70-g09d2 From b703798386fb7288d5a995bd2284a984a5e24f3c Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Thu, 19 Oct 2017 11:27:24 -0500 Subject: objtool: Fix memory leak in decode_instructions() When an error occurs before adding an allocated insn to the list, free it before returning. Signed-off-by: Kamalesh Babulal Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/336da800bf6070eae11f4e0a3b9ca64c27658114.1508430423.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/check.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a0c518ecf085..c0e26ad1fa7e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -267,12 +267,13 @@ static int decode_instructions(struct objtool_file *file) &insn->immediate, &insn->stack_op); if (ret) - return ret; + goto err; if (!insn->type || insn->type > INSN_LAST) { WARN_FUNC("invalid instruction type %d", insn->sec, insn->offset, insn->type); - return -1; + ret = -1; + goto err; } hash_add(file->insn_hash, &insn->hash, insn->offset); @@ -296,6 +297,10 @@ static int decode_instructions(struct objtool_file *file) } return 0; + +err: + free(insn); + return ret; } /* -- cgit v1.2.3-70-g09d2 From ce56a86e2ade45d052b3228cdfebe913a1ae7381 Mon Sep 17 00:00:00 2001 From: Craig Bergstrom Date: Thu, 19 Oct 2017 13:28:56 -0600 Subject: x86/mm: Limit mmap() of /dev/mem to valid physical addresses Currently, it is possible to mmap() any offset from /dev/mem. If a program mmaps() /dev/mem offsets outside of the addressable limits of a system, the page table can be corrupted by setting reserved bits. For example if you mmap() offset 0x0001000000000000 of /dev/mem on an x86_64 system with a 48-bit bus, the page fault handler will be called with error_code set to RSVD. The kernel then crashes with a page table corruption error. This change prevents this page table corruption on x86 by refusing to mmap offsets higher than the highest valid address in the system. Signed-off-by: Craig Bergstrom Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dsafonov@virtuozzo.com Cc: kirill.shutemov@linux.intel.com Cc: mhocko@suse.com Cc: oleg@redhat.com Link: http://lkml.kernel.org/r/20171019192856.39672-1-craigb@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 4 ++++ arch/x86/mm/mmap.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index c40a95c33bb8..322d25ae23ab 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -110,6 +110,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", ) #endif +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern int valid_phys_addr_range(phys_addr_t addr, size_t size); +extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); + /** * virt_to_phys - map virtual addresses to physical * @address: address to remap diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index a99679826846..320c6237e1d1 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -174,3 +174,15 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[mpx]"; return NULL; } + +int valid_phys_addr_range(phys_addr_t addr, size_t count) +{ + return addr + count <= __pa(high_memory); +} + +int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) +{ + phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT; + + return valid_phys_addr_range(addr, count); +} -- cgit v1.2.3-70-g09d2 From 736f20a7060857ff569e9e9586ae6c1204a73e07 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 20 Oct 2017 15:06:34 +0800 Subject: ALSA: hda/realtek - Add support for ALC236/ALC3204 Add support for ALC236/ALC3204. Add headset mode support for ALC236/ALC3204. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0ce71111b4e3..00fa80291c96 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -327,6 +327,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0215: case 0x10ec0225: case 0x10ec0233: + case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: case 0x10ec0282: @@ -911,6 +912,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = { { 0x10ec0275, 0x1028, 0, "ALC3260" }, { 0x10ec0899, 0x1028, 0, "ALC3861" }, { 0x10ec0298, 0x1028, 0, "ALC3266" }, + { 0x10ec0236, 0x1028, 0, "ALC3204" }, { 0x10ec0256, 0x1028, 0, "ALC3246" }, { 0x10ec0225, 0x1028, 0, "ALC3253" }, { 0x10ec0295, 0x1028, 0, "ALC3254" }, @@ -3930,6 +3932,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) alc_process_coef_fw(codec, coef0255_1); alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0236: case 0x10ec0256: alc_process_coef_fw(codec, coef0256); alc_process_coef_fw(codec, coef0255); @@ -4028,6 +4031,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, }; switch (codec->core.vendor_id) { + case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: alc_write_coef_idx(codec, 0x45, 0xc489); @@ -4160,6 +4164,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) alc_process_coef_fw(codec, alc225_pre_hsmode); alc_process_coef_fw(codec, coef0225); break; + case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: alc_process_coef_fw(codec, coef0255); @@ -4256,6 +4261,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) case 0x10ec0255: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0236: case 0x10ec0256: alc_process_coef_fw(codec, coef0256); break; @@ -4366,6 +4372,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) case 0x10ec0255: alc_process_coef_fw(codec, coef0255); break; + case 0x10ec0236: case 0x10ec0256: alc_process_coef_fw(codec, coef0256); break; @@ -4451,6 +4458,7 @@ static void alc_determine_headset_type(struct hda_codec *codec) }; switch (codec->core.vendor_id) { + case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: alc_process_coef_fw(codec, coef0255); @@ -4705,6 +4713,7 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) case 0x10ec0255: alc_process_coef_fw(codec, alc255fw); break; + case 0x10ec0236: case 0x10ec0256: alc_process_coef_fw(codec, alc256fw); break; @@ -6806,6 +6815,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0255: spec->codec_variant = ALC269_TYPE_ALC255; break; + case 0x10ec0236: case 0x10ec0256: spec->codec_variant = ALC269_TYPE_ALC256; spec->shutup = alc256_shutup; @@ -7857,6 +7867,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0233, "ALC233", patch_alc269), HDA_CODEC_ENTRY(0x10ec0234, "ALC234", patch_alc269), HDA_CODEC_ENTRY(0x10ec0235, "ALC233", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0236, "ALC236", patch_alc269), HDA_CODEC_ENTRY(0x10ec0255, "ALC255", patch_alc269), HDA_CODEC_ENTRY(0x10ec0256, "ALC256", patch_alc269), HDA_CODEC_ENTRY(0x10ec0260, "ALC260", patch_alc260), -- cgit v1.2.3-70-g09d2 From 66b83a4cdd3b73effdc285d1d66763c69ffe2ee8 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Mon, 9 Oct 2017 14:26:56 +0200 Subject: binder: call poll_wait() unconditionally. Because we're not guaranteed that subsequent calls to poll() will have a poll_table_struct parameter with _qproc set. When _qproc is not set, poll_wait() is a noop, and we won't be woken up correctly. Signed-off-by: Martijn Coenen Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 0621a95b8597..fddf76ef5bd6 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3662,12 +3662,6 @@ static void binder_stat_br(struct binder_proc *proc, } } -static int binder_has_thread_work(struct binder_thread *thread) -{ - return !binder_worklist_empty(thread->proc, &thread->todo) || - thread->looper_need_return; -} - static int binder_put_node_cmd(struct binder_proc *proc, struct binder_thread *thread, void __user **ptrp, @@ -4297,12 +4291,9 @@ static unsigned int binder_poll(struct file *filp, binder_inner_proc_unlock(thread->proc); - if (binder_has_work(thread, wait_for_proc_work)) - return POLLIN; - poll_wait(filp, &thread->wait, wait); - if (binder_has_thread_work(thread)) + if (binder_has_work(thread, wait_for_proc_work)) return POLLIN; return 0; -- cgit v1.2.3-70-g09d2 From eb39a7c0355393c5a8d930f342ad7a6231b552c4 Mon Sep 17 00:00:00 2001 From: David Kozub Date: Thu, 19 Oct 2017 22:57:02 +0200 Subject: clockevents/drivers/cs5535: Improve resilience to spurious interrupts The interrupt handler mfgpt_tick() is not robust versus spurious interrupts which happen before the clock event device is registered and fully initialized. The reason is that the safe guard against spurious interrupts solely checks for the clockevents shutdown state, but lacks a check for detached state. If the interrupt hits while the device is in detached state it passes the safe guard and dereferences the event handler call back which is NULL. Add the missing state check. Fixes: 8f9327cbb6e8 ("clockevents/drivers/cs5535: Migrate to new 'set-state' interface") Suggested-by: Thomas Gleixner Signed-off-by: David Kozub Signed-off-by: Thomas Gleixner Cc: Daniel Lezcano Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20171020093103.3317F6004D@linux.fjfi.cvut.cz --- drivers/clocksource/cs5535-clockevt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c index a1df588343f2..1de8cac99a0e 100644 --- a/drivers/clocksource/cs5535-clockevt.c +++ b/drivers/clocksource/cs5535-clockevt.c @@ -117,7 +117,8 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id) /* Turn off the clock (and clear the event) */ disable_timer(cs5535_event_clock); - if (clockevent_state_shutdown(&cs5535_clockevent)) + if (clockevent_state_detached(&cs5535_clockevent) || + clockevent_state_shutdown(&cs5535_clockevent)) return IRQ_HANDLED; /* Clear the counter */ -- cgit v1.2.3-70-g09d2 From 1cc276cec9ec574d41cf47dfc0f51406b6f26ab4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 18 Oct 2017 21:37:49 +0800 Subject: sctp: add the missing sock_owned_by_user check in sctp_icmp_redirect Now sctp processes icmp redirect packet in sctp_icmp_redirect where it calls sctp_transport_dst_check in which tp->dst can be released. The problem is before calling sctp_transport_dst_check, it doesn't check sock_owned_by_user, which means tp->dst could be freed while a process is accessing it with owning the socket. An use-after-free issue could be triggered by this. This patch is to fix it by checking sock_owned_by_user before calling sctp_transport_dst_check in sctp_icmp_redirect, so that it would not release tp->dst if users still hold sock lock. Besides, the same issue fixed in commit 45caeaa5ac0b ("dccp/tcp: fix routing redirect race") on sctp also needs this check. Fixes: 55be7a9c6074 ("ipv4: Add redirect support to all protocol icmp error handlers") Reported-by: Eric Dumazet Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 92a07141fd07..34f10e75f3b9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, { struct dst_entry *dst; - if (!t) + if (sock_owned_by_user(sk) || !t) return; dst = sctp_transport_dst_check(t); if (dst) -- cgit v1.2.3-70-g09d2 From 435bf0d3f99a164df7e8c30428cef266b91d1d3b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:15 -0700 Subject: bpf: enforce TCP only support for sockmap Only TCP sockets have been tested and at the moment the state change callback only handles TCP sockets. This adds a check to ensure that sockets actually being added are TCP sockets. For net-next we can consider UDP support. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/sockmap.c | 6 ++++++ tools/testing/selftests/bpf/test_maps.c | 12 +++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6424ce0e4969..c68899d5b246 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -840,6 +840,12 @@ static int sock_map_update_elem(struct bpf_map *map, return -EINVAL; } + if (skops.sk->sk_type != SOCK_STREAM || + skops.sk->sk_protocol != IPPROTO_TCP) { + fput(socket->file); + return -EOPNOTSUPP; + } + err = sock_map_ctx_update_elem(&skops, map, key, flags); fput(socket->file); return err; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index fe3a443a1102..50ce52d2013d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data) int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; int ports[] = {50200, 50201, 50202, 50204}; - int err, i, fd, sfd[6] = {0xdeadbeef}; + int err, i, fd, udp, sfd[6] = {0xdeadbeef}; u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0}; int parse_prog, verdict_prog; struct sockaddr_in addr; @@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } + /* Test update with unsupported UDP socket */ + udp = socket(AF_INET, SOCK_DGRAM, 0); + i = 0; + err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY); + if (!err) { + printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n", + i, udp); + goto out_sockmap; + } + /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); -- cgit v1.2.3-70-g09d2 From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:36 -0700 Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region SK_SKB BPF programs are run from the socket/tcp context but early in the stack before much of the TCP metadata is needed in tcp_skb_cb. So we can use some unused fields to place BPF metadata needed for SK_SKB programs when implementing the redirect function. This allows us to drop the preempt disable logic. It does however require an API change so sk_redirect_map() has been updated to additionally provide ctx_ptr to skb. Note, we do however continue to disable/enable preemption around actual BPF program running to account for map updates. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- include/net/tcp.h | 5 ++++ kernel/bpf/sockmap.c | 19 +++++++------- net/core/filter.c | 29 +++++++++++----------- samples/sockmap/sockmap_kern.c | 2 +- tools/include/uapi/linux/bpf.h | 3 ++- tools/testing/selftests/bpf/bpf_helpers.h | 2 +- tools/testing/selftests/bpf/sockmap_verdict_prog.c | 4 +-- 8 files changed, 36 insertions(+), 30 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index d29e58fde364..818a0b26249e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -728,7 +728,7 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); -struct sock *do_sk_redirect_map(void); +struct sock *do_sk_redirect_map(struct sk_buff *skb); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/net/tcp.h b/include/net/tcp.h index 89974c5286d8..b1ef98ebce53 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -840,6 +840,11 @@ struct tcp_skb_cb { struct inet6_skb_parm h6; #endif } header; /* For incoming skbs */ + struct { + __u32 key; + __u32 flags; + struct bpf_map *map; + } bpf; }; }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index c68899d5b246..beaabb21c3a3 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -39,6 +39,7 @@ #include #include #include +#include struct bpf_stab { struct bpf_map map; @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) return SK_DROP; skb_orphan(skb); + /* We need to ensure that BPF metadata for maps is also cleared + * when we orphan the skb so that we don't have the possibility + * to reference a stale map. + */ + TCP_SKB_CB(skb)->bpf.map = NULL; skb->sk = psock->sock; bpf_compute_data_end(skb); + preempt_disable(); rc = (*prog->bpf_func)(skb, prog->insnsi); + preempt_enable(); skb->sk = NULL; return rc; @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) struct sock *sk; int rc; - /* Because we use per cpu values to feed input from sock redirect - * in BPF program to do_sk_redirect_map() call we need to ensure we - * are not preempted. RCU read lock is not sufficient in this case - * with CONFIG_PREEMPT_RCU enabled so we must be explicit here. - */ - preempt_disable(); rc = smap_verdict_func(psock, skb); switch (rc) { case SK_REDIRECT: - sk = do_sk_redirect_map(); - preempt_enable(); + sk = do_sk_redirect_map(skb); if (likely(sk)) { struct smap_psock *peer = smap_psock_sk(sk); @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) /* Fall through and free skb otherwise */ case SK_DROP: default: - if (rc != SK_REDIRECT) - preempt_enable(); kfree_skb(skb); } } diff --git a/net/core/filter.c b/net/core/filter.c index 74b8c91fb5f4..ca1ba0bbfbc2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags) +BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, + struct bpf_map *, map, u32, key, u64, flags) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); if (unlikely(flags)) return SK_ABORTED; - ri->ifindex = key; - ri->flags = flags; - ri->map = map; + tcb->bpf.key = key; + tcb->bpf.flags = flags; + tcb->bpf.map = map; return SK_REDIRECT; } -struct sock *do_sk_redirect_map(void) +struct sock *do_sk_redirect_map(struct sk_buff *skb) { - struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); struct sock *sk = NULL; - if (ri->map) { - sk = __sock_map_lookup_elem(ri->map, ri->ifindex); + if (tcb->bpf.map) { + sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key); - ri->ifindex = 0; - ri->map = NULL; - /* we do not clear flags for future lookup */ + tcb->bpf.key = 0; + tcb->bpf.map = NULL; } return sk; @@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { .func = bpf_sk_redirect_map, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_ANYTHING, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, }; BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c index f9b38ef82dc2..52b0053274f4 100644 --- a/samples/sockmap/sockmap_kern.c +++ b/samples/sockmap/sockmap_kern.c @@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb) ret = 1; bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); - return bpf_sk_redirect_map(&sock_map, ret, 0); + return bpf_sk_redirect_map(skb, &sock_map, ret, 0); } SEC("sockops") diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 43ab5c402f98..be9a631a69f7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -569,9 +569,10 @@ union bpf_attr { * @flags: reserved for future use * Return: 0 on success or negative error code * - * int bpf_sk_redirect_map(map, key, flags) + * int bpf_sk_redirect_map(skb, map, key, flags) * Redirect skb to a sock in map using key as a lookup key for the * sock in map. + * @skb: pointer to skb * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 36fb9161b34a..b2e02bdcd098 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; -static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = +static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, unsigned long long flags) = diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c index 9b99bd10807d..2cd2d552938b 100644 --- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c @@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb) bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk); if (!map) - return bpf_sk_redirect_map(&sock_map_rx, sk, 0); - return bpf_sk_redirect_map(&sock_map_tx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0); } char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From f7e9cb1ecb6d922584abff16db07930162c57155 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:58 -0700 Subject: bpf: remove mark access for SK_SKB program types The skb->mark field is a union with reserved_tailroom which is used in the TCP code paths from stream memory allocation. Allowing SK_SKB programs to set this field creates a conflict with future code optimizations, such as "gifting" the skb to the egress path instead of creating a new skb and doing a memcpy. Because we do not have a released version of SK_SKB yet lets just remove it for now. A more appropriate scratch pad to use at the socket layer is dev_scratch, but lets add that in future kernels when needed. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/filter.c | 2 +- tools/testing/selftests/bpf/test_verifier.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index ca1ba0bbfbc2..aa0265997f93 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3684,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size, { if (type == BPF_WRITE) { switch (off) { - case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, tc_index): case bpf_ctx_range(struct __sk_buff, priority): break; @@ -3694,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size, } switch (off) { + case bpf_ctx_range(struct __sk_buff, mark): case bpf_ctx_range(struct __sk_buff, tc_classid): return false; case bpf_ctx_range(struct __sk_buff, data): diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3c7d3a45a3c5..50e15cedbb7f 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", }, { - "check skb->mark is writeable by SK_SKB", + "invalid access of skb->mark for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", + }, + { + "check skb->mark is not writeable by SK_SKB", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = ACCEPT, + .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", }, { "check skb->tc_index is writeable by SK_SKB", -- cgit v1.2.3-70-g09d2 From fb50df8d32283cd95932a182a46a10070c4a8832 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:11:22 -0700 Subject: bpf: require CAP_NET_ADMIN when using sockmap maps Restrict sockmap to CAP_NET_ADMIN. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/sockmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index beaabb21c3a3..2b6eb35ae5d3 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -486,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) int err = -EINVAL; u64 cost; + if (!capable(CAP_NET_ADMIN)) + return ERR_PTR(-EPERM); + /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) -- cgit v1.2.3-70-g09d2 From 9ef2a8cd5c0dcb8e1f1534615c56eb13b630c363 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:11:44 -0700 Subject: bpf: require CAP_NET_ADMIN when using devmap Devmap is used with XDP which requires CAP_NET_ADMIN so lets also make CAP_NET_ADMIN required to use the map. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/devmap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 920428d84da2..52e0548ba548 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) int err = -EINVAL; u64 cost; + if (!capable(CAP_NET_ADMIN)) + return ERR_PTR(-EPERM); + /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) -- cgit v1.2.3-70-g09d2 From 9d35593b4f0b89ab0c194349c7d357b3b159e99a Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 18 Oct 2017 02:08:40 -0700 Subject: vmbus: hvsock: add proper sync for vmbus_hvsock_device_unregister() Without the patch, vmbus_hvsock_device_unregister() can destroy the device prematurely when close() is called, and can cause NULl dereferencing or potential data loss (the last portion of the data stream may be dropped prematurely). Signed-off-by: Dexuan Cui Cc: Haiyang Zhang Cc: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 018d2e0f8ec5..379b0df123be 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -937,7 +937,10 @@ void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) { BUG_ON(!is_hvsock_channel(channel)); - channel->rescind = true; + /* We always get a rescind msg when a connection is closed. */ + while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind)) + msleep(1); + vmbus_device_unregister(channel->device_obj); } EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); -- cgit v1.2.3-70-g09d2 From 1c9fec470b81ca5e89391c20a11ead31a1e9314b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 20 Oct 2017 07:36:05 -0700 Subject: waitid(): Avoid unbalanced user_access_end() on access_ok() error As pointed out by Linus and David, the earlier waitid() fix resulted in a (currently harmless) unbalanced user_access_end() call. This fixes it to just directly return EFAULT on access_ok() failure. Fixes: 96ca579a1ecc ("waitid(): Add missing access_ok() checks") Acked-by: David Daney Cc: Al Viro Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- kernel/exit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index cf28528842bc..f6cad39f35df 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1611,7 +1611,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, return err; if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) - goto Efault; + return -EFAULT; user_access_begin(); unsafe_put_user(signo, &infop->si_signo, Efault); @@ -1739,7 +1739,7 @@ COMPAT_SYSCALL_DEFINE5(waitid, return err; if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) - goto Efault; + return -EFAULT; user_access_begin(); unsafe_put_user(signo, &infop->si_signo, Efault); -- cgit v1.2.3-70-g09d2 From c92e8c02fe664155ac4234516e32544bec0f113d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Oct 2017 09:04:13 -0700 Subject: tcp/dccp: fix ireq->opt races syzkaller found another bug in DCCP/TCP stacks [1] For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix ireq->pktopts race"), we need to make sure we do not access ireq->opt unless we own the request sock. Note the opt field is renamed to ireq_opt to ease grep games. [1] BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295 CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427 ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135 tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587 tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557 __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072 tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline] tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071 tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816 tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:249 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:464 [inline] ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:249 [inline] ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 netif_receive_skb+0xae/0x390 net/core/dev.c:4611 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 call_write_iter include/linux/fs.h:1770 [inline] new_sync_write fs/read_write.c:468 [inline] __vfs_write+0x68a/0x970 fs/read_write.c:481 vfs_write+0x18f/0x510 fs/read_write.c:543 SYSC_write fs/read_write.c:588 [inline] SyS_write+0xef/0x220 fs/read_write.c:580 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x40c341 RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341 RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1 R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000 Allocated by task 3295: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 __do_kmalloc mm/slab.c:3725 [inline] __kmalloc+0x162/0x760 mm/slab.c:3734 kmalloc include/linux/slab.h:498 [inline] tcp_v4_save_options include/net/tcp.h:1962 [inline] tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271 tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283 tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313 tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857 tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482 tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:249 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:464 [inline] ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:249 [inline] ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 netif_receive_skb+0xae/0x390 net/core/dev.c:4611 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 call_write_iter include/linux/fs.h:1770 [inline] new_sync_write fs/read_write.c:468 [inline] __vfs_write+0x68a/0x970 fs/read_write.c:481 vfs_write+0x18f/0x510 fs/read_write.c:543 SYSC_write fs/read_write.c:588 [inline] SyS_write+0xef/0x220 fs/read_write.c:580 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 3306: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kfree+0xca/0x250 mm/slab.c:3820 inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157 __sk_destruct+0xfd/0x910 net/core/sock.c:1560 sk_destruct+0x47/0x80 net/core/sock.c:1595 __sk_free+0x57/0x230 net/core/sock.c:1603 sk_free+0x2a/0x40 net/core/sock.c:1614 sock_put include/net/sock.h:1652 [inline] inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959 tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765 tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675 ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:249 [inline] ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:464 [inline] ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 NF_HOOK include/linux/netfilter.h:249 [inline] ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 netif_receive_skb+0xae/0x390 net/core/dev.c:4611 tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 call_write_iter include/linux/fs.h:1770 [inline] new_sync_write fs/read_write.c:468 [inline] __vfs_write+0x68a/0x970 fs/read_write.c:481 vfs_write+0x18f/0x510 fs/read_write.c:543 SYSC_write fs/read_write.c:588 [inline] SyS_write+0xef/0x220 fs/read_write.c:580 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 2 +- net/dccp/ipv4.c | 13 ++++++++----- net/ipv4/cipso_ipv4.c | 24 +++++++----------------- net/ipv4/inet_connection_sock.c | 8 +++----- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 22 +++++++++++++--------- 7 files changed, 34 insertions(+), 39 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index aa95053dfc78..425752f768d2 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -96,7 +96,7 @@ struct inet_request_sock { kmemcheck_bitfield_end(flags); u32 ir_mark; union { - struct ip_options_rcu *opt; + struct ip_options_rcu __rcu *ireq_opt; #if IS_ENABLED(CONFIG_IPV6) struct { struct ipv6_txoptions *ipv6_opt; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 001c08696334..0490916864f9 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newinet->inet_saddr = ireq->ir_loc_addr; - newinet->inet_opt = ireq->opt; - ireq->opt = NULL; + RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->inet_id = jiffies; @@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); - + if (*own_req) + ireq->ireq_opt = NULL; + else + newinet->inet_opt = NULL; return newsk; exit_overflow: @@ -441,6 +443,7 @@ exit: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: + newinet->inet_opt = NULL; inet_csk_prepare_forced_close(newsk); dccp_done(newsk); goto exit; @@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req ireq->ir_rmt_addr); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq->opt); + rcu_dereference(ireq->ireq_opt)); err = net_xmit_eval(err); } @@ -548,7 +551,7 @@ out: static void dccp_v4_reqsk_destructor(struct request_sock *req) { dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); - kfree(inet_rsk(req)->opt); + kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); } void dccp_syn_ack_timeout(const struct request_sock *req) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2ae8f54cb321..82178cc69c96 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req, buf = NULL; req_inet = inet_rsk(req); - opt = xchg(&req_inet->opt, opt); + opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); if (opt) kfree_rcu(opt, rcu); @@ -1973,11 +1973,13 @@ req_setattr_failure: * values on failure. * */ -static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) +static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) { + struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1); int hdr_delta = 0; - struct ip_options_rcu *opt = *opt_ptr; + if (!opt || opt->opt.cipso == 0) + return 0; if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { u8 cipso_len; u8 cipso_off; @@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) */ void cipso_v4_sock_delattr(struct sock *sk) { - int hdr_delta; - struct ip_options_rcu *opt; struct inet_sock *sk_inet; + int hdr_delta; sk_inet = inet_sk(sk); - opt = rcu_dereference_protected(sk_inet->inet_opt, 1); - if (!opt || opt->opt.cipso == 0) - return; hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); if (sk_inet->is_icsk && hdr_delta > 0) { @@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk) */ void cipso_v4_req_delattr(struct request_sock *req) { - struct ip_options_rcu *opt; - struct inet_request_sock *req_inet; - - req_inet = inet_rsk(req); - opt = req_inet->opt; - if (!opt || opt->opt.cipso == 0) - return; - - cipso_v4_delopt(&req_inet->opt); + cipso_v4_delopt(&inet_rsk(req)->ireq_opt); } /** diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 67aec7a10686..5ec9136a7c36 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -540,9 +540,10 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct net *net = read_pnet(&ireq->ireq_net); - struct ip_options_rcu *opt = ireq->opt; + struct ip_options_rcu *opt; struct rtable *rt; + opt = rcu_dereference(ireq->ireq_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -576,10 +577,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct flowi4 *fl4; struct rtable *rt; + opt = rcu_dereference(ireq->ireq_opt); fl4 = &newinet->cork.fl.u.ip4; - rcu_read_lock(); - opt = rcu_dereference(newinet->inet_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -592,13 +592,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; - rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: - rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index b1bb1b3a1082..77cf32a80952 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - ireq->opt = tcp_v4_save_options(sock_net(sk), skb); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb)); if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c5d7656beeee..7eec3383702b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6196,7 +6196,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct inet_request_sock *ireq = inet_rsk(req); kmemcheck_annotate_bitfield(ireq, flags); - ireq->opt = NULL; + ireq->ireq_opt = NULL; #if IS_ENABLED(CONFIG_IPV6) ireq->pktopts = NULL; #endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 85164d4d3e53..4c43365c374c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -877,7 +877,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq->opt); + rcu_dereference(ireq->ireq_opt)); err = net_xmit_eval(err); } @@ -889,7 +889,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, */ static void tcp_v4_reqsk_destructor(struct request_sock *req) { - kfree(inet_rsk(req)->opt); + kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); } #ifdef CONFIG_TCP_MD5SIG @@ -1265,10 +1265,11 @@ static void tcp_v4_init_req(struct request_sock *req, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); + struct net *net = sock_net(sk_listener); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); } static struct dst_entry *tcp_v4_route_req(const struct sock *sk, @@ -1355,10 +1356,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newsk->sk_bound_dev_if = ireq->ir_iif; - newinet->inet_saddr = ireq->ir_loc_addr; - inet_opt = ireq->opt; - rcu_assign_pointer(newinet->inet_opt, inet_opt); - ireq->opt = NULL; + newinet->inet_saddr = ireq->ir_loc_addr; + inet_opt = rcu_dereference(ireq->ireq_opt); + RCU_INIT_POINTER(newinet->inet_opt, inet_opt); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; @@ -1403,9 +1403,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); - if (*own_req) + if (likely(*own_req)) { tcp_move_syn(newtp, req); - + ireq->ireq_opt = NULL; + } else { + newinet->inet_opt = NULL; + } return newsk; exit_overflow: @@ -1416,6 +1419,7 @@ exit: tcp_listendrop(sk); return NULL; put_and_exit: + newinet->inet_opt = NULL; inet_csk_prepare_forced_close(newsk); tcp_done(newsk); goto exit; -- cgit v1.2.3-70-g09d2 From 509c7a1ecc8601f94ffba8a00889fefb239c00c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Oct 2017 16:14:52 -0700 Subject: packet: avoid panic in packet_getsockopt() syzkaller got crashes in packet_getsockopt() processing PACKET_ROLLOVER_STATS command while another thread was managing to change po->rollover Using RCU will fix this bug. We might later add proper RCU annotations for sparse sake. In v2: I replaced kfree(rollover) in fanout_add() to kfree_rcu() variant, as spotted by John. Fixes: a9b6391814d5 ("packet: rollover statistics") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: John Sperbeck Signed-off-by: David S. Miller --- net/packet/af_packet.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bec01a3daf5b..2986941164b1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1769,7 +1769,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) out: if (err && rollover) { - kfree(rollover); + kfree_rcu(rollover, rcu); po->rollover = NULL; } mutex_unlock(&fanout_mutex); @@ -1796,8 +1796,10 @@ static struct packet_fanout *fanout_release(struct sock *sk) else f = NULL; - if (po->rollover) + if (po->rollover) { kfree_rcu(po->rollover, rcu); + po->rollover = NULL; + } } mutex_unlock(&fanout_mutex); @@ -3851,6 +3853,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; + struct packet_rollover *rollover; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3929,13 +3932,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_ROLLOVER_STATS: - if (!po->rollover) + rcu_read_lock(); + rollover = rcu_dereference(po->rollover); + if (rollover) { + rstats.tp_all = atomic_long_read(&rollover->num); + rstats.tp_huge = atomic_long_read(&rollover->num_huge); + rstats.tp_failed = atomic_long_read(&rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); + } + rcu_read_unlock(); + if (!rollover) return -EINVAL; - rstats.tp_all = atomic_long_read(&po->rollover->num); - rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); - rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); - data = &rstats; - lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; -- cgit v1.2.3-70-g09d2 From 6850d0f8b2542112629061808ed950b35eb982e4 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Thu, 19 Oct 2017 13:43:05 +1100 Subject: net/ncsi: Fix AEN HNCDSC packet length Correct the value of the HNCDSC AEN packet. Fixes: 7a82ecf4cfb85 "net/ncsi: NCSI AEN packet handler" Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-aen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index 6898e7229285..f135938bf781 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -187,7 +187,7 @@ static struct ncsi_aen_handler { } ncsi_aen_handlers[] = { { NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc }, { NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr }, - { NCSI_PKT_AEN_HNCDSC, 4, ncsi_aen_handler_hncdsc } + { NCSI_PKT_AEN_HNCDSC, 8, ncsi_aen_handler_hncdsc } }; int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From 0795fb2021f07969949f523ea33c39785bfae9d6 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Thu, 19 Oct 2017 13:43:06 +1100 Subject: net/ncsi: Stop monitor if channel times out or is inactive ncsi_channel_monitor() misses stopping the channel monitor in several places that it should, causing a WARN_ON_ONCE() to trigger when the monitor is re-started later, eg: [ 459.040000] WARNING: CPU: 0 PID: 1093 at net/ncsi/ncsi-manage.c:269 ncsi_start_channel_monitor+0x7c/0x90 [ 459.040000] CPU: 0 PID: 1093 Comm: kworker/0:3 Not tainted 4.10.17-gaca2fdd #140 [ 459.040000] Hardware name: ASpeed SoC [ 459.040000] Workqueue: events ncsi_dev_work [ 459.040000] [<80010094>] (unwind_backtrace) from [<8000d950>] (show_stack+0x20/0x24) [ 459.040000] [<8000d950>] (show_stack) from [<801dbf70>] (dump_stack+0x20/0x28) [ 459.040000] [<801dbf70>] (dump_stack) from [<80018d7c>] (__warn+0xe0/0x108) [ 459.040000] [<80018d7c>] (__warn) from [<80018e70>] (warn_slowpath_null+0x30/0x38) [ 459.040000] [<80018e70>] (warn_slowpath_null) from [<803f6a08>] (ncsi_start_channel_monitor+0x7c/0x90) [ 459.040000] [<803f6a08>] (ncsi_start_channel_monitor) from [<803f7664>] (ncsi_configure_channel+0xdc/0x5fc) [ 459.040000] [<803f7664>] (ncsi_configure_channel) from [<803f8160>] (ncsi_dev_work+0xac/0x474) [ 459.040000] [<803f8160>] (ncsi_dev_work) from [<8002d244>] (process_one_work+0x1e0/0x450) [ 459.040000] [<8002d244>] (process_one_work) from [<8002d510>] (worker_thread+0x5c/0x570) [ 459.040000] [<8002d510>] (worker_thread) from [<80033614>] (kthread+0x124/0x164) [ 459.040000] [<80033614>] (kthread) from [<8000a5e8>] (ret_from_fork+0x14/0x2c) This also updates the monitor instead of just returning if ncsi_xmit_cmd() fails to send the get-link-status command so that the monitor properly times out. Fixes: e6f44ed6d04d3 "net/ncsi: Package and channel management" Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index b6a449aa9d4b..b022deb39d31 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -202,11 +202,15 @@ static void ncsi_channel_monitor(unsigned long data) monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); - if (!enabled || chained) + if (!enabled || chained) { + ncsi_stop_channel_monitor(nc); return; + } if (state != NCSI_CHANNEL_INACTIVE && - state != NCSI_CHANNEL_ACTIVE) + state != NCSI_CHANNEL_ACTIVE) { + ncsi_stop_channel_monitor(nc); return; + } switch (monitor_state) { case NCSI_CHANNEL_MONITOR_START: @@ -217,12 +221,9 @@ static void ncsi_channel_monitor(unsigned long data) nca.type = NCSI_PKT_CMD_GLS; nca.req_flags = 0; ret = ncsi_xmit_cmd(&nca); - if (ret) { + if (ret) netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", ret); - return; - } - break; case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: break; @@ -233,6 +234,8 @@ static void ncsi_channel_monitor(unsigned long data) ndp->flags |= NCSI_DEV_RESHUFFLE; } + ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INVISIBLE; spin_unlock_irqrestore(&nc->lock, flags); -- cgit v1.2.3-70-g09d2 From 100ef01f3ea4badbee6479290a41f74abd0e523f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Oct 2017 13:43:07 +1100 Subject: net/ncsi: Disable HWA mode when no channels are found When there are no NCSI channels probed, HWA (Hardware Arbitration) mode is enabled. It's not correct because HWA depends on the fact: NCSI channels exist and all of them support HWA mode. This disables HWA when no channels are probed. Signed-off-by: Gavin Shan Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index b022deb39d31..0966eff48ce7 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1005,12 +1005,15 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp) struct ncsi_package *np; struct ncsi_channel *nc; unsigned int cap; + bool has_channel = false; /* The hardware arbitration is disabled if any one channel * doesn't support explicitly. */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + has_channel = true; + cap = nc->caps[NCSI_CAP_GENERIC].cap; if (!(cap & NCSI_CAP_GENERIC_HWA) || (cap & NCSI_CAP_GENERIC_HWA_MASK) != @@ -1021,8 +1024,13 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp) } } - ndp->flags |= NCSI_DEV_HWA; - return true; + if (has_channel) { + ndp->flags |= NCSI_DEV_HWA; + return true; + } + + ndp->flags &= ~NCSI_DEV_HWA; + return false; } static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp) -- cgit v1.2.3-70-g09d2 From 52b4c8627f9f0d882e969967a207a27a80c9c753 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Oct 2017 13:43:08 +1100 Subject: net/ncsi: Enforce failover on link monitor timeout The NCSI channel has been configured to provide service if its link monitor timer is enabled, regardless of its state (inactive or active). So the timeout event on the link monitor indicates the out-of-service on that channel, for which a failover is needed. This sets NCSI_DEV_RESHUFFLE flag to enforce failover on link monitor timeout, regardless the channel's original state (inactive or active). Also, the link is put into "down" state to give the failing channel lowest priority when selecting for the active channel. The state of failing channel should be set to active in order for deinitialization and failover to be done. Signed-off-by: Gavin Shan Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 0966eff48ce7..28c42b22b748 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -189,6 +189,7 @@ static void ncsi_channel_monitor(unsigned long data) struct ncsi_channel *nc = (struct ncsi_channel *)data; struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; + struct ncsi_channel_mode *ncm; struct ncsi_cmd_arg nca; bool enabled, chained; unsigned int monitor_state; @@ -228,20 +229,21 @@ static void ncsi_channel_monitor(unsigned long data) case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: break; default: - if (!(ndp->flags & NCSI_DEV_HWA) && - state == NCSI_CHANNEL_ACTIVE) { + if (!(ndp->flags & NCSI_DEV_HWA)) { ncsi_report_link(ndp, true); ndp->flags |= NCSI_DEV_RESHUFFLE; } ncsi_stop_channel_monitor(nc); + ncm = &nc->modes[NCSI_MODE_LINK]; spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INVISIBLE; + ncm->data[2] &= ~0x1; spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); - nc->state = NCSI_CHANNEL_INACTIVE; + nc->state = NCSI_CHANNEL_ACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); ncsi_process_next_channel(ndp); -- cgit v1.2.3-70-g09d2 From 0a90e251988ceedc528c8db98f25b051cf190f44 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 19 Oct 2017 13:43:09 +1100 Subject: net/ncsi: Fix length of GVI response packet The length of GVI (GetVersionInfo) response packet should be 40 instead of 36. This issue was found from /sys/kernel/debug/ncsi/eth0/stats. # ethtool --ncsi eth0 swstats : RESPONSE OK TIMEOUT ERROR ======================================= GVI 0 0 2 With this applied, no error reported on GVI response packets: # ethtool --ncsi eth0 swstats : RESPONSE OK TIMEOUT ERROR ======================================= GVI 2 0 0 Signed-off-by: Gavin Shan Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: David S. Miller --- net/ncsi/ncsi-rsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 265b9a892d41..927dad4759d1 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -959,7 +959,7 @@ static struct ncsi_rsp_handler { { NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf }, { NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf }, { NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc }, - { NCSI_PKT_RSP_GVI, 36, ncsi_rsp_handler_gvi }, + { NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi }, { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, { NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp }, { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps }, -- cgit v1.2.3-70-g09d2 From b4562ca7925a3bedada87a3dd072dd5bad043288 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 19 Oct 2017 03:33:14 +0000 Subject: hv_sock: add locking in the open/close/release code paths Without the patch, when hvs_open_connection() hasn't completely established a connection (e.g. it has changed sk->sk_state to SS_CONNECTED, but hasn't inserted the sock into the connected queue), vsock_stream_connect() may see the sk_state change and return the connection to the userspace, and next when the userspace closes the connection quickly, hvs_release() may not see the connection in the connected queue; finally hvs_open_connection() inserts the connection into the queue, but we won't be able to purge the connection for ever. Signed-off-by: Dexuan Cui Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Vitaly Kuznetsov Cc: Cathy Avery Cc: Rolf Neugebauer Cc: Marcelo Cerri Signed-off-by: David S. Miller --- net/vmw_vsock/hyperv_transport.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 14ed5a344cdf..e21991fe883a 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -310,11 +310,15 @@ static void hvs_close_connection(struct vmbus_channel *chan) struct sock *sk = get_per_channel_state(chan); struct vsock_sock *vsk = vsock_sk(sk); + lock_sock(sk); + sk->sk_state = SS_UNCONNECTED; sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; sk->sk_state_change(sk); + + release_sock(sk); } static void hvs_open_connection(struct vmbus_channel *chan) @@ -344,6 +348,8 @@ static void hvs_open_connection(struct vmbus_channel *chan) if (!sk) return; + lock_sock(sk); + if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) || (!conn_from_host && sk->sk_state != SS_CONNECTING)) goto out; @@ -395,9 +401,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) vsock_insert_connected(vnew); - lock_sock(sk); vsock_enqueue_accept(sk, new); - release_sock(sk); } else { sk->sk_state = SS_CONNECTED; sk->sk_socket->state = SS_CONNECTED; @@ -410,6 +414,8 @@ static void hvs_open_connection(struct vmbus_channel *chan) out: /* Release refcnt obtained when we called vsock_find_bound_socket() */ sock_put(sk); + + release_sock(sk); } static u32 hvs_get_local_cid(void) @@ -476,13 +482,21 @@ out: static void hvs_release(struct vsock_sock *vsk) { + struct sock *sk = sk_vsock(vsk); struct hvsock *hvs = vsk->trans; - struct vmbus_channel *chan = hvs->chan; + struct vmbus_channel *chan; + lock_sock(sk); + + sk->sk_state = SS_DISCONNECTING; + vsock_remove_sock(vsk); + + release_sock(sk); + + chan = hvs->chan; if (chan) hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN); - vsock_remove_sock(vsk); } static void hvs_destruct(struct vsock_sock *vsk) -- cgit v1.2.3-70-g09d2 From 772e97b57a4aa00170ad505a40ffad31d987ce1d Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 19 Oct 2017 13:31:28 +0200 Subject: geneve: Fix function matching VNI and tunnel ID on big-endian On big-endian machines, functions converting between tunnel ID and VNI use the three LSBs of tunnel ID storage to map VNI. The comparison function eq_tun_id_and_vni(), on the other hand, attempted to map the VNI from the three MSBs. Fix it by using the same check implemented on LE, which maps VNI from the three LSBs of tunnel ID. Fixes: 2e0b26e10352 ("geneve: Optimize geneve device lookup.") Signed-off-by: Stefano Brivio Reviewed-by: Jakub Sitnicki Signed-off-by: David S. Miller --- drivers/net/geneve.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index f6404074b7b0..ed51018a813e 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -113,13 +113,7 @@ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) { -#ifdef __BIG_ENDIAN - return (vni[0] == tun_id[2]) && - (vni[1] == tun_id[1]) && - (vni[2] == tun_id[0]); -#else return !memcmp(vni, &tun_id[5], 3); -#endif } static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) -- cgit v1.2.3-70-g09d2 From 197df02cb3d3e969fb1d6fc11f5a634b7bfc2124 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 19 Oct 2017 14:22:17 +0200 Subject: udp: make some messages more descriptive In the UDP code there are two leftover error messages with very few meaning. Replace them with a more descriptive error message as some users reported them as "strange network error". Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e45177ceb0ee..806b298a3bdd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1061,7 +1061,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - net_dbg_ratelimited("cork app bug 2\n"); + net_dbg_ratelimited("socket already corked\n"); err = -EINVAL; goto out; } @@ -1144,7 +1144,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - net_dbg_ratelimited("udp cork app bug 3\n"); + net_dbg_ratelimited("cork failed\n"); return -EINVAL; } -- cgit v1.2.3-70-g09d2 From a0c2baaf81bd53dc76fccdddc721ba7dbb62be21 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Fri, 20 Oct 2017 20:58:58 -0400 Subject: android: binder: Don't get mm from task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use binder_alloc struct's mm_struct rather than getting a reference to the mm struct through get_task_mm to avoid a potential deadlock between lru lock, task lock and dentry lock, since a thread can be holding the task lock and the dentry lock while trying to acquire the lru lock. Acked-by: Arve Hjønnevåg Signed-off-by: Sherry Yang Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 22 +++++++++------------- drivers/android/binder_alloc.h | 1 - 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 064f5e31ec55..e12072b1d507 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -215,17 +215,12 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, } } - if (!vma && need_mm) - mm = get_task_mm(alloc->tsk); + if (!vma && need_mm && mmget_not_zero(alloc->vma_vm_mm)) + mm = alloc->vma_vm_mm; if (mm) { down_write(&mm->mmap_sem); vma = alloc->vma; - if (vma && mm != alloc->vma_vm_mm) { - pr_err("%d: vma mm and task mm mismatch\n", - alloc->pid); - vma = NULL; - } } if (!vma && need_mm) { @@ -720,6 +715,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, barrier(); alloc->vma = vma; alloc->vma_vm_mm = vma->vm_mm; + mmgrab(alloc->vma_vm_mm); return 0; @@ -795,6 +791,8 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) vfree(alloc->buffer); } mutex_unlock(&alloc->mutex); + if (alloc->vma_vm_mm) + mmdrop(alloc->vma_vm_mm); binder_alloc_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d buffers %d, pages %d\n", @@ -889,7 +887,6 @@ int binder_alloc_get_allocated_count(struct binder_alloc *alloc) void binder_alloc_vma_close(struct binder_alloc *alloc) { WRITE_ONCE(alloc->vma, NULL); - WRITE_ONCE(alloc->vma_vm_mm, NULL); } /** @@ -926,9 +923,9 @@ enum lru_status binder_alloc_free_page(struct list_head *item, page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE; vma = alloc->vma; if (vma) { - mm = get_task_mm(alloc->tsk); - if (!mm) - goto err_get_task_mm_failed; + if (!mmget_not_zero(alloc->vma_vm_mm)) + goto err_mmget; + mm = alloc->vma_vm_mm; if (!down_write_trylock(&mm->mmap_sem)) goto err_down_write_mmap_sem_failed; } @@ -963,7 +960,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, err_down_write_mmap_sem_failed: mmput_async(mm); -err_get_task_mm_failed: +err_mmget: err_page_already_freed: mutex_unlock(&alloc->mutex); err_get_alloc_mutex_failed: @@ -1002,7 +999,6 @@ struct shrinker binder_shrinker = { */ void binder_alloc_init(struct binder_alloc *alloc) { - alloc->tsk = current->group_leader; alloc->pid = current->group_leader->pid; mutex_init(&alloc->mutex); INIT_LIST_HEAD(&alloc->buffers); diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index a3a3602c689c..2dd33b6df104 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -100,7 +100,6 @@ struct binder_lru_page { */ struct binder_alloc { struct mutex mutex; - struct task_struct *tsk; struct vm_area_struct *vma; struct mm_struct *vma_vm_mm; void *buffer; -- cgit v1.2.3-70-g09d2 From ae65c8510f3319dfb2114cc48d476b81232e27b3 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Fri, 20 Oct 2017 20:58:59 -0400 Subject: android: binder: Fix null ptr dereference in debug msg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't access next->data in kernel debug message when the next buffer is null. Acked-by: Arve Hjønnevåg Signed-off-by: Sherry Yang Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index e12072b1d507..c2819a3d58a6 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -560,7 +560,7 @@ static void binder_delete_free_buffer(struct binder_alloc *alloc, binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: merge free, buffer %pK do not share page with %pK or %pK\n", alloc->pid, buffer->data, - prev->data, next->data); + prev->data, next ? next->data : NULL); binder_update_page_range(alloc, 0, buffer_start_page(buffer), buffer_start_page(buffer) + PAGE_SIZE, NULL); -- cgit v1.2.3-70-g09d2 From 65e665e68d097edfe667372f13d54f3e4edcb69c Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:53 +0300 Subject: net: aquantia: Reset nic statistics on interface up/down Internal statistics system on chip never gets reset until hardware reboot. This is quite inconvenient in terms of ethtool statistics usage. This patch implements incremental statistics update inside of service callback. Upon nic initialization, first request is done to fetch initial stat data, current collected stat data gets cleared. Internal statistics mailbox readout is improved to save space and increase readability Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 2 + drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 3 + .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 1 + .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 1 + .../aquantia/atlantic/hw_atl/hw_atl_utils.c | 69 +++++++++++++++++----- .../aquantia/atlantic/hw_atl/hw_atl_utils.h | 16 ++++- 6 files changed, 75 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index bf9b3f020e10..3a8baaef053c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -163,6 +163,8 @@ struct aq_hw_ops { int (*hw_get_regs)(struct aq_hw_s *self, struct aq_hw_caps_s *aq_hw_caps, u32 *regs_buff); + int (*hw_update_stats)(struct aq_hw_s *self); + int (*hw_get_hw_stats)(struct aq_hw_s *self, u64 *data, unsigned int *p_count); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 0a5bb4114eb4..6b49dd658012 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -167,6 +167,9 @@ static void aq_nic_service_timer_cb(unsigned long param) self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw, self->aq_nic_cfg.is_interrupt_moderation); + if (self->aq_hw_ops.hw_update_stats) + self->aq_hw_ops.hw_update_stats(self->aq_hw); + memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s)); memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s)); for (i = AQ_DIMOF(self->aq_vec); i--;) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index c5a02df7a48b..b0747b2486b2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -885,6 +885,7 @@ static struct aq_hw_ops hw_atl_ops_ = { .hw_rss_set = hw_atl_a0_hw_rss_set, .hw_rss_hash_set = hw_atl_a0_hw_rss_hash_set, .hw_get_regs = hw_atl_utils_hw_get_regs, + .hw_update_stats = hw_atl_utils_update_stats, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 21784cc39dab..6f6e70aa1047 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -939,6 +939,7 @@ static struct aq_hw_ops hw_atl_ops_ = { .hw_rss_set = hw_atl_b0_hw_rss_set, .hw_rss_hash_set = hw_atl_b0_hw_rss_hash_set, .hw_get_regs = hw_atl_utils_hw_get_regs, + .hw_update_stats = hw_atl_utils_update_stats, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index bf734b32e44b..1fe016fc4bc7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -255,6 +255,15 @@ err_exit: return err; } +int hw_atl_utils_mpi_read_mbox(struct aq_hw_s *self, + struct hw_aq_atl_utils_mbox_header *pmbox) +{ + return hw_atl_utils_fw_downld_dwords(self, + PHAL_ATLANTIC->mbox_addr, + (u32 *)(void *)pmbox, + sizeof(*pmbox) / sizeof(u32)); +} + void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self, struct hw_aq_atl_utils_mbox *pmbox) { @@ -267,9 +276,6 @@ void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self, if (err < 0) goto err_exit; - if (pmbox != &PHAL_ATLANTIC->mbox) - memcpy(pmbox, &PHAL_ATLANTIC->mbox, sizeof(*pmbox)); - if (IS_CHIP_FEATURE(REVISION_A0)) { unsigned int mtu = self->aq_nic_cfg ? self->aq_nic_cfg->mtu : 1514U; @@ -299,17 +305,17 @@ void hw_atl_utils_mpi_set(struct aq_hw_s *self, { int err = 0; u32 transaction_id = 0; + struct hw_aq_atl_utils_mbox_header mbox; if (state == MPI_RESET) { - hw_atl_utils_mpi_read_stats(self, &PHAL_ATLANTIC->mbox); + hw_atl_utils_mpi_read_mbox(self, &mbox); - transaction_id = PHAL_ATLANTIC->mbox.transaction_id; + transaction_id = mbox.transaction_id; AQ_HW_WAIT_FOR(transaction_id != - (hw_atl_utils_mpi_read_stats - (self, &PHAL_ATLANTIC->mbox), - PHAL_ATLANTIC->mbox.transaction_id), - 1000U, 100U); + (hw_atl_utils_mpi_read_mbox(self, &mbox), + mbox.transaction_id), + 1000U, 100U); if (err < 0) goto err_exit; } @@ -492,16 +498,51 @@ int hw_atl_utils_hw_set_power(struct aq_hw_s *self, return 0; } +int hw_atl_utils_update_stats(struct aq_hw_s *self) +{ + struct hw_atl_s *hw_self = PHAL_ATLANTIC; + struct hw_aq_atl_utils_mbox mbox; + + if (!self->aq_link_status.mbps) + return 0; + + hw_atl_utils_mpi_read_stats(self, &mbox); + +#define AQ_SDELTA(_N_) (hw_self->curr_stats._N_ += \ + mbox.stats._N_ - hw_self->last_stats._N_) + + AQ_SDELTA(uprc); + AQ_SDELTA(mprc); + AQ_SDELTA(bprc); + AQ_SDELTA(erpt); + + AQ_SDELTA(uptc); + AQ_SDELTA(mptc); + AQ_SDELTA(bptc); + AQ_SDELTA(erpr); + + AQ_SDELTA(ubrc); + AQ_SDELTA(ubtc); + AQ_SDELTA(mbrc); + AQ_SDELTA(mbtc); + AQ_SDELTA(bbrc); + AQ_SDELTA(bbtc); + AQ_SDELTA(dpc); + +#undef AQ_SDELTA + + memcpy(&hw_self->last_stats, &mbox.stats, sizeof(mbox.stats)); + + return 0; +} + int hw_atl_utils_get_hw_stats(struct aq_hw_s *self, u64 *data, unsigned int *p_count) { - struct hw_atl_stats_s *stats = NULL; + struct hw_atl_s *hw_self = PHAL_ATLANTIC; + struct hw_atl_stats_s *stats = &hw_self->curr_stats; int i = 0; - hw_atl_utils_mpi_read_stats(self, &PHAL_ATLANTIC->mbox); - - stats = &PHAL_ATLANTIC->mbox.stats; - data[i] = stats->uprc + stats->mprc + stats->bprc; data[++i] = stats->uprc; data[++i] = stats->mprc; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index e0360a6b2202..2218bdb605a7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -115,16 +115,21 @@ struct __packed hw_aq_atl_utils_fw_rpc { }; }; -struct __packed hw_aq_atl_utils_mbox { +struct __packed hw_aq_atl_utils_mbox_header { u32 version; u32 transaction_id; - int error; + u32 error; +}; + +struct __packed hw_aq_atl_utils_mbox { + struct hw_aq_atl_utils_mbox_header header; struct hw_atl_stats_s stats; }; struct __packed hw_atl_s { struct aq_hw_s base; - struct hw_aq_atl_utils_mbox mbox; + struct hw_atl_stats_s last_stats; + struct hw_atl_stats_s curr_stats; u64 speed; u32 itr_tx; u32 itr_rx; @@ -170,6 +175,9 @@ enum hal_atl_utils_fw_state_e { void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p); +int hw_atl_utils_mpi_read_mbox(struct aq_hw_s *self, + struct hw_aq_atl_utils_mbox_header *pmbox); + void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self, struct hw_aq_atl_utils_mbox *pmbox); @@ -199,6 +207,8 @@ int hw_atl_utils_hw_deinit(struct aq_hw_s *self); int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version); +int hw_atl_utils_update_stats(struct aq_hw_s *self); + int hw_atl_utils_get_hw_stats(struct aq_hw_s *self, u64 *data, unsigned int *p_count); -- cgit v1.2.3-70-g09d2 From 5d8d84e91d7432cd206b27ad791a11220689ac53 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:54 +0300 Subject: net: aquantia: Add queue restarts stats counter Queue stat strings are cleaned up, duplicate stat name strings removed, queue restarts counter added Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_ethtool.c | 92 ++++++++-------------- drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 3 + 2 files changed, 37 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a761e91471df..3eab4089e91a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -56,10 +56,6 @@ aq_ethtool_set_link_ksettings(struct net_device *ndev, return aq_nic_set_link_ksettings(aq_nic, cmd); } -/* there "5U" is number of queue[#] stats lines (InPackets+...+InErrors) */ -static const unsigned int aq_ethtool_stat_queue_lines = 5U; -static const unsigned int aq_ethtool_stat_queue_chars = - 5U * ETH_GSTRING_LEN; static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = { "InPackets", "InUCast", @@ -83,56 +79,26 @@ static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = { "InOctetsDma", "OutOctetsDma", "InDroppedDma", - "Queue[0] InPackets", - "Queue[0] OutPackets", - "Queue[0] InJumboPackets", - "Queue[0] InLroPackets", - "Queue[0] InErrors", - "Queue[1] InPackets", - "Queue[1] OutPackets", - "Queue[1] InJumboPackets", - "Queue[1] InLroPackets", - "Queue[1] InErrors", - "Queue[2] InPackets", - "Queue[2] OutPackets", - "Queue[2] InJumboPackets", - "Queue[2] InLroPackets", - "Queue[2] InErrors", - "Queue[3] InPackets", - "Queue[3] OutPackets", - "Queue[3] InJumboPackets", - "Queue[3] InLroPackets", - "Queue[3] InErrors", - "Queue[4] InPackets", - "Queue[4] OutPackets", - "Queue[4] InJumboPackets", - "Queue[4] InLroPackets", - "Queue[4] InErrors", - "Queue[5] InPackets", - "Queue[5] OutPackets", - "Queue[5] InJumboPackets", - "Queue[5] InLroPackets", - "Queue[5] InErrors", - "Queue[6] InPackets", - "Queue[6] OutPackets", - "Queue[6] InJumboPackets", - "Queue[6] InLroPackets", - "Queue[6] InErrors", - "Queue[7] InPackets", - "Queue[7] OutPackets", - "Queue[7] InJumboPackets", - "Queue[7] InLroPackets", - "Queue[7] InErrors", +}; + +static const char aq_ethtool_queue_stat_names[][ETH_GSTRING_LEN] = { + "Queue[%d] InPackets", + "Queue[%d] OutPackets", + "Queue[%d] Restarts", + "Queue[%d] InJumboPackets", + "Queue[%d] InLroPackets", + "Queue[%d] InErrors", }; static void aq_ethtool_stats(struct net_device *ndev, struct ethtool_stats *stats, u64 *data) { struct aq_nic_s *aq_nic = netdev_priv(ndev); + struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); -/* ASSERT: Need add lines to aq_ethtool_stat_names if AQ_CFG_VECS_MAX > 8 */ - BUILD_BUG_ON(AQ_CFG_VECS_MAX > 8); - memset(data, 0, ARRAY_SIZE(aq_ethtool_stat_names) * sizeof(u64)); + memset(data, 0, (ARRAY_SIZE(aq_ethtool_stat_names) + + ARRAY_SIZE(aq_ethtool_queue_stat_names) * + cfg->vecs) * sizeof(u64)); aq_nic_get_stats(aq_nic, data); } @@ -154,8 +120,8 @@ static void aq_ethtool_get_drvinfo(struct net_device *ndev, strlcpy(drvinfo->bus_info, pdev ? pci_name(pdev) : "", sizeof(drvinfo->bus_info)); - drvinfo->n_stats = ARRAY_SIZE(aq_ethtool_stat_names) - - (AQ_CFG_VECS_MAX - cfg->vecs) * aq_ethtool_stat_queue_lines; + drvinfo->n_stats = ARRAY_SIZE(aq_ethtool_stat_names) + + cfg->vecs * ARRAY_SIZE(aq_ethtool_queue_stat_names); drvinfo->testinfo_len = 0; drvinfo->regdump_len = regs_count; drvinfo->eedump_len = 0; @@ -164,14 +130,25 @@ static void aq_ethtool_get_drvinfo(struct net_device *ndev, static void aq_ethtool_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { + int i, si; struct aq_nic_s *aq_nic = netdev_priv(ndev); struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); - - if (stringset == ETH_SS_STATS) - memcpy(data, *aq_ethtool_stat_names, - sizeof(aq_ethtool_stat_names) - - (AQ_CFG_VECS_MAX - cfg->vecs) * - aq_ethtool_stat_queue_chars); + u8 *p = data; + + if (stringset == ETH_SS_STATS) { + memcpy(p, *aq_ethtool_stat_names, + sizeof(aq_ethtool_stat_names)); + p = p + sizeof(aq_ethtool_stat_names); + for (i = 0; i < cfg->vecs; i++) { + for (si = 0; + si < ARRAY_SIZE(aq_ethtool_queue_stat_names); + si++) { + snprintf(p, ETH_GSTRING_LEN, + aq_ethtool_queue_stat_names[si], i); + p += ETH_GSTRING_LEN; + } + } + } } static int aq_ethtool_get_sset_count(struct net_device *ndev, int stringset) @@ -182,9 +159,8 @@ static int aq_ethtool_get_sset_count(struct net_device *ndev, int stringset) switch (stringset) { case ETH_SS_STATS: - ret = ARRAY_SIZE(aq_ethtool_stat_names) - - (AQ_CFG_VECS_MAX - cfg->vecs) * - aq_ethtool_stat_queue_lines; + ret = ARRAY_SIZE(aq_ethtool_stat_names) + + cfg->vecs * ARRAY_SIZE(aq_ethtool_queue_stat_names); break; default: ret = -EOPNOTSUPP; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index 305ff8ffac2c..5fecc9a099ef 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -373,8 +373,11 @@ int aq_vec_get_sw_stats(struct aq_vec_s *self, u64 *data, unsigned int *p_count) memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s)); aq_vec_add_stats(self, &stats_rx, &stats_tx); + /* This data should mimic aq_ethtool_queue_stat_names structure + */ data[count] += stats_rx.packets; data[++count] += stats_tx.packets; + data[++count] += stats_tx.queue_restarts; data[++count] += stats_rx.jumbo_packets; data[++count] += stats_rx.lro_packets; data[++count] += stats_rx.errors; -- cgit v1.2.3-70-g09d2 From 93d87b8fbe6cf17f0ad9552a934b5a6623ccd7d1 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:55 +0300 Subject: net: aquantia: Fixed transient link up/down/up notification When doing ifconfig down/up, driver did not reported carrier_off neither in nic_stop nor in nic_start. That caused link to be visible as "up" during couple of seconds immediately after "ifconfig up". Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 6b49dd658012..9378b4877783 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -337,6 +337,7 @@ struct aq_nic_s *aq_nic_alloc_hot(struct net_device *ndev) } if (netif_running(ndev)) netif_tx_disable(ndev); + netif_carrier_off(self->ndev); for (self->aq_vecs = 0; self->aq_vecs < self->aq_nic_cfg.vecs; self->aq_vecs++) { @@ -902,6 +903,7 @@ int aq_nic_stop(struct aq_nic_s *self) unsigned int i = 0U; netif_tx_disable(self->ndev); + netif_carrier_off(self->ndev); del_timer_sync(&self->service_timer); -- cgit v1.2.3-70-g09d2 From 4c8bb609d304df72858aa2e5e74abab5246bd24b Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:56 +0300 Subject: net: aquantia: Limit number of MSIX irqs to the number of cpus There is no much practical use from having MSIX vectors more that number of cpus, thus cap this first with preconfigured limit, then with number of cpus online. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 4c6c882c6a1c..727f0a446ef1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -85,6 +85,7 @@ int aq_pci_func_init(struct aq_pci_func_s *self) int err = 0; unsigned int bar = 0U; unsigned int port = 0U; + unsigned int numvecs = 0U; err = pci_enable_device(self->pdev); if (err < 0) @@ -142,10 +143,12 @@ int aq_pci_func_init(struct aq_pci_func_s *self) } } - /*enable interrupts */ + numvecs = min((u8)AQ_CFG_VECS_DEF, self->aq_hw_caps.msix_irqs); + numvecs = min(numvecs, num_online_cpus()); + + /* enable interrupts */ #if !AQ_CFG_FORCE_LEGACY_INT - err = pci_alloc_irq_vectors(self->pdev, self->aq_hw_caps.msix_irqs, - self->aq_hw_caps.msix_irqs, PCI_IRQ_MSIX); + err = pci_alloc_irq_vectors(self->pdev, numvecs, numvecs, PCI_IRQ_MSIX); if (err < 0) { err = pci_alloc_irq_vectors(self->pdev, 1, 1, @@ -153,7 +156,7 @@ int aq_pci_func_init(struct aq_pci_func_s *self) if (err < 0) goto err_exit; } -#endif +#endif /* AQ_CFG_FORCE_LEGACY_INT */ /* net device init */ for (port = 0; port < self->ports; ++port) { -- cgit v1.2.3-70-g09d2 From 6849540adc0bcc8c648d7c11be169d2ca267fbca Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:57 +0300 Subject: net: aquantia: mmio unmap was not performed on driver removal That may lead to mmio resource leakage. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 727f0a446ef1..cadaa646c89f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -268,6 +268,9 @@ void aq_pci_func_free(struct aq_pci_func_s *self) aq_nic_ndev_free(self->port[port]); } + if (self->mmio) + iounmap(self->mmio); + kfree(self); err_exit:; -- cgit v1.2.3-70-g09d2 From b82ee71a86b0ea66da79a91959d800ffb696a5cb Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:58 +0300 Subject: net: aquantia: Enable coalescing management via ethtool interface Aquantia NIC allows both TX and RX interrupt throttle rate (ITR) management, but this was used in a very limited way via predefined values. This patch allows to setup ITR default values via module command line arguments and via standard ethtool coalescing settings. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 8 ++- .../net/ethernet/aquantia/atlantic/aq_ethtool.c | 65 ++++++++++++++++++ drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 3 +- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 36 +++++++--- drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 4 +- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 20 +++--- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 76 ++++++++++++---------- .../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h | 3 + .../aquantia/atlantic/hw_atl/hw_atl_utils.h | 2 - 9 files changed, 155 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 0fdaaa643073..57e796870595 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -22,8 +22,12 @@ #define AQ_CFG_FORCE_LEGACY_INT 0U -#define AQ_CFG_IS_INTERRUPT_MODERATION_DEF 1U -#define AQ_CFG_INTERRUPT_MODERATION_RATE_DEF 0xFFFFU +#define AQ_CFG_INTERRUPT_MODERATION_OFF 0 +#define AQ_CFG_INTERRUPT_MODERATION_ON 1 +#define AQ_CFG_INTERRUPT_MODERATION_AUTO 0xFFFFU + +#define AQ_CFG_INTERRUPT_MODERATION_USEC_MAX (0x1FF * 2) + #define AQ_CFG_IRQ_MASK 0x1FFU #define AQ_CFG_VECS_MAX 8U diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index 3eab4089e91a..d5e99b468870 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -221,6 +221,69 @@ static int aq_ethtool_get_rxnfc(struct net_device *ndev, return err; } +int aq_ethtool_get_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) +{ + struct aq_nic_s *aq_nic = netdev_priv(ndev); + struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); + + if (cfg->itr == AQ_CFG_INTERRUPT_MODERATION_ON || + cfg->itr == AQ_CFG_INTERRUPT_MODERATION_AUTO) { + coal->rx_coalesce_usecs = cfg->rx_itr; + coal->tx_coalesce_usecs = cfg->tx_itr; + coal->rx_max_coalesced_frames = 0; + coal->tx_max_coalesced_frames = 0; + } else { + coal->rx_coalesce_usecs = 0; + coal->tx_coalesce_usecs = 0; + coal->rx_max_coalesced_frames = 1; + coal->tx_max_coalesced_frames = 1; + } + return 0; +} + +int aq_ethtool_set_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) +{ + struct aq_nic_s *aq_nic = netdev_priv(ndev); + struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); + + /* This is not yet supported + */ + if (coal->use_adaptive_rx_coalesce || coal->use_adaptive_tx_coalesce) + return -EOPNOTSUPP; + + /* Atlantic only supports timing based coalescing + */ + if (coal->rx_max_coalesced_frames > 1 || + coal->rx_coalesce_usecs_irq || + coal->rx_max_coalesced_frames_irq) + return -EOPNOTSUPP; + + if (coal->tx_max_coalesced_frames > 1 || + coal->tx_coalesce_usecs_irq || + coal->tx_max_coalesced_frames_irq) + return -EOPNOTSUPP; + + /* We do not support frame counting. Check this + */ + if (!(coal->rx_max_coalesced_frames == !coal->rx_coalesce_usecs)) + return -EOPNOTSUPP; + if (!(coal->tx_max_coalesced_frames == !coal->tx_coalesce_usecs)) + return -EOPNOTSUPP; + + if (coal->rx_coalesce_usecs > AQ_CFG_INTERRUPT_MODERATION_USEC_MAX || + coal->tx_coalesce_usecs > AQ_CFG_INTERRUPT_MODERATION_USEC_MAX) + return -EINVAL; + + cfg->itr = AQ_CFG_INTERRUPT_MODERATION_ON; + + cfg->rx_itr = coal->rx_coalesce_usecs; + cfg->tx_itr = coal->tx_coalesce_usecs; + + return aq_nic_update_interrupt_moderation_settings(aq_nic); +} + const struct ethtool_ops aq_ethtool_ops = { .get_link = aq_ethtool_get_link, .get_regs_len = aq_ethtool_get_regs_len, @@ -235,4 +298,6 @@ const struct ethtool_ops aq_ethtool_ops = { .get_ethtool_stats = aq_ethtool_stats, .get_link_ksettings = aq_ethtool_get_link_ksettings, .set_link_ksettings = aq_ethtool_set_link_ksettings, + .get_coalesce = aq_ethtool_get_coalesce, + .set_coalesce = aq_ethtool_set_coalesce, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 3a8baaef053c..0207927dc8a6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -151,8 +151,7 @@ struct aq_hw_ops { [ETH_ALEN], u32 count); - int (*hw_interrupt_moderation_set)(struct aq_hw_s *self, - bool itr_enabled); + int (*hw_interrupt_moderation_set)(struct aq_hw_s *self); int (*hw_rss_set)(struct aq_hw_s *self, struct aq_rss_parameters *rss_params); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 9378b4877783..483e97691eea 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -16,6 +16,7 @@ #include "aq_pci_func.h" #include "aq_nic_internal.h" +#include #include #include #include @@ -24,6 +25,18 @@ #include #include +static unsigned int aq_itr = AQ_CFG_INTERRUPT_MODERATION_AUTO; +module_param_named(aq_itr, aq_itr, uint, 0644); +MODULE_PARM_DESC(aq_itr, "Interrupt throttling mode"); + +static unsigned int aq_itr_tx; +module_param_named(aq_itr_tx, aq_itr_tx, uint, 0644); +MODULE_PARM_DESC(aq_itr_tx, "TX interrupt throttle rate"); + +static unsigned int aq_itr_rx; +module_param_named(aq_itr_rx, aq_itr_rx, uint, 0644); +MODULE_PARM_DESC(aq_itr_rx, "RX interrupt throttle rate"); + static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues) { struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; @@ -61,9 +74,9 @@ static void aq_nic_cfg_init_defaults(struct aq_nic_s *self) cfg->is_polling = AQ_CFG_IS_POLLING_DEF; - cfg->is_interrupt_moderation = AQ_CFG_IS_INTERRUPT_MODERATION_DEF; - cfg->itr = cfg->is_interrupt_moderation ? - AQ_CFG_INTERRUPT_MODERATION_RATE_DEF : 0U; + cfg->itr = aq_itr; + cfg->tx_itr = aq_itr_tx; + cfg->rx_itr = aq_itr_rx; cfg->is_rss = AQ_CFG_IS_RSS_DEF; cfg->num_rss_queues = AQ_CFG_NUM_RSS_QUEUES_DEF; @@ -126,10 +139,12 @@ static int aq_nic_update_link_status(struct aq_nic_s *self) if (err) return err; - if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps) + if (self->link_status.mbps != self->aq_hw->aq_link_status.mbps) { pr_info("%s: link change old %d new %d\n", AQ_CFG_DRV_NAME, self->link_status.mbps, self->aq_hw->aq_link_status.mbps); + aq_nic_update_interrupt_moderation_settings(self); + } self->link_status = self->aq_hw->aq_link_status; if (!netif_carrier_ok(self->ndev) && self->link_status.mbps) { @@ -164,9 +179,6 @@ static void aq_nic_service_timer_cb(unsigned long param) if (err) goto err_exit; - self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw, - self->aq_nic_cfg.is_interrupt_moderation); - if (self->aq_hw_ops.hw_update_stats) self->aq_hw_ops.hw_update_stats(self->aq_hw); @@ -425,9 +437,8 @@ int aq_nic_start(struct aq_nic_s *self) if (err < 0) goto err_exit; - err = self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw, - self->aq_nic_cfg.is_interrupt_moderation); - if (err < 0) + err = aq_nic_update_interrupt_moderation_settings(self); + if (err) goto err_exit; setup_timer(&self->service_timer, &aq_nic_service_timer_cb, (unsigned long)self); @@ -649,6 +660,11 @@ err_exit: return err; } +int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self) +{ + return self->aq_hw_ops.hw_interrupt_moderation_set(self->aq_hw); +} + int aq_nic_set_packet_filter(struct aq_nic_s *self, unsigned int flags) { int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 0ddd556ff901..4309983acdd6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -40,6 +40,8 @@ struct aq_nic_cfg_s { u32 vecs; /* vecs==allocated irqs */ u32 irq_type; u32 itr; + u16 rx_itr; + u16 tx_itr; u32 num_rss_queues; u32 mtu; u32 ucp_0x364; @@ -49,7 +51,6 @@ struct aq_nic_cfg_s { u16 is_mc_list_enabled; u16 mc_list_count; bool is_autoneg; - bool is_interrupt_moderation; bool is_polling; bool is_rss; bool is_lro; @@ -104,5 +105,6 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self, struct aq_nic_cfg_s *aq_nic_get_cfg(struct aq_nic_s *self); u32 aq_nic_get_fw_version(struct aq_nic_s *self); int aq_nic_change_pm_state(struct aq_nic_s *self, pm_message_t *pm_msg); +int aq_nic_update_interrupt_moderation_settings(struct aq_nic_s *self); #endif /* AQ_NIC_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index b0747b2486b2..07b3c49a16a4 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -765,24 +765,23 @@ err_exit: return err; } -static int hw_atl_a0_hw_interrupt_moderation_set(struct aq_hw_s *self, - bool itr_enabled) +static int hw_atl_a0_hw_interrupt_moderation_set(struct aq_hw_s *self) { unsigned int i = 0U; + u32 itr_rx; - if (itr_enabled && self->aq_nic_cfg->itr) { - if (self->aq_nic_cfg->itr != 0xFFFFU) { + if (self->aq_nic_cfg->itr) { + if (self->aq_nic_cfg->itr != AQ_CFG_INTERRUPT_MODERATION_AUTO) { u32 itr_ = (self->aq_nic_cfg->itr >> 1); itr_ = min(AQ_CFG_IRQ_MASK, itr_); - PHAL_ATLANTIC_A0->itr_rx = 0x80000000U | - (itr_ << 0x10); + itr_rx = 0x80000000U | (itr_ << 0x10); } else { u32 n = 0xFFFFU & aq_hw_read_reg(self, 0x00002A00U); if (n < self->aq_link_status.mbps) { - PHAL_ATLANTIC_A0->itr_rx = 0U; + itr_rx = 0U; } else { static unsigned int hw_timers_tbl_[] = { 0x01CU, /* 10Gbit */ @@ -797,8 +796,7 @@ static int hw_atl_a0_hw_interrupt_moderation_set(struct aq_hw_s *self, hw_atl_utils_mbps_2_speed_index( self->aq_link_status.mbps); - PHAL_ATLANTIC_A0->itr_rx = - 0x80000000U | + itr_rx = 0x80000000U | (hw_timers_tbl_[speed_index] << 0x10U); } @@ -806,11 +804,11 @@ static int hw_atl_a0_hw_interrupt_moderation_set(struct aq_hw_s *self, aq_hw_write_reg(self, 0x00002A00U, 0x8D000000U); } } else { - PHAL_ATLANTIC_A0->itr_rx = 0U; + itr_rx = 0U; } for (i = HW_ATL_A0_RINGS_MAX; i--;) - reg_irq_thr_set(self, PHAL_ATLANTIC_A0->itr_rx, i); + reg_irq_thr_set(self, itr_rx, i); return aq_hw_err_from_flags(self); } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 6f6e70aa1047..11f7e71bf448 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -788,31 +788,37 @@ err_exit: return err; } -static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self, - bool itr_enabled) +static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self) { unsigned int i = 0U; + u32 itr_tx = 2U; + u32 itr_rx = 2U; - if (itr_enabled && self->aq_nic_cfg->itr) { + switch (self->aq_nic_cfg->itr) { + case AQ_CFG_INTERRUPT_MODERATION_ON: + case AQ_CFG_INTERRUPT_MODERATION_AUTO: tdm_tx_desc_wr_wb_irq_en_set(self, 0U); tdm_tdm_intr_moder_en_set(self, 1U); rdm_rx_desc_wr_wb_irq_en_set(self, 0U); rdm_rdm_intr_moder_en_set(self, 1U); - PHAL_ATLANTIC_B0->itr_tx = 2U; - PHAL_ATLANTIC_B0->itr_rx = 2U; + if (self->aq_nic_cfg->itr == AQ_CFG_INTERRUPT_MODERATION_ON) { + /* HW timers are in 2us units */ + int tx_max_timer = self->aq_nic_cfg->tx_itr / 2; + int tx_min_timer = tx_max_timer / 2; - if (self->aq_nic_cfg->itr != 0xFFFFU) { - unsigned int max_timer = self->aq_nic_cfg->itr / 2U; - unsigned int min_timer = self->aq_nic_cfg->itr / 32U; + int rx_max_timer = self->aq_nic_cfg->rx_itr / 2; + int rx_min_timer = rx_max_timer / 2; - max_timer = min(0x1FFU, max_timer); - min_timer = min(0xFFU, min_timer); + tx_max_timer = min(HW_ATL_INTR_MODER_MAX, tx_max_timer); + tx_min_timer = min(HW_ATL_INTR_MODER_MIN, tx_min_timer); + rx_max_timer = min(HW_ATL_INTR_MODER_MAX, rx_max_timer); + rx_min_timer = min(HW_ATL_INTR_MODER_MIN, rx_min_timer); - PHAL_ATLANTIC_B0->itr_tx |= min_timer << 0x8U; - PHAL_ATLANTIC_B0->itr_tx |= max_timer << 0x10U; - PHAL_ATLANTIC_B0->itr_rx |= min_timer << 0x8U; - PHAL_ATLANTIC_B0->itr_rx |= max_timer << 0x10U; + itr_tx |= tx_min_timer << 0x8U; + itr_tx |= tx_max_timer << 0x10U; + itr_rx |= rx_min_timer << 0x8U; + itr_rx |= rx_max_timer << 0x10U; } else { static unsigned int hw_atl_b0_timers_table_tx_[][2] = { {0xffU, 0xffU}, /* 10Gbit */ @@ -836,34 +842,36 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self, hw_atl_utils_mbps_2_speed_index( self->aq_link_status.mbps); - PHAL_ATLANTIC_B0->itr_tx |= - hw_atl_b0_timers_table_tx_[speed_index] - [0] << 0x8U; /* set min timer value */ - PHAL_ATLANTIC_B0->itr_tx |= - hw_atl_b0_timers_table_tx_[speed_index] - [1] << 0x10U; /* set max timer value */ - - PHAL_ATLANTIC_B0->itr_rx |= - hw_atl_b0_timers_table_rx_[speed_index] - [0] << 0x8U; /* set min timer value */ - PHAL_ATLANTIC_B0->itr_rx |= - hw_atl_b0_timers_table_rx_[speed_index] - [1] << 0x10U; /* set max timer value */ + /* Update user visible ITR settings */ + self->aq_nic_cfg->tx_itr = hw_atl_b0_timers_table_tx_ + [speed_index][1] * 2; + self->aq_nic_cfg->rx_itr = hw_atl_b0_timers_table_rx_ + [speed_index][1] * 2; + + itr_tx |= hw_atl_b0_timers_table_tx_ + [speed_index][0] << 0x8U; + itr_tx |= hw_atl_b0_timers_table_tx_ + [speed_index][1] << 0x10U; + + itr_rx |= hw_atl_b0_timers_table_rx_ + [speed_index][0] << 0x8U; + itr_rx |= hw_atl_b0_timers_table_rx_ + [speed_index][1] << 0x10U; } - } else { + break; + case AQ_CFG_INTERRUPT_MODERATION_OFF: tdm_tx_desc_wr_wb_irq_en_set(self, 1U); tdm_tdm_intr_moder_en_set(self, 0U); rdm_rx_desc_wr_wb_irq_en_set(self, 1U); rdm_rdm_intr_moder_en_set(self, 0U); - PHAL_ATLANTIC_B0->itr_tx = 0U; - PHAL_ATLANTIC_B0->itr_rx = 0U; + itr_tx = 0U; + itr_rx = 0U; + break; } for (i = HW_ATL_B0_RINGS_MAX; i--;) { - reg_tx_intr_moder_ctrl_set(self, - PHAL_ATLANTIC_B0->itr_tx, i); - reg_rx_intr_moder_ctrl_set(self, - PHAL_ATLANTIC_B0->itr_rx, i); + reg_tx_intr_moder_ctrl_set(self, itr_tx, i); + reg_rx_intr_moder_ctrl_set(self, itr_rx, i); } return aq_hw_err_from_flags(self); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index fcf89e25a773..9aa2c6edfca2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -139,6 +139,9 @@ #define HW_ATL_B0_FW_VER_EXPECTED 0x01050006U +#define HW_ATL_INTR_MODER_MAX 0x1FF +#define HW_ATL_INTR_MODER_MIN 0xFF + /* Hardware tx descriptor */ struct __packed hw_atl_txd_s { u64 buf_addr; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index 2218bdb605a7..c99cc690e425 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -131,8 +131,6 @@ struct __packed hw_atl_s { struct hw_atl_stats_s last_stats; struct hw_atl_stats_s curr_stats; u64 speed; - u32 itr_tx; - u32 itr_rx; unsigned int chip_features; u32 fw_ver_actual; atomic_t dpc; -- cgit v1.2.3-70-g09d2 From 417a3ae4b14909439bb49790f90201f450399845 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 19 Oct 2017 18:23:59 +0300 Subject: net: aquantia: Bad udp rate on default interrupt coalescing Default Tx rates cause very long ISR delays on Tx. 0xff is 510us delay, giving only ~ 2000 interrupts per seconds for Tx rings cleanup. With these settings udp tx rate was never higher than ~800Mbps on a single stream. Changing min delay to 0xF makes it way better with ~6Gbps TCP stream performance is almost unaffected by this change, since LSO optimizations play important role. CPU load is affected insignificantly by this change. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 11f7e71bf448..ec68c20efcbd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -821,12 +821,12 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self) itr_rx |= rx_max_timer << 0x10U; } else { static unsigned int hw_atl_b0_timers_table_tx_[][2] = { - {0xffU, 0xffU}, /* 10Gbit */ - {0xffU, 0x1ffU}, /* 5Gbit */ - {0xffU, 0x1ffU}, /* 5Gbit 5GS */ - {0xffU, 0x1ffU}, /* 2.5Gbit */ - {0xffU, 0x1ffU}, /* 1Gbit */ - {0xffU, 0x1ffU}, /* 100Mbit */ + {0xfU, 0xffU}, /* 10Gbit */ + {0xfU, 0x1ffU}, /* 5Gbit */ + {0xfU, 0x1ffU}, /* 5Gbit 5GS */ + {0xfU, 0x1ffU}, /* 2.5Gbit */ + {0xfU, 0x1ffU}, /* 1Gbit */ + {0xfU, 0x1ffU}, /* 100Mbit */ }; static unsigned int hw_atl_b0_timers_table_rx_[][2] = { -- cgit v1.2.3-70-g09d2 From 1f7c70d6b2bc5de301f30456621e1161fddf4242 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Oct 2017 16:06:52 +0200 Subject: cpu/hotplug: Reset node state after operation The recent rework of the cpu hotplug internals changed the usage of the per cpu state->node field, but missed to clean it up after usage. So subsequent hotplug operations use the stale pointer from a previous operation and hand it into the callback functions. The callbacks then dereference a pointer which either belongs to a different facility or points to freed and potentially reused memory. In either case data corruption and crashes are the obvious consequence. Reset the node and the last pointers in the per cpu state to NULL after the operation which set them has completed. Fixes: 96abb968549c ("smp/hotplug: Allow external multi-instance rollback") Reported-by: Tvrtko Ursulin Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Boris Ostrovsky Cc: "Paul E. McKenney" Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1710211606130.3213@nanos --- kernel/cpu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/cpu.c b/kernel/cpu.c index d851df22f5c5..04892a82f6ac 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -632,6 +632,11 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup, __cpuhp_kick_ap(st); } + /* + * Clean up the leftovers so the next hotplug operation wont use stale + * data. + */ + st->node = st->last = NULL; return ret; } -- cgit v1.2.3-70-g09d2 From b16918a5fd87983d421a7e2241a1314e877c4ea2 Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Thu, 19 Oct 2017 16:51:44 +0100 Subject: hwmon: (da9052) Increase sample rate when using TSI The TSI channel, which is usually used for touchscreen support, but can be used as 4 general purpose ADCs. When used as a touchscreen interface the touchscreen driver switches the device into 1ms sampling mode (rather than the default 10ms economy mode) as recommended by the manufacturer. When using the TSI channels as a general purpose ADC we are currently not doing this and testing suggests that this can result in ADC timeouts: [ 5827.198289] da9052 spi2.0: timeout waiting for ADC conversion interrupt [ 5827.728293] da9052 spi2.0: timeout waiting for ADC conversion interrupt [ 5993.808335] da9052 spi2.0: timeout waiting for ADC conversion interrupt [ 5994.328441] da9052 spi2.0: timeout waiting for ADC conversion interrupt [ 5994.848291] da9052 spi2.0: timeout waiting for ADC conversion interrupt Switching to the 1ms timing resolves this issue. Fixes: 4f16cab19a3d5 ("hwmon: da9052: Add support for TSI channel") Signed-off-by: Martyn Welch Acked-by: Steve Twiss Signed-off-by: Guenter Roeck --- drivers/hwmon/da9052-hwmon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwmon/da9052-hwmon.c b/drivers/hwmon/da9052-hwmon.c index 97a62f5b9ea4..a973eb6a2890 100644 --- a/drivers/hwmon/da9052-hwmon.c +++ b/drivers/hwmon/da9052-hwmon.c @@ -477,6 +477,11 @@ static int da9052_hwmon_probe(struct platform_device *pdev) /* disable touchscreen features */ da9052_reg_write(hwmon->da9052, DA9052_TSI_CONT_A_REG, 0x00); + /* Sample every 1ms */ + da9052_reg_update(hwmon->da9052, DA9052_ADC_CONT_REG, + DA9052_ADCCONT_ADCMODE, + DA9052_ADCCONT_ADCMODE); + err = da9052_request_irq(hwmon->da9052, DA9052_IRQ_TSIREADY, "tsiready-irq", da9052_tsi_datardy_irq, hwmon); -- cgit v1.2.3-70-g09d2 From 8b95f4f730cba02ef6febbdc4ca7e55ca045b00e Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 20 Oct 2017 15:07:41 +0800 Subject: drm/amd/powerplay: fix uninitialized variable refresh_rate was not initialized when program display gap. this patch can fix vce ring test failed when do S3 on Polaris10. bug: https://bugs.freedesktop.org/show_bug.cgi?id=103102 bug: https://bugzilla.kernel.org/show_bug.cgi?id=196615 Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c2743233ba10..b526f49be65d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -830,7 +830,7 @@ uint32_t smu7_get_xclk(struct pp_hwmgr *hwmgr) { uint32_t reference_clock, tmp; struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info; + struct cgs_mode_info mode_info = {0}; info.mode_info = &mode_info; @@ -3948,10 +3948,9 @@ static int smu7_program_display_gap(struct pp_hwmgr *hwmgr) uint32_t ref_clock; uint32_t refresh_rate = 0; struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info; + struct cgs_mode_info mode_info = {0}; info.mode_info = &mode_info; - cgs_get_active_displays_info(hwmgr->device, &info); num_active_displays = info.display_count; @@ -3967,6 +3966,7 @@ static int smu7_program_display_gap(struct pp_hwmgr *hwmgr) frame_time_in_us = 1000000 / refresh_rate; pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us; + data->frame_time_x2 = frame_time_in_us * 2 / 100; display_gap2 = pre_vbi_time_in_us * (ref_clock / 100); -- cgit v1.2.3-70-g09d2 From 8695a5395661fbb4a4f26c97f801f3800ae4754e Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 19 Oct 2017 09:03:52 -0700 Subject: bpf: devmap fix arithmetic overflow in bitmap_size calculation An integer overflow is possible in dev_map_bitmap_size() when calculating the BITS_TO_LONG logic which becomes, after macro replacement, (((n) + (d) - 1)/ (d)) where 'n' is a __u32 and 'd' is (8 * sizeof(long)). To avoid overflow cast to u64 before arithmetic. Reported-by: Richard Weinberger Acked-by: Daniel Borkmann Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/devmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 52e0548ba548..e745d6a88224 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -69,7 +69,7 @@ static LIST_HEAD(dev_map_list); static u64 dev_map_bitmap_size(const union bpf_attr *attr) { - return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long); + return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long); } static struct bpf_map *dev_map_alloc(union bpf_attr *attr) -- cgit v1.2.3-70-g09d2 From fb2a311a31d3457fe8c3ee16f5609877e2ead9f7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:21 +0200 Subject: bpf: fix off by one for range markings with L{T, E} patterns During review I noticed that the current logic for direct packet access marking in check_cond_jmp_op() has an off by one for the upper right range border when marking in find_good_pkt_pointers() with BPF_JLT and BPF_JLE. It's not really harmful given access up to pkt_end is always safe, but we should nevertheless correct the range marking before it becomes ABI. If pkt_data' denotes a pkt_data derived pointer (pkt_data + X), then for pkt_data' < pkt_end in the true branch as well as for pkt_end <= pkt_data' in the false branch we mark the range with X although it should really be X - 1 in these cases. For example, X could be pkt_end - pkt_data, then when testing for pkt_data' < pkt_end the verifier simulation cannot deduce that a byte load of pkt_data' - 1 would succeed in this branch. Fixes: b4e432f1000a ("bpf: enable BPF_J{LT, LE, SLT, SLE} opcodes in verifier") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20f3889c006e..49cb5ad14746 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2430,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } static void find_good_pkt_pointers(struct bpf_verifier_state *state, - struct bpf_reg_state *dst_reg) + struct bpf_reg_state *dst_reg, + bool range_right_open) { struct bpf_reg_state *regs = state->regs, *reg; + u16 new_range; int i; - if (dst_reg->off < 0) + if (dst_reg->off < 0 || + (dst_reg->off == 0 && range_right_open)) /* This doesn't give us any range */ return; @@ -2446,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, */ return; - /* LLVM can generate four kind of checks: + new_range = dst_reg->off; + if (range_right_open) + new_range--; + + /* Examples for register markings: * - * Type 1/2: + * pkt_data in dst register: * * r2 = r3; * r2 += 8; @@ -2465,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, * r2=pkt(id=n,off=8,r=0) * r3=pkt(id=n,off=0,r=0) * - * Type 3/4: + * pkt_data in src register: * * r2 = r3; * r2 += 8; @@ -2483,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, * r3=pkt(id=n,off=0,r=0) * * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) - * so that range of bytes [r3, r3 + 8) is safe to access. + * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8) + * and [r3, r3 + 8-1) respectively is safe to access depending on + * the check. */ /* If our ids match, then we must have the same max_value. And we @@ -2494,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) /* keep the maximum range already checked */ - regs[i].range = max_t(u16, regs[i].range, dst_reg->off); + regs[i].range = max(regs[i].range, new_range); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) - reg->range = max_t(u16, reg->range, dst_reg->off); + reg->range = max(reg->range, new_range); } } @@ -2865,19 +2874,19 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(this_branch, dst_reg); + find_good_pkt_pointers(this_branch, dst_reg, false); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(other_branch, dst_reg); + find_good_pkt_pointers(other_branch, dst_reg, true); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(other_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], false); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(this_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], true); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; -- cgit v1.2.3-70-g09d2 From 0fd4759c5515b7f2297d7fee5c45e5d9dd733001 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:22 +0200 Subject: bpf: fix pattern matches for direct packet access Alexander had a test program with direct packet access, where the access test was in the form of data + X > data_end. In an unrelated change to the program LLVM decided to swap the branches and emitted code for the test in form of data + X <= data_end. We hadn't seen these being generated previously, thus verifier would reject the program. Therefore, fix up the verifier to detect all test cases, so we don't run into such issues in the future. Fixes: b4e432f1000a ("bpf: enable BPF_J{LT, LE, SLT, SLE} opcodes in verifier") Reported-by: Alexander Alemayhu Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 49cb5ad14746..c48ca2a34b5e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2874,18 +2874,42 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' > pkt_end */ find_good_pkt_pointers(this_branch, dst_reg, false); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end > pkt_data' */ + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], true); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' < pkt_end */ find_good_pkt_pointers(other_branch, dst_reg, true); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end < pkt_data' */ + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], false); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && + dst_reg->type == PTR_TO_PACKET && + regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' >= pkt_end */ + find_good_pkt_pointers(this_branch, dst_reg, true); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end >= pkt_data' */ find_good_pkt_pointers(other_branch, ®s[insn->src_reg], false); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && + dst_reg->type == PTR_TO_PACKET && + regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' <= pkt_end */ + find_good_pkt_pointers(other_branch, dst_reg, false); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end <= pkt_data' */ find_good_pkt_pointers(this_branch, ®s[insn->src_reg], true); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); -- cgit v1.2.3-70-g09d2 From b37242c773b21edcd566e3bf995fb91d06b9537a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:23 +0200 Subject: bpf: add test cases to bpf selftests to cover all access tests Lets add test cases to cover really all possible direct packet access tests for good/bad access cases so we keep tracking them. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 480 ++++++++++++++++++++++++++++ 1 file changed, 480 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 50e15cedbb7f..64ae21f64489 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6671,6 +6671,486 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "XDP pkt read, pkt_end mangling, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end mangling, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' > pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' > pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' > pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end > pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end > pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end > pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' < pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' < pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' < pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end < pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end < pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end < pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' >= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end >= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end >= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end >= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_data' <= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end <= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "XDP pkt read, pkt_end <= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "XDP pkt read, pkt_end <= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3-70-g09d2 From 54d431176429e9cf064461589e5174349a9f73da Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 19 Oct 2017 12:40:39 -0400 Subject: sock: correct sk_wmem_queued accounting on efault in tcp zerocopy Syzkaller hits WARN_ON(sk->sk_wmem_queued) in sk_stream_kill_queues after triggering an EFAULT in __zerocopy_sg_from_iter. On this error, skb_zerocopy_stream_iter resets the skb to its state before the operation with __pskb_trim. It cannot kfree_skb like datagram callers, as the skb may have data from a previous send call. __pskb_trim calls skb_condense for unowned skbs, which adjusts their truesize. These tcp skbuffs are owned and their truesize must add up to sk_wmem_queued. But they match because their skb->sk is NULL until tcp_transmit_skb. Temporarily set skb->sk when calling __pskb_trim to signal that the skbuffs are owned and avoid the skb_condense path. Fixes: 52267790ef52 ("sock: add MSG_ZEROCOPY") Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e62476beee95..24656076906d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1124,9 +1124,13 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len); if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) { + struct sock *save_sk = skb->sk; + /* Streams do not free skb on error. Reset to prev state. */ msg->msg_iter = orig_iter; + skb->sk = sk; ___pskb_trim(skb, orig_len); + skb->sk = save_sk; return err; } -- cgit v1.2.3-70-g09d2 From 66c54517540cedf5a22911c6b7f5c7d8b5d1e1be Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 19 Oct 2017 20:17:32 +0300 Subject: net: bridge: fix returning of vlan range op errors When vlan tunnels were introduced, vlan range errors got silently dropped and instead 0 was returned always. Restore the previous behaviour and return errors to user-space. Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support") Signed-off-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 3bc890716c89..de2152730809 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -573,7 +573,7 @@ static int br_process_vlan_info(struct net_bridge *br, } *vinfo_last = NULL; - return 0; + return err; } return br_vlan_info(br, p, cmd, vinfo_curr); -- cgit v1.2.3-70-g09d2 From 1b5f962e71bfad6284574655c406597535c3ea7a Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Thu, 19 Oct 2017 15:00:29 -0400 Subject: soreuseport: fix initialization race Syzkaller stumbled upon a way to trigger WARNING: CPU: 1 PID: 13881 at net/core/sock_reuseport.c:41 reuseport_alloc+0x306/0x3b0 net/core/sock_reuseport.c:39 There are two initialization paths for the sock_reuseport structure in a socket: Through the udp/tcp bind paths of SO_REUSEPORT sockets or through SO_ATTACH_REUSEPORT_[CE]BPF before bind. The existing implementation assumedthat the socket lock protected both of these paths when it actually only protects the SO_ATTACH_REUSEPORT path. Syzkaller triggered this double allocation by running these paths concurrently. This patch moves the check for double allocation into the reuseport_alloc function which is protected by a global spin lock. Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") Fixes: c125e80b8868 ("soreuseport: fast reuseport TCP socket selection") Signed-off-by: Craig Gallek Signed-off-by: David S. Miller --- net/core/sock_reuseport.c | 12 +++++++++--- net/ipv4/inet_hashtables.c | 5 +---- net/ipv4/udp.c | 5 +---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index eed1ebf7f29d..b1e0dbea1e8c 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk) * soft irq of receive path or setsockopt from process context */ spin_lock_bh(&reuseport_lock); - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - "multiple allocations for the same socket"); + + /* Allocation attempts can occur concurrently via the setsockopt path + * and the bind/hash path. Nothing to do when we lose the race. + */ + if (rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock))) + goto out; + reuse = __reuseport_alloc(INIT_SOCKS); if (!reuse) { spin_unlock_bh(&reuseport_lock); @@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk) reuse->num_socks = 1; rcu_assign_pointer(sk->sk_reuseport_cb, reuse); +out: spin_unlock_bh(&reuseport_lock); return 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 597bb4cfe805..e7d15fb0d94d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -456,10 +456,7 @@ static int inet_reuseport_add_sock(struct sock *sk, return reuseport_add_sock(sk, sk2); } - /* Initial allocation may have already happened via setsockopt */ - if (!rcu_access_pointer(sk->sk_reuseport_cb)) - return reuseport_alloc(sk); - return 0; + return reuseport_alloc(sk); } int __inet_hash(struct sock *sk, struct sock *osk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 806b298a3bdd..ebfbccae62fd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) } } - /* Initial allocation may have already happened via setsockopt */ - if (!rcu_access_pointer(sk->sk_reuseport_cb)) - return reuseport_alloc(sk); - return 0; + return reuseport_alloc(sk); } /** -- cgit v1.2.3-70-g09d2 From 95491e3cf37840c518d81e1a3a6a8ef554e03c54 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Fri, 20 Oct 2017 01:32:08 +0200 Subject: net: ethtool: remove error check for legacy setting transceiver type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9cab88726929605 ("net: ethtool: Add back transceiver type") restores the transceiver type to struct ethtool_link_settings and convert_link_ksettings_to_legacy_settings() but forgets to remove the error check for the same in convert_legacy_settings_to_link_ksettings(). This prevents older versions of ethtool to change link settings. # ethtool --version ethtool version 3.16 # ethtool -s eth0 autoneg on speed 100 duplex full Cannot set new settings: Invalid argument not setting speed not setting duplex not setting autoneg While newer versions of ethtool works. # ethtool --version ethtool version 4.10 # ethtool -s eth0 autoneg on speed 100 duplex full [ 57.703268] sh-eth ee700000.ethernet eth0: Link is Down [ 59.618227] sh-eth ee700000.ethernet eth0: Link is Up - 100Mbps/Full - flow control rx/tx Fixes: 19cab88726929605 ("net: ethtool: Add back transceiver type") Signed-off-by: Niklas Söderlund Reported-by: Renjith R V Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/core/ethtool.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3228411ada0f..9a9a3d77e327 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -436,7 +436,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); /* return false if legacy contained non-0 deprecated fields - * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated + * maxtxpkt/maxrxpkt. rest of ksettings always updated */ static bool convert_legacy_settings_to_link_ksettings( @@ -451,8 +451,7 @@ convert_legacy_settings_to_link_ksettings( * deprecated legacy fields, and they should not use * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS */ - if (legacy_settings->transceiver || - legacy_settings->maxtxpkt || + if (legacy_settings->maxtxpkt || legacy_settings->maxrxpkt) retval = false; -- cgit v1.2.3-70-g09d2 From 14aefd9011f14ecf1f821fcd1754f009f4ab3df9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 20 Oct 2017 09:16:15 +0200 Subject: mlxsw: reg: Add Tunneling IPinIP General Configuration Register The TIGCR register is used for setting up the IPinIP Tunnel configuration. Fixes: ee954d1a91b2 ("mlxsw: spectrum_router: Support GRE tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index cc27c5de5a1d..4afc8486eb9a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -6401,6 +6401,36 @@ static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, mlxsw_reg_mgpc_opcode_set(payload, opcode); } +/* TIGCR - Tunneling IPinIP General Configuration Register + * ------------------------------------------------------- + * The TIGCR register is used for setting up the IPinIP Tunnel configuration. + */ +#define MLXSW_REG_TIGCR_ID 0xA801 +#define MLXSW_REG_TIGCR_LEN 0x10 + +MLXSW_REG_DEFINE(tigcr, MLXSW_REG_TIGCR_ID, MLXSW_REG_TIGCR_LEN); + +/* reg_tigcr_ipip_ttlc + * For IPinIP Tunnel encapsulation: whether to copy the ttl from the packet + * header. + * Access: RW + */ +MLXSW_ITEM32(reg, tigcr, ttlc, 0x04, 8, 1); + +/* reg_tigcr_ipip_ttl_uc + * The TTL for IPinIP Tunnel encapsulation of unicast packets if + * reg_tigcr_ipip_ttlc is unset. + * Access: RW + */ +MLXSW_ITEM32(reg, tigcr, ttl_uc, 0x04, 0, 8); + +static inline void mlxsw_reg_tigcr_pack(char *payload, bool ttlc, u8 ttl_uc) +{ + MLXSW_REG_ZERO(tigcr, payload); + mlxsw_reg_tigcr_ttlc_set(payload, ttlc); + mlxsw_reg_tigcr_ttl_uc_set(payload, ttl_uc); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -6881,6 +6911,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mcc), MLXSW_REG(mcda), MLXSW_REG(mgpc), + MLXSW_REG(tigcr), MLXSW_REG(sbpr), MLXSW_REG(sbcm), MLXSW_REG(sbpm), -- cgit v1.2.3-70-g09d2 From dcbda2820ff91a692338fed2c99bb9b1af37a05a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 20 Oct 2017 09:16:16 +0200 Subject: mlxsw: spectrum_router: Configure TIGCR on init Spectrum tunnels do not default to ttl of "inherit" like the Linux ones do. Configure TIGCR on router init so that the TTL of tunnel packets is copied from the overlay packets. Fixes: ee954d1a91b2 ("mlxsw: spectrum_router: Support GRE tunnels") Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c16718d296d3..5189022a1c8c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5896,11 +5896,20 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->router->rifs); } +static int +mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp) +{ + char tigcr_pl[MLXSW_REG_TIGCR_LEN]; + + mlxsw_reg_tigcr_pack(tigcr_pl, true, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl); +} + static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr; INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); - return 0; + return mlxsw_sp_ipip_config_tigcr(mlxsw_sp); } static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp) -- cgit v1.2.3-70-g09d2 From 9c8080d068b861a80d430ba0b42d8c9b07366b66 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 20 Oct 2017 14:37:34 +0100 Subject: net: stmmac: Add missing call to dev_kfree_skb() When RX HW timestamp is enabled and a frame is discarded we are not freeing the skb but instead only setting to NULL the entry. Add a call to dev_kfree_skb_any() so that skb entry is correctly freed. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1763e48c84e2..d67638c7078e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3333,6 +3333,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) * them in stmmac_rx_refill() function so that * device can reuse it. */ + dev_kfree_skb_any(rx_q->rx_skbuff[entry]); rx_q->rx_skbuff[entry] = NULL; dma_unmap_single(priv->device, rx_q->rx_skbuff_dma[entry], -- cgit v1.2.3-70-g09d2 From 98870943a561c64aca22d10820a881aa4fa728e4 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 20 Oct 2017 14:37:35 +0100 Subject: net: stmmac: Fix stmmac_get_rx_hwtstamp() When using GMAC4 the valid timestamp is from CTX next desc but we are passing the previous desc to get_rx_timestamp_status() callback. Fix this and while at it rework a little bit the function logic. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d67638c7078e..284c10720daf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -473,19 +473,18 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, struct dma_desc *np, struct sk_buff *skb) { struct skb_shared_hwtstamps *shhwtstamp = NULL; + struct dma_desc *desc = p; u64 ns; if (!priv->hwts_rx_en) return; + /* For GMAC4, the valid timestamp is from CTX next desc. */ + if (priv->plat->has_gmac4) + desc = np; /* Check if timestamp is available */ - if (priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) { - /* For GMAC4, the valid timestamp is from CTX next desc. */ - if (priv->plat->has_gmac4) - ns = priv->hw->desc->get_timestamp(np, priv->adv_ts); - else - ns = priv->hw->desc->get_timestamp(p, priv->adv_ts); - + if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) { + ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); shhwtstamp = skb_hwtstamps(skb); memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); -- cgit v1.2.3-70-g09d2 From 9454360dec1c96800576693955b92a2792b74def Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 20 Oct 2017 14:37:36 +0100 Subject: net: stmmac: Prevent infinite loop in get_rx_timestamp_status() Prevent infinite loop by correctly setting the loop condition to break when i == 10. Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index e0ef02f9503b..4b286e27c4ca 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -275,7 +275,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) goto exit; i++; - } while ((ret == 1) || (i < 10)); + } while ((ret == 1) && (i < 10)); if (i == 10) ret = -EBUSY; -- cgit v1.2.3-70-g09d2 From 6cb3ece9685f78f9b288dd2afea58c35784e40b8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 20 Oct 2017 17:01:22 +0100 Subject: rxrpc: Don't release call mutex on error pointer Don't release call mutex at the end of rxrpc_kernel_begin_call() if the call pointer actually holds an error value. Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg") Reported-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/af_rxrpc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index fb17552fd292..4b0a8288c98a 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -308,10 +308,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, gfp); /* The socket has been unlocked. */ - if (!IS_ERR(call)) + if (!IS_ERR(call)) { call->notify_rx = notify_rx; + mutex_unlock(&call->user_mutex); + } - mutex_unlock(&call->user_mutex); _leave(" = %p", call); return call; } -- cgit v1.2.3-70-g09d2 From 7433a8d6fa60a2f6910206fa10f3550c8f11f45f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 20 Oct 2017 12:15:52 -0700 Subject: textsearch: fix typos in library helpers Fix spellos (typos) in textsearch library helpers. Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- lib/ts_fsm.c | 2 +- lib/ts_kmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c index 5696a35184e4..69557c74ef9f 100644 --- a/lib/ts_fsm.c +++ b/lib/ts_fsm.c @@ -11,7 +11,7 @@ * ========================================================================== * * A finite state machine consists of n states (struct ts_fsm_token) - * representing the pattern as a finite automation. The data is read + * representing the pattern as a finite automaton. The data is read * sequentially on an octet basis. Every state token specifies the number * of recurrences and the type of value accepted which can be either a * specific character or ctype based set of characters. The available diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c index 632f783e65f1..ffbe66cbb0ed 100644 --- a/lib/ts_kmp.c +++ b/lib/ts_kmp.c @@ -27,7 +27,7 @@ * * [1] Cormen, Leiserson, Rivest, Stein * Introdcution to Algorithms, 2nd Edition, MIT Press - * [2] See finite automation theory + * [2] See finite automaton theory */ #include -- cgit v1.2.3-70-g09d2 From 66bdede495c71da9c5ce18542976fae53642880b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 18 Oct 2017 13:54:03 +0200 Subject: of_mdio: Fix broken PHY IRQ in case of probe deferral If an Ethernet PHY is initialized before the interrupt controller it is connected to, a message like the following is printed: irq: no irq domain found for /interrupt-controller@e61c0000 ! However, the actual error is ignored, leading to a non-functional (POLL) PHY interrupt later: Micrel KSZ8041RNLI ee700000.ethernet-ffffffff:01: attached PHY driver [Micrel KSZ8041RNLI] (mii_bus:phy_addr=ee700000.ethernet-ffffffff:01, irq=POLL) Depending on whether the PHY driver will fall back to polling, Ethernet may or may not work. To fix this: 1. Switch of_mdiobus_register_phy() from irq_of_parse_and_map() to of_irq_get(). Unlike the former, the latter returns -EPROBE_DEFER if the interrupt controller is not yet available, so this condition can be detected. Other errors are handled the same as before, i.e. use the passed mdio->irq[addr] as interrupt. 2. Propagate and handle errors from of_mdiobus_register_phy() and of_mdiobus_register_device(). Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index d94dd8b77abd..98258583abb0 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -44,7 +44,7 @@ static int of_get_phy_id(struct device_node *device, u32 *phy_id) return -EINVAL; } -static void of_mdiobus_register_phy(struct mii_bus *mdio, +static int of_mdiobus_register_phy(struct mii_bus *mdio, struct device_node *child, u32 addr) { struct phy_device *phy; @@ -60,9 +60,13 @@ static void of_mdiobus_register_phy(struct mii_bus *mdio, else phy = get_phy_device(mdio, addr, is_c45); if (IS_ERR(phy)) - return; + return PTR_ERR(phy); - rc = irq_of_parse_and_map(child, 0); + rc = of_irq_get(child, 0); + if (rc == -EPROBE_DEFER) { + phy_device_free(phy); + return rc; + } if (rc > 0) { phy->irq = rc; mdio->irq[addr] = rc; @@ -84,22 +88,23 @@ static void of_mdiobus_register_phy(struct mii_bus *mdio, if (rc) { phy_device_free(phy); of_node_put(child); - return; + return rc; } dev_dbg(&mdio->dev, "registered phy %s at address %i\n", child->name, addr); + return 0; } -static void of_mdiobus_register_device(struct mii_bus *mdio, - struct device_node *child, u32 addr) +static int of_mdiobus_register_device(struct mii_bus *mdio, + struct device_node *child, u32 addr) { struct mdio_device *mdiodev; int rc; mdiodev = mdio_device_create(mdio, addr); if (IS_ERR(mdiodev)) - return; + return PTR_ERR(mdiodev); /* Associate the OF node with the device structure so it * can be looked up later. @@ -112,11 +117,12 @@ static void of_mdiobus_register_device(struct mii_bus *mdio, if (rc) { mdio_device_free(mdiodev); of_node_put(child); - return; + return rc; } dev_dbg(&mdio->dev, "registered mdio device %s at address %i\n", child->name, addr); + return 0; } /* The following is a list of PHY compatible strings which appear in @@ -219,9 +225,11 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) } if (of_mdiobus_child_is_phy(child)) - of_mdiobus_register_phy(mdio, child, addr); + rc = of_mdiobus_register_phy(mdio, child, addr); else - of_mdiobus_register_device(mdio, child, addr); + rc = of_mdiobus_register_device(mdio, child, addr); + if (rc) + goto unregister; } if (!scanphys) @@ -242,12 +250,19 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) dev_info(&mdio->dev, "scan phy %s at address %i\n", child->name, addr); - if (of_mdiobus_child_is_phy(child)) - of_mdiobus_register_phy(mdio, child, addr); + if (of_mdiobus_child_is_phy(child)) { + rc = of_mdiobus_register_phy(mdio, child, addr); + if (rc) + goto unregister; + } } } return 0; + +unregister: + mdiobus_unregister(mdio); + return rc; } EXPORT_SYMBOL(of_mdiobus_register); -- cgit v1.2.3-70-g09d2 From 864e2a1f8aac05effac6063ce316b480facb46ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 21 Oct 2017 12:26:23 -0700 Subject: ipv6: flowlabel: do not leave opt->tot_len with garbage When syzkaller team brought us a C repro for the crash [1] that had been reported many times in the past, I finally could find the root cause. If FlowLabel info is merged by fl6_merge_options(), we leave part of the opt_space storage provided by udp/raw/l2tp with random value in opt_space.tot_len, unless a control message was provided at sendmsg() time. Then ip6_setup_cork() would use this random value to perform a kzalloc() call. Undefined behavior and crashes. Fix is to properly set tot_len in fl6_merge_options() At the same time, we can also avoid consuming memory and cpu cycles to clear it, if every option is copied via a kmemdup(). This is the change in ip6_setup_cork(). [1] kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 6613 Comm: syz-executor0 Not tainted 4.14.0-rc4+ #127 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801cb64a100 task.stack: ffff8801cc350000 RIP: 0010:ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 RSP: 0018:ffff8801cc357550 EFLAGS: 00010203 RAX: dffffc0000000000 RBX: ffff8801cc357748 RCX: 0000000000000010 RDX: 0000000000000002 RSI: ffffffff842bd1d9 RDI: 0000000000000014 RBP: ffff8801cc357620 R08: ffff8801cb17f380 R09: ffff8801cc357b10 R10: ffff8801cb64a100 R11: 0000000000000000 R12: ffff8801cc357ab0 R13: ffff8801cc357b10 R14: 0000000000000000 R15: ffff8801c3bbf0c0 FS: 00007f9c5c459700(0000) GS:ffff8801db200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020324000 CR3: 00000001d1cf2000 CR4: 00000000001406f0 DR0: 0000000020001010 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: ip6_make_skb+0x282/0x530 net/ipv6/ip6_output.c:1729 udpv6_sendmsg+0x2769/0x3380 net/ipv6/udp.c:1340 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x358/0x5a0 net/socket.c:1750 SyS_sendto+0x40/0x50 net/socket.c:1718 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4520a9 RSP: 002b:00007f9c5c458c08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004520a9 RDX: 0000000000000001 RSI: 0000000020fd1000 RDI: 0000000000000016 RBP: 0000000000000086 R08: 0000000020e0afe4 R09: 000000000000001c R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004bb1ee R13: 00000000ffffffff R14: 0000000000000016 R15: 0000000000000029 Code: e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 ea 0f 00 00 48 8d 79 04 48 b8 00 00 00 00 00 fc ff df 45 8b 74 24 04 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 RIP: ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 RSP: ffff8801cc357550 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 1 + net/ipv6/ip6_output.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 8081bafe441b..15535ee327c5 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, } opt_space->dst1opt = fopt->dst1opt; opt_space->opt_flen = fopt->opt_flen; + opt_space->tot_len = fopt->tot_len; return opt_space; } EXPORT_SYMBOL_GPL(fl6_merge_options); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 43ca864327c7..5110a418cc4d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1161,11 +1161,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (WARN_ON(v6_cork->opt)) return -EINVAL; - v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); + v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); if (unlikely(!v6_cork->opt)) return -ENOBUFS; - v6_cork->opt->tot_len = opt->tot_len; + v6_cork->opt->tot_len = sizeof(*opt); v6_cork->opt->opt_flen = opt->opt_flen; v6_cork->opt->opt_nflen = opt->opt_nflen; -- cgit v1.2.3-70-g09d2 From 8d5f4b07174976c55a5f5d6967777373c6826944 Mon Sep 17 00:00:00 2001 From: Bernd Edlinger Date: Sat, 21 Oct 2017 06:51:30 +0000 Subject: stmmac: Don't access tx_q->dirty_tx before netif_tx_lock This is the possible reason for different hard to reproduce problems on my ARMv7-SMP test system. The symptoms are in recent kernels imprecise external aborts, and in older kernels various kinds of network stalls and unexpected page allocation failures. My testing indicates that the trouble started between v4.5 and v4.6 and prevails up to v4.14. Using the dirty_tx before acquiring the spin lock is clearly wrong and was first introduced with v4.6. Fixes: e3ad57c96715 ("stmmac: review RX/TX ring management") Signed-off-by: Bernd Edlinger Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 284c10720daf..16bd50929084 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1799,12 +1799,13 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; unsigned int bytes_compl = 0, pkts_compl = 0; - unsigned int entry = tx_q->dirty_tx; + unsigned int entry; netif_tx_lock(priv->dev); priv->xstats.tx_clean++; + entry = tx_q->dirty_tx; while (entry != tx_q->cur_tx) { struct sk_buff *skb = tx_q->tx_skbuff[entry]; struct dma_desc *p; -- cgit v1.2.3-70-g09d2 From bfc1168de949cd3e9ca18c3480b5085deff1ea7c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 22 Oct 2017 12:47:31 +0200 Subject: x86/cpu/AMD: Apply the Erratum 688 fix when the BIOS doesn't Some F14h machines have an erratum which, "under a highly specific and detailed set of internal timing conditions" can lead to skipping instructions and RIP corruption. Add the fix for those machines when their BIOS doesn't apply it or there simply isn't BIOS update for them. Tested-by: Signed-off-by: Borislav Petkov Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sherry Hurwitz Cc: Thomas Gleixner Cc: Yazen Ghannam Link: http://lkml.kernel.org/r/20171022104731.28249-1-bp@alien8.de Link: https://bugzilla.kernel.org/show_bug.cgi?id=197285 [ Added pr_info() that we activated the workaround. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_nb.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 458da8509b75..6db28f17ff28 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = { {} }; +#define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704 + const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, @@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, {} }; EXPORT_SYMBOL_GPL(amd_nb_misc_ids); @@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; @@ -402,11 +406,48 @@ void amd_flush_garts(void) } EXPORT_SYMBOL_GPL(amd_flush_garts); +static void __fix_erratum_688(void *info) +{ +#define MSR_AMD64_IC_CFG 0xC0011021 + + msr_set_bit(MSR_AMD64_IC_CFG, 3); + msr_set_bit(MSR_AMD64_IC_CFG, 14); +} + +/* Apply erratum 688 fix so machines without a BIOS fix work. */ +static __init void fix_erratum_688(void) +{ + struct pci_dev *F4; + u32 val; + + if (boot_cpu_data.x86 != 0x14) + return; + + if (!amd_northbridges.num) + return; + + F4 = node_to_amd_nb(0)->link; + if (!F4) + return; + + if (pci_read_config_dword(F4, 0x164, &val)) + return; + + if (val & BIT(2)) + return; + + on_each_cpu(__fix_erratum_688, NULL, 0); + + pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n"); +} + static __init int init_amd_nbs(void) { amd_cache_northbridges(); amd_cache_gart(); + fix_erratum_688(); + return 0; } -- cgit v1.2.3-70-g09d2 From 09c3e01b255fe89eb1f1b7ded68b1b7d55e6d02b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 22 Oct 2017 11:42:29 -0700 Subject: Input: do not use property bits when generating module alias The commit 8724ecb07229 ("Input: allow matching device IDs on property bits") started using property bits when generating module aliases for input handlers, but did not adjust the generation of MODALIAS attribute on input device uevents, breaking automatic module loading. Given that no handler currently uses property bits in their module tables, let's revert this part of the commit for now. Reported-by: Damien Wyart Tested-by: Damien Wyart Fixes: 8724ecb07229 ("Input: allow matching device IDs on property bits") Signed-off-by: Dmitry Torokhov --- scripts/mod/devicetable-offsets.c | 1 - scripts/mod/file2alias.c | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index 812657ab5aa3..e4d90e50f6fe 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -105,7 +105,6 @@ int main(void) DEVID_FIELD(input_device_id, sndbit); DEVID_FIELD(input_device_id, ffbit); DEVID_FIELD(input_device_id, swbit); - DEVID_FIELD(input_device_id, propbit); DEVID(eisa_device_id); DEVID_FIELD(eisa_device_id, sig); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index bc25898f6df0..29d6699d5a06 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -761,7 +761,7 @@ static void do_input(char *alias, sprintf(alias + strlen(alias), "%X,*", i); } -/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwXprX where X is comma-separated %02X. */ +/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. */ static int do_input_entry(const char *filename, void *symval, char *alias) { @@ -779,7 +779,6 @@ static int do_input_entry(const char *filename, void *symval, DEF_FIELD_ADDR(symval, input_device_id, sndbit); DEF_FIELD_ADDR(symval, input_device_id, ffbit); DEF_FIELD_ADDR(symval, input_device_id, swbit); - DEF_FIELD_ADDR(symval, input_device_id, propbit); sprintf(alias, "input:"); @@ -817,9 +816,6 @@ static int do_input_entry(const char *filename, void *symval, sprintf(alias + strlen(alias), "w*"); if (flags & INPUT_DEVICE_ID_MATCH_SWBIT) do_input(alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX); - sprintf(alias + strlen(alias), "pr*"); - if (flags & INPUT_DEVICE_ID_MATCH_PROPBIT) - do_input(alias, *propbit, 0, INPUT_DEVICE_ID_PROP_MAX); return 1; } ADD_TO_DEVTABLE("input", input_device_id, do_input_entry); -- cgit v1.2.3-70-g09d2 From 3a91d29f20276fa7cd4d0c9c7f3e78b30708159d Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Sun, 22 Oct 2017 13:13:16 +0900 Subject: tcp: do tcp_mstamp_refresh before retransmits on TSQ handler When retransmission on TSQ handler was introduced in the commit f9616c35a0d7 ("tcp: implement TSQ for retransmits"), the retransmitted skbs' timestamps were updated on the actual transmission. In the later commit 385e20706fac ("tcp: use tp->tcp_mstamp in output path"), it stops being done so. In the commit, the comment says "We try to refresh tp->tcp_mstamp only when necessary", and at present tcp_tsq_handler and tcp_v4_mtu_reduced applies to this. About the latter, it's okay since it's rare enough. About the former, even though possible retransmissions on the tasklet comes just after the destructor run in NET_RX softirq handling, the time between them could be nonnegligibly large to the extent that tcp_rack_advance or rto rearming be affected if other (remaining) RX, BLOCK and (preceding) TASKLET sofirq handlings are unexpectedly heavy. So in the same way as tcp_write_timer_handler does, doing tcp_mstamp_refresh ensures the accuracy of algorithms relying on it. Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path") Signed-off-by: Koichiro Den Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0bc9e46a5369..973befc36fd4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -739,8 +739,10 @@ static void tcp_tsq_handler(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->lost_out > tp->retrans_out && - tp->snd_cwnd > tcp_packets_in_flight(tp)) + tp->snd_cwnd > tcp_packets_in_flight(tp)) { + tcp_mstamp_refresh(tp); tcp_xmit_retransmit_queue(sk); + } tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, 0, GFP_ATOMIC); -- cgit v1.2.3-70-g09d2 From a6ca7abe53633d08eea1c6756cb49c9b2d4c90bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Oct 2017 12:33:57 -0700 Subject: tcp/dccp: fix lockdep splat in inet_csk_route_req() This patch fixes the following lockdep splat in inet_csk_route_req() lockdep_rcu_suspicious inet_csk_route_req tcp_v4_send_synack tcp_rtx_synack inet_rtx_syn_ack tcp_fastopen_synack_time tcp_retransmit_timer tcp_write_timer_handler tcp_write_timer call_timer_fn Thread running inet_csk_route_req() owns a reference on the request socket, so we have the guarantee ireq->ireq_opt wont be changed or freed. lockdep can enforce this invariant for us. Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5ec9136a7c36..18cd2eae758f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -543,7 +543,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = rcu_dereference(ireq->ireq_opt); + opt = rcu_dereference_protected(ireq->ireq_opt, + refcount_read(&req->rsk_refcnt) > 0); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), -- cgit v1.2.3-70-g09d2 From 1137b5e2529a8f5ca8ee709288ecba3e68044df2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 19 Oct 2017 20:51:10 +0800 Subject: ipsec: Fix aborted xfrm policy dump crash An independent security researcher, Mohamed Ghannam, has reported this vulnerability to Beyond Security's SecuriTeam Secure Disclosure program. The xfrm_dump_policy_done function expects xfrm_dump_policy to have been called at least once or it will crash. This can be triggered if a dump fails because the target socket's receive buffer is full. This patch fixes it by using the cb->start mechanism to ensure that the initialisation is always done regardless of the buffer situation. Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b997f1395357..e44a0fed48dd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1693,32 +1693,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr static int xfrm_dump_policy_done(struct netlink_callback *cb) { - struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct net *net = sock_net(cb->skb->sk); xfrm_policy_walk_done(walk, net); return 0; } +static int xfrm_dump_policy_start(struct netlink_callback *cb) +{ + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; + + BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args)); + + xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); + return 0; +} + static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct xfrm_dump_info info; - BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) > - sizeof(cb->args) - sizeof(cb->args[0])); - info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - if (!cb->args[0]) { - cb->args[0] = 1; - xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); - } - (void) xfrm_policy_walk(net, walk, dump_one_policy, &info); return skb->len; @@ -2474,6 +2476,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { static const struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); const struct nla_policy *nla_pol; @@ -2487,6 +2490,7 @@ static const struct xfrm_link { [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, + .start = xfrm_dump_policy_start, .dump = xfrm_dump_policy, .done = xfrm_dump_policy_done }, [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, @@ -2539,6 +2543,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, { struct netlink_dump_control c = { + .start = link->start, .dump = link->dump, .done = link->done, }; -- cgit v1.2.3-70-g09d2 From 8fe8ffb12c81b36877984274db184953c337db73 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Oct 2017 11:46:45 -0700 Subject: scsi: Suppress a kernel warning in case the prep function returns BLKPREP_DEFER The legacy block layer handles requests as follows: - If the prep function returns BLKPREP_OK, let blk_peek_request() return the pointer to that request. - If the prep function returns BLKPREP_DEFER, keep the RQF_STARTED flag and retry calling the prep function later. - If the prep function returns BLKPREP_KILL or BLKPREP_INVALID, end the request. In none of these cases it is correct to clear the SCMD_INITIALIZED flag from inside scsi_prep_fn(). Since scsi_prep_fn() already guarantees that scsi_init_command() will be called once even if scsi_prep_fn() is called multiple times, remove the code that clears SCMD_INITIALIZED from scsi_prep_fn(). The scsi-mq code handles requests as follows: - If scsi_mq_prep_fn() returns BLKPREP_OK, set the RQF_DONTPREP flag and submit the request to the SCSI LLD. - If scsi_mq_prep_fn() returns BLKPREP_DEFER, call blk_mq_delay_run_hw_queue() and return BLK_STS_RESOURCE. - If the prep function returns BLKPREP_KILL or BLKPREP_INVALID, call scsi_mq_uninit_cmd() and let the blk-mq core end the request. In none of these cases scsi_mq_prep_fn() should clear the SCMD_INITIALIZED flag. Hence remove the code from scsi_mq_prep_fn() function that clears that flag. This patch avoids that the following warning is triggered when using the legacy block layer: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 4198 at drivers/scsi/scsi_lib.c:654 scsi_end_request+0x1de/0x220 CPU: 1 PID: 4198 Comm: mkfs.f2fs Not tainted 4.14.0-rc5+ #1 task: ffff91c147a4b800 task.stack: ffffb282c37b8000 RIP: 0010:scsi_end_request+0x1de/0x220 Call Trace: scsi_io_completion+0x204/0x5e0 scsi_finish_command+0xce/0xe0 scsi_softirq_done+0x126/0x130 blk_done_softirq+0x6e/0x80 __do_softirq+0xcf/0x2a8 irq_exit+0xab/0xb0 do_IRQ+0x7b/0xc0 common_interrupt+0x90/0x90 RIP: 0010:_raw_spin_unlock_irqrestore+0x9/0x10 __test_set_page_writeback+0xc7/0x2c0 __block_write_full_page+0x158/0x3b0 block_write_full_page+0xc4/0xd0 blkdev_writepage+0x13/0x20 __writepage+0x12/0x40 write_cache_pages+0x204/0x500 generic_writepages+0x48/0x70 blkdev_writepages+0x9/0x10 do_writepages+0x34/0xc0 __filemap_fdatawrite_range+0x6c/0x90 file_write_and_wait_range+0x31/0x90 blkdev_fsync+0x16/0x40 vfs_fsync_range+0x44/0xa0 do_fsync+0x38/0x60 SyS_fsync+0xb/0x10 entry_SYSCALL_64_fastpath+0x13/0x94 ---[ end trace 86e8ef85a4a6c1d1 ]--- Fixes: commit 64104f703212 ("scsi: Call scsi_initialize_rq() for filesystem requests") Signed-off-by: Bart Van Assche Cc: Damien Le Moal Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9cf6a80fe297..ad3ea24f0885 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1379,8 +1379,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) ret = scsi_setup_cmnd(sdev, req); out: - if (ret != BLKPREP_OK) - cmd->flags &= ~SCMD_INITIALIZED; return scsi_prep_return(q, req, ret); } @@ -1900,7 +1898,6 @@ static int scsi_mq_prep_fn(struct request *req) struct scsi_device *sdev = req->q->queuedata; struct Scsi_Host *shost = sdev->host; struct scatterlist *sg; - int ret; scsi_init_command(sdev, cmd); @@ -1934,10 +1931,7 @@ static int scsi_mq_prep_fn(struct request *req) blk_mq_start_request(req); - ret = scsi_setup_cmnd(sdev, req); - if (ret != BLK_STS_OK) - cmd->flags &= ~SCMD_INITIALIZED; - return ret; + return scsi_setup_cmnd(sdev, req); } static void scsi_mq_done(struct scsi_cmnd *cmd) -- cgit v1.2.3-70-g09d2 From bb176f67090ca54869fc1262c913aa69d2ede070 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Oct 2017 06:49:47 -0400 Subject: Linux 4.14-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 46bfb0ed2257..01875d606f44 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3-70-g09d2 From 98990a33b77dda9babf91cb235654f6729e5702e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 20 Oct 2017 11:21:33 -0500 Subject: x86/entry: Fix idtentry unwind hint This fixes the following ORC warning in the 'int3' entry code: WARNING: can't dereference iret registers at ffff8801c5f17fe0 for ip ffffffff95f0d94b The ORC metadata had the wrong stack offset for the iret registers. Their location on the stack is dependent on whether the exception has an error code. Reported-and-tested-by: Andrei Vagin Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations") Link: http://lkml.kernel.org/r/931d57f0551ed7979d5e7e05370d445c8e5137f8.1508516398.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 49167258d587..f6cdb7a1455e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -808,7 +808,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) - UNWIND_HINT_IRET_REGS offset=8 + UNWIND_HINT_IRET_REGS offset=\has_error_code*8 /* Sanity check */ .if \shift_ist != -1 && \paranoid == 0 -- cgit v1.2.3-70-g09d2 From 58c3862b521ead4f69a24ef009a679cb3c519620 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 20 Oct 2017 11:21:34 -0500 Subject: x86/unwind: Show function name+offset in ORC error messages Improve the warning messages to show the relevant function name+offset. This makes it much easier to diagnose problems with the ORC metadata. Before: WARNING: can't dereference iret registers at ffff8801c5f17fe0 for ip ffffffff95f0d94b After: WARNING: can't dereference iret registers at ffff880178f5ffe0 for ip int3+0x5b/0x60 Reported-by: Andrei Vagin Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Link: http://lkml.kernel.org/r/6bada6b9eac86017e16bd79e1e77877935cb50bb.1508516398.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_orc.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 570b70d3f604..b95007e7c1b3 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -86,8 +86,8 @@ static struct orc_entry *orc_find(unsigned long ip) idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE; if (unlikely((idx >= lookup_num_blocks-1))) { - orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n", - idx, lookup_num_blocks, ip); + orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n", + idx, lookup_num_blocks, (void *)ip); return NULL; } @@ -96,8 +96,8 @@ static struct orc_entry *orc_find(unsigned long ip) if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) || (__start_orc_unwind + stop > __stop_orc_unwind))) { - orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n", - idx, lookup_num_blocks, start, stop, ip); + orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n", + idx, lookup_num_blocks, start, stop, (void *)ip); return NULL; } @@ -373,7 +373,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_R10: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg R10 at ip %p\n", + orc_warn("missing regs for base reg R10 at ip %pB\n", (void *)state->ip); goto done; } @@ -382,7 +382,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_R13: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg R13 at ip %p\n", + orc_warn("missing regs for base reg R13 at ip %pB\n", (void *)state->ip); goto done; } @@ -391,7 +391,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_DI: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg DI at ip %p\n", + orc_warn("missing regs for base reg DI at ip %pB\n", (void *)state->ip); goto done; } @@ -400,7 +400,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_REG_DX: if (!state->regs || !state->full_regs) { - orc_warn("missing regs for base reg DX at ip %p\n", + orc_warn("missing regs for base reg DX at ip %pB\n", (void *)state->ip); goto done; } @@ -408,7 +408,7 @@ bool unwind_next_frame(struct unwind_state *state) break; default: - orc_warn("unknown SP base reg %d for ip %p\n", + orc_warn("unknown SP base reg %d for ip %pB\n", orc->sp_reg, (void *)state->ip); goto done; } @@ -436,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_TYPE_REGS: if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { - orc_warn("can't dereference registers at %p for ip %p\n", + orc_warn("can't dereference registers at %p for ip %pB\n", (void *)sp, (void *)orig_ip); goto done; } @@ -448,7 +448,7 @@ bool unwind_next_frame(struct unwind_state *state) case ORC_TYPE_REGS_IRET: if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { - orc_warn("can't dereference iret registers at %p for ip %p\n", + orc_warn("can't dereference iret registers at %p for ip %pB\n", (void *)sp, (void *)orig_ip); goto done; } @@ -465,7 +465,8 @@ bool unwind_next_frame(struct unwind_state *state) break; default: - orc_warn("unknown .orc_unwind entry type %d\n", orc->type); + orc_warn("unknown .orc_unwind entry type %d for ip %pB\n", + orc->type, (void *)orig_ip); break; } @@ -487,7 +488,7 @@ bool unwind_next_frame(struct unwind_state *state) break; default: - orc_warn("unknown BP base reg %d for ip %p\n", + orc_warn("unknown BP base reg %d for ip %pB\n", orc->bp_reg, (void *)orig_ip); goto done; } @@ -496,7 +497,7 @@ bool unwind_next_frame(struct unwind_state *state) if (state->stack_info.type == prev_type && on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) && state->sp <= prev_sp) { - orc_warn("stack going in the wrong direction? ip=%p\n", + orc_warn("stack going in the wrong direction? ip=%pB\n", (void *)orig_ip); goto done; } -- cgit v1.2.3-70-g09d2 From 88796e7e5c457cae72833196cb98e6895dd107e2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 20 Oct 2017 10:13:46 -0700 Subject: sched/swait: Document it clearly that the swait facilities are special and shouldn't be used We currently welcome using swait over wait whenever possible because it is a slimmer data structure. However, Linus has made it very clear that he does not want this used, unless under very specific RT scenarios (such as current users). Update the comments before kernel hipsters start thinking swait is the cool thing to do. Signed-off-by: Davidlohr Bueso Acked-by: Luis R. Rodriguez Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: wagi@monom.org Link: http://lkml.kernel.org/r/20171020171346.24445-1-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/swait.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/include/linux/swait.h b/include/linux/swait.h index 73e97a08d3d0..cf30f5022472 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -9,13 +9,16 @@ /* * Simple wait queues * - * While these are very similar to the other/complex wait queues (wait.h) the - * most important difference is that the simple waitqueue allows for - * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold - * times. + * While these are very similar to regular wait queues (wait.h) the most + * important difference is that the simple waitqueue allows for deterministic + * behaviour -- IOW it has strictly bounded IRQ and lock hold times. * - * In order to make this so, we had to drop a fair number of features of the - * other waitqueue code; notably: + * Mainly, this is accomplished by two things. Firstly not allowing swake_up_all + * from IRQ disabled, and dropping the lock upon every wakeup, giving a higher + * priority task a chance to run. + * + * Secondly, we had to drop a fair number of features of the other waitqueue + * code; notably: * * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue; * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right @@ -24,12 +27,14 @@ * - the exclusive mode; because this requires preserving the list order * and this is hard. * - * - custom wake functions; because you cannot give any guarantees about - * random code. - * - * As a side effect of this; the data structures are slimmer. + * - custom wake callback functions; because you cannot give any guarantees + * about random code. This also allows swait to be used in RT, such that + * raw spinlock can be used for the swait queue head. * - * One would recommend using this wait queue where possible. + * As a side effect of these; the data structures are slimmer albeit more ad-hoc. + * For all the above, note that simple wait queues should _only_ be used under + * very specific realtime constraints -- it is best to stick with the regular + * wait queues in most cases. */ struct task_struct; -- cgit v1.2.3-70-g09d2 From 83beee5c88a6c71ded70e2eef5ca7406a02605cc Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Mon, 4 Sep 2017 22:37:21 -0700 Subject: platform/x86: intel_pmc_ipc: Use devm_* calls in driver probe function This patch cleans up unnecessary free/alloc calls in ipc_plat_probe(), ipc_pci_probe() and ipc_plat_get_res() functions by using devm_* calls. This patch also adds proper error handling for failure cases in ipc_pci_probe() function. Signed-off-by: Kuppuswamy Sathyanarayanan [andy: fixed style issues, missed devm_free_irq(), removed unnecessary log message] Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel_pmc_ipc.c | 94 +++++++++++------------------------- 1 file changed, 28 insertions(+), 66 deletions(-) diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index bb792a52248b..751b1212d01c 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -480,52 +480,39 @@ static irqreturn_t ioc(int irq, void *dev_id) static int ipc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - resource_size_t pci_resource; + struct intel_pmc_ipc_dev *pmc = &ipcdev; int ret; - int len; - ipcdev.dev = &pci_dev_get(pdev)->dev; - ipcdev.irq_mode = IPC_TRIGGER_MODE_IRQ; + /* Only one PMC is supported */ + if (pmc->dev) + return -EBUSY; - ret = pci_enable_device(pdev); + pmc->irq_mode = IPC_TRIGGER_MODE_IRQ; + + ret = pcim_enable_device(pdev); if (ret) return ret; - ret = pci_request_regions(pdev, "intel_pmc_ipc"); + ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev)); if (ret) return ret; - pci_resource = pci_resource_start(pdev, 0); - len = pci_resource_len(pdev, 0); - if (!pci_resource || !len) { - dev_err(&pdev->dev, "Failed to get resource\n"); - return -ENOMEM; - } + init_completion(&pmc->cmd_complete); - init_completion(&ipcdev.cmd_complete); + pmc->ipc_base = pcim_iomap_table(pdev)[0]; - if (request_irq(pdev->irq, ioc, 0, "intel_pmc_ipc", &ipcdev)) { + ret = devm_request_irq(&pdev->dev, pdev->irq, ioc, 0, "intel_pmc_ipc", + pmc); + if (ret) { dev_err(&pdev->dev, "Failed to request irq\n"); - return -EBUSY; + return ret; } - ipcdev.ipc_base = ioremap_nocache(pci_resource, len); - if (!ipcdev.ipc_base) { - dev_err(&pdev->dev, "Failed to ioremap ipc base\n"); - free_irq(pdev->irq, &ipcdev); - ret = -ENOMEM; - } + pmc->dev = &pdev->dev; - return ret; -} + pci_set_drvdata(pdev, pmc); -static void ipc_pci_remove(struct pci_dev *pdev) -{ - free_irq(pdev->irq, &ipcdev); - pci_release_regions(pdev); - pci_dev_put(pdev); - iounmap(ipcdev.ipc_base); - ipcdev.dev = NULL; + return 0; } static const struct pci_device_id ipc_pci_ids[] = { @@ -540,7 +527,6 @@ static struct pci_driver ipc_pci_driver = { .name = "intel_pmc_ipc", .id_table = ipc_pci_ids, .probe = ipc_pci_probe, - .remove = ipc_pci_remove, }; static ssize_t intel_pmc_ipc_simple_cmd_store(struct device *dev, @@ -850,17 +836,12 @@ static int ipc_plat_get_res(struct platform_device *pdev) return -ENXIO; } size = PLAT_RESOURCE_IPC_SIZE + PLAT_RESOURCE_GCR_SIZE; + res->end = res->start + size - 1; + + addr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(addr)) + return PTR_ERR(addr); - if (!request_mem_region(res->start, size, pdev->name)) { - dev_err(&pdev->dev, "Failed to request ipc resource\n"); - return -EBUSY; - } - addr = ioremap_nocache(res->start, size); - if (!addr) { - dev_err(&pdev->dev, "I/O memory remapping failed\n"); - release_mem_region(res->start, size); - return -ENOMEM; - } ipcdev.ipc_base = addr; ipcdev.gcr_mem_base = addr + PLAT_RESOURCE_GCR_OFFSET; @@ -917,7 +898,6 @@ MODULE_DEVICE_TABLE(acpi, ipc_acpi_ids); static int ipc_plat_probe(struct platform_device *pdev) { - struct resource *res; int ret; ipcdev.dev = &pdev->dev; @@ -939,11 +919,11 @@ static int ipc_plat_probe(struct platform_device *pdev) ret = ipc_create_pmc_devices(); if (ret) { dev_err(&pdev->dev, "Failed to create pmc devices\n"); - goto err_device; + return ret; } - if (request_irq(ipcdev.irq, ioc, IRQF_NO_SUSPEND, - "intel_pmc_ipc", &ipcdev)) { + if (devm_request_irq(&pdev->dev, ipcdev.irq, ioc, IRQF_NO_SUSPEND, + "intel_pmc_ipc", &ipcdev)) { dev_err(&pdev->dev, "Failed to request irq\n"); ret = -EBUSY; goto err_irq; @@ -960,40 +940,22 @@ static int ipc_plat_probe(struct platform_device *pdev) return 0; err_sys: - free_irq(ipcdev.irq, &ipcdev); + devm_free_irq(&pdev->dev, ipcdev.irq, &ipcdev); err_irq: platform_device_unregister(ipcdev.tco_dev); platform_device_unregister(ipcdev.punit_dev); platform_device_unregister(ipcdev.telemetry_dev); -err_device: - iounmap(ipcdev.ipc_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, - PLAT_RESOURCE_IPC_INDEX); - if (res) { - release_mem_region(res->start, - PLAT_RESOURCE_IPC_SIZE + - PLAT_RESOURCE_GCR_SIZE); - } + return ret; } static int ipc_plat_remove(struct platform_device *pdev) { - struct resource *res; - sysfs_remove_group(&pdev->dev.kobj, &intel_ipc_group); - free_irq(ipcdev.irq, &ipcdev); + devm_free_irq(&pdev->dev, ipcdev.irq, &ipcdev); platform_device_unregister(ipcdev.tco_dev); platform_device_unregister(ipcdev.punit_dev); platform_device_unregister(ipcdev.telemetry_dev); - iounmap(ipcdev.ipc_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, - PLAT_RESOURCE_IPC_INDEX); - if (res) { - release_mem_region(res->start, - PLAT_RESOURCE_IPC_SIZE + - PLAT_RESOURCE_GCR_SIZE); - } ipcdev.dev = NULL; return 0; } -- cgit v1.2.3-70-g09d2 From 6687aeb9cd3d40904d1f9e884d2145603c23adfa Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Sat, 7 Oct 2017 15:19:51 -0700 Subject: platform/x86: intel_pmc_ipc: Use spin_lock to protect GCR updates Currently, update_no_reboot_bit() function implemented in this driver uses mutex_lock() to protect its register updates. But this function is called with in atomic context in iTCO_wdt_start() and iTCO_wdt_stop() functions in iTCO_wdt.c driver, which in turn causes "sleeping into atomic context" issue. This patch fixes this issue by replacing the mutex_lock() with spin_lock() to protect the GCR read/write/update APIs. Fixes: 9d855d4 ("platform/x86: intel_pmc_ipc: Fix iTCO_wdt GCS memory mapping failure") Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel_pmc_ipc.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index 751b1212d01c..e03fa31446ca 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -131,6 +132,7 @@ static struct intel_pmc_ipc_dev { /* gcr */ void __iomem *gcr_mem_base; bool has_gcr_regs; + spinlock_t gcr_lock; /* punit */ struct platform_device *punit_dev; @@ -225,17 +227,17 @@ int intel_pmc_gcr_read(u32 offset, u32 *data) { int ret; - mutex_lock(&ipclock); + spin_lock(&ipcdev.gcr_lock); ret = is_gcr_valid(offset); if (ret < 0) { - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return ret; } *data = readl(ipcdev.gcr_mem_base + offset); - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return 0; } @@ -255,17 +257,17 @@ int intel_pmc_gcr_write(u32 offset, u32 data) { int ret; - mutex_lock(&ipclock); + spin_lock(&ipcdev.gcr_lock); ret = is_gcr_valid(offset); if (ret < 0) { - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return ret; } writel(data, ipcdev.gcr_mem_base + offset); - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return 0; } @@ -287,7 +289,7 @@ int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val) u32 new_val; int ret = 0; - mutex_lock(&ipclock); + spin_lock(&ipcdev.gcr_lock); ret = is_gcr_valid(offset); if (ret < 0) @@ -309,7 +311,7 @@ int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val) } gcr_ipc_unlock: - mutex_unlock(&ipclock); + spin_unlock(&ipcdev.gcr_lock); return ret; } EXPORT_SYMBOL_GPL(intel_pmc_gcr_update); @@ -489,6 +491,8 @@ static int ipc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pmc->irq_mode = IPC_TRIGGER_MODE_IRQ; + spin_lock_init(&ipcdev.gcr_lock); + ret = pcim_enable_device(pdev); if (ret) return ret; @@ -903,6 +907,7 @@ static int ipc_plat_probe(struct platform_device *pdev) ipcdev.dev = &pdev->dev; ipcdev.irq_mode = IPC_TRIGGER_MODE_IRQ; init_completion(&ipcdev.cmd_complete); + spin_lock_init(&ipcdev.gcr_lock); ipcdev.irq = platform_get_irq(pdev, 0); if (ipcdev.irq < 0) { -- cgit v1.2.3-70-g09d2 From d87e47e13a9b347801be08da81f16ae65f1eda0f Mon Sep 17 00:00:00 2001 From: Cao jin Date: Thu, 19 Oct 2017 11:17:05 +0800 Subject: kbuild doc: a bundle of fixes on makefiles.txt It does several fixes: 1. move the displaced ld example to its reasonable place. 2. add new example for command gzip. 3. fix 2 number errors. 4. fix format of chapter 7.x, make it looks the same as other chapters. Signed-off-by: Cao jin Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.txt | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 329e740adea7..f6f80380dff2 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -1108,14 +1108,6 @@ When kbuild executes, the following steps are followed (roughly): ld Link target. Often, LDFLAGS_$@ is used to set specific options to ld. - objcopy - Copy binary. Uses OBJCOPYFLAGS usually specified in - arch/$(ARCH)/Makefile. - OBJCOPYFLAGS_$@ may be used to set additional options. - - gzip - Compress target. Use maximum compression to compress target. - Example: #arch/x86/boot/Makefile LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary @@ -1139,6 +1131,19 @@ When kbuild executes, the following steps are followed (roughly): resulting in the target file being recompiled for no obvious reason. + objcopy + Copy binary. Uses OBJCOPYFLAGS usually specified in + arch/$(ARCH)/Makefile. + OBJCOPYFLAGS_$@ may be used to set additional options. + + gzip + Compress target. Use maximum compression to compress target. + + Example: + #arch/x86/boot/compressed/Makefile + $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE + $(call if_changed,gzip) + dtc Create flattened device tree blob object suitable for linking into vmlinux. Device tree blobs linked into vmlinux are placed @@ -1219,7 +1224,7 @@ When kbuild executes, the following steps are followed (roughly): that may be shared between individual architectures. The recommended approach how to use a generic header file is to list the file in the Kbuild file. - See "7.3 generic-y" for further info on syntax etc. + See "7.2 generic-y" for further info on syntax etc. --- 6.11 Post-link pass @@ -1254,13 +1259,13 @@ A Kbuild file may be defined under arch//include/uapi/asm/ and arch//include/asm/ to list asm files coming from asm-generic. See subsequent chapter for the syntax of the Kbuild file. - --- 7.1 no-export-headers +--- 7.1 no-export-headers no-export-headers is essentially used by include/uapi/linux/Kbuild to avoid exporting specific headers (e.g. kvm.h) on architectures that do not support it. It should be avoided as much as possible. - --- 7.2 generic-y +--- 7.2 generic-y If an architecture uses a verbatim copy of a header from include/asm-generic then this is listed in the file @@ -1287,7 +1292,7 @@ See subsequent chapter for the syntax of the Kbuild file. Example: termios.h #include - --- 7.3 generated-y +--- 7.3 generated-y If an architecture generates other header files alongside generic-y wrappers, generated-y specifies them. @@ -1299,7 +1304,7 @@ See subsequent chapter for the syntax of the Kbuild file. #arch/x86/include/asm/Kbuild generated-y += syscalls_32.h - --- 7.5 mandatory-y +--- 7.4 mandatory-y mandatory-y is essentially used by include/uapi/asm-generic/Kbuild.asm to define the minimum set of headers that must be exported in -- cgit v1.2.3-70-g09d2 From bb3f38c3c5b759163e09b9152629cc789731de47 Mon Sep 17 00:00:00 2001 From: David Lin Date: Fri, 20 Oct 2017 14:09:13 -0700 Subject: kbuild: clang: fix build failures with sparse check We should avoid using the space character when passing arguments to clang, because static code analysis check tool such as sparse may misinterpret the arguments followed by spaces as build targets hence cause the build to fail. Signed-off-by: David Lin Signed-off-by: Masahiro Yamada --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index d5ed3cec7938..8593c4081193 100644 --- a/Makefile +++ b/Makefile @@ -697,11 +697,11 @@ KBUILD_CFLAGS += $(stackp-flag) ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) -CLANG_TARGET := -target $(notdir $(CROSS_COMPILE:%-=%)) +CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN := $(realpath $(dir $(shell which $(LD)))/..) endif ifneq ($(GCC_TOOLCHAIN),) -CLANG_GCC_TC := -gcc-toolchain $(GCC_TOOLCHAIN) +CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -- cgit v1.2.3-70-g09d2 From 942491c9e6d631c012f3c4ea8e7777b0b02edeab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 23 Oct 2017 18:31:50 -0700 Subject: xfs: fix AIM7 regression Apparently our current rwsem code doesn't like doing the trylock, then lock for real scheme. So change our read/write methods to just do the trylock for the RWF_NOWAIT case. This fixes a ~25% regression in AIM7. Fixes: 91f9943e ("fs: support RWF_NOWAIT for buffered reads") Reported-by: kernel test robot Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 56d0e526870c..6526ef0e2a23 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -237,11 +237,13 @@ xfs_file_dax_read( if (!count) return 0; /* skip atime */ - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) return -EAGAIN; + } else { xfs_ilock(ip, XFS_IOLOCK_SHARED); } + ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -259,9 +261,10 @@ xfs_file_buffered_aio_read( trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos); - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) return -EAGAIN; + } else { xfs_ilock(ip, XFS_IOLOCK_SHARED); } ret = generic_file_read_iter(iocb, to); @@ -552,9 +555,10 @@ xfs_file_dio_aio_write( iolock = XFS_IOLOCK_SHARED; } - if (!xfs_ilock_nowait(ip, iolock)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, iolock)) return -EAGAIN; + } else { xfs_ilock(ip, iolock); } @@ -606,9 +610,10 @@ xfs_file_dax_write( size_t count; loff_t pos; - if (!xfs_ilock_nowait(ip, iolock)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!xfs_ilock_nowait(ip, iolock)) return -EAGAIN; + } else { xfs_ilock(ip, iolock); } -- cgit v1.2.3-70-g09d2 From 9d11b06638f6aa30d099090e6b8a540c558295ac Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Mon, 23 Oct 2017 18:10:23 +0800 Subject: drivers/net/usb: add device id for TP-LINK UE300 USB 3.0 Ethernet This product is named 'TP-LINK USB 3.0 Gigabit Ethernet Network Adapter (Model No.is UE300)'. It uses chip RTL8153 and works with driver drivers/net/usb/r8152.c Signed-off-by: Ran Wang Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 8 ++++++++ drivers/net/usb/r8152.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 52ea80bcd639..5529bd136624 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -561,6 +561,7 @@ static const struct driver_info wwan_info = { #define HP_VENDOR_ID 0x03f0 #define MICROSOFT_VENDOR_ID 0x045e #define UBLOX_VENDOR_ID 0x1546 +#define TPLINK_VENDOR_ID 0x2357 static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -813,6 +814,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, + /* TP-LINK UE300 USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(TPLINK_VENDOR_ID, 0x0601, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 941ece08ba78..d51d9abf7986 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -615,6 +615,7 @@ enum rtl8152_flags { #define VENDOR_ID_LENOVO 0x17ef #define VENDOR_ID_LINKSYS 0x13b1 #define VENDOR_ID_NVIDIA 0x0955 +#define VENDOR_ID_TPLINK 0x2357 #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 @@ -5319,6 +5320,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, + {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601)}, {} }; -- cgit v1.2.3-70-g09d2 From 07f37efdaa3fa327ecbfd519110bc6bd0c2582cc Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Mon, 23 Oct 2017 17:16:41 +0200 Subject: cdc_ether: flag the Huawei ME906/ME909 as WWAN The Huawei ME906 (12d1:15c1) comes with a standard ECM interface that requires management via AT commands sent over one of the control TTYs (e.g. connected with AT^NDISDUP). Signed-off-by: Aleksander Morgado Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 5529bd136624..3e7a3ac3a362 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -871,6 +871,12 @@ static const struct usb_device_id products[] = { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x81ba, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (kernel_ulong_t)&wwan_info, +}, { + /* Huawei ME906 and ME909 */ + USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0x15c1, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, }, { /* ZTE modules */ USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, USB_CLASS_COMM, -- cgit v1.2.3-70-g09d2 From 822eaf7cfb7c4783375bceadbc7651137346ac00 Mon Sep 17 00:00:00 2001 From: Yan Markman Date: Mon, 23 Oct 2017 15:24:29 +0200 Subject: net: mvpp2: fix TSO headers allocation and management TSO headers are managed with txq index and therefore should be aligned with the txq size, not with the aggregated txq size. Fixes: 186cd4d4e414 ("net: mvpp2: software tso support") Reported-by: Marc Zyngier Signed-off-by: Yan Markman Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 9c86cb7cb988..72e43d848034 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -5609,7 +5609,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port, txq_pcpu->tso_headers = dma_alloc_coherent(port->dev->dev.parent, - MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE, + txq_pcpu->size * TSO_HEADER_SIZE, &txq_pcpu->tso_headers_dma, GFP_KERNEL); if (!txq_pcpu->tso_headers) @@ -5623,7 +5623,7 @@ cleanup: kfree(txq_pcpu->buffs); dma_free_coherent(port->dev->dev.parent, - MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, + txq_pcpu->size * TSO_HEADER_SIZE, txq_pcpu->tso_headers, txq_pcpu->tso_headers_dma); } @@ -5647,7 +5647,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port, kfree(txq_pcpu->buffs); dma_free_coherent(port->dev->dev.parent, - MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE, + txq_pcpu->size * TSO_HEADER_SIZE, txq_pcpu->tso_headers, txq_pcpu->tso_headers_dma); } -- cgit v1.2.3-70-g09d2 From 20920267885218fda08dc12c7d3814938ab15b54 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 23 Oct 2017 15:24:30 +0200 Subject: net: mvpp2: do not unmap TSO headers buffers The TSO header buffers are coming from a per cpu pool and should not be unmapped as they are reused. The PPv2 driver was unmapping all descriptors buffers unconditionally. This patch fixes this by checking the buffers dma addresses before unmapping them, and by not unmapping those who are located in the TSO header pool. Fixes: 186cd4d4e414 ("net: mvpp2: software tso support") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 72e43d848034..3de05a8c468a 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -1167,6 +1167,11 @@ struct mvpp2_bm_pool { u32 port_map; }; +#define IS_TSO_HEADER(txq_pcpu, addr) \ + ((addr) >= (txq_pcpu)->tso_headers_dma && \ + (addr) < (txq_pcpu)->tso_headers_dma + \ + (txq_pcpu)->size * TSO_HEADER_SIZE) + /* Queue modes */ #define MVPP2_QDIST_SINGLE_MODE 0 #define MVPP2_QDIST_MULTI_MODE 1 @@ -5321,8 +5326,9 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, struct mvpp2_txq_pcpu_buf *tx_buf = txq_pcpu->buffs + txq_pcpu->txq_get_index; - dma_unmap_single(port->dev->dev.parent, tx_buf->dma, - tx_buf->size, DMA_TO_DEVICE); + if (!IS_TSO_HEADER(txq_pcpu, tx_buf->dma)) + dma_unmap_single(port->dev->dev.parent, tx_buf->dma, + tx_buf->size, DMA_TO_DEVICE); if (tx_buf->skb) dev_kfree_skb_any(tx_buf->skb); @@ -6212,12 +6218,15 @@ static inline void tx_desc_unmap_put(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, struct mvpp2_tx_desc *desc) { + struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu); + dma_addr_t buf_dma_addr = mvpp2_txdesc_dma_addr_get(port, desc); size_t buf_sz = mvpp2_txdesc_size_get(port, desc); - dma_unmap_single(port->dev->dev.parent, buf_dma_addr, - buf_sz, DMA_TO_DEVICE); + if (!IS_TSO_HEADER(txq_pcpu, buf_dma_addr)) + dma_unmap_single(port->dev->dev.parent, buf_dma_addr, + buf_sz, DMA_TO_DEVICE); mvpp2_txq_desc_put(txq); } -- cgit v1.2.3-70-g09d2 From 082297e61480c4d72ed75b31077e74aca0e7c799 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 23 Oct 2017 15:24:31 +0200 Subject: net: mvpp2: do not call txq_done from the Tx path when Tx irqs are used When Tx IRQs are used, txq_bufs_free() can be called from both the Tx path and from NAPI poll(). This led to CPU stalls as if these two tasks (Tx and Poll) are scheduled on two CPUs at the same time, DMA unmapping operations are done on the same txq buffers. This patch adds a check not to call txq_done() from the Tx path if Tx interrupts are used as it does not make sense to do so. Fixes: edc660fa09e2 ("net: mvpp2: replace TX coalescing interrupts with hrtimer") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 3de05a8c468a..28c6e8a5e118 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6499,7 +6499,7 @@ out: } /* Finalize TX processing */ - if (txq_pcpu->count >= txq->done_pkts_coal) + if (!port->has_tx_irqs && txq_pcpu->count >= txq->done_pkts_coal) mvpp2_txq_done(port, txq, txq_pcpu); /* Set the timer in case not all frags were processed */ -- cgit v1.2.3-70-g09d2 From b71d21c274eff20a9db8158882b545b141b73ab8 Mon Sep 17 00:00:00 2001 From: Laszlo Toth Date: Mon, 23 Oct 2017 19:19:33 +0200 Subject: sctp: full support for ipv6 ip_nonlocal_bind & IP_FREEBIND Commit 9b9742022888 ("sctp: support ipv6 nonlocal bind") introduced support for the above options as v4 sctp did, so patched sctp_v6_available(). In the v4 implementation it's enough, because sctp_inet_bind_verify() just returns with sctp_v4_available(). However sctp_inet6_bind_verify() has an extra check before that for link-local scope_id, which won't respect the above options. Added the checks before calling ipv6_chk_addr(), but not before the validation of scope_id. before (w/ both options): ./v6test fe80::10 sctp bind failed, errno: 99 (Cannot assign requested address) ./v6test fe80::10 tcp bind success, errno: 0 (Success) after (w/ both options): ./v6test fe80::10 sctp bind success, errno: 0 (Success) Signed-off-by: Laszlo Toth Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 51c488769590..7fe9e1d1b7ec 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -882,8 +882,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) net = sock_net(&opt->inet.sk); rcu_read_lock(); dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); - if (!dev || - !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) { + if (!dev || !(opt->inet.freebind || + net->ipv6.sysctl.ip_nonlocal_bind || + ipv6_chk_addr(net, &addr->v6.sin6_addr, + dev, 0))) { rcu_read_unlock(); return 0; } -- cgit v1.2.3-70-g09d2 From 3a379f5b36ae039dfeb6f73316e47ab1af4945df Mon Sep 17 00:00:00 2001 From: Gerhard Bertelsmann Date: Thu, 17 Aug 2017 15:59:49 +0200 Subject: can: sun4i: fix loopback mode Fix loopback mode by setting the right flag and remove presume mode. Signed-off-by: Gerhard Bertelsmann Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sun4i_can.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index 68ef0a4cd821..b0c80859f746 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -342,7 +342,7 @@ static int sun4i_can_start(struct net_device *dev) /* enter the selected mode */ mod_reg_val = readl(priv->base + SUN4I_REG_MSEL_ADDR); - if (priv->can.ctrlmode & CAN_CTRLMODE_PRESUME_ACK) + if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) mod_reg_val |= SUN4I_MSEL_LOOPBACK_MODE; else if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) mod_reg_val |= SUN4I_MSEL_LISTEN_ONLY_MODE; @@ -811,7 +811,6 @@ static int sun4ican_probe(struct platform_device *pdev) priv->can.ctrlmode_supported = CAN_CTRLMODE_BERR_REPORTING | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_LOOPBACK | - CAN_CTRLMODE_PRESUME_ACK | CAN_CTRLMODE_3_SAMPLES; priv->base = addr; priv->clk = clk; -- cgit v1.2.3-70-g09d2 From 8f65a923e6b628e187d5e791cf49393dd5e8c2f9 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 24 Oct 2017 12:23:28 +0200 Subject: can: kvaser_usb: Correct return value in printout If the return value from kvaser_usb_send_simple_msg() was non-zero, the return value from kvaser_usb_flush_queue() was printed in the kernel warning. Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 18cc529fb807..861e90efab86 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1609,7 +1609,8 @@ static int kvaser_usb_close(struct net_device *netdev) if (err) netdev_warn(netdev, "Cannot flush queue, error %d\n", err); - if (kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, priv->channel)) + err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, priv->channel); + if (err) netdev_warn(netdev, "Cannot reset card, error %d\n", err); err = kvaser_usb_stop_chip(priv); -- cgit v1.2.3-70-g09d2 From e1d2d1329a5722dbecc9c278303fcc4aa01f8790 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Tue, 24 Oct 2017 12:23:29 +0200 Subject: can: kvaser_usb: Ignore CMD_FLUSH_QUEUE_REPLY messages To avoid kernel warning "Unhandled message (68)", ignore the CMD_FLUSH_QUEUE_REPLY message for now. As of Leaf v2 firmware version v4.1.844 (2017-02-15), flush tx queue is synchronous. There is a capability bit indicating whether flushing tx queue is synchronous or asynchronous. A proper solution would be to query the device for capabilities. If the synchronous tx flush capability bit is set, we should wait for CMD_FLUSH_QUEUE_REPLY message, while flushing the tx queue. Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 861e90efab86..9b18d96ef526 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -137,6 +137,7 @@ static inline bool kvaser_is_usbcan(const struct usb_device_id *id) #define CMD_RESET_ERROR_COUNTER 49 #define CMD_TX_ACKNOWLEDGE 50 #define CMD_CAN_ERROR_EVENT 51 +#define CMD_FLUSH_QUEUE_REPLY 68 #define CMD_LEAF_USB_THROTTLE 77 #define CMD_LEAF_LOG_MESSAGE 106 @@ -1301,6 +1302,11 @@ static void kvaser_usb_handle_message(const struct kvaser_usb *dev, goto warn; break; + case CMD_FLUSH_QUEUE_REPLY: + if (dev->family != KVASER_LEAF) + goto warn; + break; + default: warn: dev_warn(dev->udev->dev.parent, "Unhandled message (%d)\n", msg->id); -- cgit v1.2.3-70-g09d2 From 2eece390bf68ec8f733d7e4a3ba8a5ea350082ae Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 23 Oct 2017 15:35:33 +0300 Subject: perf/x86/intel/bts: Fix exclusive event reference leak Commit: d2878d642a4ed ("perf/x86/intel/bts: Disallow use by unprivileged users on paranoid systems") ... adds a privilege check in the exactly wrong place in the event init path: after the 'LBR exclusive' reference has been taken, and doesn't release it in the case of insufficient privileges. After this, nobody in the system gets to use PT or LBR afterwards. This patch moves the privilege check to where it should have been in the first place. Signed-off-by: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d2878d642a4ed ("perf/x86/intel/bts: Disallow use by unprivileged users on paranoid systems") Link: http://lkml.kernel.org/r/20171023123533.16973-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 16076eb34699..141e07b06216 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -546,9 +546,6 @@ static int bts_event_init(struct perf_event *event) if (event->attr.type != bts_pmu.type) return -ENOENT; - if (x86_add_exclusive(x86_lbr_exclusive_bts)) - return -EBUSY; - /* * BTS leaks kernel addresses even when CPL0 tracing is * disabled, so disallow intel_bts driver for unprivileged @@ -562,6 +559,9 @@ static int bts_event_init(struct perf_event *event) !capable(CAP_SYS_ADMIN)) return -EACCES; + if (x86_add_exclusive(x86_lbr_exclusive_bts)) + return -EBUSY; + ret = x86_reserve_hardware(); if (ret) { x86_del_exclusive(x86_lbr_exclusive_bts); -- cgit v1.2.3-70-g09d2 From f265788c336979090ac80b9ae173aa817c4fe40d Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 24 Oct 2017 16:53:34 +0800 Subject: ALSA: hda - fix headset mic problem for Dell machines with alc236 We have several Dell laptops which use the codec alc236, the headset mic can't work on these machines. Following the commit 736f20a70, we add the pin cfg table to make the headset mic work. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 00fa80291c96..546d515f3c1f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6428,6 +6428,14 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { ALC225_STANDARD_PINS, {0x12, 0xb7a60130}, {0x1b, 0x90170110}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60140}, + {0x14, 0x90170110}, + {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60140}, + {0x14, 0x90170150}, + {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE, {0x14, 0x90170110}, {0x21, 0x02211020}), -- cgit v1.2.3-70-g09d2 From d0725439354a58f2b13b9f5234420641b662b9c4 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 23 Oct 2017 17:36:03 -0700 Subject: hwmon: (tmp102) Fix first temperature reading Commit 3d8f7a89a197 ("hwmon: (tmp102) Improve handling of initial read delay") reduced the initial temperature read delay and made it dependent on the chip's shutdown mode. If the chip was not in shutdown mode at probe, the read delay no longer applies. This ignores the fact that the chip initialization changes the temperature sensor resolution, and that the temperature register values change when the resolution is changed. As a result, the reported temperature is twice as high as the real temperature until the first temperature conversion after the configuration change is complete. This can result in unexpected behavior and, worst case, in a system shutdown. To fix the problem, let's just always wait for a conversion to complete before reporting a temperature. Fixes: 3d8f7a89a197 ("hwmon: (tmp102) Improve handling of initial read delay") Link: https://bugzilla.kernel.org/show_bug.cgi?id=197167 Reported-by: Ralf Goebel Cc: Ralf Goebel Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck --- drivers/hwmon/tmp102.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index 5eafbaada795..dfc40c740d07 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -268,14 +268,11 @@ static int tmp102_probe(struct i2c_client *client, return err; } - tmp102->ready_time = jiffies; - if (tmp102->config_orig & TMP102_CONF_SD) { - /* - * Mark that we are not ready with data until the first - * conversion is complete - */ - tmp102->ready_time += msecs_to_jiffies(CONVERSION_TIME_MS); - } + /* + * Mark that we are not ready with data until the first + * conversion is complete + */ + tmp102->ready_time = jiffies + msecs_to_jiffies(CONVERSION_TIME_MS); hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name, tmp102, -- cgit v1.2.3-70-g09d2 From 0cc2b4e5a020fc7f4d1795741c116c983e9467d7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 24 Oct 2017 15:20:45 +0200 Subject: PM / QoS: Fix device resume latency PM QoS The special value of 0 for device resume latency PM QoS means "no restriction", but there are two problems with that. First, device resume latency PM QoS requests with 0 as the value are always put in front of requests with positive values in the priority lists used internally by the PM QoS framework, causing 0 to be chosen as an effective constraint value. However, that 0 is then interpreted as "no restriction" effectively overriding the other requests with specific restrictions which is incorrect. Second, the users of device resume latency PM QoS have no way to specify that *any* resume latency at all should be avoided, which is an artificial limitation in general. To address these issues, modify device resume latency PM QoS to use S32_MAX as the "no constraint" value and 0 as the "no latency at all" one and rework its users (the cpuidle menu governor, the genpd QoS governor and the runtime PM framework) to follow these changes. Also add a special "n/a" value to the corresponding user space I/F to allow user space to indicate that it cannot accept any resume latencies at all for the given device. Fixes: 85dc0b8a4019 (PM / QoS: Make it possible to expose PM QoS latency constraints) Link: https://bugzilla.kernel.org/show_bug.cgi?id=197323 Reported-by: Reinette Chatre Tested-by: Reinette Chatre Signed-off-by: Rafael J. Wysocki Acked-by: Alex Shi Cc: All applicable --- Documentation/ABI/testing/sysfs-devices-power | 4 +- drivers/base/cpu.c | 3 +- drivers/base/power/domain_governor.c | 53 +++++++++++++++------------ drivers/base/power/qos.c | 2 +- drivers/base/power/runtime.c | 2 +- drivers/base/power/sysfs.c | 25 +++++++++++-- drivers/cpuidle/governors/menu.c | 4 +- include/linux/pm_qos.h | 5 ++- 8 files changed, 63 insertions(+), 35 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 676fdf5f2a99..5cbb6f038615 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -211,7 +211,9 @@ Description: device, after it has been suspended at run time, from a resume request to the moment the device will be ready to process I/O, in microseconds. If it is equal to 0, however, this means that - the PM QoS resume latency may be arbitrary. + the PM QoS resume latency may be arbitrary and the special value + "n/a" means that user space cannot accept any resume latency at + all for the given device. Not all drivers support this attribute. If it isn't supported, it is not present. diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 321cd7b4d817..227bac5f1191 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -377,7 +377,8 @@ int register_cpu(struct cpu *cpu, int num) per_cpu(cpu_sys_devices, num) = &cpu->dev; register_cpu_under_node(num, cpu_to_node(num)); - dev_pm_qos_expose_latency_limit(&cpu->dev, 0); + dev_pm_qos_expose_latency_limit(&cpu->dev, + PM_QOS_RESUME_LATENCY_NO_CONSTRAINT); return 0; } diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 281f949c5ffe..51751cc8c9e6 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -14,23 +14,20 @@ static int dev_update_qos_constraint(struct device *dev, void *data) { s64 *constraint_ns_p = data; - s32 constraint_ns = -1; + s64 constraint_ns = -1; if (dev->power.subsys_data && dev->power.subsys_data->domain_data) constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns; - if (constraint_ns < 0) { + if (constraint_ns < 0) constraint_ns = dev_pm_qos_read_value(dev); - constraint_ns *= NSEC_PER_USEC; - } - if (constraint_ns == 0) + + if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) return 0; - /* - * constraint_ns cannot be negative here, because the device has been - * suspended. - */ - if (constraint_ns < *constraint_ns_p || *constraint_ns_p == 0) + constraint_ns *= NSEC_PER_USEC; + + if (constraint_ns < *constraint_ns_p || *constraint_ns_p < 0) *constraint_ns_p = constraint_ns; return 0; @@ -63,10 +60,14 @@ static bool default_suspend_ok(struct device *dev) spin_unlock_irqrestore(&dev->power.lock, flags); - if (constraint_ns < 0) + if (constraint_ns == 0) return false; - constraint_ns *= NSEC_PER_USEC; + if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + constraint_ns = -1; + else + constraint_ns *= NSEC_PER_USEC; + /* * We can walk the children without any additional locking, because * they all have been suspended at this point and their @@ -76,14 +77,19 @@ static bool default_suspend_ok(struct device *dev) device_for_each_child(dev, &constraint_ns, dev_update_qos_constraint); - if (constraint_ns > 0) { - constraint_ns -= td->suspend_latency_ns + - td->resume_latency_ns; - if (constraint_ns == 0) - return false; + if (constraint_ns < 0) { + /* The children have no constraints. */ + td->effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + td->cached_suspend_ok = true; + } else { + constraint_ns -= td->suspend_latency_ns + td->resume_latency_ns; + if (constraint_ns > 0) { + td->effective_constraint_ns = constraint_ns; + td->cached_suspend_ok = true; + } else { + td->effective_constraint_ns = 0; + } } - td->effective_constraint_ns = constraint_ns; - td->cached_suspend_ok = constraint_ns >= 0; /* * The children have been suspended already, so we don't need to take @@ -145,13 +151,14 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, td = &to_gpd_data(pdd)->td; constraint_ns = td->effective_constraint_ns; /* default_suspend_ok() need not be called before us. */ - if (constraint_ns < 0) { + if (constraint_ns < 0) constraint_ns = dev_pm_qos_read_value(pdd->dev); - constraint_ns *= NSEC_PER_USEC; - } - if (constraint_ns == 0) + + if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) continue; + constraint_ns *= NSEC_PER_USEC; + /* * constraint_ns cannot be negative here, because the device has * been suspended. diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 277d43a83f53..7d29286d9313 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -189,7 +189,7 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) plist_head_init(&c->list); c->target_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; c->default_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; - c->no_constraint_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; + c->no_constraint_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; c->type = PM_QOS_MIN; c->notifiers = n; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 7bcf80fa9ada..13e015905543 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -253,7 +253,7 @@ static int rpm_check_suspend_allowed(struct device *dev) || (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME)) retval = -EAGAIN; - else if (__dev_pm_qos_read_value(dev) < 0) + else if (__dev_pm_qos_read_value(dev) == 0) retval = -EPERM; else if (dev->power.runtime_status == RPM_SUSPENDED) retval = 1; diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 156ab57bca77..632077f05c57 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -218,7 +218,14 @@ static ssize_t pm_qos_resume_latency_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev_pm_qos_requested_resume_latency(dev)); + s32 value = dev_pm_qos_requested_resume_latency(dev); + + if (value == 0) + return sprintf(buf, "n/a\n"); + else if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + value = 0; + + return sprintf(buf, "%d\n", value); } static ssize_t pm_qos_resume_latency_store(struct device *dev, @@ -228,11 +235,21 @@ static ssize_t pm_qos_resume_latency_store(struct device *dev, s32 value; int ret; - if (kstrtos32(buf, 0, &value)) - return -EINVAL; + if (!kstrtos32(buf, 0, &value)) { + /* + * Prevent users from writing negative or "no constraint" values + * directly. + */ + if (value < 0 || value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + return -EINVAL; - if (value < 0) + if (value == 0) + value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + } else if (!strcmp(buf, "n/a") || !strcmp(buf, "n/a\n")) { + value = 0; + } else { return -EINVAL; + } ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req, value); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 48eaf2879228..aa390404e85f 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -298,8 +298,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->needs_update = 0; } - /* resume_latency is 0 means no restriction */ - if (resume_latency && resume_latency < latency_req) + if (resume_latency < latency_req && + resume_latency != PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) latency_req = resume_latency; /* Special case when user has set very strict latency requirement */ diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 032b55909145..6737a8c9e8c6 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -27,16 +27,17 @@ enum pm_qos_flags_status { PM_QOS_FLAGS_ALL, }; -#define PM_QOS_DEFAULT_VALUE -1 +#define PM_QOS_DEFAULT_VALUE (-1) +#define PM_QOS_LATENCY_ANY S32_MAX #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 #define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE 0 +#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) -#define PM_QOS_LATENCY_ANY ((s32)(~(__u32)0 >> 1)) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) #define PM_QOS_FLAG_REMOTE_WAKEUP (1 << 1) -- cgit v1.2.3-70-g09d2 From 6eaf011144af10cad34c0d46f82e50d382c8e926 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Oct 2017 19:03:04 +0300 Subject: ovl: fix EIO from lookup of non-indexed upper Commit fbaf94ee3cd5 ("ovl: don't set origin on broken lower hardlink") attempt to avoid the condition of non-indexed upper inode with lower hardlink as origin. If this condition is found, lookup returns EIO. The protection of commit mentioned above does not cover the case of lower that is not a hardlink when it is copied up (with either index=off/on) and then lower is hardlinked while overlay is offline. Changes to lower layer while overlayfs is offline should not result in unexpected behavior, so a permanent EIO error after creating a link in lower layer should not be considered as correct behavior. This fix replaces EIO error with success in cases where upper has origin but no index is found, or index is found that does not match upper inode. In those cases, lookup will not fail and the returned overlay inode will be hashed by upper inode instead of by lower origin inode. Fixes: 359f392ca53e ("ovl: lookup index entry for copy up origin") Cc: # v4.13 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 20 ++++++++++++++++---- fs/overlayfs/namei.c | 20 ++++++++------------ fs/overlayfs/overlayfs.h | 3 ++- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index a619addecafc..321511ed8c42 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -598,18 +598,30 @@ static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, return true; } -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry) +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, + struct dentry *index) { struct dentry *lowerdentry = ovl_dentry_lower(dentry); struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; + /* Already indexed or could be indexed on copy up? */ + bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); + + if (WARN_ON(upperdentry && indexed && !lowerdentry)) + return ERR_PTR(-EIO); if (!realinode) realinode = d_inode(lowerdentry); - if (!S_ISDIR(realinode->i_mode) && - (upperdentry || (lowerdentry && ovl_indexdir(dentry->d_sb)))) { - struct inode *key = d_inode(lowerdentry ?: upperdentry); + /* + * Copy up origin (lower) may exist for non-indexed upper, but we must + * not use lower as hash key in that case. + * Hash inodes that are or could be indexed by origin inode and + * non-indexed upper inodes that could be hard linked by upper inode. + */ + if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { + struct inode *key = d_inode(indexed ? lowerdentry : + upperdentry); unsigned int nlink; inode = iget5_locked(dentry->d_sb, (unsigned long) key, diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index e08164156cfe..05e9a0ad5c08 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -516,18 +516,9 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, inode = d_inode(index); if (d_is_negative(index)) { - if (upper && d_inode(origin)->i_nlink > 1) { - pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", - d_inode(origin)->i_ino); - goto fail; - } - - dput(index); - index = NULL; + goto out_dput; } else if (upper && d_inode(upper) != inode) { - pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n", - index, inode->i_ino, d_inode(upper)->i_ino); - goto fail; + goto out_dput; } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { /* @@ -547,6 +538,11 @@ out: kfree(name.name); return index; +out_dput: + dput(index); + index = NULL; + goto out; + fail: dput(index); index = ERR_PTR(-EIO); @@ -710,7 +706,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, upperdentry = dget(index); if (upperdentry || ctr) { - inode = ovl_get_inode(dentry, upperdentry); + inode = ovl_get_inode(dentry, upperdentry, index); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_free_oe; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index c706a6f99928..d9a0edd4e57e 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -286,7 +286,8 @@ int ovl_update_time(struct inode *inode, struct timespec *ts, int flags); bool ovl_is_private_xattr(const char *name); struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); -struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry); +struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, + struct dentry *index); static inline void ovl_copyattr(struct inode *from, struct inode *to) { to->i_uid = from->i_uid; -- cgit v1.2.3-70-g09d2 From 7937a56fdf0b064c2ffa33025210f725a4ebc822 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 20 Oct 2017 17:19:06 +0300 Subject: ovl: handle ENOENT on index lookup Treat ENOENT from index entry lookup the same way as treating a returned negative dentry. Apparently, either could be returned if file is not found, depending on the underlying file system. Fixes: 359f392ca53e ("ovl: lookup index entry for copy up origin") Cc: # v4.13 Signed-off-by: Amir Goldstein --- fs/overlayfs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 05e9a0ad5c08..0d9b8ce5ea43 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -507,6 +507,10 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len); if (IS_ERR(index)) { err = PTR_ERR(index); + if (err == -ENOENT) { + index = NULL; + goto out; + } pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" "overlayfs: mount with '-o index=off' to disable inodes index.\n", d_inode(origin)->i_ino, name.len, name.name, -- cgit v1.2.3-70-g09d2 From fa0096e3bad69ed6f34843fd7ae1c45ca987012a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 24 Oct 2017 12:24:11 +0300 Subject: ovl: do not cleanup unsupported index entries With index=on, ovl_indexdir_cleanup() tries to cleanup invalid index entries (e.g. bad index name). This behavior could result in cleaning of entries created by newer kernels and is therefore undesirable. Instead, abort mount if such entries are encountered. We still cleanup 'stale' entries and 'orphan' entries, both those cases can be a result of offline changes to lower and upper dirs. When encoutering an index entry of type directory or whiteout, kernel was supposed to fallback to read-only mount, but the fill_super() operation returns EROFS in this case instead of returning success with read-only mount flag, so mount fails when encoutering directory or whiteout index entries. Bless this behavior by returning -EINVAL on directory and whiteout index entries as we do for all unsupported index entries. Fixes: 61b674710cd9 ("ovl: do not cleanup directory and whiteout index..") Cc: # v4.13 Signed-off-by: Amir Goldstein --- fs/overlayfs/namei.c | 7 +++---- fs/overlayfs/readdir.c | 11 +++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 0d9b8ce5ea43..a12dc10bf726 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -405,14 +405,13 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, * be treated as stale (i.e. after unlink of the overlay inode). * We don't know the verification rules for directory and whiteout * index entries, because they have not been implemented yet, so return - * EROFS if those entries are found to avoid corrupting an index that - * was created by a newer kernel. + * EINVAL if those entries are found to abort the mount to avoid + * corrupting an index that was created by a newer kernel. */ - err = -EROFS; + err = -EINVAL; if (d_is_dir(index) || ovl_is_whiteout(index)) goto fail; - err = -EINVAL; if (index->d_name.len < sizeof(struct ovl_fh)*2) goto fail; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 0f85ee9c3268..698b74dd750e 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -1021,13 +1021,12 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, break; } err = ovl_verify_index(index, lowerstack, numlower); - if (err) { - if (err == -EROFS) - break; + /* Cleanup stale and orphan index entries */ + if (err && (err == -ESTALE || err == -ENOENT)) err = ovl_cleanup(dir, index); - if (err) - break; - } + if (err) + break; + dput(index); index = NULL; } -- cgit v1.2.3-70-g09d2 From 57a95b41869b8f0d1949c24df2a9dac1ca7082ee Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 24 Oct 2017 11:08:18 -0700 Subject: Input: elan_i2c - add ELAN0611 to the ACPI table ELAN0611 touchpad uses elan_i2c as its driver. It can be found on Lenovo ideapad 320-15IKB. So add it to ACPI table to enable the touchpad. [Ido Adiv reports that the same ACPI ID is used for Elan touchpad in ideapad 520]. BugLink: https://bugs.launchpad.net/bugs/1723736 Signed-off-by: Kai-Heng Feng Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 0e761d079dc4..6d6b092e2da9 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1258,6 +1258,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0605", 0 }, { "ELAN0609", 0 }, { "ELAN060B", 0 }, + { "ELAN0611", 0 }, { "ELAN1000", 0 }, { } }; -- cgit v1.2.3-70-g09d2 From 32e67a3a06b88904155170560b7a63d372b320bd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 24 Oct 2017 15:57:18 -0400 Subject: nbd: handle interrupted sendmsg with a sndtimeo set If you do not set sk_sndtimeo you will get -ERESTARTSYS if there is a pending signal when you enter sendmsg, which we handle properly. However if you set a timeout for your commands we'll set sk_sndtimeo to that timeout, which means that sendmsg will start returning -EINTR instead of -ERESTARTSYS. Fix this by checking either cases and doing the correct thing. Cc: stable@vger.kernel.org Fixes: dc88e34d69d8 ("nbd: set sk->sk_sndtimeo for our sockets") Reported-and-tested-by: Daniel Xu Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index baebbdfd74d5..9adfb5445f8d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -386,6 +386,15 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, return result; } +/* + * Different settings for sk->sk_sndtimeo can result in different return values + * if there is a signal pending when we enter sendmsg, because reasons? + */ +static inline int was_interrupted(int result) +{ + return result == -ERESTARTSYS || result == -EINTR; +} + /* always call with the tx_lock held */ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { @@ -458,7 +467,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); if (result <= 0) { - if (result == -ERESTARTSYS) { + if (was_interrupted(result)) { /* If we havne't sent anything we can just return BUSY, * however if we have sent something we need to make * sure we only allow this req to be sent until we are @@ -502,7 +511,7 @@ send_pages: } result = sock_xmit(nbd, index, 1, &from, flags, &sent); if (result <= 0) { - if (result == -ERESTARTSYS) { + if (was_interrupted(result)) { /* We've already sent the header, we * have no choice but to set pending and * return BUSY. -- cgit v1.2.3-70-g09d2 From 829385f08ae99740276cbd46c9db29764c519211 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 20 Oct 2017 16:40:43 -0700 Subject: strparser: Use delayed work instead of timer for msg timeout Sock lock may be taken in the message timer function which is a problem since timers run in BH. Instead of timers use delayed_work. Reported-by: Eric Dumazet Fixes: bbb03029a899 ("strparser: Generalize strparser") Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/strparser.h | 3 +-- net/strparser/strparser.c | 17 ++++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/net/strparser.h b/include/net/strparser.h index 7dc131d62ad5..d96b59f45eba 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -74,10 +74,9 @@ struct strparser { u32 unrecov_intr : 1; struct sk_buff **skb_nextp; - struct timer_list msg_timer; struct sk_buff *skb_head; unsigned int need_bytes; - struct delayed_work delayed_work; + struct delayed_work msg_timer_work; struct work_struct work; struct strp_stats stats; struct strp_callbacks cb; diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index d4ea46a5f233..c5fda15ba319 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -49,7 +49,7 @@ static void strp_abort_strp(struct strparser *strp, int err) { /* Unrecoverable error in receive */ - del_timer(&strp->msg_timer); + cancel_delayed_work(&strp->msg_timer_work); if (strp->stopped) return; @@ -68,7 +68,7 @@ static void strp_abort_strp(struct strparser *strp, int err) static void strp_start_timer(struct strparser *strp, long timeo) { if (timeo) - mod_timer(&strp->msg_timer, timeo); + mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo); } /* Lower lock held */ @@ -319,7 +319,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, eaten += (cand_len - extra); /* Hurray, we have a new message! */ - del_timer(&strp->msg_timer); + cancel_delayed_work(&strp->msg_timer_work); strp->skb_head = NULL; STRP_STATS_INCR(strp->stats.msgs); @@ -450,9 +450,10 @@ static void strp_work(struct work_struct *w) do_strp_work(container_of(w, struct strparser, work)); } -static void strp_msg_timeout(unsigned long arg) +static void strp_msg_timeout(struct work_struct *w) { - struct strparser *strp = (struct strparser *)arg; + struct strparser *strp = container_of(w, struct strparser, + msg_timer_work.work); /* Message assembly timed out */ STRP_STATS_INCR(strp->stats.msg_timeouts); @@ -505,9 +506,7 @@ int strp_init(struct strparser *strp, struct sock *sk, strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp; - setup_timer(&strp->msg_timer, strp_msg_timeout, - (unsigned long)strp); - + INIT_DELAYED_WORK(&strp->msg_timer_work, strp_msg_timeout); INIT_WORK(&strp->work, strp_work); return 0; @@ -532,7 +531,7 @@ void strp_done(struct strparser *strp) { WARN_ON(!strp->stopped); - del_timer_sync(&strp->msg_timer); + cancel_delayed_work_sync(&strp->msg_timer_work); cancel_work_sync(&strp->work); if (strp->skb_head) { -- cgit v1.2.3-70-g09d2 From 3eb8feeb1708c7dbfd2e97df92a2a407c116606e Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 24 Oct 2017 16:37:19 -0400 Subject: net: dsa: check master device before put In the case of pdata, the dsa_cpu_parse function calls dev_put() before making sure it isn't NULL. Fix this. Fixes: 71e0bbde0d88 ("net: dsa: Add support for platform data") Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 873af0108e24..045d8a176279 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -496,14 +496,15 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, if (!ethernet) return -EINVAL; ethernet_dev = of_find_net_device_by_node(ethernet); + if (!ethernet_dev) + return -EPROBE_DEFER; } else { ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]); + if (!ethernet_dev) + return -EPROBE_DEFER; dev_put(ethernet_dev); } - if (!ethernet_dev) - return -EPROBE_DEFER; - if (!dst->cpu_dp) { dst->cpu_dp = port; dst->cpu_dp->netdev = ethernet_dev; -- cgit v1.2.3-70-g09d2 From 0a5e2ec2647737907d267c09dc9a25fab1468865 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 5 Oct 2017 08:29:47 +0200 Subject: s390/kvm: fix detection of guest machine checks The new detection code for guest machine checks added a check based on %r11 to .Lcleanup_sie to distinguish between normal asynchronous interrupts and machine checks. But the funtion is called from the program check handler as well with an undefined value in %r11. The effect is that all program exceptions pointing to the SIE instruction will set the CIF_MCCK_GUEST bit. The bit stays set for the CPU until the next machine check comes in which will incorrectly be interpreted as a guest machine check. The simplest fix is to stop using .Lcleanup_sie in the program check handler and duplicate a few instructions. Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest") Cc: # v4.13+ Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 21900e1cee9c..d185aa3965bf 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -521,12 +521,15 @@ ENTRY(pgm_check_handler) tmhh %r8,0x0001 # test problem state bit jnz 2f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) - # cleanup critical section for sie64a + # cleanup critical section for program checks in sie64a lgr %r14,%r9 slg %r14,BASED(.Lsie_critical_start) clg %r14,BASED(.Lsie_critical_length) jhe 0f - brasl %r14,.Lcleanup_sie + lg %r14,__SF_EMPTY(%r15) # get control block pointer + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit #endif 0: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 1f # -> enabled, can't be a double fault -- cgit v1.2.3-70-g09d2 From 6c2838fbdedb9b72a81c931d49e56b229b6cdbca Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 19 Oct 2017 08:52:58 -0400 Subject: ceph: unlock dangling spinlock in try_flush_caps() sparse warns: fs/ceph/caps.c:2042:9: warning: context imbalance in 'try_flush_caps' - wrong count at exit We need to exit this function with the lock unlocked, but a couple of cases leave it locked. Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton Reviewed-by: "Yan, Zheng" Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 157fe59fbabe..1978a8cb1cb1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1991,6 +1991,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid) retry: spin_lock(&ci->i_ceph_lock); if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { + spin_unlock(&ci->i_ceph_lock); dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); goto out; } @@ -2008,8 +2009,10 @@ retry: mutex_lock(&session->s_mutex); goto retry; } - if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) + if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) { + spin_unlock(&ci->i_ceph_lock); goto out; + } flushing = __mark_caps_flushing(inode, session, true, &flush_tid, &oldest_flush_tid); -- cgit v1.2.3-70-g09d2 From cfbb0d90a7abb289edc91833d0905931f8805f12 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Oct 2017 21:12:13 +0200 Subject: mac80211: don't compare TKIP TX MIC key in reinstall prevention For the reinstall prevention, the code I had added compares the whole key. It turns out though that iwlwifi firmware doesn't provide the TKIP TX MIC key as it's not needed in client mode, and thus the comparison will always return false. For client mode, thus always zero out the TX MIC key part before doing the comparison in order to avoid accepting the reinstall of the key with identical encryption and RX MIC key, but not the same TX MIC key (since the supplicant provides the real one.) Fixes: fdf7cb4185b6 ("mac80211: accept key reinstall without changing anything") Signed-off-by: Johannes Berg --- net/mac80211/key.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 035d16fe926e..938049395f90 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -610,6 +610,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key) ieee80211_key_free_common(key); } +static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata, + struct ieee80211_key *old, + struct ieee80211_key *new) +{ + u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP]; + u8 *tk_old, *tk_new; + + if (!old || new->conf.keylen != old->conf.keylen) + return false; + + tk_old = old->conf.key; + tk_new = new->conf.key; + + /* + * In station mode, don't compare the TX MIC key, as it's never used + * and offloaded rekeying may not care to send it to the host. This + * is the case in iwlwifi, for example. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + new->conf.cipher == WLAN_CIPHER_SUITE_TKIP && + new->conf.keylen == WLAN_KEY_LEN_TKIP && + !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { + memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP); + memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP); + memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8); + memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8); + tk_old = tkip_old; + tk_new = tkip_new; + } + + return !crypto_memneq(tk_old, tk_new, new->conf.keylen); +} + int ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) @@ -635,8 +668,7 @@ int ieee80211_key_link(struct ieee80211_key *key, * Silently accept key re-installation without really installing the * new version of the key to avoid nonce reuse or replay issues. */ - if (old_key && key->conf.keylen == old_key->conf.keylen && - !crypto_memneq(key->conf.key, old_key->conf.key, key->conf.keylen)) { + if (ieee80211_key_identical(sdata, old_key, key)) { ieee80211_key_free_unused(key); ret = 0; goto out; -- cgit v1.2.3-70-g09d2 From 092e72c9edab16d4d6ad10c683a95047d53b6db4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Oct 2017 11:04:47 +0100 Subject: efi/efi_test: Prevent an Oops in efi_runtime_query_capsulecaps() If "qcaps.capsule_count" is ULONG_MAX then "qcaps.capsule_count + 1" will overflow to zero and kcalloc() will return the ZERO_SIZE_PTR. We try to dereference it inside the loop and crash. Signed-off-by: Dan Carpenter Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Acked-by: Ivan Hu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: ff6301dabc3c ("efi: Add efi_test driver for exporting UEFI runtime service interfaces") Link: http://lkml.kernel.org/r/20171025100448.26056-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/test/efi_test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c index 08129b7b80ab..41c48a1e8baa 100644 --- a/drivers/firmware/efi/test/efi_test.c +++ b/drivers/firmware/efi/test/efi_test.c @@ -593,6 +593,9 @@ static long efi_runtime_query_capsulecaps(unsigned long arg) if (copy_from_user(&qcaps, qcaps_user, sizeof(qcaps))) return -EFAULT; + if (qcaps.capsule_count == ULONG_MAX) + return -EINVAL; + capsules = kcalloc(qcaps.capsule_count + 1, sizeof(efi_capsule_header_t), GFP_KERNEL); if (!capsules) -- cgit v1.2.3-70-g09d2 From 38fb6652229c2149e8694d57db442878fdf8a1bd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 25 Oct 2017 11:04:48 +0100 Subject: efi/libstub/arm: Don't randomize runtime regions when CONFIG_HIBERNATION=y Commit: e69176d68d26 ("ef/libstub/arm/arm64: Randomize the base of the UEFI rt services region") implemented randomization of the virtual mapping that the OS chooses for the UEFI runtime services. This was motivated by the fact that UEFI usually does not bother to specify any permission restrictions for those regions, making them prime real estate for exploitation now that the OS is getting more and more careful not to leave any R+W+X mapped regions lying around. However, this randomization breaks assumptions in the resume from hibernation code, which expects all memory regions populated by UEFI to remain in the same place, including their virtual mapping into the OS memory space. While this assumption may not be entirely reasonable in the first place, breaking it deliberately does not make a lot of sense either. So let's refrain from this randomization pass if CONFIG_HIBERNATION=y. Signed-off-by: Ard Biesheuvel Cc: James Morse Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20171025100448.26056-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/arm-stub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 1cb2d1c070c3..a94601d5939e 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -238,7 +238,8 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, efi_random_get_seed(sys_table); - if (!nokaslr()) { + /* hibernation expects the runtime regions to stay in the same place */ + if (!IS_ENABLED(CONFIG_HIBERNATION) && !nokaslr()) { /* * Randomize the base of the UEFI runtime services region. * Preserve the 2 MB alignment of the region by taking a -- cgit v1.2.3-70-g09d2 From d3daa2c7865cbfa830651b11c8ad1df23465b46e Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Mon, 23 Oct 2017 11:27:35 -0400 Subject: drm/amd/amdgpu: Remove workaround check for UVD6 on APUs On APUs the uvd6 driver was skipping proper suspend/resume routines resulting in a broken state upon resume. Signed-off-by: Tom St Denis Acked-by: Alex Deucher Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 31db356476f8..430a6b4dfac9 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -225,11 +225,7 @@ static int uvd_v6_0_suspend(void *handle) if (r) return r; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) - r = amdgpu_uvd_suspend(adev); - - return r; + return amdgpu_uvd_suspend(adev); } static int uvd_v6_0_resume(void *handle) @@ -237,12 +233,10 @@ static int uvd_v6_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_uvd_resume(adev); - if (r) - return r; - } + r = amdgpu_uvd_resume(adev); + if (r) + return r; + return uvd_v6_0_hw_init(adev); } -- cgit v1.2.3-70-g09d2 From c6cdd51404b7ac12dd95173ddfc548c59ecf037f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 25 Oct 2017 16:34:27 +0200 Subject: fuse: fix READDIRPLUS skipping an entry Marios Titas running a Haskell program noticed a problem with fuse's readdirplus: when it is interrupted by a signal, it skips one directory entry. The reason is that fuse erronously updates ctx->pos after a failed dir_emit(). The issue originates from the patch adding readdirplus support. Reported-by: Jakob Unterwurzacher Tested-by: Marios Titas Signed-off-by: Miklos Szeredi Fixes: 0b05b18381ee ("fuse: implement NFS-like readdirplus support") Cc: # v3.9 --- fs/fuse/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 622081b97426..24967382a7b1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1308,7 +1308,8 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, */ over = !dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino, dirent->type); - ctx->pos = dirent->off; + if (!over) + ctx->pos = dirent->off; } buf += reclen; -- cgit v1.2.3-70-g09d2 From 7277f755048da562eb2489becacd38d0d05e1e06 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 24 Oct 2017 16:27:28 +0100 Subject: drm/i915/perf: fix perf enable/disable ioctls with 32bits userspace The compat callback was missing and triggered failures in 32bits userspace when enabling/disable the perf stream. We don't require any particular processing here as these ioctls don't take any argument. Signed-off-by: Lionel Landwerlin Fixes: eec688e1420 ("drm/i915: Add i915 perf infrastructure") Cc: linux-stable Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171024152728.4873-1-lionel.g.landwerlin@intel.com (cherry picked from commit 191f896085cf3b5d85920d58a759da4eea141721) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_perf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 94185d610673..370b9d248fed 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2537,6 +2537,10 @@ static const struct file_operations fops = { .poll = i915_perf_poll, .read = i915_perf_read, .unlocked_ioctl = i915_perf_ioctl, + /* Our ioctl have no arguments, so it's safe to use the same function + * to handle 32bits compatibility. + */ + .compat_ioctl = i915_perf_ioctl, }; -- cgit v1.2.3-70-g09d2 From 298d275d4d9bea3524ff4bc76678c140611d8a8d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 25 Oct 2017 17:08:07 +0200 Subject: xen/gntdev: avoid out of bounds access in case of partial gntdev_mmap() In case gntdev_mmap() succeeds only partially in mapping grant pages it will leave some vital information uninitialized needed later for cleanup. This will lead to an out of bounds array access when unmapping the already mapped pages. So just initialize the data needed for unmapping the pages a little bit earlier. Cc: Reported-by: Arthur Borsboom Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 82360594fa8e..57efbd3b053b 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1024,6 +1024,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) mutex_unlock(&priv->lock); if (use_ptemod) { + map->pages_vm_start = vma->vm_start; err = apply_to_page_range(vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, find_grant_ptes, map); @@ -1061,7 +1062,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) set_grant_ptes_as_special, NULL); } #endif - map->pages_vm_start = vma->vm_start; } return 0; -- cgit v1.2.3-70-g09d2 From 5b454a64555055aaa5769b3ba877bd911d375d5a Mon Sep 17 00:00:00 2001 From: Benjamin Gilbert Date: Thu, 19 Oct 2017 13:09:29 -0700 Subject: cifs: Select all required crypto modules Some dependencies were lost when CIFS_SMB2 was merged into CIFS. Fixes: 2a38e12053b7 ("[SMB3] Remove ifdef since SMB3 (and later) now STRONGLY preferred") Signed-off-by: Benjamin Gilbert Reviewed-by: Aurelien Aptel CC: Stable Signed-off-by: Steve French --- fs/cifs/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index f7243617316c..d5b2e12b5d02 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -5,9 +5,14 @@ config CIFS select CRYPTO select CRYPTO_MD4 select CRYPTO_MD5 + select CRYPTO_SHA256 + select CRYPTO_CMAC select CRYPTO_HMAC select CRYPTO_ARC4 + select CRYPTO_AEAD2 + select CRYPTO_CCM select CRYPTO_ECB + select CRYPTO_AES select CRYPTO_DES help This is the client VFS module for the SMB3 family of NAS protocols, -- cgit v1.2.3-70-g09d2 From 48923d2a9d4f6ca909102061a4240b9896ff8ea2 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Tue, 17 Oct 2017 14:47:17 +0200 Subject: CIFS: do not send invalid input buffer on QUERY_INFO requests query_info() doesn't use the InputBuffer field of the QUERY_INFO request, therefore according to [MS-SMB2] it must: a) set the InputBufferOffset to 0 b) send a zero-length InputBuffer Doing a) is trivial but b) is a bit more tricky. The packet is allocated according to it's StructureSize, which takes into account an extra 1 byte buffer which we don't need here. StructureSize fields must have constant values no matter the actual length of the whole packet so we can't just edit that constant. Both the NetBIOS-over-TCP message length ("rfc1002 length") L and the iovec length L' have to be updated. Since L' is computed from L we just update L by decrementing it by one. Signed-off-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ba3865b338d8..fa17caa56128 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2191,9 +2191,13 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; req->AdditionalInformation = cpu_to_le32(additional_info); - /* 4 for rfc1002 length field and 1 for Buffer */ - req->InputBufferOffset = - cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); + + /* + * We do not use the input buffer (do not send extra byte) + */ + req->InputBufferOffset = 0; + inc_rfc1001_len(req, -1); + req->OutputBufferLength = cpu_to_le32(output_len); iov[0].iov_base = (char *)req; -- cgit v1.2.3-70-g09d2 From db3b5474f462e77b82ca1e27627f03c47b622c99 Mon Sep 17 00:00:00 2001 From: Aurélien Aptel Date: Wed, 11 Oct 2017 13:23:36 +0200 Subject: CIFS: Fix NULL pointer deref on SMB2_tcon() failure If SendReceive2() fails rsp is set to NULL but is dereferenced in the error handling code. Cc: stable@vger.kernel.org Signed-off-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index fa17caa56128..3efcd96b52c5 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1255,7 +1255,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct smb2_tree_connect_req *req; struct smb2_tree_connect_rsp *rsp = NULL; struct kvec iov[2]; - struct kvec rsp_iov; + struct kvec rsp_iov = { NULL, 0 }; int rc = 0; int resp_buftype; int unc_path_len; @@ -1372,7 +1372,7 @@ tcon_exit: return rc; tcon_error_exit: - if (rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) { + if (rsp && rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) { cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); } goto tcon_exit; -- cgit v1.2.3-70-g09d2 From fe83bebc05228e838ed5cbbc62712ab50dd40e18 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 20 Oct 2017 14:49:37 +0200 Subject: SMB: fix leak of validate negotiate info response buffer Fixes: ff1c038addc4 ("Check SMB3 dialects against downgrade attacks") Signed-off-by: David Disseldorp Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 3efcd96b52c5..6b03e2d45d8e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -648,7 +648,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) { int rc = 0; struct validate_negotiate_info_req vneg_inbuf; - struct validate_negotiate_info_rsp *pneg_rsp; + struct validate_negotiate_info_rsp *pneg_rsp = NULL; u32 rsplen; u32 inbuflen; /* max of 4 dialects */ @@ -728,7 +728,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) /* relax check since Mac returns max bufsize allowed on ioctl */ if (rsplen > CIFSMaxBufSize) - return -EIO; + goto err_rsp_free; } /* check validate negotiate info response matches what we got earlier */ @@ -747,10 +747,13 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) /* validate negotiate successful */ cifs_dbg(FYI, "validate negotiate info successful\n"); + kfree(pneg_rsp); return 0; vneg_out: cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); +err_rsp_free: + kfree(pneg_rsp); return -EIO; } -- cgit v1.2.3-70-g09d2 From a2d9daad1d2dfbd307ab158044d1c323d7babbde Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 20 Oct 2017 14:49:38 +0200 Subject: SMB: fix validate negotiate info uninitialised memory use An undersize validate negotiate info server response causes the client to use uninitialised memory for struct validate_negotiate_info_rsp comparisons of Dialect, SecurityMode and/or Capabilities members. Link: https://bugzilla.samba.org/show_bug.cgi?id=13092 Fixes: 7db0a6efdc3e ("SMB3: Work around mount failure when using SMB3 dialect to Macs") Signed-off-by: David Disseldorp Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 6b03e2d45d8e..ba58af747b43 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -727,7 +727,8 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) rsplen); /* relax check since Mac returns max bufsize allowed on ioctl */ - if (rsplen > CIFSMaxBufSize) + if ((rsplen > CIFSMaxBufSize) + || (rsplen < sizeof(struct validate_negotiate_info_rsp))) goto err_rsp_free; } -- cgit v1.2.3-70-g09d2 From b4d91aeb6e120b7e2f207021c31b914895c69bc4 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Tue, 24 Oct 2017 08:41:01 -0400 Subject: RDMA/netlink: OOPs in rdma_nl_rcv_msg() from misinterpreted flag rdma_nl_rcv_msg() checks to see if it should use the .dump() callback or the .doit() callback. The check is done with this check: if (flags & NLM_F_DUMP) ... The NLM_F_DUMP flag is two bits (NLM_F_ROOT | NLM_F_MATCH). When an RDMA_NL_LS message (response) is received, the bit used for indicating an error is the same bit as NLM_F_ROOT. NLM_F_ROOT == (0x100) == RDMA_NL_LS_F_ERR. ibacm sends a response with the RDMA_NL_LS_F_ERR bit set if an error occurs in the service. The current code then misinterprets the NLM_F_DUMP bit and trys to call the .dump() callback. If the .dump() callback for the specified request is not available (which is true for the RDMA_NL_LS messages) the following Oops occurs: [ 4555.960256] BUG: unable to handle kernel NULL pointer dereference at (null) [ 4555.969046] IP: (null) [ 4555.972664] PGD 10543f1067 P4D 10543f1067 PUD 1033f93067 PMD 0 [ 4555.979287] Oops: 0010 [#1] SMP [ 4555.982809] Modules linked in: rpcrdma ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm dm_mirror dm_region_hash dm_log dm_mod dax sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper cryptd hfi1 rdmavt iTCO_wdt iTCO_vendor_support ib_core mei_me lpc_ich pcspkr mei ioatdma sg shpchp i2c_i801 mfd_core wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_power_meter acpi_pad nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2 sd_mod mgag200 drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm igb ahci crc32c_intel ptp libahci pps_core drm dca libata i2c_algo_bit i2c_core [ 4556.061190] CPU: 54 PID: 9841 Comm: ibacm Tainted: G I 4.14.0-rc2+ #6 [ 4556.069667] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015 [ 4556.081339] task: ffff880855f42d00 task.stack: ffffc900246b4000 [ 4556.087967] RIP: 0010: (null) [ 4556.092166] RSP: 0018:ffffc900246b7bc8 EFLAGS: 00010246 [ 4556.098018] RAX: ffffffff81dbe9e0 RBX: ffff881058bb1000 RCX: 0000000000000000 [ 4556.105997] RDX: 0000000000001100 RSI: ffff881058bb1320 RDI: ffff881056362000 [ 4556.113984] RBP: ffffc900246b7bf8 R08: 0000000000000ec0 R09: 0000000000001100 [ 4556.121971] R10: ffff8810573a5000 R11: 0000000000000000 R12: ffff881056362000 [ 4556.129957] R13: 0000000000000ec0 R14: ffff881058bb1320 R15: 0000000000000ec0 [ 4556.137945] FS: 00007fe0ba5a38c0(0000) GS:ffff88105f080000(0000) knlGS:0000000000000000 [ 4556.147000] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4556.153433] CR2: 0000000000000000 CR3: 0000001056f5d003 CR4: 00000000001606e0 [ 4556.161419] Call Trace: [ 4556.164167] ? netlink_dump+0x12c/0x290 [ 4556.168468] __netlink_dump_start+0x186/0x1f0 [ 4556.173357] rdma_nl_rcv_msg+0x193/0x1b0 [ib_core] [ 4556.178724] rdma_nl_rcv+0xdc/0x130 [ib_core] [ 4556.183604] netlink_unicast+0x181/0x240 [ 4556.187998] netlink_sendmsg+0x2c2/0x3b0 [ 4556.192392] sock_sendmsg+0x38/0x50 [ 4556.196299] SYSC_sendto+0x102/0x190 [ 4556.200308] ? __audit_syscall_entry+0xaf/0x100 [ 4556.205387] ? syscall_trace_enter+0x1d0/0x2b0 [ 4556.210366] ? __audit_syscall_exit+0x209/0x290 [ 4556.215442] SyS_sendto+0xe/0x10 [ 4556.219060] do_syscall_64+0x67/0x1b0 [ 4556.223165] entry_SYSCALL64_slow_path+0x25/0x25 [ 4556.228328] RIP: 0033:0x7fe0b9db2a63 [ 4556.232333] RSP: 002b:00007ffc55edc260 EFLAGS: 00000293 ORIG_RAX: 000000000000002c [ 4556.240808] RAX: ffffffffffffffda RBX: 0000000000000010 RCX: 00007fe0b9db2a63 [ 4556.248796] RDX: 0000000000000010 RSI: 00007ffc55edc280 RDI: 000000000000000d [ 4556.256782] RBP: 00007ffc55edc670 R08: 00007ffc55edc270 R09: 000000000000000c [ 4556.265321] R10: 0000000000000000 R11: 0000000000000293 R12: 00007ffc55edc280 [ 4556.273846] R13: 000000000260b400 R14: 000000000000000d R15: 0000000000000001 [ 4556.282368] Code: Bad RIP value. [ 4556.286629] RIP: (null) RSP: ffffc900246b7bc8 [ 4556.293013] CR2: 0000000000000000 [ 4556.297292] ---[ end trace 8d67abcfd10ec209 ]--- [ 4556.305465] Kernel panic - not syncing: Fatal exception [ 4556.313786] Kernel Offset: disabled [ 4556.321563] ---[ end Kernel panic - not syncing: Fatal exception [ 4556.328960] ------------[ cut here ]------------ Special case RDMA_NL_LS response messages to call the appropriate callback. Additionally, make sure that the .dump() callback is not NULL before calling it. Fixes: 647c75ac59a48a54 ("RDMA/netlink: Convert LS to doit callback") Reviewed-by: Mike Marciniszyn Reviewed-by: Kaike Wan Reviewed-by: Alex Estrin Signed-off-by: Michael J. Ruhl Reviewed-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/core/netlink.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index b12e58787c3d..1fb72c356e36 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -175,13 +175,24 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; + /* + * LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't + * mistakenly call the .dump() function. + */ + if (index == RDMA_NL_LS) { + if (cb_table[op].doit) + return cb_table[op].doit(skb, nlh, extack); + return -EINVAL; + } /* FIXME: Convert IWCM to properly handle doit callbacks */ if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_RDMA_CM || index == RDMA_NL_IWCM) { struct netlink_dump_control c = { .dump = cb_table[op].dump, }; - return netlink_dump_start(nls, skb, nlh, &c); + if (c.dump) + return netlink_dump_start(nls, skb, nlh, &c); + return -EINVAL; } if (cb_table[op].doit) -- cgit v1.2.3-70-g09d2 From 0f5da659d8f1810f44de14acf2c80cd6499623a0 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 25 Oct 2017 10:16:42 -0700 Subject: net/unix: don't show information about sockets from other namespaces socket_diag shows information only about sockets from a namespace where a diag socket lives. But if we request information about one unix socket, the kernel don't check that its netns is matched with a diag socket namespace, so any user can get information about any unix socket in a system. This looks like a bug. v2: add a Fixes tag Fixes: 51d7cccf0723 ("net: make sock diag per-namespace") Signed-off-by: Andrei Vagin Signed-off-by: David S. Miller --- net/unix/diag.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/unix/diag.c b/net/unix/diag.c index 4d9679701a6d..384c84e83462 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, err = -ENOENT; if (sk == NULL) goto out_nosk; + if (!net_eq(sock_net(sk), net)) + goto out; err = sock_diag_check_cookie(sk, req->udiag_cookie); if (err) -- cgit v1.2.3-70-g09d2 From d309ae5c6a00648198d1932e6db483d612c2e260 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Wed, 25 Oct 2017 11:47:05 -0700 Subject: nfp: refuse offloading filters that redirects to upper devices Previously we did not ensure that a netdev is a representative netdev before dereferencing its private data. This can occur when an upper netdev is created on a representative netdev. This patch corrects this by first ensuring that the netdev is a representative netdev before using it. Checking only switchdev_port_same_parent_id is not sufficient to ensure that we can safely use the netdev. Failing to check that the netdev is also a representative netdev would result in incorrect dereferencing. Fixes: 1a1e586f54bf ("nfp: add basic action capabilities to flower offloads") Signed-off-by: Jakub Kicinski Signed-off-by: Pieter Jansen van Vuuren Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index db9750695dc7..8ea9320014ee 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -110,6 +110,8 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, */ if (!switchdev_port_same_parent_id(in_dev, out_dev)) return -EOPNOTSUPP; + if (!nfp_netdev_is_nfp_repr(out_dev)) + return -EOPNOTSUPP; output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); if (!output->port) -- cgit v1.2.3-70-g09d2 From 5c25f65fd1e42685f7ccd80e0621829c105785d9 Mon Sep 17 00:00:00 2001 From: Julien Gomes Date: Wed, 25 Oct 2017 11:50:50 -0700 Subject: tun: allow positive return values on dev_get_valid_name() call If the name argument of dev_get_valid_name() contains "%d", it will try to assign it a unit number in __dev__alloc_name() and return either the unit number (>= 0) or an error code (< 0). Considering positive values as error values prevent tun device creations relying this mechanism, therefor we should only consider negative values as errors here. Signed-off-by: Julien Gomes Acked-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e21bf90b819f..b9973fb868b7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2028,7 +2028,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!dev) return -ENOMEM; err = dev_get_valid_name(net, dev, name); - if (err) + if (err < 0) goto err_free_dev; dev_net_set(dev, net); -- cgit v1.2.3-70-g09d2 From 6377ed0bbae6fa28853e1679d068a9106c8a8908 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 19 Oct 2017 14:14:29 +0300 Subject: net/mlx5: Fix health work queue spin lock to IRQ safe spin_lock/unlock of health->wq_lock should be IRQ safe. It was changed to spin_lock_irqsave since adding commit 0179720d6be2 ("net/mlx5: Introduce trigger_health_work function") which uses spin_lock from asynchronous event (IRQ) context. Thus, all spin_lock/unlock of health->wq_lock should have been moved to IRQ safe mode. However, one occurrence on new code using this lock missed that change, resulting in possible deadlock: kernel: Possible unsafe locking scenario: kernel: CPU0 kernel: ---- kernel: lock(&(&health->wq_lock)->rlock); kernel: kernel: lock(&(&health->wq_lock)->rlock); kernel: #012 *** DEADLOCK *** Fixes: 2a0165a034ac ("net/mlx5: Cancel delayed recovery work when unloading the driver") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 8aea0a065e56..db86e1506c8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -356,10 +356,11 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; - spin_lock(&health->wq_lock); + spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); - spin_unlock(&health->wq_lock); + spin_unlock_irqrestore(&health->wq_lock, flags); cancel_delayed_work_sync(&dev->priv.health.recover_work); } -- cgit v1.2.3-70-g09d2 From 4ca637a20a524cd8ddbca696f12bfa92111c96e3 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 4 Oct 2017 17:58:21 -0500 Subject: net/mlx5: Delay events till mlx5 interface's add complete for pci resume mlx5_ib_add is called during mlx5_pci_resume after a pci error. Before mlx5_ib_add completes, there are multiple events which trigger function mlx5_ib_event. This cause kernel panic because mlx5_ib_event accesses unitialized resources. The fix is to extend Erez Shitrit's patch <97834eba7c19> ("net/mlx5: Delay events till ib registration ends") to cover the pci resume code path. Trace: mlx5_core 0001:01:00.6: mlx5_pci_resume was called mlx5_core 0001:01:00.6: firmware version: 16.20.1011 mlx5_core 0001:01:00.6: mlx5_attach_interface:164:(pid 779): mlx5_ib_event:2996:(pid 34777): warning: event on port 1 mlx5_ib_event:2996:(pid 34782): warning: event on port 1 Unable to handle kernel paging request for data at address 0x0001c104 Faulting instruction address: 0xd000000008f411fc Oops: Kernel access of bad area, sig: 11 [#1] ... ... Call Trace: [c000000fff77bb70] [d000000008f4119c] mlx5_ib_event+0x64/0x470 [mlx5_ib] (unreliable) [c000000fff77bc60] [d000000008e67130] mlx5_core_event+0xb8/0x210 [mlx5_core] [c000000fff77bd10] [d000000008e4bd00] mlx5_eq_int+0x528/0x860[mlx5_core] Fixes: 97834eba7c19 ("net/mlx5: Delay events till ib registration ends") Signed-off-by: Huy Nguyen Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 70 ++++++++++++++++----------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ff60cf7342ca..fc281712869b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -77,35 +77,41 @@ static void add_delayed_event(struct mlx5_priv *priv, list_add_tail(&delayed_event->list, &priv->waiting_events_list); } -static void fire_delayed_event_locked(struct mlx5_device_context *dev_ctx, - struct mlx5_core_dev *dev, - struct mlx5_priv *priv) +static void delayed_event_release(struct mlx5_device_context *dev_ctx, + struct mlx5_priv *priv) { + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); struct mlx5_delayed_event *de; struct mlx5_delayed_event *n; + struct list_head temp; - /* stop delaying events */ - priv->is_accum_events = false; + INIT_LIST_HEAD(&temp); + + spin_lock_irq(&priv->ctx_lock); - /* fire all accumulated events before new event comes */ - list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) { + priv->is_accum_events = false; + list_splice_init(&priv->waiting_events_list, &temp); + if (!dev_ctx->context) + goto out; + list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); + +out: + spin_unlock_irq(&priv->ctx_lock); + + list_for_each_entry_safe(de, n, &temp, list) { list_del(&de->list); kfree(de); } } -static void cleanup_delayed_evets(struct mlx5_priv *priv) +/* accumulating events that can come after mlx5_ib calls to + * ib_register_device, till adding that interface to the events list. + */ +static void delayed_event_start(struct mlx5_priv *priv) { - struct mlx5_delayed_event *de; - struct mlx5_delayed_event *n; - spin_lock_irq(&priv->ctx_lock); - priv->is_accum_events = false; - list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) { - list_del(&de->list); - kfree(de); - } + priv->is_accum_events = true; spin_unlock_irq(&priv->ctx_lock); } @@ -122,11 +128,8 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) return; dev_ctx->intf = intf; - /* accumulating events that can come after mlx5_ib calls to - * ib_register_device, till adding that interface to the events list. - */ - priv->is_accum_events = true; + delayed_event_start(priv); dev_ctx->context = intf->add(dev); set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); @@ -137,8 +140,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); - fire_delayed_event_locked(dev_ctx, dev, priv); - #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (dev_ctx->intf->pfault) { if (priv->pfault) { @@ -150,11 +151,12 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) } #endif spin_unlock_irq(&priv->ctx_lock); - } else { - kfree(dev_ctx); - /* delete all accumulated events */ - cleanup_delayed_evets(priv); } + + delayed_event_release(dev_ctx, priv); + + if (!dev_ctx->context) + kfree(dev_ctx); } static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, @@ -205,17 +207,21 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (!dev_ctx) return; + delayed_event_start(priv); if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - return; + goto out; intf->attach(dev, dev_ctx->context); set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - return; + goto out; dev_ctx->context = intf->add(dev); set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } + +out: + delayed_event_release(dev_ctx, priv); } void mlx5_attach_device(struct mlx5_core_dev *dev) @@ -414,8 +420,14 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, if (priv->is_accum_events) add_delayed_event(priv, dev, event, param); + /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is + * still in priv->ctx_list. In this case, only notify the dev_ctx if its + * ADDED or ATTACHED bit are set. + */ list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event) + if (dev_ctx->intf->event && + (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) || + test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))) dev_ctx->intf->event(dev, dev_ctx->context, event, param); spin_unlock_irqrestore(&priv->ctx_lock, flags); -- cgit v1.2.3-70-g09d2 From 3c37745ec614ff048d5dce38f976804b05d307ee Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 17 Oct 2017 12:33:43 +0200 Subject: net/mlx5e: Properly deal with encap flows add/del under neigh update Currently, the encap action offload is handled in the actions parse function and not in mlx5e_tc_add_fdb_flow() where we deal with all the other aspects of offloading actions (vlan, modify header) and the rule itself. When the neigh update code (mlx5e_tc_encap_flows_add()) recreates the encap entry and offloads the related flows, we wrongly call again into mlx5e_tc_add_fdb_flow(), this for itself would cause us to handle again the offloading of vlans and header re-write which puts things in non consistent state and step on freed memory (e.g the modify header parse buffer which is already freed). Since on error, mlx5e_tc_add_fdb_flow() detaches and may release the encap entry, it causes a corruption at the neigh update code which goes over the list of flows associated with this encap entry, or double free when the tc flow is later deleted by user-space. When neigh update (mlx5e_tc_encap_flows_del()) unoffloads the flows related to an encap entry which is now invalid, we do a partial repeat of the eswitch flow removal code which is wrong too. To fix things up we do the following: (1) handle the encap action offload in the eswitch flow add function mlx5e_tc_add_fdb_flow() as done for the other actions and the rule itself. (2) modify the neigh update code (mlx5e_tc_encap_flows_add/del) to only deal with the encap entry and rules delete/add and not with any of the other offloaded actions. Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow') Signed-off-by: Or Gerlitz Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 89 +++++++++++++++---------- 1 file changed, 54 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 1aa2028ed995..9ba1f72060aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -78,9 +78,11 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { + struct ip_tunnel_info tun_info; struct mlx5_flow_spec spec; int num_mod_hdr_actions; void *mod_hdr_actions; + int mirred_ifindex; }; enum { @@ -322,6 +324,12 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, static void mlx5e_detach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); +static int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct ip_tunnel_info *tun_info, + struct net_device *mirred_dev, + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow); + static struct mlx5_flow_handle * mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -329,9 +337,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct mlx5_flow_handle *rule; + struct net_device *out_dev, *encap_dev = NULL; + struct mlx5_flow_handle *rule = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; int err; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + out_dev = __dev_get_by_index(dev_net(priv->netdev), + attr->parse_attr->mirred_ifindex); + err = mlx5e_attach_encap(priv, &parse_attr->tun_info, + out_dev, &encap_dev, flow); + if (err) { + rule = ERR_PTR(err); + if (err != -EAGAIN) + goto err_attach_encap; + } + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; + } + err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) { rule = ERR_PTR(err); @@ -347,10 +373,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } } - rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(rule)) - goto err_add_rule; - + /* we get here if (1) there's no error (rule being null) or when + * (2) there's an encap action and we're on -EAGAIN (no valid neigh) + */ + if (rule != ERR_PTR(-EAGAIN)) { + rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); + if (IS_ERR(rule)) + goto err_add_rule; + } return rule; err_add_rule: @@ -361,6 +391,7 @@ err_mod_hdr: err_add_vlan: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) mlx5e_detach_encap(priv, flow); +err_attach_encap: return rule; } @@ -389,6 +420,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; struct mlx5e_tc_flow *flow; int err; @@ -404,10 +437,9 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, mlx5e_rep_queue_neigh_stats_work(priv); list_for_each_entry(flow, &e->flows, encap) { - flow->esw_attr->encap_id = e->encap_id; - flow->rule = mlx5e_tc_add_fdb_flow(priv, - flow->esw_attr->parse_attr, - flow); + esw_attr = flow->esw_attr; + esw_attr->encap_id = e->encap_id; + flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", @@ -421,15 +453,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow *flow; - struct mlx5_fc *counter; list_for_each_entry(flow, &e->flows, encap) { if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - counter = mlx5_flow_rule_counter(flow->rule); - mlx5_del_flow_rules(flow->rule); - mlx5_fc_destroy(priv->mdev, counter); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); } } @@ -1942,7 +1972,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); - struct net_device *out_dev, *encap_dev = NULL; + struct net_device *out_dev; struct mlx5e_priv *out_priv; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -1955,17 +1985,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, rpriv = out_priv->ppriv; attr->out_rep = rpriv->rep; } else if (encap) { - err = mlx5e_attach_encap(priv, info, - out_dev, &encap_dev, flow); - if (err && err != -EAGAIN) - return err; + parse_attr->mirred_ifindex = ifindex; + parse_attr->tun_info = *info; + attr->parse_attr = parse_attr; attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - out_priv = netdev_priv(encap_dev); - rpriv = out_priv->ppriv; - attr->out_rep = rpriv->rep; - attr->parse_attr = parse_attr; + /* attr->out_rep is resolved when we handle encap */ } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); @@ -2047,7 +2073,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); if (err < 0) - goto err_handle_encap_flow; + goto err_free; flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); } else { err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow); @@ -2058,10 +2084,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_free; + if (err != -EAGAIN) + goto err_free; } - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + if (err != -EAGAIN) + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) @@ -2075,16 +2104,6 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, err_del_rule: mlx5e_tc_del_flow(priv, flow); -err_handle_encap_flow: - if (err == -EAGAIN) { - err = rhashtable_insert_fast(&tc->ht, &flow->node, - tc->ht_params); - if (err) - mlx5e_tc_del_flow(priv, flow); - else - return 0; - } - err_free: kvfree(parse_attr); kfree(flow); -- cgit v1.2.3-70-g09d2 From be0f161ef141e4df368aa3f417a1c2ab9c362e75 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 28 Sep 2017 15:33:50 -0500 Subject: net/mlx5e: DCBNL, Implement tc with ets type and zero bandwidth Previously, tc with ets type and zero bandwidth is not accepted by driver. This behavior does not follow the IEEE802.1qaz spec. If there are tcs with ets type and zero bandwidth, these tcs are assigned to the lowest priority tc_group #0. We equally distribute 100% bw of the tc_group #0 to these zero bandwidth ets tcs. Also, the non zero bandwidth ets tcs are assigned to tc_group #1. If there is no zero bandwidth ets tc, the non zero bandwidth ets tcs are assigned to tc_group #0. Fixes: cdcf11212b22 ("net/mlx5e: Validate BW weight values of ETS") Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 113 +++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/port.c | 21 ++++ include/linux/mlx5/port.h | 2 + 3 files changed, 106 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index c1d384fca4dc..51c4cc00a186 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -41,6 +41,11 @@ #define MLX5E_CEE_STATE_UP 1 #define MLX5E_CEE_STATE_DOWN 0 +enum { + MLX5E_VENDOR_TC_GROUP_NUM = 7, + MLX5E_LOWEST_PRIO_GROUP = 0, +}; + /* If dcbx mode is non-host set the dcbx mode to host. */ static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv, @@ -85,6 +90,9 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + u8 tc_group[IEEE_8021QAZ_MAX_TCS]; + bool is_tc_group_6_exist = false; + bool is_zero_bw_ets_tc = false; int err = 0; int i; @@ -96,37 +104,64 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]); if (err) return err; - } - for (i = 0; i < ets->ets_cap; i++) { + err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]); + if (err) + return err; + err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]); if (err) return err; + + if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC && + tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1)) + is_zero_bw_ets_tc = true; + + if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1)) + is_tc_group_6_exist = true; + } + + /* Report 0% ets tc if exits*/ + if (is_zero_bw_ets_tc) { + for (i = 0; i < ets->ets_cap; i++) + if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP) + ets->tc_tx_bw[i] = 0; + } + + /* Update tc_tsa based on fw setting*/ + for (i = 0; i < ets->ets_cap; i++) { if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC) priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM && + !is_tc_group_6_exist) + priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR; } - memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa)); return err; } -enum { - MLX5E_VENDOR_TC_GROUP_NUM = 7, - MLX5E_ETS_TC_GROUP_NUM = 0, -}; - static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) { bool any_tc_mapped_to_ets = false; + bool ets_zero_bw = false; int strict_group; int i; - for (i = 0; i <= max_tc; i++) - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { any_tc_mapped_to_ets = true; + if (!ets->tc_tx_bw[i]) + ets_zero_bw = true; + } + } - strict_group = any_tc_mapped_to_ets ? 1 : 0; + /* strict group has higher priority than ets group */ + strict_group = MLX5E_LOWEST_PRIO_GROUP; + if (any_tc_mapped_to_ets) + strict_group++; + if (ets_zero_bw) + strict_group++; for (i = 0; i <= max_tc; i++) { switch (ets->tc_tsa[i]) { @@ -137,7 +172,9 @@ static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) tc_group[i] = strict_group++; break; case IEEE_8021QAZ_TSA_ETS: - tc_group[i] = MLX5E_ETS_TC_GROUP_NUM; + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP; + if (ets->tc_tx_bw[i] && ets_zero_bw) + tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1; break; } } @@ -146,8 +183,22 @@ static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc) static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, u8 *tc_group, int max_tc) { + int bw_for_ets_zero_bw_tc = 0; + int last_ets_zero_bw_tc = -1; + int num_ets_zero_bw = 0; int i; + for (i = 0; i <= max_tc; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS && + !ets->tc_tx_bw[i]) { + num_ets_zero_bw++; + last_ets_zero_bw_tc = i; + } + } + + if (num_ets_zero_bw) + bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw; + for (i = 0; i <= max_tc; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_VENDOR: @@ -157,12 +208,26 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; break; case IEEE_8021QAZ_TSA_ETS: - tc_tx_bw[i] = ets->tc_tx_bw[i]; + tc_tx_bw[i] = ets->tc_tx_bw[i] ? + ets->tc_tx_bw[i] : + bw_for_ets_zero_bw_tc; break; } } + + /* Make sure the total bw for ets zero bw group is 100% */ + if (last_ets_zero_bw_tc != -1) + tc_tx_bw[last_ets_zero_bw_tc] += + MLX5E_MAX_BW_ALLOC % num_ets_zero_bw; } +/* If there are ETS BW 0, + * Set ETS group # to 1 for all ETS non zero BW tcs. Their sum must be 100%. + * Set group #0 to all the ETS BW 0 tcs and + * equally splits the 100% BW between them + * Report both group #0 and #1 as ETS type. + * All the tcs in group #0 will be reported with 0% BW. + */ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) { struct mlx5_core_dev *mdev = priv->mdev; @@ -188,7 +253,6 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) return err; memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); - return err; } @@ -209,17 +273,9 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, } /* Validate Bandwidth Sum */ - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { - if (!ets->tc_tx_bw[i]) { - netdev_err(netdev, - "Failed to validate ETS: BW 0 is illegal\n"); - return -EINVAL; - } - + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) bw_sum += ets->tc_tx_bw[i]; - } - } if (bw_sum != 0 && bw_sum != 100) { netdev_err(netdev, @@ -533,8 +589,7 @@ static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev, static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev, int pgid, u8 *bw_pct) { - struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; + struct ieee_ets ets; if (pgid >= CEE_DCBX_MAX_PGS) { netdev_err(netdev, @@ -542,8 +597,8 @@ static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev, return; } - if (mlx5_query_port_tc_bw_alloc(mdev, pgid, bw_pct)) - *bw_pct = 0; + mlx5e_dcbnl_ieee_getets(netdev, &ets); + *bw_pct = ets.tc_tx_bw[pgid]; } static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev, @@ -739,8 +794,6 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) ets.prio_tc[i] = i; } - memcpy(priv->dcbx.tc_tsa, ets.tc_tsa, sizeof(ets.tc_tsa)); - /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */ ets.prio_tc[0] = 1; ets.prio_tc[1] = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 1975d4388d4f..e07061f565d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -677,6 +677,27 @@ int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) } EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group) +{ + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; + void *ets_tcn_conf; + int err; + + err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out)); + if (err) + return err; + + ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out, + tc_configuration[tc]); + + *tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf, + group); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group); + int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) { u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index c57d4b7de3a8..c59af8ab753a 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -157,6 +157,8 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, u8 prio, u8 *tc); int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 tc, u8 *bw_pct); -- cgit v1.2.3-70-g09d2 From ef4816f0ee576d4a27ed35cd1090904121391cb9 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 24 Oct 2017 11:41:26 +0200 Subject: net: mvpp2: fix typo in the tcam setup This patch fixes a typo in the mvpp2_prs_tcam_data_cmp() function, as the shift value is inverted with the data. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 28c6e8a5e118..0b2017170d5b 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -1539,7 +1539,7 @@ static bool mvpp2_prs_tcam_data_cmp(struct mvpp2_prs_entry *pe, int offs, int off = MVPP2_PRS_TCAM_DATA_BYTE(offs); u16 tcam_data; - tcam_data = (8 << pe->tcam.byte[off + 1]) | pe->tcam.byte[off]; + tcam_data = (pe->tcam.byte[off + 1] << 8) | pe->tcam.byte[off]; if (tcam_data != data) return false; return true; -- cgit v1.2.3-70-g09d2 From 20746d717ea390ac6ac3aa531f27ac156bf2e747 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 24 Oct 2017 11:41:27 +0200 Subject: net: mvpp2: fix invalid parameters order when calling the tcam init When calling mvpp2_prs_mac_multi_set() from mvpp2_prs_mac_init(), two parameters (the port index and the table index) are inverted. Fixes this. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 0b2017170d5b..c8ce652a4dd7 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -2614,8 +2614,8 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv) /* place holders only - no ports */ mvpp2_prs_mac_drop_all_set(priv, 0, false); mvpp2_prs_mac_promisc_set(priv, 0, false); - mvpp2_prs_mac_multi_set(priv, MVPP2_PE_MAC_MC_ALL, 0, false); - mvpp2_prs_mac_multi_set(priv, MVPP2_PE_MAC_MC_IP6, 0, false); + mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_ALL, false); + mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_IP6, false); } /* Set default entries for various types of dsa packets */ -- cgit v1.2.3-70-g09d2 From 239dd4ee4838523419ad16e05b16a2003b71d317 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 24 Oct 2017 11:41:28 +0200 Subject: net: mvpp2: do not sleep in set_rx_mode This patch replaces GFP_KERNEL by GFP_ATOMIC to avoid sleeping in the ndo_set_rx_mode() call which is called with BH disabled. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index c8ce652a4dd7..a37af5813f33 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -3396,7 +3396,7 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da, struct mvpp2_prs_entry *pe; int tid; - pe = kzalloc(sizeof(*pe), GFP_KERNEL); + pe = kzalloc(sizeof(*pe), GFP_ATOMIC); if (!pe) return NULL; mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC); @@ -3458,7 +3458,7 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port, if (tid < 0) return tid; - pe = kzalloc(sizeof(*pe), GFP_KERNEL); + pe = kzalloc(sizeof(*pe), GFP_ATOMIC); if (!pe) return -ENOMEM; mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC); -- cgit v1.2.3-70-g09d2 From e9a0b99804ff662d02b78a556a84e22308066fe1 Mon Sep 17 00:00:00 2001 From: Håkon Bugge Date: Tue, 24 Oct 2017 18:17:18 +0200 Subject: rds: ib: Fix uninitialized variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit send_flags needs to be initialized before calling rds_ib_set_wr_signal_state(). Signed-off-by: Håkon Bugge Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_send.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 6ab39dbcca01..8f46755477ae 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -792,6 +792,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask; send->s_atomic_wr.swap_mask = 0; } + send->s_wr.send_flags = 0; nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_atomic_wr.wr.num_sge = 1; send->s_atomic_wr.wr.next = NULL; -- cgit v1.2.3-70-g09d2 From a0c0865fa0abcbc142c11fabec3a2bffc1a4229d Mon Sep 17 00:00:00 2001 From: Håkon Bugge Date: Tue, 24 Oct 2017 16:16:28 +0200 Subject: rds: Fix inaccurate accounting of unsignaled wrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of unsignaled work-requests posted to the IB send queue is tracked by a counter in the rds_ib_connection struct. When it reaches zero, or the caller explicitly asks for it, the send-signaled bit is set in send_flags and the counter is reset. This is performed by the rds_ib_set_wr_signal_state() function. However, this function is not always used which yields inaccurate accounting. This commit fixes this, re-factors a code bloat related to the matter, and makes the actual parameter type to the function consistent. Signed-off-by: Håkon Bugge Signed-off-by: David S. Miller --- net/rds/ib_send.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 8f46755477ae..8557a1cae041 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -661,13 +661,15 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, } } - rds_ib_set_wr_signal_state(ic, send, 0); + rds_ib_set_wr_signal_state(ic, send, false); /* * Always signal the last one if we're stopping due to flow control. */ - if (ic->i_flowctl && flow_controlled && i == (work_alloc-1)) - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) { + rds_ib_set_wr_signal_state(ic, send, true); + send->s_wr.send_flags |= IB_SEND_SOLICITED; + } if (send->s_wr.send_flags & IB_SEND_SIGNALED) nr_sig++; @@ -705,11 +707,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (scat == &rm->data.op_sg[rm->data.op_count]) { prev->s_op = ic->i_data_op; prev->s_wr.send_flags |= IB_SEND_SOLICITED; - if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) { - ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; - prev->s_wr.send_flags |= IB_SEND_SIGNALED; - nr_sig++; - } + if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) + nr_sig += rds_ib_set_wr_signal_state(ic, prev, true); ic->i_data_op = NULL; } -- cgit v1.2.3-70-g09d2 From 06f877d613be3621604c2520ec0351d9fbdca15f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Oct 2017 08:20:31 -0700 Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In my first attempt to fix the lockdep splat, I forgot we could enter inet_csk_route_req() with a freshly allocated request socket, for which refcount has not yet been elevated, due to complex SLAB_TYPESAFE_BY_RCU rules. We either are in rcu_read_lock() section _or_ we own a refcount on the request. Correct RCU verb to use here is rcu_dereference_check(), although it is not possible to prove we actually own a reference on a shared refcount :/ In v2, I added ireq_opt_deref() helper and use in three places, to fix other possible splats. [ 49.844590] lockdep_rcu_suspicious+0xea/0xf3 [ 49.846487] inet_csk_route_req+0x53/0x14d [ 49.848334] tcp_v4_route_req+0xe/0x10 [ 49.850174] tcp_conn_request+0x31c/0x6a0 [ 49.851992] ? __lock_acquire+0x614/0x822 [ 49.854015] tcp_v4_conn_request+0x5a/0x79 [ 49.855957] ? tcp_v4_conn_request+0x5a/0x79 [ 49.858052] tcp_rcv_state_process+0x98/0xdcc [ 49.859990] ? sk_filter_trim_cap+0x2f6/0x307 [ 49.862085] tcp_v4_do_rcv+0xfc/0x145 [ 49.864055] ? tcp_v4_do_rcv+0xfc/0x145 [ 49.866173] tcp_v4_rcv+0x5ab/0xaf9 [ 49.868029] ip_local_deliver_finish+0x1af/0x2e7 [ 49.870064] ip_local_deliver+0x1b2/0x1c5 [ 49.871775] ? inet_del_offload+0x45/0x45 [ 49.873916] ip_rcv_finish+0x3f7/0x471 [ 49.875476] ip_rcv+0x3f1/0x42f [ 49.876991] ? ip_local_deliver_finish+0x2e7/0x2e7 [ 49.878791] __netif_receive_skb_core+0x6d3/0x950 [ 49.880701] ? process_backlog+0x7e/0x216 [ 49.882589] __netif_receive_skb+0x1d/0x5e [ 49.884122] process_backlog+0x10c/0x216 [ 49.885812] net_rx_action+0x147/0x3df Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()") Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") Signed-off-by: Eric Dumazet Reported-by: kernel test robot Reported-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/net/inet_sock.h | 6 ++++++ net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/tcp_ipv4.c | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 425752f768d2..db8162dd8c0b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -132,6 +132,12 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } +static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) +{ + return rcu_dereference_check(ireq->ireq_opt, + refcount_read(&ireq->req.rsk_refcnt) > 0); +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0490916864f9..e65fcb45c3f6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -495,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req ireq->ir_rmt_addr); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - rcu_dereference(ireq->ireq_opt)); + ireq_opt_deref(ireq)); err = net_xmit_eval(err); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 18cd2eae758f..b47a59cb3573 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -543,8 +543,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = rcu_dereference_protected(ireq->ireq_opt, - refcount_read(&req->rsk_refcnt) > 0); + opt = ireq_opt_deref(ireq); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4c43365c374c..5b027c69cbc5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -877,7 +877,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - rcu_dereference(ireq->ireq_opt)); + ireq_opt_deref(ireq)); err = net_xmit_eval(err); } -- cgit v1.2.3-70-g09d2 From 5889e2c0e441d84060e66211ed5c4517ca591167 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Tue, 24 Oct 2017 16:44:42 -0700 Subject: tcp: call tcp_rate_skb_sent() when retransmit with unaligned skb->data Current implementation calls tcp_rate_skb_sent() when tcp_transmit_skb() is called when it clones skb only. Not calling tcp_rate_skb_sent() is OK for all such code paths except from __tcp_retransmit_skb() which happens when skb->data address is not aligned. This may rarely happen e.g. when small amount of data is sent initially and the receiver partially acks odd number of bytes for some reason, possibly malicious. Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 973befc36fd4..1151870018e3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2843,8 +2843,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; - if (!err) + if (!err) { skb->skb_mstamp = tp->tcp_mstamp; + tcp_rate_skb_sent(sk, skb); + } } else { err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } -- cgit v1.2.3-70-g09d2 From 78e0ea6791d7baafb8a0ca82b1bd0c7b3453c919 Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Wed, 25 Oct 2017 00:23:04 -0700 Subject: tap: double-free in error path in tap_open() Double free of skb_array in tap module is causing kernel panic. When tap_set_queue() fails we free skb_array right away by calling skb_array_cleanup(). However, later on skb_array_cleanup() is called again by tap_sock_destruct through sock_put(). This patch fixes that issue. Fixes: 362899b8725b35e3 (macvtap: switch to use skb array) Signed-off-by: Girish Moodalbail Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tap.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 21b71ae947fd..98ee6cc2875d 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -517,6 +517,10 @@ static int tap_open(struct inode *inode, struct file *file) &tap_proto, 0); if (!q) goto err; + if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) { + sk_free(&q->sk); + goto err; + } RCU_INIT_POINTER(q->sock.wq, &q->wq); init_waitqueue_head(&q->wq.wait); @@ -540,22 +544,18 @@ static int tap_open(struct inode *inode, struct file *file) if ((tap->dev->features & NETIF_F_HIGHDMA) && (tap->dev->features & NETIF_F_SG)) sock_set_flag(&q->sk, SOCK_ZEROCOPY); - err = -ENOMEM; - if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) - goto err_array; - err = tap_set_queue(tap, file, q); - if (err) - goto err_queue; + if (err) { + /* tap_sock_destruct() will take care of freeing skb_array */ + goto err_put; + } dev_put(tap->dev); rtnl_unlock(); return err; -err_queue: - skb_array_cleanup(&q->skb_array); -err_array: +err_put: sock_put(&q->sk); err: if (tap) -- cgit v1.2.3-70-g09d2 From 5266b8e4445cc836c46689d80a9ff539fa3bfbda Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 26 Oct 2017 11:50:56 +0200 Subject: xen: fix booting ballooned down hvm guest Commit 96edd61dcf44362d3ef0bed1a5361e0ac7886a63 ("xen/balloon: don't online new memory initially") introduced a regression when booting a HVM domain with memory less than mem-max: instead of ballooning down immediately the system would try to use the memory up to mem-max resulting in Xen crashing the domain. For HVM domains the current size will be reflected in Xenstore node memory/static-max instead of memory/target. Additionally we have to trigger the ballooning process at once. Cc: # 4.13 Fixes: 96edd61dcf44362d3ef0bed1a5361e0ac7886a63 ("xen/balloon: don't online new memory initially") Reported-by: Simon Gaiser Suggested-by: Boris Ostrovsky Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-balloon.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index e89136ab851e..b437fccd4e62 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -57,7 +57,7 @@ static int register_balloon(struct device *dev); static void watch_target(struct xenbus_watch *watch, const char *path, const char *token) { - unsigned long long new_target; + unsigned long long new_target, static_max; int err; static bool watch_fired; static long target_diff; @@ -72,13 +72,20 @@ static void watch_target(struct xenbus_watch *watch, * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. */ new_target >>= PAGE_SHIFT - 10; - if (watch_fired) { - balloon_set_new_target(new_target - target_diff); - return; + + if (!watch_fired) { + watch_fired = true; + err = xenbus_scanf(XBT_NIL, "memory", "static-max", "%llu", + &static_max); + if (err != 1) + static_max = new_target; + else + static_max >>= PAGE_SHIFT - 10; + target_diff = xen_pv_domain() ? 0 + : static_max - balloon_stats.target_pages; } - watch_fired = true; - target_diff = new_target - balloon_stats.target_pages; + balloon_set_new_target(new_target - target_diff); } static struct xenbus_watch target_watch = { .node = "memory/target", -- cgit v1.2.3-70-g09d2 From 44c445c3d1b4eacff23141fa7977c3b2ec3a45c9 Mon Sep 17 00:00:00 2001 From: Vincenzo Maffione Date: Sat, 16 Sep 2017 18:00:00 +0200 Subject: e1000: fix race condition between e1000_down() and e1000_watchdog This patch fixes a race condition that can result into the interface being up and carrier on, but with transmits disabled in the hardware. The bug may show up by repeatedly IFF_DOWN+IFF_UP the interface, which allows e1000_watchdog() interleave with e1000_down(). CPU x CPU y -------------------------------------------------------------------- e1000_down(): netif_carrier_off() e1000_watchdog(): if (carrier == off) { netif_carrier_on(); enable_hw_transmit(); } disable_hw_transmit(); e1000_watchdog(): /* carrier on, do nothing */ Signed-off-by: Vincenzo Maffione Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 98375e1e1185..1982f7917a8d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -520,8 +520,6 @@ void e1000_down(struct e1000_adapter *adapter) struct net_device *netdev = adapter->netdev; u32 rctl, tctl; - netif_carrier_off(netdev); - /* disable receives in the hardware */ rctl = er32(RCTL); ew32(RCTL, rctl & ~E1000_RCTL_EN); @@ -537,6 +535,15 @@ void e1000_down(struct e1000_adapter *adapter) E1000_WRITE_FLUSH(); msleep(10); + /* Set the carrier off after transmits have been disabled in the + * hardware, to avoid race conditions with e1000_watchdog() (which + * may be running concurrently to us, checking for the carrier + * bit to decide whether it should enable transmits again). Such + * a race condition would result into transmission being disabled + * in the hardware until the next IFF_DOWN+IFF_UP cycle. + */ + netif_carrier_off(netdev); + napi_disable(&adapter->napi); e1000_irq_disable(adapter); -- cgit v1.2.3-70-g09d2 From 5983587c8c5ef00d6886477544ad67d495bc5479 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 18:13:48 +0100 Subject: e1000: avoid null pointer dereference on invalid stat type Currently if the stat type is invalid then data[i] is being set either by dereferencing a null pointer p, or it is reading from an incorrect previous location if we had a valid stat type previously. Fix this by skipping over the read of p on an invalid stat type. Detected by CoverityScan, CID#113385 ("Explicit null dereferenced") Signed-off-by: Colin Ian King Reviewed-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index ec8aa4562cc9..3b3983a1ffbb 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -1824,11 +1824,12 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, { struct e1000_adapter *adapter = netdev_priv(netdev); int i; - char *p = NULL; const struct e1000_stats *stat = e1000_gstrings_stats; e1000_update_stats(adapter); - for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) { + for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++, stat++) { + char *p; + switch (stat->type) { case NETDEV_STATS: p = (char *)netdev + stat->stat_offset; @@ -1839,15 +1840,13 @@ static void e1000_get_ethtool_stats(struct net_device *netdev, default: WARN_ONCE(1, "Invalid E1000 stat type: %u index %d\n", stat->type, i); - break; + continue; } if (stat->sizeof_stat == sizeof(u64)) data[i] = *(u64 *)p; else data[i] = *(u32 *)p; - - stat++; } /* BUG_ON(i != E1000_STATS_LEN); */ } -- cgit v1.2.3-70-g09d2 From 104ba83363d1d42af62abb247f1426c09a80fced Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 19 Oct 2017 20:07:36 +0100 Subject: igb: Fix TX map failure path When the driver cannot map a TX buffer, instead of rolling back gracefully and retrying later, we currently get a panic: [ 159.885994] igb 0000:00:00.0: TX DMA map failed [ 159.886588] Unable to handle kernel paging request at virtual address ffff00000a08c7a8 ... [ 159.897031] PC is at igb_xmit_frame_ring+0x9c8/0xcb8 Fix the erroneous test that leads to this situation. Signed-off-by: Jean-Philippe Brucker Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fd4a46b03cc8..ea69af267d63 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5326,7 +5326,7 @@ dma_error: DMA_TO_DEVICE); dma_unmap_len_set(tx_buffer, len, 0); - if (i--) + if (i-- == 0) i += tx_ring->count; tx_buffer = &tx_ring->tx_buffer_info[i]; } -- cgit v1.2.3-70-g09d2 From 069db9cd0bbde92d3aa947ed86a09cbd4ceb5f67 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 19 Oct 2017 17:07:13 -0400 Subject: ixgbe: Fix Tx map failure path This patch is a partial revert of "ixgbe: Don't bother clearing buffer memory for descriptor rings". Specifically I messed up the exception handling path a bit and this resulted in us incorrectly adding the count back in when we didn't need to. In order to make this simpler I am reverting most of the exception handling path change and instead just replacing the bit that was handled by the unmap_and_free call. Fixes: ffed21bcee7a ("ixgbe: Don't bother clearing buffer memory for descriptor rings") Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4d76afd13868..6d5f31e94358 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8020,29 +8020,23 @@ static int ixgbe_tx_map(struct ixgbe_ring *tx_ring, return 0; dma_error: dev_err(tx_ring->dev, "TX DMA map failed\n"); - tx_buffer = &tx_ring->tx_buffer_info[i]; /* clear dma mappings for failed tx_buffer_info map */ - while (tx_buffer != first) { + for (;;) { + tx_buffer = &tx_ring->tx_buffer_info[i]; if (dma_unmap_len(tx_buffer, len)) dma_unmap_page(tx_ring->dev, dma_unmap_addr(tx_buffer, dma), dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); dma_unmap_len_set(tx_buffer, len, 0); - - if (i--) + if (tx_buffer == first) + break; + if (i == 0) i += tx_ring->count; - tx_buffer = &tx_ring->tx_buffer_info[i]; + i--; } - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); - dev_kfree_skb_any(first->skb); first->skb = NULL; -- cgit v1.2.3-70-g09d2 From 10781348cadebbd5291c8fb193e850365c914da8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 20 Oct 2017 13:59:20 -0700 Subject: i40e: Fix incorrect use of tx_itr_setting when checking for Rx ITR setup It looks like there was either a copy/paste error or just a typo that resulted in the Tx ITR setting being used to determine if we were using adaptive Rx interrupt moderation or not. This patch fixes the typo. Fixes: 65e87c0398f5 ("i40evf: support queue-specific settings for interrupt moderation") Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 2756131495f0..ab142e05e196 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2269,7 +2269,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, goto enable_int; } - if (ITR_IS_DYNAMIC(tx_itr_setting)) { + if (ITR_IS_DYNAMIC(rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } -- cgit v1.2.3-70-g09d2 From 62b4c6694dfd3821bd5ea5bed48238bbabd5fe8b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 21 Oct 2017 18:12:29 -0700 Subject: i40e: Add programming descriptors to cleaned_count This patch updates the i40e driver to include programming descriptors in the cleaned_count. Without this change it becomes possible for us to leak memory as we don't trigger a large enough allocation when the time comes to allocate new buffers and we end up overwriting a number of rx_buffers equal to the number of programming descriptors we encountered. Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status") Signed-off-by: Alexander Duyck Tested-by: Anders K. Pedersen Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index ab142e05e196..120c68f78951 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2102,6 +2102,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (unlikely(i40e_rx_is_programming_status(qword))) { i40e_clean_programming_status(rx_ring, rx_desc, qword); + cleaned_count++; continue; } size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> -- cgit v1.2.3-70-g09d2 From 814eae5982cc55988f642f7f1d10eaf340845c00 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 26 Oct 2017 15:54:15 +0100 Subject: alpha/PCI: Move pci_map_irq()/pci_swizzle() out of initdata The introduction of {map/swizzle}_irq() hooks in the struct pci_host_bridge allowed to replace the pci_fixup_irqs() PCI IRQ allocation in alpha arch PCI code with per-bridge map/swizzle functions with commit 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks"). As a side effect of converting PCI IRQ allocation to the struct pci_host_bridge {map/swizzle}_irq() hooks mechanism, the actual PCI IRQ allocation function (ie pci_assign_irq()) is carried out per-device in pci_device_probe() that is called when a PCI device driver is about to be probed. This means that, for drivers compiled as loadable modules, the actual PCI device IRQ allocation can now happen after the system has booted so the struct pci_host_bridge {map/swizzle}_irq() hooks pci_assign_irq() relies on must stay valid after the system has booted so that PCI core can carry out PCI IRQ allocation correctly. Most of the alpha board structures pci_map_irq() and pci_swizzle() hooks (that are used to initialize their struct pci_host_bridge equivalent through the alpha_mv global variable - that represents the struct alpha_machine_vector of the running kernel) are marked as __init/__initdata; this causes freed memory dereferences when PCI IRQ allocation is carried out after the kernel has booted (ie when loading PCI drivers as loadable module) because when the kernel tries to bind the PCI device to its (module) driver, the function pci_assign_irq() is called, that in turn retrieves the struct pci_host_bridge {map/swizzle}_irq() hooks to carry out PCI IRQ allocation; if those hooks are marked as __init code/__initdata they point at freed/invalid memory. Fix the issue by removing the __init/__initdata markers from all subarch struct alpha_machine_vector.pci_map_irq()/pci_swizzle() functions (and data). Fixes: 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") Link: http://lkml.kernel.org/r/alpine.LRH.2.21.1710251043170.7098@math.ut.ee Reported-by: Meelis Roos Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Meelis Roos Cc: Matt Turner --- arch/alpha/kernel/sys_alcor.c | 4 ++-- arch/alpha/kernel/sys_cabriolet.c | 12 ++++++------ arch/alpha/kernel/sys_dp264.c | 20 ++++++++++---------- arch/alpha/kernel/sys_eb64p.c | 4 ++-- arch/alpha/kernel/sys_eiger.c | 4 ++-- arch/alpha/kernel/sys_miata.c | 6 +++--- arch/alpha/kernel/sys_mikasa.c | 4 ++-- arch/alpha/kernel/sys_nautilus.c | 2 +- arch/alpha/kernel/sys_noritake.c | 6 +++--- arch/alpha/kernel/sys_rawhide.c | 4 ++-- arch/alpha/kernel/sys_ruffian.c | 6 +++--- arch/alpha/kernel/sys_rx164.c | 4 ++-- arch/alpha/kernel/sys_sable.c | 10 +++++----- arch/alpha/kernel/sys_sio.c | 8 ++++---- arch/alpha/kernel/sys_sx164.c | 4 ++-- arch/alpha/kernel/sys_takara.c | 6 +++--- arch/alpha/kernel/sys_wildfire.c | 4 ++-- 17 files changed, 54 insertions(+), 54 deletions(-) diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index 118dc6af1805..7ad074fd5ab5 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -181,10 +181,10 @@ alcor_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int alcor_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[7][5] __initdata = { + static char irq_tab[7][5] = { /*INT INTA INTB INTC INTD */ /* note: IDSEL 17 is XLT only */ {16+13, 16+13, 16+13, 16+13, 16+13}, /* IdSel 17, TULIP */ diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index 4c50f8f40cbb..c0fa1fe5ce77 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -173,10 +173,10 @@ pc164_init_irq(void) * because it is the Saturn IO (SIO) PCI/ISA Bridge Chip. */ -static inline int __init +static inline int eb66p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ {16+0, 16+0, 16+5, 16+9, 16+13}, /* IdSel 6, slot 0, J25 */ {16+1, 16+1, 16+6, 16+10, 16+14}, /* IdSel 7, slot 1, J26 */ @@ -203,10 +203,10 @@ eb66p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) * because it is the Saturn IO (SIO) PCI/ISA Bridge Chip. */ -static inline int __init +static inline int cabriolet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ { 16+2, 16+2, 16+7, 16+11, 16+15}, /* IdSel 5, slot 2, J21 */ { 16+0, 16+0, 16+5, 16+9, 16+13}, /* IdSel 6, slot 0, J19 */ @@ -287,10 +287,10 @@ cia_cab_init_pci(void) * */ -static inline int __init +static inline int alphapc164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[7][5] __initdata = { + static char irq_tab[7][5] = { /*INT INTA INTB INTC INTD */ { 16+2, 16+2, 16+9, 16+13, 16+17}, /* IdSel 5, slot 2, J20 */ { 16+0, 16+0, 16+7, 16+11, 16+15}, /* IdSel 6, slot 0, J29 */ diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 6c35159bc00e..9e1e40ea1d14 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -356,7 +356,7 @@ clipper_init_irq(void) * 10 64 bit PCI option slot 3 (not bus 0) */ -static int __init +static int isa_irq_fixup(const struct pci_dev *dev, int irq) { u8 irq8; @@ -372,10 +372,10 @@ isa_irq_fixup(const struct pci_dev *dev, int irq) return irq8 & 0xf; } -static int __init +static int dp264_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[6][5] __initdata = { + static char irq_tab[6][5] = { /*INT INTA INTB INTC INTD */ { -1, -1, -1, -1, -1}, /* IdSel 5 ISA Bridge */ { 16+ 3, 16+ 3, 16+ 2, 16+ 2, 16+ 2}, /* IdSel 6 SCSI builtin*/ @@ -394,10 +394,10 @@ dp264_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return isa_irq_fixup(dev, irq); } -static int __init +static int monet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[13][5] __initdata = { + static char irq_tab[13][5] = { /*INT INTA INTB INTC INTD */ { 45, 45, 45, 45, 45}, /* IdSel 3 21143 PCI1 */ { -1, -1, -1, -1, -1}, /* IdSel 4 unused */ @@ -423,7 +423,7 @@ monet_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); } -static u8 __init +static u8 monet_swizzle(struct pci_dev *dev, u8 *pinp) { struct pci_controller *hose = dev->sysdata; @@ -456,10 +456,10 @@ monet_swizzle(struct pci_dev *dev, u8 *pinp) return slot; } -static int __init +static int webbrick_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[13][5] __initdata = { + static char irq_tab[13][5] = { /*INT INTA INTB INTC INTD */ { -1, -1, -1, -1, -1}, /* IdSel 7 ISA Bridge */ { -1, -1, -1, -1, -1}, /* IdSel 8 unused */ @@ -478,10 +478,10 @@ webbrick_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); } -static int __init +static int clipper_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[7][5] __initdata = { + static char irq_tab[7][5] = { /*INT INTA INTB INTC INTD */ { 16+ 8, 16+ 8, 16+ 9, 16+10, 16+11}, /* IdSel 1 slot 1 */ { 16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 2 slot 2 */ diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c index ad40a425e841..372661c56537 100644 --- a/arch/alpha/kernel/sys_eb64p.c +++ b/arch/alpha/kernel/sys_eb64p.c @@ -167,10 +167,10 @@ eb64p_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int eb64p_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ {16+7, 16+7, 16+7, 16+7, 16+7}, /* IdSel 5, slot ?, ?? */ {16+0, 16+0, 16+2, 16+4, 16+9}, /* IdSel 6, slot ?, ?? */ diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index 15f42083bdb3..2731738b5872 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -141,7 +141,7 @@ eiger_init_irq(void) } } -static int __init +static int eiger_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { u8 irq_orig; @@ -158,7 +158,7 @@ eiger_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return irq_orig - 0x80; } -static u8 __init +static u8 eiger_swizzle(struct pci_dev *dev, u8 *pinp) { struct pci_controller *hose = dev->sysdata; diff --git a/arch/alpha/kernel/sys_miata.c b/arch/alpha/kernel/sys_miata.c index d5b9776a608d..731d693fa1f9 100644 --- a/arch/alpha/kernel/sys_miata.c +++ b/arch/alpha/kernel/sys_miata.c @@ -149,10 +149,10 @@ miata_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int miata_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[18][5] __initdata = { + static char irq_tab[18][5] = { /*INT INTA INTB INTC INTD */ {16+ 8, 16+ 8, 16+ 8, 16+ 8, 16+ 8}, /* IdSel 14, DC21142 */ { -1, -1, -1, -1, -1}, /* IdSel 15, EIDE */ @@ -196,7 +196,7 @@ miata_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 miata_swizzle(struct pci_dev *dev, u8 *pinp) { int slot, pin = *pinp; diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c index 5e82dc1ad6f2..350ec9c8335b 100644 --- a/arch/alpha/kernel/sys_mikasa.c +++ b/arch/alpha/kernel/sys_mikasa.c @@ -145,10 +145,10 @@ mikasa_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int mikasa_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[8][5] __initdata = { + static char irq_tab[8][5] = { /*INT INTA INTB INTC INTD */ {16+12, 16+12, 16+12, 16+12, 16+12}, /* IdSel 17, SCSI */ { -1, -1, -1, -1, -1}, /* IdSel 18, PCEB */ diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 8ae04a121186..d019e4ce07bd 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -62,7 +62,7 @@ nautilus_init_irq(void) common_init_isa_dma(); } -static int __init +static int nautilus_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { /* Preserve the IRQ set up by the console. */ diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c index 063e594fd969..2301678d9f9d 100644 --- a/arch/alpha/kernel/sys_noritake.c +++ b/arch/alpha/kernel/sys_noritake.c @@ -193,10 +193,10 @@ noritake_init_irq(void) * comes in on. This makes interrupt processing much easier. */ -static int __init +static int noritake_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[15][5] __initdata = { + static char irq_tab[15][5] = { /*INT INTA INTB INTC INTD */ /* note: IDSELs 16, 17, and 25 are CORELLE only */ { 16+1, 16+1, 16+1, 16+1, 16+1}, /* IdSel 16, QLOGIC */ @@ -221,7 +221,7 @@ noritake_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 noritake_swizzle(struct pci_dev *dev, u8 *pinp) { int slot, pin = *pinp; diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c index dfd510ae5d8c..546822d07dc7 100644 --- a/arch/alpha/kernel/sys_rawhide.c +++ b/arch/alpha/kernel/sys_rawhide.c @@ -221,10 +221,10 @@ rawhide_init_irq(void) * */ -static int __init +static int rawhide_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ { 16+16, 16+16, 16+16, 16+16, 16+16}, /* IdSel 1 SCSI PCI 1 */ { 16+ 0, 16+ 0, 16+ 1, 16+ 2, 16+ 3}, /* IdSel 2 slot 2 */ diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c index a3f485257170..3b35e1913492 100644 --- a/arch/alpha/kernel/sys_ruffian.c +++ b/arch/alpha/kernel/sys_ruffian.c @@ -117,10 +117,10 @@ ruffian_kill_arch (int mode) * */ -static int __init +static int ruffian_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[11][5] __initdata = { + static char irq_tab[11][5] = { /*INT INTA INTB INTC INTD */ {-1, -1, -1, -1, -1}, /* IdSel 13, 21052 */ {-1, -1, -1, -1, -1}, /* IdSel 14, SIO */ @@ -139,7 +139,7 @@ ruffian_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 ruffian_swizzle(struct pci_dev *dev, u8 *pinp) { int slot, pin = *pinp; diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c index 08ee737d4fba..e178007107ef 100644 --- a/arch/alpha/kernel/sys_rx164.c +++ b/arch/alpha/kernel/sys_rx164.c @@ -142,7 +142,7 @@ rx164_init_irq(void) * */ -static int __init +static int rx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { #if 0 @@ -156,7 +156,7 @@ rx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { 16+1, 16+1, 16+6, 16+11, 16+16}, /* IdSel 10, slot 4 */ }; #else - static char irq_tab[6][5] __initdata = { + static char irq_tab[6][5] = { /*INT INTA INTB INTC INTD */ { 16+0, 16+0, 16+6, 16+11, 16+16}, /* IdSel 5, slot 0 */ { 16+1, 16+1, 16+7, 16+12, 16+17}, /* IdSel 6, slot 1 */ diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index 8a0aa6d67b53..86d259c2612d 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -192,10 +192,10 @@ sable_init_irq(void) * with the values in the irq swizzling tables above. */ -static int __init +static int sable_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[9][5] __initdata = { + static char irq_tab[9][5] = { /*INT INTA INTB INTC INTD */ { 32+0, 32+0, 32+0, 32+0, 32+0}, /* IdSel 0, TULIP */ { 32+1, 32+1, 32+1, 32+1, 32+1}, /* IdSel 1, SCSI */ @@ -374,10 +374,10 @@ lynx_init_irq(void) * with the values in the irq swizzling tables above. */ -static int __init +static int lynx_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[19][5] __initdata = { + static char irq_tab[19][5] = { /*INT INTA INTB INTC INTD */ { -1, -1, -1, -1, -1}, /* IdSel 13, PCEB */ { -1, -1, -1, -1, -1}, /* IdSel 14, PPB */ @@ -404,7 +404,7 @@ lynx_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 lynx_swizzle(struct pci_dev *dev, u8 *pinp) { int slot, pin = *pinp; diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c index febd24eba7a6..9fd2895639d5 100644 --- a/arch/alpha/kernel/sys_sio.c +++ b/arch/alpha/kernel/sys_sio.c @@ -144,7 +144,7 @@ sio_fixup_irq_levels(unsigned int level_bits) outb((level_bits >> 8) & 0xff, 0x4d1); } -static inline int __init +static inline int noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { /* @@ -165,7 +165,7 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) * that they use the default INTA line, if they are interrupt * driven at all). */ - static char irq_tab[][5] __initdata = { + static char irq_tab[][5] = { /*INT A B C D */ { 3, 3, 3, 3, 3}, /* idsel 6 (53c810) */ {-1, -1, -1, -1, -1}, /* idsel 7 (SIO: PCI/ISA bridge) */ @@ -183,10 +183,10 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return irq >= 0 ? tmp : -1; } -static inline int __init +static inline int p2k_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[][5] __initdata = { + static char irq_tab[][5] = { /*INT A B C D */ { 0, 0, -1, -1, -1}, /* idsel 6 (53c810) */ {-1, -1, -1, -1, -1}, /* idsel 7 (SIO: PCI/ISA bridge) */ diff --git a/arch/alpha/kernel/sys_sx164.c b/arch/alpha/kernel/sys_sx164.c index d063b360efed..23eee54d714a 100644 --- a/arch/alpha/kernel/sys_sx164.c +++ b/arch/alpha/kernel/sys_sx164.c @@ -94,10 +94,10 @@ sx164_init_irq(void) * 9 32 bit PCI option slot 3 */ -static int __init +static int sx164_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[5][5] __initdata = { + static char irq_tab[5][5] = { /*INT INTA INTB INTC INTD */ { 16+ 9, 16+ 9, 16+13, 16+17, 16+21}, /* IdSel 5 slot 2 J17 */ { 16+11, 16+11, 16+15, 16+19, 16+23}, /* IdSel 6 slot 0 J19 */ diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index dd0f1eae3c68..9101f2bb6176 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -155,10 +155,10 @@ takara_init_irq(void) * assign it whatever the hell IRQ we like and it doesn't matter. */ -static int __init +static int takara_map_irq_srm(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[15][5] __initdata = { + static char irq_tab[15][5] = { { 16+3, 16+3, 16+3, 16+3, 16+3}, /* slot 6 == device 3 */ { 16+2, 16+2, 16+2, 16+2, 16+2}, /* slot 7 == device 2 */ { 16+1, 16+1, 16+1, 16+1, 16+1}, /* slot 8 == device 1 */ @@ -210,7 +210,7 @@ takara_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return COMMON_TABLE_LOOKUP; } -static u8 __init +static u8 takara_swizzle(struct pci_dev *dev, u8 *pinp) { int slot = PCI_SLOT(dev->devfn); diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index ee1874887776..c3f8b79fe214 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -288,10 +288,10 @@ wildfire_device_interrupt(unsigned long vector) * 7 64 bit PCI 1 option slot 7 */ -static int __init +static int wildfire_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - static char irq_tab[8][5] __initdata = { + static char irq_tab[8][5] = { /*INT INTA INTB INTC INTD */ { -1, -1, -1, -1, -1}, /* IdSel 0 ISA Bridge */ { 36, 36, 36+1, 36+2, 36+3}, /* IdSel 1 SCSI builtin */ -- cgit v1.2.3-70-g09d2 From 4587eee04e2ac7ac3ac9fa2bc164fb6e548f99cd Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 25 Oct 2017 15:58:31 -0500 Subject: SMB3: Validate negotiate request must always be signed According to MS-SMB2 3.2.55 validate_negotiate request must always be signed. Some Windows can fail the request if you send it unsigned See kernel bugzilla bug 197311 CC: Stable Acked-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ba58af747b43..5331631386a2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1979,6 +1979,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, } else iov[0].iov_len = get_rfc1002_length(req) + 4; + /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */ + if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) + req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); -- cgit v1.2.3-70-g09d2 From 80c094a47dd4ea63375e3f60b5e076064f16e857 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 26 Oct 2017 19:35:35 +0200 Subject: Revert "apparmor: add base infastructure for socket mediation" This reverts commit 651e28c5537abb39076d3949fb7618536f1d242e. This caused a regression: "The specific problem is that dnsmasq refuses to start on openSUSE Leap 42.2. The specific cause is that and attempt to open a PF_LOCAL socket gets EACCES. This means that networking doesn't function on a system with a 4.14-rc2 system." Sadly, the developers involved seemed to be in denial for several weeks about this, delaying the revert. This has not been a good release for the security subsystem, and this area needs to change development practices. Reported-and-bisected-by: James Bottomley Tracked-by: Thorsten Leemhuis Cc: John Johansen Cc: Vlastimil Babka Cc: Seth Arnold Signed-off-by: Linus Torvalds --- security/apparmor/.gitignore | 1 - security/apparmor/Makefile | 43 +---- security/apparmor/apparmorfs.c | 1 - security/apparmor/file.c | 30 --- security/apparmor/include/audit.h | 26 +-- security/apparmor/include/net.h | 114 ----------- security/apparmor/include/perms.h | 5 +- security/apparmor/include/policy.h | 13 -- security/apparmor/lib.c | 5 +- security/apparmor/lsm.c | 387 ------------------------------------- security/apparmor/net.c | 184 ------------------ security/apparmor/policy_unpack.c | 47 +---- 12 files changed, 16 insertions(+), 840 deletions(-) delete mode 100644 security/apparmor/include/net.h delete mode 100644 security/apparmor/net.c diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore index d5b291e94264..9cdec70d72b8 100644 --- a/security/apparmor/.gitignore +++ b/security/apparmor/.gitignore @@ -1,6 +1,5 @@ # # Generated include files # -net_names.h capability_names.h rlim_names.h diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile index dafdd387d42b..81a34426d024 100644 --- a/security/apparmor/Makefile +++ b/security/apparmor/Makefile @@ -4,44 +4,11 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \ path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ - resource.o secid.o file.o policy_ns.o label.o mount.o net.o + resource.o secid.o file.o policy_ns.o label.o mount.o apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o -clean-files := capability_names.h rlim_names.h net_names.h +clean-files := capability_names.h rlim_names.h -# Build a lower case string table of address family names -# Transform lines from -# #define AF_LOCAL 1 /* POSIX name for AF_UNIX */ -# #define AF_INET 2 /* Internet IP Protocol */ -# to -# [1] = "local", -# [2] = "inet", -# -# and build the securityfs entries for the mapping. -# Transforms lines from -# #define AF_INET 2 /* Internet IP Protocol */ -# to -# #define AA_SFS_AF_MASK "local inet" -quiet_cmd_make-af = GEN $@ -cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\ - sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \ - 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ - echo "};" >> $@ ;\ - printf '%s' '\#define AA_SFS_AF_MASK "' >> $@ ;\ - sed -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \ - 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/\L\1/p'\ - $< | tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@ - -# Build a lower case string table of sock type names -# Transform lines from -# SOCK_STREAM = 1, -# to -# [1] = "stream", -quiet_cmd_make-sock = GEN $@ -cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\ - sed $^ >>$@ -r -n \ - -e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ - echo "};" >> $@ # Build a lower case string table of capability names # Transforms lines from @@ -94,7 +61,6 @@ cmd_make-rlim = echo "static const char *const rlim_names[RLIM_NLIMITS] = {" \ tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@ $(obj)/capability.o : $(obj)/capability_names.h -$(obj)/net.o : $(obj)/net_names.h $(obj)/resource.o : $(obj)/rlim_names.h $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \ $(src)/Makefile @@ -102,8 +68,3 @@ $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \ $(obj)/rlim_names.h : $(srctree)/include/uapi/asm-generic/resource.h \ $(src)/Makefile $(call cmd,make-rlim) -$(obj)/net_names.h : $(srctree)/include/linux/socket.h \ - $(srctree)/include/linux/net.h \ - $(src)/Makefile - $(call cmd,make-af) - $(call cmd,make-sock) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 518d5928661b..caaf51dda648 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2202,7 +2202,6 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("policy", aa_sfs_entry_policy), AA_SFS_DIR("domain", aa_sfs_entry_domain), AA_SFS_DIR("file", aa_sfs_entry_file), - AA_SFS_DIR("network", aa_sfs_entry_network), AA_SFS_DIR("mount", aa_sfs_entry_mount), AA_SFS_DIR("namespaces", aa_sfs_entry_ns), AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK), diff --git a/security/apparmor/file.c b/security/apparmor/file.c index db80221891c6..3382518b87fa 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -21,7 +21,6 @@ #include "include/context.h" #include "include/file.h" #include "include/match.h" -#include "include/net.h" #include "include/path.h" #include "include/policy.h" #include "include/label.h" @@ -567,32 +566,6 @@ static int __file_path_perm(const char *op, struct aa_label *label, return error; } -static int __file_sock_perm(const char *op, struct aa_label *label, - struct aa_label *flabel, struct file *file, - u32 request, u32 denied) -{ - struct socket *sock = (struct socket *) file->private_data; - int error; - - AA_BUG(!sock); - - /* revalidation due to label out of date. No revocation at this time */ - if (!denied && aa_label_is_subset(flabel, label)) - return 0; - - /* TODO: improve to skip profiles cached in flabel */ - error = aa_sock_file_perm(label, op, request, sock); - if (denied) { - /* TODO: improve to skip profiles checked above */ - /* check every profile in file label to is cached */ - last_error(error, aa_sock_file_perm(flabel, op, request, sock)); - } - if (!error) - update_file_ctx(file_ctx(file), label, request); - - return error; -} - /** * aa_file_perm - do permission revalidation check & audit for @file * @op: operation being checked @@ -637,9 +610,6 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file, error = __file_path_perm(op, label, flabel, file, request, denied); - else if (S_ISSOCK(file_inode(file)->i_mode)) - error = __file_sock_perm(op, label, flabel, file, request, - denied); done: rcu_read_unlock(); diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index ff4316e1068d..620e81169659 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -121,29 +121,21 @@ struct apparmor_audit_data { /* these entries require a custom callback fn */ struct { struct aa_label *peer; - union { - struct { - kuid_t ouid; - const char *target; - } fs; - struct { - int type, protocol; - struct sock *peer_sk; - void *addr; - int addrlen; - } net; - int signal; - struct { - int rlim; - unsigned long max; - } rlim; - }; + struct { + const char *target; + kuid_t ouid; + } fs; }; struct { struct aa_profile *profile; const char *ns; long pos; } iface; + int signal; + struct { + int rlim; + unsigned long max; + } rlim; struct { const char *src_name; const char *type; diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h deleted file mode 100644 index 140c8efcf364..000000000000 --- a/security/apparmor/include/net.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * AppArmor security module - * - * This file contains AppArmor network mediation definitions. - * - * Copyright (C) 1998-2008 Novell/SUSE - * Copyright 2009-2017 Canonical Ltd. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - */ - -#ifndef __AA_NET_H -#define __AA_NET_H - -#include -#include - -#include "apparmorfs.h" -#include "label.h" -#include "perms.h" -#include "policy.h" - -#define AA_MAY_SEND AA_MAY_WRITE -#define AA_MAY_RECEIVE AA_MAY_READ - -#define AA_MAY_SHUTDOWN AA_MAY_DELETE - -#define AA_MAY_CONNECT AA_MAY_OPEN -#define AA_MAY_ACCEPT 0x00100000 - -#define AA_MAY_BIND 0x00200000 -#define AA_MAY_LISTEN 0x00400000 - -#define AA_MAY_SETOPT 0x01000000 -#define AA_MAY_GETOPT 0x02000000 - -#define NET_PERMS_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \ - AA_MAY_SHUTDOWN | AA_MAY_BIND | AA_MAY_LISTEN | \ - AA_MAY_CONNECT | AA_MAY_ACCEPT | AA_MAY_SETATTR | \ - AA_MAY_GETATTR | AA_MAY_SETOPT | AA_MAY_GETOPT) - -#define NET_FS_PERMS (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \ - AA_MAY_SHUTDOWN | AA_MAY_CONNECT | AA_MAY_RENAME |\ - AA_MAY_SETATTR | AA_MAY_GETATTR | AA_MAY_CHMOD | \ - AA_MAY_CHOWN | AA_MAY_CHGRP | AA_MAY_LOCK | \ - AA_MAY_MPROT) - -#define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT | \ - AA_MAY_ACCEPT) -struct aa_sk_ctx { - struct aa_label *label; - struct aa_label *peer; - struct path path; -}; - -#define SK_CTX(X) ((X)->sk_security) -#define SOCK_ctx(X) SOCK_INODE(X)->i_security -#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P) \ - struct lsm_network_audit NAME ## _net = { .sk = (SK), \ - .family = (F)}; \ - DEFINE_AUDIT_DATA(NAME, \ - ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \ - LSM_AUDIT_DATA_NONE, \ - OP); \ - NAME.u.net = &(NAME ## _net); \ - aad(&NAME)->net.type = (T); \ - aad(&NAME)->net.protocol = (P) - -#define DEFINE_AUDIT_SK(NAME, OP, SK) \ - DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type, \ - (SK)->sk_protocol) - -/* struct aa_net - network confinement data - * @allow: basic network families permissions - * @audit: which network permissions to force audit - * @quiet: which network permissions to quiet rejects - */ -struct aa_net { - u16 allow[AF_MAX]; - u16 audit[AF_MAX]; - u16 quiet[AF_MAX]; -}; - - -extern struct aa_sfs_entry aa_sfs_entry_network[]; - -void audit_net_cb(struct audit_buffer *ab, void *va); -int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, - u32 request, u16 family, int type); -int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, - int type, int protocol); -static inline int aa_profile_af_sk_perm(struct aa_profile *profile, - struct common_audit_data *sa, - u32 request, - struct sock *sk) -{ - return aa_profile_af_perm(profile, sa, request, sk->sk_family, - sk->sk_type); -} -int aa_sk_perm(const char *op, u32 request, struct sock *sk); - -int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, - struct socket *sock); - - -static inline void aa_free_net_rules(struct aa_net *new) -{ - /* NOP */ -} - -#endif /* __AA_NET_H */ diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index af04d5a7d73d..2b27bb79aec4 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -135,10 +135,9 @@ extern struct aa_perms allperms; void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask); -void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, - u32 mask); +void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask); void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, - u32 chrsmask, const char * const *names, u32 namesmask); + u32 chrsmask, const char **names, u32 namesmask); void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms); void aa_compute_perms(struct aa_dfa *dfa, unsigned int state, diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 4364088a0b9e..17fe41a9cac3 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -30,7 +30,6 @@ #include "file.h" #include "lib.h" #include "label.h" -#include "net.h" #include "perms.h" #include "resource.h" @@ -112,7 +111,6 @@ struct aa_data { * @policy: general match rules governing policy * @file: The set of rules governing basic file access and domain transitions * @caps: capabilities for the profile - * @net: network controls for the profile * @rlimits: rlimits for the profile * * @dents: dentries for the profiles file entries in apparmorfs @@ -150,7 +148,6 @@ struct aa_profile { struct aa_policydb policy; struct aa_file_rules file; struct aa_caps caps; - struct aa_net net; struct aa_rlimit rlimits; struct aa_loaddata *rawdata; @@ -223,16 +220,6 @@ static inline unsigned int PROFILE_MEDIATES_SAFE(struct aa_profile *profile, return 0; } -static inline unsigned int PROFILE_MEDIATES_AF(struct aa_profile *profile, - u16 AF) { - unsigned int state = PROFILE_MEDIATES(profile, AA_CLASS_NET); - u16 be_af = cpu_to_be16(AF); - - if (!state) - return 0; - return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2); -} - /** * aa_get_profile - increment refcount on profile @p * @p: profile (MAYBE NULL) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 8818621b5d95..08ca26bcca77 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -211,8 +211,7 @@ void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask) *str = '\0'; } -void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, - u32 mask) +void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask) { const char *fmt = "%s"; unsigned int i, perm = 1; @@ -230,7 +229,7 @@ void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, } void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, - u32 chrsmask, const char * const *names, u32 namesmask) + u32 chrsmask, const char **names, u32 namesmask) { char str[33]; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 72b915dfcaf7..1346ee5be04f 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -33,7 +33,6 @@ #include "include/context.h" #include "include/file.h" #include "include/ipc.h" -#include "include/net.h" #include "include/path.h" #include "include/label.h" #include "include/policy.h" @@ -737,368 +736,6 @@ static int apparmor_task_kill(struct task_struct *target, struct siginfo *info, return error; } -/** - * apparmor_sk_alloc_security - allocate and attach the sk_security field - */ -static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags) -{ - struct aa_sk_ctx *ctx; - - ctx = kzalloc(sizeof(*ctx), flags); - if (!ctx) - return -ENOMEM; - - SK_CTX(sk) = ctx; - - return 0; -} - -/** - * apparmor_sk_free_security - free the sk_security field - */ -static void apparmor_sk_free_security(struct sock *sk) -{ - struct aa_sk_ctx *ctx = SK_CTX(sk); - - SK_CTX(sk) = NULL; - aa_put_label(ctx->label); - aa_put_label(ctx->peer); - path_put(&ctx->path); - kfree(ctx); -} - -/** - * apparmor_clone_security - clone the sk_security field - */ -static void apparmor_sk_clone_security(const struct sock *sk, - struct sock *newsk) -{ - struct aa_sk_ctx *ctx = SK_CTX(sk); - struct aa_sk_ctx *new = SK_CTX(newsk); - - new->label = aa_get_label(ctx->label); - new->peer = aa_get_label(ctx->peer); - new->path = ctx->path; - path_get(&new->path); -} - -static int aa_sock_create_perm(struct aa_label *label, int family, int type, - int protocol) -{ - AA_BUG(!label); - AA_BUG(in_interrupt()); - - return aa_af_perm(label, OP_CREATE, AA_MAY_CREATE, family, type, - protocol); -} - - -/** - * apparmor_socket_create - check perms before creating a new socket - */ -static int apparmor_socket_create(int family, int type, int protocol, int kern) -{ - struct aa_label *label; - int error = 0; - - label = begin_current_label_crit_section(); - if (!(kern || unconfined(label))) - error = aa_sock_create_perm(label, family, type, protocol); - end_current_label_crit_section(label); - - return error; -} - -/** - * apparmor_socket_post_create - setup the per-socket security struct - * - * Note: - * - kernel sockets currently labeled unconfined but we may want to - * move to a special kernel label - * - socket may not have sk here if created with sock_create_lite or - * sock_alloc. These should be accept cases which will be handled in - * sock_graft. - */ -static int apparmor_socket_post_create(struct socket *sock, int family, - int type, int protocol, int kern) -{ - struct aa_label *label; - - if (kern) { - struct aa_ns *ns = aa_get_current_ns(); - - label = aa_get_label(ns_unconfined(ns)); - aa_put_ns(ns); - } else - label = aa_get_current_label(); - - if (sock->sk) { - struct aa_sk_ctx *ctx = SK_CTX(sock->sk); - - aa_put_label(ctx->label); - ctx->label = aa_get_label(label); - } - aa_put_label(label); - - return 0; -} - -/** - * apparmor_socket_bind - check perms before bind addr to socket - */ -static int apparmor_socket_bind(struct socket *sock, - struct sockaddr *address, int addrlen) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(!address); - AA_BUG(in_interrupt()); - - return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk); -} - -/** - * apparmor_socket_connect - check perms before connecting @sock to @address - */ -static int apparmor_socket_connect(struct socket *sock, - struct sockaddr *address, int addrlen) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(!address); - AA_BUG(in_interrupt()); - - return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk); -} - -/** - * apparmor_socket_list - check perms before allowing listen - */ -static int apparmor_socket_listen(struct socket *sock, int backlog) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(in_interrupt()); - - return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk); -} - -/** - * apparmor_socket_accept - check perms before accepting a new connection. - * - * Note: while @newsock is created and has some information, the accept - * has not been done. - */ -static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(!newsock); - AA_BUG(in_interrupt()); - - return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk); -} - -static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, - struct msghdr *msg, int size) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(!msg); - AA_BUG(in_interrupt()); - - return aa_sk_perm(op, request, sock->sk); -} - -/** - * apparmor_socket_sendmsg - check perms before sending msg to another socket - */ -static int apparmor_socket_sendmsg(struct socket *sock, - struct msghdr *msg, int size) -{ - return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size); -} - -/** - * apparmor_socket_recvmsg - check perms before receiving a message - */ -static int apparmor_socket_recvmsg(struct socket *sock, - struct msghdr *msg, int size, int flags) -{ - return aa_sock_msg_perm(OP_RECVMSG, AA_MAY_RECEIVE, sock, msg, size); -} - -/* revaliation, get/set attr, shutdown */ -static int aa_sock_perm(const char *op, u32 request, struct socket *sock) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(in_interrupt()); - - return aa_sk_perm(op, request, sock->sk); -} - -/** - * apparmor_socket_getsockname - check perms before getting the local address - */ -static int apparmor_socket_getsockname(struct socket *sock) -{ - return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock); -} - -/** - * apparmor_socket_getpeername - check perms before getting remote address - */ -static int apparmor_socket_getpeername(struct socket *sock) -{ - return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock); -} - -/* revaliation, get/set attr, opt */ -static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, - int level, int optname) -{ - AA_BUG(!sock); - AA_BUG(!sock->sk); - AA_BUG(in_interrupt()); - - return aa_sk_perm(op, request, sock->sk); -} - -/** - * apparmor_getsockopt - check perms before getting socket options - */ -static int apparmor_socket_getsockopt(struct socket *sock, int level, - int optname) -{ - return aa_sock_opt_perm(OP_GETSOCKOPT, AA_MAY_GETOPT, sock, - level, optname); -} - -/** - * apparmor_setsockopt - check perms before setting socket options - */ -static int apparmor_socket_setsockopt(struct socket *sock, int level, - int optname) -{ - return aa_sock_opt_perm(OP_SETSOCKOPT, AA_MAY_SETOPT, sock, - level, optname); -} - -/** - * apparmor_socket_shutdown - check perms before shutting down @sock conn - */ -static int apparmor_socket_shutdown(struct socket *sock, int how) -{ - return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock); -} - -/** - * apparmor_socket_sock_recv_skb - check perms before associating skb to sk - * - * Note: can not sleep may be called with locks held - * - * dont want protocol specific in __skb_recv_datagram() - * to deny an incoming connection socket_sock_rcv_skb() - */ -static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) -{ - return 0; -} - - -static struct aa_label *sk_peer_label(struct sock *sk) -{ - struct aa_sk_ctx *ctx = SK_CTX(sk); - - if (ctx->peer) - return ctx->peer; - - return ERR_PTR(-ENOPROTOOPT); -} - -/** - * apparmor_socket_getpeersec_stream - get security context of peer - * - * Note: for tcp only valid if using ipsec or cipso on lan - */ -static int apparmor_socket_getpeersec_stream(struct socket *sock, - char __user *optval, - int __user *optlen, - unsigned int len) -{ - char *name; - int slen, error = 0; - struct aa_label *label; - struct aa_label *peer; - - label = begin_current_label_crit_section(); - peer = sk_peer_label(sock->sk); - if (IS_ERR(peer)) { - error = PTR_ERR(peer); - goto done; - } - slen = aa_label_asxprint(&name, labels_ns(label), peer, - FLAG_SHOW_MODE | FLAG_VIEW_SUBNS | - FLAG_HIDDEN_UNCONFINED, GFP_KERNEL); - /* don't include terminating \0 in slen, it breaks some apps */ - if (slen < 0) { - error = -ENOMEM; - } else { - if (slen > len) { - error = -ERANGE; - } else if (copy_to_user(optval, name, slen)) { - error = -EFAULT; - goto out; - } - if (put_user(slen, optlen)) - error = -EFAULT; -out: - kfree(name); - - } - -done: - end_current_label_crit_section(label); - - return error; -} - -/** - * apparmor_socket_getpeersec_dgram - get security label of packet - * @sock: the peer socket - * @skb: packet data - * @secid: pointer to where to put the secid of the packet - * - * Sets the netlabel socket state on sk from parent - */ -static int apparmor_socket_getpeersec_dgram(struct socket *sock, - struct sk_buff *skb, u32 *secid) - -{ - /* TODO: requires secid support */ - return -ENOPROTOOPT; -} - -/** - * apparmor_sock_graft - Initialize newly created socket - * @sk: child sock - * @parent: parent socket - * - * Note: could set off of SOCK_CTX(parent) but need to track inode and we can - * just set sk security information off of current creating process label - * Labeling of sk for accept case - probably should be sock based - * instead of task, because of the case where an implicitly labeled - * socket is shared by different tasks. - */ -static void apparmor_sock_graft(struct sock *sk, struct socket *parent) -{ - struct aa_sk_ctx *ctx = SK_CTX(sk); - - if (!ctx->label) - ctx->label = aa_get_current_label(); -} - static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), @@ -1133,30 +770,6 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(getprocattr, apparmor_getprocattr), LSM_HOOK_INIT(setprocattr, apparmor_setprocattr), - LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security), - LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), - LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), - - LSM_HOOK_INIT(socket_create, apparmor_socket_create), - LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create), - LSM_HOOK_INIT(socket_bind, apparmor_socket_bind), - LSM_HOOK_INIT(socket_connect, apparmor_socket_connect), - LSM_HOOK_INIT(socket_listen, apparmor_socket_listen), - LSM_HOOK_INIT(socket_accept, apparmor_socket_accept), - LSM_HOOK_INIT(socket_sendmsg, apparmor_socket_sendmsg), - LSM_HOOK_INIT(socket_recvmsg, apparmor_socket_recvmsg), - LSM_HOOK_INIT(socket_getsockname, apparmor_socket_getsockname), - LSM_HOOK_INIT(socket_getpeername, apparmor_socket_getpeername), - LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt), - LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt), - LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown), - LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb), - LSM_HOOK_INIT(socket_getpeersec_stream, - apparmor_socket_getpeersec_stream), - LSM_HOOK_INIT(socket_getpeersec_dgram, - apparmor_socket_getpeersec_dgram), - LSM_HOOK_INIT(sock_graft, apparmor_sock_graft), - LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank), LSM_HOOK_INIT(cred_free, apparmor_cred_free), LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare), diff --git a/security/apparmor/net.c b/security/apparmor/net.c deleted file mode 100644 index 33d54435f8d6..000000000000 --- a/security/apparmor/net.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * AppArmor security module - * - * This file contains AppArmor network mediation - * - * Copyright (C) 1998-2008 Novell/SUSE - * Copyright 2009-2017 Canonical Ltd. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - */ - -#include "include/apparmor.h" -#include "include/audit.h" -#include "include/context.h" -#include "include/label.h" -#include "include/net.h" -#include "include/policy.h" - -#include "net_names.h" - - -struct aa_sfs_entry aa_sfs_entry_network[] = { - AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK), - { } -}; - -static const char * const net_mask_names[] = { - "unknown", - "send", - "receive", - "unknown", - - "create", - "shutdown", - "connect", - "unknown", - - "setattr", - "getattr", - "setcred", - "getcred", - - "chmod", - "chown", - "chgrp", - "lock", - - "mmap", - "mprot", - "unknown", - "unknown", - - "accept", - "bind", - "listen", - "unknown", - - "setopt", - "getopt", - "unknown", - "unknown", - - "unknown", - "unknown", - "unknown", - "unknown", -}; - - -/* audit callback for net specific fields */ -void audit_net_cb(struct audit_buffer *ab, void *va) -{ - struct common_audit_data *sa = va; - - audit_log_format(ab, " family="); - if (address_family_names[sa->u.net->family]) - audit_log_string(ab, address_family_names[sa->u.net->family]); - else - audit_log_format(ab, "\"unknown(%d)\"", sa->u.net->family); - audit_log_format(ab, " sock_type="); - if (sock_type_names[aad(sa)->net.type]) - audit_log_string(ab, sock_type_names[aad(sa)->net.type]); - else - audit_log_format(ab, "\"unknown(%d)\"", aad(sa)->net.type); - audit_log_format(ab, " protocol=%d", aad(sa)->net.protocol); - - if (aad(sa)->request & NET_PERMS_MASK) { - audit_log_format(ab, " requested_mask="); - aa_audit_perm_mask(ab, aad(sa)->request, NULL, 0, - net_mask_names, NET_PERMS_MASK); - - if (aad(sa)->denied & NET_PERMS_MASK) { - audit_log_format(ab, " denied_mask="); - aa_audit_perm_mask(ab, aad(sa)->denied, NULL, 0, - net_mask_names, NET_PERMS_MASK); - } - } - if (aad(sa)->peer) { - audit_log_format(ab, " peer="); - aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, - FLAGS_NONE, GFP_ATOMIC); - } -} - - -/* Generic af perm */ -int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, - u32 request, u16 family, int type) -{ - struct aa_perms perms = { }; - - AA_BUG(family >= AF_MAX); - AA_BUG(type < 0 || type >= SOCK_MAX); - - if (profile_unconfined(profile)) - return 0; - - perms.allow = (profile->net.allow[family] & (1 << type)) ? - ALL_PERMS_MASK : 0; - perms.audit = (profile->net.audit[family] & (1 << type)) ? - ALL_PERMS_MASK : 0; - perms.quiet = (profile->net.quiet[family] & (1 << type)) ? - ALL_PERMS_MASK : 0; - aa_apply_modes_to_perms(profile, &perms); - - return aa_check_perms(profile, &perms, request, sa, audit_net_cb); -} - -int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, - int type, int protocol) -{ - struct aa_profile *profile; - DEFINE_AUDIT_NET(sa, op, NULL, family, type, protocol); - - return fn_for_each_confined(label, profile, - aa_profile_af_perm(profile, &sa, request, family, - type)); -} - -static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request, - struct sock *sk) -{ - struct aa_profile *profile; - DEFINE_AUDIT_SK(sa, op, sk); - - AA_BUG(!label); - AA_BUG(!sk); - - if (unconfined(label)) - return 0; - - return fn_for_each_confined(label, profile, - aa_profile_af_sk_perm(profile, &sa, request, sk)); -} - -int aa_sk_perm(const char *op, u32 request, struct sock *sk) -{ - struct aa_label *label; - int error; - - AA_BUG(!sk); - AA_BUG(in_interrupt()); - - /* TODO: switch to begin_current_label ???? */ - label = begin_current_label_crit_section(); - error = aa_label_sk_perm(label, op, request, sk); - end_current_label_crit_section(label); - - return error; -} - - -int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, - struct socket *sock) -{ - AA_BUG(!label); - AA_BUG(!sock); - AA_BUG(!sock->sk); - - return aa_label_sk_perm(label, op, request, sock->sk); -} diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 5a2aec358322..4ede87c30f8b 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -275,19 +275,6 @@ fail: return 0; } -static bool unpack_u16(struct aa_ext *e, u16 *data, const char *name) -{ - if (unpack_nameX(e, AA_U16, name)) { - if (!inbounds(e, sizeof(u16))) - return 0; - if (data) - *data = le16_to_cpu(get_unaligned((__le16 *) e->pos)); - e->pos += sizeof(u16); - return 1; - } - return 0; -} - static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name) { if (unpack_nameX(e, AA_U32, name)) { @@ -597,7 +584,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) struct aa_profile *profile = NULL; const char *tmpname, *tmpns = NULL, *name = NULL; const char *info = "failed to unpack profile"; - size_t size = 0, ns_len; + size_t ns_len; struct rhashtable_params params = { 0 }; char *key = NULL; struct aa_data *data; @@ -730,38 +717,6 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } - size = unpack_array(e, "net_allowed_af"); - if (size) { - - for (i = 0; i < size; i++) { - /* discard extraneous rules that this kernel will - * never request - */ - if (i >= AF_MAX) { - u16 tmp; - - if (!unpack_u16(e, &tmp, NULL) || - !unpack_u16(e, &tmp, NULL) || - !unpack_u16(e, &tmp, NULL)) - goto fail; - continue; - } - if (!unpack_u16(e, &profile->net.allow[i], NULL)) - goto fail; - if (!unpack_u16(e, &profile->net.audit[i], NULL)) - goto fail; - if (!unpack_u16(e, &profile->net.quiet[i], NULL)) - goto fail; - } - if (!unpack_nameX(e, AA_ARRAYEND, NULL)) - goto fail; - } - if (VERSION_LT(e->version, v7)) { - /* pre v7 policy always allowed these */ - profile->net.allow[AF_UNIX] = 0xffff; - profile->net.allow[AF_NETLINK] = 0xffff; - } - if (unpack_nameX(e, AA_STRUCT, "policydb")) { /* generic policy dfa - optional and may be NULL */ info = "failed to unpack policydb"; -- cgit v1.2.3-70-g09d2 From 8f63fc2bc64716c16e269ab951130eeda78fe37a Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 19 Oct 2017 13:54:06 +0800 Subject: drm/i915/gvt: properly check per_ctx bb valid state Need to check valid state for per_ctx bb and bypass batch buffer combine for scan if necessary. Otherwise adding invalid MI batch buffer start cmd for per_ctx bb will cause scan failure, which is taken as -EFAULT now so vGPU would be put in failsafe. This trys to fix that by checking per_ctx bb valid state. Also remove old invalid WARNING that indirect ctx bb shouldn't depend on valid per_ctx bb. Signed-off-by: Zhenyu Wang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 3 +++ drivers/gpu/drm/i915/gvt/execlist.c | 3 +-- drivers/gpu/drm/i915/gvt/scheduler.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 21c36e256884..d4726a3358a4 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2723,6 +2723,9 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) uint32_t per_ctx_start[CACHELINE_DWORDS] = {0}; unsigned char *bb_start_sva; + if (!wa_ctx->per_ctx.valid) + return 0; + per_ctx_start[0] = 0x18800001; per_ctx_start[1] = wa_ctx->per_ctx.guest_gma; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 91b4300f3b39..e5320b4eb698 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -701,8 +701,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, CACHELINE_BYTES; workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; - - WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1)); + workload->wa_ctx.per_ctx.valid = per_ctx & 1; } if (emulate_schedule_in) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 0d431a968a32..93a49eb0209e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -68,6 +68,7 @@ struct shadow_indirect_ctx { struct shadow_per_ctx { unsigned long guest_gma; unsigned long shadow_gma; + unsigned valid; }; struct intel_shadow_wa_ctx { -- cgit v1.2.3-70-g09d2 From edee7ecdb4d7311f351feaeb53e269f416bb1b57 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sat, 30 Sep 2017 17:32:16 +0800 Subject: drm/i915/gvt: Refine MMIO_RING_F() Inspect if the host has VCS2 ring by host i915 macro in MMIO_RING_F(). Also this helps on reducing some LOCs. Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 44 ++----------------------------------- drivers/gpu/drm/i915/gvt/reg.h | 3 --- 2 files changed, 2 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 2294466dd415..812f411d1c7d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1589,6 +1589,8 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \ MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \ + if (HAS_BSD2(dev_priv)) \ + MMIO_F(prefix(GEN8_BSD2_RING_BASE), s, f, am, rm, d, r, w); \ } while (0) #define MMIO_RING_D(prefix, d) \ @@ -1636,7 +1638,6 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) #define RING_REG(base) (base + 0x6c) MMIO_RING_DFH(RING_REG, D_ALL, 0, instdone_mmio_read, NULL); - MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_ALL, instdone_mmio_read, NULL); #undef RING_REG MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, instdone_mmio_read, NULL); @@ -2411,9 +2412,6 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL, - intel_vgpu_reg_imr_handler); - MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler); MMIO_DH(GEN8_GT_IER(0), D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler); MMIO_DH(GEN8_GT_IIR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler); @@ -2476,68 +2474,33 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); - MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); - MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); - MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, - ring_mode_mmio_write); - MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, - F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); - MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, ring_reset_ctl_write); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, - ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, - ring_reset_ctl_write); #undef RING_REG #define RING_REG(base) (base + 0x230) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write); - MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, elsp_mmio_write); #undef RING_REG #define RING_REG(base) (base + 0x234) MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0, - ~0LL, D_BDW_PLUS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x244) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, - NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x370) MMIO_RING_F(RING_REG, 48, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 48, F_RO, 0, ~0, D_BDW_PLUS, - NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x3a0) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); - MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, NULL, NULL); #undef RING_REG MMIO_D(PIPEMISC(PIPE_A), D_BDW_PLUS); @@ -2557,11 +2520,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) #define RING_REG(base) (base + 0x270) MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); - MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2849,7 +2810,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x65f08, D_SKL | D_KBL); MMIO_D(0x320f0, D_SKL | D_KBL); - MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x70034, D_SKL_PLUS); MMIO_D(0x71034, D_SKL_PLUS); MMIO_D(0x72034, D_SKL_PLUS); diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index fbd023a16f18..7d01c77a0f7a 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -54,9 +54,6 @@ #define VGT_SPRSTRIDE(pipe) _PIPE(pipe, _SPRA_STRIDE, _PLANE_STRIDE_2_B) -#define _REG_VECS_EXCC 0x1A028 -#define _REG_VCS2_EXCC 0x1c028 - #define _REG_701C0(pipe, plane) (0x701c0 + pipe * 0x1000 + (plane - 1) * 0x100) #define _REG_701C4(pipe, plane) (0x701c4 + pipe * 0x1000 + (plane - 1) * 0x100) -- cgit v1.2.3-70-g09d2 From 20a2bcdec5071f78bebe48c5eecdb89de6e96acb Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Sat, 14 Oct 2017 06:34:46 +0800 Subject: drm/i915/gvt: Extract mmio_read_from_hw() common function The mmio read handler for ring timestmap / instdone register are same as reading hw value directly. Extract it as common function to reduce code duplications. Signed-off-by: Xiong Zhang Signed-off-by: Zhi Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 812f411d1c7d..c24341f174d3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1429,18 +1429,7 @@ static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } -static int ring_timestamp_mmio_read(struct intel_vgpu *vgpu, - unsigned int offset, void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - - mmio_hw_access_pre(dev_priv); - vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); - mmio_hw_access_post(dev_priv); - return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); -} - -static int instdone_mmio_read(struct intel_vgpu *vgpu, +static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -1637,9 +1626,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) (base + 0x6c) - MMIO_RING_DFH(RING_REG, D_ALL, 0, instdone_mmio_read, NULL); + MMIO_RING_DFH(RING_REG, D_ALL, 0, mmio_read_from_hw, NULL); #undef RING_REG - MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, instdone_mmio_read, NULL); + MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL); MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); @@ -1663,9 +1652,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); + mmio_read_from_hw, NULL); MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, - ring_timestamp_mmio_read, NULL); + mmio_read_from_hw, NULL); MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS, -- cgit v1.2.3-70-g09d2 From 894e287b3dcc8bfc8d974f883dab3b5c66344089 Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Sat, 14 Oct 2017 06:34:47 +0800 Subject: drm/i915/gvt: Adding ACTHD mmio read handler When a workload is too heavy to finish it in gpu hang check timer intervals(1.5), gpu hang check function will check ACTHD register value to decide whether gpu is real dead or not. On real hw, ACTHD is updated by HW when workload is running, then host kernel won't think it is gpu hang. while guest kernel always read a constant ACTHD value as GVT doesn't supply ACTHD emulate handler, then guest kernel detects a fake gpu hang. To remove such guest fake gpu hang, this patch supply ACTHD mmio read handler which read real HW ACTHD register directly. Signed-off-by: Xiong Zhang Signed-off-by: Zhi Wang Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/b4c9a097-3e62-124e-6856-b0c37764df7b@intel.com --- drivers/gpu/drm/i915/gvt/handlers.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index c24341f174d3..a5bed2e71b92 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1638,7 +1638,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, mmio_read_from_hw, NULL); MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ @@ -2463,7 +2463,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, + mmio_read_from_hw, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, -- cgit v1.2.3-70-g09d2 From a494ee6cfd771c2b37721a18a1c9209b61bdf81d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 5 Oct 2017 14:56:53 +0200 Subject: arm/xen: don't inclide rwlock.h directly. rwlock.h should not be included directly. Instead linux/splinlock.h should be included. One thing it does is to break the RT build. Cc: Stefano Stabellini Cc: xen-devel@lists.xenproject.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Stefano Stabellini Signed-off-by: Boris Ostrovsky --- arch/arm/xen/p2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index e71eefa2e427..0641ba54ab62 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From 8fc669ea1639f3f6fb2c3a4a4a1e31d975d0e254 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 26 Oct 2017 12:31:54 +0200 Subject: maintainers: drop Chris Wright from pvops Mails to chrisw@sous-sol.org are not deliverable since several months. Drop him as PARAVIRT_OPS maintainer. Signed-off-by: Juergen Gross Acked-by: Chris Wright Signed-off-by: Juergen Gross --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2281af4b41b6..7ebc2a6c1bc9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10156,7 +10156,6 @@ F: Documentation/parport*.txt PARAVIRT_OPS INTERFACE M: Juergen Gross -M: Chris Wright M: Alok Kataria M: Rusty Russell L: virtualization@lists.linux-foundation.org -- cgit v1.2.3-70-g09d2 From 90edaac62729d3b9cbb97756261a0049a7fdd6a0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 27 Oct 2017 10:03:13 +0200 Subject: Revert "x86/mm: Limit mmap() of /dev/mem to valid physical addresses" This reverts commit ce56a86e2ade45d052b3228cdfebe913a1ae7381. There's unanticipated interaction with some boot parameters like 'mem=', which now cause the new checks via valid_mmap_phys_addr_range() to be too restrictive, crashing a Qemu bootup in fact, as reported by Fengguang Wu. So while the motivation of the change is still entirely valid, we need a few more rounds of testing to get it right - it's way too late after -rc6, so revert it for now. Reported-by: Fengguang Wu Signed-off-by: Ingo Molnar Acked-by: Craig Bergstrom Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dsafonov@virtuozzo.com Cc: kirill.shutemov@linux.intel.com Cc: mhocko@suse.com Cc: oleg@redhat.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 4 ---- arch/x86/mm/mmap.c | 12 ------------ 2 files changed, 16 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 322d25ae23ab..c40a95c33bb8 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -110,10 +110,6 @@ build_mmio_write(__writeq, "q", unsigned long, "r", ) #endif -#define ARCH_HAS_VALID_PHYS_ADDR_RANGE -extern int valid_phys_addr_range(phys_addr_t addr, size_t size); -extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); - /** * virt_to_phys - map virtual addresses to physical * @address: address to remap diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 320c6237e1d1..a99679826846 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -174,15 +174,3 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[mpx]"; return NULL; } - -int valid_phys_addr_range(phys_addr_t addr, size_t count) -{ - return addr + count <= __pa(high_memory); -} - -int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) -{ - phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT; - - return valid_phys_addr_range(addr, count); -} -- cgit v1.2.3-70-g09d2 From 4894ac6b6c25b2693dc21c611621dc9fd21f4090 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Thu, 26 Oct 2017 09:51:33 +0100 Subject: net: stmmac: dwc-qos-eth: Fix typo in DT bindings parsing According to DT bindings documentation we are expecting a property called "snps,read-requests" but we are parsing instead a property called "read,read-requests". This is clearly a typo. Fix it. Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 5efef8001edf..3256e5cbad27 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -74,7 +74,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev, plat_dat->axi->axi_wr_osr_lmt--; } - if (of_property_read_u32(np, "read,read-requests", + if (of_property_read_u32(np, "snps,read-requests", &plat_dat->axi->axi_rd_osr_lmt)) { /** * Since the register has a reset value of 1, if property -- cgit v1.2.3-70-g09d2 From 6d9f0790af8d33476f936ac84a07cac42f808f6c Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Thu, 26 Oct 2017 10:07:12 +0100 Subject: net: stmmac: First Queue must always be in DCB mode According to DWMAC databook the first queue operating mode must always be in DCB. As MTL_QUEUE_DCB = 1, we need to always set the first queue operating mode to DCB otherwise driver will think that queue is in AVB mode (because MTL_QUEUE_AVB = 0). Signed-off-by: Jose Abreu Cc: Joao Pinto Cc: David S. Miller Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 8a280b48e3a9..6383695004a5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -150,6 +150,13 @@ static void stmmac_mtl_setup(struct platform_device *pdev, plat->rx_queues_to_use = 1; plat->tx_queues_to_use = 1; + /* First Queue must always be in DCB mode. As MTL_QUEUE_DCB = 1 we need + * to always set this, otherwise Queue will be classified as AVB + * (because MTL_QUEUE_AVB = 0). + */ + plat->rx_queues_cfg[0].mode_to_use = MTL_QUEUE_DCB; + plat->tx_queues_cfg[0].mode_to_use = MTL_QUEUE_DCB; + rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0); if (!rx_node) return; -- cgit v1.2.3-70-g09d2 From f3594f0a7ea36661d7fd942facd7f31a64245f1a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 26 Oct 2017 19:19:56 +0800 Subject: ipip: only increase err_count for some certain type icmp in ipip_err t->err_count is used to count the link failure on tunnel and an err will be reported to user socket in tx path if t->err_count is not 0. udp socket could even return EHOSTUNREACH to users. Since commit fd58156e456d ("IPIP: Use ip-tunneling code.") removed the 'switch check' for icmp type in ipip_err(), err_count would be increased by the icmp packet with ICMP_EXC_FRAGTIME code. an link failure would be reported out due to this. In Jianlin's case, when receiving ICMP_EXC_FRAGTIME a icmp packet, udp netperf failed with the err: send_data: data send error: No route to host (errno 113) We expect this error reported from tunnel to socket when receiving some certain type icmp, but not ICMP_EXC_FRAGTIME, ICMP_SR_FAILED or ICMP_PARAMETERPROB ones. This patch is to bring 'switch check' for icmp type back to ipip_err so that it only reports link failure for the right type icmp, just as in ipgre_err() and ipip6_err(). Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 59 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index fb1ad22b5e29..cdd627355ed1 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly; static int ipip_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + */ struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); const struct iphdr *iph = (const struct iphdr *)skb->data; - struct ip_tunnel *t; - int err; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + struct ip_tunnel *t; + int err = 0; + + switch (type) { + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + /* Impossible event. */ + goto out; + default: + /* All others are translated to HOST_UNREACH. + * rfc2003 contains "deep thoughts" about NET_UNREACH, + * I believe they are just ether pollution. --ANK + */ + break; + } + break; + + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + goto out; + break; + + case ICMP_REDIRECT: + break; + + default: + goto out; + } - err = -ENOENT; t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->daddr, iph->saddr, 0); - if (!t) + if (!t) { + err = -ENOENT; goto out; + } if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, iph->protocol, 0); - err = 0; + ipv4_update_pmtu(skb, net, info, t->parms.link, 0, + iph->protocol, 0); goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - iph->protocol, 0); - err = 0; + ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0); goto out; } - if (t->parms.iph.daddr == 0) + if (t->parms.iph.daddr == 0) { + err = -ENOENT; goto out; + } - err = 0; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; -- cgit v1.2.3-70-g09d2 From f8d20b46ce55cf40afb30dcef6d9288f7ef46d9b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 26 Oct 2017 19:23:27 +0800 Subject: ip6_gre: only increase err_count for some certain type icmpv6 in ip6gre_err The similar fix in patch 'ipip: only increase err_count for some certain type icmp in ipip_err' is needed for ip6gre_err. In Jianlin's case, udp netperf broke even when receiving a TooBig icmpv6 packet. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1602b491b281..fb595e8dc15b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); - break; + if (code != ICMPV6_PORT_UNREACH) + break; + return; case ICMPV6_TIME_EXCEED: if (code == ICMPV6_EXC_HOPLIMIT) { net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", t->parms.name); + break; } - break; + return; case ICMPV6_PARAMPROB: teli = 0; if (code == ICMPV6_HDR_FIELD) @@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", t->parms.name); } - break; + return; case ICMPV6_PKT_TOOBIG: mtu = be32_to_cpu(info) - offset - t->tun_hlen; if (t->dev->type == ARPHRD_ETHER) @@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; - break; + return; } if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) -- cgit v1.2.3-70-g09d2 From 8aec4959d832bae0889a8e2f348973b5e4abffef Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 26 Oct 2017 19:27:17 +0800 Subject: ip6_gre: update dst pmtu if dev mtu has been updated by toobig in __gre6_xmit When receiving a Toobig icmpv6 packet, ip6gre_err would just set tunnel dev's mtu, that's not enough. For skb_dst(skb)'s pmtu may still be using the old value, it has no chance to be updated with tunnel dev's mtu. Jianlin found this issue by reducing route's mtu while running netperf, the performance went to 0. ip6ip6 and ip4ip6 tunnel can work well with this, as they lookup the upper dst and update_pmtu it's pmtu or icmpv6_send a Toobig to upper socket after setting tunnel dev's mtu. We couldn't do that for ip6_gre, as gre's inner packet could be any protocol, it's difficult to handle them (like lookup upper dst) in a good way. So this patch is to fix it by updating skb_dst(skb)'s pmtu when dev->mtu < skb_dst(skb)'s pmtu in tx path. It's safe to do this update there, as usually dev->mtu <= skb_dst(skb)'s pmtu and no performance regression can be caused by this. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index fb595e8dc15b..59c121b932ac 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -503,8 +503,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, __u32 *pmtu, __be16 proto) { struct ip6_tnl *tunnel = netdev_priv(dev); - __be16 protocol = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : proto; + struct dst_entry *dst = skb_dst(skb); + __be16 protocol; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -518,9 +518,14 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, tunnel->o_seqno++; /* Push GRE header. */ + protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); + /* TooBig packet may have updated dst->dev's mtu */ + if (dst && dst_mtu(dst) > dst->dev->mtu) + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); } -- cgit v1.2.3-70-g09d2 From a50829479f58416a013a4ccca791336af3c584c7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 23 Oct 2017 16:46:00 -0700 Subject: Input: gtco - fix potential out-of-bound access parse_hid_report_descriptor() has a while (i < length) loop, which only guarantees that there's at least 1 byte in the buffer, but the loop body can read multiple bytes which causes out-of-bounds access. Reported-by: Andrey Konovalov Reviewed-by: Andrey Konovalov Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/gtco.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index b796e891e2ee..4b8b9d7aa75e 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -230,13 +230,17 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, /* Walk this report and pull out the info we need */ while (i < length) { - prefix = report[i]; - - /* Skip over prefix */ - i++; + prefix = report[i++]; /* Determine data size and save the data in the proper variable */ - size = PREF_SIZE(prefix); + size = (1U << PREF_SIZE(prefix)) >> 1; + if (i + size > length) { + dev_err(ddev, + "Not enough data (need %d, have %d)\n", + i + size, length); + break; + } + switch (size) { case 1: data = report[i]; @@ -244,8 +248,7 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, case 2: data16 = get_unaligned_le16(&report[i]); break; - case 3: - size = 4; + case 4: data32 = get_unaligned_le32(&report[i]); break; } -- cgit v1.2.3-70-g09d2 From 3e64fcbdbd10e46dede502d507dbcc104837cd59 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Wed, 25 Oct 2017 09:30:16 -0700 Subject: Input: synaptics-rmi4 - limit the range of what GPIOs are buttons By convention the first 6 bits of F30 Ctrl 2 and 3 are used to signify GPIOs which are connected to buttons. Additional GPIOs may be used as input GPIOs to signal the touch controller of some event (ie disable touchpad). These additional GPIOs may meet the criteria of a button in rmi_f30_is_valid_button() but should not be considered buttons. This patch limits the GPIOs which are mapped to buttons to just the first 6. Signed-off-by: Andrew Duggan Reported-by: Daniel Martin Tested-by: Daniel Martin Acked-By: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f30.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c index 34dfee555b20..82e0f0d43d55 100644 --- a/drivers/input/rmi4/rmi_f30.c +++ b/drivers/input/rmi4/rmi_f30.c @@ -232,9 +232,10 @@ static int rmi_f30_map_gpios(struct rmi_function *fn, unsigned int trackstick_button = BTN_LEFT; bool button_mapped = false; int i; + int button_count = min_t(u8, f30->gpioled_count, TRACKSTICK_RANGE_END); f30->gpioled_key_map = devm_kcalloc(&fn->dev, - f30->gpioled_count, + button_count, sizeof(f30->gpioled_key_map[0]), GFP_KERNEL); if (!f30->gpioled_key_map) { @@ -242,7 +243,7 @@ static int rmi_f30_map_gpios(struct rmi_function *fn, return -ENOMEM; } - for (i = 0; i < f30->gpioled_count; i++) { + for (i = 0; i < button_count; i++) { if (!rmi_f30_is_valid_button(i, f30->ctrl)) continue; -- cgit v1.2.3-70-g09d2 From a0cb2b5c390151837b08e5f7bca4a6ecddbcd39c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 27 Oct 2017 20:35:31 -0700 Subject: Fix tracing sample code warning. Commit 6575257c60e1 ("tracing/samples: Fix creation and deletion of simple_thread_fn creation") introduced a new warning due to using a boolean as a counter. Just make it "int". Fixes: 6575257c60e1 ("tracing/samples: Fix creation and deletion of simple_thread_fn creation") Cc: Steven Rostedt Signed-off-by: Linus Torvalds --- samples/trace_events/trace-events-sample.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 446beb7ac48d..5522692100ba 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -78,7 +78,7 @@ static int simple_thread_fn(void *arg) } static DEFINE_MUTEX(thread_mutex); -static bool simple_thread_cnt; +static int simple_thread_cnt; int foo_bar_reg(void) { -- cgit v1.2.3-70-g09d2 From 63b9ab65bd76e5de6479bb14b4014b64aa1a317a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 27 Oct 2017 11:05:44 +0800 Subject: tuntap: properly align skb->head before building skb An unaligned alloc_frag->offset caused by previous allocation will result an unaligned skb->head. This will lead unaligned skb_shared_info and then unaligned dataref which requires to be aligned for accessing on some architecture. Fix this by aligning alloc_frag->offset before the frag refilling. Fixes: 0bbd7dad34f8 ("tun: make tun_build_skb() thread safe") Cc: Eric Dumazet Cc: Willem de Bruijn Cc: Wei Wei Cc: Dmitry Vyukov Cc: Mark Rutland Reported-by: Wei Wei Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b9973fb868b7..5550f56cb895 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1286,6 +1286,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, buflen += SKB_DATA_ALIGN(len + pad); rcu_read_unlock(); + alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) return ERR_PTR(-ENOMEM); -- cgit v1.2.3-70-g09d2 From ee1836aec4f5a977c1699a311db4d9027ef21ac8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Oct 2017 21:21:40 -0700 Subject: tcp: refresh tp timestamp before tcp_mtu_probe() In the unlikely event tcp_mtu_probe() is sending a packet, we want tp->tcp_mstamp being as accurate as possible. This means we need to call tcp_mstamp_refresh() a bit earlier in tcp_write_xmit(). Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1151870018e3..ae60dd3faed0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2239,6 +2239,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, sent_pkts = 0; + tcp_mstamp_refresh(tp); if (!push_one) { /* Do MTU probing. */ result = tcp_mtu_probe(sk); @@ -2250,7 +2251,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } max_segs = tcp_tso_segs(sk, mss_now); - tcp_mstamp_refresh(tp); while ((skb = tcp_send_head(sk))) { unsigned int limit; -- cgit v1.2.3-70-g09d2 From dea6e19f4ef746aa18b4c33d1a7fed54356796ed Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Fri, 27 Oct 2017 00:00:16 -0700 Subject: tap: reference to KVA of an unloaded module causes kernel panic The commit 9a393b5d5988 ("tap: tap as an independent module") created a separate tap module that implements tap functionality and exports interfaces that will be used by macvtap and ipvtap modules to create create respective tap devices. However, that patch introduced a regression wherein the modules macvtap and ipvtap can be removed (through modprobe -r) while there are applications using the respective /dev/tapX devices. These applications cause kernel to hold reference to /dev/tapX through 'struct cdev macvtap_cdev' and 'struct cdev ipvtap_dev' defined in macvtap and ipvtap modules respectively. So, when the application is later closed the kernel panics because we are referencing KVA that is present in the unloaded modules. ----------8<------- Example ----------8<---------- $ sudo ip li add name mv0 link enp7s0 type macvtap $ sudo ip li show mv0 |grep mv0| awk -e '{print $1 $2}' 14:mv0@enp7s0: $ cat /dev/tap14 & $ lsmod |egrep -i 'tap|vlan' macvtap 16384 0 macvlan 24576 1 macvtap tap 24576 3 macvtap $ sudo modprobe -r macvtap $ fg cat /dev/tap14 ^C <...system panics...> BUG: unable to handle kernel paging request at ffffffffa038c500 IP: cdev_put+0xf/0x30 ----------8<-----------------8<---------- The fix is to set cdev.owner to the module that creates the tap device (either macvtap or ipvtap). With this set, the operations (in fs/char_dev.c) on char device holds and releases the module through cdev_get() and cdev_put() and will not allow the module to unload prematurely. Fixes: 9a393b5d5988ea4e (tap: tap as an independent module) Signed-off-by: Girish Moodalbail Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvtap.c | 4 ++-- drivers/net/macvtap.c | 4 ++-- drivers/net/tap.c | 5 +++-- include/linux/if_tap.h | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index 5dea2063dbc8..0bcc07f346c3 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -197,8 +197,8 @@ static int ipvtap_init(void) { int err; - err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap"); - + err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap", + THIS_MODULE); if (err) goto out1; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index c2d0ea2fb019..cba5cb3b849a 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -204,8 +204,8 @@ static int macvtap_init(void) { int err; - err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap"); - + err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap", + THIS_MODULE); if (err) goto out1; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 98ee6cc2875d..1b10fcc6a58d 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1249,8 +1249,8 @@ static int tap_list_add(dev_t major, const char *device_name) return 0; } -int tap_create_cdev(struct cdev *tap_cdev, - dev_t *tap_major, const char *device_name) +int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, + const char *device_name, struct module *module) { int err; @@ -1259,6 +1259,7 @@ int tap_create_cdev(struct cdev *tap_cdev, goto out1; cdev_init(tap_cdev, &tap_fops); + tap_cdev->owner = module; err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS); if (err) goto out2; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 4837157da0dc..9ae41cdd0d4c 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -73,8 +73,8 @@ void tap_del_queues(struct tap_dev *tap); int tap_get_minor(dev_t major, struct tap_dev *tap); void tap_free_minor(dev_t major, struct tap_dev *tap); int tap_queue_resize(struct tap_dev *tap); -int tap_create_cdev(struct cdev *tap_cdev, - dev_t *tap_major, const char *device_name); +int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, + const char *device_name, struct module *module); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev); #endif /*_LINUX_IF_TAP_H_*/ -- cgit v1.2.3-70-g09d2 From ea6789980fdaa610d7eb63602c746bf6ec70cd2b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Oct 2017 23:32:27 +0100 Subject: assoc_array: Fix a buggy node-splitting case This fixes CVE-2017-12193. Fix a case in the assoc_array implementation in which a new leaf is added that needs to go into a node that happens to be full, where the existing leaves in that node cluster together at that level to the exclusion of new leaf. What needs to happen is that the existing leaves get moved out to a new node, N1, at level + 1 and the existing node needs replacing with one, N0, that has pointers to the new leaf and to N1. The code that tries to do this gets this wrong in two ways: (1) The pointer that should've pointed from N0 to N1 is set to point recursively to N0 instead. (2) The backpointer from N0 needs to be set correctly in the case N0 is either the root node or reached through a shortcut. Fix this by removing this path and using the split_node path instead, which achieves the same end, but in a more general way (thanks to Eric Biggers for spotting the redundancy). The problem manifests itself as: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: assoc_array_apply_edit+0x59/0xe5 Fixes: 3cb989501c26 ("Add a generic associative array implementation.") Reported-and-tested-by: WU Fan Signed-off-by: David Howells Cc: stable@vger.kernel.org [v3.13-rc1+] Signed-off-by: Linus Torvalds --- lib/assoc_array.c | 51 +++++++++++++++++---------------------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 155c55d8db5f..4e53be8bc590 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -598,21 +598,31 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) goto all_leaves_cluster_together; - /* Otherwise we can just insert a new node ahead of the old - * one. + /* Otherwise all the old leaves cluster in the same slot, but + * the new leaf wants to go into a different slot - so we + * create a new node (n0) to hold the new leaf and a pointer to + * a new node (n1) holding all the old leaves. + * + * This can be done by falling through to the node splitting + * path. */ - goto present_leaves_cluster_but_not_new_leaf; + pr_devel("present leaves cluster but not new leaf\n"); } split_node: pr_devel("split node\n"); - /* We need to split the current node; we know that the node doesn't - * simply contain a full set of leaves that cluster together (it - * contains meta pointers and/or non-clustering leaves). + /* We need to split the current node. The node must contain anything + * from a single leaf (in the one leaf case, this leaf will cluster + * with the new leaf) and the rest meta-pointers, to all leaves, some + * of which may cluster. + * + * It won't contain the case in which all the current leaves plus the + * new leaves want to cluster in the same slot. * * We need to expel at least two leaves out of a set consisting of the - * leaves in the node and the new leaf. + * leaves in the node and the new leaf. The current meta pointers can + * just be copied as they shouldn't cluster with any of the leaves. * * We need a new node (n0) to replace the current one and a new node to * take the expelled nodes (n1). @@ -717,33 +727,6 @@ found_slot_for_multiple_occupancy: pr_devel("<--%s() = ok [split node]\n", __func__); return true; -present_leaves_cluster_but_not_new_leaf: - /* All the old leaves cluster in the same slot, but the new leaf wants - * to go into a different slot, so we create a new node to hold the new - * leaf and a pointer to a new node holding all the old leaves. - */ - pr_devel("present leaves cluster but not new leaf\n"); - - new_n0->back_pointer = node->back_pointer; - new_n0->parent_slot = node->parent_slot; - new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; - new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); - new_n1->parent_slot = edit->segment_cache[0]; - new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch; - edit->adjust_count_on = new_n0; - - for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) - new_n1->slots[i] = node->slots[i]; - - new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0); - edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]]; - - edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot]; - edit->set[0].to = assoc_array_node_to_ptr(new_n0); - edit->excised_meta[0] = assoc_array_node_to_ptr(node); - pr_devel("<--%s() = ok [insert node before]\n", __func__); - return true; - all_leaves_cluster_together: /* All the leaves, new and old, want to cluster together in this node * in the same slot, so we have to replace this node with a shortcut to -- cgit v1.2.3-70-g09d2 From 8108a77515126f6db4374e8593956e20430307c0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 27 Oct 2017 09:45:34 -0700 Subject: bpf: bpf_compute_data uses incorrect cb structure SK_SKB program types use bpf_compute_data to store the end of the packet data. However, bpf_compute_data assumes the cb is stored in the qdisc layer format. But, for SK_SKB this is the wrong layer of the stack for this type. It happens to work (sort of!) because in most cases nothing happens to be overwritten today. This is very fragile and error prone. Fortunately, we have another hole in tcp_skb_cb we can use so lets put the data_end value there. Note, SK_SKB program types do not use data_meta, they are failed by sk_skb_is_valid_access(). Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + kernel/bpf/sockmap.c | 12 ++++++++++-- net/core/filter.c | 27 ++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index b1ef98ebce53..33599d17522d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -844,6 +844,7 @@ struct tcp_skb_cb { __u32 key; __u32 flags; struct bpf_map *map; + void *data_end; } bpf; }; }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 2b6eb35ae5d3..6778fb773934 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -93,6 +93,14 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } +/* compute the linear packet data range [data, data_end) for skb when + * sk_skb type programs are in use. + */ +static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb); +} + static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) { struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict); @@ -108,7 +116,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) */ TCP_SKB_CB(skb)->bpf.map = NULL; skb->sk = psock->sock; - bpf_compute_data_end(skb); + bpf_compute_data_end_sk_skb(skb); preempt_disable(); rc = (*prog->bpf_func)(skb, prog->insnsi); preempt_enable(); @@ -368,7 +376,7 @@ static int smap_parse_func_strparser(struct strparser *strp, * any socket yet. */ skb->sk = psock->sock; - bpf_compute_data_end(skb); + bpf_compute_data_end_sk_skb(skb); rc = (*prog->bpf_func)(skb, prog->insnsi); skb->sk = NULL; rcu_read_unlock(); diff --git a/net/core/filter.c b/net/core/filter.c index aa0265997f93..68eaa2f81a8e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4243,6 +4243,31 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + int off; + + switch (si->off) { + case offsetof(struct __sk_buff, data_end): + off = si->off; + off -= offsetof(struct __sk_buff, data_end); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct tcp_skb_cb, bpf.data_end); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); + break; + default: + return bpf_convert_ctx_access(type, si, insn_buf, prog, + target_size); + } + + return insn - insn_buf; +} + const struct bpf_verifier_ops sk_filter_prog_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, @@ -4301,7 +4326,7 @@ const struct bpf_verifier_ops sock_ops_prog_ops = { const struct bpf_verifier_ops sk_skb_prog_ops = { .get_func_proto = sk_skb_func_proto, .is_valid_access = sk_skb_is_valid_access, - .convert_ctx_access = bpf_convert_ctx_access, + .convert_ctx_access = sk_skb_convert_ctx_access, .gen_prologue = sk_skb_prologue, }; -- cgit v1.2.3-70-g09d2 From bfa640757e9378c2f26867e723f1287e94f5a7ad Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 27 Oct 2017 09:45:53 -0700 Subject: bpf: rename sk_actions to align with bpf infrastructure Recent additions to support multiple programs in cgroups impose a strict requirement, "all yes is yes, any no is no". To enforce this the infrastructure requires the 'no' return code, SK_DROP in this case, to be 0. To apply these rules to SK_SKB program types the sk_actions return codes need to be adjusted. This fix adds SK_PASS and makes 'SK_DROP = 0'. Finally, remove SK_ABORTED to remove any chance that the API may allow aborted program flows to be passed up the stack. This would be incorrect behavior and allow programs to break existing policies. Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 6 +++--- kernel/bpf/sockmap.c | 3 ++- net/core/filter.c | 5 +++-- tools/include/uapi/linux/bpf.h | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f90860d1f897..0d7948ce2128 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -575,7 +575,7 @@ union bpf_attr { * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use - * Return: SK_REDIRECT + * Return: SK_PASS * * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops @@ -786,8 +786,8 @@ struct xdp_md { }; enum sk_action { - SK_ABORTED = 0, - SK_DROP, + SK_DROP = 0, + SK_PASS, SK_REDIRECT, }; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6778fb773934..66f00a2b27f4 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -122,7 +122,8 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) preempt_enable(); skb->sk = NULL; - return rc; + return rc == SK_PASS ? + (TCP_SKB_CB(skb)->bpf.map ? SK_REDIRECT : SK_PASS) : SK_DROP; } static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) diff --git a/net/core/filter.c b/net/core/filter.c index 68eaa2f81a8e..6ae94f825f72 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1844,14 +1844,15 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, { struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + /* If user passes invalid input drop the packet. */ if (unlikely(flags)) - return SK_ABORTED; + return SK_DROP; tcb->bpf.key = key; tcb->bpf.flags = flags; tcb->bpf.map = map; - return SK_REDIRECT; + return SK_PASS; } struct sock *do_sk_redirect_map(struct sk_buff *skb) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 24b35a1fd4d6..c174971afbe6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -787,8 +787,8 @@ struct xdp_md { }; enum sk_action { - SK_ABORTED = 0, - SK_DROP, + SK_DROP = 0, + SK_PASS, SK_REDIRECT, }; -- cgit v1.2.3-70-g09d2 From d04adf1b355181e737b6b1e23d801b07f0b7c4c0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 02:13:29 +0800 Subject: sctp: reset owner sk for data chunks on out queues when migrating a sock Now when migrating sock to another one in sctp_sock_migrate(), it only resets owner sk for the data in receive queues, not the chunks on out queues. It would cause that data chunks length on the sock is not consistent with sk sk_wmem_alloc. When closing the sock or freeing these chunks, the old sk would never be freed, and the new sock may crash due to the overflow sk_wmem_alloc. syzbot found this issue with this series: r0 = socket$inet_sctp() sendto$inet(r0) listen(r0) accept4(r0) close(r0) Although listen() should have returned error when one TCP-style socket is in connecting (I may fix this one in another patch), it could also be reproduced by peeling off an assoc. This issue is there since very beginning. This patch is to reset owner sk for the chunks on out queues so that sk sk_wmem_alloc has correct value after accept one sock or peeloff an assoc to one sock. Note that when resetting owner sk for chunks on outqueue, it has to sctp_clear_owner_w/skb_orphan chunks before changing assoc->base.sk first and then sctp_set_owner_w them after changing assoc->base.sk, due to that sctp_wfree and it's callees are using assoc->base.sk. Reported-by: Dmitry Vyukov Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 17841ab30798..6f45d1713452 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -170,6 +170,36 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sk_mem_charge(sk, chunk->skb->truesize); } +static void sctp_clear_owner_w(struct sctp_chunk *chunk) +{ + skb_orphan(chunk->skb); +} + +static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, + void (*cb)(struct sctp_chunk *)) + +{ + struct sctp_outq *q = &asoc->outqueue; + struct sctp_transport *t; + struct sctp_chunk *chunk; + + list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) + list_for_each_entry(chunk, &t->transmitted, transmitted_list) + cb(chunk); + + list_for_each_entry(chunk, &q->retransmit, list) + cb(chunk); + + list_for_each_entry(chunk, &q->sacked, list) + cb(chunk); + + list_for_each_entry(chunk, &q->abandoned, list) + cb(chunk); + + list_for_each_entry(chunk, &q->out_chunk_list, list) + cb(chunk); +} + /* Verify that this is a valid address. */ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, int len) @@ -8212,7 +8242,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * paths won't try to lock it and then oldsk. */ lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); + sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w); sctp_assoc_migrate(assoc, newsk); + sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w); /* If the association on the newsk is already closed before accept() * is called, set RCV_SHUTDOWN flag. -- cgit v1.2.3-70-g09d2 From 50317fce2cc70a2bbbc4b42c31bbad510382a53c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 27 Oct 2017 22:08:56 -0700 Subject: net_sched: avoid matching qdisc with zero handle Davide found the following script triggers a NULL pointer dereference: ip l a name eth0 type dummy tc q a dev eth0 parent :1 handle 1: htb This is because for a freshly created netdevice noop_qdisc is attached and when passing 'parent :1', kernel actually tries to match the major handle which is 0 and noop_qdisc has handle 0 so is matched by mistake. Commit 69012ae425d7 tries to fix a similar bug but still misses this case. Handle 0 is not a valid one, should be just skipped. In fact, kernel uses it as TC_H_UNSPEC. Fixes: 69012ae425d7 ("net: sched: fix handling of singleton qdiscs with qdisc_hash") Fixes: 59cc1f61f09c ("net: sched:convert qdisc linked list to hashtable") Reported-by: Davide Caratti Cc: Jiri Kosina Cc: Eric Dumazet Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c6deb74e3d2f..22bc6fc48311 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -301,6 +301,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; + if (!handle) + return NULL; q = qdisc_match_from_root(dev->qdisc, handle); if (q) goto out; -- cgit v1.2.3-70-g09d2 From 1da4fc97cbf89514e417a3df46eaec864a9b8a48 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:54 +0800 Subject: sctp: fix some type cast warnings introduced by stream reconf These warnings were found by running 'make C=2 M=net/sctp/'. They are introduced by not aware of Endian when coding stream reconf patches. Since commit c0d8bab6ae51 ("sctp: add get and set sockopt for reconf_enable") enabled stream reconf feature for users, the Fixes tag below would use it. Fixes: c0d8bab6ae51 ("sctp: add get and set sockopt for reconf_enable") Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 32 ++++++++++++++++---------------- include/net/sctp/sm.h | 2 +- include/net/sctp/ulpevent.h | 2 +- net/sctp/sm_make_chunk.c | 5 +++-- net/sctp/stream.c | 26 +++++++++++++++++--------- net/sctp/ulpevent.c | 2 +- 6 files changed, 39 insertions(+), 30 deletions(-) diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 82b171e1aa0b..09d7412e9cb0 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -716,28 +716,28 @@ struct sctp_reconf_chunk { struct sctp_strreset_outreq { struct sctp_paramhdr param_hdr; - __u32 request_seq; - __u32 response_seq; - __u32 send_reset_at_tsn; - __u16 list_of_streams[0]; + __be32 request_seq; + __be32 response_seq; + __be32 send_reset_at_tsn; + __be16 list_of_streams[0]; }; struct sctp_strreset_inreq { struct sctp_paramhdr param_hdr; - __u32 request_seq; - __u16 list_of_streams[0]; + __be32 request_seq; + __be16 list_of_streams[0]; }; struct sctp_strreset_tsnreq { struct sctp_paramhdr param_hdr; - __u32 request_seq; + __be32 request_seq; }; struct sctp_strreset_addstrm { struct sctp_paramhdr param_hdr; - __u32 request_seq; - __u16 number_of_streams; - __u16 reserved; + __be32 request_seq; + __be16 number_of_streams; + __be16 reserved; }; enum { @@ -752,16 +752,16 @@ enum { struct sctp_strreset_resp { struct sctp_paramhdr param_hdr; - __u32 response_seq; - __u32 result; + __be32 response_seq; + __be32 result; }; struct sctp_strreset_resptsn { struct sctp_paramhdr param_hdr; - __u32 response_seq; - __u32 result; - __u32 senders_next_tsn; - __u32 receivers_next_tsn; + __be32 response_seq; + __be32 result; + __be32 senders_next_tsn; + __be32 receivers_next_tsn; }; #endif /* __LINUX_SCTP_H__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2db3d3a9ce1d..88233cf8b8d4 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -261,7 +261,7 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, struct sctp_fwdtsn_skip *skiplist); struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc); struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc, - __u16 stream_num, __u16 *stream_list, + __u16 stream_num, __be16 *stream_list, bool out, bool in); struct sctp_chunk *sctp_make_strreset_tsnreq( const struct sctp_association *asoc); diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index b8c86ec1a8f5..231dc42f1da6 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -130,7 +130,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( const struct sctp_association *asoc, __u16 flags, - __u16 stream_num, __u16 *stream_list, gfp_t gfp); + __u16 stream_num, __be16 *stream_list, gfp_t gfp); struct sctp_ulpevent *sctp_ulpevent_make_assoc_reset_event( const struct sctp_association *asoc, __u16 flags, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ca8f196b6c6c..57c55045f5a7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3591,7 +3591,7 @@ static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc, */ struct sctp_chunk *sctp_make_strreset_req( const struct sctp_association *asoc, - __u16 stream_num, __u16 *stream_list, + __u16 stream_num, __be16 *stream_list, bool out, bool in) { struct sctp_strreset_outreq outreq; @@ -3788,7 +3788,8 @@ bool sctp_verify_reconf(const struct sctp_association *asoc, { struct sctp_reconf_chunk *hdr; union sctp_params param; - __u16 last = 0, cnt = 0; + __be16 last = 0; + __u16 cnt = 0; hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; sctp_walk_params(param, hdr, params) { diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 63ea15503714..fa8371ff05c4 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -118,6 +118,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc, __u16 i, str_nums, *str_list; struct sctp_chunk *chunk; int retval = -EINVAL; + __be16 *nstr_list; bool out, in; if (!asoc->peer.reconf_capable || @@ -148,13 +149,18 @@ int sctp_send_reset_streams(struct sctp_association *asoc, if (str_list[i] >= stream->incnt) goto out; + nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL); + if (!nstr_list) { + retval = -ENOMEM; + goto out; + } + for (i = 0; i < str_nums; i++) - str_list[i] = htons(str_list[i]); + nstr_list[i] = htons(str_list[i]); - chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in); + chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); - for (i = 0; i < str_nums; i++) - str_list[i] = ntohs(str_list[i]); + kfree(nstr_list); if (!chunk) { retval = -ENOMEM; @@ -305,7 +311,7 @@ out: } static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param( - struct sctp_association *asoc, __u32 resp_seq, + struct sctp_association *asoc, __be32 resp_seq, __be16 type) { struct sctp_chunk *chunk = asoc->strreset_chunk; @@ -345,8 +351,9 @@ struct sctp_chunk *sctp_process_strreset_outreq( { struct sctp_strreset_outreq *outreq = param.v; struct sctp_stream *stream = &asoc->stream; - __u16 i, nums, flags = 0, *str_p = NULL; __u32 result = SCTP_STRRESET_DENIED; + __u16 i, nums, flags = 0; + __be16 *str_p = NULL; __u32 request_seq; request_seq = ntohl(outreq->request_seq); @@ -439,8 +446,9 @@ struct sctp_chunk *sctp_process_strreset_inreq( struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; struct sctp_chunk *chunk = NULL; - __u16 i, nums, *str_p; __u32 request_seq; + __u16 i, nums; + __be16 *str_p; request_seq = ntohl(inreq->request_seq); if (TSN_lt(asoc->strreset_inseq, request_seq) || @@ -769,7 +777,7 @@ struct sctp_chunk *sctp_process_strreset_resp( if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) { struct sctp_strreset_outreq *outreq; - __u16 *str_p; + __be16 *str_p; outreq = (struct sctp_strreset_outreq *)req; str_p = outreq->list_of_streams; @@ -794,7 +802,7 @@ struct sctp_chunk *sctp_process_strreset_resp( nums, str_p, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) { struct sctp_strreset_inreq *inreq; - __u16 *str_p; + __be16 *str_p; /* if the result is performed, it's impossible for inreq */ if (result == SCTP_STRRESET_PERFORMED) diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 67abc0194f30..5447228bf1a0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -847,7 +847,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( const struct sctp_association *asoc, __u16 flags, __u16 stream_num, - __u16 *stream_list, gfp_t gfp) + __be16 *stream_list, gfp_t gfp) { struct sctp_stream_reset_event *sreset; struct sctp_ulpevent *event; -- cgit v1.2.3-70-g09d2 From 8d32503efde82db4e0a370981e90628ebd6718b5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:55 +0800 Subject: sctp: fix some type cast warnings introduced by transport rhashtable These warnings were found by running 'make C=2 M=net/sctp/'. They are introduced by not aware of Endian for the port when coding transport rhashtable patches. Fixes: 7fda702f9315 ("sctp: use new rhlist interface on sctp transport rhashtable") Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/input.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 34f10e75f3b9..621b5ca3fd1c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -794,7 +794,7 @@ hit: struct sctp_hash_cmp_arg { const union sctp_addr *paddr; const struct net *net; - u16 lport; + __be16 lport; }; static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, @@ -820,37 +820,37 @@ out: return err; } -static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) +static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed) { const struct sctp_transport *t = data; const union sctp_addr *paddr = &t->ipaddr; const struct net *net = sock_net(t->asoc->base.sk); - u16 lport = htons(t->asoc->base.bind_addr.port); - u32 addr; + __be16 lport = htons(t->asoc->base.bind_addr.port); + __u32 addr; if (paddr->sa.sa_family == AF_INET6) addr = jhash(&paddr->v6.sin6_addr, 16, seed); else - addr = paddr->v4.sin_addr.s_addr; + addr = (__force __u32)paddr->v4.sin_addr.s_addr; - return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | (__force __u32)lport, net_hash_mix(net), seed); } -static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed) +static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed) { const struct sctp_hash_cmp_arg *x = data; const union sctp_addr *paddr = x->paddr; const struct net *net = x->net; - u16 lport = x->lport; - u32 addr; + __be16 lport = x->lport; + __u32 addr; if (paddr->sa.sa_family == AF_INET6) addr = jhash(&paddr->v6.sin6_addr, 16, seed); else - addr = paddr->v4.sin_addr.s_addr; + addr = (__force __u32)paddr->v4.sin_addr.s_addr; - return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | (__force __u32)lport, net_hash_mix(net), seed); } -- cgit v1.2.3-70-g09d2 From f6fc6bc0b8e0bb13a210bd7386ffdcb1a5f30ef1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:56 +0800 Subject: sctp: fix a type cast warnings that causes a_rwnd gets the wrong value These warnings were found by running 'make C=2 M=net/sctp/'. Commit d4d6fb5787a6 ("sctp: Try not to change a_rwnd when faking a SACK from SHUTDOWN.") expected to use the peers old rwnd and add our flight size to the a_rwnd. But with the wrong Endian, it may not work as well as expected. So fix it by converting to the right value. Fixes: d4d6fb5787a6 ("sctp: Try not to change a_rwnd when faking a SACK from SHUTDOWN.") Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e6a2974e020e..8f2762bba879 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1680,8 +1680,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, case SCTP_CMD_PROCESS_CTSN: /* Dummy up a SACK for processing. */ sackh.cum_tsn_ack = cmd->obj.be32; - sackh.a_rwnd = asoc->peer.rwnd + - asoc->outqueue.outstanding_bytes; + sackh.a_rwnd = htonl(asoc->peer.rwnd + + asoc->outqueue.outstanding_bytes); sackh.num_gap_ack_blocks = 0; sackh.num_dup_tsns = 0; chunk->subh.sack_hdr = &sackh; -- cgit v1.2.3-70-g09d2 From 978aa0474115f3f5848949f2efce4def0766a5cb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:57 +0800 Subject: sctp: fix some type cast warnings introduced since very beginning These warnings were found by running 'make C=2 M=net/sctp/'. They are there since very beginning. Note after this patch, there still one warning left in sctp_outq_flush(): sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM) Since it has been moved to sctp_stream_outq_migrate on net-next, to avoid the extra job when merging net-next to net, I will post the fix for it after the merging is done. Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 2 +- include/uapi/linux/sctp.h | 2 +- net/sctp/ipv6.c | 2 +- net/sctp/sm_make_chunk.c | 4 ++-- net/sctp/sm_sideeffect.c | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 09d7412e9cb0..da803dfc7a39 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -231,7 +231,7 @@ struct sctp_datahdr { __be32 tsn; __be16 stream; __be16 ssn; - __be32 ppid; + __u32 ppid; __u8 payload[0]; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6217ff8500a1..84fc2914b7fb 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -376,7 +376,7 @@ struct sctp_remote_error { __u16 sre_type; __u16 sre_flags; __u32 sre_length; - __u16 sre_error; + __be16 sre_error; sctp_assoc_t sre_assoc_id; __u8 sre_data[0]; }; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7fe9e1d1b7ec..a6dfa86c0201 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -738,7 +738,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb) /* Was this packet marked by Explicit Congestion Notification? */ static int sctp_v6_is_ce(const struct sk_buff *skb) { - return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20); + return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20); } /* Dump the v6 addr to the seq file. */ diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 57c55045f5a7..514465b03829 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2854,7 +2854,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = flags; param.param_hdr.length = htons(paramlen + addr_param_len); - param.crr_id = i; + param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, ¶m); sctp_addto_chunk(retval, addr_param_len, &addr_param); @@ -2867,7 +2867,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = SCTP_PARAM_DEL_IP; param.param_hdr.length = htons(paramlen + addr_param_len); - param.crr_id = i; + param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, ¶m); sctp_addto_chunk(retval, addr_param_len, &addr_param); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8f2762bba879..e2d9a4b49c9c 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1607,12 +1607,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; case SCTP_CMD_INIT_FAILED: - sctp_cmd_init_failed(commands, asoc, cmd->obj.err); + sctp_cmd_init_failed(commands, asoc, cmd->obj.u32); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, - subtype, chunk, cmd->obj.err); + subtype, chunk, cmd->obj.u32); break; case SCTP_CMD_INIT_COUNTER_INC: -- cgit v1.2.3-70-g09d2 From 7aa0045dadb6ef37485ea9f2a7d28278ca588b51 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:28 -0700 Subject: net_sched: introduce a workqueue for RCU callbacks of tc filter This patch introduces a dedicated workqueue for tc filters so that each tc filter's RCU callback could defer their action destroy work to this workqueue. The helper tcf_queue_work() is introduced for them to use. Because we hold RTNL lock when calling tcf_block_put(), we can not simply flush works inside it, therefore we have to defer it again to this workqueue and make sure all flying RCU callbacks have already queued their work before this one, in other words, to ensure this is the last one to execute to prevent any use-after-free. On the other hand, this makes tcf_block_put() ugly and harder to understand. Since David and Eric strongly dislike adding synchronize_rcu(), this is probably the only solution that could make everyone happy. Please also see the code comments below. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 3 +++ include/net/sch_generic.h | 2 ++ net/sched/cls_api.c | 68 +++++++++++++++++++++++++++++++++++------------ 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e80edd8879ef..3009547f3c66 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -2,6 +2,7 @@ #define __NET_PKT_CLS_H #include +#include #include #include @@ -17,6 +18,8 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +bool tcf_queue_work(struct work_struct *work); + #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 135f5a2dd931..0dec8a23be57 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -271,6 +272,7 @@ struct tcf_chain { struct tcf_block { struct list_head chain_list; + struct work_struct work; }; static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0b2219adf520..045d13679ad6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -77,6 +77,8 @@ out: } EXPORT_SYMBOL(register_tcf_proto_ops); +static struct workqueue_struct *tc_filter_wq; + int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; @@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) * tcf_proto_ops's destroy() handler. */ rcu_barrier(); + flush_workqueue(tc_filter_wq); write_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { @@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) } EXPORT_SYMBOL(unregister_tcf_proto_ops); +bool tcf_queue_work(struct work_struct *work) +{ + return queue_work(tc_filter_wq, work); +} +EXPORT_SYMBOL(tcf_queue_work); + /* Select new prio value from the range, managed by kernel. */ static inline u32 tcf_auto_prio(struct tcf_proto *tp) @@ -266,23 +275,30 @@ err_chain_create: } EXPORT_SYMBOL(tcf_block_get); -void tcf_block_put(struct tcf_block *block) +static void tcf_block_put_final(struct work_struct *work) { + struct tcf_block *block = container_of(work, struct tcf_block, work); struct tcf_chain *chain, *tmp; - if (!block) - return; - - /* XXX: Standalone actions are not allowed to jump to any chain, and - * bound actions should be all removed after flushing. However, - * filters are destroyed in RCU callbacks, we have to hold the chains - * first, otherwise we would always race with RCU callbacks on this list - * without proper locking. - */ + /* At this point, all the chains should have refcnt == 1. */ + rtnl_lock(); + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + tcf_chain_put(chain); + rtnl_unlock(); + kfree(block); +} - /* Wait for existing RCU callbacks to cool down. */ - rcu_barrier(); +/* XXX: Standalone actions are not allowed to jump to any chain, and bound + * actions should be all removed after flushing. However, filters are destroyed + * in RCU callbacks, we have to hold the chains first, otherwise we would + * always race with RCU callbacks on this list without proper locking. + */ +static void tcf_block_put_deferred(struct work_struct *work) +{ + struct tcf_block *block = container_of(work, struct tcf_block, work); + struct tcf_chain *chain; + rtnl_lock(); /* Hold a refcnt for all chains, except 0, in case they are gone. */ list_for_each_entry(chain, &block->chain_list, list) if (chain->index) @@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block) list_for_each_entry(chain, &block->chain_list, list) tcf_chain_flush(chain); - /* Wait for RCU callbacks to release the reference count. */ + INIT_WORK(&block->work, tcf_block_put_final); + /* Wait for RCU callbacks to release the reference count and make + * sure their works have been queued before this. + */ rcu_barrier(); + tcf_queue_work(&block->work); + rtnl_unlock(); +} - /* At this point, all the chains should have refcnt == 1. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) - tcf_chain_put(chain); - kfree(block); +void tcf_block_put(struct tcf_block *block) +{ + if (!block) + return; + + INIT_WORK(&block->work, tcf_block_put_deferred); + /* Wait for existing RCU callbacks to cool down, make sure their works + * have been queued before this. We can not flush pending works here + * because we are holding the RTNL lock. + */ + rcu_barrier(); + tcf_queue_work(&block->work); } EXPORT_SYMBOL(tcf_block_put); @@ -1030,6 +1060,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev); static int __init tc_filter_init(void) { + tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0); + if (!tc_filter_wq) + return -ENOMEM; + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, -- cgit v1.2.3-70-g09d2 From c96a48385d53089ee9977dd0bce82a9493984484 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:29 -0700 Subject: net_sched: use tcf_queue_work() in basic filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index d89ebafd2239..f177649a2419 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -34,7 +34,10 @@ struct basic_filter { struct tcf_result res; struct tcf_proto *tp; struct list_head link; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -82,15 +85,26 @@ static int basic_init(struct tcf_proto *tp) return 0; } -static void basic_delete_filter(struct rcu_head *head) +static void basic_delete_filter_work(struct work_struct *work) { - struct basic_filter *f = container_of(head, struct basic_filter, rcu); + struct basic_filter *f = container_of(work, struct basic_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); + rtnl_unlock(); + kfree(f); } +static void basic_delete_filter(struct rcu_head *head) +{ + struct basic_filter *f = container_of(head, struct basic_filter, rcu); + + INIT_WORK(&f->work, basic_delete_filter_work); + tcf_queue_work(&f->work); +} + static void basic_destroy(struct tcf_proto *tp) { struct basic_head *head = rtnl_dereference(tp->root); -- cgit v1.2.3-70-g09d2 From e910af676b565ecc16bcd6c896ecb68157396ecc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:30 -0700 Subject: net_sched: use tcf_queue_work() in bpf filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 520c5027646a..037a3ae86829 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -49,7 +49,10 @@ struct cls_bpf_prog { struct sock_filter *bpf_ops; const char *bpf_name; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { @@ -257,9 +260,21 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) kfree(prog); } +static void cls_bpf_delete_prog_work(struct work_struct *work) +{ + struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work); + + rtnl_lock(); + __cls_bpf_delete_prog(prog); + rtnl_unlock(); +} + static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) { - __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu)); + struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu); + + INIT_WORK(&prog->work, cls_bpf_delete_prog_work); + tcf_queue_work(&prog->work); } static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) -- cgit v1.2.3-70-g09d2 From b1b5b04fdb6da262aef37ef83b9f2e41326720ef Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:31 -0700 Subject: net_sched: use tcf_queue_work() in cgroup filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_cgroup.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index d48452f87975..a97e069bee89 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -23,7 +23,10 @@ struct cls_cgroup_head { struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -57,15 +60,26 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void cls_cgroup_destroy_work(struct work_struct *work) +{ + struct cls_cgroup_head *head = container_of(work, + struct cls_cgroup_head, + work); + rtnl_lock(); + tcf_exts_destroy(&head->exts); + tcf_em_tree_destroy(&head->ematches); + kfree(head); + rtnl_unlock(); +} + static void cls_cgroup_destroy_rcu(struct rcu_head *root) { struct cls_cgroup_head *head = container_of(root, struct cls_cgroup_head, rcu); - tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(&head->ematches); - kfree(head); + INIT_WORK(&head->work, cls_cgroup_destroy_work); + tcf_queue_work(&head->work); } static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, -- cgit v1.2.3-70-g09d2 From 94cdb47566b799649e996e1fb9de2a503dada763 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:32 -0700 Subject: net_sched: use tcf_queue_work() in flow filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2a3a60ec5b86..67f3a2af6aab 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -57,7 +57,10 @@ struct flow_filter { u32 divisor; u32 baseclass; u32 hashrnd; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline u32 addr_fold(void *addr) @@ -369,14 +372,24 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static void flow_destroy_filter(struct rcu_head *head) +static void flow_destroy_filter_work(struct work_struct *work) { - struct flow_filter *f = container_of(head, struct flow_filter, rcu); + struct flow_filter *f = container_of(work, struct flow_filter, work); + rtnl_lock(); del_timer_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); kfree(f); + rtnl_unlock(); +} + +static void flow_destroy_filter(struct rcu_head *head) +{ + struct flow_filter *f = container_of(head, struct flow_filter, rcu); + + INIT_WORK(&f->work, flow_destroy_filter_work); + tcf_queue_work(&f->work); } static int flow_change(struct net *net, struct sk_buff *in_skb, -- cgit v1.2.3-70-g09d2 From 0552c8afa077889b4704ef5ee88b03063ad45023 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:33 -0700 Subject: net_sched: use tcf_queue_work() in flower filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b480d7c792ba..5b5722c8b32c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -87,7 +87,10 @@ struct cls_fl_filter { struct list_head list; u32 handle; u32 flags; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; struct net_device *hw_dev; }; @@ -215,12 +218,22 @@ static int fl_init(struct tcf_proto *tp) return 0; } -static void fl_destroy_filter(struct rcu_head *head) +static void fl_destroy_filter_work(struct work_struct *work) { - struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); kfree(f); + rtnl_unlock(); +} + +static void fl_destroy_filter(struct rcu_head *head) +{ + struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + + INIT_WORK(&f->work, fl_destroy_filter_work); + tcf_queue_work(&f->work); } static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) -- cgit v1.2.3-70-g09d2 From e071dff2a6beeccb6f9744f9a0251ab773ca2ab8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:34 -0700 Subject: net_sched: use tcf_queue_work() in fw filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_fw.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 941245ad07fd..99183b8621ec 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -46,7 +46,10 @@ struct fw_filter { #endif /* CONFIG_NET_CLS_IND */ struct tcf_exts exts; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static u32 fw_hash(u32 handle) @@ -119,12 +122,22 @@ static int fw_init(struct tcf_proto *tp) return 0; } -static void fw_delete_filter(struct rcu_head *head) +static void fw_delete_filter_work(struct work_struct *work) { - struct fw_filter *f = container_of(head, struct fw_filter, rcu); + struct fw_filter *f = container_of(work, struct fw_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); kfree(f); + rtnl_unlock(); +} + +static void fw_delete_filter(struct rcu_head *head) +{ + struct fw_filter *f = container_of(head, struct fw_filter, rcu); + + INIT_WORK(&f->work, fw_delete_filter_work); + tcf_queue_work(&f->work); } static void fw_destroy(struct tcf_proto *tp) -- cgit v1.2.3-70-g09d2 From df2735ee8e6ca202a8630f237b59401a25193be1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:35 -0700 Subject: net_sched: use tcf_queue_work() in matchall filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index eeac606c95ab..c33f711b9019 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -21,7 +21,10 @@ struct cls_mall_head { struct tcf_result res; u32 handle; u32 flags; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -41,13 +44,23 @@ static int mall_init(struct tcf_proto *tp) return 0; } +static void mall_destroy_work(struct work_struct *work) +{ + struct cls_mall_head *head = container_of(work, struct cls_mall_head, + work); + rtnl_lock(); + tcf_exts_destroy(&head->exts); + kfree(head); + rtnl_unlock(); +} + static void mall_destroy_rcu(struct rcu_head *rcu) { struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, rcu); - tcf_exts_destroy(&head->exts); - kfree(head); + INIT_WORK(&head->work, mall_destroy_work); + tcf_queue_work(&head->work); } static int mall_replace_hw_filter(struct tcf_proto *tp, -- cgit v1.2.3-70-g09d2 From c0d378ef1266546a39f2df00a56ff1f74166a2b7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:36 -0700 Subject: net_sched: use tcf_queue_work() in u32 filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 10b8d851fc6b..dadd1b344497 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -68,7 +68,10 @@ struct tc_u_knode { u32 __percpu *pcpu_success; #endif struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; /* The 'sel' field MUST be the last field in structure to allow for * tc_u32_keys allocated at end of structure. */ @@ -418,11 +421,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, * this the u32_delete_key_rcu variant does not free the percpu * statistics. */ +static void u32_delete_key_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, false); + rtnl_unlock(); +} + static void u32_delete_key_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, false); + INIT_WORK(&key->work, u32_delete_key_work); + tcf_queue_work(&key->work); } /* u32_delete_key_freepf_rcu is the rcu callback variant @@ -432,11 +445,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu) * for the variant that should be used with keys return from * u32_init_knode() */ +static void u32_delete_key_freepf_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, true); + rtnl_unlock(); +} + static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, true); + INIT_WORK(&key->work, u32_delete_key_freepf_work); + tcf_queue_work(&key->work); } static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) -- cgit v1.2.3-70-g09d2 From c2f3f31d402be4849b06282c3a5278f2865c9fcc Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:37 -0700 Subject: net_sched: use tcf_queue_work() in route filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_route.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 9ddde65915d2..4b14ccd8b8f2 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -57,7 +57,10 @@ struct route4_filter { u32 handle; struct route4_bucket *bkt; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) @@ -254,12 +257,22 @@ static int route4_init(struct tcf_proto *tp) return 0; } -static void route4_delete_filter(struct rcu_head *head) +static void route4_delete_filter_work(struct work_struct *work) { - struct route4_filter *f = container_of(head, struct route4_filter, rcu); + struct route4_filter *f = container_of(work, struct route4_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); kfree(f); + rtnl_unlock(); +} + +static void route4_delete_filter(struct rcu_head *head) +{ + struct route4_filter *f = container_of(head, struct route4_filter, rcu); + + INIT_WORK(&f->work, route4_delete_filter_work); + tcf_queue_work(&f->work); } static void route4_destroy(struct tcf_proto *tp) -- cgit v1.2.3-70-g09d2 From d4f84a41dc615c166555cd332b0235bf6b9bcb4a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:38 -0700 Subject: net_sched: use tcf_queue_work() in rsvp filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_rsvp.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index b1f6ed48bc72..bdbc541787f8 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -97,7 +97,10 @@ struct rsvp_filter { u32 handle; struct rsvp_session *sess; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) @@ -282,12 +285,22 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } -static void rsvp_delete_filter_rcu(struct rcu_head *head) +static void rsvp_delete_filter_work(struct work_struct *work) { - struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); + struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); + rtnl_lock(); tcf_exts_destroy(&f->exts); kfree(f); + rtnl_unlock(); +} + +static void rsvp_delete_filter_rcu(struct rcu_head *head) +{ + struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); + + INIT_WORK(&f->work, rsvp_delete_filter_work); + tcf_queue_work(&f->work); } static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) -- cgit v1.2.3-70-g09d2 From 27ce4f05e2abbe2d3ec7434e456619a5178cd3bd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:39 -0700 Subject: net_sched: use tcf_queue_work() in tcindex filter Defer the tcf_exts_destroy() in RCU callback to tc filter workqueue and get RTNL lock. Reported-by: Chris Mi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 14a7e08b2fa9..beaa95e09c25 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -27,14 +27,20 @@ struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct tcindex_filter { u16 key; struct tcindex_filter_result result; struct tcindex_filter __rcu *next; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; @@ -133,12 +139,34 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } +static void tcindex_destroy_rexts_work(struct work_struct *work) +{ + struct tcindex_filter_result *r; + + r = container_of(work, struct tcindex_filter_result, work); + rtnl_lock(); + tcf_exts_destroy(&r->exts); + rtnl_unlock(); +} + static void tcindex_destroy_rexts(struct rcu_head *head) { struct tcindex_filter_result *r; r = container_of(head, struct tcindex_filter_result, rcu); - tcf_exts_destroy(&r->exts); + INIT_WORK(&r->work, tcindex_destroy_rexts_work); + tcf_queue_work(&r->work); +} + +static void tcindex_destroy_fexts_work(struct work_struct *work) +{ + struct tcindex_filter *f = container_of(work, struct tcindex_filter, + work); + + rtnl_lock(); + tcf_exts_destroy(&f->result.exts); + kfree(f); + rtnl_unlock(); } static void tcindex_destroy_fexts(struct rcu_head *head) @@ -146,8 +174,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head) struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); - tcf_exts_destroy(&f->result.exts); - kfree(f); + INIT_WORK(&f->work, tcindex_destroy_fexts_work); + tcf_queue_work(&f->work); } static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) -- cgit v1.2.3-70-g09d2 From 2d132eba1d972ea6c0e47286e4c821b4a3c5b84d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:40 -0700 Subject: net_sched: add rtnl assertion to tcf_exts_destroy() After previous patches, it is now safe to claim that tcf_exts_destroy() is always called with RTNL lock. Cc: Daniel Borkmann Cc: Jiri Pirko Cc: John Fastabend Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 045d13679ad6..231181c602ed 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -909,6 +909,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) #ifdef CONFIG_NET_CLS_ACT LIST_HEAD(actions); + ASSERT_RTNL(); tcf_exts_to_list(exts, &actions); tcf_action_destroy(&actions, TCA_ACT_UNBIND); kfree(exts->actions); -- cgit v1.2.3-70-g09d2 From 46e235c15ca44f34cb79f4dbec909b8c51999dc1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Oct 2017 18:24:41 -0700 Subject: net_sched: fix call_rcu() race on act_sample module removal Similar to commit c78e1746d3ad ("net: sched: fix call_rcu() race on classifier module unloads"), we need to wait for flying RCU callback tcf_sample_cleanup_rcu(). Cc: Yotam Gigi Cc: Daniel Borkmann Cc: Jiri Pirko Cc: Jamal Hadi Salim Cc: "Paul E. McKenney" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_sample.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index ec986ae52808..a9f9a2ccc664 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -264,6 +264,7 @@ static int __init sample_init_module(void) static void __exit sample_cleanup_module(void) { + rcu_barrier(); tcf_unregister_action(&act_sample_ops, &sample_net_ops); } -- cgit v1.2.3-70-g09d2 From 7f071998474a9e5f7b98103d3058a1b8ca5887e6 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 26 Oct 2017 18:24:42 -0700 Subject: selftests: Introduce a new script to generate tc batch file # ./tdc_batch.py -h usage: tdc_batch.py [-h] [-n NUMBER] [-o] [-s] [-p] device file TC batch file generator positional arguments: device device name file batch file name optional arguments: -h, --help show this help message and exit -n NUMBER, --number NUMBER how many lines in batch file -o, --skip_sw skip_sw (offload), by default skip_hw -s, --share_action all filters share the same action -p, --prio all filters have different prio Acked-by: Jamal Hadi Salim Acked-by: Lucas Bates Signed-off-by: Chris Mi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc_batch.py | 62 +++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100755 tools/testing/selftests/tc-testing/tdc_batch.py diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py new file mode 100755 index 000000000000..707c6bfef689 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tdc_batch.py @@ -0,0 +1,62 @@ +#!/usr/bin/python3 + +""" +tdc_batch.py - a script to generate TC batch file + +Copyright (C) 2017 Chris Mi +""" + +import argparse + +parser = argparse.ArgumentParser(description='TC batch file generator') +parser.add_argument("device", help="device name") +parser.add_argument("file", help="batch file name") +parser.add_argument("-n", "--number", type=int, + help="how many lines in batch file") +parser.add_argument("-o", "--skip_sw", + help="skip_sw (offload), by default skip_hw", + action="store_true") +parser.add_argument("-s", "--share_action", + help="all filters share the same action", + action="store_true") +parser.add_argument("-p", "--prio", + help="all filters have different prio", + action="store_true") +args = parser.parse_args() + +device = args.device +file = open(args.file, 'w') + +number = 1 +if args.number: + number = args.number + +skip = "skip_hw" +if args.skip_sw: + skip = "skip_sw" + +share_action = "" +if args.share_action: + share_action = "index 1" + +prio = "prio 1" +if args.prio: + prio = "" + if number > 0x4000: + number = 0x4000 + +index = 0 +for i in range(0x100): + for j in range(0x100): + for k in range(0x100): + mac = ("%02x:%02x:%02x" % (i, j, k)) + src_mac = "e4:11:00:" + mac + dst_mac = "e4:12:00:" + mac + cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s " + "src_mac %s dst_mac %s action drop %s" % + (device, prio, skip, src_mac, dst_mac, share_action)) + file.write("%s\n" % cmd) + index += 1 + if index >= number: + file.close() + exit(0) -- cgit v1.2.3-70-g09d2 From 31c2611b66e01378b54f7ef641cb0d23fcd8502f Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 26 Oct 2017 18:24:43 -0700 Subject: selftests: Introduce a new test case to tc testsuite In this patchset, we fixed a tc bug. This patch adds the test case that reproduces the bug. To run this test case, user should specify an existing NIC device: # sudo ./tdc.py -d enp4s0f0 This test case belongs to category "flower". If user doesn't specify a NIC device, the test cases belong to "flower" will not be run. In this test case, we create 1M filters and all filters share the same action. When destroying all filters, kernel should not panic. It takes about 18s to run it. Acked-by: Jamal Hadi Salim Acked-by: Lucas Bates Signed-off-by: Chris Mi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/filters/tests.json | 23 +++++++++++++++++++++- tools/testing/selftests/tc-testing/tdc.py | 20 +++++++++++++++---- tools/testing/selftests/tc-testing/tdc_config.py | 2 ++ 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index c727b96a59b0..5fa02d86b35f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -17,5 +17,26 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "d052", + "name": "Add 1M filters with the same action", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress", + "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" + ], + "cmdUnderTest": "$TC -b $BATCH_FILE", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm $BATCH_FILE" + ] } -] \ No newline at end of file +] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index cd61b7844c0d..5f11f5d7456e 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -88,7 +88,7 @@ def prepare_env(cmdlist): exit(1) -def test_runner(filtered_tests): +def test_runner(filtered_tests, args): """ Driver function for the unit tests. @@ -105,6 +105,8 @@ def test_runner(filtered_tests): for tidx in testlist: result = True tresult = "" + if "flower" in tidx["category"] and args.device == None: + continue print("Test " + tidx["id"] + ": " + tidx["name"]) prepare_env(tidx["setup"]) (p, procout) = exec_cmd(tidx["cmdUnderTest"]) @@ -152,6 +154,10 @@ def ns_create(): exec_cmd(cmd, False) cmd = 'ip -s $NS link set $DEV1 up' exec_cmd(cmd, False) + cmd = 'ip link set $DEV2 netns $NS' + exec_cmd(cmd, False) + cmd = 'ip -s $NS link set $DEV2 up' + exec_cmd(cmd, False) def ns_destroy(): @@ -211,7 +217,8 @@ def set_args(parser): help='Execute the single test case with specified ID') parser.add_argument('-i', '--id', action='store_true', dest='gen_id', help='Generate ID numbers for new test cases') - return parser + parser.add_argument('-d', '--device', + help='Execute the test case in flower category') return parser @@ -225,6 +232,8 @@ def check_default_settings(args): if args.path != None: NAMES['TC'] = args.path + if args.device != None: + NAMES['DEV2'] = args.device if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) @@ -381,14 +390,17 @@ def set_operation_mode(args): if (len(alltests) == 0): print("Cannot find a test case with ID matching " + target_id) exit(1) - catresults = test_runner(alltests) + catresults = test_runner(alltests, args) print("All test results: " + "\n\n" + catresults) elif (len(target_category) > 0): + if (target_category == "flower") and args.device == None: + print("Please specify a NIC device (-d) to run category flower") + exit(1) if (target_category not in ucat): print("Specified category is not present in this file.") exit(1) else: - catresults = test_runner(testcases[target_category]) + catresults = test_runner(testcases[target_category], args) print("Category " + target_category + "\n\n" + catresults) ns_destroy() diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index 01087375a7c3..b6352515c1b5 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -12,6 +12,8 @@ NAMES = { # Name of veth devices to be created for the namespace 'DEV0': 'v0p0', 'DEV1': 'v0p1', + 'DEV2': '', + 'BATCH_FILE': './batch.txt', # Name of the namespace to use 'NS': 'tcut' } -- cgit v1.2.3-70-g09d2 From 0b07194bb55ed836c2cc7c22e866b87a14681984 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Oct 2017 13:58:38 -0700 Subject: Linux 4.14-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2900c54f34ce..5f91a28a3cea 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3-70-g09d2