diff options
276 files changed, 3697 insertions, 1527 deletions
@@ -70,6 +70,8 @@ Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com> Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang7@gmail.com> Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com> Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com> +Ben Dooks <ben-linux@fluff.org> <ben.dooks@simtec.co.uk> +Ben Dooks <ben-linux@fluff.org> <ben.dooks@sifive.com> Ben Gardner <bgardner@wabtec.com> Ben M Cahill <ben.m.cahill@intel.com> Ben Widawsky <bwidawsk@kernel.org> <ben@bwidawsk.net> @@ -233,6 +235,7 @@ Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com> Johan Hovold <johan@kernel.org> <jhovold@gmail.com> Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com> John Crispin <john@phrozen.org> <blogic@openwrt.org> +John Keeping <john@keeping.me.uk> <john@metanate.com> John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> John Stultz <johnstul@us.ibm.com> <jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com> diff --git a/Documentation/devicetree/bindings/ata/ahci-common.yaml b/Documentation/devicetree/bindings/ata/ahci-common.yaml index 7fdf40954a4c..38770c4c85fd 100644 --- a/Documentation/devicetree/bindings/ata/ahci-common.yaml +++ b/Documentation/devicetree/bindings/ata/ahci-common.yaml @@ -8,7 +8,7 @@ title: Common Properties for Serial ATA AHCI controllers maintainers: - Hans de Goede <hdegoede@redhat.com> - - Damien Le Moal <damien.lemoal@opensource.wdc.com> + - Damien Le Moal <dlemoal@kernel.org> description: This document defines device tree properties for a common AHCI SATA diff --git a/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml index 998e5cce652f..380cb6d80025 100644 --- a/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml +++ b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan Kendryte K210 Clock maintainers: - - Damien Le Moal <damien.lemoal@wdc.com> + - Damien Le Moal <dlemoal@kernel.org> description: | Canaan Kendryte K210 SoC clocks driver bindings. The clock diff --git a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml index 62f3ca66274f..32c821f97779 100644 --- a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml +++ b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml @@ -44,7 +44,7 @@ required: - clock-names - clocks -additionalProperties: true +unevaluatedProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml b/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml index 8459d3642205..3b3beab9db3f 100644 --- a/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml +++ b/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan Kendryte K210 System Controller maintainers: - - Damien Le Moal <damien.lemoal@wdc.com> + - Damien Le Moal <dlemoal@kernel.org> description: Canaan Inc. Kendryte K210 SoC system controller which provides a diff --git a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml index 8cc2b9924680..043e118c605c 100644 --- a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml +++ b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml @@ -11,7 +11,7 @@ maintainers: - Alistair Francis <alistair@alistair23.me> description: - RTL8723CS/RTL8723CS/RTL8821CS/RTL8822CS is a WiFi + BT chip. WiFi part + RTL8723BS/RTL8723CS/RTL8821CS/RTL8822CS is a WiFi + BT chip. WiFi part is connected over SDIO, while BT is connected over serial. It speaks H5 protocol with few extra commands to upload firmware and change module speed. @@ -27,7 +27,7 @@ properties: - items: - enum: - realtek,rtl8821cs-bt - - const: realtek,rtl8822cs-bt + - const: realtek,rtl8723bs-bt device-wake-gpios: maxItems: 1 diff --git a/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml b/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml index 7f4f36a58e56..739a08f00467 100644 --- a/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml +++ b/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan Kendryte K210 FPIOA maintainers: - - Damien Le Moal <damien.lemoal@wdc.com> + - Damien Le Moal <dlemoal@kernel.org> description: The Canaan Kendryte K210 SoC Fully Programmable IO Array (FPIOA) diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml index c91d3e3a094b..80f960671857 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml @@ -144,8 +144,9 @@ $defs: enum: [0, 1, 2, 3, 4, 5, 6, 7] qcom,paired: - - description: - Indicates that the pin should be operating in paired mode. + type: boolean + description: + Indicates that the pin should be operating in paired mode. required: - pins diff --git a/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml b/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml index ee8a2dcf5dfa..0c0135964b91 100644 --- a/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml +++ b/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan Kendryte K210 Reset Controller maintainers: - - Damien Le Moal <damien.lemoal@wdc.com> + - Damien Le Moal <dlemoal@kernel.org> description: | Canaan Kendryte K210 reset controller driver which supports the SoC diff --git a/Documentation/devicetree/bindings/riscv/canaan.yaml b/Documentation/devicetree/bindings/riscv/canaan.yaml index f8f3f286bd55..41fd11f70a49 100644 --- a/Documentation/devicetree/bindings/riscv/canaan.yaml +++ b/Documentation/devicetree/bindings/riscv/canaan.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan SoC-based boards maintainers: - - Damien Le Moal <damien.lemoal@wdc.com> + - Damien Le Moal <dlemoal@kernel.org> description: Canaan Kendryte K210 SoC-based boards diff --git a/Documentation/devicetree/usage-model.rst b/Documentation/devicetree/usage-model.rst index b6a287955ee5..0717426856b2 100644 --- a/Documentation/devicetree/usage-model.rst +++ b/Documentation/devicetree/usage-model.rst @@ -415,6 +415,6 @@ When using the DT, this creates problems for of_platform_populate() because it must decide whether to register each node as either a platform_device or an amba_device. This unfortunately complicates the device creation model a little bit, but the solution turns out not to -be too invasive. If a node is compatible with "arm,amba-primecell", then +be too invasive. If a node is compatible with "arm,primecell", then of_platform_populate() will register it as an amba_device instead of a platform_device. diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst index 07d5a5623e2a..634aa222b410 100644 --- a/Documentation/riscv/patch-acceptance.rst +++ b/Documentation/riscv/patch-acceptance.rst @@ -16,6 +16,24 @@ tested code over experimental code. We wish to extend these same principles to the RISC-V-related code that will be accepted for inclusion in the kernel. +Patchwork +--------- + +RISC-V has a patchwork instance, where the status of patches can be checked: + + https://patchwork.kernel.org/project/linux-riscv/list/ + +If your patch does not appear in the default view, the RISC-V maintainers have +likely either requested changes, or expect it to be applied to another tree. + +Automation runs against this patchwork instance, building/testing patches as +they arrive. The automation applies patches against the current HEAD of the +RISC-V `for-next` and `fixes` branches, depending on whether the patch has been +detected as a fix. Failing those, it will use the RISC-V `master` branch. +The exact commit to which a series has been applied will be noted on patchwork. +Patches for which any of the checks fail are unlikely to be applied and in most +cases will need to be resubmitted. + Submit Checklist Addendum ------------------------- We'll only accept patches for new modules or extensions if the diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst index f79987e16cf4..e7b07313550a 100644 --- a/Documentation/trace/user_events.rst +++ b/Documentation/trace/user_events.rst @@ -14,10 +14,6 @@ Programs can view status of the events via /sys/kernel/tracing/user_events_status and can both register and write data out via /sys/kernel/tracing/user_events_data. -Programs can also use /sys/kernel/tracing/dynamic_events to register and -delete user based events via the u: prefix. The format of the command to -dynamic_events is the same as the ioctl with the u: prefix applied. - Typically programs will register a set of events that they wish to expose to tools that can read trace_events (such as ftrace and perf). The registration process tells the kernel which address and bit to reflect if any tool has @@ -144,6 +140,9 @@ its name. Delete will only succeed if there are no references left to the event (in both user and kernel space). User programs should use a separate file to request deletes than the one used for registration due to this. +**NOTE:** By default events will auto-delete when there are no references left +to the event. Flags in the future may change this logic. + Unregistering ------------- If after registering an event it is no longer wanted to be updated then it can diff --git a/Documentation/translations/zh_CN/devicetree/usage-model.rst b/Documentation/translations/zh_CN/devicetree/usage-model.rst index c6aee82c7e6e..19ba4ae0cd81 100644 --- a/Documentation/translations/zh_CN/devicetree/usage-model.rst +++ b/Documentation/translations/zh_CN/devicetree/usage-model.rst @@ -325,6 +325,6 @@ Primecell设备。然而,棘手的一点是,AMBA总线上的所有设备并é 当使用DT时,这给of_platform_populate()带æ¥äº†é—®é¢˜ï¼Œå› 为它必须决定是å¦å°† æ¯ä¸ªèŠ‚点注册为platform_device或amba_device。ä¸å¹¸çš„是,这使设备创建模型 -å˜å¾—有点å¤æ‚,但解决方案原æ¥å¹¶ä¸æ˜¯å¤ªå…·æœ‰ä¾µç•¥æ€§ã€‚如果一个节点与“arm,amba-primecell†+å˜å¾—有点å¤æ‚,但解决方案原æ¥å¹¶ä¸æ˜¯å¤ªå…·æœ‰ä¾µç•¥æ€§ã€‚如果一个节点与“arm,primecell†兼容,那么of_platform_populate()将把它注册为amba_device而ä¸æ˜¯ platform_device。 diff --git a/MAINTAINERS b/MAINTAINERS index f794002a192e..ad32db6c81cc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17827,7 +17827,7 @@ F: tools/testing/selftests/rtc/ Real-time Linux Analysis (RTLA) tools M: Daniel Bristot de Oliveira <bristot@kernel.org> M: Steven Rostedt <rostedt@goodmis.org> -L: linux-trace-devel@vger.kernel.org +L: linux-trace-kernel@vger.kernel.org S: Maintained F: Documentation/tools/rtla/ F: tools/tracing/rtla/ @@ -18397,7 +18397,7 @@ F: drivers/infiniband/ulp/rtrs/ RUNTIME VERIFICATION (RV) M: Daniel Bristot de Oliveira <bristot@kernel.org> M: Steven Rostedt <rostedt@goodmis.org> -L: linux-trace-devel@vger.kernel.org +L: linux-trace-kernel@vger.kernel.org S: Maintained F: Documentation/trace/rv/ F: include/linux/rv.h @@ -19140,6 +19140,9 @@ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS M: Karsten Graul <kgraul@linux.ibm.com> M: Wenjia Zhang <wenjia@linux.ibm.com> M: Jan Karcher <jaka@linux.ibm.com> +R: D. Wythe <alibuda@linux.alibaba.com> +R: Tony Lu <tonylu@linux.alibaba.com> +R: Wen Gu <guwen@linux.alibaba.com> L: linux-s390@vger.kernel.org S: Supported F: net/smc/ @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c index a406454578f0..f1b8a04ee9f2 100644 --- a/arch/arm64/hyperv/mshyperv.c +++ b/arch/arm64/hyperv/mshyperv.c @@ -67,7 +67,7 @@ static int __init hyperv_init(void) if (ret) return ret; - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online", + ret = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "arm64/hyperv_init:online", hv_common_cpu_init, hv_common_cpu_die); if (ret < 0) { hv_common_free(); diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index b3323ab5b78d..35e8a52fea11 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1496,7 +1496,7 @@ __BUILD_CSR_OP(tlbidx) #define write_fcsr(dest, val) \ do { \ __asm__ __volatile__( \ - " movgr2fcsr %0, "__stringify(dest)" \n" \ + " movgr2fcsr "__stringify(dest)", %0 \n" \ : : "r" (val)); \ } while (0) diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 8b98d22a145b..de46a6b1e9f1 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -22,12 +22,14 @@ #define _PAGE_PFN_SHIFT 12 #define _PAGE_SWP_EXCLUSIVE_SHIFT 23 #define _PAGE_PFN_END_SHIFT 48 +#define _PAGE_PRESENT_INVALID_SHIFT 60 #define _PAGE_NO_READ_SHIFT 61 #define _PAGE_NO_EXEC_SHIFT 62 #define _PAGE_RPLV_SHIFT 63 /* Used by software */ #define _PAGE_PRESENT (_ULCAST_(1) << _PAGE_PRESENT_SHIFT) +#define _PAGE_PRESENT_INVALID (_ULCAST_(1) << _PAGE_PRESENT_INVALID_SHIFT) #define _PAGE_WRITE (_ULCAST_(1) << _PAGE_WRITE_SHIFT) #define _PAGE_ACCESSED (_ULCAST_(1) << _PAGE_ACCESSED_SHIFT) #define _PAGE_MODIFIED (_ULCAST_(1) << _PAGE_MODIFIED_SHIFT) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index d28fb9dbec59..9a9f9ff9b709 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -213,7 +213,7 @@ static inline int pmd_bad(pmd_t pmd) static inline int pmd_present(pmd_t pmd) { if (unlikely(pmd_val(pmd) & _PAGE_HUGE)) - return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE)); + return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PRESENT_INVALID)); return pmd_val(pmd) != (unsigned long)invalid_pte_table; } @@ -558,6 +558,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) static inline pmd_t pmd_mkinvalid(pmd_t pmd) { + pmd_val(pmd) |= _PAGE_PRESENT_INVALID; pmd_val(pmd) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY | _PAGE_PROTNONE); return pmd; diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index 2406c95b34cc..021b59c248fa 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -396,6 +396,8 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, if (hw->ctrl.type != LOONGARCH_BREAKPOINT_EXECUTE) alignment_mask = 0x7; + else + alignment_mask = 0x3; offset = hw->address & alignment_mask; hw->address &= ~alignment_mask; diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index ff28f99b47d7..0491bf453cd4 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -271,7 +271,7 @@ static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); /* Make sure interrupt enabled. */ - cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) | (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE; cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); @@ -594,7 +594,7 @@ static struct pmu pmu = { static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) { - return (pev->event_id & 0xff); + return M_PERFCTL_EVENT(pev->event_id); } static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) @@ -849,7 +849,7 @@ static void resume_local_counters(void) static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config) { - raw_event.event_id = config & 0xff; + raw_event.event_id = M_PERFCTL_EVENT(config); return &raw_event; } diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c index bdff825d29ef..85fae3d2d71a 100644 --- a/arch/loongarch/kernel/unaligned.c +++ b/arch/loongarch/kernel/unaligned.c @@ -485,7 +485,7 @@ static int __init debugfs_unaligned(void) struct dentry *d; d = debugfs_create_dir("loongarch", NULL); - if (!d) + if (IS_ERR_OR_NULL(d)) return -ENOMEM; debugfs_create_u32("unaligned_instructions_user", diff --git a/arch/nios2/boot/dts/10m50_devboard.dts b/arch/nios2/boot/dts/10m50_devboard.dts index 56339bef3247..0e7e5b0dd685 100644 --- a/arch/nios2/boot/dts/10m50_devboard.dts +++ b/arch/nios2/boot/dts/10m50_devboard.dts @@ -97,7 +97,7 @@ rx-fifo-depth = <8192>; tx-fifo-depth = <8192>; address-bits = <48>; - max-frame-size = <1518>; + max-frame-size = <1500>; local-mac-address = [00 00 00 00 00 00]; altr,has-supplementary-unicast; altr,enable-sup-addr = <1>; diff --git a/arch/nios2/boot/dts/3c120_devboard.dts b/arch/nios2/boot/dts/3c120_devboard.dts index d10fb81686c7..3ee316906379 100644 --- a/arch/nios2/boot/dts/3c120_devboard.dts +++ b/arch/nios2/boot/dts/3c120_devboard.dts @@ -106,7 +106,7 @@ interrupt-names = "rx_irq", "tx_irq"; rx-fifo-depth = <8192>; tx-fifo-depth = <8192>; - max-frame-size = <1518>; + max-frame-size = <1500>; local-mac-address = [ 00 00 00 00 00 00 ]; phy-mode = "rgmii-id"; phy-handle = <&phy0>; diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 0f0d4a496fef..75677b526b2b 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -90,10 +90,6 @@ #include <asm/asmregs.h> #include <asm/psw.h> - sp = 30 - gp = 27 - ipsw = 22 - /* * We provide two versions of each macro to convert from physical * to virtual and vice versa. The "_r1" versions take one argument diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 6f5e2727963c..78473d69cd2b 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -5,6 +5,11 @@ KCSAN_SANITIZE := n targets += trampoline_$(BITS).o purgatory.ro +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined $(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index 5730797a6b40..bd2e27f82532 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -35,6 +35,11 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS CFLAGS_string.o := -D__DISABLE_EXPORTS CFLAGS_ctype.o := -D__DISABLE_EXPORTS +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + # When linking purgatory.ro with -r unresolved symbols are not checked, # also link a purgatory.chk binary without -r to check for unresolved symbols. PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a5f9474f08e1..6c04b52f139b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -416,7 +416,7 @@ void __init hyperv_init(void) goto free_vp_assist_page; } - cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", + cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online", hv_cpu_init, hv_cpu_die); if (cpuhp < 0) goto free_ghcb_page; diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 1ba5d3b99b16..85d38b9f3586 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -20,6 +20,8 @@ void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); + x86_platform.realmode_reserve = x86_init_noop; + x86_platform.realmode_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 82fec66d46d2..42abd6af1198 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -14,6 +14,11 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE CFLAGS_sha256.o := -D__DISABLE_EXPORTS +# When profile-guided optimization is enabled, llvm emits two different +# overlapping text sections, which is not supported by kexec. Remove profile +# optimization flags. +KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) + # When linking purgatory.ro with -r unresolved symbols are not checked, # also link a purgatory.chk binary without -r to check for unresolved symbols. PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0ce64dd73cfe..f0b5c9c41cde 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -34,6 +34,8 @@ #include "blk-ioprio.h" #include "blk-throttle.h" +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu); + /* * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. * blkcg_pol_register_mutex nests outside of it and synchronizes entire @@ -56,6 +58,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ bool blkcg_debug_stats = false; +static DEFINE_RAW_SPINLOCK(blkg_stat_lock); + #define BLKG_DESTROY_BATCH_SIZE 64 /* @@ -163,10 +167,20 @@ static void blkg_free(struct blkcg_gq *blkg) static void __blkg_release(struct rcu_head *rcu) { struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); + struct blkcg *blkcg = blkg->blkcg; + int cpu; #ifdef CONFIG_BLK_CGROUP_PUNT_BIO WARN_ON(!bio_list_empty(&blkg->async_bios)); #endif + /* + * Flush all the non-empty percpu lockless lists before releasing + * us, given these stat belongs to us. + * + * blkg_stat_lock is for serializing blkg stat update + */ + for_each_possible_cpu(cpu) + __blkcg_rstat_flush(blkcg, cpu); /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); @@ -951,17 +965,12 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } -static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) { - struct blkcg *blkcg = css_to_blkcg(css); struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu); struct llist_node *lnode; struct blkg_iostat_set *bisc, *next_bisc; - /* Root-level stats are sourced from system-wide IO stats */ - if (!cgroup_parent(css->cgroup)) - return; - rcu_read_lock(); lnode = llist_del_all(lhead); @@ -969,6 +978,14 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) goto out; /* + * For covering concurrent parent blkg update from blkg_release(). + * + * When flushing from cgroup, cgroup_rstat_lock is always held, so + * this lock won't cause contention most of time. + */ + raw_spin_lock(&blkg_stat_lock); + + /* * Iterate only the iostat_cpu's queued in the lockless list. */ llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) { @@ -991,13 +1008,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) if (parent && parent->parent) blkcg_iostat_update(parent, &blkg->iostat.cur, &blkg->iostat.last); - percpu_ref_put(&blkg->refcnt); } - + raw_spin_unlock(&blkg_stat_lock); out: rcu_read_unlock(); } +static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) +{ + /* Root-level stats are sourced from system-wide IO stats */ + if (cgroup_parent(css->cgroup)) + __blkcg_rstat_flush(css_to_blkcg(css), cpu); +} + /* * We source root cgroup stats from the system-wide stats to avoid * tracking the same information twice and incurring overhead when no @@ -2075,7 +2098,6 @@ void blk_cgroup_bio_start(struct bio *bio) llist_add(&bis->lnode, lhead); WRITE_ONCE(bis->lqueued, true); - percpu_ref_get(&bis->blkg->refcnt); } u64_stats_update_end_irqrestore(&bis->sync, flags); diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index 2d0828db28d8..b5ba550a0c04 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -97,6 +97,7 @@ static int qaic_open(struct drm_device *dev, struct drm_file *file) cleanup_usr: cleanup_srcu_struct(&usr->qddev_lock); + ida_free(&qaic_usrs, usr->handle); free_usr: kfree(usr); dev_unlock: @@ -224,6 +225,9 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) struct qaic_user *usr; qddev = qdev->qddev; + qdev->qddev = NULL; + if (!qddev) + return; /* * Existing users get unresolvable errors till they close FDs. diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h index ebf8fd373cf7..79bbfe00d241 100644 --- a/drivers/acpi/acpica/achware.h +++ b/drivers/acpi/acpica/achware.h @@ -101,8 +101,6 @@ acpi_status acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info, acpi_event_status *event_status); -acpi_status acpi_hw_disable_all_gpes(void); - acpi_status acpi_hw_enable_all_runtime_gpes(void); acpi_status acpi_hw_enable_all_wakeup_gpes(void); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 72470b9f16c4..f32570f72b90 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -636,11 +636,19 @@ static int acpi_suspend_enter(suspend_state_t pm_state) } /* - * Disable and clear GPE status before interrupt is enabled. Some GPEs - * (like wakeup GPE) haven't handler, this can avoid such GPE misfire. - * acpi_leave_sleep_state will reenable specific GPEs later + * Disable all GPE and clear their status bits before interrupts are + * enabled. Some GPEs (like wakeup GPEs) have no handlers and this can + * prevent them from producing spurious interrups. + * + * acpi_leave_sleep_state() will reenable specific GPEs later. + * + * Because this code runs on one CPU with disabled interrupts (all of + * the other CPUs are offline at this time), it need not acquire any + * sleeping locks which may trigger an implicit preemption point even + * if there is no contention, so avoid doing that by using a low-level + * library routine here. */ - acpi_disable_all_gpes(); + acpi_hw_disable_all_gpes(); /* Allow EC transactions to happen. */ acpi_ec_unblock_transactions(); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8bf612bdd61a..b4f246f0cac7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5348,7 +5348,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host) mutex_init(&ap->scsi_scan_mutex); INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug); - INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); + INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); INIT_LIST_HEAD(&ap->eh_done_q); init_waitqueue_head(&ap->eh_wait_q); init_completion(&ap->park_req_pending); @@ -5954,6 +5954,7 @@ static void ata_port_detach(struct ata_port *ap) WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED)); cancel_delayed_work_sync(&ap->hotplug_task); + cancel_delayed_work_sync(&ap->scsi_rescan_task); skip_eh: /* clean up zpodd on port removal */ diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index a6c901811802..6f8d14191593 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2984,7 +2984,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, ehc->i.flags |= ATA_EHI_SETMODE; /* schedule the scsi_rescan_device() here */ - schedule_work(&(ap->scsi_rescan_task)); + schedule_delayed_work(&ap->scsi_rescan_task, 0); } else if (dev->class == ATA_DEV_UNKNOWN && ehc->tries[dev->devno] && ata_class_enabled(ehc->classes[dev->devno])) { diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 8ce90284eb34..551077cea4e4 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4597,10 +4597,11 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, void ata_scsi_dev_rescan(struct work_struct *work) { struct ata_port *ap = - container_of(work, struct ata_port, scsi_rescan_task); + container_of(work, struct ata_port, scsi_rescan_task.work); struct ata_link *link; struct ata_device *dev; unsigned long flags; + bool delay_rescan = false; mutex_lock(&ap->scsi_scan_mutex); spin_lock_irqsave(ap->lock, flags); @@ -4614,6 +4615,21 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (scsi_device_get(sdev)) continue; + /* + * If the rescan work was scheduled because of a resume + * event, the port is already fully resumed, but the + * SCSI device may not yet be fully resumed. In such + * case, executing scsi_rescan_device() may cause a + * deadlock with the PM code on device_lock(). Prevent + * this by giving up and retrying rescan after a short + * delay. + */ + delay_rescan = sdev->sdev_gendev.power.is_suspended; + if (delay_rescan) { + scsi_device_put(sdev); + break; + } + spin_unlock_irqrestore(ap->lock, flags); scsi_rescan_device(&(sdev->sdev_gendev)); scsi_device_put(sdev); @@ -4623,4 +4639,8 @@ void ata_scsi_dev_rescan(struct work_struct *work) spin_unlock_irqrestore(ap->lock, flags); mutex_unlock(&ap->scsi_scan_mutex); + + if (delay_rescan) + schedule_delayed_work(&ap->scsi_rescan_task, + msecs_to_jiffies(5)); } diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index 029564695dbb..97c681fcf9f6 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -284,6 +284,9 @@ static bool regcache_reg_needs_sync(struct regmap *map, unsigned int reg, { int ret; + if (!regmap_writeable(map, reg)) + return false; + /* If we don't know the chip just got reset, then sync everything. */ if (!map->no_sync_defaults) return true; diff --git a/drivers/base/regmap/regmap-spi-avmm.c b/drivers/base/regmap/regmap-spi-avmm.c index 4c2b94b3e30b..6af692844c19 100644 --- a/drivers/base/regmap/regmap-spi-avmm.c +++ b/drivers/base/regmap/regmap-spi-avmm.c @@ -660,7 +660,7 @@ static const struct regmap_bus regmap_spi_avmm_bus = { .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, .val_format_endian_default = REGMAP_ENDIAN_NATIVE, .max_raw_read = SPI_AVMM_VAL_SIZE * MAX_READ_CNT, - .max_raw_write = SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT, + .max_raw_write = SPI_AVMM_REG_SIZE + SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT, .free_context = spi_avmm_bridge_ctx_free, }; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2b918e28acaa..b47358da92a2 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -348,63 +348,33 @@ static inline void virtblk_request_done(struct request *req) blk_mq_end_request(req, status); } -static void virtblk_complete_batch(struct io_comp_batch *iob) -{ - struct request *req; - - rq_list_for_each(&iob->req_list, req) { - virtblk_unmap_data(req, blk_mq_rq_to_pdu(req)); - virtblk_cleanup_cmd(req); - } - blk_mq_end_request_batch(iob); -} - -static int virtblk_handle_req(struct virtio_blk_vq *vq, - struct io_comp_batch *iob) -{ - struct virtblk_req *vbr; - int req_done = 0; - unsigned int len; - - while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { - struct request *req = blk_mq_rq_from_pdu(vbr); - - if (likely(!blk_should_fake_timeout(req->q)) && - !blk_mq_complete_request_remote(req) && - !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), - virtblk_complete_batch)) - virtblk_request_done(req); - req_done++; - } - - return req_done; -} - static void virtblk_done(struct virtqueue *vq) { struct virtio_blk *vblk = vq->vdev->priv; - struct virtio_blk_vq *vblk_vq = &vblk->vqs[vq->index]; - int req_done = 0; + bool req_done = false; + int qid = vq->index; + struct virtblk_req *vbr; unsigned long flags; - DEFINE_IO_COMP_BATCH(iob); + unsigned int len; - spin_lock_irqsave(&vblk_vq->lock, flags); + spin_lock_irqsave(&vblk->vqs[qid].lock, flags); do { virtqueue_disable_cb(vq); - req_done += virtblk_handle_req(vblk_vq, &iob); + while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { + struct request *req = blk_mq_rq_from_pdu(vbr); + if (likely(!blk_should_fake_timeout(req->q))) + blk_mq_complete_request(req); + req_done = true; + } if (unlikely(virtqueue_is_broken(vq))) break; } while (!virtqueue_enable_cb(vq)); - if (req_done) { - if (!rq_list_empty(iob.req_list)) - iob.complete(&iob); - - /* In case queue is stopped waiting for more buffers. */ + /* In case queue is stopped waiting for more buffers. */ + if (req_done) blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); - } - spin_unlock_irqrestore(&vblk_vq->lock, flags); + spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); } static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) @@ -1283,15 +1253,37 @@ static void virtblk_map_queues(struct blk_mq_tag_set *set) } } +static void virtblk_complete_batch(struct io_comp_batch *iob) +{ + struct request *req; + + rq_list_for_each(&iob->req_list, req) { + virtblk_unmap_data(req, blk_mq_rq_to_pdu(req)); + virtblk_cleanup_cmd(req); + } + blk_mq_end_request_batch(iob); +} + static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct virtio_blk *vblk = hctx->queue->queuedata; struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx); + struct virtblk_req *vbr; unsigned long flags; + unsigned int len; int found = 0; spin_lock_irqsave(&vq->lock, flags); - found = virtblk_handle_req(vq, iob); + + while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { + struct request *req = blk_mq_rq_from_pdu(vbr); + + found++; + if (!blk_mq_complete_request_remote(req) && + !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), + virtblk_complete_batch)) + virtblk_request_done(req); + } if (found) blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c index edfa94641bbf..66759fe28fad 100644 --- a/drivers/clk/clk-composite.c +++ b/drivers/clk/clk-composite.c @@ -119,7 +119,10 @@ static int clk_composite_determine_rate(struct clk_hw *hw, if (ret) continue; - rate_diff = abs(req->rate - tmp_req.rate); + if (req->rate >= tmp_req.rate) + rate_diff = req->rate - tmp_req.rate; + else + rate_diff = tmp_req.rate - req->rate; if (!rate_diff || !req->best_parent_hw || best_rate_diff > rate_diff) { diff --git a/drivers/clk/clk-loongson2.c b/drivers/clk/clk-loongson2.c index 70ae1dd2e474..bacdcbb287ac 100644 --- a/drivers/clk/clk-loongson2.c +++ b/drivers/clk/clk-loongson2.c @@ -40,7 +40,7 @@ static struct clk_hw *loongson2_clk_register(struct device *dev, { int ret; struct clk_hw *hw; - struct clk_init_data init; + struct clk_init_data init = { }; hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL); if (!hw) diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c index 6b4e193f648d..c87a6c4a7967 100644 --- a/drivers/clk/mediatek/clk-mt8365.c +++ b/drivers/clk/mediatek/clk-mt8365.c @@ -23,6 +23,7 @@ static DEFINE_SPINLOCK(mt8365_clk_lock); static const struct mtk_fixed_clk top_fixed_clks[] = { + FIXED_CLK(CLK_TOP_CLK_NULL, "clk_null", NULL, 0), FIXED_CLK(CLK_TOP_I2S0_BCK, "i2s0_bck", NULL, 26000000), FIXED_CLK(CLK_TOP_DSI0_LNTC_DSICK, "dsi0_lntc_dsick", "clk26m", 75000000), @@ -559,6 +560,14 @@ static const struct mtk_clk_divider top_adj_divs[] = { 0x324, 16, 8, CLK_DIVIDER_ROUND_CLOSEST), DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV3, "apll12_ck_div3", "apll_i2s3_sel", 0x324, 24, 8, CLK_DIVIDER_ROUND_CLOSEST), + DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV4, "apll12_ck_div4", "apll_tdmout_sel", + 0x328, 0, 8, CLK_DIVIDER_ROUND_CLOSEST), + DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV4B, "apll12_ck_div4b", "apll_tdmout_sel", + 0x328, 8, 8, CLK_DIVIDER_ROUND_CLOSEST), + DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV5, "apll12_ck_div5", "apll_tdmin_sel", + 0x328, 16, 8, CLK_DIVIDER_ROUND_CLOSEST), + DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV5B, "apll12_ck_div5b", "apll_tdmin_sel", + 0x328, 24, 8, CLK_DIVIDER_ROUND_CLOSEST), DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV6, "apll12_ck_div6", "apll_spdif_sel", 0x32c, 0, 8, CLK_DIVIDER_ROUND_CLOSEST), }; @@ -583,15 +592,15 @@ static const struct mtk_gate_regs top2_cg_regs = { #define GATE_TOP0(_id, _name, _parent, _shift) \ GATE_MTK(_id, _name, _parent, &top0_cg_regs, \ - _shift, &mtk_clk_gate_ops_no_setclr_inv) + _shift, &mtk_clk_gate_ops_no_setclr) #define GATE_TOP1(_id, _name, _parent, _shift) \ GATE_MTK(_id, _name, _parent, &top1_cg_regs, \ - _shift, &mtk_clk_gate_ops_no_setclr) + _shift, &mtk_clk_gate_ops_no_setclr_inv) #define GATE_TOP2(_id, _name, _parent, _shift) \ GATE_MTK(_id, _name, _parent, &top2_cg_regs, \ - _shift, &mtk_clk_gate_ops_no_setclr) + _shift, &mtk_clk_gate_ops_no_setclr_inv) static const struct mtk_gate top_clk_gates[] = { GATE_TOP0(CLK_TOP_CONN_32K, "conn_32k", "clk32k", 10), @@ -696,6 +705,7 @@ static const struct mtk_gate ifr_clks[] = { GATE_IFR3(CLK_IFR_GCPU, "ifr_gcpu", "axi_sel", 8), GATE_IFR3(CLK_IFR_TRNG, "ifr_trng", "axi_sel", 9), GATE_IFR3(CLK_IFR_AUXADC, "ifr_auxadc", "clk26m", 10), + GATE_IFR3(CLK_IFR_CPUM, "ifr_cpum", "clk26m", 11), GATE_IFR3(CLK_IFR_AUXADC_MD, "ifr_auxadc_md", "clk26m", 14), GATE_IFR3(CLK_IFR_AP_DMA, "ifr_ap_dma", "axi_sel", 18), GATE_IFR3(CLK_IFR_DEBUGSYS, "ifr_debugsys", "axi_sel", 24), @@ -717,6 +727,8 @@ static const struct mtk_gate ifr_clks[] = { GATE_IFR5(CLK_IFR_PWRAP_TMR, "ifr_pwrap_tmr", "clk26m", 12), GATE_IFR5(CLK_IFR_PWRAP_SPI, "ifr_pwrap_spi", "clk26m", 13), GATE_IFR5(CLK_IFR_PWRAP_SYS, "ifr_pwrap_sys", "clk26m", 14), + GATE_MTK_FLAGS(CLK_IFR_MCU_PM_BK, "ifr_mcu_pm_bk", NULL, &ifr5_cg_regs, + 17, &mtk_clk_gate_ops_setclr, CLK_IGNORE_UNUSED), GATE_IFR5(CLK_IFR_IRRX_26M, "ifr_irrx_26m", "clk26m", 22), GATE_IFR5(CLK_IFR_IRRX_32K, "ifr_irrx_32k", "clk32k", 23), GATE_IFR5(CLK_IFR_I2C0_AXI, "ifr_i2c0_axi", "i2c_sel", 24), diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c index 42958a542662..621e298f101a 100644 --- a/drivers/clk/pxa/clk-pxa3xx.c +++ b/drivers/clk/pxa/clk-pxa3xx.c @@ -164,7 +164,7 @@ void pxa3xx_clk_update_accr(u32 disable, u32 enable, u32 xclkcfg, u32 mask) accr &= ~disable; accr |= enable; - writel(accr, ACCR); + writel(accr, clk_regs + ACCR); if (xclkcfg) __asm__("mcr p14, 0, %0, c6, c0, 0\n" : : "r"(xclkcfg)); diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index 01f2e86f3f7c..12cf6bb2e3ce 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -12,7 +12,6 @@ #include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/udmabuf.h> -#include <linux/hugetlb.h> #include <linux/vmalloc.h> #include <linux/iosys-map.h> @@ -207,9 +206,7 @@ static long udmabuf_create(struct miscdevice *device, struct udmabuf *ubuf; struct dma_buf *buf; pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit; - struct page *page, *hpage = NULL; - pgoff_t subpgoff, maxsubpgs; - struct hstate *hpstate; + struct page *page; int seals, ret = -EINVAL; u32 i, flags; @@ -245,7 +242,7 @@ static long udmabuf_create(struct miscdevice *device, if (!memfd) goto err; mapping = memfd->f_mapping; - if (!shmem_mapping(mapping) && !is_file_hugepages(memfd)) + if (!shmem_mapping(mapping)) goto err; seals = memfd_fcntl(memfd, F_GET_SEALS, 0); if (seals == -EINVAL) @@ -256,48 +253,16 @@ static long udmabuf_create(struct miscdevice *device, goto err; pgoff = list[i].offset >> PAGE_SHIFT; pgcnt = list[i].size >> PAGE_SHIFT; - if (is_file_hugepages(memfd)) { - hpstate = hstate_file(memfd); - pgoff = list[i].offset >> huge_page_shift(hpstate); - subpgoff = (list[i].offset & - ~huge_page_mask(hpstate)) >> PAGE_SHIFT; - maxsubpgs = huge_page_size(hpstate) >> PAGE_SHIFT; - } for (pgidx = 0; pgidx < pgcnt; pgidx++) { - if (is_file_hugepages(memfd)) { - if (!hpage) { - hpage = find_get_page_flags(mapping, pgoff, - FGP_ACCESSED); - if (!hpage) { - ret = -EINVAL; - goto err; - } - } - page = hpage + subpgoff; - get_page(page); - subpgoff++; - if (subpgoff == maxsubpgs) { - put_page(hpage); - hpage = NULL; - subpgoff = 0; - pgoff++; - } - } else { - page = shmem_read_mapping_page(mapping, - pgoff + pgidx); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto err; - } + page = shmem_read_mapping_page(mapping, pgoff + pgidx); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + goto err; } ubuf->pages[pgbuf++] = page; } fput(memfd); memfd = NULL; - if (hpage) { - put_page(hpage); - hpage = NULL; - } } exp_info.ops = &udmabuf_ops; diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index e4ccfb6a8fa5..ec056f6f40ce 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -2124,6 +2124,7 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware file, blocks, le32_to_cpu(blk->len), type, le32_to_cpu(blk->id)); + region_name = cs_dsp_mem_region_name(type); mem = cs_dsp_find_region(dsp, type); if (!mem) { cs_dsp_err(dsp, "No base for region %x\n", type); @@ -2147,8 +2148,8 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware reg = dsp->ops->region_to_reg(mem, reg); reg += offset; } else { - cs_dsp_err(dsp, "No %x for algorithm %x\n", - type, le32_to_cpu(blk->id)); + cs_dsp_err(dsp, "No %s for algorithm %x\n", + region_name, le32_to_cpu(blk->id)); } break; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index abeff7dc0b58..34b9e7876538 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -361,24 +361,6 @@ static void __init efi_debugfs_init(void) static inline void efi_debugfs_init(void) {} #endif -static void refresh_nv_rng_seed(struct work_struct *work) -{ - u8 seed[EFI_RANDOM_SEED_SIZE]; - - get_random_bytes(seed, sizeof(seed)); - efi.set_variable(L"RandomSeed", &LINUX_EFI_RANDOM_SEED_TABLE_GUID, - EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, sizeof(seed), seed); - memzero_explicit(seed, sizeof(seed)); -} -static int refresh_nv_rng_seed_notification(struct notifier_block *nb, unsigned long action, void *data) -{ - static DECLARE_WORK(work, refresh_nv_rng_seed); - schedule_work(&work); - return NOTIFY_DONE; -} -static struct notifier_block refresh_nv_rng_seed_nb = { .notifier_call = refresh_nv_rng_seed_notification }; - /* * We register the efi subsystem with the firmware subsystem and the * efivars subsystem with the efi subsystem, if the system was booted with @@ -451,9 +433,6 @@ static int __init efisubsys_init(void) platform_device_register_simple("efi_secret", 0, NULL, 0); #endif - if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE)) - execute_with_initialized_rng(&refresh_nv_rng_seed_nb); - return 0; err_remove_group: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b1ca1ab6d6ad..393b6fb7a71d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1615,6 +1615,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = { 0x5874, 0x5940, 0x5941, + 0x5b70, 0x5b72, 0x5b73, 0x5b74, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 3b225be89cb7..a70103ac0026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -140,7 +140,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) places[c].lpfn = visible_pfn; - else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size) + else places[c].flags |= TTM_PL_FLAG_TOPDOWN; if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9d7e6e0e73ed..a150b7a4b4aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3548,6 +3548,9 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, void *fw_pri_cpu_addr; int ret; + if (adev->psp.vbflash_image_size == 0) + return -EINVAL; + dev_info(adev->dev, "VBIOS flash to PSP started"); ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size, @@ -3599,13 +3602,13 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev, } static const struct bin_attribute psp_vbflash_bin_attr = { - .attr = {.name = "psp_vbflash", .mode = 0664}, + .attr = {.name = "psp_vbflash", .mode = 0660}, .size = 0, .write = amdgpu_psp_vbflash_write, .read = amdgpu_psp_vbflash_read, }; -static DEVICE_ATTR(psp_vbflash_status, 0444, amdgpu_psp_vbflash_status, NULL); +static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); int amdgpu_psp_sysfs_init(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index dc474b809604..49de3a3eebc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -581,3 +581,21 @@ void amdgpu_ring_ib_end(struct amdgpu_ring *ring) if (ring->is_sw_ring) amdgpu_sw_ring_ib_end(ring); } + +void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL); +} + +void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE); +} + +void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index d8749444b689..2474cb71e476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -227,6 +227,9 @@ struct amdgpu_ring_funcs { int (*preempt_ib)(struct amdgpu_ring *ring); void (*emit_mem_sync)(struct amdgpu_ring *ring); void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable); + void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset); + void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); + void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); }; struct amdgpu_ring { @@ -318,10 +321,16 @@ struct amdgpu_ring { #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) +#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) +#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) +#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); void amdgpu_ring_ib_begin(struct amdgpu_ring *ring); void amdgpu_ring_ib_end(struct amdgpu_ring *ring); +void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring); +void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring); +void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index 62079f0e3ee8..73516abef662 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -105,6 +105,16 @@ static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux) amdgpu_fence_update_start_timestamp(e->ring, chunk->sync_seq, ktime_get()); + if (chunk->sync_seq == + le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) { + if (chunk->cntl_offset <= e->ring->buf_mask) + amdgpu_ring_patch_cntl(e->ring, + chunk->cntl_offset); + if (chunk->ce_offset <= e->ring->buf_mask) + amdgpu_ring_patch_ce(e->ring, chunk->ce_offset); + if (chunk->de_offset <= e->ring->buf_mask) + amdgpu_ring_patch_de(e->ring, chunk->de_offset); + } amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring, chunk->start, chunk->end); @@ -407,6 +417,17 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) amdgpu_ring_mux_end_ib(mux, ring); } +void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + unsigned offset; + + offset = ring->wptr & ring->buf_mask; + + amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type); +} + void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) { struct amdgpu_mux_entry *e; @@ -429,6 +450,10 @@ void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *r } chunk->start = ring->wptr; + /* the initialized value used to check if they are set by the ib submission*/ + chunk->cntl_offset = ring->buf_mask + 1; + chunk->de_offset = ring->buf_mask + 1; + chunk->ce_offset = ring->buf_mask + 1; list_add_tail(&chunk->entry, &e->list); } @@ -454,6 +479,41 @@ static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct a } } +void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, + struct amdgpu_ring *ring, u64 offset, + enum amdgpu_ring_mux_offset_type type) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry!\n"); + return; + } + + chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry); + if (!chunk) { + DRM_ERROR("cannot find chunk!\n"); + return; + } + + switch (type) { + case AMDGPU_MUX_OFFSET_TYPE_CONTROL: + chunk->cntl_offset = offset; + break; + case AMDGPU_MUX_OFFSET_TYPE_DE: + chunk->de_offset = offset; + break; + case AMDGPU_MUX_OFFSET_TYPE_CE: + chunk->ce_offset = offset; + break; + default: + DRM_ERROR("invalid type (%d)\n", type); + break; + } +} + void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) { struct amdgpu_mux_entry *e; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h index 4be45fc14954..b22d4fb2a847 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h @@ -50,6 +50,12 @@ struct amdgpu_mux_entry { struct list_head list; }; +enum amdgpu_ring_mux_offset_type { + AMDGPU_MUX_OFFSET_TYPE_CONTROL, + AMDGPU_MUX_OFFSET_TYPE_DE, + AMDGPU_MUX_OFFSET_TYPE_CE, +}; + struct amdgpu_ring_mux { struct amdgpu_ring *real_ring; @@ -72,12 +78,18 @@ struct amdgpu_ring_mux { * @sync_seq: the fence seqno related with the saved IB. * @start:- start location on the software ring. * @end:- end location on the software ring. + * @control_offset:- the PRE_RESUME bit position used for resubmission. + * @de_offset:- the anchor in write_data for de meta of resubmission. + * @ce_offset:- the anchor in write_data for ce meta of resubmission. */ struct amdgpu_mux_chunk { struct list_head entry; uint32_t sync_seq; u64 start; u64 end; + u64 cntl_offset; + u64 de_offset; + u64 ce_offset; }; int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, @@ -89,6 +101,8 @@ u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ri u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, + u64 offset, enum amdgpu_ring_mux_offset_type type); bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux); u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring); @@ -97,6 +111,7 @@ void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); +void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type); const char *amdgpu_sw_ring_name(int idx); unsigned int amdgpu_sw_ring_priority(int idx); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e7f2b7bf0ff5..a674c8a58dc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -755,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); @@ -5127,7 +5127,8 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, gfx_v9_0_ring_emit_de_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? - true : false); + true : false, + job->gds_size > 0 && job->gds_base != 0); } amdgpu_ring_write(ring, header); @@ -5138,9 +5139,83 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, #endif lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_ib_on_emit_cntl(ring); amdgpu_ring_write(ring, control); } +static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, + unsigned offset) +{ + u32 control = ring->ring[offset]; + + control |= INDIRECT_BUFFER_PRE_RESUME(1); + ring->ring[offset] = control; +} + +static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, + unsigned offset) +{ + struct amdgpu_device *adev = ring->adev; + void *ce_payload_cpu_addr; + uint64_t payload_offset, payload_size; + + payload_size = sizeof(struct v9_ce_ib_state); + + if (ring->is_mes_queue) { + payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); + } else { + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; + } + + if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { + memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); + } else { + memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, + (ring->buf_mask + 1 - offset) << 2); + payload_size -= (ring->buf_mask + 1 - offset) << 2; + memcpy((void *)&ring->ring[0], + ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), + payload_size); + } +} + +static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, + unsigned offset) +{ + struct amdgpu_device *adev = ring->adev; + void *de_payload_cpu_addr; + uint64_t payload_offset, payload_size; + + payload_size = sizeof(struct v9_de_ib_state); + + if (ring->is_mes_queue) { + payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); + } else { + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; + } + + if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { + memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); + } else { + memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, + (ring->buf_mask + 1 - offset) << 2); + payload_size -= (ring->buf_mask + 1 - offset) << 2; + memcpy((void *)&ring->ring[0], + de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), + payload_size); + } +} + static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, @@ -5336,6 +5411,8 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); + amdgpu_ring_ib_on_emit_ce(ring); + if (resume) amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, sizeof(ce_payload) >> 2); @@ -5369,10 +5446,6 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) amdgpu_ring_alloc(ring, 13); gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); - /*reset the CP_VMID_PREEMPT after trailing fence*/ - amdgpu_ring_emit_wreg(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), - 0x0); /* assert IB preemption, emit the trailing fence */ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, @@ -5395,6 +5468,10 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); } + /*reset the CP_VMID_PREEMPT after trailing fence*/ + amdgpu_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), + 0x0); amdgpu_ring_commit(ring); /* deassert preemption condition */ @@ -5402,7 +5479,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) return r; } -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) { struct amdgpu_device *adev = ring->adev; struct v9_de_ib_state de_payload = {0}; @@ -5433,8 +5510,10 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) PAGE_SIZE); } - de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); - de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + if (usegds) { + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + } cnt = (sizeof(de_payload) >> 2) + 4 - 2; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); @@ -5445,6 +5524,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); + amdgpu_ring_ib_on_emit_de(ring); if (resume) amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, sizeof(de_payload) >> 2); @@ -6855,6 +6935,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, + .patch_cntl = gfx_v9_0_ring_patch_cntl, + .patch_de = gfx_v9_0_ring_patch_de_meta, + .patch_ce = gfx_v9_0_ring_patch_ce_meta, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index e5fd1e00914d..da126ff8bcbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -129,7 +129,11 @@ static int vcn_v4_0_sw_init(void *handle) if (adev->vcn.harvest_config & (1 << i)) continue; - atomic_set(&adev->vcn.inst[i].sched_score, 0); + /* Init instance 0 sched_score to 1, so it's scheduled after other instances */ + if (i == 0) + atomic_set(&adev->vcn.inst[i].sched_score, 1); + else + atomic_set(&adev->vcn.inst[i].sched_score, 0); /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d5cec03eaa8d..7acd73e5004f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7196,7 +7196,13 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) drm_add_modes_noedid(connector, 1920, 1080); } else { amdgpu_dm_connector_ddc_get_modes(connector, edid); - amdgpu_dm_connector_add_common_modes(encoder, connector); + /* most eDP supports only timings from its edid, + * usually only detailed timings are available + * from eDP edid. timings which are not from edid + * may damage eDP + */ + if (connector->connector_type != DRM_MODE_CONNECTOR_eDP) + amdgpu_dm_connector_add_common_modes(encoder, connector); amdgpu_dm_connector_add_freesync_modes(connector, edid); } amdgpu_dm_fbc_init(connector); @@ -8198,6 +8204,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, if (acrtc_state->abm_level != dm_old_crtc_state->abm_level) bundle->stream_update.abm_level = &acrtc_state->abm_level; + mutex_lock(&dm->dc_lock); + if ((acrtc_state->update_type > UPDATE_TYPE_FAST) && + acrtc_state->stream->link->psr_settings.psr_allow_active) + amdgpu_dm_psr_disable(acrtc_state->stream); + mutex_unlock(&dm->dc_lock); + /* * If FreeSync state on the stream has changed then we need to * re-adjust the min/max bounds now that DC doesn't handle this @@ -8211,10 +8223,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } mutex_lock(&dm->dc_lock); - if ((acrtc_state->update_type > UPDATE_TYPE_FAST) && - acrtc_state->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(acrtc_state->stream); - update_planes_and_stream_adapter(dm->dc, acrtc_state->update_type, planes_count, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index a131e30fd7d6..d471d58aba92 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -980,6 +980,11 @@ static bool detect_link_and_local_sink(struct dc_link *link, (link->dpcd_caps.dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER)) converter_disable_audio = true; + + /* limited link rate to HBR3 for DPIA until we implement USB4 V2 */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + link->reported_link_cap.link_rate > LINK_RATE_HIGH3) + link->reported_link_cap.link_rate = LINK_RATE_HIGH3; break; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 09405ef1e3c8..08577d1b84ec 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1696,10 +1696,39 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && + (((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xC8)) || + ((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xCC)))) { + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_COMPUTE_BIT, + (void *)(&activity_monitor_external), + false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; + } + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), + true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; + } + + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + PP_SMC_POWER_PROFILE_CUSTOM); + } else { + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, smu->power_profile_mode); + } + if (workload_type < 0) return -EINVAL; diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 7a748785c545..4676cf2900df 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -298,6 +298,10 @@ static void ti_sn_bridge_set_refclk_freq(struct ti_sn65dsi86 *pdata) if (refclk_lut[i] == refclk_rate) break; + /* avoid buffer overflow and "1" is the default rate in the datasheet. */ + if (i >= refclk_lut_size) + i = 1; + regmap_update_bits(pdata->regmap, SN_DPPLL_SRC_REG, REFCLK_FREQ_MASK, REFCLK_FREQ(i)); diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 8cf096f841a9..a2ae8c21e4dc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -220,6 +220,9 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out int optimus_funcs; struct pci_dev *parent_pdev; + if (pdev->vendor != PCI_VENDOR_ID_NVIDIA) + return; + *has_pr3 = false; parent_pdev = pci_upstream_bridge(pdev); if (parent_pdev) { diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 086b66b60d91..f75c6f09dd2a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -730,7 +730,8 @@ out: #endif nouveau_connector_set_edid(nv_connector, edid); - nouveau_connector_set_encoder(connector, nv_encoder); + if (nv_encoder) + nouveau_connector_set_encoder(connector, nv_encoder); return status; } @@ -966,7 +967,7 @@ nouveau_connector_get_modes(struct drm_connector *connector) /* Determine display colour depth for everything except LVDS now, * DP requires this before mode_valid() is called. */ - if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS) + if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode) nouveau_connector_detect_depth(connector); /* Find the native mode if this is a digital panel, if we didn't @@ -987,7 +988,7 @@ nouveau_connector_get_modes(struct drm_connector *connector) * "native" mode as some VBIOS tables require us to use the * pixel clock as part of the lookup... */ - if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS) + if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode) nouveau_connector_detect_depth(connector); if (nv_encoder->dcb->type == DCB_OUTPUT_TV) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index cc7c5b4a05fd..7aac9384600e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -137,10 +137,16 @@ nouveau_name(struct drm_device *dev) static inline bool nouveau_cli_work_ready(struct dma_fence *fence) { - if (!dma_fence_is_signaled(fence)) - return false; - dma_fence_put(fence); - return true; + bool ret = true; + + spin_lock_irq(fence->lock); + if (!dma_fence_is_signaled_locked(fence)) + ret = false; + spin_unlock_irq(fence->lock); + + if (ret == true) + dma_fence_put(fence); + return ret; } static void diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index fe76e29910ef..8f6c3aef0962 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -307,6 +307,7 @@ static void radeon_fbdev_client_unregister(struct drm_client_dev *client) if (fb_helper->info) { vga_switcheroo_client_fb_set(rdev->pdev, NULL); + drm_helper_force_disable_all(dev); drm_fb_helper_unregister_info(fb_helper); } else { drm_client_release(&fb_helper->client); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 007f26d5f1a4..2f4d09ce027a 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -829,11 +829,22 @@ static void vmbus_wait_for_unload(void) if (completion_done(&vmbus_connection.unload_event)) goto completed; - for_each_online_cpu(cpu) { + for_each_present_cpu(cpu) { struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); + /* + * In a CoCo VM the synic_message_page is not allocated + * in hv_synic_alloc(). Instead it is set/cleared in + * hv_synic_enable_regs() and hv_synic_disable_regs() + * such that it is set only when the CPU is online. If + * not all present CPUs are online, the message page + * might be NULL, so skip such CPUs. + */ page_addr = hv_cpu->synic_message_page; + if (!page_addr) + continue; + msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; @@ -867,11 +878,14 @@ completed: * maybe-pending messages on all CPUs to be able to receive new * messages after we reconnect. */ - for_each_online_cpu(cpu) { + for_each_present_cpu(cpu) { struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); page_addr = hv_cpu->synic_message_page; + if (!page_addr) + continue; + msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; msg->header.message_type = HVMSG_NONE; } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 64f9ceca887b..542a1d53b303 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -364,13 +364,20 @@ int hv_common_cpu_init(unsigned int cpu) flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL; inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); - if (!(*inputarg)) - return -ENOMEM; - if (hv_root_partition) { - outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); - *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; + /* + * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already + * allocated if this CPU was previously online and then taken offline + */ + if (!*inputarg) { + *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); + if (!(*inputarg)) + return -ENOMEM; + + if (hv_root_partition) { + outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); + *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; + } } msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); @@ -385,24 +392,17 @@ int hv_common_cpu_init(unsigned int cpu) int hv_common_cpu_die(unsigned int cpu) { - unsigned long flags; - void **inputarg, **outputarg; - void *mem; - - local_irq_save(flags); - - inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - mem = *inputarg; - *inputarg = NULL; - - if (hv_root_partition) { - outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); - *outputarg = NULL; - } - - local_irq_restore(flags); - - kfree(mem); + /* + * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory + * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg + * may be used by the Hyper-V vPCI driver in reassigning interrupts + * as part of the offlining process. The interrupt reassignment + * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and + * called this function. + * + * If a previously offlined CPU is brought back online again, the + * originally allocated memory is reused in hv_common_cpu_init(). + */ return 0; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 1c65a6dfb9fa..67f95a29aeca 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1372,7 +1372,7 @@ static int vmbus_bus_init(void) ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online", hv_synic_init, hv_synic_cleanup); if (ret < 0) - goto err_cpuhp; + goto err_alloc; hyperv_cpuhp_online = ret; ret = vmbus_connect(); @@ -1392,9 +1392,8 @@ static int vmbus_bus_init(void) err_connect: cpuhp_remove_state(hyperv_cpuhp_online); -err_cpuhp: - hv_synic_free(); err_alloc: + hv_synic_free(); if (vmbus_irq == -1) { hv_remove_vmbus_handler(); } else { diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 93a1c48d0c32..6b3f4384e46a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3295,7 +3295,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->traffic_class = tos; route->path_rec->mtu = iboe_get_mtu(ndev->mtu); route->path_rec->rate_selector = IB_SA_EQ; - route->path_rec->rate = iboe_get_rate(ndev); + route->path_rec->rate = IB_RATE_PORT_CURRENT; dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; /* In case ACK timeout is set, use this value to calculate @@ -4964,7 +4964,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (!ndev) return -ENODEV; - ib.rec.rate = iboe_get_rate(ndev); + ib.rec.rate = IB_RATE_PORT_CURRENT; ib.rec.hop_limit = 1; ib.rec.mtu = iboe_get_mtu(ndev->mtu); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4796f6a8828c..e836c9c477f6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1850,8 +1850,13 @@ static int modify_qp(struct uverbs_attr_bundle *attrs, attr->path_mtu = cmd->base.path_mtu; if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE) attr->path_mig_state = cmd->base.path_mig_state; - if (cmd->base.attr_mask & IB_QP_QKEY) + if (cmd->base.attr_mask & IB_QP_QKEY) { + if (cmd->base.qkey & IB_QP_SET_QKEY && !capable(CAP_NET_RAW)) { + ret = -EPERM; + goto release_qp; + } attr->qkey = cmd->base.qkey; + } if (cmd->base.attr_mask & IB_QP_RQ_PSN) attr->rq_psn = cmd->base.rq_psn; if (cmd->base.attr_mask & IB_QP_SQ_PSN) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index fbace69672ca..7c9c79c13941 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -222,8 +222,12 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, spin_lock_irq(&ev_queue->lock); while (list_empty(&ev_queue->event_list)) { - spin_unlock_irq(&ev_queue->lock); + if (ev_queue->is_closed) { + spin_unlock_irq(&ev_queue->lock); + return -EIO; + } + spin_unlock_irq(&ev_queue->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; @@ -233,12 +237,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, return -ERESTARTSYS; spin_lock_irq(&ev_queue->lock); - - /* If device was disassociated and no event exists set an error */ - if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) { - spin_unlock_irq(&ev_queue->lock); - return -EIO; - } } event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 5a2baf49ecaa..2c95e6f3d47a 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -135,8 +135,6 @@ struct bnxt_re_dev { struct delayed_work worker; u8 cur_prio_map; - u16 active_speed; - u8 active_width; /* FP Notification Queue (CQ & SRQ) */ struct tasklet_struct nq_task; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b1c36412025f..952811c40c54 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -199,6 +199,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num, { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + int rc; memset(port_attr, 0, sizeof(*port_attr)); @@ -228,10 +229,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num, port_attr->sm_sl = 0; port_attr->subnet_timeout = 0; port_attr->init_type_reply = 0; - port_attr->active_speed = rdev->active_speed; - port_attr->active_width = rdev->active_width; + rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed, + &port_attr->active_width); - return 0; + return rc; } int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index e34eccd178d0..3073398a2183 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1077,8 +1077,6 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) return rc; } dev_info(rdev_to_dev(rdev), "Device registered with IB successfully"); - ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, - &rdev->active_width); set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ? diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 1c06920505d2..93257fa5aae8 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -209,7 +209,8 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, !vport_qcounters_supported(dev)) || !port_num) return &dev->port[0].cnts; - return &dev->port[port_num - 1].cnts; + return is_mdev_switchdev_mode(dev->mdev) ? + &dev->port[1].cnts : &dev->port[port_num - 1].cnts; } /** @@ -262,7 +263,7 @@ static struct rdma_hw_stats * mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts; + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); return do_alloc_stats(cnts); } @@ -329,6 +330,7 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev, { u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; + struct mlx5_core_dev *mdev; __be32 val; int ret, i; @@ -336,12 +338,16 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev, dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK) return 0; + mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw); + if (!mdev) + return -EOPNOTSUPP; + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); MLX5_SET(query_q_counter_in, in, other_vport, 1); MLX5_SET(query_q_counter_in, in, vport_number, dev->port[port_num].rep->vport); MLX5_SET(query_q_counter_in, in, aggregate, 1); - ret = mlx5_cmd_exec_inout(dev->mdev, query_q_counter, in, out); + ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); if (ret) return ret; @@ -575,43 +581,53 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, bool is_vport = is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF; const struct mlx5_ib_counter *names; - int j = 0, i; + int j = 0, i, size; names = is_vport ? vport_basic_q_cnts : basic_q_cnts; - for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { + size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) : + ARRAY_SIZE(basic_q_cnts); + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = basic_q_cnts[i].offset; + offsets[j] = names[i].offset; } names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts; + size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) : + ARRAY_SIZE(out_of_seq_q_cnts); if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { - for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = out_of_seq_q_cnts[i].offset; + offsets[j] = names[i].offset; } } names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts; + size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) : + ARRAY_SIZE(retrans_q_cnts); if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { - for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = retrans_q_cnts[i].offset; + offsets[j] = names[i].offset; } } names = is_vport ? vport_extended_err_cnts : extended_err_cnts; + size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) : + ARRAY_SIZE(extended_err_cnts); if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { - for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = extended_err_cnts[i].offset; + offsets[j] = names[i].offset; } } names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts; + size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) : + ARRAY_SIZE(roce_accl_cnts); if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { - for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = roce_accl_cnts[i].offset; + offsets[j] = names[i].offset; } } @@ -661,25 +677,37 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, struct mlx5_ib_counters *cnts, u32 port_num) { - u32 num_counters, num_op_counters = 0; + bool is_vport = is_mdev_switchdev_mode(dev->mdev) && + port_num != MLX5_VPORT_PF; + u32 num_counters, num_op_counters = 0, size; - num_counters = ARRAY_SIZE(basic_q_cnts); + size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) : + ARRAY_SIZE(basic_q_cnts); + num_counters = size; + size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) : + ARRAY_SIZE(out_of_seq_q_cnts); if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) - num_counters += ARRAY_SIZE(out_of_seq_q_cnts); + num_counters += size; + size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) : + ARRAY_SIZE(retrans_q_cnts); if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) - num_counters += ARRAY_SIZE(retrans_q_cnts); + num_counters += size; + size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) : + ARRAY_SIZE(extended_err_cnts); if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) - num_counters += ARRAY_SIZE(extended_err_cnts); + num_counters += size; + size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) : + ARRAY_SIZE(roce_accl_cnts); if (MLX5_CAP_GEN(dev->mdev, roce_accl)) - num_counters += ARRAY_SIZE(roce_accl_cnts); + num_counters += size; cnts->num_q_counters = num_counters; - if (is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF) + if (is_vport) goto skip_non_qcounters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { @@ -725,11 +753,11 @@ err: static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; - int num_cnt_ports; + int num_cnt_ports = dev->num_ports; int i, j; - num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) || - vport_qcounters_supported(dev)) ? dev->num_ports : 1; + if (is_mdev_switchdev_mode(dev->mdev)) + num_cnt_ports = min(2, num_cnt_ports); MLX5_SET(dealloc_q_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); @@ -761,15 +789,22 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) { u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; - int num_cnt_ports; + int num_cnt_ports = dev->num_ports; int err = 0; int i; bool is_shared; MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; - num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) || - vport_qcounters_supported(dev)) ? dev->num_ports : 1; + + /* + * In switchdev we need to allocate two ports, one that is used for + * the device Q_counters and it is essentially the real Q_counters of + * this device, while the other is used as a helper for PF to be able to + * query all other vports. + */ + if (is_mdev_switchdev_mode(dev->mdev)) + num_cnt_ports = min(2, num_cnt_ports); for (i = 0; i < num_cnt_ports; i++) { err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i); diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 3008632a6c20..1e419e080b53 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -695,8 +695,6 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev, struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; - if (mlx5_ib_shared_ft_allowed(&dev->ib_dev)) - ft_attr.uid = MLX5_SHARED_RESOURCE_UID; ft_attr.prio = priority; ft_attr.max_fte = num_entries; ft_attr.flags = flags; @@ -2025,6 +2023,237 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, return 0; } +static int steering_anchor_create_ft(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + + if (ft_prio->anchor.ft) + return 0; + + ns = mlx5_get_flow_namespace(dev->mdev, ns_type); + if (!ns) + return -EOPNOTSUPP; + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.uid = MLX5_SHARED_RESOURCE_UID; + ft_attr.prio = 0; + ft_attr.max_fte = 2; + ft_attr.level = 1; + + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + ft_prio->anchor.ft = ft; + + return 0; +} + +static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.ft) { + mlx5_destroy_flow_table(ft_prio->anchor.ft); + ft_prio->anchor.ft = NULL; + } +} + +static int +steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + void *flow_group_in; + int err = 0; + + if (ft_prio->anchor.fg_drop) + return 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + goto out; + } + + ft_prio->anchor.fg_drop = fg; + +out: + kvfree(flow_group_in); + + return err; +} + +static void +steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.fg_drop) { + mlx5_destroy_flow_group(ft_prio->anchor.fg_drop); + ft_prio->anchor.fg_drop = NULL; + } +} + +static int +steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + void *flow_group_in; + int err = 0; + + if (ft_prio->anchor.fg_goto_table) + return 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + goto out; + } + ft_prio->anchor.fg_goto_table = fg; + +out: + kvfree(flow_group_in); + + return err; +} + +static void +steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.fg_goto_table) { + mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table); + ft_prio->anchor.fg_goto_table = NULL; + } +} + +static int +steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *handle; + + if (ft_prio->anchor.rule_drop) + return 0; + + flow_act.fg = ft_prio->anchor.fg_drop; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + + handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act, + NULL, 0); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ft_prio->anchor.rule_drop = handle; + + return 0; +} + +static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.rule_drop) { + mlx5_del_flow_rules(ft_prio->anchor.rule_drop); + ft_prio->anchor.rule_drop = NULL; + } +} + +static int +steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *handle; + + if (ft_prio->anchor.rule_goto_table) + return 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + flow_act.fg = ft_prio->anchor.fg_goto_table; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = ft_prio->flow_table; + + handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act, + &dest, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ft_prio->anchor.rule_goto_table = handle; + + return 0; +} + +static void +steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.rule_goto_table) { + mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table); + ft_prio->anchor.rule_goto_table = NULL; + } +} + +static int steering_anchor_create_res(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + enum mlx5_flow_namespace_type ns_type) +{ + int err; + + err = steering_anchor_create_ft(dev, ft_prio, ns_type); + if (err) + return err; + + err = steering_anchor_create_fg_drop(ft_prio); + if (err) + goto destroy_ft; + + err = steering_anchor_create_fg_goto_table(ft_prio); + if (err) + goto destroy_fg_drop; + + err = steering_anchor_create_rule_drop(ft_prio); + if (err) + goto destroy_fg_goto_table; + + err = steering_anchor_create_rule_goto_table(ft_prio); + if (err) + goto destroy_rule_drop; + + return 0; + +destroy_rule_drop: + steering_anchor_destroy_rule_drop(ft_prio); +destroy_fg_goto_table: + steering_anchor_destroy_fg_goto_table(ft_prio); +destroy_fg_drop: + steering_anchor_destroy_fg_drop(ft_prio); +destroy_ft: + steering_anchor_destroy_ft(ft_prio); + + return err; +} + +static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio) +{ + steering_anchor_destroy_rule_goto_table(ft_prio); + steering_anchor_destroy_rule_drop(ft_prio); + steering_anchor_destroy_fg_goto_table(ft_prio); + steering_anchor_destroy_fg_drop(ft_prio); + steering_anchor_destroy_ft(ft_prio); +} + static int steering_anchor_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs) @@ -2035,6 +2264,9 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject, return -EBUSY; mutex_lock(&obj->dev->flow_db->lock); + if (!--obj->ft_prio->anchor.rule_goto_table_ref) + steering_anchor_destroy_rule_goto_table(obj->ft_prio); + put_flow_table(obj->dev, obj->ft_prio, true); mutex_unlock(&obj->dev->flow_db->lock); @@ -2042,6 +2274,24 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject, return 0; } +static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio, + int count) +{ + while (count--) + mlx5_steering_anchor_destroy_res(&prio[count]); +} + +void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) +{ + fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT); + fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT); + fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS); + fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS); + fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS); + fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT); + fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT); +} + static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, struct mlx5_ib_flow_matcher *obj) { @@ -2182,21 +2432,31 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)( return -ENOMEM; mutex_lock(&dev->flow_db->lock); + ft_prio = _get_flow_table(dev, priority, ns_type, 0); if (IS_ERR(ft_prio)) { - mutex_unlock(&dev->flow_db->lock); err = PTR_ERR(ft_prio); goto free_obj; } ft_prio->refcount++; - ft_id = mlx5_flow_table_id(ft_prio->flow_table); - mutex_unlock(&dev->flow_db->lock); + + if (!ft_prio->anchor.rule_goto_table_ref) { + err = steering_anchor_create_res(dev, ft_prio, ns_type); + if (err) + goto put_flow_table; + } + + ft_prio->anchor.rule_goto_table_ref++; + + ft_id = mlx5_flow_table_id(ft_prio->anchor.ft); err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID, &ft_id, sizeof(ft_id)); if (err) - goto put_flow_table; + goto destroy_res; + + mutex_unlock(&dev->flow_db->lock); uobj->object = obj; obj->dev = dev; @@ -2205,8 +2465,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)( return 0; +destroy_res: + --ft_prio->anchor.rule_goto_table_ref; + mlx5_steering_anchor_destroy_res(ft_prio); put_flow_table: - mutex_lock(&dev->flow_db->lock); put_flow_table(dev, ft_prio, true); mutex_unlock(&dev->flow_db->lock); free_obj: diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h index ad320adaf321..b9734904f5f0 100644 --- a/drivers/infiniband/hw/mlx5/fs.h +++ b/drivers/infiniband/hw/mlx5/fs.h @@ -10,6 +10,7 @@ #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_fs_init(struct mlx5_ib_dev *dev); +void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev); #else static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) { @@ -21,9 +22,24 @@ static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) mutex_init(&dev->flow_db->lock); return 0; } + +inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {} #endif + static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) { + /* When a steering anchor is created, a special flow table is also + * created for the user to reference. Since the user can reference it, + * the kernel cannot trust that when the user destroys the steering + * anchor, they no longer reference the flow table. + * + * To address this issue, when a user destroys a steering anchor, only + * the flow steering rule in the table is destroyed, but the table + * itself is kept to deal with the above scenario. The remaining + * resources are only removed when the RDMA device is destroyed, which + * is a safe assumption that all references are gone. + */ + mlx5_ib_fs_cleanup_anchor(dev); kfree(dev->flow_db); } #endif /* _MLX5_IB_FS_H */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5d45de223c43..f0b394ed7452 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4275,6 +4275,9 @@ const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), + STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, + mlx5_ib_stage_delay_drop_init, + mlx5_ib_stage_delay_drop_cleanup), STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, mlx5_ib_restrack_init, NULL), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index efa4dc6e7dee..2dfa6f49a6f4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -237,8 +237,19 @@ enum { #define MLX5_IB_NUM_SNIFFER_FTS 2 #define MLX5_IB_NUM_EGRESS_FTS 1 #define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS + +struct mlx5_ib_anchor { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg_goto_table; + struct mlx5_flow_group *fg_drop; + struct mlx5_flow_handle *rule_goto_table; + struct mlx5_flow_handle *rule_drop; + unsigned int rule_goto_table_ref; +}; + struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; + struct mlx5_ib_anchor anchor; unsigned int refcount; }; @@ -1587,6 +1598,9 @@ static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass)) return 0; + if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active) + return 0; + return dev->lag_active || (MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 && MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity)); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 70ca8ffa9256..78b96bfb4e6a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1237,6 +1237,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); MLX5_SET(tisc, tisc, transport_domain, tdn); + if (!mlx5_ib_lag_should_assign_affinity(dev) && + mlx5_lag_is_lacp_owner(dev->mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); if (qp->flags & IB_QP_CREATE_SOURCE_QPN) MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 20ff0c0c4605..6ca2a05b6a2a 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -113,8 +113,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT); - spin_unlock_irqrestore(&cq->cq_lock, flags); - if ((cq->notify == IB_CQ_NEXT_COMP) || (cq->notify == IB_CQ_SOLICITED && solicited)) { cq->notify = 0; @@ -122,6 +120,8 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } + spin_unlock_irqrestore(&cq->cq_lock, flags); + return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index a38fab19bed1..cd59666158b1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -159,6 +159,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) pkt->mask = RXE_GRH_MASK; pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph); + /* remove udp header */ + skb_pull(skb, sizeof(struct udphdr)); + rxe_rcv(skb); return 0; @@ -401,6 +404,9 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt) return -EIO; } + /* remove udp header */ + skb_pull(skb, sizeof(struct udphdr)); + rxe_rcv(skb); return 0; diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 61a2eb77d999..a0f206431cf8 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -176,6 +176,9 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, spin_lock_init(&qp->rq.producer_lock); spin_lock_init(&qp->rq.consumer_lock); + skb_queue_head_init(&qp->req_pkts); + skb_queue_head_init(&qp->resp_pkts); + atomic_set(&qp->ssn, 0); atomic_set(&qp->skb_out, 0); } @@ -234,8 +237,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->req.opcode = -1; qp->comp.opcode = -1; - skb_queue_head_init(&qp->req_pkts); - rxe_init_task(&qp->req.task, qp, rxe_requester); rxe_init_task(&qp->comp.task, qp, rxe_completer); @@ -279,8 +280,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, } } - skb_queue_head_init(&qp->resp_pkts); - rxe_init_task(&qp->resp.task, qp, rxe_responder); qp->resp.opcode = OPCODE_NONE; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 1da044f6b7d4..ee68306555b9 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -489,8 +489,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (mw->access & IB_ZERO_BASED) qp->resp.offset = mw->addr; - rxe_put(mw); rxe_get(mr); + rxe_put(mw); + mw = NULL; } else { mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); if (!mr) { diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index f290cd49698e..92e1e7587af8 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -657,9 +657,13 @@ static int isert_connect_error(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn = cma_id->qp->qp_context; + struct isert_np *isert_np = cma_id->context; ib_drain_qp(isert_conn->qp); + + mutex_lock(&isert_np->mutex); list_del_init(&isert_conn->node); + mutex_unlock(&isert_np->mutex); isert_conn->cm_id = NULL; isert_put_conn(isert_conn); @@ -2431,6 +2435,7 @@ isert_free_np(struct iscsi_np *np) { struct isert_np *isert_np = np->np_context; struct isert_conn *isert_conn, *n; + LIST_HEAD(drop_conn_list); if (isert_np->cm_id) rdma_destroy_id(isert_np->cm_id); @@ -2450,7 +2455,7 @@ isert_free_np(struct iscsi_np *np) node) { isert_info("cleaning isert_conn %p state (%d)\n", isert_conn, isert_conn->state); - isert_connect_release(isert_conn); + list_move_tail(&isert_conn->node, &drop_conn_list); } } @@ -2461,11 +2466,16 @@ isert_free_np(struct iscsi_np *np) node) { isert_info("cleaning isert_conn %p state (%d)\n", isert_conn, isert_conn->state); - isert_connect_release(isert_conn); + list_move_tail(&isert_conn->node, &drop_conn_list); } } mutex_unlock(&isert_np->mutex); + list_for_each_entry_safe(isert_conn, n, &drop_conn_list, node) { + list_del_init(&isert_conn->node); + isert_connect_release(isert_conn); + } + np->np_context = NULL; kfree(isert_np); } @@ -2560,8 +2570,6 @@ static void isert_wait_conn(struct iscsit_conn *conn) isert_put_unsol_pending_cmds(conn); isert_wait4cmds(conn); isert_wait4logout(isert_conn); - - queue_work(isert_release_wq, &isert_conn->release_work); } static void isert_free_conn(struct iscsit_conn *conn) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index edb2e3a25880..cfb50bfe53c3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2040,6 +2040,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, return 0; } +/* The caller should do the cleanup in case of error */ static int create_cm(struct rtrs_clt_con *con) { struct rtrs_path *s = con->c.path; @@ -2062,14 +2063,14 @@ static int create_cm(struct rtrs_clt_con *con) err = rdma_set_reuseaddr(cm_id, 1); if (err != 0) { rtrs_err(s, "Set address reuse failed, err: %d\n", err); - goto destroy_cm; + return err; } err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr, (struct sockaddr *)&clt_path->s.dst_addr, RTRS_CONNECT_TIMEOUT_MS); if (err) { rtrs_err(s, "Failed to resolve address, err: %d\n", err); - goto destroy_cm; + return err; } /* * Combine connection status and session events. This is needed @@ -2084,29 +2085,15 @@ static int create_cm(struct rtrs_clt_con *con) if (err == 0) err = -ETIMEDOUT; /* Timedout or interrupted */ - goto errr; - } - if (con->cm_err < 0) { - err = con->cm_err; - goto errr; + return err; } - if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) { + if (con->cm_err < 0) + return con->cm_err; + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) /* Device removal */ - err = -ECONNABORTED; - goto errr; - } + return -ECONNABORTED; return 0; - -errr: - stop_cm(con); - mutex_lock(&con->con_mutex); - destroy_con_cq_qp(con); - mutex_unlock(&con->con_mutex); -destroy_cm: - destroy_cm(con); - - return err; } static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path) @@ -2334,7 +2321,7 @@ static void rtrs_clt_close_work(struct work_struct *work) static int init_conns(struct rtrs_clt_path *clt_path) { unsigned int cid; - int err; + int err, i; /* * On every new session connections increase reconnect counter @@ -2350,10 +2337,8 @@ static int init_conns(struct rtrs_clt_path *clt_path) goto destroy; err = create_cm(to_clt_con(clt_path->s.con[cid])); - if (err) { - destroy_con(to_clt_con(clt_path->s.con[cid])); + if (err) goto destroy; - } } err = alloc_path_reqs(clt_path); if (err) @@ -2364,15 +2349,21 @@ static int init_conns(struct rtrs_clt_path *clt_path) return 0; destroy: - while (cid--) { - struct rtrs_clt_con *con = to_clt_con(clt_path->s.con[cid]); + /* Make sure we do the cleanup in the order they are created */ + for (i = 0; i <= cid; i++) { + struct rtrs_clt_con *con; - stop_cm(con); + if (!clt_path->s.con[i]) + break; - mutex_lock(&con->con_mutex); - destroy_con_cq_qp(con); - mutex_unlock(&con->con_mutex); - destroy_cm(con); + con = to_clt_con(clt_path->s.con[i]); + if (con->c.cm_id) { + stop_cm(con); + mutex_lock(&con->con_mutex); + destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); + destroy_cm(con); + } destroy_con(con); } /* diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 4bf9d868cc52..3696f367ff51 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -37,8 +37,10 @@ struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask, goto err; iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir); - if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) + if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) { + kfree(iu->buf); goto err; + } iu->cqe.done = done; iu->size = size; diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 9e0c69958587..acffed750e3e 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -1828,7 +1828,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd) * Replacement block manager (new_bm) is created and old_bm destroyed outside of * cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of * shrinker associated with the block manager's bufio client vs cmd root_lock). - * - must take shrinker_mutex without holding cmd->root_lock + * - must take shrinker_rwsem without holding cmd->root_lock */ new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, CACHE_MAX_CONCURRENT_LOCKS); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index cc77cf3d4109..7d5c9c582ed2 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1168,13 +1168,10 @@ static int do_resume(struct dm_ioctl *param) /* Do we need to load a new map ? */ if (new_map) { sector_t old_size, new_size; - int srcu_idx; /* Suspend if it isn't already suspended */ - old_map = dm_get_live_table(md, &srcu_idx); - if ((param->flags & DM_SKIP_LOCKFS_FLAG) || !old_map) + if (param->flags & DM_SKIP_LOCKFS_FLAG) suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; - dm_put_live_table(md, srcu_idx); if (param->flags & DM_NOFLUSH_FLAG) suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; if (!dm_suspended_md(md)) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 9f5cb52c5763..9dd0409848ab 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1756,13 +1756,15 @@ int dm_thin_remove_range(struct dm_thin_device *td, int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) { - int r; + int r = -EINVAL; uint32_t ref_count; down_read(&pmd->root_lock); - r = dm_sm_get_count(pmd->data_sm, b, &ref_count); - if (!r) - *result = (ref_count > 1); + if (!pmd->fail_io) { + r = dm_sm_get_count(pmd->data_sm, b, &ref_count); + if (!r) + *result = (ref_count > 1); + } up_read(&pmd->root_lock); return r; @@ -1770,10 +1772,11 @@ int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *re int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e) { - int r = 0; + int r = -EINVAL; pmd_write_lock(pmd); - r = dm_sm_inc_blocks(pmd->data_sm, b, e); + if (!pmd->fail_io) + r = dm_sm_inc_blocks(pmd->data_sm, b, e); pmd_write_unlock(pmd); return r; @@ -1781,10 +1784,11 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_ int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e) { - int r = 0; + int r = -EINVAL; pmd_write_lock(pmd); - r = dm_sm_dec_blocks(pmd->data_sm, b, e); + if (!pmd->fail_io) + r = dm_sm_dec_blocks(pmd->data_sm, b, e); pmd_write_unlock(pmd); return r; @@ -1887,7 +1891,7 @@ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) * Replacement block manager (new_bm) is created and old_bm destroyed outside of * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of * shrinker associated with the block manager's bufio client vs pmd root_lock). - * - must take shrinker_mutex without holding pmd->root_lock + * - must take shrinker_rwsem without holding pmd->root_lock */ new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, THIN_MAX_CONCURRENT_LOCKS); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2b13c949bd72..39410bf186cf 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -401,8 +401,7 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da sector_t s = block_to_sectors(tc->pool, data_b); sector_t len = block_to_sectors(tc->pool, data_e - data_b); - return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT, - &op->bio); + return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio); } static void end_discard(struct discard_op *op, int r) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3b694ba3a106..fffb0cbe2ac8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1172,7 +1172,8 @@ static inline sector_t max_io_len_target_boundary(struct dm_target *ti, } static sector_t __max_io_len(struct dm_target *ti, sector_t sector, - unsigned int max_granularity) + unsigned int max_granularity, + unsigned int max_sectors) { sector_t target_offset = dm_target_offset(ti, sector); sector_t len = max_io_len_target_boundary(ti, target_offset); @@ -1186,13 +1187,13 @@ static sector_t __max_io_len(struct dm_target *ti, sector_t sector, if (!max_granularity) return len; return min_t(sector_t, len, - min(queue_max_sectors(ti->table->md->queue), + min(max_sectors ? : queue_max_sectors(ti->table->md->queue), blk_chunk_sectors_left(target_offset, max_granularity))); } static inline sector_t max_io_len(struct dm_target *ti, sector_t sector) { - return __max_io_len(ti, sector, ti->max_io_len); + return __max_io_len(ti, sector, ti->max_io_len, 0); } int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) @@ -1581,12 +1582,13 @@ static void __send_empty_flush(struct clone_info *ci) static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, unsigned int num_bios, - unsigned int max_granularity) + unsigned int max_granularity, + unsigned int max_sectors) { unsigned int len, bios; len = min_t(sector_t, ci->sector_count, - __max_io_len(ti, ci->sector, max_granularity)); + __max_io_len(ti, ci->sector, max_granularity, max_sectors)); atomic_add(num_bios, &ci->io->io_count); bios = __send_duplicate_bios(ci, ti, num_bios, &len); @@ -1623,23 +1625,27 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, { unsigned int num_bios = 0; unsigned int max_granularity = 0; + unsigned int max_sectors = 0; struct queue_limits *limits = dm_get_queue_limits(ti->table->md); switch (bio_op(ci->bio)) { case REQ_OP_DISCARD: num_bios = ti->num_discard_bios; + max_sectors = limits->max_discard_sectors; if (ti->max_discard_granularity) - max_granularity = limits->max_discard_sectors; + max_granularity = max_sectors; break; case REQ_OP_SECURE_ERASE: num_bios = ti->num_secure_erase_bios; + max_sectors = limits->max_secure_erase_sectors; if (ti->max_secure_erase_granularity) - max_granularity = limits->max_secure_erase_sectors; + max_granularity = max_sectors; break; case REQ_OP_WRITE_ZEROES: num_bios = ti->num_write_zeroes_bios; + max_sectors = limits->max_write_zeroes_sectors; if (ti->max_write_zeroes_granularity) - max_granularity = limits->max_write_zeroes_sectors; + max_granularity = max_sectors; break; default: break; @@ -1654,7 +1660,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, if (unlikely(!num_bios)) return BLK_STS_NOTSUPP; - __send_changing_extent_only(ci, ti, num_bios, max_granularity); + __send_changing_extent_only(ci, ti, num_bios, + max_granularity, max_sectors); return BLK_STS_OK; } @@ -2808,6 +2815,10 @@ retry: } map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + if (!map) { + /* avoid deadlock with fs/namespace.c:do_mount() */ + suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; + } r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED); if (r) diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index bc6950a5740f..9293b058ab99 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -817,26 +817,15 @@ static void dvb_frontend_stop(struct dvb_frontend *fe) dev_dbg(fe->dvb->device, "%s:\n", __func__); - mutex_lock(&fe->remove_mutex); - if (fe->exit != DVB_FE_DEVICE_REMOVED) fe->exit = DVB_FE_NORMAL_EXIT; mb(); - if (!fepriv->thread) { - mutex_unlock(&fe->remove_mutex); + if (!fepriv->thread) return; - } kthread_stop(fepriv->thread); - mutex_unlock(&fe->remove_mutex); - - if (fepriv->dvbdev->users < -1) { - wait_event(fepriv->dvbdev->wait_queue, - fepriv->dvbdev->users == -1); - } - sema_init(&fepriv->sem, 1); fepriv->state = FESTATE_IDLE; @@ -2780,13 +2769,9 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) struct dvb_adapter *adapter = fe->dvb; int ret; - mutex_lock(&fe->remove_mutex); - dev_dbg(fe->dvb->device, "%s:\n", __func__); - if (fe->exit == DVB_FE_DEVICE_REMOVED) { - ret = -ENODEV; - goto err_remove_mutex; - } + if (fe->exit == DVB_FE_DEVICE_REMOVED) + return -ENODEV; if (adapter->mfe_shared == 2) { mutex_lock(&adapter->mfe_lock); @@ -2794,8 +2779,7 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (adapter->mfe_dvbdev && !adapter->mfe_dvbdev->writers) { mutex_unlock(&adapter->mfe_lock); - ret = -EBUSY; - goto err_remove_mutex; + return -EBUSY; } adapter->mfe_dvbdev = dvbdev; } @@ -2818,10 +2802,8 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) while (mferetry-- && (mfedev->users != -1 || mfepriv->thread)) { if (msleep_interruptible(500)) { - if (signal_pending(current)) { - ret = -EINTR; - goto err_remove_mutex; - } + if (signal_pending(current)) + return -EINTR; } } @@ -2833,8 +2815,7 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (mfedev->users != -1 || mfepriv->thread) { mutex_unlock(&adapter->mfe_lock); - ret = -EBUSY; - goto err_remove_mutex; + return -EBUSY; } adapter->mfe_dvbdev = dvbdev; } @@ -2893,8 +2874,6 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); - - mutex_unlock(&fe->remove_mutex); return ret; err3: @@ -2916,9 +2895,6 @@ err1: err0: if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); - -err_remove_mutex: - mutex_unlock(&fe->remove_mutex); return ret; } @@ -2929,8 +2905,6 @@ static int dvb_frontend_release(struct inode *inode, struct file *file) struct dvb_frontend_private *fepriv = fe->frontend_priv; int ret; - mutex_lock(&fe->remove_mutex); - dev_dbg(fe->dvb->device, "%s:\n", __func__); if ((file->f_flags & O_ACCMODE) != O_RDONLY) { @@ -2952,18 +2926,10 @@ static int dvb_frontend_release(struct inode *inode, struct file *file) } mutex_unlock(&fe->dvb->mdev_lock); #endif + if (fe->exit != DVB_FE_NO_EXIT) + wake_up(&dvbdev->wait_queue); if (fe->ops.ts_bus_ctrl) fe->ops.ts_bus_ctrl(fe, 0); - - if (fe->exit != DVB_FE_NO_EXIT) { - mutex_unlock(&fe->remove_mutex); - wake_up(&dvbdev->wait_queue); - } else { - mutex_unlock(&fe->remove_mutex); - } - - } else { - mutex_unlock(&fe->remove_mutex); } dvb_frontend_put(fe); @@ -3064,7 +3030,6 @@ int dvb_register_frontend(struct dvb_adapter *dvb, fepriv = fe->frontend_priv; kref_init(&fe->refcount); - mutex_init(&fe->remove_mutex); /* * After initialization, there need to be two references: one diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index cfb3faeaa5bf..d172a3e9736c 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1263,7 +1263,7 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) /* Consider the standard Ethernet overhead of 8 octets preamble+SFD, * 4 octets FCS, 12 octets IFG. */ - needed_bit_time_ps = (maxlen + 24) * picos_per_byte; + needed_bit_time_ps = (u64)(maxlen + 24) * picos_per_byte; dev_dbg(ocelot->dev, "port %d: max frame size %d needs %llu ps at speed %d\n", diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 637d162bbcfa..1e7a6f1d4223 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -14294,11 +14294,16 @@ static void bnx2x_io_resume(struct pci_dev *pdev) bp->fw_seq = SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK; - if (netif_running(dev)) - bnx2x_nic_load(bp, LOAD_NORMAL); + if (netif_running(dev)) { + if (bnx2x_nic_load(bp, LOAD_NORMAL)) { + netdev_err(bp->dev, "Error during driver initialization, try unloading/reloading the driver\n"); + goto done; + } + } netif_device_attach(dev); +done: rtnl_unlock(); } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 83c27bbbc6ed..126007ab70f6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -181,8 +181,8 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) int bw_sum = 0; u8 bw; - prio_top = netdev_get_prio_tc_map(ndev, tc_nums - 1); - prio_next = netdev_get_prio_tc_map(ndev, tc_nums - 2); + prio_top = tc_nums - 1; + prio_next = tc_nums - 2; /* Support highest prio and second prio tc in cbs mode */ if (tc != prio_top && tc != prio_next) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 9abaff1f2aff..39d0fe76a38f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -525,7 +525,7 @@ void iavf_set_ethtool_ops(struct net_device *netdev); void iavf_update_stats(struct iavf_adapter *adapter); void iavf_reset_interrupt_capability(struct iavf_adapter *adapter); int iavf_init_interrupt_scheme(struct iavf_adapter *adapter); -void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask); +void iavf_irq_enable_queues(struct iavf_adapter *adapter); void iavf_free_all_tx_resources(struct iavf_adapter *adapter); void iavf_free_all_rx_resources(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2de4baff4c20..4a66873882d1 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -359,21 +359,18 @@ static void iavf_irq_disable(struct iavf_adapter *adapter) } /** - * iavf_irq_enable_queues - Enable interrupt for specified queues + * iavf_irq_enable_queues - Enable interrupt for all queues * @adapter: board private structure - * @mask: bitmap of queues to enable **/ -void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask) +void iavf_irq_enable_queues(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; int i; for (i = 1; i < adapter->num_msix_vectors; i++) { - if (mask & BIT(i - 1)) { - wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1), - IAVF_VFINT_DYN_CTLN1_INTENA_MASK | - IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK); - } + wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1), + IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK); } } @@ -387,7 +384,7 @@ void iavf_irq_enable(struct iavf_adapter *adapter, bool flush) struct iavf_hw *hw = &adapter->hw; iavf_misc_irq_enable(adapter); - iavf_irq_enable_queues(adapter, ~0); + iavf_irq_enable_queues(adapter); if (flush) iavf_flush(hw); diff --git a/drivers/net/ethernet/intel/iavf/iavf_register.h b/drivers/net/ethernet/intel/iavf/iavf_register.h index bf793332fc9d..a19e88898a0b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_register.h +++ b/drivers/net/ethernet/intel/iavf/iavf_register.h @@ -40,7 +40,7 @@ #define IAVF_VFINT_DYN_CTL01_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTL01_INTENA_SHIFT) #define IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3 #define IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK IAVF_MASK(0x3, IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT) -#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */ +#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...63 */ /* Reset: VFR */ #define IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT 0 #define IAVF_VFINT_DYN_CTLN1_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT) #define IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2 diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index bd0ed155e11b..75c9de675f20 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -96,12 +96,7 @@ static void ice_gnss_read(struct kthread_work *work) int err = 0; pf = gnss->back; - if (!pf) { - err = -EFAULT; - goto exit; - } - - if (!test_bit(ICE_FLAG_GNSS, pf->flags)) + if (!pf || !test_bit(ICE_FLAG_GNSS, pf->flags)) return; hw = &pf->hw; @@ -159,7 +154,6 @@ free_buf: free_page((unsigned long)buf); requeue: kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, delay); -exit: if (err) dev_dbg(ice_pf_to_dev(pf), "GNSS failed to read err=%d\n", err); } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a1f7c8edc22f..42c318ceff61 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4802,9 +4802,13 @@ err_init_pf: static void ice_deinit_dev(struct ice_pf *pf) { ice_free_irq_msix_misc(pf); - ice_clear_interrupt_scheme(pf); ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); + + /* Service task is already stopped, so call reset directly. */ + ice_reset(&pf->hw, ICE_RESET_PFR); + pci_wait_for_pending_transaction(pf->pdev); + ice_clear_interrupt_scheme(pf); } static void ice_init_features(struct ice_pf *pf) @@ -5094,10 +5098,6 @@ int ice_load(struct ice_pf *pf) struct ice_vsi *vsi; int err; - err = ice_reset(&pf->hw, ICE_RESET_PFR); - if (err) - return err; - err = ice_init_dev(pf); if (err) return err; @@ -5354,12 +5354,6 @@ static void ice_remove(struct pci_dev *pdev) ice_setup_mc_magic_wake(pf); ice_set_wake(pf); - /* Issue a PFR as part of the prescribed driver unload flow. Do not - * do it via ice_schedule_reset() since there is no need to rebuild - * and the service task is already stopped. - */ - ice_reset(&pf->hw, ICE_RESET_PFR); - pci_wait_for_pending_transaction(pdev); pci_disable_device(pdev); } @@ -7056,6 +7050,10 @@ int ice_down(struct ice_vsi *vsi) ice_for_each_txq(vsi, i) ice_clean_tx_ring(vsi->tx_rings[i]); + if (ice_is_xdp_ena_vsi(vsi)) + ice_for_each_xdp_txq(vsi, i) + ice_clean_tx_ring(vsi->xdp_rings[i]); + ice_for_each_rxq(vsi, i) ice_clean_rx_ring(vsi->rx_rings[i]); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 7d60da1b7bf4..319ed601eaa1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -822,6 +822,8 @@ static int igb_set_eeprom(struct net_device *netdev, */ ret_val = hw->nvm.ops.read(hw, last_word, 1, &eeprom_buff[last_word - first_word]); + if (ret_val) + goto out; } /* Device's eeprom is always little-endian, word addressable */ @@ -841,6 +843,7 @@ static int igb_set_eeprom(struct net_device *netdev, hw->nvm.ops.update(hw); igb_set_fw_version(adapter); +out: kfree(eeprom_buff); return ret_val; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 58872a4c2540..bb3db387d49c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6947,6 +6947,7 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) struct e1000_hw *hw = &adapter->hw; struct ptp_clock_event event; struct timespec64 ts; + unsigned long flags; if (pin < 0 || pin >= IGB_N_SDP) return; @@ -6954,9 +6955,12 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) if (hw->mac.type == e1000_82580 || hw->mac.type == e1000_i354 || hw->mac.type == e1000_i350) { - s64 ns = rd32(auxstmpl); + u64 ns = rd32(auxstmpl); - ns += ((s64)(rd32(auxstmph) & 0xFF)) << 32; + ns += ((u64)(rd32(auxstmph) & 0xFF)) << 32; + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_cyc2time(&adapter->tc, ns); + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); ts = ns_to_timespec64(ns); } else { ts.tv_nsec = rd32(auxstmpl); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 1c4676882082..fa764190f270 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -254,6 +254,13 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) /* reset BQL for queue */ netdev_tx_reset_queue(txring_txq(tx_ring)); + /* Zero out the buffer ring */ + memset(tx_ring->tx_buffer_info, 0, + sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); + + /* Zero out the descriptor ring */ + memset(tx_ring->desc, 0, tx_ring->size); + /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; @@ -267,7 +274,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) */ void igc_free_tx_resources(struct igc_ring *tx_ring) { - igc_clean_tx_ring(tx_ring); + igc_disable_tx_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; @@ -6723,6 +6730,9 @@ static void igc_remove(struct pci_dev *pdev) igc_ptp_stop(adapter); + pci_disable_ptm(pdev); + pci_clear_master(pdev); + set_bit(__IGC_DOWN, &adapter->state); del_timer_sync(&adapter->watchdog_timer); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index e1853da280f9..43eb6e871351 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -981,6 +981,9 @@ int octep_device_setup(struct octep_device *oct) oct->mmio[i].hw_addr = ioremap(pci_resource_start(oct->pdev, i * 2), pci_resource_len(oct->pdev, i * 2)); + if (!oct->mmio[i].hw_addr) + goto unmap_prev; + oct->mmio[i].mapped = 1; } @@ -1015,7 +1018,9 @@ int octep_device_setup(struct octep_device *oct) return 0; unsupported_dev: - for (i = 0; i < OCTEP_MMIO_REGIONS; i++) + i = OCTEP_MMIO_REGIONS; +unmap_prev: + while (i--) iounmap(oct->mmio[i].hw_addr); kfree(oct->conf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 4ad707e758b9..f01d057ad025 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1878,7 +1878,8 @@ static int nix_check_txschq_alloc_req(struct rvu *rvu, int lvl, u16 pcifunc, free_cnt = rvu_rsrc_free_count(&txsch->schq); } - if (free_cnt < req_schq || req_schq > MAX_TXSCHQ_PER_FUNC) + if (free_cnt < req_schq || req->schq[lvl] > MAX_TXSCHQ_PER_FUNC || + req->schq_contig[lvl] > MAX_TXSCHQ_PER_FUNC) return NIX_AF_ERR_TLX_ALLOC_FAIL; /* If contiguous queues are needed, check for availability */ @@ -4080,10 +4081,6 @@ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req, static u64 rvu_get_lbk_link_credits(struct rvu *rvu, u16 lbk_max_frs) { - /* CN10k supports 72KB FIFO size and max packet size of 64k */ - if (rvu->hw->lbk_bufsize == 0x12000) - return (rvu->hw->lbk_bufsize - lbk_max_frs) / 16; - return 1600; /* 16 * max LBK datarate = 16 * 100Gbps */ } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 51209119f0f2..9f11c1e40737 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -1164,10 +1164,8 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i { struct npc_exact_table *table; u16 *cnt, old_cnt; - bool promisc; table = rvu->hw->table; - promisc = table->promisc_mode[drop_mcam_idx]; cnt = &table->cnt_cmd_rules[drop_mcam_idx]; old_cnt = *cnt; @@ -1179,16 +1177,13 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i *enable_or_disable_cam = false; - if (promisc) - goto done; - - /* If all rules are deleted and not already in promisc mode; disable cam */ + /* If all rules are deleted, disable cam */ if (!*cnt && val < 0) { *enable_or_disable_cam = true; goto done; } - /* If rule got added and not already in promisc mode; enable cam */ + /* If rule got added, enable cam */ if (!old_cnt && val > 0) { *enable_or_disable_cam = true; goto done; @@ -1443,7 +1438,6 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) u32 drop_mcam_idx; bool *promisc; bool rc; - u32 cnt; table = rvu->hw->table; @@ -1466,17 +1460,8 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) return LMAC_AF_ERR_INVALID_PARAM; } *promisc = false; - cnt = __rvu_npc_exact_cmd_rules_cnt_update(rvu, drop_mcam_idx, 0, NULL); mutex_unlock(&table->lock); - /* If no dmac filter entries configured, disable drop rule */ - if (!cnt) - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, false); - else - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, !*promisc); - - dev_dbg(rvu->dev, "%s: disabled promisc mode (cgx=%d lmac=%d, cnt=%d)\n", - __func__, cgx_id, lmac_id, cnt); return 0; } @@ -1494,7 +1479,6 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) u32 drop_mcam_idx; bool *promisc; bool rc; - u32 cnt; table = rvu->hw->table; @@ -1517,17 +1501,8 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) return LMAC_AF_ERR_INVALID_PARAM; } *promisc = true; - cnt = __rvu_npc_exact_cmd_rules_cnt_update(rvu, drop_mcam_idx, 0, NULL); mutex_unlock(&table->lock); - /* If no dmac filter entries configured, disable drop rule */ - if (!cnt) - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, false); - else - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, !*promisc); - - dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d cnt=%d)\n", - __func__, cgx_id, lmac_id, cnt); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1d879374acaa..229520405d4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -276,18 +276,6 @@ static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return pci_num_vf(dev->pdev) ? true : false; } -static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) -{ - /* LACP owner conditions: - * 1) Function is physical. - * 2) LAG is supported by FW. - * 3) LAG is managed by driver (currently the only option). - */ - return MLX5_CAP_GEN(dev, vport_group_manager) && - (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && - MLX5_CAP_GEN(dev, lag_master); -} - int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev); static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index aace87139cea..fa6d6202b129 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -347,17 +347,6 @@ out: return -ENOMEM; } -static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv) -{ - struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue; - - gq->ring_size = TS_RING_SIZE; - gq->ts_ring = dma_alloc_coherent(&priv->pdev->dev, - sizeof(struct rswitch_ts_desc) * - (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL); - return !gq->ts_ring ? -ENOMEM : 0; -} - static void rswitch_desc_set_dptr(struct rswitch_desc *desc, dma_addr_t addr) { desc->dptrl = cpu_to_le32(lower_32_bits(addr)); @@ -533,6 +522,28 @@ static void rswitch_gwca_linkfix_free(struct rswitch_private *priv) gwca->linkfix_table = NULL; } +static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv) +{ + struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue; + struct rswitch_ts_desc *desc; + + gq->ring_size = TS_RING_SIZE; + gq->ts_ring = dma_alloc_coherent(&priv->pdev->dev, + sizeof(struct rswitch_ts_desc) * + (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL); + + if (!gq->ts_ring) + return -ENOMEM; + + rswitch_gwca_ts_queue_fill(priv, 0, TS_RING_SIZE); + desc = &gq->ts_ring[gq->ring_size]; + desc->desc.die_dt = DT_LINKFIX; + rswitch_desc_set_dptr(&desc->desc, gq->ring_dma); + INIT_LIST_HEAD(&priv->gwca.ts_info_list); + + return 0; +} + static struct rswitch_gwca_queue *rswitch_gwca_get(struct rswitch_private *priv) { struct rswitch_gwca_queue *gq; @@ -1780,9 +1791,6 @@ static int rswitch_init(struct rswitch_private *priv) if (err < 0) goto err_ts_queue_alloc; - rswitch_gwca_ts_queue_fill(priv, 0, TS_RING_SIZE); - INIT_LIST_HEAD(&priv->gwca.ts_info_list); - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { err = rswitch_device_alloc(priv, i); if (err < 0) { diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index fcea3ea809d7..41b33a75333c 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -301,6 +301,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; rc = pci_enable_msi(efx->pci_dev); if (rc == 0) { efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; @@ -322,6 +323,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = efx_separate_tx_channels ? 1 : 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; efx->legacy_irq = efx->pci_dev->irq; } diff --git a/drivers/net/ethernet/sfc/siena/efx_channels.c b/drivers/net/ethernet/sfc/siena/efx_channels.c index 06ed74994e36..1776f7f8a7a9 100644 --- a/drivers/net/ethernet/sfc/siena/efx_channels.c +++ b/drivers/net/ethernet/sfc/siena/efx_channels.c @@ -302,6 +302,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; rc = pci_enable_msi(efx->pci_dev); if (rc == 0) { efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; @@ -323,6 +324,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = efx_siena_separate_tx_channels ? 1 : 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; efx->legacy_irq = efx->pci_dev->irq; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 52cab9de05f2..87510951f4e8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3873,7 +3873,6 @@ irq_error: stmmac_hw_teardown(dev); init_error: - free_dma_desc_resources(priv, &priv->dma_conf); phylink_disconnect_phy(priv->phylink); init_phy_error: pm_runtime_put(priv->device); @@ -3891,6 +3890,9 @@ static int stmmac_open(struct net_device *dev) return PTR_ERR(dma_conf); ret = __stmmac_open(dev, dma_conf); + if (ret) + free_dma_desc_resources(priv, dma_conf); + kfree(dma_conf); return ret; } @@ -5633,12 +5635,15 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) stmmac_release(dev); ret = __stmmac_open(dev, dma_conf); - kfree(dma_conf); if (ret) { + free_dma_desc_resources(priv, dma_conf); + kfree(dma_conf); netdev_err(priv->dev, "failed reopening the interface after MTU change\n"); return ret; } + kfree(dma_conf); + stmmac_set_rx_mode(dev); } diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 11cbcd9e2c72..bebcfd5e6b57 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2068,7 +2068,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) /* Initialize the Serdes PHY for the port */ ret = am65_cpsw_init_serdes_phy(dev, port_np, port); if (ret) - return ret; + goto of_node_put; port->slave.mac_only = of_property_read_bool(port_np, "ti,mac-only"); diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c index 71712ea25403..d5b05e803219 100644 --- a/drivers/net/ipvlan/ipvlan_l3s.c +++ b/drivers/net/ipvlan/ipvlan_l3s.c @@ -102,6 +102,10 @@ static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, skb->dev = addr->master->dev; skb->skb_iif = skb->dev->ifindex; +#if IS_ENABLED(CONFIG_IPV6) + if (addr->atype == IPVL_IPV6) + IP6CB(skb)->iif = skb->dev->ifindex; +#endif len = skb->len + ETH_HLEN; ipvlan_count_rx(addr->master, len, true, false); out: diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3427993f94f7..984dfa5d6c11 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3997,17 +3997,15 @@ static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) return -ENOMEM; secy->tx_sc.stats = netdev_alloc_pcpu_stats(struct pcpu_tx_sc_stats); - if (!secy->tx_sc.stats) { - free_percpu(macsec->stats); + if (!secy->tx_sc.stats) return -ENOMEM; - } secy->tx_sc.md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); - if (!secy->tx_sc.md_dst) { - free_percpu(secy->tx_sc.stats); - free_percpu(macsec->stats); + if (!secy->tx_sc.md_dst) + /* macsec and secy percpu stats will be freed when unregistering + * net_device in macsec_free_netdev() + */ return -ENOMEM; - } if (sci == MACSEC_UNDEF_SCI) sci = dev_to_sci(dev, MACSEC_PORT_ES); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index b4831110003c..5efdeb59f4b2 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -188,6 +188,7 @@ static int phylink_interface_max_speed(phy_interface_t interface) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_QUSGMII: case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_GMII: return SPEED_1000; @@ -204,7 +205,6 @@ static int phylink_interface_max_speed(phy_interface_t interface) case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_10GKR: case PHY_INTERFACE_MODE_USXGMII: - case PHY_INTERFACE_MODE_QUSGMII: return SPEED_10000; case PHY_INTERFACE_MODE_25GBASER: @@ -3299,6 +3299,41 @@ void phylink_decode_usxgmii_word(struct phylink_link_state *state, EXPORT_SYMBOL_GPL(phylink_decode_usxgmii_word); /** + * phylink_decode_usgmii_word() - decode the USGMII word from a MAC PCS + * @state: a pointer to a struct phylink_link_state. + * @lpa: a 16 bit value which stores the USGMII auto-negotiation word + * + * Helper for MAC PCS supporting the USGMII protocol and the auto-negotiation + * code word. Decode the USGMII code word and populate the corresponding fields + * (speed, duplex) into the phylink_link_state structure. The structure for this + * word is the same as the USXGMII word, except it only supports speeds up to + * 1Gbps. + */ +static void phylink_decode_usgmii_word(struct phylink_link_state *state, + uint16_t lpa) +{ + switch (lpa & MDIO_USXGMII_SPD_MASK) { + case MDIO_USXGMII_10: + state->speed = SPEED_10; + break; + case MDIO_USXGMII_100: + state->speed = SPEED_100; + break; + case MDIO_USXGMII_1000: + state->speed = SPEED_1000; + break; + default: + state->link = false; + return; + } + + if (lpa & MDIO_USXGMII_FULL_DUPLEX) + state->duplex = DUPLEX_FULL; + else + state->duplex = DUPLEX_HALF; +} + +/** * phylink_mii_c22_pcs_decode_state() - Decode MAC PCS state from MII registers * @state: a pointer to a &struct phylink_link_state. * @bmsr: The value of the %MII_BMSR register @@ -3335,9 +3370,11 @@ void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_QUSGMII: phylink_decode_sgmii_word(state, lpa); break; + case PHY_INTERFACE_MODE_QUSGMII: + phylink_decode_usgmii_word(state, lpa); + break; default: state->link = false; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f1865d047971..2e7c7b0cdc54 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1220,7 +1220,9 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9080, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9083, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, + {QMI_QUIRK_SET_DTR(0x05c6, 0x9091, 2)}, /* Compal RXM-G1 */ {QMI_FIXED_INTF(0x05c6, 0x90b2, 3)}, /* ublox R410M */ + {QMI_QUIRK_SET_DTR(0x05c6, 0x90db, 2)}, /* Compal RXM-G1 */ {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index d62a904d2e42..56326f38fe8a 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -384,6 +384,9 @@ static int lapbeth_new_device(struct net_device *dev) ASSERT_RTNL(); + if (dev->type != ARPHRD_ETHER) + return -EINVAL; + ndev = alloc_netdev(sizeof(*lapbeth), "lapb%d", NET_NAME_UNKNOWN, lapbeth_setup); if (!ndev) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 23266d0c9ce4..9a20468345e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -2692,7 +2692,7 @@ static void rs_drv_get_rate(void *mvm_r, struct ieee80211_sta *sta, lq_sta = mvm_sta; - spin_lock(&lq_sta->pers.lock); + spin_lock_bh(&lq_sta->pers.lock); iwl_mvm_hwrate_to_tx_rate_v1(lq_sta->last_rate_n_flags, info->band, &info->control.rates[0]); info->control.rates[0].count = 1; @@ -2707,7 +2707,7 @@ static void rs_drv_get_rate(void *mvm_r, struct ieee80211_sta *sta, iwl_mvm_hwrate_to_tx_rate_v1(last_ucode_rate, info->band, &txrc->reported_rate); } - spin_unlock(&lq_sta->pers.lock); + spin_unlock_bh(&lq_sta->pers.lock); } static void *rs_drv_alloc_sta(void *mvm_rate, struct ieee80211_sta *sta, @@ -3264,11 +3264,11 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, /* If it's locked we are in middle of init flow * just wait for next tx status to update the lq_sta data */ - if (!spin_trylock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock)) + if (!spin_trylock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock)) return; __iwl_mvm_rs_tx_status(mvm, sta, tid, info, ndp); - spin_unlock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); + spin_unlock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); } #ifdef CONFIG_MAC80211_DEBUGFS @@ -4117,9 +4117,9 @@ void iwl_mvm_rs_rate_init(struct iwl_mvm *mvm, } else { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - spin_lock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); + spin_lock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); rs_drv_rate_init(mvm, sta, band); - spin_unlock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); + spin_unlock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); } } diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 2e01960f1aeb..7feb643f1370 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -811,6 +811,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs) if (!fragment->target) { pr_err("symbols in overlay, but not in live tree\n"); ret = -EINVAL; + of_node_put(node); goto err_out; } diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index bc32662c6bb7..2d93d0c4f10d 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -489,7 +489,10 @@ struct hv_pcibus_device { struct fwnode_handle *fwnode; /* Protocol version negotiated with the host */ enum pci_protocol_version_t protocol_version; + + struct mutex state_lock; enum hv_pcibus_state state; + struct hv_device *hdev; resource_size_t low_mmio_space; resource_size_t high_mmio_space; @@ -545,19 +548,10 @@ struct hv_dr_state { struct hv_pcidev_description func[]; }; -enum hv_pcichild_state { - hv_pcichild_init = 0, - hv_pcichild_requirements, - hv_pcichild_resourced, - hv_pcichild_ejecting, - hv_pcichild_maximum -}; - struct hv_pci_dev { /* List protected by pci_rescan_remove_lock */ struct list_head list_entry; refcount_t refs; - enum hv_pcichild_state state; struct pci_slot *pci_slot; struct hv_pcidev_description desc; bool reported_missing; @@ -635,6 +629,11 @@ static void hv_arch_irq_unmask(struct irq_data *data) pbus = pdev->bus; hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); int_desc = data->chip_data; + if (!int_desc) { + dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n", + __func__, data->irq); + return; + } local_irq_save(flags); @@ -2004,12 +2003,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) hv_pci_onchannelcallback(hbus); spin_unlock_irqrestore(&channel->sched_lock, flags); - if (hpdev->state == hv_pcichild_ejecting) { - dev_err_once(&hbus->hdev->device, - "the device is being ejected\n"); - goto enable_tasklet; - } - udelay(100); } @@ -2615,6 +2608,8 @@ static void pci_devices_present_work(struct work_struct *work) if (!dr) return; + mutex_lock(&hbus->state_lock); + /* First, mark all existing children as reported missing. */ spin_lock_irqsave(&hbus->device_list_lock, flags); list_for_each_entry(hpdev, &hbus->children, list_entry) { @@ -2696,6 +2691,8 @@ static void pci_devices_present_work(struct work_struct *work) break; } + mutex_unlock(&hbus->state_lock); + kfree(dr); } @@ -2844,7 +2841,7 @@ static void hv_eject_device_work(struct work_struct *work) hpdev = container_of(work, struct hv_pci_dev, wrk); hbus = hpdev->hbus; - WARN_ON(hpdev->state != hv_pcichild_ejecting); + mutex_lock(&hbus->state_lock); /* * Ejection can come before or after the PCI bus has been set up, so @@ -2882,6 +2879,8 @@ static void hv_eject_device_work(struct work_struct *work) put_pcichild(hpdev); put_pcichild(hpdev); /* hpdev has been freed. Do not use it any more. */ + + mutex_unlock(&hbus->state_lock); } /** @@ -2902,7 +2901,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev) return; } - hpdev->state = hv_pcichild_ejecting; get_pcichild(hpdev); INIT_WORK(&hpdev->wrk, hv_eject_device_work); queue_work(hbus->wq, &hpdev->wrk); @@ -3331,8 +3329,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev) struct pci_bus_d0_entry *d0_entry; struct hv_pci_compl comp_pkt; struct pci_packet *pkt; + bool retry = true; int ret; +enter_d0_retry: /* * Tell the host that the bus is ready to use, and moved into the * powered-on state. This includes telling the host which region @@ -3359,6 +3359,38 @@ static int hv_pci_enter_d0(struct hv_device *hdev) if (ret) goto exit; + /* + * In certain case (Kdump) the pci device of interest was + * not cleanly shut down and resource is still held on host + * side, the host could return invalid device status. + * We need to explicitly request host to release the resource + * and try to enter D0 again. + */ + if (comp_pkt.completion_status < 0 && retry) { + retry = false; + + dev_err(&hdev->device, "Retrying D0 Entry\n"); + + /* + * Hv_pci_bus_exit() calls hv_send_resource_released() + * to free up resources of its child devices. + * In the kdump kernel we need to set the + * wslot_res_allocated to 255 so it scans all child + * devices to release resources allocated in the + * normal kernel before panic happened. + */ + hbus->wslot_res_allocated = 255; + + ret = hv_pci_bus_exit(hdev, true); + + if (ret == 0) { + kfree(pkt); + goto enter_d0_retry; + } + dev_err(&hdev->device, + "Retrying D0 failed with ret %d\n", ret); + } + if (comp_pkt.completion_status < 0) { dev_err(&hdev->device, "PCI Pass-through VSP failed D0 Entry with status %x\n", @@ -3401,6 +3433,24 @@ static int hv_pci_query_relations(struct hv_device *hdev) if (!ret) ret = wait_for_response(hdev, &comp); + /* + * In the case of fast device addition/removal, it's possible that + * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we + * already got a PCI_BUS_RELATIONS* message from the host and the + * channel callback already scheduled a work to hbus->wq, which can be + * running pci_devices_present_work() -> survey_child_resources() -> + * complete(&hbus->survey_event), even after hv_pci_query_relations() + * exits and the stack variable 'comp' is no longer valid; as a result, + * a hang or a page fault may happen when the complete() calls + * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from + * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is + * -ENODEV, there can't be any more work item scheduled to hbus->wq + * after the flush_workqueue(): see vmbus_onoffer_rescind() -> + * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() -> + * channel->rescind = true. + */ + flush_workqueue(hbus->wq); + return ret; } @@ -3586,7 +3636,6 @@ static int hv_pci_probe(struct hv_device *hdev, struct hv_pcibus_device *hbus; u16 dom_req, dom; char *name; - bool enter_d0_retry = true; int ret; bridge = devm_pci_alloc_host_bridge(&hdev->device, 0); @@ -3598,6 +3647,7 @@ static int hv_pci_probe(struct hv_device *hdev, return -ENOMEM; hbus->bridge = bridge; + mutex_init(&hbus->state_lock); hbus->state = hv_pcibus_init; hbus->wslot_res_allocated = -1; @@ -3703,49 +3753,15 @@ static int hv_pci_probe(struct hv_device *hdev, if (ret) goto free_fwnode; -retry: ret = hv_pci_query_relations(hdev); if (ret) goto free_irq_domain; - ret = hv_pci_enter_d0(hdev); - /* - * In certain case (Kdump) the pci device of interest was - * not cleanly shut down and resource is still held on host - * side, the host could return invalid device status. - * We need to explicitly request host to release the resource - * and try to enter D0 again. - * Since the hv_pci_bus_exit() call releases structures - * of all its child devices, we need to start the retry from - * hv_pci_query_relations() call, requesting host to send - * the synchronous child device relations message before this - * information is needed in hv_send_resources_allocated() - * call later. - */ - if (ret == -EPROTO && enter_d0_retry) { - enter_d0_retry = false; - - dev_err(&hdev->device, "Retrying D0 Entry\n"); - - /* - * Hv_pci_bus_exit() calls hv_send_resources_released() - * to free up resources of its child devices. - * In the kdump kernel we need to set the - * wslot_res_allocated to 255 so it scans all child - * devices to release resources allocated in the - * normal kernel before panic happened. - */ - hbus->wslot_res_allocated = 255; - ret = hv_pci_bus_exit(hdev, true); - - if (ret == 0) - goto retry; + mutex_lock(&hbus->state_lock); - dev_err(&hdev->device, - "Retrying D0 failed with ret %d\n", ret); - } + ret = hv_pci_enter_d0(hdev); if (ret) - goto free_irq_domain; + goto release_state_lock; ret = hv_pci_allocate_bridge_windows(hbus); if (ret) @@ -3763,12 +3779,15 @@ retry: if (ret) goto free_windows; + mutex_unlock(&hbus->state_lock); return 0; free_windows: hv_pci_free_bridge_windows(hbus); exit_d0: (void) hv_pci_bus_exit(hdev, true); +release_state_lock: + mutex_unlock(&hbus->state_lock); free_irq_domain: irq_domain_remove(hbus->irq_domain); free_fwnode: @@ -4018,20 +4037,26 @@ static int hv_pci_resume(struct hv_device *hdev) if (ret) goto out; + mutex_lock(&hbus->state_lock); + ret = hv_pci_enter_d0(hdev); if (ret) - goto out; + goto release_state_lock; ret = hv_send_resources_allocated(hdev); if (ret) - goto out; + goto release_state_lock; prepopulate_bars(hbus); hv_pci_restore_msi_state(hbus); hbus->state = hv_pcibus_installed; + mutex_unlock(&hbus->state_lock); return 0; + +release_state_lock: + mutex_unlock(&hbus->state_lock); out: vmbus_close(hdev->channel); return ret; diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index b0a58c62b1e2..f3b280af0773 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -1057,21 +1057,21 @@ static const struct rpmh_vreg_init_data pm8450_vreg_data[] = { }; static const struct rpmh_vreg_init_data pm8550_vreg_data[] = { - RPMH_VREG("ldo1", "ldo%s1", &pmic5_pldo, "vdd-l1-l4-l10"), + RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo515, "vdd-l1-l4-l10"), RPMH_VREG("ldo2", "ldo%s2", &pmic5_pldo, "vdd-l2-l13-l14"), - RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"), - RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo, "vdd-l1-l4-l10"), + RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo515, "vdd-l3"), + RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo515, "vdd-l1-l4-l10"), RPMH_VREG("ldo5", "ldo%s5", &pmic5_pldo, "vdd-l5-l16"), - RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo_lv, "vdd-l6-l7"), - RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo_lv, "vdd-l6-l7"), - RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo_lv, "vdd-l8-l9"), + RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo, "vdd-l6-l7"), + RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo, "vdd-l6-l7"), + RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo, "vdd-l8-l9"), RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l8-l9"), - RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo, "vdd-l1-l4-l10"), - RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo, "vdd-l11"), + RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo515, "vdd-l1-l4-l10"), + RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo515, "vdd-l11"), RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo, "vdd-l12"), RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l2-l13-l14"), RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo, "vdd-l2-l13-l14"), - RPMH_VREG("ldo15", "ldo%s15", &pmic5_pldo, "vdd-l15"), + RPMH_VREG("ldo15", "ldo%s15", &pmic5_nldo515, "vdd-l15"), RPMH_VREG("ldo16", "ldo%s16", &pmic5_pldo, "vdd-l5-l16"), RPMH_VREG("ldo17", "ldo%s17", &pmic5_pldo, "vdd-l17"), RPMH_VREG("bob1", "bob%s1", &pmic5_bob, "vdd-bob1"), @@ -1086,9 +1086,9 @@ static const struct rpmh_vreg_init_data pm8550vs_vreg_data[] = { RPMH_VREG("smps4", "smp%s4", &pmic5_ftsmps525_lv, "vdd-s4"), RPMH_VREG("smps5", "smp%s5", &pmic5_ftsmps525_lv, "vdd-s5"), RPMH_VREG("smps6", "smp%s6", &pmic5_ftsmps525_mv, "vdd-s6"), - RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1"), - RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l2"), - RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"), + RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo515, "vdd-l1"), + RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo515, "vdd-l2"), + RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo515, "vdd-l3"), {} }; @@ -1101,9 +1101,9 @@ static const struct rpmh_vreg_init_data pm8550ve_vreg_data[] = { RPMH_VREG("smps6", "smp%s6", &pmic5_ftsmps525_lv, "vdd-s6"), RPMH_VREG("smps7", "smp%s7", &pmic5_ftsmps525_lv, "vdd-s7"), RPMH_VREG("smps8", "smp%s8", &pmic5_ftsmps525_lv, "vdd-s8"), - RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1"), - RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l2"), - RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"), + RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo515, "vdd-l1"), + RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo515, "vdd-l2"), + RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo515, "vdd-l3"), {} }; diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 8acb9eba691b..c2096e4bba31 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -771,14 +771,6 @@ static int __init ism_init(void) static void __exit ism_exit(void) { - struct ism_dev *ism; - - mutex_lock(&ism_dev_list.mutex); - list_for_each_entry(ism, &ism_dev_list.list, list) { - ism_dev_exit(ism); - } - mutex_unlock(&ism_dev_list.mutex); - pci_unregister_driver(&ism_driver); debug_unregister(ism_debug_info); } diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 5e115e8b2ba4..7c6efde75da6 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1678,6 +1678,7 @@ struct aac_dev u32 handle_pci_error; bool init_reset; u8 soft_reset_support; + u8 use_map_queue; }; #define aac_adapter_interrupt(dev) \ diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index deb32c9f4b3e..3f062e4013ab 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev) struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd) { struct fib *fibptr; + u32 blk_tag; + int i; - fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag]; + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); + i = blk_mq_unique_tag_to_tag(blk_tag); + fibptr = &dev->fibs[i]; /* * Null out fields that depend on being zero at the start of * each I/O diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 68f4dbcfff49..c4a36c0be527 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -19,6 +19,7 @@ #include <linux/compat.h> #include <linux/blkdev.h> +#include <linux/blk-mq-pci.h> #include <linux/completion.h> #include <linux/init.h> #include <linux/interrupt.h> @@ -504,6 +505,15 @@ common_config: return 0; } +static void aac_map_queues(struct Scsi_Host *shost) +{ + struct aac_dev *aac = (struct aac_dev *)shost->hostdata; + + blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + aac->pdev, 0); + aac->use_map_queue = true; +} + /** * aac_change_queue_depth - alter queue depths * @sdev: SCSI device we are considering @@ -1488,6 +1498,7 @@ static const struct scsi_host_template aac_driver_template = { .bios_param = aac_biosparm, .shost_groups = aac_host_groups, .slave_configure = aac_slave_configure, + .map_queues = aac_map_queues, .change_queue_depth = aac_change_queue_depth, .sdev_groups = aac_dev_groups, .eh_abort_handler = aac_eh_abort, @@ -1775,6 +1786,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_lun = AAC_MAX_LUN; pci_set_drvdata(pdev, shost); + shost->nr_hw_queues = aac->max_msix; + shost->host_tagset = 1; error = scsi_add_host(shost, &pdev->dev); if (error) @@ -1906,6 +1919,7 @@ static void aac_remove_one(struct pci_dev *pdev) struct aac_dev *aac = (struct aac_dev *)shost->hostdata; aac_cancel_rescan_worker(aac); + aac->use_map_queue = false; scsi_remove_host(shost); __aac_shutdown(aac); diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 11ef58204e96..61949f374188 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -493,6 +493,10 @@ static int aac_src_deliver_message(struct fib *fib) #endif u16 vector_no; + struct scsi_cmnd *scmd; + u32 blk_tag; + struct Scsi_Host *shost = dev->scsi_host_ptr; + struct blk_mq_queue_map *qmap; atomic_inc(&q->numpending); @@ -505,8 +509,25 @@ static int aac_src_deliver_message(struct fib *fib) if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3) && dev->sa_firmware) vector_no = aac_get_vector(dev); - else - vector_no = fib->vector_no; + else { + if (!fib->vector_no || !fib->callback_data) { + if (shost && dev->use_map_queue) { + qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + vector_no = qmap->mq_map[raw_smp_processor_id()]; + } + /* + * We hardcode the vector_no for + * reserved commands as a valid shost is + * absent during the init + */ + else + vector_no = 0; + } else { + scmd = (struct scsi_cmnd *)fib->callback_data; + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); + vector_no = blk_mq_unique_tag_to_hwq(blk_tag); + } + } if (native_hba) { if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) { diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 9a322a3a2150..595dca92e8db 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -889,7 +889,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocbq) { uint32_t evt_req_id = 0; - uint32_t cmd; + u16 cmd; struct lpfc_dmabuf *dmabuf = NULL; struct lpfc_bsg_event *evt; struct event_data *evt_dat = NULL; @@ -915,7 +915,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ct_req = (struct lpfc_sli_ct_request *)bdeBuf1->virt; evt_req_id = ct_req->FsType; - cmd = ct_req->CommandResponse.bits.CmdRsp; + cmd = be16_to_cpu(ct_req->CommandResponse.bits.CmdRsp); spin_lock_irqsave(&phba->ct_ev_lock, flags); list_for_each_entry(evt, &phba->ct_ev_waiters, node) { @@ -3186,8 +3186,8 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job) ctreq->RevisionId.bits.InId = 0; ctreq->FsType = SLI_CT_ELX_LOOPBACK; ctreq->FsSubType = 0; - ctreq->CommandResponse.bits.CmdRsp = ELX_LOOPBACK_DATA; - ctreq->CommandResponse.bits.Size = size; + ctreq->CommandResponse.bits.CmdRsp = cpu_to_be16(ELX_LOOPBACK_DATA); + ctreq->CommandResponse.bits.Size = cpu_to_be16(size); segment_offset = ELX_LOOPBACK_HEADER_SZ; } else segment_offset = 0; diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index e6bc622954cf..659196a2f63a 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1567,6 +1567,8 @@ static int storvsc_device_configure(struct scsi_device *sdevice) { blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ)); + /* storvsc devices don't support MAINTENANCE_IN SCSI cmd */ + sdevice->no_report_opcodes = 1; sdevice->no_write_same = 1; /* diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 6ddb2dfc0f00..32449bef4415 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1756,8 +1756,11 @@ static int cqspi_probe(struct platform_device *pdev) cqspi->slow_sram = true; if (of_device_is_compatible(pdev->dev.of_node, - "xlnx,versal-ospi-1.0")) - dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + "xlnx,versal-ospi-1.0")) { + ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + goto probe_reset_failed; + } } ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0, diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c index 5f2aee69c1c1..15f5e9cb54ad 100644 --- a/drivers/spi/spi-dw-mmio.c +++ b/drivers/spi/spi-dw-mmio.c @@ -274,7 +274,7 @@ static void dw_spi_elba_set_cs(struct spi_device *spi, bool enable) */ spi_set_chipselect(spi, 0, 0); dw_spi_set_cs(spi, enable); - spi_get_chipselect(spi, cs); + spi_set_chipselect(spi, 0, cs); } static int dw_spi_elba_init(struct platform_device *pdev, diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 4339485d202c..674cfe05f411 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1002,7 +1002,9 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, static int dspi_setup(struct spi_device *spi) { struct fsl_dspi *dspi = spi_controller_get_devdata(spi->controller); + u32 period_ns = DIV_ROUND_UP(NSEC_PER_SEC, spi->max_speed_hz); unsigned char br = 0, pbr = 0, pcssck = 0, cssck = 0; + u32 quarter_period_ns = DIV_ROUND_UP(period_ns, 4); u32 cs_sck_delay = 0, sck_cs_delay = 0; struct fsl_dspi_platform_data *pdata; unsigned char pasc = 0, asc = 0; @@ -1031,6 +1033,19 @@ static int dspi_setup(struct spi_device *spi) sck_cs_delay = pdata->sck_cs_delay; } + /* Since tCSC and tASC apply to continuous transfers too, avoid SCK + * glitches of half a cycle by never allowing tCSC + tASC to go below + * half a SCK period. + */ + if (cs_sck_delay < quarter_period_ns) + cs_sck_delay = quarter_period_ns; + if (sck_cs_delay < quarter_period_ns) + sck_cs_delay = quarter_period_ns; + + dev_dbg(&spi->dev, + "DSPI controller timing params: CS-to-SCK delay %u ns, SCK-to-CS delay %u ns\n", + cs_sck_delay, sck_cs_delay); + clkrate = clk_get_rate(dspi->clk); hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate); diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index a98b781b103a..b293428760bc 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -646,6 +646,8 @@ static int spi_geni_init(struct spi_geni_master *mas) geni_se_select_mode(se, GENI_GPI_DMA); dev_dbg(mas->dev, "Using GPI DMA mode for SPI\n"); break; + } else if (ret == -EPROBE_DEFER) { + goto out_pm; } /* * in case of failure to get gpi dma channel, we can still do the diff --git a/drivers/staging/octeon/TODO b/drivers/staging/octeon/TODO index 67a0a1f6b922..044e48e3d65f 100644 --- a/drivers/staging/octeon/TODO +++ b/drivers/staging/octeon/TODO @@ -6,4 +6,3 @@ TODO: - make driver self-contained instead of being split between staging and arch/mips/cavium-octeon. -Contact: Aaro Koskinen <aaro.koskinen@iki.fi> diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 86adff2a86ed..687adc9e086c 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -504,6 +504,8 @@ target_setup_session(struct se_portal_group *tpg, free_sess: transport_free_session(sess); + return ERR_PTR(rc); + free_cnt: target_free_cmd_counter(cmd_cnt); return ERR_PTR(rc); diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c index f99dc7e4ae89..db97499f4f0a 100644 --- a/drivers/thermal/intel/intel_soc_dts_iosf.c +++ b/drivers/thermal/intel/intel_soc_dts_iosf.c @@ -398,7 +398,7 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init( spin_lock_init(&sensors->intr_notify_lock); mutex_init(&sensors->dts_update_lock); sensors->intr_type = intr_type; - sensors->tj_max = tj_max; + sensors->tj_max = tj_max * 1000; if (intr_type == INTEL_SOC_DTS_INTERRUPT_NONE) notification = false; else diff --git a/drivers/thunderbolt/dma_test.c b/drivers/thunderbolt/dma_test.c index 3bedecb236e0..14bb6dec6c4b 100644 --- a/drivers/thunderbolt/dma_test.c +++ b/drivers/thunderbolt/dma_test.c @@ -192,9 +192,9 @@ static int dma_test_start_rings(struct dma_test *dt) } ret = tb_xdomain_enable_paths(dt->xd, dt->tx_hopid, - dt->tx_ring ? dt->tx_ring->hop : 0, + dt->tx_ring ? dt->tx_ring->hop : -1, dt->rx_hopid, - dt->rx_ring ? dt->rx_ring->hop : 0); + dt->rx_ring ? dt->rx_ring->hop : -1); if (ret) { dma_test_free_rings(dt); return ret; @@ -218,9 +218,9 @@ static void dma_test_stop_rings(struct dma_test *dt) tb_ring_stop(dt->tx_ring); ret = tb_xdomain_disable_paths(dt->xd, dt->tx_hopid, - dt->tx_ring ? dt->tx_ring->hop : 0, + dt->tx_ring ? dt->tx_ring->hop : -1, dt->rx_hopid, - dt->rx_ring ? dt->rx_ring->hop : 0); + dt->rx_ring ? dt->rx_ring->hop : -1); if (ret) dev_warn(&dt->svc->dev, "failed to disable DMA paths\n"); diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index c0aee5dc5237..e58beac44295 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -56,9 +56,14 @@ static int ring_interrupt_index(const struct tb_ring *ring) static void nhi_mask_interrupt(struct tb_nhi *nhi, int mask, int ring) { - if (nhi->quirks & QUIRK_AUTO_CLEAR_INT) - return; - iowrite32(mask, nhi->iobase + REG_RING_INTERRUPT_MASK_CLEAR_BASE + ring); + if (nhi->quirks & QUIRK_AUTO_CLEAR_INT) { + u32 val; + + val = ioread32(nhi->iobase + REG_RING_INTERRUPT_BASE + ring); + iowrite32(val & ~mask, nhi->iobase + REG_RING_INTERRUPT_BASE + ring); + } else { + iowrite32(mask, nhi->iobase + REG_RING_INTERRUPT_MASK_CLEAR_BASE + ring); + } } static void nhi_clear_interrupt(struct tb_nhi *nhi, int ring) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 7bfbc9ca9ba4..c1af712ca728 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -737,6 +737,7 @@ static void tb_scan_port(struct tb_port *port) { struct tb_cm *tcm = tb_priv(port->sw->tb); struct tb_port *upstream_port; + bool discovery = false; struct tb_switch *sw; int ret; @@ -804,8 +805,10 @@ static void tb_scan_port(struct tb_port *port) * tunnels and know which switches were authorized already by * the boot firmware. */ - if (!tcm->hotplug_active) + if (!tcm->hotplug_active) { dev_set_uevent_suppress(&sw->dev, true); + discovery = true; + } /* * At the moment Thunderbolt 2 and beyond (devices with LC) we @@ -835,10 +838,14 @@ static void tb_scan_port(struct tb_port *port) * CL0s and CL1 are enabled and supported together. * Silently ignore CLx enabling in case CLx is not supported. */ - ret = tb_switch_enable_clx(sw, TB_CL1); - if (ret && ret != -EOPNOTSUPP) - tb_sw_warn(sw, "failed to enable %s on upstream port\n", - tb_switch_clx_name(TB_CL1)); + if (discovery) { + tb_sw_dbg(sw, "discovery, not touching CL states\n"); + } else { + ret = tb_switch_enable_clx(sw, TB_CL1); + if (ret && ret != -EOPNOTSUPP) + tb_sw_warn(sw, "failed to enable %s on upstream port\n", + tb_switch_clx_name(TB_CL1)); + } if (tb_switch_is_clx_enabled(sw, TB_CL1)) /* diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 9099ae73e78f..4f222673d651 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -526,7 +526,7 @@ static int tb_dp_xchg_caps(struct tb_tunnel *tunnel) * Perform connection manager handshake between IN and OUT ports * before capabilities exchange can take place. */ - ret = tb_dp_cm_handshake(in, out, 1500); + ret = tb_dp_cm_handshake(in, out, 3000); if (ret) return ret; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 7486a2b8556c..7fd30fcc10c6 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -310,7 +310,7 @@ static const struct lpuart_soc_data ls1021a_data = { static const struct lpuart_soc_data ls1028a_data = { .devtype = LS1028A_LPUART, .iotype = UPIO_MEM32, - .rx_watermark = 1, + .rx_watermark = 0, }; static struct lpuart_soc_data imx7ulp_data = { diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c index a58e9277dfad..f1387f1024db 100644 --- a/drivers/tty/serial/lantiq.c +++ b/drivers/tty/serial/lantiq.c @@ -250,6 +250,7 @@ lqasc_err_int(int irq, void *_port) struct ltq_uart_port *ltq_port = to_ltq_uart_port(port); spin_lock_irqsave(<q_port->lock, flags); + __raw_writel(ASC_IRNCR_EIR, port->membase + LTQ_ASC_IRNCR); /* clear any pending interrupts */ asc_update_bits(0, ASCWHBSTATE_CLRPE | ASCWHBSTATE_CLRFE | ASCWHBSTATE_CLRROE, port->membase + LTQ_ASC_WHBSTATE); diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 7b2ce013cc5b..d68958e151a7 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1929,6 +1929,11 @@ static int dwc3_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); + /* + * HACK: Clear the driver data, which is currently accessed by parent + * glue drivers, before allowing the parent to suspend. + */ + platform_set_drvdata(pdev, NULL); pm_runtime_set_suspended(&pdev->dev); dwc3_free_event_buffers(dwc); diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 959fc925ca7c..79b22abf9727 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -308,7 +308,16 @@ static void dwc3_qcom_interconnect_exit(struct dwc3_qcom *qcom) /* Only usable in contexts where the role can not change. */ static bool dwc3_qcom_is_host(struct dwc3_qcom *qcom) { - struct dwc3 *dwc = platform_get_drvdata(qcom->dwc3); + struct dwc3 *dwc; + + /* + * FIXME: Fix this layering violation. + */ + dwc = platform_get_drvdata(qcom->dwc3); + + /* Core driver may not have probed yet. */ + if (!dwc) + return false; return dwc->xhci; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d831f5acf7b5..b78599dd705c 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -198,6 +198,7 @@ static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep, list_del(&req->list); req->remaining = 0; req->needs_extra_trb = false; + req->num_trbs = 0; if (req->request.status == -EINPROGRESS) req->request.status = status; diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 52e6d2e84e35..83fd1de14784 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -37,6 +37,14 @@ static const struct bus_type gadget_bus_type; * @vbus: for udcs who care about vbus status, this value is real vbus status; * for udcs who do not care about vbus status, this value is always true * @started: the UDC's started state. True if the UDC had started. + * @allow_connect: Indicates whether UDC is allowed to be pulled up. + * Set/cleared by gadget_(un)bind_driver() after gadget driver is bound or + * unbound. + * @connect_lock: protects udc->started, gadget->connect, + * gadget->allow_connect and gadget->deactivate. The routines + * usb_gadget_connect_locked(), usb_gadget_disconnect_locked(), + * usb_udc_connect_control_locked(), usb_gadget_udc_start_locked() and + * usb_gadget_udc_stop_locked() are called with this lock held. * * This represents the internal data structure which is used by the UDC-class * to hold information about udc driver and gadget together. @@ -48,6 +56,9 @@ struct usb_udc { struct list_head list; bool vbus; bool started; + bool allow_connect; + struct work_struct vbus_work; + struct mutex connect_lock; }; static struct class *udc_class; @@ -687,17 +698,8 @@ out: } EXPORT_SYMBOL_GPL(usb_gadget_vbus_disconnect); -/** - * usb_gadget_connect - software-controlled connect to USB host - * @gadget:the peripheral being connected - * - * Enables the D+ (or potentially D-) pullup. The host will start - * enumerating this gadget when the pullup is active and a VBUS session - * is active (the link is powered). - * - * Returns zero on success, else negative errno. - */ -int usb_gadget_connect(struct usb_gadget *gadget) +static int usb_gadget_connect_locked(struct usb_gadget *gadget) + __must_hold(&gadget->udc->connect_lock) { int ret = 0; @@ -706,10 +708,12 @@ int usb_gadget_connect(struct usb_gadget *gadget) goto out; } - if (gadget->deactivated) { + if (gadget->deactivated || !gadget->udc->allow_connect || !gadget->udc->started) { /* - * If gadget is deactivated we only save new state. - * Gadget will be connected automatically after activation. + * If the gadget isn't usable (because it is deactivated, + * unbound, or not yet started), we only save the new state. + * The gadget will be connected automatically when it is + * activated/bound/started. */ gadget->connected = true; goto out; @@ -724,22 +728,31 @@ out: return ret; } -EXPORT_SYMBOL_GPL(usb_gadget_connect); /** - * usb_gadget_disconnect - software-controlled disconnect from USB host - * @gadget:the peripheral being disconnected - * - * Disables the D+ (or potentially D-) pullup, which the host may see - * as a disconnect (when a VBUS session is active). Not all systems - * support software pullup controls. + * usb_gadget_connect - software-controlled connect to USB host + * @gadget:the peripheral being connected * - * Following a successful disconnect, invoke the ->disconnect() callback - * for the current gadget driver so that UDC drivers don't need to. + * Enables the D+ (or potentially D-) pullup. The host will start + * enumerating this gadget when the pullup is active and a VBUS session + * is active (the link is powered). * * Returns zero on success, else negative errno. */ -int usb_gadget_disconnect(struct usb_gadget *gadget) +int usb_gadget_connect(struct usb_gadget *gadget) +{ + int ret; + + mutex_lock(&gadget->udc->connect_lock); + ret = usb_gadget_connect_locked(gadget); + mutex_unlock(&gadget->udc->connect_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(usb_gadget_connect); + +static int usb_gadget_disconnect_locked(struct usb_gadget *gadget) + __must_hold(&gadget->udc->connect_lock) { int ret = 0; @@ -751,7 +764,7 @@ int usb_gadget_disconnect(struct usb_gadget *gadget) if (!gadget->connected) goto out; - if (gadget->deactivated) { + if (gadget->deactivated || !gadget->udc->started) { /* * If gadget is deactivated we only save new state. * Gadget will stay disconnected after activation. @@ -774,6 +787,30 @@ out: return ret; } + +/** + * usb_gadget_disconnect - software-controlled disconnect from USB host + * @gadget:the peripheral being disconnected + * + * Disables the D+ (or potentially D-) pullup, which the host may see + * as a disconnect (when a VBUS session is active). Not all systems + * support software pullup controls. + * + * Following a successful disconnect, invoke the ->disconnect() callback + * for the current gadget driver so that UDC drivers don't need to. + * + * Returns zero on success, else negative errno. + */ +int usb_gadget_disconnect(struct usb_gadget *gadget) +{ + int ret; + + mutex_lock(&gadget->udc->connect_lock); + ret = usb_gadget_disconnect_locked(gadget); + mutex_unlock(&gadget->udc->connect_lock); + + return ret; +} EXPORT_SYMBOL_GPL(usb_gadget_disconnect); /** @@ -791,13 +828,14 @@ int usb_gadget_deactivate(struct usb_gadget *gadget) { int ret = 0; + mutex_lock(&gadget->udc->connect_lock); if (gadget->deactivated) - goto out; + goto unlock; if (gadget->connected) { - ret = usb_gadget_disconnect(gadget); + ret = usb_gadget_disconnect_locked(gadget); if (ret) - goto out; + goto unlock; /* * If gadget was being connected before deactivation, we want @@ -807,7 +845,8 @@ int usb_gadget_deactivate(struct usb_gadget *gadget) } gadget->deactivated = true; -out: +unlock: + mutex_unlock(&gadget->udc->connect_lock); trace_usb_gadget_deactivate(gadget, ret); return ret; @@ -827,8 +866,9 @@ int usb_gadget_activate(struct usb_gadget *gadget) { int ret = 0; + mutex_lock(&gadget->udc->connect_lock); if (!gadget->deactivated) - goto out; + goto unlock; gadget->deactivated = false; @@ -837,9 +877,11 @@ int usb_gadget_activate(struct usb_gadget *gadget) * while it was being deactivated, we call usb_gadget_connect(). */ if (gadget->connected) - ret = usb_gadget_connect(gadget); + ret = usb_gadget_connect_locked(gadget); + mutex_unlock(&gadget->udc->connect_lock); -out: +unlock: + mutex_unlock(&gadget->udc->connect_lock); trace_usb_gadget_activate(gadget, ret); return ret; @@ -1078,12 +1120,22 @@ EXPORT_SYMBOL_GPL(usb_gadget_set_state); /* ------------------------------------------------------------------------- */ -static void usb_udc_connect_control(struct usb_udc *udc) +/* Acquire connect_lock before calling this function. */ +static void usb_udc_connect_control_locked(struct usb_udc *udc) __must_hold(&udc->connect_lock) { if (udc->vbus) - usb_gadget_connect(udc->gadget); + usb_gadget_connect_locked(udc->gadget); else - usb_gadget_disconnect(udc->gadget); + usb_gadget_disconnect_locked(udc->gadget); +} + +static void vbus_event_work(struct work_struct *work) +{ + struct usb_udc *udc = container_of(work, struct usb_udc, vbus_work); + + mutex_lock(&udc->connect_lock); + usb_udc_connect_control_locked(udc); + mutex_unlock(&udc->connect_lock); } /** @@ -1094,6 +1146,14 @@ static void usb_udc_connect_control(struct usb_udc *udc) * * The udc driver calls it when it wants to connect or disconnect gadget * according to vbus status. + * + * This function can be invoked from interrupt context by irq handlers of + * the gadget drivers, however, usb_udc_connect_control() has to run in + * non-atomic context due to the following: + * a. Some of the gadget driver implementations expect the ->pullup + * callback to be invoked in non-atomic context. + * b. usb_gadget_disconnect() acquires udc_lock which is a mutex. + * Hence offload invocation of usb_udc_connect_control() to workqueue. */ void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status) { @@ -1101,7 +1161,7 @@ void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status) if (udc) { udc->vbus = status; - usb_udc_connect_control(udc); + schedule_work(&udc->vbus_work); } } EXPORT_SYMBOL_GPL(usb_udc_vbus_handler); @@ -1124,7 +1184,7 @@ void usb_gadget_udc_reset(struct usb_gadget *gadget, EXPORT_SYMBOL_GPL(usb_gadget_udc_reset); /** - * usb_gadget_udc_start - tells usb device controller to start up + * usb_gadget_udc_start_locked - tells usb device controller to start up * @udc: The UDC to be started * * This call is issued by the UDC Class driver when it's about @@ -1135,8 +1195,11 @@ EXPORT_SYMBOL_GPL(usb_gadget_udc_reset); * necessary to have it powered on. * * Returns zero on success, else negative errno. + * + * Caller should acquire connect_lock before invoking this function. */ -static inline int usb_gadget_udc_start(struct usb_udc *udc) +static inline int usb_gadget_udc_start_locked(struct usb_udc *udc) + __must_hold(&udc->connect_lock) { int ret; @@ -1153,7 +1216,7 @@ static inline int usb_gadget_udc_start(struct usb_udc *udc) } /** - * usb_gadget_udc_stop - tells usb device controller we don't need it anymore + * usb_gadget_udc_stop_locked - tells usb device controller we don't need it anymore * @udc: The UDC to be stopped * * This call is issued by the UDC Class driver after calling @@ -1162,8 +1225,11 @@ static inline int usb_gadget_udc_start(struct usb_udc *udc) * The details are implementation specific, but it can go as * far as powering off UDC completely and disable its data * line pullups. + * + * Caller should acquire connect lock before invoking this function. */ -static inline void usb_gadget_udc_stop(struct usb_udc *udc) +static inline void usb_gadget_udc_stop_locked(struct usb_udc *udc) + __must_hold(&udc->connect_lock) { if (!udc->started) { dev_err(&udc->dev, "UDC had already stopped\n"); @@ -1322,12 +1388,14 @@ int usb_add_gadget(struct usb_gadget *gadget) udc->gadget = gadget; gadget->udc = udc; + mutex_init(&udc->connect_lock); udc->started = false; mutex_lock(&udc_lock); list_add_tail(&udc->list, &udc_list); mutex_unlock(&udc_lock); + INIT_WORK(&udc->vbus_work, vbus_event_work); ret = device_add(&udc->dev); if (ret) @@ -1459,6 +1527,7 @@ void usb_del_gadget(struct usb_gadget *gadget) flush_work(&gadget->work); device_del(&gadget->dev); ida_free(&gadget_id_numbers, gadget->id_number); + cancel_work_sync(&udc->vbus_work); device_unregister(&udc->dev); } EXPORT_SYMBOL_GPL(usb_del_gadget); @@ -1523,11 +1592,16 @@ static int gadget_bind_driver(struct device *dev) if (ret) goto err_bind; - ret = usb_gadget_udc_start(udc); - if (ret) + mutex_lock(&udc->connect_lock); + ret = usb_gadget_udc_start_locked(udc); + if (ret) { + mutex_unlock(&udc->connect_lock); goto err_start; + } usb_gadget_enable_async_callbacks(udc); - usb_udc_connect_control(udc); + udc->allow_connect = true; + usb_udc_connect_control_locked(udc); + mutex_unlock(&udc->connect_lock); kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); return 0; @@ -1558,12 +1632,16 @@ static void gadget_unbind_driver(struct device *dev) kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); - usb_gadget_disconnect(gadget); + udc->allow_connect = false; + cancel_work_sync(&udc->vbus_work); + mutex_lock(&udc->connect_lock); + usb_gadget_disconnect_locked(gadget); usb_gadget_disable_async_callbacks(udc); if (gadget->irq) synchronize_irq(gadget->irq); udc->driver->unbind(gadget); - usb_gadget_udc_stop(udc); + usb_gadget_udc_stop_locked(udc); + mutex_unlock(&udc->connect_lock); mutex_lock(&udc_lock); driver->is_bound = false; @@ -1649,11 +1727,15 @@ static ssize_t soft_connect_store(struct device *dev, } if (sysfs_streq(buf, "connect")) { - usb_gadget_udc_start(udc); - usb_gadget_connect(udc->gadget); + mutex_lock(&udc->connect_lock); + usb_gadget_udc_start_locked(udc); + usb_gadget_connect_locked(udc->gadget); + mutex_unlock(&udc->connect_lock); } else if (sysfs_streq(buf, "disconnect")) { - usb_gadget_disconnect(udc->gadget); - usb_gadget_udc_stop(udc); + mutex_lock(&udc->connect_lock); + usb_gadget_disconnect_locked(udc->gadget); + usb_gadget_udc_stop_locked(udc); + mutex_unlock(&udc->connect_lock); } else { dev_err(dev, "unsupported command '%s'\n", buf); ret = -EINVAL; diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index aac8bc185afa..eb008e873361 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2877,9 +2877,9 @@ static int renesas_usb3_probe(struct platform_device *pdev) struct rzv2m_usb3drd *ddata = dev_get_drvdata(pdev->dev.parent); usb3->drd_reg = ddata->reg; - ret = devm_request_irq(ddata->dev, ddata->drd_irq, + ret = devm_request_irq(&pdev->dev, ddata->drd_irq, renesas_usb3_otg_irq, 0, - dev_name(ddata->dev), usb3); + dev_name(&pdev->dev), usb3); if (ret < 0) return ret; } diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 644a55447fd7..fd42e3a0bd18 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -248,6 +248,8 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 +#define QUECTEL_PRODUCT_EM061K_LTA 0x0123 +#define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 @@ -266,6 +268,8 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_RM520N 0x0801 #define QUECTEL_PRODUCT_EC200U 0x0901 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 +#define QUECTEL_PRODUCT_EM061K_LWW 0x6008 +#define QUECTEL_PRODUCT_EM061K_LCN 0x6009 #define QUECTEL_PRODUCT_EC200T 0x6026 #define QUECTEL_PRODUCT_RM500K 0x7001 @@ -1189,6 +1193,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, diff --git a/drivers/usb/typec/pd.c b/drivers/usb/typec/pd.c index 0bcde1ff4d39..8cc66e4467c4 100644 --- a/drivers/usb/typec/pd.c +++ b/drivers/usb/typec/pd.c @@ -95,7 +95,7 @@ peak_current_show(struct device *dev, struct device_attribute *attr, char *buf) static ssize_t fast_role_swap_current_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%u\n", to_pdo(dev)->pdo >> PDO_FIXED_FRS_CURR_SHIFT) & 3; + return sysfs_emit(buf, "%u\n", (to_pdo(dev)->pdo >> PDO_FIXED_FRS_CURR_SHIFT) & 3); } static DEVICE_ATTR_RO(fast_role_swap_current); diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 2b472ec01dc4..b664ecbb798b 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -132,10 +132,8 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) if (ret) return ret; - if (cci & UCSI_CCI_BUSY) { - ucsi->ops->async_write(ucsi, UCSI_CANCEL, NULL, 0); - return -EBUSY; - } + if (cmd != UCSI_CANCEL && cci & UCSI_CCI_BUSY) + return ucsi_exec_command(ucsi, UCSI_CANCEL); if (!(cci & UCSI_CCI_COMMAND_COMPLETE)) return -EIO; @@ -149,6 +147,11 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) return ucsi_read_error(ucsi); } + if (cmd == UCSI_CANCEL && cci & UCSI_CCI_CANCEL_COMPLETE) { + ret = ucsi_acknowledge_command(ucsi); + return ret ? ret : -EBUSY; + } + return UCSI_CCI_LENGTH(cci); } diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index d1c7068b4346..58452b86e672 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -115,8 +115,8 @@ responded: } } - if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && - rtt_us < server->probe.rtt) { + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; diff --git a/fs/afs/write.c b/fs/afs/write.c index c822d6006033..8750b99c3f56 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -731,6 +731,7 @@ static int afs_writepages_region(struct address_space *mapping, * (changing page->mapping to NULL), or even swizzled * back from swapper_space to tmpfs file mapping */ +try_again: if (wbc->sync_mode != WB_SYNC_NONE) { ret = folio_lock_killable(folio); if (ret < 0) { @@ -757,12 +758,14 @@ static int afs_writepages_region(struct address_space *mapping, #ifdef CONFIG_AFS_FSCACHE folio_wait_fscache(folio); #endif - } else { - start += folio_size(folio); + goto try_again; } + + start += folio_size(folio); if (wbc->sync_mode == WB_SYNC_NONE) { if (skips >= 5 || need_resched()) { *_next = start; + folio_batch_release(&fbatch); _leave(" = 0 [%llx]", *_next); return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b1b227505f3..dabc79c1af1b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -242,7 +242,6 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) { struct btrfs_fs_info *fs_info = eb->fs_info; - u64 start = eb->start; int i, num_pages = num_extent_pages(eb); int ret = 0; @@ -251,12 +250,14 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; + u64 start = max_t(u64, eb->start, page_offset(p)); + u64 end = min_t(u64, eb->start + eb->len, page_offset(p) + PAGE_SIZE); + u32 len = end - start; - ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE, - start, p, start - page_offset(p), mirror_num); + ret = btrfs_repair_io_failure(fs_info, 0, start, len, + start, p, offset_in_page(start), mirror_num); if (ret) break; - start += PAGE_SIZE; } return ret; @@ -995,13 +996,18 @@ int btrfs_global_root_insert(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *tmp; + int ret = 0; write_lock(&fs_info->global_root_lock); tmp = rb_find_add(&root->rb_node, &fs_info->global_root_tree, global_root_cmp); write_unlock(&fs_info->global_root_lock); - ASSERT(!tmp); - return tmp ? -EEXIST : 0; + if (tmp) { + ret = -EEXIST; + btrfs_warn(fs_info, "global root %llu %llu already exists", + root->root_key.objectid, root->root_key.offset); + } + return ret; } void btrfs_global_root_delete(struct btrfs_root *root) @@ -2841,6 +2847,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) /* We can't trust the free space cache either */ btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE); + btrfs_warn(fs_info, "try to load backup roots slot %d", i); ret = read_backup_root(fs_info, i); backup_index = ret; if (ret < 0) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 19c707bc8801..7fcafcc5292c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1864,7 +1864,7 @@ static int can_nocow_file_extent(struct btrfs_path *path, ret = btrfs_cross_ref_exist(root, btrfs_ino(inode), key->offset - args->extent_offset, - args->disk_bytenr, false, path); + args->disk_bytenr, args->strict, path); WARN_ON_ONCE(ret > 0 && is_freespace_inode); if (ret != 0) goto out; @@ -7264,7 +7264,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, static int btrfs_get_blocks_direct_write(struct extent_map **map, struct inode *inode, struct btrfs_dio_data *dio_data, - u64 start, u64 len, + u64 start, u64 *lenp, unsigned int iomap_flags) { const bool nowait = (iomap_flags & IOMAP_NOWAIT); @@ -7275,6 +7275,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, struct btrfs_block_group *bg; bool can_nocow = false; bool space_reserved = false; + u64 len = *lenp; u64 prev_len; int ret = 0; @@ -7345,15 +7346,19 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, free_extent_map(em); *map = NULL; - if (nowait) - return -EAGAIN; + if (nowait) { + ret = -EAGAIN; + goto out; + } /* * If we could not allocate data space before locking the file * range and we can't do a NOCOW write, then we have to fail. */ - if (!dio_data->data_space_reserved) - return -ENOSPC; + if (!dio_data->data_space_reserved) { + ret = -ENOSPC; + goto out; + } /* * We have to COW and we have already reserved data space before, @@ -7394,6 +7399,7 @@ out: btrfs_delalloc_release_extents(BTRFS_I(inode), len); btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true); } + *lenp = len; return ret; } @@ -7570,7 +7576,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, if (write) { ret = btrfs_get_blocks_direct_write(&em, inode, dio_data, - start, len, flags); + start, &len, flags); if (ret < 0) goto unlock_err; unlock_extents = true; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 7c666517d3d3..bceaa8c2007e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -134,8 +134,14 @@ struct scrub_stripe { * The errors hit during the initial read of the stripe. * * Would be utilized for error reporting and repair. + * + * The remaining init_nr_* records the number of errors hit, only used + * by error reporting. */ unsigned long init_error_bitmap; + unsigned int init_nr_io_errors; + unsigned int init_nr_csum_errors; + unsigned int init_nr_meta_errors; /* * The following error bitmaps are all for the current status. @@ -1003,12 +1009,9 @@ skip: sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits; sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits; sctx->stat.no_csum += nr_nodatacsum_sectors; - sctx->stat.read_errors += - bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors); - sctx->stat.csum_errors += - bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors); - sctx->stat.verify_errors += - bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors); + sctx->stat.read_errors += stripe->init_nr_io_errors; + sctx->stat.csum_errors += stripe->init_nr_csum_errors; + sctx->stat.verify_errors += stripe->init_nr_meta_errors; sctx->stat.uncorrectable_errors += bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors); sctx->stat.corrected_errors += nr_repaired_sectors; @@ -1041,6 +1044,12 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work) scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap); /* Save the initial failed bitmap for later repair and report usage. */ stripe->init_error_bitmap = stripe->error_bitmap; + stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap, + stripe->nr_sectors); + stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap, + stripe->nr_sectors); + stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap, + stripe->nr_sectors); if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) goto out; @@ -1490,6 +1499,9 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe) { stripe->extent_sector_bitmap = 0; stripe->init_error_bitmap = 0; + stripe->init_nr_io_errors = 0; + stripe->init_nr_csum_errors = 0; + stripe->init_nr_meta_errors = 0; stripe->error_bitmap = 0; stripe->io_error_bitmap = 0; stripe->csum_error_bitmap = 0; @@ -1730,7 +1742,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx) break; } } - } else { + } else if (!sctx->readonly) { for (int i = 0; i < nr_stripes; i++) { unsigned long repaired; @@ -2254,7 +2266,7 @@ next: } out: ret2 = flush_scrub_stripes(sctx); - if (!ret2) + if (!ret) ret = ret2; if (sctx->raid56_data_stripes) { for (int i = 0; i < nr_data_stripes(map); i++) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ec18e2210602..efeb1a9d040a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1841,6 +1841,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_clear_sb_rdonly(sb); set_bit(BTRFS_FS_OPEN, &fs_info->flags); + + /* + * If we've gone from readonly -> read/write, we need to get + * our sync/async discard lists in the right state. + */ + btrfs_discard_resume(fs_info); } out: /* diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 841e799dece5..e60beb14852a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5975,12 +5975,12 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT; /* stripe_offset is the offset of this block in its stripe */ - stripe_offset = offset - (stripe_nr << BTRFS_STRIPE_LEN_SHIFT); + stripe_offset = offset - ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT); stripe_nr_end = round_up(offset + length, BTRFS_STRIPE_LEN) >> BTRFS_STRIPE_LEN_SHIFT; stripe_cnt = stripe_nr_end - stripe_nr; - stripe_end_offset = (stripe_nr_end << BTRFS_STRIPE_LEN_SHIFT) - + stripe_end_offset = ((u64)stripe_nr_end << BTRFS_STRIPE_LEN_SHIFT) - (offset + length); /* * after this, stripe_nr is the number of stripes on this @@ -6023,7 +6023,7 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, for (i = 0; i < *num_stripes; i++) { stripes[i].physical = map->stripes[stripe_index].physical + - stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT); + stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT); stripes[i].dev = map->stripes[stripe_index].dev; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | @@ -6196,9 +6196,11 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op, * not ensured to be power of 2. */ *full_stripe_start = - rounddown(*stripe_nr, nr_data_stripes(map)) << + (u64)rounddown(*stripe_nr, nr_data_stripes(map)) << BTRFS_STRIPE_LEN_SHIFT; + ASSERT(*full_stripe_start + full_stripe_len > offset); + ASSERT(*full_stripe_start <= offset); /* * For writes to RAID56, allow to write a full stripe set, but * no straddling of stripe sets. @@ -6221,7 +6223,7 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup * { dst->dev = map->stripes[stripe_index].dev; dst->physical = map->stripes[stripe_index].physical + - stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT); + stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT); } int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 980483455cc0..266d45c7685b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1805,7 +1805,11 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry, { int ret = default_wake_function(wq_entry, mode, sync, key); - list_del_init(&wq_entry->entry); + /* + * Pairs with list_empty_careful in ep_poll, and ensures future loop + * iterations see the cause of this wakeup. + */ + list_del_init_careful(&wq_entry->entry); return ret; } diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index c1edde817be8..1f72f977c6db 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -324,17 +324,15 @@ static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb, struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info **grp_info; - long indexv, indexh; - - if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) { - ext4_error(sb, "invalid group %u", group); - return NULL; - } - indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); - indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); - return grp_info[indexh]; + struct ext4_group_info **grp_info; + long indexv, indexh; + + if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) + return NULL; + indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); + indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); + grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); + return grp_info[indexh]; } /* @@ -886,7 +884,10 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, if (!ext4_bg_has_super(sb, group)) return 0; - return EXT4_SB(sb)->s_gdb_count; + if (ext4_has_feature_meta_bg(sb)) + return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); + else + return EXT4_SB(sb)->s_gdb_count; } /** diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index e956f886a1a1..5710833ac1cc 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -285,6 +285,14 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc, if (nbh == NULL) { /* blocksize == pagesize */ xa_erase_irq(&btnc->i_pages, newkey); unlock_page(ctxt->bh->b_page); - } else - brelse(nbh); + } else { + /* + * When canceling a buffer that a prepare operation has + * allocated to copy a node block to another location, use + * nilfs_btnode_delete() to initialize and release the buffer + * so that the buffer flags will not be in an inconsistent + * state when it is reallocated. + */ + nilfs_btnode_delete(nbh); + } } diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 5cf30827f244..b4e54d079b7d 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -370,7 +370,15 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) struct folio *folio = fbatch.folios[i]; folio_lock(folio); - nilfs_clear_dirty_page(&folio->page, silent); + + /* + * This folio may have been removed from the address + * space by truncation or invalidation when the lock + * was acquired. Skip processing in that case. + */ + if (likely(folio->mapping == mapping)) + nilfs_clear_dirty_page(&folio->page, silent); + folio_unlock(folio); } folio_batch_release(&fbatch); diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 1362ccb64ec7..6e59dc19a732 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -101,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf) if (unlikely(!bh)) return -ENOMEM; + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); nilfs_segbuf_add_segsum_buffer(segbuf, bh); return 0; } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index ac949fd7603f..c2553024bd25 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -981,10 +981,13 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, unsigned int isz, srsz; bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; + + lock_buffer(bh_sr); raw_sr = (struct nilfs_super_root *)bh_sr->b_data; isz = nilfs->ns_inode_size; srsz = NILFS_SR_BYTES(isz); + raw_sr->sr_sum = 0; /* Ensure initialization within this update */ raw_sr->sr_bytes = cpu_to_le16(srsz); raw_sr->sr_nongc_ctime = cpu_to_le64(nilfs_doing_gc() ? @@ -998,6 +1001,8 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + NILFS_SR_SUFILE_OFFSET(isz), 1); memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz); + set_buffer_uptodate(bh_sr); + unlock_buffer(bh_sr); } static void nilfs_redirty_inodes(struct list_head *head) @@ -1780,6 +1785,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + clear_buffer_uptodate(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1791,6 +1797,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) b_assoc_buffers) { clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { + clear_buffer_uptodate(bh); if (bh->b_page != bd_page) { end_page_writeback(bd_page); bd_page = bh->b_page; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index dc359b56fdfa..2c6078a6b8ec 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -779,6 +779,15 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) goto out_header; sui->ncleansegs -= nsegs - newnsegs; + + /* + * If the sufile is successfully truncated, immediately adjust + * the segment allocation space while locking the semaphore + * "mi_sem" so that nilfs_sufile_alloc() never allocates + * segments in the truncated space. + */ + sui->allocmax = newnsegs - 1; + sui->allocmin = 0; } kaddr = kmap_atomic(header_bh->b_page); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 77f1e5778d1c..9ba4933087af 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -372,10 +372,31 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) goto out; } nsbp = (void *)nsbh->b_data + offset; - memset(nsbp, 0, nilfs->ns_blocksize); + lock_buffer(nsbh); if (sb2i >= 0) { + /* + * The position of the second superblock only changes by 4KiB, + * which is larger than the maximum superblock data size + * (= 1KiB), so there is no need to use memmove() to allow + * overlap between source and destination. + */ memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); + + /* + * Zero fill after copy to avoid overwriting in case of move + * within the same block. + */ + memset(nsbh->b_data, 0, offset); + memset((void *)nsbp + nilfs->ns_sbsize, 0, + nsbh->b_size - offset - nilfs->ns_sbsize); + } else { + memset(nsbh->b_data, 0, nsbh->b_size); + } + set_buffer_uptodate(nsbh); + unlock_buffer(nsbh); + + if (sb2i >= 0) { brelse(nilfs->ns_sbh[sb2i]); nilfs->ns_sbh[sb2i] = nsbh; nilfs->ns_sbp[sb2i] = nsbp; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 2894152a6b25..0f0667957c81 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -405,6 +405,18 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs) 100)); } +/** + * nilfs_max_segment_count - calculate the maximum number of segments + * @nilfs: nilfs object + */ +static u64 nilfs_max_segment_count(struct the_nilfs *nilfs) +{ + u64 max_count = U64_MAX; + + do_div(max_count, nilfs->ns_blocks_per_segment); + return min_t(u64, max_count, ULONG_MAX); +} + void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) { nilfs->ns_nsegments = nsegs; @@ -414,6 +426,8 @@ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs) static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { + u64 nsegments, nblocks; + if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { nilfs_err(nilfs->ns_sb, "unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).", @@ -457,7 +471,34 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, return -EINVAL; } - nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments)); + nsegments = le64_to_cpu(sbp->s_nsegments); + if (nsegments > nilfs_max_segment_count(nilfs)) { + nilfs_err(nilfs->ns_sb, + "segment count %llu exceeds upper limit (%llu segments)", + (unsigned long long)nsegments, + (unsigned long long)nilfs_max_segment_count(nilfs)); + return -EINVAL; + } + + nblocks = sb_bdev_nr_blocks(nilfs->ns_sb); + if (nblocks) { + u64 min_block_count = nsegments * nilfs->ns_blocks_per_segment; + /* + * To avoid failing to mount early device images without a + * second superblock, exclude that block count from the + * "min_block_count" calculation. + */ + + if (nblocks < min_block_count) { + nilfs_err(nilfs->ns_sb, + "total number of segment blocks %llu exceeds device size (%llu blocks)", + (unsigned long long)min_block_count, + (unsigned long long)nblocks); + return -EINVAL; + } + } + + nilfs_set_nsegments(nilfs, nsegments); nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); return 0; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index efb09de4343d..b173c36bcab3 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2100,14 +2100,20 @@ static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, struct ocfs2_space_resv sr; int change_size = 1; int cmd = OCFS2_IOC_RESVSP64; + int ret = 0; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; if (!ocfs2_writes_unwritten_extents(osb)) return -EOPNOTSUPP; - if (mode & FALLOC_FL_KEEP_SIZE) + if (mode & FALLOC_FL_KEEP_SIZE) { change_size = 0; + } else { + ret = inode_newsize_ok(inode, offset + len); + if (ret) + return ret; + } if (mode & FALLOC_FL_PUNCH_HOLE) cmd = OCFS2_IOC_UNRESVSP64; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 0b0e6a132101..988d1c076861 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -952,8 +952,10 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (!sb_has_quota_loaded(sb, type)) continue; - oinfo = sb_dqinfo(sb, type)->dqi_priv; - cancel_delayed_work_sync(&oinfo->dqi_sync_work); + if (!sb_has_quota_suspended(sb, type)) { + oinfo = sb_dqinfo(sb, type)->dqi_priv; + cancel_delayed_work_sync(&oinfo->dqi_sync_work); + } inode = igrab(sb->s_dquot.files[type]); /* Turn off quotas. This will remove all dquot structures from * memory and so they will be automatically synced to global diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 5034b862cec2..b279f745466e 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/uaccess.h> +#include <uapi/linux/ethtool.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -130,12 +131,14 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) struct TCP_Server_Info *server = chan->server; seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx" - "\n\t\tNumber of credits: %d Dialect 0x%x" + "\n\t\tNumber of credits: %d,%d,%d Dialect 0x%x" "\n\t\tTCP status: %d Instance: %d" "\n\t\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d" "\n\t\tIn Send: %d In MaxReq Wait: %d", i+1, server->conn_id, server->credits, + server->echo_credits, + server->oplock_credits, server->dialect, server->tcpStatus, server->reconnect_instance, @@ -146,18 +149,62 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) atomic_read(&server->num_waiters)); } +static inline const char *smb_speed_to_str(size_t bps) +{ + size_t mbps = bps / 1000 / 1000; + + switch (mbps) { + case SPEED_10: + return "10Mbps"; + case SPEED_100: + return "100Mbps"; + case SPEED_1000: + return "1Gbps"; + case SPEED_2500: + return "2.5Gbps"; + case SPEED_5000: + return "5Gbps"; + case SPEED_10000: + return "10Gbps"; + case SPEED_14000: + return "14Gbps"; + case SPEED_20000: + return "20Gbps"; + case SPEED_25000: + return "25Gbps"; + case SPEED_40000: + return "40Gbps"; + case SPEED_50000: + return "50Gbps"; + case SPEED_56000: + return "56Gbps"; + case SPEED_100000: + return "100Gbps"; + case SPEED_200000: + return "200Gbps"; + case SPEED_400000: + return "400Gbps"; + case SPEED_800000: + return "800Gbps"; + default: + return "Unknown"; + } +} + static void cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface) { struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr; struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&iface->sockaddr; - seq_printf(m, "\tSpeed: %zu bps\n", iface->speed); + seq_printf(m, "\tSpeed: %s\n", smb_speed_to_str(iface->speed)); seq_puts(m, "\t\tCapabilities: "); if (iface->rdma_capable) seq_puts(m, "rdma "); if (iface->rss_capable) seq_puts(m, "rss "); + if (!iface->rdma_capable && !iface->rss_capable) + seq_puts(m, "None"); seq_putc(m, '\n'); if (iface->sockaddr.ss_family == AF_INET) seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr); @@ -350,8 +397,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) atomic_read(&server->smbd_conn->mr_used_count)); skip_rdma: #endif - seq_printf(m, "\nNumber of credits: %d Dialect 0x%x", - server->credits, server->dialect); + seq_printf(m, "\nNumber of credits: %d,%d,%d Dialect 0x%x", + server->credits, + server->echo_credits, + server->oplock_credits, + server->dialect); if (server->compress_algorithm == SMB3_COMPRESS_LZNT1) seq_printf(m, " COMPRESS_LZNT1"); else if (server->compress_algorithm == SMB3_COMPRESS_LZ77) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 0d84bb1a8cd9..b212a4e16b39 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -970,43 +970,6 @@ release_iface(struct kref *ref) kfree(iface); } -/* - * compare two interfaces a and b - * return 0 if everything matches. - * return 1 if a has higher link speed, or rdma capable, or rss capable - * return -1 otherwise. - */ -static inline int -iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b) -{ - int cmp_ret = 0; - - WARN_ON(!a || !b); - if (a->speed == b->speed) { - if (a->rdma_capable == b->rdma_capable) { - if (a->rss_capable == b->rss_capable) { - cmp_ret = memcmp(&a->sockaddr, &b->sockaddr, - sizeof(a->sockaddr)); - if (!cmp_ret) - return 0; - else if (cmp_ret > 0) - return 1; - else - return -1; - } else if (a->rss_capable > b->rss_capable) - return 1; - else - return -1; - } else if (a->rdma_capable > b->rdma_capable) - return 1; - else - return -1; - } else if (a->speed > b->speed) - return 1; - else - return -1; -} - struct cifs_chan { unsigned int in_reconnect : 1; /* if session setup in progress for this channel */ struct TCP_Server_Info *server; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c1c704990b98..d127aded2f28 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -87,6 +87,7 @@ extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); extern int smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx); extern int smb3_parse_opt(const char *options, const char *key, char **val); +extern int cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs); extern bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs); extern int cifs_discard_remaining_data(struct TCP_Server_Info *server); extern int cifs_call_async(struct TCP_Server_Info *server, diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 8e9a672320ab..9d16626e7a66 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1288,6 +1288,56 @@ next_pdu: module_put_and_kthread_exit(0); } +int +cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs) +{ + struct sockaddr_in *saddr4 = (struct sockaddr_in *)srcaddr; + struct sockaddr_in *vaddr4 = (struct sockaddr_in *)rhs; + struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; + struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs; + + switch (srcaddr->sa_family) { + case AF_UNSPEC: + switch (rhs->sa_family) { + case AF_UNSPEC: + return 0; + case AF_INET: + case AF_INET6: + return 1; + default: + return -1; + } + case AF_INET: { + switch (rhs->sa_family) { + case AF_UNSPEC: + return -1; + case AF_INET: + return memcmp(saddr4, vaddr4, + sizeof(struct sockaddr_in)); + case AF_INET6: + return 1; + default: + return -1; + } + } + case AF_INET6: { + switch (rhs->sa_family) { + case AF_UNSPEC: + case AF_INET: + return -1; + case AF_INET6: + return memcmp(saddr6, + vaddr6, + sizeof(struct sockaddr_in6)); + default: + return -1; + } + } + default: + return -1; /* don't expect to be here */ + } +} + /* * Returns true if srcaddr isn't specified and rhs isn't specified, or * if srcaddr is specified and matches the IP address of the rhs argument @@ -4086,16 +4136,17 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); + if (tcon->status == TID_GOOD) { + spin_unlock(&tcon->tc_lock); + return 0; + } + if (tcon->status != TID_NEW && tcon->status != TID_NEED_TCON) { spin_unlock(&tcon->tc_lock); return -EHOSTDOWN; } - if (tcon->status == TID_GOOD) { - spin_unlock(&tcon->tc_lock); - return 0; - } tcon->status = TID_IN_TCON; spin_unlock(&tcon->tc_lock); diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 2f93bf8c3325..2390b2fedd6a 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -575,16 +575,17 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); + if (tcon->status == TID_GOOD) { + spin_unlock(&tcon->tc_lock); + return 0; + } + if (tcon->status != TID_NEW && tcon->status != TID_NEED_TCON) { spin_unlock(&tcon->tc_lock); return -EHOSTDOWN; } - if (tcon->status == TID_GOOD) { - spin_unlock(&tcon->tc_lock); - return 0; - } tcon->status = TID_IN_TCON; spin_unlock(&tcon->tc_lock); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index df88b8c04d03..051283386e22 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -4942,9 +4942,13 @@ oplock_break_ack: * disconnected since oplock already released by the server */ if (!oplock_break_cancelled) { - rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid, + /* check for server null since can race with kill_sb calling tree disconnect */ + if (tcon->ses && tcon->ses->server) { + rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid, volatile_fid, net_fid, cinode); - cifs_dbg(FYI, "Oplock release rc = %d\n", rc); + cifs_dbg(FYI, "Oplock release rc = %d\n", rc); + } else + pr_warn_once("lease break not sent for unmounted share\n"); } cifs_done_oplock_break(cinode); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 6e3be58cfe49..a8bb9d00d33a 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -34,6 +34,8 @@ static int change_conf(struct TCP_Server_Info *server) { server->credits += server->echo_credits + server->oplock_credits; + if (server->credits > server->max_credits) + server->credits = server->max_credits; server->oplock_credits = server->echo_credits = 0; switch (server->credits) { case 0: @@ -91,6 +93,7 @@ smb2_add_credits(struct TCP_Server_Info *server, server->conn_id, server->hostname, *val, add, server->in_flight); } + WARN_ON_ONCE(server->in_flight == 0); server->in_flight--; if (server->in_flight == 0 && ((optype & CIFS_OP_MASK) != CIFS_NEG_OP) && @@ -510,6 +513,43 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) return rsize; } +/* + * compare two interfaces a and b + * return 0 if everything matches. + * return 1 if a is rdma capable, or rss capable, or has higher link speed + * return -1 otherwise. + */ +static int +iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b) +{ + int cmp_ret = 0; + + WARN_ON(!a || !b); + if (a->rdma_capable == b->rdma_capable) { + if (a->rss_capable == b->rss_capable) { + if (a->speed == b->speed) { + cmp_ret = cifs_ipaddr_cmp((struct sockaddr *) &a->sockaddr, + (struct sockaddr *) &b->sockaddr); + if (!cmp_ret) + return 0; + else if (cmp_ret > 0) + return 1; + else + return -1; + } else if (a->speed > b->speed) + return 1; + else + return -1; + } else if (a->rss_capable > b->rss_capable) + return 1; + else + return -1; + } else if (a->rdma_capable > b->rdma_capable) + return 1; + else + return -1; +} + static int parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, size_t buf_len, struct cifs_ses *ses, bool in_mount) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 7063b395d22f..17fe212ab895 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1305,7 +1305,12 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) } /* enough to enable echos and oplocks and one max size write */ - req->hdr.CreditRequest = cpu_to_le16(130); + if (server->credits >= server->max_credits) + req->hdr.CreditRequest = cpu_to_le16(0); + else + req->hdr.CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, 130)); /* only one of SMB2 signing flags may be set in SMB2 request */ if (server->sign) @@ -1899,7 +1904,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, rqst.rq_nvec = 2; /* Need 64 for max size write so ask for more in case not there yet */ - req->hdr.CreditRequest = cpu_to_le16(64); + if (server->credits >= server->max_credits) + req->hdr.CreditRequest = cpu_to_le16(0); + else + req->hdr.CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, 64)); rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -4227,6 +4237,7 @@ smb2_async_readv(struct cifs_readdata *rdata) struct TCP_Server_Info *server; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); unsigned int total_len; + int credit_request; cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", __func__, rdata->offset, rdata->bytes); @@ -4258,7 +4269,13 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); + credit_request = le16_to_cpu(shdr->CreditCharge) + 8; + if (server->credits >= server->max_credits) + shdr->CreditRequest = cpu_to_le16(0); + else + shdr->CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, credit_request)); rc = adjust_credits(server, &rdata->credits, rdata->bytes); if (rc) @@ -4468,6 +4485,7 @@ smb2_async_writev(struct cifs_writedata *wdata, unsigned int total_len; struct cifs_io_parms _io_parms; struct cifs_io_parms *io_parms = NULL; + int credit_request; if (!wdata->server) server = wdata->server = cifs_pick_channel(tcon->ses); @@ -4572,7 +4590,13 @@ smb2_async_writev(struct cifs_writedata *wdata, if (wdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); + credit_request = le16_to_cpu(shdr->CreditCharge) + 8; + if (server->credits >= server->max_credits) + shdr->CreditRequest = cpu_to_le16(0); + else + shdr->CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, credit_request)); rc = adjust_credits(server, &wdata->credits, io_parms->length); if (rc) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 24bdd5f4d3bc..0474d0bba0a2 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -55,7 +55,7 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); - /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ + /* easier to use jiffies */ /* when mid allocated can be before when sent */ temp->when_alloc = jiffies; temp->server = server; diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index f9b2e0f19b03..ced7a9e916f0 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -185,24 +185,31 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, goto send; } - if (conn->ops->check_user_session) { - rc = conn->ops->check_user_session(work); - if (rc < 0) { - command = conn->ops->get_cmd_val(work); - conn->ops->set_rsp_status(work, - STATUS_USER_SESSION_DELETED); - goto send; - } else if (rc > 0) { - rc = conn->ops->get_ksmbd_tcon(work); + do { + if (conn->ops->check_user_session) { + rc = conn->ops->check_user_session(work); if (rc < 0) { - conn->ops->set_rsp_status(work, - STATUS_NETWORK_NAME_DELETED); + if (rc == -EINVAL) + conn->ops->set_rsp_status(work, + STATUS_INVALID_PARAMETER); + else + conn->ops->set_rsp_status(work, + STATUS_USER_SESSION_DELETED); goto send; + } else if (rc > 0) { + rc = conn->ops->get_ksmbd_tcon(work); + if (rc < 0) { + if (rc == -EINVAL) + conn->ops->set_rsp_status(work, + STATUS_INVALID_PARAMETER); + else + conn->ops->set_rsp_status(work, + STATUS_NETWORK_NAME_DELETED); + goto send; + } } } - } - do { rc = __process_request(work, conn, &command); if (rc == SERVER_HANDLER_ABORT) break; diff --git a/fs/smb/server/smb2misc.c b/fs/smb/server/smb2misc.c index 0ffe663b7590..33b7e6c4ceff 100644 --- a/fs/smb/server/smb2misc.c +++ b/fs/smb/server/smb2misc.c @@ -351,9 +351,16 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) int command; __u32 clc_len; /* calculated length */ __u32 len = get_rfc1002_len(work->request_buf); + __u32 req_struct_size, next_cmd = le32_to_cpu(hdr->NextCommand); - if (le32_to_cpu(hdr->NextCommand) > 0) - len = le32_to_cpu(hdr->NextCommand); + if ((u64)work->next_smb2_rcv_hdr_off + next_cmd > len) { + pr_err("next command(%u) offset exceeds smb msg size\n", + next_cmd); + return 1; + } + + if (next_cmd > 0) + len = next_cmd; else if (work->next_smb2_rcv_hdr_off) len -= work->next_smb2_rcv_hdr_off; @@ -373,17 +380,9 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) } if (smb2_req_struct_sizes[command] != pdu->StructureSize2) { - if (command != SMB2_OPLOCK_BREAK_HE && - (hdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2_LE)) { - /* error packets have 9 byte structure size */ - ksmbd_debug(SMB, - "Illegal request size %u for command %d\n", - le16_to_cpu(pdu->StructureSize2), command); - return 1; - } else if (command == SMB2_OPLOCK_BREAK_HE && - hdr->Status == 0 && - le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 && - le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) { + if (command == SMB2_OPLOCK_BREAK_HE && + le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 && + le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) { /* special case for SMB2.1 lease break message */ ksmbd_debug(SMB, "Illegal request size %d for oplock break\n", @@ -392,6 +391,14 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) } } + req_struct_size = le16_to_cpu(pdu->StructureSize2) + + __SMB2_HEADER_STRUCTURE_SIZE; + if (command == SMB2_LOCK_HE) + req_struct_size -= sizeof(struct smb2_lock_element); + + if (req_struct_size > len + 1) + return 1; + if (smb2_calc_size(hdr, &clc_len)) return 1; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 25c0ba04c59d..da1787c68ba0 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -91,7 +91,6 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work) unsigned int cmd = le16_to_cpu(req_hdr->Command); int tree_id; - work->tcon = NULL; if (cmd == SMB2_TREE_CONNECT_HE || cmd == SMB2_CANCEL_HE || cmd == SMB2_LOGOFF_HE) { @@ -105,10 +104,28 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work) } tree_id = le32_to_cpu(req_hdr->Id.SyncId.TreeId); + + /* + * If request is not the first in Compound request, + * Just validate tree id in header with work->tcon->id. + */ + if (work->next_smb2_rcv_hdr_off) { + if (!work->tcon) { + pr_err("The first operation in the compound does not have tcon\n"); + return -EINVAL; + } + if (work->tcon->id != tree_id) { + pr_err("tree id(%u) is different with id(%u) in first operation\n", + tree_id, work->tcon->id); + return -EINVAL; + } + return 1; + } + work->tcon = ksmbd_tree_conn_lookup(work->sess, tree_id); if (!work->tcon) { pr_err("Invalid tid %d\n", tree_id); - return -EINVAL; + return -ENOENT; } return 1; @@ -547,7 +564,6 @@ int smb2_check_user_session(struct ksmbd_work *work) unsigned int cmd = conn->ops->get_cmd_val(work); unsigned long long sess_id; - work->sess = NULL; /* * SMB2_ECHO, SMB2_NEGOTIATE, SMB2_SESSION_SETUP command do not * require a session id, so no need to validate user session's for @@ -558,15 +574,33 @@ int smb2_check_user_session(struct ksmbd_work *work) return 0; if (!ksmbd_conn_good(conn)) - return -EINVAL; + return -EIO; sess_id = le64_to_cpu(req_hdr->SessionId); + + /* + * If request is not the first in Compound request, + * Just validate session id in header with work->sess->id. + */ + if (work->next_smb2_rcv_hdr_off) { + if (!work->sess) { + pr_err("The first operation in the compound does not have sess\n"); + return -EINVAL; + } + if (work->sess->id != sess_id) { + pr_err("session id(%llu) is different with the first operation(%lld)\n", + sess_id, work->sess->id); + return -EINVAL; + } + return 1; + } + /* Check for validity of user session */ work->sess = ksmbd_session_lookup_all(conn, sess_id); if (work->sess) return 1; ksmbd_debug(SMB, "Invalid user session, Uid %llu\n", sess_id); - return -EINVAL; + return -ENOENT; } static void destroy_previous_session(struct ksmbd_conn *conn, @@ -2249,7 +2283,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, /* delete the EA only when it exits */ if (rc > 0) { rc = ksmbd_vfs_remove_xattr(idmap, - path->dentry, + path, attr_name); if (rc < 0) { @@ -2263,8 +2297,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, /* if the EA doesn't exist, just do nothing. */ rc = 0; } else { - rc = ksmbd_vfs_setxattr(idmap, - path->dentry, attr_name, value, + rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value, le16_to_cpu(eabuf->EaValueLength), 0); if (rc < 0) { ksmbd_debug(SMB, @@ -2321,8 +2354,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path, return -EBADF; } - rc = ksmbd_vfs_setxattr(idmap, path->dentry, - xattr_stream_name, NULL, 0, 0); + rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0); if (rc < 0) pr_err("Failed to store XATTR stream name :%d\n", rc); return 0; @@ -2350,7 +2382,7 @@ static int smb2_remove_smb_xattrs(const struct path *path) if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) { - err = ksmbd_vfs_remove_xattr(idmap, path->dentry, + err = ksmbd_vfs_remove_xattr(idmap, path, name); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", @@ -2397,8 +2429,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path * da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), - path->dentry, &da); + rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da); if (rc) ksmbd_debug(SMB, "failed to store file attribute into xattr\n"); } @@ -2972,7 +3003,7 @@ int smb2_open(struct ksmbd_work *work) struct inode *inode = d_inode(path.dentry); posix_acl_rc = ksmbd_vfs_inherit_posix_acl(idmap, - path.dentry, + &path, d_inode(path.dentry->d_parent)); if (posix_acl_rc) ksmbd_debug(SMB, "inherit posix acl failed : %d\n", posix_acl_rc); @@ -2988,7 +3019,7 @@ int smb2_open(struct ksmbd_work *work) if (rc) { if (posix_acl_rc) ksmbd_vfs_set_init_posix_acl(idmap, - path.dentry); + &path); if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) { @@ -3028,7 +3059,7 @@ int smb2_open(struct ksmbd_work *work) rc = ksmbd_vfs_set_sd_xattr(conn, idmap, - path.dentry, + &path, pntsd, pntsd_size); kfree(pntsd); @@ -5464,7 +5495,7 @@ static int smb2_rename(struct ksmbd_work *work, goto out; rc = ksmbd_vfs_setxattr(file_mnt_idmap(fp->filp), - fp->filp->f_path.dentry, + &fp->filp->f_path, xattr_stream_name, NULL, 0, 0); if (rc < 0) { @@ -5629,8 +5660,7 @@ static int set_file_basic_info(struct ksmbd_file *fp, da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, - filp->f_path.dentry, &da); + rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da); if (rc) ksmbd_debug(SMB, "failed to restore file attribute in EA\n"); @@ -7485,7 +7515,7 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id, da.attr = le32_to_cpu(fp->f_ci->m_fattr); ret = ksmbd_vfs_set_dos_attrib_xattr(idmap, - fp->filp->f_path.dentry, &da); + &fp->filp->f_path, &da); if (ret) fp->f_ci->m_fattr = old_fattr; } diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 0a5862a61c77..ad919a4239d0 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1162,8 +1162,7 @@ pass: pntsd_size += sizeof(struct smb_acl) + nt_size; } - ksmbd_vfs_set_sd_xattr(conn, idmap, - path->dentry, pntsd, pntsd_size); + ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size); kfree(pntsd); } @@ -1383,7 +1382,7 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, newattrs.ia_valid |= ATTR_MODE; newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777); - ksmbd_vfs_remove_acl_xattrs(idmap, path->dentry); + ksmbd_vfs_remove_acl_xattrs(idmap, path); /* Update posix acls */ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) { rc = set_posix_acl(idmap, path->dentry, @@ -1414,9 +1413,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) { /* Update WinACL in xattr */ - ksmbd_vfs_remove_sd_xattrs(idmap, path->dentry); - ksmbd_vfs_set_sd_xattr(conn, idmap, - path->dentry, pntsd, ntsd_len); + ksmbd_vfs_remove_sd_xattrs(idmap, path); + ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len); } out: diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index f9fb778247e7..81489fdedd8e 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -170,6 +170,10 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) return err; } + err = mnt_want_write(path.mnt); + if (err) + goto out_err; + mode |= S_IFREG; err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry), dentry, mode, true); @@ -179,6 +183,9 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) } else { pr_err("File(%s): creation failed (err:%d)\n", name, err); } + mnt_drop_write(path.mnt); + +out_err: done_path_create(&path, dentry); return err; } @@ -209,30 +216,35 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) return err; } + err = mnt_want_write(path.mnt); + if (err) + goto out_err2; + idmap = mnt_idmap(path.mnt); mode |= S_IFDIR; err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); - if (err) { - goto out; - } else if (d_unhashed(dentry)) { + if (!err && d_unhashed(dentry)) { struct dentry *d; d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, dentry->d_name.len); if (IS_ERR(d)) { err = PTR_ERR(d); - goto out; + goto out_err1; } if (unlikely(d_is_negative(d))) { dput(d); err = -ENOENT; - goto out; + goto out_err1; } ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); dput(d); } -out: + +out_err1: + mnt_drop_write(path.mnt); +out_err2: done_path_create(&path, dentry); if (err) pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); @@ -443,7 +455,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, memcpy(&stream_buf[*pos], buf, count); err = ksmbd_vfs_setxattr(idmap, - fp->filp->f_path.dentry, + &fp->filp->f_path, fp->stream.name, (void *)stream_buf, size, @@ -589,6 +601,10 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) goto out_err; } + err = mnt_want_write(path->mnt); + if (err) + goto out_err; + idmap = mnt_idmap(path->mnt); if (S_ISDIR(d_inode(path->dentry)->i_mode)) { err = vfs_rmdir(idmap, d_inode(parent), path->dentry); @@ -599,6 +615,7 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) if (err) ksmbd_debug(VFS, "unlink failed, err %d\n", err); } + mnt_drop_write(path->mnt); out_err: ksmbd_revert_fsids(work); @@ -644,11 +661,16 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname, goto out3; } + err = mnt_want_write(newpath.mnt); + if (err) + goto out3; + err = vfs_link(oldpath.dentry, mnt_idmap(newpath.mnt), d_inode(newpath.dentry), dentry, NULL); if (err) ksmbd_debug(VFS, "vfs_link failed err %d\n", err); + mnt_drop_write(newpath.mnt); out3: done_path_create(&newpath, dentry); @@ -694,6 +716,10 @@ retry: goto out2; } + err = mnt_want_write(old_path->mnt); + if (err) + goto out2; + trap = lock_rename_child(old_child, new_path.dentry); old_parent = dget(old_child->d_parent); @@ -757,6 +783,7 @@ out4: out3: dput(old_parent); unlock_rename(old_parent, new_path.dentry); + mnt_drop_write(old_path->mnt); out2: path_put(&new_path); @@ -897,19 +924,24 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap, * Return: 0 on success, otherwise error */ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *attr_name, + const struct path *path, const char *attr_name, void *attr_value, size_t attr_size, int flags) { int err; + err = mnt_want_write(path->mnt); + if (err) + return err; + err = vfs_setxattr(idmap, - dentry, + path->dentry, attr_name, attr_value, attr_size, flags); if (err) ksmbd_debug(VFS, "setxattr failed, err %d\n", err); + mnt_drop_write(path->mnt); return err; } @@ -1013,9 +1045,18 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, } int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - struct dentry *dentry, char *attr_name) + const struct path *path, char *attr_name) { - return vfs_removexattr(idmap, dentry, attr_name); + int err; + + err = mnt_want_write(path->mnt); + if (err) + return err; + + err = vfs_removexattr(idmap, path->dentry, attr_name); + mnt_drop_write(path->mnt); + + return err; } int ksmbd_vfs_unlink(struct file *filp) @@ -1024,6 +1065,10 @@ int ksmbd_vfs_unlink(struct file *filp) struct dentry *dir, *dentry = filp->f_path.dentry; struct mnt_idmap *idmap = file_mnt_idmap(filp); + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + dir = dget_parent(dentry); err = ksmbd_vfs_lock_parent(dir, dentry); if (err) @@ -1041,6 +1086,7 @@ int ksmbd_vfs_unlink(struct file *filp) ksmbd_debug(VFS, "failed to delete, err %d\n", err); out: dput(dir); + mnt_drop_write(filp->f_path.mnt); return err; } @@ -1244,13 +1290,13 @@ struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, } int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, - struct dentry *dentry) + const struct path *path) { char *name, *xattr_list = NULL; ssize_t xattr_list_len; int err = 0; - xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list); + xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list); if (xattr_list_len < 0) { goto out; } else if (!xattr_list_len) { @@ -1258,6 +1304,10 @@ int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, goto out; } + err = mnt_want_write(path->mnt); + if (err) + goto out; + for (name = xattr_list; name - xattr_list < xattr_list_len; name += strlen(name) + 1) { ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); @@ -1266,25 +1316,26 @@ int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) || !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) { - err = vfs_remove_acl(idmap, dentry, name); + err = vfs_remove_acl(idmap, path->dentry, name); if (err) ksmbd_debug(SMB, "remove acl xattr failed : %s\n", name); } } + mnt_drop_write(path->mnt); + out: kvfree(xattr_list); return err; } -int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, - struct dentry *dentry) +int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path) { char *name, *xattr_list = NULL; ssize_t xattr_list_len; int err = 0; - xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list); + xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list); if (xattr_list_len < 0) { goto out; } else if (!xattr_list_len) { @@ -1297,7 +1348,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) { - err = ksmbd_vfs_remove_xattr(idmap, dentry, name); + err = ksmbd_vfs_remove_xattr(idmap, path, name); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", name); } @@ -1374,13 +1425,14 @@ out: int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, - struct dentry *dentry, + const struct path *path, struct smb_ntsd *pntsd, int len) { int rc; struct ndr sd_ndr = {0}, acl_ndr = {0}; struct xattr_ntacl acl = {0}; struct xattr_smb_acl *smb_acl, *def_smb_acl = NULL; + struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); acl.version = 4; @@ -1432,7 +1484,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, goto out; } - rc = ksmbd_vfs_setxattr(idmap, dentry, + rc = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_SD, sd_ndr.data, sd_ndr.offset, 0); if (rc < 0) @@ -1522,7 +1574,7 @@ free_n_data: } int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, - struct dentry *dentry, + const struct path *path, struct xattr_dos_attrib *da) { struct ndr n; @@ -1532,7 +1584,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, if (err) return err; - err = ksmbd_vfs_setxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE, + err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE, (void *)n.data, n.offset, 0); if (err) ksmbd_debug(SMB, "failed to store dos attribute in xattr\n"); @@ -1769,10 +1821,11 @@ void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock) } int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, - struct dentry *dentry) + struct path *path) { struct posix_acl_state acl_state; struct posix_acl *acls; + struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); int rc; @@ -1802,6 +1855,11 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, return -ENOMEM; } posix_state_to_acl(&acl_state, acls->a_entries); + + rc = mnt_want_write(path->mnt); + if (rc) + goto out_err; + rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", @@ -1813,16 +1871,20 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); } + mnt_drop_write(path->mnt); + +out_err: free_acl_state(&acl_state); posix_acl_release(acls); return rc; } int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, - struct dentry *dentry, struct inode *parent_inode) + struct path *path, struct inode *parent_inode) { struct posix_acl *acls; struct posix_acl_entry *pace; + struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); int rc, i; @@ -1841,6 +1903,10 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, } } + rc = mnt_want_write(path->mnt); + if (rc) + goto out_err; + rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", @@ -1852,6 +1918,9 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); } + mnt_drop_write(path->mnt); + +out_err: posix_acl_release(acls); return rc; } diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index a4ae89f3230d..8c0931d4d531 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -108,12 +108,12 @@ ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap, struct dentry *dentry, char *attr_name, int attr_name_len); int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *attr_name, + const struct path *path, const char *attr_name, void *attr_value, size_t attr_size, int flags); int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - struct dentry *dentry, char *attr_name); + const struct path *path, char *attr_name); int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, unsigned int flags, struct path *path, bool caseless); @@ -139,26 +139,25 @@ void ksmbd_vfs_posix_lock_wait(struct file_lock *flock); int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout); void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock); int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, - struct dentry *dentry); -int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, - struct dentry *dentry); + const struct path *path); +int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path); int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, - struct dentry *dentry, + const struct path *path, struct smb_ntsd *pntsd, int len); int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, struct dentry *dentry, struct smb_ntsd **pntsd); int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, - struct dentry *dentry, + const struct path *path, struct xattr_dos_attrib *da); int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_dos_attrib *da); int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, - struct dentry *dentry); + struct path *path); int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, - struct dentry *dentry, + struct path *path, struct inode *parent_inode); #endif /* __KSMBD_VFS_H__ */ diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 2d0138e72d78..f41f8d6108ce 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -252,7 +252,7 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) if (ksmbd_stream_fd(fp) && (ci->m_flags & S_DEL_ON_CLS_STREAM)) { ci->m_flags &= ~S_DEL_ON_CLS_STREAM; err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp), - filp->f_path.dentry, + &filp->f_path, fp->stream.name); if (err) pr_err("remove xattr failed : %s\n", diff --git a/fs/super.c b/fs/super.c index 34afe411cf2b..04bc62ab7dfe 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = { * One thing we have to be careful of with a per-sb shrinker is that we don't * drop the last active reference to the superblock from within the shrinker. * If that happens we could trigger unregistering the shrinker from within the - * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we + * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we * take a passive reference to the superblock to avoid this from occurring. */ static unsigned long super_cache_scan(struct shrinker *shrink, diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 0fd96d6e39ce..4e800bb7d2ab 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1332,6 +1332,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, bool basic_ioctls; unsigned long start, end, vma_end; struct vma_iterator vmi; + pgoff_t pgoff; user_uffdio_register = (struct uffdio_register __user *) arg; @@ -1459,6 +1460,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vma_iter_set(&vmi, start); prev = vma_prev(&vmi); + if (vma->vm_start < start) + prev = vma; ret = 0; for_each_vma_range(vmi, vma, end) { @@ -1482,8 +1485,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vma_end = min(end, vma->vm_end); new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags; + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags, - vma->anon_vma, vma->vm_file, vma->vm_pgoff, + vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), ((struct vm_userfaultfd_ctx){ ctx }), anon_vma_name(vma)); @@ -1563,6 +1567,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, unsigned long start, end, vma_end; const void __user *buf = (void __user *)arg; struct vma_iterator vmi; + pgoff_t pgoff; ret = -EFAULT; if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) @@ -1625,6 +1630,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, vma_iter_set(&vmi, start); prev = vma_prev(&vmi); + if (vma->vm_start < start) + prev = vma; + ret = 0; for_each_vma_range(vmi, vma, end) { cond_resched(); @@ -1662,8 +1670,9 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, uffd_wp_range(vma, start, vma_end - start, false); new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags, - vma->anon_vma, vma->vm_file, vma->vm_pgoff, + vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), NULL_VM_UFFD_CTX, anon_vma_name(vma)); if (prev) { diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index e6098a08c914..9ffdc0425bc2 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -761,6 +761,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_event_status *event_status)) ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_hw_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0f1001dca0e0..3ceb9dfa0993 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -200,6 +200,7 @@ enum cpuhp_state { /* Online section invoked on the hotplugged CPU from the hotplug thread */ CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_HYPERV_ONLINE, CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, diff --git a/include/linux/libata.h b/include/linux/libata.h index 311cd93377c7..dd5797fb6305 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -836,7 +836,7 @@ struct ata_port { struct mutex scsi_scan_mutex; struct delayed_work hotplug_task; - struct work_struct scsi_rescan_task; + struct delayed_work scsi_rescan_task; unsigned int hsm_task_state; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 94d2be5848ae..4b9626cd83e4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1238,6 +1238,18 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev) return dev->priv.sriov.max_vfs; } +static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) +{ + /* LACP owner conditions: + * 1) Function is physical. + * 2) LAG is supported by FW. + * 3) LAG is managed by driver (currently the only option). + */ + return MLX5_CAP_GEN(dev, vport_group_manager) && + (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && + MLX5_CAP_GEN(dev, lag_master); +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 2aba75145144..86544707236a 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -106,12 +106,22 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } +#ifdef CONFIG_TREE_SRCU #define SRCU_NOTIFIER_INIT(name, pcpu) \ { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ .head = NULL, \ + .srcuu = __SRCU_USAGE_INIT(name.srcuu), \ .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ } +#else +#define SRCU_NOTIFIER_INIT(name, pcpu) \ + { \ + .mutex = __MUTEX_INITIALIZER(name.mutex), \ + .head = NULL, \ + .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ + } +#endif #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index 3c01c2bf84f5..505c908dbb81 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -196,11 +196,11 @@ enum { /* PCA9450_REG_LDO3_VOLT bits */ #define LDO3_EN_MASK 0xC0 -#define LDO3OUT_MASK 0x0F +#define LDO3OUT_MASK 0x1F /* PCA9450_REG_LDO4_VOLT bits */ #define LDO4_EN_MASK 0xC0 -#define LDO4OUT_MASK 0x0F +#define LDO4OUT_MASK 0x1F /* PCA9450_REG_LDO5_VOLT bits */ #define LDO5L_EN_MASK 0xC0 diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h index 367d5381217b..e7c44870f20d 100644 --- a/include/media/dvb_frontend.h +++ b/include/media/dvb_frontend.h @@ -686,10 +686,7 @@ struct dtv_frontend_properties { * @id: Frontend ID * @exit: Used to inform the DVB core that the frontend * thread should exit (usually, means that the hardware - * got disconnected). - * @remove_mutex: mutex that avoids a race condition between a callback - * called when the hardware is disconnected and the - * file_operations of dvb_frontend. + * got disconnected. */ struct dvb_frontend { @@ -707,7 +704,6 @@ struct dvb_frontend { int (*callback)(void *adapter_priv, int component, int cmd, int arg); int id; unsigned int exit; - struct mutex remove_mutex; }; /** diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index ebb28ec5b6fa..f37f9f34430c 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -268,7 +268,7 @@ int flow_offload_route_init(struct flow_offload *flow, int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, - struct flow_offload *flow); + struct flow_offload *flow, bool force); struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2e24ea1d744c..83db182decc8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -462,7 +462,8 @@ struct nft_set_ops { const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); - + void (*commit)(const struct nft_set *set); + void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, @@ -557,6 +558,7 @@ struct nft_set { u16 policy; u16 udlen; unsigned char *udata; + struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; u16 flags:14, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 27271f2b37cb..12eadecf8cd0 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -137,6 +137,13 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc) refcount_inc(&qdisc->refcnt); } +static inline bool qdisc_refcount_dec_if_one(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return true; + return refcount_dec_if_one(&qdisc->refcnt); +} + /* Intended to be used by unlocked users, when concurrent qdisc release is * possible. */ @@ -652,6 +659,7 @@ void dev_deactivate_many(struct list_head *head); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); +void qdisc_destroy(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index d808dc3d239e..811a0f11d0db 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -194,29 +194,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu) return 0; } -static inline int iboe_get_rate(struct net_device *dev) -{ - struct ethtool_link_ksettings cmd; - int err; - - rtnl_lock(); - err = __ethtool_get_link_ksettings(dev, &cmd); - rtnl_unlock(); - if (err) - return IB_RATE_PORT_CURRENT; - - if (cmd.base.speed >= 40000) - return IB_RATE_40_GBPS; - else if (cmd.base.speed >= 30000) - return IB_RATE_30_GBPS; - else if (cmd.base.speed >= 20000) - return IB_RATE_20_GBPS; - else if (cmd.base.speed >= 10000) - return IB_RATE_10_GBPS; - else - return IB_RATE_PORT_CURRENT; -} - static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == htonl(0xfe800000) && diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 86b2a82da546..54e353c9f919 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(writeback_folio_template, strscpy_pad(__entry->name, bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : NULL), 32); - __entry->ino = mapping ? mapping->host->i_ino : 0; + __entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0; __entry->index = folio->index; ), diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 1ebf8d455f07..73e2c10dc2cc 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -783,7 +783,7 @@ enum { /* add new constants above here */ __ETHTOOL_A_STATS_GRP_CNT, - ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_CNT - 1) + ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) }; enum { diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index b2715988791e..399e9a15c38d 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -220,10 +220,12 @@ static void io_worker_exit(struct io_worker *worker) list_del_rcu(&worker->all_list); raw_spin_unlock(&wq->lock); io_wq_dec_running(worker); - worker->flags = 0; - preempt_disable(); - current->flags &= ~PF_IO_WORKER; - preempt_enable(); + /* + * this worker is a goner, clear ->worker_private to avoid any + * inc/dec running calls that could happen as part of exit from + * touching 'worker'. + */ + current->worker_private = NULL; kfree_rcu(worker, rcu); io_worker_ref_put(wq); diff --git a/io_uring/net.c b/io_uring/net.c index 89e839013837..51b0f7fbb4f5 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -65,6 +65,7 @@ struct io_sr_msg { u16 addr_len; u16 buf_group; void __user *addr; + void __user *msg_control; /* used only for send zerocopy */ struct io_kiocb *notif; }; @@ -195,11 +196,15 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + int ret; iomsg->msg.msg_name = &iomsg->addr; iomsg->free_iov = iomsg->fast_iov; - return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, + ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, &iomsg->free_iov); + /* save msg_control as sys_sendmsg() overwrites it */ + sr->msg_control = iomsg->msg.msg_control; + return ret; } int io_send_prep_async(struct io_kiocb *req) @@ -297,6 +302,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (req_has_async_data(req)) { kmsg = req->async_data; + kmsg->msg.msg_control = sr->msg_control; } else { ret = io_sendmsg_copy_hdr(req, &iomsg); if (ret) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index f989f5f1933b..69ee4a29136f 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -901,10 +901,22 @@ static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, } offset = ALIGN(offset, align); + + /* + * Check if the segment contains the entry point, if so, + * calculate the value of image->start based on it. + * If the compiler has produced more than one .text section + * (Eg: .text.hot), they are generally after the main .text + * section, and they shall not be used to calculate + * image->start. So do not re-calculate image->start if it + * is not set to the initial value, and warn the user so they + * have a chance to fix their purgatory's linker script. + */ if (sechdrs[i].sh_flags & SHF_EXECINSTR && pi->ehdr->e_entry >= sechdrs[i].sh_addr && pi->ehdr->e_entry < (sechdrs[i].sh_addr - + sechdrs[i].sh_size)) { + + sechdrs[i].sh_size) && + !WARN_ON(kbuf->image->start != pi->ehdr->e_entry)) { kbuf->image->start -= sechdrs[i].sh_addr; kbuf->image->start += kbuf->mem + offset; } diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index dbb14705d0d3..8df0550415e7 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -50,6 +50,18 @@ #define EVENT_STATUS_OTHER BIT(7) /* + * User register flags are not allowed yet, keep them here until we are + * ready to expose them out to the user ABI. + */ +enum user_reg_flag { + /* Event will not delete upon last reference closing */ + USER_EVENT_REG_PERSIST = 1U << 0, + + /* This value or above is currently non-ABI */ + USER_EVENT_REG_MAX = 1U << 1, +}; + +/* * Stores the system name, tables, and locks for a group of events. This * allows isolation for events by various means. */ @@ -85,8 +97,10 @@ struct user_event { struct hlist_node node; struct list_head fields; struct list_head validators; + struct work_struct put_work; refcount_t refcnt; int min_size; + int reg_flags; char status; }; @@ -165,76 +179,151 @@ typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i, static int user_event_parse(struct user_event_group *group, char *name, char *args, char *flags, - struct user_event **newuser); + struct user_event **newuser, int reg_flags); static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm); static struct user_event_mm *user_event_mm_get_all(struct user_event *user); static void user_event_mm_put(struct user_event_mm *mm); +static int destroy_user_event(struct user_event *user); static u32 user_event_key(char *name) { return jhash(name, strlen(name), 0); } -static void user_event_group_destroy(struct user_event_group *group) +static struct user_event *user_event_get(struct user_event *user) { - kfree(group->system_name); - kfree(group); + refcount_inc(&user->refcnt); + + return user; } -static char *user_event_group_system_name(struct user_namespace *user_ns) +static void delayed_destroy_user_event(struct work_struct *work) { - char *system_name; - int len = sizeof(USER_EVENTS_SYSTEM) + 1; + struct user_event *user = container_of( + work, struct user_event, put_work); - if (user_ns != &init_user_ns) { + mutex_lock(&event_mutex); + + if (!refcount_dec_and_test(&user->refcnt)) + goto out; + + if (destroy_user_event(user)) { /* - * Unexpected at this point: - * We only currently support init_user_ns. - * When we enable more, this will trigger a failure so log. + * The only reason this would fail here is if we cannot + * update the visibility of the event. In this case the + * event stays in the hashtable, waiting for someone to + * attempt to delete it later. */ - pr_warn("user_events: Namespace other than init_user_ns!\n"); - return NULL; + pr_warn("user_events: Unable to delete event\n"); + refcount_set(&user->refcnt, 1); } +out: + mutex_unlock(&event_mutex); +} - system_name = kmalloc(len, GFP_KERNEL); +static void user_event_put(struct user_event *user, bool locked) +{ + bool delete; - if (!system_name) - return NULL; + if (unlikely(!user)) + return; - snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM); + /* + * When the event is not enabled for auto-delete there will always + * be at least 1 reference to the event. During the event creation + * we initially set the refcnt to 2 to achieve this. In those cases + * the caller must acquire event_mutex and after decrement check if + * the refcnt is 1, meaning this is the last reference. When auto + * delete is enabled, there will only be 1 ref, IE: refcnt will be + * only set to 1 during creation to allow the below checks to go + * through upon the last put. The last put must always be done with + * the event mutex held. + */ + if (!locked) { + lockdep_assert_not_held(&event_mutex); + delete = refcount_dec_and_mutex_lock(&user->refcnt, &event_mutex); + } else { + lockdep_assert_held(&event_mutex); + delete = refcount_dec_and_test(&user->refcnt); + } - return system_name; + if (!delete) + return; + + /* + * We now have the event_mutex in all cases, which ensures that + * no new references will be taken until event_mutex is released. + * New references come through find_user_event(), which requires + * the event_mutex to be held. + */ + + if (user->reg_flags & USER_EVENT_REG_PERSIST) { + /* We should not get here when persist flag is set */ + pr_alert("BUG: Auto-delete engaged on persistent event\n"); + goto out; + } + + /* + * Unfortunately we have to attempt the actual destroy in a work + * queue. This is because not all cases handle a trace_event_call + * being removed within the class->reg() operation for unregister. + */ + INIT_WORK(&user->put_work, delayed_destroy_user_event); + + /* + * Since the event is still in the hashtable, we have to re-inc + * the ref count to 1. This count will be decremented and checked + * in the work queue to ensure it's still the last ref. This is + * needed because a user-process could register the same event in + * between the time of event_mutex release and the work queue + * running the delayed destroy. If we removed the item now from + * the hashtable, this would result in a timing window where a + * user process would fail a register because the trace_event_call + * register would fail in the tracing layers. + */ + refcount_set(&user->refcnt, 1); + + if (WARN_ON_ONCE(!schedule_work(&user->put_work))) { + /* + * If we fail we must wait for an admin to attempt delete or + * another register/close of the event, whichever is first. + */ + pr_warn("user_events: Unable to queue delayed destroy\n"); + } +out: + /* Ensure if we didn't have event_mutex before we unlock it */ + if (!locked) + mutex_unlock(&event_mutex); } -static inline struct user_event_group -*user_event_group_from_user_ns(struct user_namespace *user_ns) +static void user_event_group_destroy(struct user_event_group *group) { - if (user_ns == &init_user_ns) - return init_group; - - return NULL; + kfree(group->system_name); + kfree(group); } -static struct user_event_group *current_user_event_group(void) +static char *user_event_group_system_name(void) { - struct user_namespace *user_ns = current_user_ns(); - struct user_event_group *group = NULL; + char *system_name; + int len = sizeof(USER_EVENTS_SYSTEM) + 1; - while (user_ns) { - group = user_event_group_from_user_ns(user_ns); + system_name = kmalloc(len, GFP_KERNEL); - if (group) - break; + if (!system_name) + return NULL; - user_ns = user_ns->parent; - } + snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM); - return group; + return system_name; } -static struct user_event_group -*user_event_group_create(struct user_namespace *user_ns) +static struct user_event_group *current_user_event_group(void) +{ + return init_group; +} + +static struct user_event_group *user_event_group_create(void) { struct user_event_group *group; @@ -243,7 +332,7 @@ static struct user_event_group if (!group) return NULL; - group->system_name = user_event_group_system_name(user_ns); + group->system_name = user_event_group_system_name(); if (!group->system_name) goto error; @@ -259,12 +348,13 @@ error: return NULL; }; -static void user_event_enabler_destroy(struct user_event_enabler *enabler) +static void user_event_enabler_destroy(struct user_event_enabler *enabler, + bool locked) { list_del_rcu(&enabler->mm_enablers_link); /* No longer tracking the event via the enabler */ - refcount_dec(&enabler->event->refcnt); + user_event_put(enabler->event, locked); kfree(enabler); } @@ -326,7 +416,7 @@ static void user_event_enabler_fault_fixup(struct work_struct *work) /* User asked for enabler to be removed during fault */ if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) { - user_event_enabler_destroy(enabler); + user_event_enabler_destroy(enabler, true); goto out; } @@ -501,14 +591,12 @@ static bool user_event_enabler_dup(struct user_event_enabler *orig, if (!enabler) return false; - enabler->event = orig->event; + enabler->event = user_event_get(orig->event); enabler->addr = orig->addr; /* Only dup part of value (ignore future flags, etc) */ enabler->values = orig->values & ENABLE_VAL_DUP_MASK; - refcount_inc(&enabler->event->refcnt); - /* Enablers not exposed yet, RCU not required */ list_add(&enabler->mm_enablers_link, &mm->enablers); @@ -625,7 +713,7 @@ static void user_event_mm_destroy(struct user_event_mm *mm) struct user_event_enabler *enabler, *next; list_for_each_entry_safe(enabler, next, &mm->enablers, mm_enablers_link) - user_event_enabler_destroy(enabler); + user_event_enabler_destroy(enabler, false); mmdrop(mm->mm); kfree(mm); @@ -780,7 +868,7 @@ retry: * exit or run exec(), which includes forks and clones. */ if (!*write_result) { - refcount_inc(&enabler->event->refcnt); + user_event_get(user); list_add_rcu(&enabler->mm_enablers_link, &user_mm->enablers); } @@ -803,7 +891,12 @@ out: static __always_inline __must_check bool user_event_last_ref(struct user_event *user) { - return refcount_read(&user->refcnt) == 1; + int last = 0; + + if (user->reg_flags & USER_EVENT_REG_PERSIST) + last = 1; + + return refcount_read(&user->refcnt) == last; } static __always_inline __must_check @@ -842,7 +935,8 @@ static struct list_head *user_event_get_fields(struct trace_event_call *call) * Upon success user_event has its ref count increased by 1. */ static int user_event_parse_cmd(struct user_event_group *group, - char *raw_command, struct user_event **newuser) + char *raw_command, struct user_event **newuser, + int reg_flags) { char *name = raw_command; char *args = strpbrk(name, " "); @@ -856,7 +950,7 @@ static int user_event_parse_cmd(struct user_event_group *group, if (flags) *flags++ = '\0'; - return user_event_parse(group, name, args, flags, newuser); + return user_event_parse(group, name, args, flags, newuser, reg_flags); } static int user_field_array_size(const char *type) @@ -1367,10 +1461,8 @@ static struct user_event *find_user_event(struct user_event_group *group, *outkey = key; hash_for_each_possible(group->register_table, user, node, key) - if (!strcmp(EVENT_NAME(user), name)) { - refcount_inc(&user->refcnt); - return user; - } + if (!strcmp(EVENT_NAME(user), name)) + return user_event_get(user); return NULL; } @@ -1432,7 +1524,7 @@ static void user_event_ftrace(struct user_event *user, struct iov_iter *i, if (unlikely(!entry)) return; - if (unlikely(!copy_nofault(entry + 1, i->count, i))) + if (unlikely(i->count != 0 && !copy_nofault(entry + 1, i->count, i))) goto discard; if (!list_empty(&user->validators) && @@ -1473,7 +1565,7 @@ static void user_event_perf(struct user_event *user, struct iov_iter *i, perf_fetch_caller_regs(regs); - if (unlikely(!copy_nofault(perf_entry + 1, i->count, i))) + if (unlikely(i->count != 0 && !copy_nofault(perf_entry + 1, i->count, i))) goto discard; if (!list_empty(&user->validators) && @@ -1584,12 +1676,12 @@ static int user_event_reg(struct trace_event_call *call, return ret; inc: - refcount_inc(&user->refcnt); + user_event_get(user); update_enable_bit_for(user); return 0; dec: update_enable_bit_for(user); - refcount_dec(&user->refcnt); + user_event_put(user, true); return 0; } @@ -1620,10 +1712,11 @@ static int user_event_create(const char *raw_command) mutex_lock(&group->reg_mutex); - ret = user_event_parse_cmd(group, name, &user); + /* Dyn events persist, otherwise they would cleanup immediately */ + ret = user_event_parse_cmd(group, name, &user, USER_EVENT_REG_PERSIST); if (!ret) - refcount_dec(&user->refcnt); + user_event_put(user, false); mutex_unlock(&group->reg_mutex); @@ -1745,6 +1838,8 @@ static bool user_event_match(const char *system, const char *event, if (match && argc > 0) match = user_fields_match(user, argc, argv); + else if (match && argc == 0) + match = list_empty(&user->fields); return match; } @@ -1781,11 +1876,17 @@ static int user_event_trace_register(struct user_event *user) */ static int user_event_parse(struct user_event_group *group, char *name, char *args, char *flags, - struct user_event **newuser) + struct user_event **newuser, int reg_flags) { int ret; u32 key; struct user_event *user; + int argc = 0; + char **argv; + + /* User register flags are not ready yet */ + if (reg_flags != 0 || flags != NULL) + return -EINVAL; /* Prevent dyn_event from racing */ mutex_lock(&event_mutex); @@ -1793,13 +1894,35 @@ static int user_event_parse(struct user_event_group *group, char *name, mutex_unlock(&event_mutex); if (user) { - *newuser = user; - /* - * Name is allocated by caller, free it since it already exists. - * Caller only worries about failure cases for freeing. - */ - kfree(name); + if (args) { + argv = argv_split(GFP_KERNEL, args, &argc); + if (!argv) { + ret = -ENOMEM; + goto error; + } + + ret = user_fields_match(user, argc, (const char **)argv); + argv_free(argv); + + } else + ret = list_empty(&user->fields); + + if (ret) { + *newuser = user; + /* + * Name is allocated by caller, free it since it already exists. + * Caller only worries about failure cases for freeing. + */ + kfree(name); + } else { + ret = -EADDRINUSE; + goto error; + } + return 0; +error: + user_event_put(user, false); + return ret; } user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT); @@ -1852,8 +1975,15 @@ static int user_event_parse(struct user_event_group *group, char *name, if (ret) goto put_user_lock; - /* Ensure we track self ref and caller ref (2) */ - refcount_set(&user->refcnt, 2); + user->reg_flags = reg_flags; + + if (user->reg_flags & USER_EVENT_REG_PERSIST) { + /* Ensure we track self ref and caller ref (2) */ + refcount_set(&user->refcnt, 2); + } else { + /* Ensure we track only caller ref (1) */ + refcount_set(&user->refcnt, 1); + } dyn_event_init(&user->devent, &user_event_dops); dyn_event_add(&user->devent, &user->call); @@ -1885,7 +2015,7 @@ static int delete_user_event(struct user_event_group *group, char *name) if (!user) return -ENOENT; - refcount_dec(&user->refcnt); + user_event_put(user, true); if (!user_event_last_ref(user)) return -EBUSY; @@ -2044,9 +2174,7 @@ static int user_events_ref_add(struct user_event_file_info *info, for (i = 0; i < count; ++i) new_refs->events[i] = refs->events[i]; - new_refs->events[i] = user; - - refcount_inc(&user->refcnt); + new_refs->events[i] = user_event_get(user); rcu_assign_pointer(info->refs, new_refs); @@ -2077,8 +2205,8 @@ static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg) if (ret) return ret; - /* Ensure no flags, since we don't support any yet */ - if (kreg->flags != 0) + /* Ensure only valid flags */ + if (kreg->flags & ~(USER_EVENT_REG_MAX-1)) return -EINVAL; /* Ensure supported size */ @@ -2150,7 +2278,7 @@ static long user_events_ioctl_reg(struct user_event_file_info *info, return ret; } - ret = user_event_parse_cmd(info->group, name, &user); + ret = user_event_parse_cmd(info->group, name, &user, reg.flags); if (ret) { kfree(name); @@ -2160,7 +2288,7 @@ static long user_events_ioctl_reg(struct user_event_file_info *info, ret = user_events_ref_add(info, user); /* No longer need parse ref, ref_add either worked or not */ - refcount_dec(&user->refcnt); + user_event_put(user, false); /* Positive number is index and valid */ if (ret < 0) @@ -2309,7 +2437,7 @@ static long user_events_ioctl_unreg(unsigned long uarg) set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)); if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler))) - user_event_enabler_destroy(enabler); + user_event_enabler_destroy(enabler, true); /* Removed at least one */ ret = 0; @@ -2367,7 +2495,6 @@ static int user_events_release(struct inode *node, struct file *file) struct user_event_file_info *info = file->private_data; struct user_event_group *group; struct user_event_refs *refs; - struct user_event *user; int i; if (!info) @@ -2391,12 +2518,9 @@ static int user_events_release(struct inode *node, struct file *file) * The underlying user_events are ref counted, and cannot be freed. * After this decrement, the user_events may be freed elsewhere. */ - for (i = 0; i < refs->count; ++i) { - user = refs->events[i]; + for (i = 0; i < refs->count; ++i) + user_event_put(refs->events[i], false); - if (user) - refcount_dec(&user->refcnt); - } out: file->private_data = NULL; @@ -2577,7 +2701,7 @@ static int __init trace_events_user_init(void) if (!fault_cache) return -ENOMEM; - init_group = user_event_group_create(&init_user_ns); + init_group = user_event_group_create(); if (!init_group) { kmem_cache_destroy(fault_cache); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 15f05faaae44..1e33f367783e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -847,7 +847,7 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c int ret; void *pos; - list_for_each_entry(field, head, link) { + list_for_each_entry_reverse(field, head, link) { trace_seq_printf(&iter->seq, " %s=", field->name); if (field->offset + field->size > iter->ent_size) { trace_seq_puts(&iter->seq, "<OVERFLOW>"); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 049ba132f7ef..1a31065b2036 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -27,6 +27,8 @@ #include <linux/string.h> #include <linux/xarray.h> +#include "radix-tree.h" + /* * Radix tree node cache. */ diff --git a/lib/radix-tree.h b/lib/radix-tree.h new file mode 100644 index 000000000000..40d5c03e2b09 --- /dev/null +++ b/lib/radix-tree.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* radix-tree helpers that are only shared with xarray */ + +struct kmem_cache; +struct rcu_head; + +extern struct kmem_cache *radix_tree_node_cachep; +extern void radix_tree_node_rcu_free(struct rcu_head *head); diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 9dd9745d365f..3718d9886407 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -369,7 +369,7 @@ vm_map_ram_test(void) int i; map_nr_pages = nr_pages > 0 ? nr_pages:1; - pages = kmalloc(map_nr_pages * sizeof(struct page), GFP_KERNEL); + pages = kcalloc(map_nr_pages, sizeof(struct page *), GFP_KERNEL); if (!pages) return -1; diff --git a/lib/xarray.c b/lib/xarray.c index ea9ce1f0b386..2071a3718f4e 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -12,6 +12,8 @@ #include <linux/slab.h> #include <linux/xarray.h> +#include "radix-tree.h" + /* * Coding conventions in this file: * @@ -247,10 +249,6 @@ void *xas_load(struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_load); -/* Move the radix tree node cache here */ -extern struct kmem_cache *radix_tree_node_cachep; -extern void radix_tree_node_rcu_free(struct rcu_head *head); - #define XA_RCU_FREE ((struct xarray *)1) static void xa_node_free(struct xa_node *node) diff --git a/mm/damon/core.c b/mm/damon/core.c index d9ef62047bf5..91cff7f2997e 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -551,6 +551,8 @@ int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs) return -EINVAL; if (attrs->min_nr_regions > attrs->max_nr_regions) return -EINVAL; + if (attrs->sample_interval > attrs->aggr_interval) + return -EINVAL; damon_update_monitoring_results(ctx, attrs); ctx->attrs = *attrs; diff --git a/mm/filemap.c b/mm/filemap.c index b4c9bd368b7e..83dda76d1fc3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1760,7 +1760,9 @@ bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, * * Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'return - index >= max_scan' will be true). - * In the rare case of index wrap-around, 0 will be returned. + * In the rare case of index wrap-around, 0 will be returned. 0 will also + * be returned if index == 0 and there is a gap at the index. We can not + * wrap-around if passed index == 0. */ pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) @@ -1770,12 +1772,13 @@ pgoff_t page_cache_next_miss(struct address_space *mapping, while (max_scan--) { void *entry = xas_next(&xas); if (!entry || xa_is_value(entry)) - break; - if (xas.xa_index == 0) - break; + return xas.xa_index; + if (xas.xa_index == 0 && index != 0) + return xas.xa_index; } - return xas.xa_index; + /* No gaps in range and no wrap-around, return index beyond range */ + return xas.xa_index + 1; } EXPORT_SYMBOL(page_cache_next_miss); @@ -1796,7 +1799,9 @@ EXPORT_SYMBOL(page_cache_next_miss); * * Return: The index of the gap if found, otherwise an index outside the * range specified (in which case 'index - return >= max_scan' will be true). - * In the rare case of wrap-around, ULONG_MAX will be returned. + * In the rare case of wrap-around, ULONG_MAX will be returned. ULONG_MAX + * will also be returned if index == ULONG_MAX and there is a gap at the + * index. We can not wrap-around if passed index == ULONG_MAX. */ pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) @@ -1806,12 +1811,13 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, while (max_scan--) { void *entry = xas_prev(&xas); if (!entry || xa_is_value(entry)) - break; - if (xas.xa_index == ULONG_MAX) - break; + return xas.xa_index; + if (xas.xa_index == ULONG_MAX && index != ULONG_MAX) + return xas.xa_index; } - return xas.xa_index; + /* No gaps in range and no wrap-around, return index beyond range */ + return xas.xa_index - 1; } EXPORT_SYMBOL(page_cache_prev_miss); diff --git a/mm/gup_test.c b/mm/gup_test.c index 8ae7307a1bb6..c0421b786dcd 100644 --- a/mm/gup_test.c +++ b/mm/gup_test.c @@ -381,6 +381,7 @@ static int gup_test_release(struct inode *inode, struct file *file) static const struct file_operations gup_test_fops = { .open = nonseekable_open, .unlocked_ioctl = gup_test_ioctl, + .compat_ioctl = compat_ptr_ioctl, .release = gup_test_release, }; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 6b9d39d65b73..2d0d58fb4e7f 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2070,7 +2070,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH); xas_lock_irq(&xas); - xas_set(&xas, index); VM_BUG_ON_PAGE(page != xas_load(&xas), page); diff --git a/mm/memfd.c b/mm/memfd.c index 69b90c31d38c..e763e76f1106 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -371,12 +371,15 @@ SYSCALL_DEFINE2(memfd_create, inode->i_mode &= ~0111; file_seals = memfd_file_seals_ptr(file); - *file_seals &= ~F_SEAL_SEAL; - *file_seals |= F_SEAL_EXEC; + if (file_seals) { + *file_seals &= ~F_SEAL_SEAL; + *file_seals |= F_SEAL_EXEC; + } } else if (flags & MFD_ALLOW_SEALING) { /* MFD_EXEC and MFD_ALLOW_SEALING are set */ file_seals = memfd_file_seals_ptr(file); - *file_seals &= ~F_SEAL_SEAL; + if (file_seals) + *file_seals &= ~F_SEAL_SEAL; } fd_install(fd, file); diff --git a/mm/mmap.c b/mm/mmap.c index 13678edaa22c..d600404580b2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2318,21 +2318,6 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, return __split_vma(vmi, vma, addr, new_below); } -static inline int munmap_sidetree(struct vm_area_struct *vma, - struct ma_state *mas_detach) -{ - vma_start_write(vma); - mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1); - if (mas_store_gfp(mas_detach, vma, GFP_KERNEL)) - return -ENOMEM; - - vma_mark_detached(vma, true); - if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm -= vma_pages(vma); - - return 0; -} - /* * do_vmi_align_munmap() - munmap the aligned region from @start to @end. * @vmi: The vma iterator @@ -2354,6 +2339,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct maple_tree mt_detach; int count = 0; int error = -ENOMEM; + unsigned long locked_vm = 0; MA_STATE(mas_detach, &mt_detach, 0, 0); mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); mt_set_external_lock(&mt_detach, &mm->mmap_lock); @@ -2399,9 +2385,13 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, if (error) goto end_split_failed; } - error = munmap_sidetree(next, &mas_detach); - if (error) - goto munmap_sidetree_failed; + vma_start_write(next); + mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1); + if (mas_store_gfp(&mas_detach, next, GFP_KERNEL)) + goto munmap_gather_failed; + vma_mark_detached(next, true); + if (next->vm_flags & VM_LOCKED) + locked_vm += vma_pages(next); count++; #ifdef CONFIG_DEBUG_VM_MAPLE_TREE @@ -2447,10 +2437,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, } #endif /* Point of no return */ + error = -ENOMEM; vma_iter_set(vmi, start); if (vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL)) - return -ENOMEM; + goto clear_tree_failed; + mm->locked_vm -= locked_vm; mm->map_count -= count; /* * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or @@ -2480,9 +2472,14 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, validate_mm(mm); return downgrade ? 1 : 0; +clear_tree_failed: userfaultfd_error: -munmap_sidetree_failed: +munmap_gather_failed: end_split_failed: + mas_set(&mas_detach, 0); + mas_for_each(&mas_detach, next, end) + vma_mark_detached(next, false); + __mt_destroy(&mt_detach); start_split_failed: map_count_exceeded: diff --git a/mm/mprotect.c b/mm/mprotect.c index 92d3d3ca390a..c59e7561698c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -867,7 +867,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, } tlb_finish_mmu(&tlb); - if (!error && vma_iter_end(&vmi) < end) + if (!error && tmp < end) error = -ENOMEM; out: diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index fe10436d9911..3ab53fad8876 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -5,12 +5,10 @@ #include <linux/seq_file.h> #include <linux/shrinker.h> #include <linux/memcontrol.h> -#include <linux/srcu.h> /* defined in vmscan.c */ -extern struct mutex shrinker_mutex; +extern struct rw_semaphore shrinker_rwsem; extern struct list_head shrinker_list; -extern struct srcu_struct shrinker_srcu; static DEFINE_IDA(shrinker_debugfs_ida); static struct dentry *shrinker_debugfs_root; @@ -51,13 +49,18 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v) struct mem_cgroup *memcg; unsigned long total; bool memcg_aware; - int ret = 0, nid, srcu_idx; + int ret, nid; count_per_node = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL); if (!count_per_node) return -ENOMEM; - srcu_idx = srcu_read_lock(&shrinker_srcu); + ret = down_read_killable(&shrinker_rwsem); + if (ret) { + kfree(count_per_node); + return ret; + } + rcu_read_lock(); memcg_aware = shrinker->flags & SHRINKER_MEMCG_AWARE; @@ -88,7 +91,8 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v) } } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); - srcu_read_unlock(&shrinker_srcu, srcu_idx); + rcu_read_unlock(); + up_read(&shrinker_rwsem); kfree(count_per_node); return ret; @@ -111,8 +115,9 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file, .gfp_mask = GFP_KERNEL, }; struct mem_cgroup *memcg = NULL; - int nid, srcu_idx; + int nid; char kbuf[72]; + ssize_t ret; read_len = size < (sizeof(kbuf) - 1) ? size : (sizeof(kbuf) - 1); if (copy_from_user(kbuf, buf, read_len)) @@ -141,7 +146,11 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file, return -EINVAL; } - srcu_idx = srcu_read_lock(&shrinker_srcu); + ret = down_read_killable(&shrinker_rwsem); + if (ret) { + mem_cgroup_put(memcg); + return ret; + } sc.nid = nid; sc.memcg = memcg; @@ -150,7 +159,7 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file, shrinker->scan_objects(shrinker, &sc); - srcu_read_unlock(&shrinker_srcu, srcu_idx); + up_read(&shrinker_rwsem); mem_cgroup_put(memcg); return size; @@ -168,7 +177,7 @@ int shrinker_debugfs_add(struct shrinker *shrinker) char buf[128]; int id; - lockdep_assert_held(&shrinker_mutex); + lockdep_assert_held(&shrinker_rwsem); /* debugfs isn't initialized yet, add debugfs entries later. */ if (!shrinker_debugfs_root) @@ -211,7 +220,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) if (!new) return -ENOMEM; - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); old = shrinker->name; shrinker->name = new; @@ -229,7 +238,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) shrinker->debugfs_entry = entry; } - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); kfree_const(old); @@ -242,7 +251,7 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker, { struct dentry *entry = shrinker->debugfs_entry; - lockdep_assert_held(&shrinker_mutex); + lockdep_assert_held(&shrinker_rwsem); kfree_const(shrinker->name); shrinker->name = NULL; @@ -271,14 +280,14 @@ static int __init shrinker_debugfs_init(void) shrinker_debugfs_root = dentry; /* Create debugfs entries for shrinkers registered at boot */ - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); list_for_each_entry(shrinker, &shrinker_list, list) if (!shrinker->debugfs_entry) { ret = shrinker_debugfs_add(shrinker); if (ret) break; } - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); return ret; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9683573f1225..1d13d71687d7 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3098,11 +3098,20 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, * allocation request, free them via vfree() if any. */ if (area->nr_pages != nr_small_pages) { - /* vm_area_alloc_pages() can also fail due to a fatal signal */ - if (!fatal_signal_pending(current)) + /* + * vm_area_alloc_pages() can fail due to insufficient memory but + * also:- + * + * - a pending fatal signal + * - insufficient huge page-order pages + * + * Since we always retry allocations at order-0 in the huge page + * case a warning for either is spurious. + */ + if (!fatal_signal_pending(current) && page_order == 0) warn_alloc(gfp_mask, NULL, - "vmalloc error: size %lu, page order %u, failed to allocate pages", - area->nr_pages * PAGE_SIZE, page_order); + "vmalloc error: size %lu, failed to allocate pages", + area->nr_pages * PAGE_SIZE); goto fail; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 6d0cd2840cf0..5bf98d0a22c9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -35,7 +35,7 @@ #include <linux/cpuset.h> #include <linux/compaction.h> #include <linux/notifier.h> -#include <linux/mutex.h> +#include <linux/rwsem.h> #include <linux/delay.h> #include <linux/kthread.h> #include <linux/freezer.h> @@ -57,7 +57,6 @@ #include <linux/khugepaged.h> #include <linux/rculist_nulls.h> #include <linux/random.h> -#include <linux/srcu.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -190,9 +189,7 @@ struct scan_control { int vm_swappiness = 60; LIST_HEAD(shrinker_list); -DEFINE_MUTEX(shrinker_mutex); -DEFINE_SRCU(shrinker_srcu); -static atomic_t shrinker_srcu_generation = ATOMIC_INIT(0); +DECLARE_RWSEM(shrinker_rwsem); #ifdef CONFIG_MEMCG static int shrinker_nr_max; @@ -211,21 +208,8 @@ static inline int shrinker_defer_size(int nr_items) static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, int nid) { - return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info, - &shrinker_srcu, - lockdep_is_held(&shrinker_mutex)); -} - -static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg, - int nid) -{ - return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info, - &shrinker_srcu); -} - -static void free_shrinker_info_rcu(struct rcu_head *head) -{ - kvfree(container_of(head, struct shrinker_info, rcu)); + return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info, + lockdep_is_held(&shrinker_rwsem)); } static int expand_one_shrinker_info(struct mem_cgroup *memcg, @@ -266,7 +250,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, defer_size - old_defer_size); rcu_assign_pointer(pn->shrinker_info, new); - call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_rcu); + kvfree_rcu(old, rcu); } return 0; @@ -292,7 +276,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg) int nid, size, ret = 0; int map_size, defer_size = 0; - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); map_size = shrinker_map_size(shrinker_nr_max); defer_size = shrinker_defer_size(shrinker_nr_max); size = map_size + defer_size; @@ -308,7 +292,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg) info->map_nr_max = shrinker_nr_max; rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); } - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); return ret; } @@ -324,7 +308,7 @@ static int expand_shrinker_info(int new_id) if (!root_mem_cgroup) goto out; - lockdep_assert_held(&shrinker_mutex); + lockdep_assert_held(&shrinker_rwsem); map_size = shrinker_map_size(new_nr_max); defer_size = shrinker_defer_size(new_nr_max); @@ -352,16 +336,15 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id) { if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) { struct shrinker_info *info; - int srcu_idx; - srcu_idx = srcu_read_lock(&shrinker_srcu); - info = shrinker_info_srcu(memcg, nid); + rcu_read_lock(); + info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) { /* Pairs with smp mb in shrink_slab() */ smp_mb__before_atomic(); set_bit(shrinker_id, info->map); } - srcu_read_unlock(&shrinker_srcu, srcu_idx); + rcu_read_unlock(); } } @@ -374,7 +357,8 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker) if (mem_cgroup_disabled()) return -ENOSYS; - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); + /* This may call shrinker, so it must use down_read_trylock() */ id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); if (id < 0) goto unlock; @@ -388,7 +372,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker) shrinker->id = id; ret = 0; unlock: - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); return ret; } @@ -398,7 +382,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker) BUG_ON(id < 0); - lockdep_assert_held(&shrinker_mutex); + lockdep_assert_held(&shrinker_rwsem); idr_remove(&shrinker_idr, id); } @@ -408,7 +392,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, { struct shrinker_info *info; - info = shrinker_info_srcu(memcg, nid); + info = shrinker_info_protected(memcg, nid); return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0); } @@ -417,7 +401,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, { struct shrinker_info *info; - info = shrinker_info_srcu(memcg, nid); + info = shrinker_info_protected(memcg, nid); return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]); } @@ -433,7 +417,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) parent = root_mem_cgroup; /* Prevent from concurrent shrinker_info expand */ - mutex_lock(&shrinker_mutex); + down_read(&shrinker_rwsem); for_each_node(nid) { child_info = shrinker_info_protected(memcg, nid); parent_info = shrinker_info_protected(parent, nid); @@ -442,7 +426,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg) atomic_long_add(nr, &parent_info->nr_deferred[i]); } } - mutex_unlock(&shrinker_mutex); + up_read(&shrinker_rwsem); } static bool cgroup_reclaim(struct scan_control *sc) @@ -743,9 +727,9 @@ void free_prealloced_shrinker(struct shrinker *shrinker) shrinker->name = NULL; #endif if (shrinker->flags & SHRINKER_MEMCG_AWARE) { - mutex_lock(&shrinker_mutex); + down_write(&shrinker_rwsem); unregister_memcg_shrinker(shrinker); - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); return; } @@ -755,11 +739,11 @@ void free_prealloced_shrinker(struct shrinker *shrinker) void register_shrinker_prepared(struct shrinker *shrinker) { - mutex_lock(&shrinker_mutex); - list_add_tail_rcu(&shrinker->list, &shrinker_list); + down_write(&shrinker_rwsem); + list_add_tail(&shrinker->list, &shrinker_list); shrinker->flags |= SHRINKER_REGISTERED; shrinker_debugfs_add(shrinker); - mutex_unlock(&shrinker_mutex); + up_write(&shrinker_rwsem); } static int __register_shrinker(struct shrinker *shrinker) @@ -810,16 +794,13 @@ void unregister_shrinker(struct shrinker *shrinker) if (!(shrinker->flags & SHRINKER_REGISTERED)) return; - mutex_lock(&shrinker_mutex); - list_del_rcu(&shrinker->list); + down_write(&shrinker_rwsem); + list_del(&shrinker->list); shrinker->flags &= ~SHRINKER_REGISTERED; if (shrinker->flags & SHRINKER_MEMCG_AWARE) unregister_memcg_shrinker(shrinker); debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id); - mutex_unlock(&shrinker_mutex); - - atomic_inc(&shrinker_srcu_generation); - synchronize_srcu(&shrinker_srcu); + up_write(&shrinker_rwsem); shrinker_debugfs_remove(debugfs_entry, debugfs_id); @@ -831,13 +812,15 @@ EXPORT_SYMBOL(unregister_shrinker); /** * synchronize_shrinkers - Wait for all running shrinkers to complete. * - * This is useful to guarantee that all shrinker invocations have seen an - * update, before freeing memory. + * This is equivalent to calling unregister_shrink() and register_shrinker(), + * but atomically and with less overhead. This is useful to guarantee that all + * shrinker invocations have seen an update, before freeing memory, similar to + * rcu. */ void synchronize_shrinkers(void) { - atomic_inc(&shrinker_srcu_generation); - synchronize_srcu(&shrinker_srcu); + down_write(&shrinker_rwsem); + up_write(&shrinker_rwsem); } EXPORT_SYMBOL(synchronize_shrinkers); @@ -946,20 +929,19 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, { struct shrinker_info *info; unsigned long ret, freed = 0; - int srcu_idx, generation; - int i = 0; + int i; if (!mem_cgroup_online(memcg)) return 0; -again: - srcu_idx = srcu_read_lock(&shrinker_srcu); - info = shrinker_info_srcu(memcg, nid); + if (!down_read_trylock(&shrinker_rwsem)) + return 0; + + info = shrinker_info_protected(memcg, nid); if (unlikely(!info)) goto unlock; - generation = atomic_read(&shrinker_srcu_generation); - for_each_set_bit_from(i, info->map, info->map_nr_max) { + for_each_set_bit(i, info->map, info->map_nr_max) { struct shrink_control sc = { .gfp_mask = gfp_mask, .nid = nid, @@ -1005,14 +987,14 @@ again: set_shrinker_bit(memcg, nid, i); } freed += ret; - if (atomic_read(&shrinker_srcu_generation) != generation) { - srcu_read_unlock(&shrinker_srcu, srcu_idx); - i++; - goto again; + + if (rwsem_is_contended(&shrinker_rwsem)) { + freed = freed ? : 1; + break; } } unlock: - srcu_read_unlock(&shrinker_srcu, srcu_idx); + up_read(&shrinker_rwsem); return freed; } #else /* CONFIG_MEMCG */ @@ -1049,7 +1031,6 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, { unsigned long ret, freed = 0; struct shrinker *shrinker; - int srcu_idx, generation; /* * The root memcg might be allocated even though memcg is disabled @@ -1061,11 +1042,10 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) return shrink_slab_memcg(gfp_mask, nid, memcg, priority); - srcu_idx = srcu_read_lock(&shrinker_srcu); + if (!down_read_trylock(&shrinker_rwsem)) + goto out; - generation = atomic_read(&shrinker_srcu_generation); - list_for_each_entry_srcu(shrinker, &shrinker_list, list, - srcu_read_lock_held(&shrinker_srcu)) { + list_for_each_entry(shrinker, &shrinker_list, list) { struct shrink_control sc = { .gfp_mask = gfp_mask, .nid = nid, @@ -1076,14 +1056,19 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, if (ret == SHRINK_EMPTY) ret = 0; freed += ret; - - if (atomic_read(&shrinker_srcu_generation) != generation) { + /* + * Bail out if someone want to register a new shrinker to + * prevent the registration from being stalled for long periods + * by parallel ongoing shrinking. + */ + if (rwsem_is_contended(&shrinker_rwsem)) { freed = freed ? : 1; break; } } - srcu_read_unlock(&shrinker_srcu, srcu_idx); + up_read(&shrinker_rwsem); +out: cond_resched(); return freed; } diff --git a/mm/zswap.c b/mm/zswap.c index 59da2a415fbb..30092d9a3b23 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1174,9 +1174,16 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, goto reject; } + /* + * XXX: zswap reclaim does not work with cgroups yet. Without a + * cgroup-aware entry LRU, we will push out entries system-wide based on + * local cgroup limits. + */ objcg = get_obj_cgroup_from_page(page); - if (objcg && !obj_cgroup_may_zswap(objcg)) - goto shrink; + if (objcg && !obj_cgroup_may_zswap(objcg)) { + ret = -ENOMEM; + goto reject; + } /* reclaim space if needed */ if (zswap_is_full()) { diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a06b5641287a..b0ebf853cb07 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -191,6 +191,9 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) struct dccp_sock *dp = dccp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, " + "please contact the netdev mailing list\n"); + icsk->icsk_rto = DCCP_TIMEOUT_INIT; icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h index 8aeaadca844f..4dac965c99df 100644 --- a/net/handshake/handshake.h +++ b/net/handshake/handshake.h @@ -31,7 +31,6 @@ struct handshake_req { struct list_head hr_list; struct rhash_head hr_rhash; unsigned long hr_flags; - struct file *hr_file; const struct handshake_proto *hr_proto; struct sock *hr_sk; void (*hr_odestruct)(struct sock *sk); diff --git a/net/handshake/request.c b/net/handshake/request.c index d78d41abb3d9..94d5cef3e048 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -239,7 +239,6 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, } req->hr_odestruct = req->hr_sk->sk_destruct; req->hr_sk->sk_destruct = handshake_sk_destruct; - req->hr_file = sock->file; ret = -EOPNOTSUPP; net = sock_net(req->hr_sk); @@ -335,9 +334,6 @@ bool handshake_req_cancel(struct sock *sk) return false; } - /* Request accepted and waiting for DONE */ - fput(req->hr_file); - out_true: trace_handshake_cancel(net, req, sk); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 56d94d23b9e0..143f93a12f25 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -22,6 +22,8 @@ static int udplite_sk_init(struct sock *sk) { udp_init_sock(sk); udp_sk(sk)->pcflag = UDPLITE_BIT; + pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " + "please contact the netdev mailing list\n"); return 0; } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index c4835dbdfcff..f804c11e2146 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -114,7 +114,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) addr_type = ipv6_addr_type(daddr); if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) || (addr_type & IPV6_ADDR_MAPPED) || - (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if)) + (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if && + l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if)) return -EINVAL; ipcm6_init_sk(&ipc6, np); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 3bab0cc13697..8e010d07917a 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -8,6 +8,8 @@ * Changes: * Fixes: */ +#define pr_fmt(fmt) "UDPLite6: " fmt + #include <linux/export.h> #include <linux/proc_fs.h> #include "udp_impl.h" @@ -16,6 +18,8 @@ static int udplitev6_sk_init(struct sock *sk) { udpv6_init_sock(sk); udp_sk(sk)->pcflag = UDPLITE_BIT; + pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " + "please contact the netdev mailing list\n"); return 0; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 86b2036d73ff..f2d08dbccfb7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4865,11 +4865,16 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int res; if (wdev->use_4addr) return -EOPNOTSUPP; - return ieee80211_vif_set_links(sdata, wdev->valid_links); + mutex_lock(&sdata->local->mtx); + res = ieee80211_vif_set_links(sdata, wdev->valid_links); + mutex_unlock(&sdata->local->mtx); + + return res; } static void ieee80211_del_intf_link(struct wiphy *wiphy, @@ -4878,7 +4883,9 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + mutex_lock(&sdata->local->mtx); ieee80211_vif_set_links(sdata, wdev->valid_links); + mutex_unlock(&sdata->local->mtx); } static int sta_add_link_station(struct ieee80211_local *local, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b0372e76f373..4159fb65038b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2312,7 +2312,7 @@ ieee802_11_parse_elems(const u8 *start, size_t len, bool action, return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss); } -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos); +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id); extern const int ieee802_1d_to_ac[8]; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index e82db88a47f8..40f030b8ece9 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -2,7 +2,7 @@ /* * MLO link handling * - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include <linux/slab.h> #include <linux/kernel.h> @@ -409,6 +409,7 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, IEEE80211_CHANCTX_SHARED); WARN_ON_ONCE(ret); + ieee80211_mgd_set_link_qos_params(link); ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE | @@ -423,7 +424,6 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_TWT | BSS_CHANGED_HE_OBSS_PD | BSS_CHANGED_HE_BSS_COLOR); - ieee80211_mgd_set_link_qos_params(link); } old_active = sdata->vif.active_links; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bd8d6f9545f5..5a4303130ef2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1372,10 +1372,11 @@ static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata, ieee80211_add_non_inheritance_elem(skb, outer_present_elems, link_present_elems); - ieee80211_fragment_element(skb, subelem_len); + ieee80211_fragment_element(skb, subelem_len, + IEEE80211_MLE_SUBELEM_FRAGMENT); } - ieee80211_fragment_element(skb, ml_elem_len); + ieee80211_fragment_element(skb, ml_elem_len, WLAN_EID_FRAGMENT); } static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7f1c7f67014b..13b522dab0a3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4445,7 +4445,7 @@ static void ieee80211_mlo_multicast_tx(struct net_device *dev, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - unsigned long links = sdata->vif.valid_links; + unsigned long links = sdata->vif.active_links; unsigned int link; u32 ctrl_flags = IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX; @@ -6040,7 +6040,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (WARN_ON_ONCE(link == ARRAY_SIZE(sdata->vif.link_conf))) - link = ffs(sdata->vif.valid_links) - 1; + link = ffs(sdata->vif.active_links) - 1; } IEEE80211_SKB_CB(skb)->control.flags |= @@ -6076,7 +6076,7 @@ void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, band = chanctx_conf->def.chan->band; } else { WARN_ON(link_id >= 0 && - !(sdata->vif.valid_links & BIT(link_id))); + !(sdata->vif.active_links & BIT(link_id))); /* MLD transmissions must not rely on the band */ band = 0; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 4bf76150925d..3bd07a0a782f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -5049,7 +5049,7 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos, return pos; } -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos) +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id) { unsigned int elem_len; @@ -5069,7 +5069,7 @@ void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos) memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len); /* place the fragment ID */ len_pos += 255 + 1; - *len_pos = WLAN_EID_FRAGMENT; + *len_pos = frag_id; /* and point to fragment length to update later */ len_pos++; } diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 04bd0ed4d2ae..b0ef48b21dcb 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -317,12 +317,12 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) EXPORT_SYMBOL_GPL(flow_offload_add); void flow_offload_refresh(struct nf_flowtable *flow_table, - struct flow_offload *flow) + struct flow_offload *flow, bool force) { u32 timeout; timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); - if (timeout - READ_ONCE(flow->timeout) > HZ) + if (force || timeout - READ_ONCE(flow->timeout) > HZ) WRITE_ONCE(flow->timeout, timeout); else return; @@ -334,6 +334,12 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, } EXPORT_SYMBOL_GPL(flow_offload_refresh); +static bool nf_flow_is_outdated(const struct flow_offload *flow) +{ + return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && + !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); +} + static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return nf_flow_timeout_delta(flow->timeout) <= 0; @@ -423,7 +429,8 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { if (nf_flow_has_expired(flow) || - nf_ct_is_dying(flow->ct)) + nf_ct_is_dying(flow->ct) || + nf_flow_is_outdated(flow)) flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 19efba1e51ef..3bbaf9c7ea46 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -384,7 +384,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; - flow_offload_refresh(flow_table, flow); + flow_offload_refresh(flow_table, flow, false); nf_flow_encap_pop(skb, tuplehash); thoff -= offset; @@ -650,7 +650,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; - flow_offload_refresh(flow_table, flow); + flow_offload_refresh(flow_table, flow, false); nf_flow_encap_pop(skb, tuplehash); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0519d45ede6b..69bceefaa5c8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3844,7 +3844,8 @@ err_destroy_flow_rule: if (flow) nft_flow_rule_destroy(flow); err_release_rule: - nf_tables_rule_release(&ctx, rule); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE); + nf_tables_rule_destroy(&ctx, rule); err_release_expr: for (i = 0; i < n; i++) { if (expr_info[i].ops) { @@ -4919,6 +4920,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, set->num_exprs = num_exprs; set->handle = nf_tables_alloc_handle(table); + INIT_LIST_HEAD(&set->pending_update); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) @@ -9275,10 +9277,25 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation) } } +static void nft_set_commit_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->commit) + continue; + + set->ops->commit(set); + } +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; @@ -9453,6 +9470,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_NEWSETELEM); + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -9467,6 +9489,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) atomic_dec(&te->set->nelems); te->set->ndeact--; } + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { @@ -9529,6 +9556,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_set_commit_update(&set_update_list); + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, nft_net->base_seq); @@ -9588,10 +9617,25 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } +static void nft_set_abort_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->abort) + continue; + + set->ops->abort(set); + } +} + static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; if (action == NFNL_ABORT_VALIDATE && @@ -9701,6 +9745,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_setelem_remove(net, te->set, &te->elem); if (!nft_setelem_is_catchall(te->set, &te->elem)) atomic_dec(&te->set->nelems); + + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: @@ -9711,6 +9761,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) if (!nft_setelem_is_catchall(te->set, &te->elem)) te->set->ndeact--; + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: @@ -9753,6 +9808,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } } + nft_set_abort_update(&set_update_list); + synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index ae7146475d17..c9fbe0f707b5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -533,7 +533,8 @@ ack: * processed, this avoids that the same error is * reported several times when replaying the batch. */ - if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) { + if (err == -ENOMEM || + nfnl_err_add(&err_list, nlh, err, &extack) < 0) { /* We failed to enqueue an error, reset the * list of errors and send OOM to userspace * pointing to the batch header. diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 06d46d182634..15e451dc3fc4 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1600,17 +1600,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m) } } -/** - * pipapo_reclaim_match - RCU callback to free fields from old matching data - * @rcu: RCU head - */ -static void pipapo_reclaim_match(struct rcu_head *rcu) +static void pipapo_free_match(struct nft_pipapo_match *m) { - struct nft_pipapo_match *m; int i; - m = container_of(rcu, struct nft_pipapo_match, rcu); - for_each_possible_cpu(i) kfree(*per_cpu_ptr(m->scratch, i)); @@ -1625,7 +1618,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) } /** - * pipapo_commit() - Replace lookup data with current working copy + * pipapo_reclaim_match - RCU callback to free fields from old matching data + * @rcu: RCU head + */ +static void pipapo_reclaim_match(struct rcu_head *rcu) +{ + struct nft_pipapo_match *m; + + m = container_of(rcu, struct nft_pipapo_match, rcu); + pipapo_free_match(m); +} + +/** + * nft_pipapo_commit() - Replace lookup data with current working copy * @set: nftables API set representation * * While at it, check if we should perform garbage collection on the working @@ -1635,7 +1640,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * We also need to create a new working copy for subsequent insertions and * deletions. */ -static void pipapo_commit(const struct nft_set *set) +static void nft_pipapo_commit(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *new_clone, *old; @@ -1660,6 +1665,26 @@ static void pipapo_commit(const struct nft_set *set) priv->clone = new_clone; } +static void nft_pipapo_abort(const struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *new_clone, *m; + + if (!priv->dirty) + return; + + m = rcu_dereference(priv->match); + + new_clone = pipapo_clone(m); + if (IS_ERR(new_clone)) + return; + + priv->dirty = false; + + pipapo_free_match(priv->clone); + priv->clone = new_clone; +} + /** * nft_pipapo_activate() - Mark element reference as active given key, commit * @net: Network namespace @@ -1667,8 +1692,7 @@ static void pipapo_commit(const struct nft_set *set) * @elem: nftables API element representation containing key data * * On insertion, elements are added to a copy of the matching data currently - * in use for lookups, and not directly inserted into current lookup data, so - * we'll take care of that by calling pipapo_commit() here. Both + * in use for lookups, and not directly inserted into current lookup data. Both * nft_pipapo_insert() and nft_pipapo_activate() are called once for each * element, hence we can't purpose either one as a real commit operation. */ @@ -1684,8 +1708,6 @@ static void nft_pipapo_activate(const struct net *net, nft_set_elem_change_active(net, set, &e->ext); nft_set_elem_clear_busy(&e->ext); - - pipapo_commit(set); } /** @@ -1931,7 +1953,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, if (i == m->field_count) { priv->dirty = true; pipapo_drop(m, rulemap); - pipapo_commit(set); return; } @@ -2230,6 +2251,8 @@ const struct nft_set_type nft_set_pipapo_type = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; @@ -2252,6 +2275,8 @@ const struct nft_set_type nft_set_pipapo_avx2_type = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 54c083003947..27511c90a26f 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -857,7 +857,8 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, offset -= iter->startbit; idx = offset / NETLBL_CATMAP_MAPSIZE; - iter->bitmap[idx] |= bitmap << (offset % NETLBL_CATMAP_MAPSIZE); + iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap + << (offset % NETLBL_CATMAP_MAPSIZE); return 0; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9cc0bc7c71ed..abc71a06d634 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -610,6 +610,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct flow_offload_tuple tuple = {}; enum ip_conntrack_info ctinfo; struct tcphdr *tcph = NULL; + bool force_refresh = false; struct flow_offload *flow; struct nf_conn *ct; u8 dir; @@ -647,6 +648,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, * established state, then don't refresh. */ return false; + force_refresh = true; } if (tcph && (unlikely(tcph->fin || tcph->rst))) { @@ -660,7 +662,12 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, else ctinfo = IP_CT_ESTABLISHED_REPLY; - flow_offload_refresh(nf_ft, flow); + flow_offload_refresh(nf_ft, flow, force_refresh); + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { + /* Process this flow in SW to allow promoting to ASSURED */ + return false; + } + nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); if (nf_ft->flags & NF_FLOWTABLE_COUNTER) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fc945c7e4123..c819b812a899 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -13,7 +13,10 @@ #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/slab.h> +#include <net/ipv6.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_pedit.h> @@ -327,28 +330,58 @@ static bool offset_valid(struct sk_buff *skb, int offset) return true; } -static void pedit_skb_hdr_offset(struct sk_buff *skb, +static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type) +{ + const int noff = skb_network_offset(skb); + int ret = -EINVAL; + struct iphdr _iph; + + switch (skb->protocol) { + case htons(ETH_P_IP): { + const struct iphdr *iph = skb_header_pointer(skb, noff, sizeof(_iph), &_iph); + + if (!iph) + goto out; + *hoffset = noff + iph->ihl * 4; + ret = 0; + break; + } + case htons(ETH_P_IPV6): + ret = ipv6_find_hdr(skb, hoffset, header_type, NULL, NULL) == header_type ? 0 : -EINVAL; + break; + } +out: + return ret; +} + +static int pedit_skb_hdr_offset(struct sk_buff *skb, enum pedit_header_type htype, int *hoffset) { + int ret = -EINVAL; /* 'htype' is validated in the netlink parsing */ switch (htype) { case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - if (skb_mac_header_was_set(skb)) + if (skb_mac_header_was_set(skb)) { *hoffset = skb_mac_offset(skb); + ret = 0; + } break; case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK: case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: *hoffset = skb_network_offset(skb); + ret = 0; break; case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_TCP); + break; case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: - if (skb_transport_header_was_set(skb)) - *hoffset = skb_transport_offset(skb); + ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_UDP); break; default: break; } + return ret; } TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, @@ -384,6 +417,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, int hoffset = 0; u32 *ptr, hdata; u32 val; + int rc; if (tkey_ex) { htype = tkey_ex->htype; @@ -392,7 +426,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, tkey_ex++; } - pedit_skb_hdr_offset(skb, htype, &hoffset); + rc = pedit_skb_hdr_offset(skb, htype, &hoffset); + if (rc) { + pr_info_ratelimited("tc action pedit unable to extract header offset for header type (0x%x)\n", htype); + goto bad; + } if (tkey->offmask) { u8 *d, _d; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c877a6343fd4..a193cc7b3241 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -657,8 +657,8 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, { struct tcf_block *block = chain->block; const struct tcf_proto_ops *tmplt_ops; + unsigned int refcnt, non_act_refcnt; bool free_block = false; - unsigned int refcnt; void *tmplt_priv; mutex_lock(&block->lock); @@ -678,13 +678,15 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, * save these to temporary variables. */ refcnt = --chain->refcnt; + non_act_refcnt = refcnt - chain->action_refcnt; tmplt_ops = chain->tmplt_ops; tmplt_priv = chain->tmplt_priv; - /* The last dropped non-action reference will trigger notification. */ - if (refcnt - chain->action_refcnt == 0 && !by_act) { - tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index, - block, NULL, 0, 0, false); + if (non_act_refcnt == chain->explicitly_created && !by_act) { + if (non_act_refcnt == 0) + tc_chain_notify_delete(tmplt_ops, tmplt_priv, + chain->index, block, NULL, 0, 0, + false); /* Last reference to chain, no need to lock. */ chain->flushing = false; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4e2e269f121f..d15d50de7980 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -718,13 +718,19 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr *est, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { - int err; + int err, ifindex = -1; err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags, fl_flags, extack); if (err < 0) return err; + if (tb[TCA_U32_INDEV]) { + ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); + if (ifindex < 0) + return -EINVAL; + } + if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; @@ -759,13 +765,9 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, tcf_bind_filter(tp, &n->res, base); } - if (tb[TCA_U32_INDEV]) { - int ret; - ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); - if (ret < 0) - return -EINVAL; - n->ifindex = ret; - } + if (ifindex >= 0) + n->ifindex = ifindex; + return 0; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e4b6452318c0..aa6b1fe65151 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1079,17 +1079,29 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (parent == NULL) { unsigned int i, num_q, ingress; + struct netdev_queue *dev_queue; ingress = 0; num_q = dev->num_tx_queues; if ((q && q->flags & TCQ_F_INGRESS) || (new && new->flags & TCQ_F_INGRESS)) { - num_q = 1; ingress = 1; - if (!dev_ingress_queue(dev)) { + dev_queue = dev_ingress_queue(dev); + if (!dev_queue) { NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); return -ENOENT; } + + q = rtnl_dereference(dev_queue->qdisc_sleeping); + + /* This is the counterpart of that qdisc_refcount_inc_nz() call in + * __tcf_qdisc_find() for filter requests. + */ + if (!qdisc_refcount_dec_if_one(q)) { + NL_SET_ERR_MSG(extack, + "Current ingress or clsact Qdisc has ongoing filter requests"); + return -EBUSY; + } } if (dev->flags & IFF_UP) @@ -1100,18 +1112,26 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (new && new->ops->attach && !ingress) goto skip; - for (i = 0; i < num_q; i++) { - struct netdev_queue *dev_queue = dev_ingress_queue(dev); - - if (!ingress) + if (!ingress) { + for (i = 0; i < num_q; i++) { dev_queue = netdev_get_tx_queue(dev, i); + old = dev_graft_qdisc(dev_queue, new); - old = dev_graft_qdisc(dev_queue, new); - if (new && i > 0) - qdisc_refcount_inc(new); - - if (!ingress) + if (new && i > 0) + qdisc_refcount_inc(new); qdisc_put(old); + } + } else { + old = dev_graft_qdisc(dev_queue, NULL); + + /* {ingress,clsact}_destroy() @old before grafting @new to avoid + * unprotected concurrent accesses to net_device::miniq_{in,e}gress + * pointer(s) in mini_qdisc_pair_swap(). + */ + qdisc_notify(net, skb, n, classid, old, new, extack); + qdisc_destroy(old); + + dev_graft_qdisc(dev_queue, new); } skip: @@ -1125,8 +1145,6 @@ skip: if (new && new->ops->attach) new->ops->attach(new); - } else { - notify_and_destroy(net, skb, n, classid, old, new, extack); } if (dev->flags & IFF_UP) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3248259eba32..5d7e23f4cc0e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1046,7 +1046,7 @@ static void qdisc_free_cb(struct rcu_head *head) qdisc_free(q); } -static void qdisc_destroy(struct Qdisc *qdisc) +static void __qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -1070,6 +1070,14 @@ static void qdisc_destroy(struct Qdisc *qdisc) call_rcu(&qdisc->rcu, qdisc_free_cb); } +void qdisc_destroy(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return; + + __qdisc_destroy(qdisc); +} + void qdisc_put(struct Qdisc *qdisc) { if (!qdisc) @@ -1079,7 +1087,7 @@ void qdisc_put(struct Qdisc *qdisc) !refcount_dec_and_test(&qdisc->refcnt)) return; - qdisc_destroy(qdisc); + __qdisc_destroy(qdisc); } EXPORT_SYMBOL(qdisc_put); @@ -1094,7 +1102,7 @@ void qdisc_put_unlocked(struct Qdisc *qdisc) !refcount_dec_and_rtnl_lock(&qdisc->refcnt)) return; - qdisc_destroy(qdisc); + __qdisc_destroy(qdisc); rtnl_unlock(); } EXPORT_SYMBOL(qdisc_put_unlocked); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index dd7dea2f6e83..cf0e61ed9225 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -797,6 +797,9 @@ static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch, taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]); + if (q->cur_txq[tc] >= dev->num_tx_queues) + q->cur_txq[tc] = first_txq; + if (skb) return skb; } while (q->cur_txq[tc] != first_txq); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 7fbeb99d8d32..23d6633966b1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1250,7 +1250,10 @@ static int sctp_side_effects(enum sctp_event_type event_type, default: pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n", status, state, event_type, subtype.chunk); - BUG(); + error = status; + if (error >= 0) + error = -EINVAL; + WARN_ON_ONCE(1); break; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 97f1155a2045..08fdf1251f46 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4482,7 +4482,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net, SCTP_AUTH_NEW_KEY, GFP_ATOMIC); if (!ev) - return -ENOMEM; + return SCTP_DISPOSITION_NOMEM; sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 53881406e200..cdcd2731860b 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1258,7 +1258,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) struct tipc_nl_msg msg; struct tipc_media *media; struct sk_buff *rep; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; @@ -1307,7 +1307,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) int err; char *name; struct tipc_media *m; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 2e497cf26ef2..69b508743e57 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2021-2022 Intel Corporation + * Copyright (C) 2018, 2021-2023 Intel Corporation */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS @@ -1441,8 +1441,8 @@ rdev_del_intf_link(struct cfg80211_registered_device *rdev, unsigned int link_id) { trace_rdev_del_intf_link(&rdev->wiphy, wdev, link_id); - if (rdev->ops->add_intf_link) - rdev->ops->add_intf_link(&rdev->wiphy, wdev, link_id); + if (rdev->ops->del_intf_link) + rdev->ops->del_intf_link(&rdev->wiphy, wdev, link_id); trace_rdev_return_void(&rdev->wiphy); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 949e1fb3bec6..26f11e4746c0 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2404,11 +2404,8 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: - wiphy_lock(wiphy); ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); - wiphy_unlock(wiphy); - if (!ret) return ret; break; diff --git a/net/wireless/util.c b/net/wireless/util.c index 3bc0c3072e78..9755ef281040 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/export.h> #include <linux/bitops.h> @@ -2558,6 +2558,13 @@ void cfg80211_remove_links(struct wireless_dev *wdev) { unsigned int link_id; + /* + * links are controlled by upper layers (userspace/cfg) + * only for AP mode, so only remove them here for AP + */ + if (wdev->iftype != NL80211_IFTYPE_AP) + return; + wdev_lock(wdev); if (wdev->valid_links) { for_each_valid_link(wdev, link_id) diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index 471300ba176c..50a92c4e9984 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -48,12 +48,12 @@ if IS_BUILTIN(CONFIG_COMMON_CLK): LX_GDBPARSED(CLK_GET_RATE_NOCACHE) /* linux/fs.h */ -LX_VALUE(SB_RDONLY) -LX_VALUE(SB_SYNCHRONOUS) -LX_VALUE(SB_MANDLOCK) -LX_VALUE(SB_DIRSYNC) -LX_VALUE(SB_NOATIME) -LX_VALUE(SB_NODIRATIME) +LX_GDBPARSED(SB_RDONLY) +LX_GDBPARSED(SB_SYNCHRONOUS) +LX_GDBPARSED(SB_MANDLOCK) +LX_GDBPARSED(SB_DIRSYNC) +LX_GDBPARSED(SB_NOATIME) +LX_GDBPARSED(SB_NODIRATIME) /* linux/htimer.h */ LX_GDBPARSED(hrtimer_resolution) diff --git a/scripts/gfp-translate b/scripts/gfp-translate index b2ce416d944b..6c9aed17cf56 100755 --- a/scripts/gfp-translate +++ b/scripts/gfp-translate @@ -63,11 +63,11 @@ fi # Extract GFP flags from the kernel source TMPFILE=`mktemp -t gfptranslate-XXXXXX` || exit 1 -grep -q ___GFP $SOURCE/include/linux/gfp.h +grep -q ___GFP $SOURCE/include/linux/gfp_types.h if [ $? -eq 0 ]; then - grep "^#define ___GFP" $SOURCE/include/linux/gfp.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE + grep "^#define ___GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE else - grep "^#define __GFP" $SOURCE/include/linux/gfp.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE + grep "^#define __GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE fi # Parse the flags diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index 07efb38f58ac..f2940b29595f 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -37,6 +37,7 @@ struct seq_oss_midi { struct snd_midi_event *coder; /* MIDI event coder */ struct seq_oss_devinfo *devinfo; /* assigned OSSseq device */ snd_use_lock_t use_lock; + struct mutex open_mutex; }; @@ -172,6 +173,7 @@ snd_seq_oss_midi_check_new_port(struct snd_seq_port_info *pinfo) mdev->flags = pinfo->capability; mdev->opened = 0; snd_use_lock_init(&mdev->use_lock); + mutex_init(&mdev->open_mutex); /* copy and truncate the name of synth device */ strscpy(mdev->name, pinfo->name, sizeof(mdev->name)); @@ -322,15 +324,17 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode) int perm; struct seq_oss_midi *mdev; struct snd_seq_port_subscribe subs; + int err; mdev = get_mididev(dp, dev); if (!mdev) return -ENODEV; + mutex_lock(&mdev->open_mutex); /* already used? */ if (mdev->opened && mdev->devinfo != dp) { - snd_use_lock_free(&mdev->use_lock); - return -EBUSY; + err = -EBUSY; + goto unlock; } perm = 0; @@ -340,14 +344,14 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode) perm |= PERM_READ; perm &= mdev->flags; if (perm == 0) { - snd_use_lock_free(&mdev->use_lock); - return -ENXIO; + err = -ENXIO; + goto unlock; } /* already opened? */ if ((mdev->opened & perm) == perm) { - snd_use_lock_free(&mdev->use_lock); - return 0; + err = 0; + goto unlock; } perm &= ~mdev->opened; @@ -372,13 +376,17 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode) } if (! mdev->opened) { - snd_use_lock_free(&mdev->use_lock); - return -ENXIO; + err = -ENXIO; + goto unlock; } mdev->devinfo = dp; + err = 0; + + unlock: + mutex_unlock(&mdev->open_mutex); snd_use_lock_free(&mdev->use_lock); - return 0; + return err; } /* @@ -393,10 +401,9 @@ snd_seq_oss_midi_close(struct seq_oss_devinfo *dp, int dev) mdev = get_mididev(dp, dev); if (!mdev) return -ENODEV; - if (! mdev->opened || mdev->devinfo != dp) { - snd_use_lock_free(&mdev->use_lock); - return 0; - } + mutex_lock(&mdev->open_mutex); + if (!mdev->opened || mdev->devinfo != dp) + goto unlock; memset(&subs, 0, sizeof(subs)); if (mdev->opened & PERM_WRITE) { @@ -415,6 +422,8 @@ snd_seq_oss_midi_close(struct seq_oss_devinfo *dp, int dev) mdev->opened = 0; mdev->devinfo = NULL; + unlock: + mutex_unlock(&mdev->open_mutex); snd_use_lock_free(&mdev->use_lock); return 0; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a5d55a7063d3..308ec7034cc9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11740,6 +11740,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1b0a, 0x01b8, "ACER Veriton", ALC662_FIXUP_ACER_VERITON), SND_PCI_QUIRK(0x1b35, 0x1234, "CZC ET26", ALC662_FIXUP_CZC_ET26), SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T), + SND_PCI_QUIRK(0x1c6c, 0x1239, "Compaq N14JP6-V2", ALC897_FIXUP_HP_HSMIC_VERB), #if 0 /* Below is a quirk table taken from the old code. diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 468c8e77de21..0b69cebc9a33 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -117,6 +117,9 @@ int tegra_pcm_open(struct snd_soc_component *component, return ret; } + /* Set wait time to 500ms by default */ + substream->wait_time = 500; + return 0; } EXPORT_SYMBOL_GPL(tegra_pcm_open); diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index eec5232f9fb2..08bf535ed163 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -650,6 +650,10 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) goto unlock; } + ret = snd_usb_pcm_change_state(subs, UAC3_PD_STATE_D0); + if (ret < 0) + goto unlock; + again: if (subs->sync_endpoint) { ret = snd_usb_endpoint_prepare(chip, subs->sync_endpoint); diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 3ecd1ba7fd4b..6cf55b7f7a04 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2191,6 +2191,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x2ab6, /* T+A devices */ QUIRK_FLAG_DSD_RAW), + VENDOR_FLG(0x3336, /* HEM devices */ + QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x3353, /* Khadas devices */ QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x3842, /* EVGA */ diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index caf32a9b9608..7527f738b4a1 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \ - -fsanitize=undefined +CFLAGS += -I. -I../../include -I../../../lib -g -Og -Wall \ + -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder xarray maple @@ -49,6 +49,7 @@ $(OFILES): Makefile *.h */*.h generated/map-shift.h generated/bit-length.h \ ../../../include/linux/xarray.h \ ../../../include/linux/maple_tree.h \ ../../../include/linux/radix-tree.h \ + ../../../lib/radix-tree.h \ ../../../include/linux/idr.h radix-tree.c: ../../../lib/radix-tree.c diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 23af4633f0f4..4f0c50c33ba7 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -5,12 +5,15 @@ LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h include local_config.mk +ifeq ($(ARCH),) + ifeq ($(CROSS_COMPILE),) uname_M := $(shell uname -m 2>/dev/null || echo not) else uname_M := $(shell echo $(CROSS_COMPILE) | grep -o '^[a-z0-9]\+') endif -MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') +ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') +endif # Without this, failed build products remain, with up-to-date timestamps, # thus tricking Make (and you!) into believing that All Is Well, in subsequent @@ -65,7 +68,7 @@ TEST_GEN_PROGS += ksm_tests TEST_GEN_PROGS += ksm_functional_tests TEST_GEN_PROGS += mdwe_test -ifeq ($(MACHINE),x86_64) +ifeq ($(ARCH),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c) CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) @@ -87,13 +90,13 @@ TEST_GEN_PROGS += $(BINARIES_64) endif else -ifneq (,$(findstring $(MACHINE),ppc64)) +ifneq (,$(findstring $(ARCH),ppc64)) TEST_GEN_PROGS += protection_keys endif endif -ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64)) +ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64)) TEST_GEN_PROGS += va_high_addr_switch TEST_GEN_PROGS += virtual_address_range TEST_GEN_PROGS += write_to_hugetlbfs @@ -112,7 +115,7 @@ $(TEST_GEN_PROGS): vm_util.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c -ifeq ($(MACHINE),x86_64) +ifeq ($(ARCH),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh index 432fe8469851..48584a51388f 100755 --- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh +++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh @@ -84,8 +84,9 @@ h2_destroy() router_rp1_200_create() { - ip link add name $rp1.200 up \ - link $rp1 addrgenmode eui64 type vlan id 200 + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up ip address add dev $rp1.200 192.0.2.2/28 ip address add dev $rp1.200 2001:db8:1::2/64 ip stats set dev $rp1.200 l3_stats on @@ -256,9 +257,11 @@ reapply_config() router_rp1_200_destroy - ip link add name $rp1.200 link $rp1 addrgenmode none type vlan id 200 + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode none ip stats set dev $rp1.200 l3_stats on - ip link set dev $rp1.200 up addrgenmode eui64 + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up ip address add dev $rp1.200 192.0.2.2/28 ip address add dev $rp1.200 2001:db8:1::2/64 } diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 38021a0dd527..6032f9b23c4c 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,3 +1,4 @@ +CONFIG_KALLSYMS=y CONFIG_MPTCP=y CONFIG_IPV6=y CONFIG_MPTCP_IPV6=y diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 4eacdb1ab962..fa9e09ad97d9 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -55,16 +55,20 @@ __chk_nr() { local command="$1" local expected=$2 - local msg nr + local msg="$3" + local skip="${4:-SKIP}" + local nr - shift 2 - msg=$* nr=$(eval $command) printf "%-50s" "$msg" if [ $nr != $expected ]; then - echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then + echo "[ skip ] Feature probably not supported" + else + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + fi else echo "[ ok ]" fi @@ -76,12 +80,12 @@ __chk_msk_nr() local condition=$1 shift 1 - __chk_nr "ss -inmHMN $ns | $condition" $* + __chk_nr "ss -inmHMN $ns | $condition" "$@" } chk_msk_nr() { - __chk_msk_nr "grep -c token:" $* + __chk_msk_nr "grep -c token:" "$@" } wait_msk_nr() @@ -119,37 +123,26 @@ wait_msk_nr() chk_msk_fallback_nr() { - __chk_msk_nr "grep -c fallback" $* + __chk_msk_nr "grep -c fallback" "$@" } chk_msk_remote_key_nr() { - __chk_msk_nr "grep -c remote_key" $* + __chk_msk_nr "grep -c remote_key" "$@" } __chk_listen() { local filter="$1" local expected=$2 + local msg="$3" - shift 2 - msg=$* - - nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN) - printf "%-50s" "$msg" - - if [ $nr != $expected ]; then - echo "[ fail ] expected $expected found $nr" - ret=$test_cnt - else - echo "[ ok ]" - fi + __chk_nr "ss -N $ns -Ml '$filter' | grep -c LISTEN" "$expected" "$msg" 0 } chk_msk_listen() { lport=$1 - local msg="check for listen socket" # destination port search should always return empty list __chk_listen "dport $lport" 0 "listen match for dport $lport" @@ -167,10 +160,9 @@ chk_msk_listen() chk_msk_inuse() { local expected=$1 + local msg="$2" local listen_nr - shift 1 - listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) expected=$((expected + listen_nr)) @@ -181,7 +173,7 @@ chk_msk_inuse() sleep 0.1 done - __chk_nr get_msk_inuse $expected $* + __chk_nr get_msk_inuse $expected "$msg" 0 } # $1: ns, $2: port diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index c1f7bac19942..773dd770a567 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -144,6 +144,7 @@ cleanup() } mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then @@ -695,6 +696,15 @@ run_test_transparent() return 0 fi + # IP(V6)_TRANSPARENT has been added after TOS support which came with + # the required infrastructure in MPTCP sockopt code. To support TOS, the + # following function has been exported (T). Not great but better than + # checking for a specific kernel version. + if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then + echo "INFO: ${msg} not supported by the kernel: SKIP" + return + fi + ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" flush ruleset table inet mangle { @@ -767,6 +777,11 @@ run_tests_peekmode() run_tests_mptfo() { + if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then + echo "INFO: TFO not supported by the kernel: SKIP" + return + fi + echo "INFO: with MPTFO start" ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2 ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1 @@ -787,6 +802,11 @@ run_tests_disconnect() local old_cin=$cin local old_sin=$sin + if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then + echo "INFO: Full disconnect not supported: SKIP" + return + fi + cat $cin $cin $cin > "$cin".disconnect # force do_transfer to cope with the multiple tranmissions diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 29f0c99d9a46..0ae8cafde439 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -25,6 +25,8 @@ capout="" ns1="" ns2="" ksft_skip=4 +iptables="iptables" +ip6tables="ip6tables" timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) capture=0 @@ -82,7 +84,7 @@ init_partial() ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 - ip netns exec $netns sysctl -q net.mptcp.pm_type=0 + ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 if [ $checksum -eq 1 ]; then @@ -140,13 +142,18 @@ cleanup_partial() check_tools() { mptcp_lib_check_mptcp + mptcp_lib_check_kallsyms if ! ip -Version &> /dev/null; then echo "SKIP: Could not run test without ip tool" exit $ksft_skip fi - if ! iptables -V &> /dev/null; then + # Use the legacy version if available to support old kernel versions + if iptables-legacy -V &> /dev/null; then + iptables="iptables-legacy" + ip6tables="ip6tables-legacy" + elif ! iptables -V &> /dev/null; then echo "SKIP: Could not run all tests without iptables tool" exit $ksft_skip fi @@ -185,6 +192,32 @@ cleanup() cleanup_partial } +# $1: msg +print_title() +{ + printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${1}" +} + +# [ $1: fail msg ] +mark_as_skipped() +{ + local msg="${1:-"Feature not supported"}" + + mptcp_lib_fail_if_expected_feature "${msg}" + + print_title "[ skip ] ${msg}" + printf "\n" +} + +# $@: condition +continue_if() +{ + if ! "${@}"; then + mark_as_skipped + return 1 + fi +} + skip_test() { if [ "${#only_tests_ids[@]}" -eq 0 ] && [ "${#only_tests_names[@]}" -eq 0 ]; then @@ -228,6 +261,19 @@ reset() return 0 } +# $1: test name ; $2: counter to check +reset_check_counter() +{ + reset "${1}" || return 1 + + local counter="${2}" + + if ! nstat -asz "${counter}" | grep -wq "${counter}"; then + mark_as_skipped "counter '${counter}' is not available" + return 1 + fi +} + # $1: test name reset_with_cookies() { @@ -247,17 +293,21 @@ reset_with_add_addr_timeout() reset "${1}" || return 1 - tables="iptables" + tables="${iptables}" if [ $ip -eq 6 ]; then - tables="ip6tables" + tables="${ip6tables}" fi ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 - ip netns exec $ns2 $tables -A OUTPUT -p tcp \ - -m tcp --tcp-option 30 \ - -m bpf --bytecode \ - "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \ - -j DROP + + if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \ + -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \ + -j DROP; then + mark_as_skipped "unable to set the 'add addr' rule" + return 1 + fi } # $1: test name @@ -301,22 +351,17 @@ reset_with_allow_join_id0() # tc action pedit offset 162 out of bounds # # Netfilter is used to mark packets with enough data. -reset_with_fail() +setup_fail_rules() { - reset "${1}" || return 1 - - ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1 - ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1 - check_invert=1 validate_checksum=1 - local i="$2" - local ip="${3:-4}" + local i="$1" + local ip="${2:-4}" local tables - tables="iptables" + tables="${iptables}" if [ $ip -eq 6 ]; then - tables="ip6tables" + tables="${ip6tables}" fi ip netns exec $ns2 $tables \ @@ -326,15 +371,32 @@ reset_with_fail() -p tcp \ -m length --length 150:9999 \ -m statistic --mode nth --packet 1 --every 99999 \ - -j MARK --set-mark 42 || exit 1 + -j MARK --set-mark 42 || return ${ksft_skip} - tc -n $ns2 qdisc add dev ns2eth$i clsact || exit 1 + tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${ksft_skip} tc -n $ns2 filter add dev ns2eth$i egress \ protocol ip prio 1000 \ handle 42 fw \ action pedit munge offset 148 u8 invert \ pipe csum tcp \ - index 100 || exit 1 + index 100 || return ${ksft_skip} +} + +reset_with_fail() +{ + reset_check_counter "${1}" "MPTcpExtInfiniteMapTx" || return 1 + shift + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1 + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1 + + local rc=0 + setup_fail_rules "${@}" || rc=$? + + if [ ${rc} -eq ${ksft_skip} ]; then + mark_as_skipped "unable to set the 'fail' rules" + return 1 + fi } reset_with_events() @@ -349,6 +411,25 @@ reset_with_events() evts_ns2_pid=$! } +reset_with_tcp_filter() +{ + reset "${1}" || return 1 + shift + + local ns="${!1}" + local src="${2}" + local target="${3}" + + if ! ip netns exec "${ns}" ${iptables} \ + -A INPUT \ + -s "${src}" \ + -p tcp \ + -j "${target}"; then + mark_as_skipped "unable to set the filter rules" + return 1 + fi +} + fail_test() { ret=1 @@ -467,11 +548,25 @@ wait_local_port_listen() done } -rm_addr_count() +# $1: ns ; $2: counter +get_counter() { - local ns=${1} + local ns="${1}" + local counter="${2}" + local count - ip netns exec ${ns} nstat -as | grep MPTcpExtRmAddr | awk '{print $2}' + count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}') + if [ -z "${count}" ]; then + mptcp_lib_fail_if_expected_feature "${counter} counter" + return 1 + fi + + echo "${count}" +} + +rm_addr_count() +{ + get_counter "${1}" "MPTcpExtRmAddr" } # $1: ns, $2: old rm_addr counter in $ns @@ -494,11 +589,11 @@ wait_mpj() local ns="${1}" local cnt old_cnt - old_cnt=$(ip netns exec ${ns} nstat -as | grep MPJoinAckRx | awk '{print $2}') + old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") local i for i in $(seq 10); do - cnt=$(ip netns exec ${ns} nstat -as | grep MPJoinAckRx | awk '{print $2}') + cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 done @@ -698,15 +793,6 @@ pm_nl_check_endpoint() fi } -filter_tcp_from() -{ - local ns="${1}" - local src="${2}" - local target="${3}" - - ip netns exec "${ns}" iptables -A INPUT -s "${src}" -p tcp -j "${target}" -} - do_transfer() { local listener_ns="$1" @@ -1157,12 +1243,13 @@ chk_csum_nr() fi printf "%-${nr_blank}s %s" " " "sum" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}') - [ -z "$count" ] && count=0 + count=$(get_counter ${ns1} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns1" ]; then extra_msg="$extra_msg ns1=$count" fi - if { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || + if [ -z "$count" ]; then + echo -n "[skip]" + elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then echo "[fail] got $count data checksum error[s] expected $csum_ns1" fail_test @@ -1171,12 +1258,13 @@ chk_csum_nr() echo -n "[ ok ]" fi echo -n " - csum " - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}') - [ -z "$count" ] && count=0 + count=$(get_counter ${ns2} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns2" ]; then extra_msg="$extra_msg ns2=$count" fi - if { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || + if [ -z "$count" ]; then + echo -n "[skip]" + elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then echo "[fail] got $count data checksum error[s] expected $csum_ns2" fail_test @@ -1218,12 +1306,13 @@ chk_fail_nr() fi printf "%-${nr_blank}s %s" " " "ftx" - count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}') - [ -z "$count" ] && count=0 + count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx") if [ "$count" != "$fail_tx" ]; then extra_msg="$extra_msg,tx=$count" fi - if { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || + if [ -z "$count" ]; then + echo -n "[skip]" + elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx" fail_test @@ -1233,12 +1322,13 @@ chk_fail_nr() fi echo -n " - failrx" - count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}') - [ -z "$count" ] && count=0 + count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx") if [ "$count" != "$fail_rx" ]; then extra_msg="$extra_msg,rx=$count" fi - if { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || + if [ -z "$count" ]; then + echo -n "[skip]" + elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx" fail_test @@ -1270,10 +1360,11 @@ chk_fclose_nr() fi printf "%-${nr_blank}s %s" " " "ctx" - count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFastcloseTx | awk '{print $2}') - [ -z "$count" ] && count=0 - [ "$count" != "$fclose_tx" ] && extra_msg="$extra_msg,tx=$count" - if [ "$count" != "$fclose_tx" ]; then + count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$fclose_tx" ]; then + extra_msg="$extra_msg,tx=$count" echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx" fail_test dump_stats=1 @@ -1282,10 +1373,11 @@ chk_fclose_nr() fi echo -n " - fclzrx" - count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFastcloseRx | awk '{print $2}') - [ -z "$count" ] && count=0 - [ "$count" != "$fclose_rx" ] && extra_msg="$extra_msg,rx=$count" - if [ "$count" != "$fclose_rx" ]; then + count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$fclose_rx" ]; then + extra_msg="$extra_msg,rx=$count" echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx" fail_test dump_stats=1 @@ -1316,9 +1408,10 @@ chk_rst_nr() fi printf "%-${nr_blank}s %s" " " "rtx" - count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPRstTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count -lt $rst_tx ]; then + count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ $count -lt $rst_tx ]; then echo "[fail] got $count MP_RST[s] TX expected $rst_tx" fail_test dump_stats=1 @@ -1327,9 +1420,10 @@ chk_rst_nr() fi echo -n " - rstrx " - count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPRstRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" -lt "$rst_rx" ]; then + count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" -lt "$rst_rx" ]; then echo "[fail] got $count MP_RST[s] RX expected $rst_rx" fail_test dump_stats=1 @@ -1350,9 +1444,10 @@ chk_infi_nr() local dump_stats printf "%-${nr_blank}s %s" " " "itx" - count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$infi_tx" ]; then + count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$infi_tx" ]; then echo "[fail] got $count infinite map[s] TX expected $infi_tx" fail_test dump_stats=1 @@ -1361,9 +1456,10 @@ chk_infi_nr() fi echo -n " - infirx" - count=$(ip netns exec $ns1 nstat -as | grep InfiniteMapRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$infi_rx" ]; then + count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$infi_rx" ]; then echo "[fail] got $count infinite map[s] RX expected $infi_rx" fail_test dump_stats=1 @@ -1395,9 +1491,10 @@ chk_join_nr() fi printf "%03u %-36s %s" "${TEST_COUNT}" "${title}" "syn" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$syn_nr" ]; then echo "[fail] got $count JOIN[s] syn expected $syn_nr" fail_test dump_stats=1 @@ -1407,9 +1504,10 @@ chk_join_nr() echo -n " - synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_ack_nr" ]; then + count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$syn_ack_nr" ]; then # simult connections exceeding the limit with cookie enabled could go up to # synack validation as the conn limit can be enforced reliably only after # the subflow creation @@ -1425,9 +1523,10 @@ chk_join_nr() fi echo -n " - ack" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$ack_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$ack_nr" ]; then echo "[fail] got $count JOIN[s] ack expected $ack_nr" fail_test dump_stats=1 @@ -1459,12 +1558,12 @@ chk_stale_nr() local recover_nr printf "%-${nr_blank}s %-18s" " " "stale" - stale_nr=$(ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}') - [ -z "$stale_nr" ] && stale_nr=0 - recover_nr=$(ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}') - [ -z "$recover_nr" ] && recover_nr=0 - if [ $stale_nr -lt $stale_min ] || + stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale") + recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover") + if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then + echo "[skip]" + elif [ $stale_nr -lt $stale_min ] || { [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } || [ $((stale_nr - recover_nr)) -ne $stale_delta ]; then echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \ @@ -1500,12 +1599,12 @@ chk_add_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) printf "%-${nr_blank}s %s" " " "add" - count=$(ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}') - [ -z "$count" ] && count=0 - + count=$(get_counter ${ns2} "MPTcpExtAddAddr") + if [ -z "$count" ]; then + echo -n "[skip]" # if the test configured a short timeout tolerate greater then expected # add addrs options, due to retransmissions - if [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then + elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then echo "[fail] got $count ADD_ADDR[s] expected $add_nr" fail_test dump_stats=1 @@ -1514,9 +1613,10 @@ chk_add_nr() fi echo -n " - echo " - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$echo_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtEchoAdd") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$echo_nr" ]; then echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr" fail_test dump_stats=1 @@ -1526,9 +1626,10 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then echo -n " - pt " - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtPortAdd | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$port_nr" ]; then + count=$(get_counter ${ns2} "MPTcpExtPortAdd") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$port_nr" ]; then echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr" fail_test dump_stats=1 @@ -1537,10 +1638,10 @@ chk_add_nr() fi printf "%-${nr_blank}s %s" " " "syn" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortSynRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$syn_nr" ]; then echo "[fail] got $count JOIN[s] syn with a different \ port-number expected $syn_nr" fail_test @@ -1550,10 +1651,10 @@ chk_add_nr() fi echo -n " - synack" - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinPortSynAckRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_ack_nr" ]; then + count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$syn_ack_nr" ]; then echo "[fail] got $count JOIN[s] synack with a different \ port-number expected $syn_ack_nr" fail_test @@ -1563,10 +1664,10 @@ chk_add_nr() fi echo -n " - ack" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortAckRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$ack_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$ack_nr" ]; then echo "[fail] got $count JOIN[s] ack with a different \ port-number expected $ack_nr" fail_test @@ -1576,10 +1677,10 @@ chk_add_nr() fi printf "%-${nr_blank}s %s" " " "syn" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortSynRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$mis_syn_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$mis_syn_nr" ]; then echo "[fail] got $count JOIN[s] syn with a mismatched \ port-number expected $mis_syn_nr" fail_test @@ -1589,10 +1690,10 @@ chk_add_nr() fi echo -n " - ack " - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortAckRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$mis_ack_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$mis_ack_nr" ]; then echo "[fail] got $count JOIN[s] ack with a mismatched \ port-number expected $mis_ack_nr" fail_test @@ -1636,9 +1737,10 @@ chk_rm_nr() fi printf "%-${nr_blank}s %s" " " "rm " - count=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$rm_addr_nr" ]; then + count=$(get_counter ${addr_ns} "MPTcpExtRmAddr") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$rm_addr_nr" ]; then echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" fail_test dump_stats=1 @@ -1647,29 +1749,27 @@ chk_rm_nr() fi echo -n " - rmsf " - count=$(ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ -n "$simult" ]; then + count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ -n "$simult" ]; then local cnt suffix - cnt=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}') + cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow") # in case of simult flush, the subflow removal count on each side is # unreliable - [ -z "$cnt" ] && cnt=0 count=$((count + cnt)) [ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then - echo "[ ok ] $suffix" + echo -n "[ ok ] $suffix" else echo "[fail] got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]" fail_test dump_stats=1 fi - return - fi - if [ "$count" != "$rm_subflow_nr" ]; then + elif [ "$count" != "$rm_subflow_nr" ]; then echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" fail_test dump_stats=1 @@ -1690,9 +1790,10 @@ chk_prio_nr() local dump_stats printf "%-${nr_blank}s %s" " " "ptx" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$mp_prio_nr_tx" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$mp_prio_nr_tx" ]; then echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx" fail_test dump_stats=1 @@ -1701,9 +1802,10 @@ chk_prio_nr() fi echo -n " - prx " - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$mp_prio_nr_rx" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$mp_prio_nr_rx" ]; then echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx" fail_test dump_stats=1 @@ -1819,7 +1921,7 @@ wait_attempt_fail() while [ $time -lt $timeout_ms ]; do local cnt - cnt=$(ip netns exec $ns nstat -as TcpAttemptFails | grep TcpAttemptFails | awk '{print $2}') + cnt=$(get_counter ${ns} "TcpAttemptFails") [ "$cnt" = 1 ] && return 1 time=$((time + 100)) @@ -1912,23 +2014,23 @@ subflows_error_tests() fi # multiple subflows, with subflow creation error - if reset "multi subflows, with failing subflow"; then + if reset_with_tcp_filter "multi subflows, with failing subflow" ns1 10.0.3.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - filter_tcp_from $ns1 10.0.3.2 REJECT run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr 1 1 1 fi # multiple subflows, with subflow timeout on MPJ - if reset "multi subflows, with subflow timeout"; then + if reset_with_tcp_filter "multi subflows, with subflow timeout" ns1 10.0.3.2 DROP && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - filter_tcp_from $ns1 10.0.3.2 DROP run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr 1 1 1 fi @@ -1936,11 +2038,11 @@ subflows_error_tests() # multiple subflows, check that the endpoint corresponding to # closed subflow (due to reset) is not reused if additional # subflows are added later - if reset "multi subflows, fair usage on close"; then + if reset_with_tcp_filter "multi subflows, fair usage on close" ns1 10.0.3.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - filter_tcp_from $ns1 10.0.3.2 REJECT run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & # mpj subflow will be in TW after the reset @@ -2040,11 +2142,18 @@ signal_address_tests() # the peer could possibly miss some addr notification, allow retransmission ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow - chk_join_nr 3 3 3 - # the server will not signal the address terminating - # the MPC subflow - chk_add_nr 3 3 + # It is not directly linked to the commit introducing this + # symbol but for the parent one which is linked anyway. + if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + chk_join_nr 3 3 2 + chk_add_nr 4 4 + else + chk_join_nr 3 3 3 + # the server will not signal the address terminating + # the MPC subflow + chk_add_nr 3 3 + fi fi } @@ -2285,7 +2394,12 @@ remove_tests() pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr 3 3 3 - chk_rm_nr 0 3 simult + + if mptcp_lib_kversion_ge 5.18; then + chk_rm_nr 0 3 simult + else + chk_rm_nr 3 3 + fi fi # addresses flush @@ -2523,7 +2637,8 @@ v4mapped_tests() mixed_tests() { - if reset "IPv4 sockets do not use IPv6 addresses"; then + if reset "IPv4 sockets do not use IPv6 addresses" && + continue_if mptcp_lib_kversion_ge 6.3; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal @@ -2532,7 +2647,8 @@ mixed_tests() fi # Need an IPv6 mptcp socket to allow subflows of both families - if reset "simult IPv4 and IPv6 subflows"; then + if reset "simult IPv4 and IPv6 subflows" && + continue_if mptcp_lib_kversion_ge 6.3; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 10.0.1.1 flags signal @@ -2541,7 +2657,8 @@ mixed_tests() fi # cross families subflows will not be created even in fullmesh mode - if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1"; then + if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1" && + continue_if mptcp_lib_kversion_ge 6.3; then pm_nl_set_limits $ns1 0 4 pm_nl_set_limits $ns2 1 4 pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh @@ -2552,7 +2669,8 @@ mixed_tests() # fullmesh still tries to create all the possibly subflows with # matching family - if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2"; then + if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2" && + continue_if mptcp_lib_kversion_ge 6.3; then pm_nl_set_limits $ns1 0 4 pm_nl_set_limits $ns2 2 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2565,7 +2683,8 @@ mixed_tests() backup_tests() { # single subflow, backup - if reset "single subflow, backup"; then + if reset "single subflow, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup @@ -2575,7 +2694,8 @@ backup_tests() fi # single address, backup - if reset "single address, backup"; then + if reset "single address, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 @@ -2586,7 +2706,8 @@ backup_tests() fi # single address with port, backup - if reset "single address with port, backup"; then + if reset "single address with port, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 1 @@ -2596,14 +2717,16 @@ backup_tests() chk_prio_nr 1 1 fi - if reset "mpc backup"; then + if reset "mpc backup" && + continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr 0 0 0 chk_prio_nr 0 1 fi - if reset "mpc backup both sides"; then + if reset "mpc backup both sides" && + continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow @@ -2611,14 +2734,16 @@ backup_tests() chk_prio_nr 1 1 fi - if reset "mpc switch to backup"; then + if reset "mpc switch to backup" && + continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup chk_join_nr 0 0 0 chk_prio_nr 0 1 fi - if reset "mpc switch to backup both sides"; then + if reset "mpc switch to backup both sides" && + continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup @@ -2644,38 +2769,41 @@ verify_listener_events() local family local saddr local sport + local name if [ $e_type = $LISTENER_CREATED ]; then - stdbuf -o0 -e0 printf "\t\t\t\t\t CREATE_LISTENER %s:%s"\ - $e_saddr $e_sport + name="LISTENER_CREATED" elif [ $e_type = $LISTENER_CLOSED ]; then - stdbuf -o0 -e0 printf "\t\t\t\t\t CLOSE_LISTENER %s:%s "\ - $e_saddr $e_sport + name="LISTENER_CLOSED" + else + name="$e_type" fi - type=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') - family=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') - sport=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + printf "%-${nr_blank}s %s %s:%s " " " "$name" "$e_saddr" "$e_sport" + + if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + printf "[skip]: event not supported\n" + return + fi + + type=$(grep "type:$e_type," $evt | sed -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') + family=$(grep "type:$e_type," $evt | sed -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') + sport=$(grep "type:$e_type," $evt | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') else - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') fi if [ $type ] && [ $type = $e_type ] && [ $family ] && [ $family = $e_family ] && [ $saddr ] && [ $saddr = $e_saddr ] && [ $sport ] && [ $sport = $e_sport ]; then - stdbuf -o0 -e0 printf "[ ok ]\n" + echo "[ ok ]" return 0 fi fail_test - stdbuf -o0 -e0 printf "[fail]\n" + echo "[fail]" } add_addr_ports_tests() @@ -2981,7 +3109,8 @@ fullmesh_tests() fi # set fullmesh flag - if reset "set fullmesh flag test"; then + if reset "set fullmesh flag test" && + continue_if mptcp_lib_kversion_ge 5.18; then pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow pm_nl_set_limits $ns2 4 4 @@ -2991,7 +3120,8 @@ fullmesh_tests() fi # set nofullmesh flag - if reset "set nofullmesh flag test"; then + if reset "set nofullmesh flag test" && + continue_if mptcp_lib_kversion_ge 5.18; then pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh pm_nl_set_limits $ns2 4 4 @@ -3001,7 +3131,8 @@ fullmesh_tests() fi # set backup,fullmesh flags - if reset "set backup,fullmesh flags test"; then + if reset "set backup,fullmesh flags test" && + continue_if mptcp_lib_kversion_ge 5.18; then pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow pm_nl_set_limits $ns2 4 4 @@ -3012,7 +3143,8 @@ fullmesh_tests() fi # set nobackup,nofullmesh flags - if reset "set nobackup,nofullmesh flags test"; then + if reset "set nobackup,nofullmesh flags test" && + continue_if mptcp_lib_kversion_ge 5.18; then pm_nl_set_limits $ns1 4 4 pm_nl_set_limits $ns2 4 4 pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh @@ -3025,14 +3157,14 @@ fullmesh_tests() fastclose_tests() { - if reset "fastclose test"; then + if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_client chk_join_nr 0 0 0 chk_fclose_nr 1 1 chk_rst_nr 1 1 invert fi - if reset "fastclose server test"; then + if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_server chk_join_nr 0 0 0 chk_fclose_nr 1 1 invert @@ -3070,7 +3202,8 @@ fail_tests() userspace_tests() { # userspace pm type prevents add_addr - if reset "userspace pm type prevents add_addr"; then + if reset "userspace pm type prevents add_addr" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 @@ -3081,7 +3214,8 @@ userspace_tests() fi # userspace pm type does not echo add_addr without daemon - if reset "userspace pm no echo w/o daemon"; then + if reset "userspace pm no echo w/o daemon" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 @@ -3092,7 +3226,8 @@ userspace_tests() fi # userspace pm type rejects join - if reset "userspace pm type rejects join"; then + if reset "userspace pm type rejects join" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 @@ -3102,7 +3237,8 @@ userspace_tests() fi # userspace pm type does not send join - if reset "userspace pm type does not send join"; then + if reset "userspace pm type does not send join" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 @@ -3112,7 +3248,8 @@ userspace_tests() fi # userspace pm type prevents mp_prio - if reset "userspace pm type prevents mp_prio"; then + if reset "userspace pm type prevents mp_prio" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 @@ -3123,7 +3260,8 @@ userspace_tests() fi # userspace pm type prevents rm_addr - if reset "userspace pm type prevents rm_addr"; then + if reset "userspace pm type prevents rm_addr" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 @@ -3135,7 +3273,8 @@ userspace_tests() fi # userspace pm add & remove address - if reset_with_events "userspace pm add & remove address"; then + if reset_with_events "userspace pm add & remove address" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow @@ -3146,7 +3285,8 @@ userspace_tests() fi # userspace pm create destroy subflow - if reset_with_events "userspace pm create destroy subflow"; then + if reset_with_events "userspace pm create destroy subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow @@ -3158,8 +3298,10 @@ userspace_tests() endpoint_tests() { + # subflow_rebuild_header is needed to support the implicit flag # userspace pm type prevents add_addr - if reset "implicit EP"; then + if reset "implicit EP" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -3179,7 +3321,8 @@ endpoint_tests() kill_tests_wait fi - if reset "delete and re-add"; then + if reset "delete and re-add" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 3286536b79d5..f32045b23b89 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -38,3 +38,67 @@ mptcp_lib_check_mptcp() { exit ${KSFT_SKIP} fi } + +mptcp_lib_check_kallsyms() { + if ! mptcp_lib_has_file "/proc/kallsyms"; then + echo "SKIP: CONFIG_KALLSYMS is missing" + exit ${KSFT_SKIP} + fi +} + +# Internal: use mptcp_lib_kallsyms_has() instead +__mptcp_lib_kallsyms_has() { + local sym="${1}" + + mptcp_lib_check_kallsyms + + grep -q " ${sym}" /proc/kallsyms +} + +# $1: part of a symbol to look at, add '$' at the end for full name +mptcp_lib_kallsyms_has() { + local sym="${1}" + + if __mptcp_lib_kallsyms_has "${sym}"; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "${sym} symbol not found" +} + +# $1: part of a symbol to look at, add '$' at the end for full name +mptcp_lib_kallsyms_doesnt_have() { + local sym="${1}" + + if ! __mptcp_lib_kallsyms_has "${sym}"; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "${sym} symbol has been found" +} + +# !!!AVOID USING THIS!!! +# Features might not land in the expected version and features can be backported +# +# $1: kernel version, e.g. 6.3 +mptcp_lib_kversion_ge() { + local exp_maj="${1%.*}" + local exp_min="${1#*.}" + local v maj min + + # If the kernel has backported features, set this env var to 1: + if [ "${SELFTESTS_MPTCP_LIB_NO_KVERSION_CHECK:-}" = "1" ]; then + return 0 + fi + + v=$(uname -r | cut -d'.' -f1,2) + maj=${v%.*} + min=${v#*.} + + if [ "${maj}" -gt "${exp_maj}" ] || + { [ "${maj}" -eq "${exp_maj}" ] && [ "${min}" -ge "${exp_min}" ]; }; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index ae61f39556ca..b35148edbf02 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -87,6 +87,10 @@ struct so_state { uint64_t tcpi_rcv_delta; }; +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + static void die_perror(const char *msg) { perror(msg); @@ -349,13 +353,14 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)"); assert(olen <= sizeof(ti)); - assert(ti.d.size_user == ti.d.size_kernel); - assert(ti.d.size_user == sizeof(struct tcp_info)); + assert(ti.d.size_kernel > 0); + assert(ti.d.size_user == + MIN(ti.d.size_kernel, sizeof(struct tcp_info))); assert(ti.d.num_subflows == 1); assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); olen -= sizeof(struct mptcp_subflow_data); - assert(olen == sizeof(struct tcp_info)); + assert(olen == ti.d.size_user); if (ti.ti[0].tcpi_bytes_sent == w && ti.ti[0].tcpi_bytes_received == r) @@ -401,13 +406,14 @@ static void do_getsockopt_subflow_addrs(int fd) die_perror("getsockopt MPTCP_SUBFLOW_ADDRS"); assert(olen <= sizeof(addrs)); - assert(addrs.d.size_user == addrs.d.size_kernel); - assert(addrs.d.size_user == sizeof(struct mptcp_subflow_addrs)); + assert(addrs.d.size_kernel > 0); + assert(addrs.d.size_user == + MIN(addrs.d.size_kernel, sizeof(struct mptcp_subflow_addrs))); assert(addrs.d.num_subflows == 1); assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); olen -= sizeof(struct mptcp_subflow_data); - assert(olen == sizeof(struct mptcp_subflow_addrs)); + assert(olen == addrs.d.size_user); llen = sizeof(local); ret = getsockname(fd, (struct sockaddr *)&local, &llen); diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index ff5adbb9c7f2..f295a371ff14 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -87,6 +87,7 @@ cleanup() } mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then @@ -186,9 +187,14 @@ do_transfer() local_addr="0.0.0.0" fi + cmsg="TIMESTAMPNS" + if mptcp_lib_kallsyms_has "mptcp_ioctl$"; then + cmsg+=",TCPINQ" + fi + timeout ${timeout_test} \ ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ ${local_addr} < "$sin" > "$sout" & local spid=$! @@ -196,7 +202,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ $connect_addr < "$cin" > "$cout" & local cpid=$! @@ -253,6 +259,11 @@ do_mptcp_sockopt_tests() { local lret=0 + if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then + echo "INFO: MPTCP sockopt not supported: SKIP" + return + fi + ip netns exec "$ns_sbox" ./mptcp_sockopt lret=$? @@ -307,6 +318,11 @@ do_tcpinq_tests() { local lret=0 + if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then + echo "INFO: TCP_INQ not supported: SKIP" + return + fi + local args for args in "-t tcp" "-r tcp"; do do_tcpinq_test $args diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 32f7533e0919..d02e0d63a8f9 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -73,8 +73,12 @@ check() } check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 + +default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)" +if mptcp_lib_expect_all_features; then + check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 subflows 2" "defaults limits" +fi ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo @@ -121,12 +125,10 @@ ip netns exec $ns1 ./pm_nl_ctl flush check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" ip netns exec $ns1 ./pm_nl_ctl limits 9 1 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 2" "rcv addrs above hard limit" +check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 1 9 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 2" "subflows above hard limit" +check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 8 8 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 @@ -176,14 +178,19 @@ subflow,backup 10.0.1.1" "set flags (backup)" ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow 10.0.1.1" " (nobackup)" + +# fullmesh support has been added later ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ +if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" || + mptcp_lib_expect_all_features; then + check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow,fullmesh 10.0.1.1" " (fullmesh)" -ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ + ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh + check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow 10.0.1.1" " (nofullmesh)" -ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ + ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh + check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" +fi exit $ret diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 8092399d911f..98d9e4d2d3fc 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -4,11 +4,17 @@ . "$(dirname "${0}")/mptcp_lib.sh" mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms + +if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + echo "userspace pm tests are not supported by the kernel: SKIP" + exit ${KSFT_SKIP} +fi ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Cannot not run test without ip tool" - exit 1 + exit ${KSFT_SKIP} fi ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED @@ -909,6 +915,11 @@ test_listener() { print_title "Listener tests" + if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + stdbuf -o0 -e0 printf "LISTENER events \t[SKIP] Not supported\n" + return + fi + # Capture events on the network namespace running the client :>$client_evts diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index 198ad5f32187..cfa9562f3cd8 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -502,11 +502,11 @@ int main(int argc, char *argv[]) interval = t2 - t1; offset = (t2 + t1) / 2 - tp; - printf("system time: %lld.%u\n", + printf("system time: %lld.%09u\n", (pct+2*i)->sec, (pct+2*i)->nsec); - printf("phc time: %lld.%u\n", + printf("phc time: %lld.%09u\n", (pct+2*i+1)->sec, (pct+2*i+1)->nsec); - printf("system time: %lld.%u\n", + printf("system time: %lld.%09u\n", (pct+2*i+2)->sec, (pct+2*i+2)->nsec); printf("system/phc clock time offset is %" PRId64 " ns\n" "system clock time delay is %" PRId64 " ns\n", diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 4638c63a339f..6e73b09c20c8 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -6,20 +6,18 @@ CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_LABELS=y CONFIG_NF_NAT=m +CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NET_SCHED=y # # Queueing/Scheduling # -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_FQ_CODEL=m @@ -57,8 +55,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 CONFIG_NET_EMATCH_CMP=m diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json index ba2f5e79cdbf..e21c7f22c6d4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json @@ -58,10 +58,10 @@ "setup": [ "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb db 10", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb db 100", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 10ms", + "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 100ms", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root", diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index afb0cd86fa3d..eb357bd7923c 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -2,5 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 modprobe netdevsim +modprobe sch_teql ./tdc.py -c actions --nobuildebpf ./tdc.py -c qdisc diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c index 8879a7b04c6a..d6979a48478f 100644 --- a/tools/testing/selftests/user_events/dyn_test.c +++ b/tools/testing/selftests/user_events/dyn_test.c @@ -16,42 +16,140 @@ #include "../kselftest_harness.h" -const char *dyn_file = "/sys/kernel/tracing/dynamic_events"; -const char *clear = "!u:__test_event"; +const char *abi_file = "/sys/kernel/tracing/user_events_data"; +const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable"; -static int Append(const char *value) +static bool wait_for_delete(void) { - int fd = open(dyn_file, O_RDWR | O_APPEND); - int ret = write(fd, value, strlen(value)); + int i; + + for (i = 0; i < 1000; ++i) { + int fd = open(enable_file, O_RDONLY); + + if (fd == -1) + return true; + + close(fd); + usleep(1000); + } + + return false; +} + +static int reg_event(int fd, int *check, int bit, const char *value) +{ + struct user_reg reg = {0}; + + reg.size = sizeof(reg); + reg.name_args = (__u64)value; + reg.enable_bit = bit; + reg.enable_addr = (__u64)check; + reg.enable_size = sizeof(*check); + + if (ioctl(fd, DIAG_IOCSREG, ®) == -1) + return -1; + + return 0; +} + +static int unreg_event(int fd, int *check, int bit) +{ + struct user_unreg unreg = {0}; + + unreg.size = sizeof(unreg); + unreg.disable_bit = bit; + unreg.disable_addr = (__u64)check; + + return ioctl(fd, DIAG_IOCSUNREG, &unreg); +} + +static int parse(int *check, const char *value) +{ + int fd = open(abi_file, O_RDWR); + int ret; + + if (fd == -1) + return -1; + + /* Until we have persist flags via dynamic events, use the base name */ + if (value[0] != 'u' || value[1] != ':') { + close(fd); + return -1; + } + + ret = reg_event(fd, check, 31, value + 2); + + if (ret != -1) { + if (unreg_event(fd, check, 31) == -1) + printf("WARN: Couldn't unreg event\n"); + } close(fd); + return ret; } -#define CLEAR() \ +static int check_match(int *check, const char *first, const char *second, bool *match) +{ + int fd = open(abi_file, O_RDWR); + int ret = -1; + + if (fd == -1) + return -1; + + if (reg_event(fd, check, 31, first) == -1) + goto cleanup; + + if (reg_event(fd, check, 30, second) == -1) { + if (errno == EADDRINUSE) { + /* Name is in use, with different fields */ + *match = false; + ret = 0; + } + + goto cleanup; + } + + *match = true; + ret = 0; +cleanup: + unreg_event(fd, check, 31); + unreg_event(fd, check, 30); + + close(fd); + + wait_for_delete(); + + return ret; +} + +#define TEST_MATCH(x, y) \ do { \ - int ret = Append(clear); \ - if (ret == -1) \ - ASSERT_EQ(ENOENT, errno); \ + bool match; \ + ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \ + ASSERT_EQ(true, match); \ } while (0) -#define TEST_PARSE(x) \ +#define TEST_NMATCH(x, y) \ do { \ - ASSERT_NE(-1, Append(x)); \ - CLEAR(); \ + bool match; \ + ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \ + ASSERT_EQ(false, match); \ } while (0) -#define TEST_NPARSE(x) ASSERT_EQ(-1, Append(x)) +#define TEST_PARSE(x) ASSERT_NE(-1, parse(&self->check, x)) + +#define TEST_NPARSE(x) ASSERT_EQ(-1, parse(&self->check, x)) FIXTURE(user) { + int check; }; FIXTURE_SETUP(user) { - CLEAR(); } FIXTURE_TEARDOWN(user) { - CLEAR(); + wait_for_delete(); } TEST_F(user, basic_types) { @@ -95,33 +193,30 @@ TEST_F(user, size_types) { TEST_NPARSE("u:__test_event char a 20"); } -TEST_F(user, flags) { - /* Should work */ - TEST_PARSE("u:__test_event:BPF_ITER u32 a"); - /* Forward compat */ - TEST_PARSE("u:__test_event:BPF_ITER,FLAG_FUTURE u32 a"); -} - TEST_F(user, matching) { - /* Register */ - ASSERT_NE(-1, Append("u:__test_event struct custom a 20")); - /* Should not match */ - TEST_NPARSE("!u:__test_event struct custom b"); - /* Should match */ - TEST_PARSE("!u:__test_event struct custom a"); - /* Multi field reg */ - ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b")); - /* Non matching cases */ - TEST_NPARSE("!u:__test_event u32 a"); - TEST_NPARSE("!u:__test_event u32 b"); - TEST_NPARSE("!u:__test_event u32 a; u32 "); - TEST_NPARSE("!u:__test_event u32 a; u32 a"); - /* Matching case */ - TEST_PARSE("!u:__test_event u32 a; u32 b"); - /* Register */ - ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b")); - /* Ensure trailing semi-colon case */ - TEST_PARSE("!u:__test_event u32 a; u32 b;"); + /* Single name matches */ + TEST_MATCH("__test_event u32 a", + "__test_event u32 a"); + + /* Multiple names match */ + TEST_MATCH("__test_event u32 a; u32 b", + "__test_event u32 a; u32 b"); + + /* Multiple names match with dangling ; */ + TEST_MATCH("__test_event u32 a; u32 b", + "__test_event u32 a; u32 b;"); + + /* Single name doesn't match */ + TEST_NMATCH("__test_event u32 a", + "__test_event u32 b"); + + /* Multiple names don't match */ + TEST_NMATCH("__test_event u32 a; u32 b", + "__test_event u32 b; u32 a"); + + /* Types don't match */ + TEST_NMATCH("__test_event u64 a; u64 b", + "__test_event u32 a; u32 b"); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index 7c99cef94a65..eb6904d89f14 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -102,30 +102,56 @@ err: return -1; } +static bool wait_for_delete(void) +{ + int i; + + for (i = 0; i < 1000; ++i) { + int fd = open(enable_file, O_RDONLY); + + if (fd == -1) + return true; + + close(fd); + usleep(1000); + } + + return false; +} + static int clear(int *check) { struct user_unreg unreg = {0}; + int fd; unreg.size = sizeof(unreg); unreg.disable_bit = 31; unreg.disable_addr = (__u64)check; - int fd = open(data_file, O_RDWR); + fd = open(data_file, O_RDWR); if (fd == -1) return -1; if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1) if (errno != ENOENT) - return -1; - - if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1) - if (errno != ENOENT) - return -1; + goto fail; + + if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1) { + if (errno == EBUSY) { + if (!wait_for_delete()) + goto fail; + } else if (errno != ENOENT) + goto fail; + } close(fd); return 0; +fail: + close(fd); + + return -1; } static int check_print_fmt(const char *event, const char *expected, int *check) @@ -155,9 +181,8 @@ static int check_print_fmt(const char *event, const char *expected, int *check) /* Register should work */ ret = ioctl(fd, DIAG_IOCSREG, ®); - close(fd); - if (ret != 0) { + close(fd); printf("Reg failed in fmt\n"); return ret; } @@ -165,6 +190,8 @@ static int check_print_fmt(const char *event, const char *expected, int *check) /* Ensure correct print_fmt */ ret = get_print_fmt(print_fmt, sizeof(print_fmt)); + close(fd); + if (ret != 0) return ret; @@ -228,6 +255,12 @@ TEST_F(user, register_events) { ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); + /* Multiple registers to same name but different args should fail */ + reg.enable_bit = 29; + reg.name_args = (__u64)"__test_event u32 field1;"; + ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSREG, ®)); + ASSERT_EQ(EADDRINUSE, errno); + /* Ensure disabled */ self->enable_fd = open(enable_file, O_RDWR); ASSERT_NE(-1, self->enable_fd); @@ -250,10 +283,10 @@ TEST_F(user, register_events) { unreg.disable_bit = 30; ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg)); - /* Delete should work only after close and unregister */ + /* Delete should have been auto-done after close and unregister */ close(self->data_fd); - self->data_fd = open(data_file, O_RDWR); - ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event")); + + ASSERT_EQ(true, wait_for_delete()); } TEST_F(user, write_events) { @@ -310,6 +343,39 @@ TEST_F(user, write_events) { ASSERT_EQ(EINVAL, errno); } +TEST_F(user, write_empty_events) { + struct user_reg reg = {0}; + struct iovec io[1]; + int before = 0, after = 0; + + reg.size = sizeof(reg); + reg.name_args = (__u64)"__test_event"; + reg.enable_bit = 31; + reg.enable_addr = (__u64)&self->check; + reg.enable_size = sizeof(self->check); + + io[0].iov_base = ®.write_index; + io[0].iov_len = sizeof(reg.write_index); + + /* Register should work */ + ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); + ASSERT_EQ(0, reg.write_index); + ASSERT_EQ(0, self->check); + + /* Enable event */ + self->enable_fd = open(enable_file, O_RDWR); + ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1"))) + + /* Event should now be enabled */ + ASSERT_EQ(1 << reg.enable_bit, self->check); + + /* Write should make it out to ftrace buffers */ + before = trace_bytes(); + ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 1)); + after = trace_bytes(); + ASSERT_GT(after, before); +} + TEST_F(user, write_fault) { struct user_reg reg = {0}; struct iovec io[2]; diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index a070258d4449..8b09be566fa2 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -81,6 +81,32 @@ static int get_offset(void) return offset; } +static int clear(int *check) +{ + struct user_unreg unreg = {0}; + + unreg.size = sizeof(unreg); + unreg.disable_bit = 31; + unreg.disable_addr = (__u64)check; + + int fd = open(data_file, O_RDWR); + + if (fd == -1) + return -1; + + if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1) + if (errno != ENOENT) + return -1; + + if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1) + if (errno != ENOENT) + return -1; + + close(fd); + + return 0; +} + FIXTURE(user) { int data_fd; int check; @@ -93,6 +119,9 @@ FIXTURE_SETUP(user) { FIXTURE_TEARDOWN(user) { close(self->data_fd); + + if (clear(&self->check) != 0) + printf("WARNING: Clear didn't work!\n"); } TEST_F(user, perf_write) { @@ -160,6 +189,59 @@ TEST_F(user, perf_write) { ASSERT_EQ(0, self->check); } +TEST_F(user, perf_empty_events) { + struct perf_event_attr pe = {0}; + struct user_reg reg = {0}; + struct perf_event_mmap_page *perf_page; + int page_size = sysconf(_SC_PAGESIZE); + int id, fd; + __u32 *val; + + reg.size = sizeof(reg); + reg.name_args = (__u64)"__test_event"; + reg.enable_bit = 31; + reg.enable_addr = (__u64)&self->check; + reg.enable_size = sizeof(self->check); + + /* Register should work */ + ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); + ASSERT_EQ(0, reg.write_index); + ASSERT_EQ(0, self->check); + + /* Id should be there */ + id = get_id(); + ASSERT_NE(-1, id); + + pe.type = PERF_TYPE_TRACEPOINT; + pe.size = sizeof(pe); + pe.config = id; + pe.sample_type = PERF_SAMPLE_RAW; + pe.sample_period = 1; + pe.wakeup_events = 1; + + /* Tracepoint attach should work */ + fd = perf_event_open(&pe, 0, -1, -1, 0); + ASSERT_NE(-1, fd); + + perf_page = mmap(NULL, page_size * 2, PROT_READ, MAP_SHARED, fd, 0); + ASSERT_NE(MAP_FAILED, perf_page); + + /* Status should be updated */ + ASSERT_EQ(1 << reg.enable_bit, self->check); + + /* Ensure write shows up at correct offset */ + ASSERT_NE(-1, write(self->data_fd, ®.write_index, + sizeof(reg.write_index))); + val = (void *)(((char *)perf_page) + perf_page->data_offset); + ASSERT_EQ(PERF_RECORD_SAMPLE, *val); + + munmap(perf_page, page_size * 2); + close(fd); + + /* Status should be updated */ + ASSERT_EQ(0, self->check); +} + int main(int argc, char **argv) { return test_harness_run(argc, argv); |