diff options
226 files changed, 4789 insertions, 1899 deletions
diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.txt b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.txt deleted file mode 100644 index beec612dbe6a..000000000000 --- a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.txt +++ /dev/null @@ -1,88 +0,0 @@ -Binding for the Qualcomm APCS global block -========================================== - -This binding describes the APCS "global" block found in various Qualcomm -platforms. - -- compatible: - Usage: required - Value type: <string> - Definition: must be one of: - "qcom,msm8916-apcs-kpss-global", - "qcom,msm8996-apcs-hmss-global" - "qcom,msm8998-apcs-hmss-global" - "qcom,qcs404-apcs-apps-global" - "qcom,sc7180-apss-shared" - "qcom,sdm845-apss-shared" - "qcom,sm8150-apss-shared" - "qcom,ipq8074-apcs-apps-global" - -- reg: - Usage: required - Value type: <prop-encoded-array> - Definition: must specify the base address and size of the global block - -- clocks: - Usage: required if #clock-names property is present - Value type: <phandle array> - Definition: phandles to the two parent clocks of the clock driver. - -- #mbox-cells: - Usage: required - Value type: <u32> - Definition: as described in mailbox.txt, must be 1 - -- #clock-cells: - Usage: optional - Value type: <u32> - Definition: as described in clock.txt, must be 0 - -- clock-names: - Usage: required if the platform data based clock driver needs to - retrieve the parent clock names from device tree. - This will requires two mandatory clocks to be defined. - Value type: <string-array> - Definition: must be "pll" and "aux" - -= EXAMPLE -The following example describes the APCS HMSS found in MSM8996 and part of the -GLINK RPM referencing the "rpm_hlos" doorbell therein. - - apcs_glb: mailbox@9820000 { - compatible = "qcom,msm8996-apcs-hmss-global"; - reg = <0x9820000 0x1000>; - - #mbox-cells = <1>; - }; - - rpm-glink { - compatible = "qcom,glink-rpm"; - - interrupts = <GIC_SPI 168 IRQ_TYPE_EDGE_RISING>; - - qcom,rpm-msg-ram = <&rpm_msg_ram>; - - mboxes = <&apcs_glb 0>; - mbox-names = "rpm_hlos"; - }; - -Below is another example of the APCS binding on MSM8916 platforms: - - apcs: mailbox@b011000 { - compatible = "qcom,msm8916-apcs-kpss-global"; - reg = <0xb011000 0x1000>; - #mbox-cells = <1>; - clocks = <&a53pll>; - #clock-cells = <0>; - }; - -Below is another example of the APCS binding on QCS404 platforms: - - apcs_glb: mailbox@b011000 { - compatible = "qcom,qcs404-apcs-apps-global", "syscon"; - reg = <0x0b011000 0x1000>; - #mbox-cells = <1>; - clocks = <&apcs_hfpll>, <&gcc GCC_GPLL0_AO_OUT_MAIN>; - clock-names = "pll", "aux"; - #clock-cells = <0>; - }; diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml new file mode 100644 index 000000000000..12eff942708d --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/mailbox/qcom,apcs-kpss-global.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Qualcomm APCS global block bindings + +description: + This binding describes the APCS "global" block found in various Qualcomm + platforms. + +maintainers: + - Sivaprakash Murugesan <sivaprak@codeaurora.org> + +properties: + compatible: + enum: + - qcom,ipq8074-apcs-apps-global + - qcom,msm8916-apcs-kpss-global + - qcom,msm8996-apcs-hmss-global + - qcom,msm8998-apcs-hmss-global + - qcom,qcs404-apcs-apps-global + - qcom,sc7180-apss-shared + - qcom,sdm845-apss-shared + - qcom,sm8150-apss-shared + + reg: + maxItems: 1 + + clocks: + description: phandles to the parent clocks of the clock driver + items: + - description: primary pll parent of the clock driver + - description: auxiliary parent + + '#mbox-cells': + const: 1 + + '#clock-cells': + const: 0 + + clock-names: + items: + - const: pll + - const: aux + +required: + - compatible + - reg + - '#mbox-cells' + +additionalProperties: false + +examples: + + # Example apcs with msm8996 + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + apcs_glb: mailbox@9820000 { + compatible = "qcom,msm8996-apcs-hmss-global"; + reg = <0x9820000 0x1000>; + + #mbox-cells = <1>; + }; + + rpm-glink { + compatible = "qcom,glink-rpm"; + interrupts = <GIC_SPI 168 IRQ_TYPE_EDGE_RISING>; + qcom,rpm-msg-ram = <&rpm_msg_ram>; + mboxes = <&apcs_glb 0>; + mbox-names = "rpm_hlos"; + }; + + # Example apcs with qcs404 + - | + #define GCC_APSS_AHB_CLK_SRC 1 + #define GCC_GPLL0_AO_OUT_MAIN 123 + apcs: mailbox@b011000 { + compatible = "qcom,qcs404-apcs-apps-global"; + reg = <0x0b011000 0x1000>; + #mbox-cells = <1>; + clocks = <&apcs_hfpll>, <&gcc GCC_GPLL0_AO_OUT_MAIN>; + clock-names = "pll", "aux"; + #clock-cells = <0>; + }; diff --git a/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml new file mode 100644 index 000000000000..4ac2123d9193 --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mailbox/qcom-ipcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Technologies, Inc. Inter-Processor Communication Controller + +maintainers: + - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> + +description: + The Inter-Processor Communication Controller (IPCC) is a centralized hardware + to route interrupts across various subsystems. It involves a three-level + addressing scheme called protocol, client and signal. For example, consider an + entity on the Application Processor Subsystem (APSS) that wants to listen to + Modem's interrupts via Shared Memory Point to Point (SMP2P) interface. In such + a case, the client would be Modem (client-id is 2) and the signal would be + SMP2P (signal-id is 2). The SMP2P itself falls under the Multiprocessor (MPROC) + protocol (protocol-id is 0). Refer include/dt-bindings/mailbox/qcom-ipcc.h + for the list of such IDs. + +properties: + compatible: + items: + - enum: + - qcom,sm8250-ipcc + - const: qcom,ipcc + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + interrupt-controller: true + + "#interrupt-cells": + const: 3 + description: + The first cell is the client-id, the second cell is the signal-id and the + third cell is the interrupt type. + + "#mbox-cells": + const: 2 + description: + The first cell is the client-id, and the second cell is the signal-id. + +required: + - compatible + - reg + - interrupts + - interrupt-controller + - "#interrupt-cells" + - "#mbox-cells" + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/mailbox/qcom-ipcc.h> + + mailbox@408000 { + compatible = "qcom,sm8250-ipcc", "qcom,ipcc"; + reg = <0x408000 0x1000>; + interrupts = <GIC_SPI 229 IRQ_TYPE_LEVEL_HIGH>; + interrupt-controller; + #interrupt-cells = <3>; + #mbox-cells = <2>; + }; + + smp2p-modem { + compatible = "qcom,smp2p"; + interrupts-extended = <&ipcc_mproc IPCC_CLIENT_MPSS + IPCC_MPROC_SIGNAL_SMP2P IRQ_TYPE_EDGE_RISING>; + mboxes = <&ipcc_mproc IPCC_CLIENT_MPSS IPCC_MPROC_SIGNAL_SMP2P>; + + /* Other SMP2P fields */ + }; diff --git a/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml new file mode 100644 index 000000000000..0f7451b42d7e --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/mailbox/sprd-mailbox.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Spreadtrum mailbox controller bindings + +maintainers: + - Orson Zhai <orsonzhai@gmail.com> + - Baolin Wang <baolin.wang7@gmail.com> + - Chunyan Zhang <zhang.lyra@gmail.com> + +properties: + compatible: + enum: + - sprd,sc9860-mailbox + + reg: + items: + - description: inbox registers' base address + - description: outbox registers' base address + + interrupts: + items: + - description: inbox interrupt + - description: outbox interrupt + + clocks: + maxItems: 1 + + clock-names: + items: + - const: enable + + "#mbox-cells": + const: 1 + +required: + - compatible + - reg + - interrupts + - "#mbox-cells" + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + mailbox: mailbox@400a0000 { + compatible = "sprd,sc9860-mailbox"; + reg = <0 0x400a0000 0 0x8000>, <0 0x400a8000 0 0x8000>; + #mbox-cells = <1>; + clock-names = "enable"; + clocks = <&aon_gate 53>; + interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>; + }; +... diff --git a/Documentation/devicetree/bindings/sound/tdm-slot.txt b/Documentation/devicetree/bindings/sound/tdm-slot.txt index 34cf70e2cbc4..4bb513ae62fc 100644 --- a/Documentation/devicetree/bindings/sound/tdm-slot.txt +++ b/Documentation/devicetree/bindings/sound/tdm-slot.txt @@ -14,8 +14,8 @@ For instance: dai-tdm-slot-tx-mask = <0 1>; dai-tdm-slot-rx-mask = <1 0>; -And for each spcified driver, there could be one .of_xlate_tdm_slot_mask() -to specify a explicit mapping of the channels and the slots. If it's absent +And for each specified driver, there could be one .of_xlate_tdm_slot_mask() +to specify an explicit mapping of the channels and the slots. If it's absent the default snd_soc_of_xlate_tdm_slot_mask() will be used to generating the tx and rx masks. diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index eb71156bcb7c..318605de83f3 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -429,6 +429,7 @@ prototypes:: int (*lm_grant)(struct file_lock *, struct file_lock *, int); void (*lm_break)(struct file_lock *); /* break_lease callback */ int (*lm_change)(struct file_lock **, int); + bool (*lm_breaker_owns_lease)(struct file_lock *); locking rules: @@ -439,6 +440,7 @@ lm_notify: yes yes no lm_grant: no no no lm_break: yes no no lm_change yes no no +lm_breaker_owns_lease: no no no ========== ============= ================= ========= buffer_head diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt index ca983328976b..f65b51523014 100644 --- a/Documentation/lzo.txt +++ b/Documentation/lzo.txt @@ -159,11 +159,15 @@ Byte sequences distance = 16384 + (H << 14) + D state = S (copy S literals after this block) End of stream is reached if distance == 16384 + In version 1 only, to prevent ambiguity with the RLE case when + ((distance & 0x803f) == 0x803f) && (261 <= length <= 264), the + compressor must not emit block copies where distance and length + meet these conditions. In version 1 only, this instruction is also used to encode a run of - zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1. + zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1. In this case, it is followed by a fourth byte, X. - run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4. + run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4 0 0 1 L L L L L (32..63) Copy of small block within 16kB distance (preferably less than 34B) diff --git a/MAINTAINERS b/MAINTAINERS index 9b4e836543a3..573cf64204c8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14187,6 +14187,14 @@ L: linux-arm-msm@vger.kernel.org S: Maintained F: drivers/iommu/qcom_iommu.c +QUALCOMM IPCC MAILBOX DRIVER +M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> +L: linux-arm-msm@vger.kernel.org +S: Supported +F: Documentation/devicetree/bindings/mailbox/qcom-ipcc.yaml +F: drivers/mailbox/qcom-ipcc.c +F: include/dt-bindings/mailbox/qcom-ipcc.h + QUALCOMM RMNET DRIVER M: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> M: Sean Tranchetti <stranche@codeaurora.org> diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index f5c42a8fcf9c..53520f8cb904 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -430,8 +430,13 @@ register_cpus(void) arch_initcall(register_cpus); #ifdef CONFIG_MAGIC_SYSRQ +static void sysrq_reboot_handler(int unused) +{ + machine_halt(); +} + static const struct sysrq_key_op srm_sysrq_reboot_op = { - .handler = machine_halt, + .handler = sysrq_reboot_handler, .help_msg = "reboot(b)", .action_msg = "Resetting", .enable_mask = SYSRQ_ENABLE_BOOT, diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7f9d38444d6d..8a46ed3ab429 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1299,6 +1299,14 @@ config COMPAT_VDSO You must have a 32-bit build of glibc 2.22 or later for programs to seamlessly take advantage of this. +config THUMB2_COMPAT_VDSO + bool "Compile the 32-bit vDSO for Thumb-2 mode" if EXPERT + depends on COMPAT_VDSO + default y + help + Compile the compat vDSO with '-mthumb -fomit-frame-pointer' if y, + otherwise with '-marm'. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on SYSCTL @@ -1740,8 +1748,9 @@ config ARM64_DEBUG_PRIORITY_MASKING endif config RELOCATABLE - bool + bool "Build a relocatable kernel image" if EXPERT select ARCH_HAS_RELR + default y help This builds the kernel as a Position Independent Executable (PIE), which retains all relocation metadata required to relocate the diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index b263e239cb59..a45366c3909b 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,6 +12,7 @@ #include <linux/efi.h> #include <linux/memblock.h> #include <linux/psci.h> +#include <linux/stddef.h> #include <asm/cputype.h> #include <asm/io.h> @@ -31,14 +32,14 @@ * is therefore used to delimit the MADT GICC structure minimum length * appropriately. */ -#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \ +#define ACPI_MADT_GICC_MIN_LENGTH offsetof( \ struct acpi_madt_generic_interrupt, efficiency_class) #define BAD_MADT_GICC_ENTRY(entry, end) \ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) -#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ +#define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \ spe_interrupt) + sizeof(u16)) /* Basic configuration for ACPI */ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 15e80c876d46..5df49366e9ab 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -130,7 +130,7 @@ static int clear_os_lock(unsigned int cpu) return 0; } -static int debug_monitors_init(void) +static int __init debug_monitors_init(void) { return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, "arm64/debug_monitors:starting", diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 8618faa82e6d..86a5cf9bc19a 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -69,7 +69,8 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) if (addr == FTRACE_ADDR) return &plt[FTRACE_PLT_IDX]; - if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS)) + if (addr == FTRACE_REGS_ADDR && + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) return &plt[FTRACE_REGS_PLT_IDX]; #endif return NULL; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3fd2c11c09fc..93b3844cf442 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -319,6 +319,10 @@ void __init setup_arch(char **cmdline_p) xen_early_init(); efi_init(); + + if (!efi_enabled(EFI_BOOT) && ((u64)_text % MIN_KIMG_ALIGN) != 0) + pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!"); + arm64_memblock_init(); paging_init(); diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 3964738ebbde..7ea1e827e505 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -105,6 +105,14 @@ VDSO_CFLAGS += -D__uint128_t='void*' VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow) VDSO_CFLAGS += -Wno-int-to-pointer-cast +# Compile as THUMB2 or ARM. Unwinding via frame-pointers in THUMB2 is +# unreliable. +ifeq ($(CONFIG_THUMB2_COMPAT_VDSO), y) +VDSO_CFLAGS += -mthumb -fomit-frame-pointer +else +VDSO_CFLAGS += -marm +endif + VDSO_AFLAGS := $(VDSO_CAFLAGS) VDSO_AFLAGS += -D__ASSEMBLY__ diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 62b0eb6cf69a..84eab0f5e00a 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -216,8 +216,10 @@ static int __init mcf_pci_init(void) /* Keep a virtual mapping to IO/config space active */ iospace = (unsigned long) ioremap(PCI_IO_PA, PCI_IO_SIZE); - if (iospace == 0) + if (iospace == 0) { + pci_free_host_bridge(bridge); return -ENODEV; + } pr_info("Coldfire: PCI IO/config window mapped to 0x%x\n", (u32) iospace); diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 27fa9465d19d..2b746f55f419 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -48,7 +48,6 @@ CONFIG_MTD_CFI_STAA=y CONFIG_MTD_ROM=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PLATRAM=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set diff --git a/arch/m68k/include/asm/uaccess_no.h b/arch/m68k/include/asm/uaccess_no.h index a24cfe4a0d32..dcfb69361408 100644 --- a/arch/m68k/include/asm/uaccess_no.h +++ b/arch/m68k/include/asm/uaccess_no.h @@ -42,7 +42,7 @@ static inline int _access_ok(unsigned long addr, unsigned long size) __put_user_asm(__pu_err, __pu_val, ptr, l); \ break; \ case 8: \ - memcpy(ptr, &__pu_val, sizeof (*(ptr))); \ + memcpy((void __force *)ptr, &__pu_val, sizeof(*(ptr))); \ break; \ default: \ __pu_err = __put_user_bad(); \ @@ -60,7 +60,7 @@ extern int __put_user_bad(void); * aliasing issues. */ -#define __ptr(x) ((unsigned long *)(x)) +#define __ptr(x) ((unsigned long __user *)(x)) #define __put_user_asm(err,x,ptr,bwl) \ __asm__ ("move" #bwl " %0,%1" \ @@ -85,7 +85,7 @@ extern int __put_user_bad(void); u64 l; \ __typeof__(*(ptr)) t; \ } __gu_val; \ - memcpy(&__gu_val.l, ptr, sizeof(__gu_val.l)); \ + memcpy(&__gu_val.l, (const void __force *)ptr, sizeof(__gu_val.l)); \ (x) = __gu_val.t; \ break; \ } \ diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 25db70be4c9c..266a6ca5e15e 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -127,7 +127,7 @@ static void update_csb(struct vas_window *window, return; } - use_mm(window->mm); + kthread_use_mm(window->mm); rc = copy_to_user(csb_addr, &csb, sizeof(csb)); /* * User space polls on csb.flags (first byte). So add barrier @@ -139,7 +139,7 @@ static void update_csb(struct vas_window *window, smp_mb(); rc = copy_to_user(csb_addr, &csb, sizeof(u8)); } - unuse_mm(window->mm); + kthread_unuse_mm(window->mm); put_task_struct(tsk); /* Success */ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c733007b90ab..128192e14ff2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -12,64 +12,70 @@ config 32BIT config RISCV def_bool y - select OF - select OF_EARLY_FLATTREE - select OF_IRQ + select ARCH_CLOCKSOURCE_INIT select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_WX + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_MMIOWB + select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SET_DIRECT_MAP + select ARCH_HAS_SET_MEMORY + select ARCH_HAS_STRICT_KERNEL_RWX if MMU + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS + select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select CLONE_BACKWARDS select COMMON_CLK + select EDAC_SUPPORT + select GENERIC_ARCH_TOPOLOGY if SMP + select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS + select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO + select GENERIC_IOREMAP + select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP + select GENERIC_PTDUMP if MMU select GENERIC_SCHED_CLOCK + select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER if MMU select GENERIC_STRNLEN_USER if MMU - select GENERIC_SMP_IDLE_THREAD - select GENERIC_ATOMIC64 if !64BIT - select GENERIC_IOREMAP - select GENERIC_PTDUMP if MMU + select GENERIC_TIME_VSYSCALL if MMU && 64BIT + select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_KASAN if MMU && 64BIT + select HAVE_ARCH_KGDB + select HAVE_ARCH_KGDB_QXFER_PKT + select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK select HAVE_ASM_MODVERSIONS + select HAVE_COPY_THREAD_TLS select HAVE_DMA_CONTIGUOUS if MMU + select HAVE_EBPF_JIT if MMU select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GENERIC_VDSO if MMU && 64BIT + select HAVE_PCI select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_SYSCALL_TRACEPOINTS select IRQ_DOMAIN - select SPARSE_IRQ - select SYSCTL_EXCEPTION_TRACE - select HAVE_ARCH_TRACEHOOK - select HAVE_PCI select MODULES_USE_ELF_RELA if MODULES select MODULE_SECTIONS if MODULES - select THREAD_INFO_IN_TASK + select OF + select OF_EARLY_FLATTREE + select OF_IRQ select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI + select RISCV_INTC select RISCV_TIMER - select GENERIC_IRQ_MULTI_HANDLER - select GENERIC_ARCH_TOPOLOGY if SMP - select ARCH_HAS_PTE_SPECIAL - select ARCH_HAS_MMIOWB - select ARCH_HAS_DEBUG_VIRTUAL if MMU - select HAVE_EBPF_JIT if MMU - select EDAC_SUPPORT - select ARCH_HAS_GIGANTIC_PAGE - select ARCH_HAS_SET_DIRECT_MAP - select ARCH_HAS_SET_MEMORY - select ARCH_HAS_STRICT_KERNEL_RWX if MMU - select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select SPARSEMEM_STATIC if 32BIT - select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU - select HAVE_ARCH_MMAP_RND_BITS if MMU - select ARCH_HAS_GCOV_PROFILE_ALL - select HAVE_COPY_THREAD_TLS - select HAVE_ARCH_KASAN if MMU && 64BIT - select HAVE_ARCH_KGDB - select HAVE_ARCH_KGDB_QXFER_PKT + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select THREAD_INFO_IN_TASK config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT @@ -196,11 +202,11 @@ config ARCH_RV64I bool "RV64I" select 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 - select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE if MMU select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select SWIOTLB if MMU endchoice diff --git a/arch/riscv/include/asm/clocksource.h b/arch/riscv/include/asm/clocksource.h new file mode 100644 index 000000000000..482185566b0c --- /dev/null +++ b/arch/riscv/include/asm/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_CLOCKSOURCE_H +#define _ASM_CLOCKSOURCE_H + +#include <asm/vdso/clocksource.h> + +#endif diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 6e1b0e0325eb..9807ad164015 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -10,11 +10,6 @@ #include <linux/interrupt.h> #include <linux/linkage.h> -#define NR_IRQS 0 - -void riscv_timer_interrupt(void); -void riscv_software_interrupt(void); - #include <asm-generic/irq.h> #endif /* _ASM_RISCV_IRQ_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3ddb798264f1..bdddcd5c1b71 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -8,6 +8,8 @@ #include <linux/const.h> +#include <vdso/processor.h> + #include <asm/ptrace.h> /* @@ -58,16 +60,6 @@ static inline void release_thread(struct task_struct *dead_task) extern unsigned long get_wchan(struct task_struct *p); -static inline void cpu_relax(void) -{ -#ifdef __riscv_muldiv - int dummy; - /* In lieu of a halt instruction, induce a long-latency stall. */ - __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); -#endif - barrier(); -} - static inline void wait_for_interrupt(void) { __asm__ __volatile__ ("wfi"); @@ -75,6 +67,7 @@ static inline void wait_for_interrupt(void) struct device_node; int riscv_of_processor_hartid(struct device_node *node); +int riscv_of_parent_hartid(struct device_node *node); extern void riscv_fill_hwcap(void); diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index f4c7cfda6b7f..40bb1c15a731 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -28,6 +28,9 @@ void show_ipi_stats(struct seq_file *p, int prec); /* SMP initialization hook for setup_arch */ void __init setup_smp(void); +/* Called from C code, this handles an IPI. */ +void handle_IPI(struct pt_regs *regs); + /* Hook for the generic smp_call_function_many() routine. */ void arch_send_call_function_ipi_mask(struct cpumask *mask); diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 7a7fce63c474..8454f746bbfd 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -10,8 +10,10 @@ #include <linux/types.h> +#ifndef GENERIC_TIME_VSYSCALL struct vdso_data { }; +#endif /* * The VDSO symbols are mapped into Linux so we can just use regular symbol diff --git a/arch/riscv/include/asm/vdso/clocksource.h b/arch/riscv/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..df6ea65c1dec --- /dev/null +++ b/arch/riscv/include/asm/vdso/clocksource.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSOCLOCKSOURCE_H +#define __ASM_VDSOCLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_ARCHTIMER + +#endif diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..c8e818688ec1 --- /dev/null +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <asm/csr.h> +#include <uapi/linux/time.h> + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct __kernel_old_timeval *tv asm("a0") = _tv; + register struct timezone *tz asm("a1") = _tz; + register long ret asm("a0"); + register long nr asm("a7") = __NR_gettimeofday; + + asm volatile ("ecall\n" + : "=r" (ret) + : "r"(tv), "r"(tz), "r"(nr) + : "memory"); + + return ret; +} + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register clockid_t clkid asm("a0") = _clkid; + register struct __kernel_timespec *ts asm("a1") = _ts; + register long ret asm("a0"); + register long nr asm("a7") = __NR_clock_gettime; + + asm volatile ("ecall\n" + : "=r" (ret) + : "r"(clkid), "r"(ts), "r"(nr) + : "memory"); + + return ret; +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register clockid_t clkid asm("a0") = _clkid; + register struct __kernel_timespec *ts asm("a1") = _ts; + register long ret asm("a0"); + register long nr asm("a7") = __NR_clock_getres; + + asm volatile ("ecall\n" + : "=r" (ret) + : "r"(clkid), "r"(ts), "r"(nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + /* + * The purpose of csr_read(CSR_TIME) is to trap the system into + * M-mode to obtain the value of CSR_TIME. Hence, unlike other + * architecture, no fence instructions surround the csr_read() + */ + return csr_read(CSR_TIME); +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return _vdso_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h new file mode 100644 index 000000000000..82a5693b1861 --- /dev/null +++ b/arch/riscv/include/asm/vdso/processor.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +static inline void cpu_relax(void) +{ +#ifdef __riscv_muldiv + int dummy; + /* In lieu of a halt instruction, induce a long-latency stall. */ + __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy)); +#endif + barrier(); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/riscv/include/asm/vdso/vsyscall.h b/arch/riscv/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..82fd5d83bd60 --- /dev/null +++ b/arch/riscv/include/asm/vdso/vsyscall.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> + +extern struct vdso_data *vdso_data; + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline struct vdso_data *__riscv_get_k_vdso_data(void) +{ + return vdso_data; +} + +#define __arch_get_k_vdso_data __riscv_get_k_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 40a3c442ac5f..6d59e6906fdd 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -44,6 +44,22 @@ int riscv_of_processor_hartid(struct device_node *node) return hart; } +/* + * Find hart ID of the CPU DT node under which given DT node falls. + * + * To achieve this, we walk up the DT tree until we find an active + * RISC-V core (HART) node and extract the cpuid from it. + */ +int riscv_of_parent_hartid(struct device_node *node) +{ + for (; node; node = node->parent) { + if (of_device_is_compatible(node, "riscv")) + return riscv_of_processor_hartid(node); + } + + return -1; +} + #ifdef CONFIG_PROC_FS static void print_isa(struct seq_file *f, const char *isa) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 56d071b2c0a1..cae7e6d4c7ef 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -106,7 +106,9 @@ _save_context: /* Handle interrupts */ move a0, sp /* pt_regs */ - tail do_IRQ + la a1, handle_arch_irq + REG_L a1, (a1) + jr a1 1: /* * Exceptions run with interrupts enabled or disabled depending on the diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index 345c4f2eba13..7207fa08d78f 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -7,7 +7,6 @@ #include <linux/interrupt.h> #include <linux/irqchip.h> -#include <linux/irqdomain.h> #include <linux/seq_file.h> #include <asm/smp.h> @@ -17,37 +16,9 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } -asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - irq_enter(); - switch (regs->cause & ~CAUSE_IRQ_FLAG) { - case RV_IRQ_TIMER: - riscv_timer_interrupt(); - break; -#ifdef CONFIG_SMP - case RV_IRQ_SOFT: - /* - * We only use software interrupts to pass IPIs, so if a non-SMP - * system gets one, then we don't know what to do. - */ - riscv_software_interrupt(); - break; -#endif - case RV_IRQ_EXT: - handle_arch_irq(regs); - break; - default: - pr_alert("unexpected interrupt cause 0x%lx", regs->cause); - BUG(); - } - irq_exit(); - - set_irq_regs(old_regs); -} - void __init init_IRQ(void) { irqchip_init(); + if (!handle_arch_irq) + panic("No interrupt controller found."); } diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 5805791cd5b5..d4a64dfed342 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -11,6 +11,7 @@ #include <asm/kprobes.h> #include <asm/cacheflush.h> #include <asm/fixmap.h> +#include <asm/patch.h> struct patch_insn { void *addr; diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index a65a8fa0c22d..b1d4f452f843 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -123,11 +123,14 @@ static inline void clear_ipi(void) clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id())); } -void riscv_software_interrupt(void) +void handle_IPI(struct pt_regs *regs) { + struct pt_regs *old_regs = set_irq_regs(regs); unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; unsigned long *stats = ipi_data[smp_processor_id()].stats; + irq_enter(); + clear_ipi(); while (true) { @@ -138,7 +141,7 @@ void riscv_software_interrupt(void) ops = xchg(pending_ipis, 0); if (ops == 0) - return; + goto done; if (ops & (1 << IPI_RESCHEDULE)) { stats[IPI_RESCHEDULE]++; @@ -160,6 +163,10 @@ void riscv_software_interrupt(void) /* Order data access and bit testing. */ mb(); } + +done: + irq_exit(); + set_irq_regs(old_regs); } static const char * const ipi_names[] = { diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 6a53c02e9c73..4d3a1048ad8b 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -26,3 +26,12 @@ void __init time_init(void) lpj_fine = riscv_timebase / HZ; timer_probe(); } + +void clocksource_arch_init(struct clocksource *cs) +{ +#ifdef CONFIG_GENERIC_GETTIMEOFDAY + cs->vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER; +#else + cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE; +#endif +} diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 5080fdf8c296..ecec1778e3a4 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -183,6 +183,4 @@ void trap_init(void) csr_write(CSR_SCRATCH, 0); /* Set the exception vector address */ csr_write(CSR_TVEC, &handle_exception); - /* Enable interrupts */ - csr_write(CSR_IE, IE_SIE); } diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index e827fae3bf90..678204231700 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -11,8 +11,12 @@ #include <linux/slab.h> #include <linux/binfmts.h> #include <linux/err.h> - +#include <asm/page.h> +#ifdef GENERIC_TIME_VSYSCALL +#include <vdso/datapage.h> +#else #include <asm/vdso.h> +#endif extern char vdso_start[], vdso_end[]; @@ -26,7 +30,7 @@ static union { struct vdso_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -static struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_data *vdso_data = &vdso_data_store.data; static int __init vdso_init(void) { @@ -75,13 +79,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, */ mm->context.vdso = (void *)vdso_base; - ret = install_special_mapping(mm, vdso_base, vdso_len, + ret = + install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), vdso_pagelist); - if (unlikely(ret)) + if (unlikely(ret)) { mm->context.vdso = NULL; + goto end; + } + vdso_base += (vdso_pages << PAGE_SHIFT); + ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, + (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]); + + if (unlikely(ret)) + mm->context.vdso = NULL; end: mmap_write_unlock(mm); return ret; @@ -91,5 +104,8 @@ const char *arch_vma_name(struct vm_area_struct *vma) { if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso)) return "[vdso]"; + if (vma->vm_mm && (vma->vm_start == + (long)vma->vm_mm->context.vdso + PAGE_SIZE)) + return "[vdso_data]"; return NULL; } diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 4c8b2a4a6a70..38ba55b0eb9d 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -1,12 +1,14 @@ # SPDX-License-Identifier: GPL-2.0-only # Copied from arch/tile/kernel/vdso/Makefile +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_RISCV_32|R_RISCV_64|R_RISCV_JUMP_SLOT +include $(srctree)/lib/vdso/Makefile # Symbols present in the vdso vdso-syms = rt_sigreturn ifdef CONFIG_64BIT -vdso-syms += gettimeofday -vdso-syms += clock_gettime -vdso-syms += clock_getres +vdso-syms += vgettimeofday endif vdso-syms += getcpu vdso-syms += flush_icache @@ -14,6 +16,10 @@ vdso-syms += flush_icache # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) +endif + # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S deleted file mode 100644 index 91378a52eb22..000000000000 --- a/arch/riscv/kernel/vdso/clock_getres.S +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2017 SiFive - */ - -#include <linux/linkage.h> -#include <asm/unistd.h> - - .text -/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */ -ENTRY(__vdso_clock_getres) - .cfi_startproc - /* For now, just do the syscall. */ - li a7, __NR_clock_getres - ecall - ret - .cfi_endproc -ENDPROC(__vdso_clock_getres) diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S deleted file mode 100644 index 5371fd9bc01f..000000000000 --- a/arch/riscv/kernel/vdso/clock_gettime.S +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2017 SiFive - */ - -#include <linux/linkage.h> -#include <asm/unistd.h> - - .text -/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */ -ENTRY(__vdso_clock_gettime) - .cfi_startproc - /* For now, just do the syscall. */ - li a7, __NR_clock_gettime - ecall - ret - .cfi_endproc -ENDPROC(__vdso_clock_gettime) diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S deleted file mode 100644 index e6fb8af88632..000000000000 --- a/arch/riscv/kernel/vdso/gettimeofday.S +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2017 SiFive - */ - -#include <linux/linkage.h> -#include <asm/unistd.h> - - .text -/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */ -ENTRY(__vdso_gettimeofday) - .cfi_startproc - /* For now, just do the syscall. */ - li a7, __NR_gettimeofday - ecall - ret - .cfi_endproc -ENDPROC(__vdso_gettimeofday) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index f66a091cb890..e6f558bca71b 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -2,11 +2,13 @@ /* * Copyright (C) 2012 Regents of the University of California */ +#include <asm/page.h> OUTPUT_ARCH(riscv) SECTIONS { + PROVIDE(_vdso_data = . + PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/riscv/kernel/vdso/vgettimeofday.c b/arch/riscv/kernel/vdso/vgettimeofday.c new file mode 100644 index 000000000000..d264943e2e47 --- /dev/null +++ b/arch/riscv/kernel/vdso/vgettimeofday.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copied from arch/arm64/kernel/vdso/vgettimeofday.c + * + * Copyright (C) 2018 ARM Ltd. + * Copyright (C) 2020 SiFive + */ + +#include <linux/time.h> +#include <linux/types.h> + +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock_id, res); +} diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9996f49959bd..f4adb3684f3d 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -480,17 +480,6 @@ static void __init setup_vm_final(void) csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); local_flush_tlb_all(); } - -void free_initmem(void) -{ - unsigned long init_begin = (unsigned long)__init_begin; - unsigned long init_end = (unsigned long)__init_end; - - /* Make the region as non-execuatble. */ - set_memory_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT); - free_initmem_default(POISON_FREE_INITMEM); -} - #else asmlinkage void __init setup_vm(uintptr_t dtb_pa) { diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 8f1e94f29a16..a338a6deb950 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -89,6 +89,8 @@ #define INTEL_FAM6_COMETLAKE 0xA5 #define INTEL_FAM6_COMETLAKE_L 0xA6 +#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 9a6dc9b4ec99..fb81fea99093 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -271,6 +271,24 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return __vdso_data; } +static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd) +{ + return true; +} +#define vdso_clocksource_ok arch_vdso_clocksource_ok + +/* + * Clocksource read value validation to handle PV and HyperV clocksources + * which can be invalidated asynchronously and indicate invalidation by + * returning U64_MAX, which can be effectively tested by checking for a + * negative value after casting it to s64. + */ +static inline bool arch_vdso_cycles_ok(u64 cycles) +{ + return (s64)cycles >= 0; +} +#define vdso_cycles_ok arch_vdso_cycles_ok + /* * x86 specific delta calculation. * diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4b1d31be50b4..bf4acb0b5365 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2060,7 +2060,7 @@ void __init init_apic_mappings(void) unsigned int new_apicid; if (apic_validate_deadline_timer()) - pr_debug("TSC deadline timer available\n"); + pr_info("TSC deadline timer available\n"); if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b6f887be440c..0b71970d2d3d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -588,7 +588,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline); static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; -static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = +static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = SPECTRE_V2_USER_NONE; #ifdef CONFIG_RETPOLINE @@ -734,15 +736,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) break; } - /* - * At this point, an STIBP mode other than "off" has been set. - * If STIBP support is not being forced, check if STIBP always-on - * is preferred. - */ - if (mode != SPECTRE_V2_USER_STRICT && - boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) - mode = SPECTRE_V2_USER_STRICT_PREFERRED; - /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); @@ -765,23 +758,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); + + spectre_v2_user_ibpb = mode; } - /* If enhanced IBRS is enabled no STIBP required */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + /* + * If enhanced IBRS is enabled or SMT impossible, STIBP is not + * required. + */ + if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; /* - * If SMT is not possible or STIBP is not available clear the STIBP - * mode. + * At this point, an STIBP mode other than "off" has been set. + * If STIBP support is not being forced, check if STIBP always-on + * is preferred. */ - if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + if (mode != SPECTRE_V2_USER_STRICT && + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + + /* + * If STIBP is not available, clear the STIBP mode. + */ + if (!boot_cpu_has(X86_FEATURE_STIBP)) mode = SPECTRE_V2_USER_NONE; + + spectre_v2_user_stibp = mode; + set_mode: - spectre_v2_user = mode; - /* Only print the STIBP mode when SMT possible */ - if (smt_possible) - pr_info("%s\n", spectre_v2_user_strings[mode]); + pr_info("%s\n", spectre_v2_user_strings[mode]); } static const char * const spectre_v2_strings[] = { @@ -1014,7 +1020,7 @@ void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; case SPECTRE_V2_USER_STRICT: @@ -1257,14 +1263,19 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { case PR_SPEC_ENABLE: - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; /* * Indirect branch speculation is always disabled in strict - * mode. + * mode. It can neither be enabled if it was force-disabled + * by a previous prctl call. + */ - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + task_spec_ib_force_disable(task)) return -EPERM; task_clear_spec_ib_disable(task); task_update_spec_tif(task); @@ -1275,10 +1286,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) * Indirect branch speculation is always allowed when * mitigation is force disabled. */ - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) return 0; task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) @@ -1309,7 +1322,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); - if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -1340,22 +1354,24 @@ static int ib_prctl_get(struct task_struct *task) if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return PR_SPEC_NOT_AFFECTED; - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - case SPECTRE_V2_USER_STRICT: - case SPECTRE_V2_USER_STRICT_PREFERRED: - return PR_SPEC_DISABLE; - default: + } else return PR_SPEC_NOT_AFFECTED; - } } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) @@ -1594,7 +1610,7 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 63926c94eb5f..c25a67a34bd3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1142,9 +1142,12 @@ void switch_to_sld(unsigned long tifn) static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), {} }; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8e3d0347b664..f362ce0d5ac0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -545,28 +545,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, lockdep_assert_irqs_disabled(); - /* - * If TIF_SSBD is different, select the proper mitigation - * method. Note that if SSBD mitigation is disabled or permanentely - * enabled this branch can't be taken because nothing can set - * TIF_SSBD. - */ - if (tif_diff & _TIF_SSBD) { - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + /* Handle change of TIF_SSBD depending on the mitigation method. */ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_ssb_virt_state(tifn); - } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_core_ssb_state(tifn); - } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - msr |= ssbd_tif_to_spec_ctrl(tifn); - updmsr = true; - } + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + updmsr |= !!(tif_diff & _TIF_SSBD); + msr |= ssbd_tif_to_spec_ctrl(tifn); } - /* - * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, - * otherwise avoid the MSR write. - */ + /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */ if (IS_ENABLED(CONFIG_SMP) && static_branch_unlikely(&switch_to_cond_stibp)) { updmsr |= !!(tif_diff & _TIF_SPEC_IB); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e040ba6be27b..0ec7ced727fe 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -197,6 +197,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), }, }, + { /* Handle problems with rebooting on Apple MacBook6,1 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook6,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"), + }, + }, { /* Handle problems with rebooting on Apple MacBookPro5 */ .callback = set_pci_reboot, .ident = "Apple MacBookPro5", diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 371a6b348e44..e42faa792c07 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,10 +25,6 @@ #include <asm/hpet.h> #include <asm/time.h> -#ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; -#endif - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1bf7e312361f..7c35556c7827 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -40,13 +40,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT) #ifdef CONFIG_X86_32 OUTPUT_ARCH(i386) ENTRY(phys_startup_32) -jiffies = jiffies_64; #else OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) -jiffies_64 = jiffies; #endif +jiffies = jiffies_64; + #if defined(CONFIG_X86_64) /* * On 64-bit, align RODATA to 2MB so we retain large page mappings for diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 3579ac0f6ec1..23632a33ed39 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -281,7 +281,6 @@ bool bio_integrity_prep(struct bio *bio) if (ret == 0) { printk(KERN_ERR "could not attach integrity payload\n"); - kfree(buf); status = BLK_STS_RESOURCE; goto err_end_io; } diff --git a/block/bio.c b/block/bio.c index 5235da6434aa..a7366c02c9b5 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1434,8 +1434,7 @@ again: } if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_complete(bio->bi_disk->queue, bio, - blk_status_to_errno(bio->bi_status)); + trace_block_bio_complete(bio->bi_disk->queue, bio); bio_clear_flag(bio, BIO_TRACE_COMPLETION); } diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 96a39d0724a2..44f3d0967cb4 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -191,6 +191,33 @@ found_tag: return tag + tag_offset; } +bool __blk_mq_get_driver_tag(struct request *rq) +{ + struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; + unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; + bool shared = blk_mq_tag_busy(rq->mq_hctx); + int tag; + + if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { + bt = &rq->mq_hctx->tags->breserved_tags; + tag_offset = 0; + } + + if (!hctx_may_queue(rq->mq_hctx, bt)) + return false; + tag = __sbitmap_queue_get(bt); + if (tag == BLK_MQ_NO_TAG) + return false; + + rq->tag = tag + tag_offset; + if (shared) { + rq->rq_flags |= RQF_MQ_INFLIGHT; + atomic_inc(&rq->mq_hctx->nr_active); + } + rq->mq_hctx->tags->rqs[rq->tag] = rq; + return true; +} + void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, unsigned int tag) { @@ -269,6 +296,7 @@ struct bt_tags_iter_data { #define BT_TAG_ITER_RESERVED (1 << 0) #define BT_TAG_ITER_STARTED (1 << 1) +#define BT_TAG_ITER_STATIC_RQS (1 << 2) static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) { @@ -282,9 +310,12 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) /* * We can hit rq == NULL here, because the tagging functions - * test and set the bit before assining ->rqs[]. + * test and set the bit before assigning ->rqs[]. */ - rq = tags->rqs[bitnr]; + if (iter_data->flags & BT_TAG_ITER_STATIC_RQS) + rq = tags->static_rqs[bitnr]; + else + rq = tags->rqs[bitnr]; if (!rq) return true; if ((iter_data->flags & BT_TAG_ITER_STARTED) && @@ -339,11 +370,13 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags, * indicates whether or not @rq is a reserved request. Return * true to continue iterating tags, false to stop. * @priv: Will be passed as second argument to @fn. + * + * Caller has to pass the tag map from which requests are allocated. */ void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv) { - return __blk_mq_all_tag_iter(tags, fn, priv, 0); + return __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS); } /** diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index d38e48f2a0a4..2e4ef51cdb32 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -51,6 +51,14 @@ enum { BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1, }; +bool __blk_mq_get_driver_tag(struct request *rq); +static inline bool blk_mq_get_driver_tag(struct request *rq) +{ + if (rq->tag != BLK_MQ_NO_TAG) + return true; + return __blk_mq_get_driver_tag(rq); +} + extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *); extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); diff --git a/block/blk-mq.c b/block/blk-mq.c index 9a36ac1c1fa1..4f57d27bfa73 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1052,35 +1052,6 @@ static inline unsigned int queued_to_index(unsigned int queued) return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1); } -bool blk_mq_get_driver_tag(struct request *rq) -{ - struct blk_mq_alloc_data data = { - .q = rq->q, - .hctx = rq->mq_hctx, - .flags = BLK_MQ_REQ_NOWAIT, - .cmd_flags = rq->cmd_flags, - }; - bool shared; - - if (rq->tag != BLK_MQ_NO_TAG) - return true; - - if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) - data.flags |= BLK_MQ_REQ_RESERVED; - - shared = blk_mq_tag_busy(data.hctx); - rq->tag = blk_mq_get_tag(&data); - if (rq->tag >= 0) { - if (shared) { - rq->rq_flags |= RQF_MQ_INFLIGHT; - atomic_inc(&data.hctx->nr_active); - } - data.hctx->tags->rqs[rq->tag] = rq; - } - - return rq->tag != BLK_MQ_NO_TAG; -} - static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags, void *key) { diff --git a/block/blk-mq.h b/block/blk-mq.h index a139b0631817..b3ce0f3a2ad2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -44,7 +44,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); -bool blk_mq_get_driver_tag(struct request *rq); struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *start); diff --git a/block/blk.h b/block/blk.h index aa16e524dc35..b5d1f0fc6547 100644 --- a/block/blk.h +++ b/block/blk.h @@ -420,9 +420,11 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part) static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) + preempt_disable(); write_seqcount_begin(&part->nr_sects_seq); part->nr_sects = size; write_seqcount_end(&part->nr_sects_seq); + preempt_enable(); #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) preempt_disable(); part->nr_sects = size; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 2e96d8b8758b..c33bbbfd1bd9 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1390,7 +1390,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unfreeze; /* Mask out flags that can't be set using LOOP_SET_STATUS. */ - lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS; + lo->lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS; /* For those flags, use the previous values instead */ lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS; /* For flags that can't be cleared, use previous values too */ diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 0b944ac96d6b..27a33adc41e4 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1613,7 +1613,7 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd, disc_information di; track_information ti; __u32 last_track; - int ret = -1; + int ret; ret = pkt_get_disc_info(pd, &di); if (ret) diff --git a/drivers/block/umem.c b/drivers/block/umem.c index d84e8a878df2..1e2aa5ae2796 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -784,7 +784,7 @@ static const struct block_device_operations mm_fops = { static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { - int ret = -ENODEV; + int ret; struct cardinfo *card = &cards[num_cards]; unsigned char mem_present; unsigned char batt_status; diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index c4f15c4068c0..9de1dabfb126 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -12,8 +12,11 @@ #include <linux/cpu.h> #include <linux/delay.h> #include <linux/irq.h> +#include <linux/irqdomain.h> #include <linux/sched_clock.h> #include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/interrupt.h> +#include <linux/of_irq.h> #include <asm/smp.h> #include <asm/sbi.h> @@ -39,6 +42,7 @@ static int riscv_clock_next_event(unsigned long delta, return 0; } +static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", .features = CLOCK_EVT_FEAT_ONESHOT, @@ -74,30 +78,36 @@ static int riscv_timer_starting_cpu(unsigned int cpu) struct clock_event_device *ce = per_cpu_ptr(&riscv_clock_event, cpu); ce->cpumask = cpumask_of(cpu); + ce->irq = riscv_clock_event_irq; clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff); - csr_set(CSR_IE, IE_TIE); + enable_percpu_irq(riscv_clock_event_irq, + irq_get_trigger_type(riscv_clock_event_irq)); return 0; } static int riscv_timer_dying_cpu(unsigned int cpu) { - csr_clear(CSR_IE, IE_TIE); + disable_percpu_irq(riscv_clock_event_irq); return 0; } /* called directly from the low-level interrupt handler */ -void riscv_timer_interrupt(void) +static irqreturn_t riscv_timer_interrupt(int irq, void *dev_id) { struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event); csr_clear(CSR_IE, IE_TIE); evdev->event_handler(evdev); + + return IRQ_HANDLED; } static int __init riscv_timer_init_dt(struct device_node *n) { int cpuid, hartid, error; + struct device_node *child; + struct irq_domain *domain; hartid = riscv_of_processor_hartid(n); if (hartid < 0) { @@ -115,6 +125,25 @@ static int __init riscv_timer_init_dt(struct device_node *n) if (cpuid != smp_processor_id()) return 0; + domain = NULL; + child = of_get_compatible_child(n, "riscv,cpu-intc"); + if (!child) { + pr_err("Failed to find INTC node [%pOF]\n", n); + return -ENODEV; + } + domain = irq_find_host(child); + of_node_put(child); + if (!domain) { + pr_err("Failed to find IRQ domain for node [%pOF]\n", n); + return -ENODEV; + } + + riscv_clock_event_irq = irq_create_mapping(domain, RV_IRQ_TIMER); + if (!riscv_clock_event_irq) { + pr_err("Failed to map timer interrupt for node [%pOF]\n", n); + return -ENODEV; + } + pr_info("%s: Registering clocksource cpuid [%d] hartid [%d]\n", __func__, cpuid, hartid); error = clocksource_register_hz(&riscv_clocksource, riscv_timebase); @@ -126,6 +155,14 @@ static int __init riscv_timer_init_dt(struct device_node *n) sched_clock_register(riscv_sched_clock, 64, riscv_timebase); + error = request_percpu_irq(riscv_clock_event_irq, + riscv_timer_interrupt, + "riscv-timer", &riscv_clock_event); + if (error) { + pr_err("registering percpu irq failed [%d]\n", error); + return error; + } + error = cpuhp_setup_state(CPUHP_AP_RISCV_TIMER_STARTING, "clockevents/riscv/timer:starting", riscv_timer_starting_cpu, riscv_timer_dying_cpu); diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index 788c6607078b..cee2a2713038 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -278,7 +278,7 @@ static void nitrox_remove_from_devlist(struct nitrox_device *ndev) struct nitrox_device *nitrox_get_first_device(void) { - struct nitrox_device *ndev = NULL; + struct nitrox_device *ndev; mutex_lock(&devlist_lock); list_for_each_entry(ndev, &ndevlist, list) { @@ -286,7 +286,7 @@ struct nitrox_device *nitrox_get_first_device(void) break; } mutex_unlock(&devlist_lock); - if (!ndev) + if (&ndev->list == &ndevlist) return NULL; refcount_inc(&ndev->refcnt); diff --git a/drivers/crypto/omap-aes-gcm.c b/drivers/crypto/omap-aes-gcm.c index 32dc00dc570b..9f937bdc53a7 100644 --- a/drivers/crypto/omap-aes-gcm.c +++ b/drivers/crypto/omap-aes-gcm.c @@ -77,7 +77,6 @@ static void omap_aes_gcm_done_task(struct omap_aes_dev *dd) tag = (u8 *)rctx->auth_tag; for (i = 0; i < dd->authsize; i++) { if (tag[i]) { - dev_err(dd->dev, "GCM decryption: Tag Message is wrong\n"); ret = -EBADMSG; } } diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 824ddf2a66ff..b5aff20c5900 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1269,13 +1269,17 @@ static int omap_aes_remove(struct platform_device *pdev) spin_unlock(&list_lock); for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) - for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) + for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) { crypto_unregister_skcipher( &dd->pdata->algs_info[i].algs_list[j]); + dd->pdata->algs_info[i].registered--; + } - for (i = dd->pdata->aead_algs_info->size - 1; i >= 0; i--) { + for (i = dd->pdata->aead_algs_info->registered - 1; i >= 0; i--) { aalg = &dd->pdata->aead_algs_info->algs_list[i]; crypto_unregister_aead(aalg); + dd->pdata->aead_algs_info->registered--; + } crypto_engine_exit(dd->engine); diff --git a/drivers/crypto/omap-crypto.c b/drivers/crypto/omap-crypto.c index cc88b7362bc2..94b2dba90f0d 100644 --- a/drivers/crypto/omap-crypto.c +++ b/drivers/crypto/omap-crypto.c @@ -178,11 +178,17 @@ static void omap_crypto_copy_data(struct scatterlist *src, amt = min(src->length - srco, dst->length - dsto); amt = min(len, amt); - srcb = sg_virt(src) + srco; - dstb = sg_virt(dst) + dsto; + srcb = kmap_atomic(sg_page(src)) + srco + src->offset; + dstb = kmap_atomic(sg_page(dst)) + dsto + dst->offset; memcpy(dstb, srcb, amt); + if (!PageSlab(sg_page(dst))) + flush_kernel_dcache_page(sg_page(dst)); + + kunmap_atomic(srcb); + kunmap_atomic(dstb); + srco += amt; dsto += amt; len -= amt; diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 063ad5d03f33..82691a057d2a 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -168,8 +168,6 @@ struct omap_sham_hmac_ctx { }; struct omap_sham_ctx { - struct omap_sham_dev *dd; - unsigned long flags; /* fallback stuff */ @@ -750,8 +748,17 @@ static int omap_sham_align_sgs(struct scatterlist *sg, int offset = rctx->offset; int bufcnt = rctx->bufcnt; - if (!sg || !sg->length || !nbytes) + if (!sg || !sg->length || !nbytes) { + if (bufcnt) { + bufcnt = DIV_ROUND_UP(bufcnt, bs) * bs; + sg_init_table(rctx->sgl, 1); + sg_set_buf(rctx->sgl, rctx->dd->xmit_buf, bufcnt); + rctx->sg = rctx->sgl; + rctx->sg_len = 1; + } + return 0; + } new_len = nbytes; @@ -895,7 +902,7 @@ static int omap_sham_prepare_request(struct ahash_request *req, bool update) if (hash_later < 0) hash_later = 0; - if (hash_later) { + if (hash_later && hash_later <= rctx->buflen) { scatterwalk_map_and_copy(rctx->buffer, req->src, req->nbytes - hash_later, @@ -925,27 +932,35 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) return 0; } +struct omap_sham_dev *omap_sham_find_dev(struct omap_sham_reqctx *ctx) +{ + struct omap_sham_dev *dd; + + if (ctx->dd) + return ctx->dd; + + spin_lock_bh(&sham.lock); + dd = list_first_entry(&sham.dev_list, struct omap_sham_dev, list); + list_move_tail(&dd->list, &sham.dev_list); + ctx->dd = dd; + spin_unlock_bh(&sham.lock); + + return dd; +} + static int omap_sham_init(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm); struct omap_sham_reqctx *ctx = ahash_request_ctx(req); - struct omap_sham_dev *dd = NULL, *tmp; + struct omap_sham_dev *dd; int bs = 0; - spin_lock_bh(&sham.lock); - if (!tctx->dd) { - list_for_each_entry(tmp, &sham.dev_list, list) { - dd = tmp; - break; - } - tctx->dd = dd; - } else { - dd = tctx->dd; - } - spin_unlock_bh(&sham.lock); + ctx->dd = NULL; - ctx->dd = dd; + dd = omap_sham_find_dev(ctx); + if (!dd) + return -ENODEV; ctx->flags = 0; @@ -1215,8 +1230,7 @@ err1: static int omap_sham_enqueue(struct ahash_request *req, unsigned int op) { struct omap_sham_reqctx *ctx = ahash_request_ctx(req); - struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); - struct omap_sham_dev *dd = tctx->dd; + struct omap_sham_dev *dd = ctx->dd; ctx->op = op; @@ -1226,7 +1240,7 @@ static int omap_sham_enqueue(struct ahash_request *req, unsigned int op) static int omap_sham_update(struct ahash_request *req) { struct omap_sham_reqctx *ctx = ahash_request_ctx(req); - struct omap_sham_dev *dd = ctx->dd; + struct omap_sham_dev *dd = omap_sham_find_dev(ctx); if (!req->nbytes) return 0; @@ -1319,21 +1333,8 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key, struct omap_sham_hmac_ctx *bctx = tctx->base; int bs = crypto_shash_blocksize(bctx->shash); int ds = crypto_shash_digestsize(bctx->shash); - struct omap_sham_dev *dd = NULL, *tmp; int err, i; - spin_lock_bh(&sham.lock); - if (!tctx->dd) { - list_for_each_entry(tmp, &sham.dev_list, list) { - dd = tmp; - break; - } - tctx->dd = dd; - } else { - dd = tctx->dd; - } - spin_unlock_bh(&sham.lock); - err = crypto_shash_setkey(tctx->fallback, key, keylen); if (err) return err; @@ -1350,7 +1351,7 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key, memset(bctx->ipad + keylen, 0, bs - keylen); - if (!test_bit(FLAGS_AUTO_XOR, &dd->flags)) { + if (!test_bit(FLAGS_AUTO_XOR, &sham.flags)) { memcpy(bctx->opad, bctx->ipad, bs); for (i = 0; i < bs; i++) { @@ -1571,7 +1572,8 @@ static struct ahash_alg algs_sha224_sha256[] = { .cra_name = "sha224", .cra_driver_name = "omap-sha224", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx), @@ -1592,7 +1594,8 @@ static struct ahash_alg algs_sha224_sha256[] = { .cra_name = "sha256", .cra_driver_name = "omap-sha256", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx), @@ -1614,7 +1617,8 @@ static struct ahash_alg algs_sha224_sha256[] = { .cra_name = "hmac(sha224)", .cra_driver_name = "omap-hmac-sha224", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx) + @@ -1637,7 +1641,8 @@ static struct ahash_alg algs_sha224_sha256[] = { .cra_name = "hmac(sha256)", .cra_driver_name = "omap-hmac-sha256", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx) + @@ -1662,7 +1667,8 @@ static struct ahash_alg algs_sha384_sha512[] = { .cra_name = "sha384", .cra_driver_name = "omap-sha384", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx), @@ -1683,7 +1689,8 @@ static struct ahash_alg algs_sha384_sha512[] = { .cra_name = "sha512", .cra_driver_name = "omap-sha512", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx), @@ -1705,7 +1712,8 @@ static struct ahash_alg algs_sha384_sha512[] = { .cra_name = "hmac(sha384)", .cra_driver_name = "omap-hmac-sha384", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx) + @@ -1728,7 +1736,8 @@ static struct ahash_alg algs_sha384_sha512[] = { .cra_name = "hmac(sha512)", .cra_driver_name = "omap-hmac-sha512", .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC | + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx) + @@ -2154,6 +2163,7 @@ static int omap_sham_probe(struct platform_device *pdev) } dd->flags |= dd->pdata->flags; + sham.flags |= dd->pdata->flags; pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY); @@ -2181,6 +2191,9 @@ static int omap_sham_probe(struct platform_device *pdev) spin_unlock(&sham.lock); for (i = 0; i < dd->pdata->algs_info_size; i++) { + if (dd->pdata->algs_info[i].registered) + break; + for (j = 0; j < dd->pdata->algs_info[i].size; j++) { struct ahash_alg *alg; @@ -2232,9 +2245,11 @@ static int omap_sham_remove(struct platform_device *pdev) list_del(&dd->list); spin_unlock(&sham.lock); for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) - for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) + for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) { crypto_unregister_ahash( &dd->pdata->algs_info[i].algs_list[j]); + dd->pdata->algs_info[i].registered--; + } tasklet_kill(&dd->done_task); pm_runtime_disable(&pdev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 53b4126373a5..ffe149aafc39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,6 +27,7 @@ #include <linux/types.h> #include <linux/mm.h> +#include <linux/kthread.h> #include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> @@ -195,10 +196,10 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s pagefault_disable(); \ if ((mmptr) == current->mm) { \ valid = !get_user((dst), (wptr)); \ - } else if (current->mm == NULL) { \ - use_mm(mmptr); \ + } else if (current->flags & PF_KTHREAD) { \ + kthread_use_mm(mmptr); \ valid = !get_user((dst), (wptr)); \ - unuse_mm(mmptr); \ + kthread_unuse_mm(mmptr); \ } \ pagefault_enable(); \ } \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 6529caca88fe..35d4a5ab0228 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -22,7 +22,6 @@ #include <linux/module.h> #include <linux/fdtable.h> #include <linux/uaccess.h> -#include <linux/mmu_context.h> #include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 691c89705bcd..bf927f432506 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -19,7 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_10_1_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index c6944739183a..744366c7ee85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "cikd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 2f4bdc80a6b2..feab4cc6e836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gfx_v8_0.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index df841c2ac5e7..c7fd0c47b254 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,8 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -#include <linux/mmu_context.h> - #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "gc/gc_9_0_offset.h" diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 7d39b858c9f1..3a3a511670c9 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -226,6 +226,7 @@ static void ast_set_vbios_color_reg(struct ast_private *ast, case 3: case 4: color_index = TrueCModeIndex; + break; default: return; } diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index b1099e1251a2..d877ddc6dc57 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -27,6 +27,7 @@ #include <drm/drm_print.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> +#include <drm/drm_sysfs.h> #include <linux/uaccess.h> @@ -523,6 +524,10 @@ int drm_connector_register(struct drm_connector *connector) drm_mode_object_register(connector->dev, &connector->base); connector->registration_state = DRM_CONNECTOR_REGISTERED; + + /* Let userspace know we have a new connector */ + drm_sysfs_hotplug_event(connector->dev); + goto unlock; err_debugfs: diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 939f0032aab1..f0336c804639 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -291,9 +291,6 @@ int drm_sysfs_connector_add(struct drm_connector *connector) return PTR_ERR(connector->kdev); } - /* Let userspace know we have a new connector */ - drm_sysfs_hotplug_event(dev); - if (connector->ddc) return sysfs_create_link(&connector->kdev->kobj, &connector->ddc->dev.kobj, "ddc"); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 40d42dcff0b7..ed9e53c373a7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5206,6 +5206,9 @@ void intel_read_dp_sdp(struct intel_encoder *encoder, struct intel_crtc_state *crtc_state, unsigned int type) { + if (encoder->type != INTEL_OUTPUT_DDI) + return; + switch (type) { case DP_SDP_VSC: intel_read_dp_vsc_sdp(encoder, crtc_state, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 3ce185670ca4..db8eb1c6afe9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1988,6 +1988,38 @@ static const struct dma_fence_work_ops eb_parse_ops = { .release = __eb_parse_release, }; +static inline int +__parser_mark_active(struct i915_vma *vma, + struct intel_timeline *tl, + struct dma_fence *fence) +{ + struct intel_gt_buffer_pool_node *node = vma->private; + + return i915_active_ref(&node->active, tl, fence); +} + +static int +parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl) +{ + int err; + + mutex_lock(&tl->mutex); + + err = __parser_mark_active(pw->shadow, tl, &pw->base.dma); + if (err) + goto unlock; + + if (pw->trampoline) { + err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma); + if (err) + goto unlock; + } + +unlock: + mutex_unlock(&tl->mutex); + return err; +} + static int eb_parse_pipeline(struct i915_execbuffer *eb, struct i915_vma *shadow, struct i915_vma *trampoline) @@ -2022,20 +2054,25 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, pw->shadow = shadow; pw->trampoline = trampoline; + /* Mark active refs early for this worker, in case we get interrupted */ + err = parser_mark_active(pw, eb->context->timeline); + if (err) + goto err_commit; + err = dma_resv_lock_interruptible(pw->batch->resv, NULL); if (err) - goto err_trampoline; + goto err_commit; err = dma_resv_reserve_shared(pw->batch->resv, 1); if (err) - goto err_batch_unlock; + goto err_commit_unlock; /* Wait for all writes (and relocs) into the batch to complete */ err = i915_sw_fence_await_reservation(&pw->base.chain, pw->batch->resv, NULL, false, 0, I915_FENCE_GFP); if (err < 0) - goto err_batch_unlock; + goto err_commit_unlock; /* Keep the batch alive and unwritten as we parse */ dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); @@ -2050,11 +2087,13 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, dma_fence_work_commit_imm(&pw->base); return 0; -err_batch_unlock: +err_commit_unlock: dma_resv_unlock(pw->batch->resv); -err_trampoline: - if (trampoline) - i915_active_release(&trampoline->active); +err_commit: + i915_sw_fence_set_error_once(&pw->base.chain, err); + dma_fence_work_commit_imm(&pw->base); + return err; + err_shadow: i915_active_release(&shadow->active); err_batch: @@ -2100,6 +2139,7 @@ static int eb_parse(struct i915_execbuffer *eb) goto err; } i915_gem_object_set_readonly(shadow->obj); + shadow->private = pool; trampoline = NULL; if (CMDPARSER_USES_GGTT(eb->i915)) { @@ -2113,6 +2153,7 @@ static int eb_parse(struct i915_execbuffer *eb) shadow = trampoline; goto err_shadow; } + shadow->private = pool; eb->batch_flags |= I915_DISPATCH_SECURE; } @@ -2129,7 +2170,6 @@ static int eb_parse(struct i915_execbuffer *eb) eb->trampoline = trampoline; eb->batch_start_offset = 0; - shadow->private = pool; return 0; err_trampoline: diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index eee530453aa6..ad8a9df49f29 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -31,7 +31,7 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/mm.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/sched/mm.h> #include <linux/types.h> #include <linux/list.h> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index a3dde770226d..02559da61e6e 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -65,7 +65,7 @@ i915_param_named_unsafe(vbt_sdvo_panel_type, int, 0400, "Override/Ignore selection of SDVO panel mode in the VBT " "(-2=ignore, -1=auto [default], index in VBT BIOS table)"); -i915_param_named_unsafe(reset, int, 0600, +i915_param_named_unsafe(reset, uint, 0600, "Attempt GPU resets (0=disabled, 1=full gpu reset, 2=engine reset [default])"); i915_param_named_unsafe(vbt_firmware, charp, 0400, diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi.h b/drivers/gpu/drm/sun4i/sun4i_hdmi.h index 7ad3f06c127e..00ca35f07ba5 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi.h +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi.h @@ -148,7 +148,7 @@ #define SUN4I_HDMI_DDC_CMD_IMPLICIT_WRITE 3 #define SUN4I_HDMI_DDC_CLK_REG 0x528 -#define SUN4I_HDMI_DDC_CLK_M(m) (((m) & 0x7) << 3) +#define SUN4I_HDMI_DDC_CLK_M(m) (((m) & 0xf) << 3) #define SUN4I_HDMI_DDC_CLK_N(n) ((n) & 0x7) #define SUN4I_HDMI_DDC_LINE_CTRL_REG 0x540 diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c index 2ff780114106..12430b9d4e93 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_ddc_clk.c @@ -33,7 +33,7 @@ static unsigned long sun4i_ddc_calc_divider(unsigned long rate, unsigned long best_rate = 0; u8 best_m = 0, best_n = 0, _m, _n; - for (_m = 0; _m < 8; _m++) { + for (_m = 0; _m < 16; _m++) { for (_n = 0; _n < 8; _n++) { unsigned long tmp_rate; diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 66b9a68f5e9f..29fead208cad 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -493,6 +493,19 @@ config TI_SCI_INTA_IRQCHIP If you wish to use interrupt aggregator irq resources managed by the TI System Controller, say Y here. Otherwise, say N. +config RISCV_INTC + bool "RISC-V Local Interrupt Controller" + depends on RISCV + default y + help + This enables support for the per-HART local interrupt controller + found in standard RISC-V systems. The per-HART local interrupt + controller handles timer interrupts, software interrupts, and + hardware interrupts. Without a per-HART local interrupt controller, + a RISC-V system will be unable to handle any interrupts. + + If you don't know what to do here, say Y. + config SIFIVE_PLIC bool "SiFive Platform-Level Interrupt Controller" depends on RISCV diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 3a4ce283189a..133f9c45744a 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -98,6 +98,7 @@ obj-$(CONFIG_NDS32) += irq-ativic32.o obj-$(CONFIG_QCOM_PDC) += qcom-pdc.o obj-$(CONFIG_CSKY_MPINTC) += irq-csky-mpintc.o obj-$(CONFIG_CSKY_APB_INTC) += irq-csky-apb-intc.o +obj-$(CONFIG_RISCV_INTC) += irq-riscv-intc.o obj-$(CONFIG_SIFIVE_PLIC) += irq-sifive-plic.o obj-$(CONFIG_IMX_IRQSTEER) += irq-imx-irqsteer.o obj-$(CONFIG_IMX_INTMUX) += irq-imx-intmux.o diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c new file mode 100644 index 000000000000..a6f97fa6ff69 --- /dev/null +++ b/drivers/irqchip/irq-riscv-intc.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017-2018 SiFive + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ + +#define pr_fmt(fmt) "riscv-intc: " fmt +#include <linux/atomic.h> +#include <linux/bits.h> +#include <linux/cpu.h> +#include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/irqdomain.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/smp.h> + +static struct irq_domain *intc_domain; + +static asmlinkage void riscv_intc_irq(struct pt_regs *regs) +{ + unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG; + + if (unlikely(cause >= BITS_PER_LONG)) + panic("unexpected interrupt cause"); + + switch (cause) { +#ifdef CONFIG_SMP + case RV_IRQ_SOFT: + /* + * We only use software interrupts to pass IPIs, so if a + * non-SMP system gets one, then we don't know what to do. + */ + handle_IPI(regs); + break; +#endif + default: + handle_domain_irq(intc_domain, cause, regs); + break; + } +} + +/* + * On RISC-V systems local interrupts are masked or unmasked by writing + * the SIE (Supervisor Interrupt Enable) CSR. As CSRs can only be written + * on the local hart, these functions can only be called on the hart that + * corresponds to the IRQ chip. + */ + +static void riscv_intc_irq_mask(struct irq_data *d) +{ + csr_clear(CSR_IE, BIT(d->hwirq)); +} + +static void riscv_intc_irq_unmask(struct irq_data *d) +{ + csr_set(CSR_IE, BIT(d->hwirq)); +} + +static int riscv_intc_cpu_starting(unsigned int cpu) +{ + csr_set(CSR_IE, BIT(RV_IRQ_SOFT)); + return 0; +} + +static int riscv_intc_cpu_dying(unsigned int cpu) +{ + csr_clear(CSR_IE, BIT(RV_IRQ_SOFT)); + return 0; +} + +static struct irq_chip riscv_intc_chip = { + .name = "RISC-V INTC", + .irq_mask = riscv_intc_irq_mask, + .irq_unmask = riscv_intc_irq_unmask, +}; + +static int riscv_intc_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_percpu_devid(irq); + irq_domain_set_info(d, irq, hwirq, &riscv_intc_chip, d->host_data, + handle_percpu_devid_irq, NULL, NULL); + + return 0; +} + +static const struct irq_domain_ops riscv_intc_domain_ops = { + .map = riscv_intc_domain_map, + .xlate = irq_domain_xlate_onecell, +}; + +static int __init riscv_intc_init(struct device_node *node, + struct device_node *parent) +{ + int rc, hartid; + + hartid = riscv_of_parent_hartid(node); + if (hartid < 0) { + pr_warn("unable to fine hart id for %pOF\n", node); + return 0; + } + + /* + * The DT will have one INTC DT node under each CPU (or HART) + * DT node so riscv_intc_init() function will be called once + * for each INTC DT node. We only need to do INTC initialization + * for the INTC DT node belonging to boot CPU (or boot HART). + */ + if (riscv_hartid_to_cpuid(hartid) != smp_processor_id()) + return 0; + + intc_domain = irq_domain_add_linear(node, BITS_PER_LONG, + &riscv_intc_domain_ops, NULL); + if (!intc_domain) { + pr_err("unable to add IRQ domain\n"); + return -ENXIO; + } + + rc = set_handle_irq(&riscv_intc_irq); + if (rc) { + pr_err("failed to set irq handler\n"); + return rc; + } + + cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_STARTING, + "irqchip/riscv/intc:starting", + riscv_intc_cpu_starting, + riscv_intc_cpu_dying); + + pr_info("%d local interrupts mapped\n", BITS_PER_LONG); + + return 0; +} + +IRQCHIP_DECLARE(riscv, "riscv,cpu-intc", riscv_intc_init); diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index d9c53f85a68e..eaa3e9fe54e9 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -9,6 +9,7 @@ #include <linux/io.h> #include <linux/irq.h> #include <linux/irqchip.h> +#include <linux/irqchip/chained_irq.h> #include <linux/irqdomain.h> #include <linux/module.h> #include <linux/of.h> @@ -76,6 +77,7 @@ struct plic_handler { void __iomem *enable_base; struct plic_priv *priv; }; +static int plic_parent_irq; static bool plic_cpuhp_setup_done; static DEFINE_PER_CPU(struct plic_handler, plic_handlers); @@ -219,15 +221,17 @@ static const struct irq_domain_ops plic_irqdomain_ops = { * that source ID back to the same claim register. This automatically enables * and disables the interrupt, so there's nothing else to do. */ -static void plic_handle_irq(struct pt_regs *regs) +static void plic_handle_irq(struct irq_desc *desc) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); + struct irq_chip *chip = irq_desc_get_chip(desc); void __iomem *claim = handler->hart_base + CONTEXT_CLAIM; irq_hw_number_t hwirq; WARN_ON_ONCE(!handler->present); - csr_clear(CSR_IE, IE_EIE); + chained_irq_enter(chip, desc); + while ((hwirq = readl(claim))) { int irq = irq_find_mapping(handler->priv->irqdomain, hwirq); @@ -237,21 +241,8 @@ static void plic_handle_irq(struct pt_regs *regs) else generic_handle_irq(irq); } - csr_set(CSR_IE, IE_EIE); -} - -/* - * Walk up the DT tree until we find an active RISC-V core (HART) node and - * extract the cpuid from it. - */ -static int plic_find_hart_id(struct device_node *node) -{ - for (; node; node = node->parent) { - if (of_device_is_compatible(node, "riscv")) - return riscv_of_processor_hartid(node); - } - return -1; + chained_irq_exit(chip, desc); } static void plic_set_threshold(struct plic_handler *handler, u32 threshold) @@ -262,10 +253,8 @@ static void plic_set_threshold(struct plic_handler *handler, u32 threshold) static int plic_dying_cpu(unsigned int cpu) { - struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - - csr_clear(CSR_IE, IE_EIE); - plic_set_threshold(handler, PLIC_DISABLE_THRESHOLD); + if (plic_parent_irq) + disable_percpu_irq(plic_parent_irq); return 0; } @@ -274,7 +263,11 @@ static int plic_starting_cpu(unsigned int cpu) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - csr_set(CSR_IE, IE_EIE); + if (plic_parent_irq) + enable_percpu_irq(plic_parent_irq, + irq_get_trigger_type(plic_parent_irq)); + else + pr_warn("cpu%d: parent irq not available\n", cpu); plic_set_threshold(handler, PLIC_ENABLE_THRESHOLD); return 0; @@ -330,7 +323,7 @@ static int __init plic_init(struct device_node *node, if (parent.args[0] != RV_IRQ_EXT) continue; - hartid = plic_find_hart_id(parent.np); + hartid = riscv_of_parent_hartid(parent.np); if (hartid < 0) { pr_warn("failed to parse hart ID for context %d.\n", i); continue; @@ -342,6 +335,14 @@ static int __init plic_init(struct device_node *node, continue; } + /* Find parent domain and register chained handler */ + if (!plic_parent_irq && irq_find_host(parent.np)) { + plic_parent_irq = irq_of_parse_and_map(node, i); + if (plic_parent_irq) + irq_set_chained_handler(plic_parent_irq, + plic_handle_irq); + } + /* * When running in M-mode we need to ignore the S-mode handler. * Here we assume it always comes later, but that might be a @@ -382,7 +383,6 @@ done: pr_info("%pOFP: mapped %d interrupts with %d handlers for" " %d contexts.\n", node, nr_irqs, nr_handlers, nr_contexts); - set_handle_irq(plic_handle_irq); return 0; out_iounmap: diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 5a577a6734cf..05b1009e2820 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -236,4 +236,22 @@ config SUN6I_MSGBOX various Allwinner SoCs. This mailbox is used for communication between the application CPUs and the power management coprocessor. +config SPRD_MBOX + tristate "Spreadtrum Mailbox" + depends on ARCH_SPRD || COMPILE_TEST + help + Mailbox driver implementation for the Spreadtrum platform. It is used + to send message between application processors and MCU. Say Y here if + you want to build the Spreatrum mailbox controller driver. + +config QCOM_IPCC + bool "Qualcomm Technologies, Inc. IPCC driver" + depends on ARCH_QCOM || COMPILE_TEST + help + Qualcomm Technologies, Inc. Inter-Processor Communication Controller + (IPCC) driver for MSM devices. The driver provides mailbox support for + sending interrupts to the clients. On the other hand, the driver also + acts as an interrupt controller for receiving interrupts from clients. + Say Y here if you want to build this driver. + endif diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile index 2e4364ef5c47..60d224b723a1 100644 --- a/drivers/mailbox/Makefile +++ b/drivers/mailbox/Makefile @@ -50,3 +50,7 @@ obj-$(CONFIG_MTK_CMDQ_MBOX) += mtk-cmdq-mailbox.o obj-$(CONFIG_ZYNQMP_IPI_MBOX) += zynqmp-ipi-mailbox.o obj-$(CONFIG_SUN6I_MSGBOX) += sun6i-msgbox.o + +obj-$(CONFIG_SPRD_MBOX) += sprd-mailbox.o + +obj-$(CONFIG_QCOM_IPCC) += qcom-ipcc.o diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c index 7906624a731c..7205b825c8b5 100644 --- a/drivers/mailbox/imx-mailbox.c +++ b/drivers/mailbox/imx-mailbox.c @@ -12,6 +12,7 @@ #include <linux/mailbox_controller.h> #include <linux/module.h> #include <linux/of_device.h> +#include <linux/pm_runtime.h> #include <linux/slab.h> #define IMX_MU_xSR_GIPn(x) BIT(28 + (3 - (x))) @@ -66,6 +67,8 @@ struct imx_mu_priv { struct clk *clk; int irq; + u32 xcr; + bool side_b; }; @@ -154,12 +157,17 @@ static int imx_mu_scu_tx(struct imx_mu_priv *priv, switch (cp->type) { case IMX_MU_TYPE_TX: - if (msg->hdr.size > sizeof(*msg)) { + /* + * msg->hdr.size specifies the number of u32 words while + * sizeof yields bytes. + */ + + if (msg->hdr.size > sizeof(*msg) / 4) { /* * The real message size can be different to * struct imx_sc_rpc_msg_max size */ - dev_err(priv->dev, "Exceed max msg size (%zu) on TX, got: %i\n", sizeof(*msg), msg->hdr.size); + dev_err(priv->dev, "Maximal message size (%zu bytes) exceeded on TX; got: %i bytes\n", sizeof(*msg), msg->hdr.size << 2); return -EINVAL; } @@ -198,9 +206,8 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv, imx_mu_xcr_rmw(priv, 0, IMX_MU_xCR_RIEn(0)); *data++ = imx_mu_read(priv, priv->dcfg->xRR[0]); - if (msg.hdr.size > sizeof(msg)) { - dev_err(priv->dev, "Exceed max msg size (%zu) on RX, got: %i\n", - sizeof(msg), msg.hdr.size); + if (msg.hdr.size > sizeof(msg) / 4) { + dev_err(priv->dev, "Maximal message size (%zu bytes) exceeded on RX; got: %i bytes\n", sizeof(msg), msg.hdr.size << 2); return -EINVAL; } @@ -285,8 +292,10 @@ static int imx_mu_startup(struct mbox_chan *chan) { struct imx_mu_priv *priv = to_imx_mu_priv(chan->mbox); struct imx_mu_con_priv *cp = chan->con_priv; + unsigned long irq_flag = IRQF_SHARED; int ret; + pm_runtime_get_sync(priv->dev); if (cp->type == IMX_MU_TYPE_TXDB) { /* Tx doorbell don't have ACK support */ tasklet_init(&cp->txdb_tasklet, imx_mu_txdb_tasklet, @@ -294,8 +303,12 @@ static int imx_mu_startup(struct mbox_chan *chan) return 0; } - ret = request_irq(priv->irq, imx_mu_isr, IRQF_SHARED | - IRQF_NO_SUSPEND, cp->irq_desc, chan); + /* IPC MU should be with IRQF_NO_SUSPEND set */ + if (!priv->dev->pm_domain) + irq_flag |= IRQF_NO_SUSPEND; + + ret = request_irq(priv->irq, imx_mu_isr, irq_flag, + cp->irq_desc, chan); if (ret) { dev_err(priv->dev, "Unable to acquire IRQ %d\n", priv->irq); @@ -323,6 +336,7 @@ static void imx_mu_shutdown(struct mbox_chan *chan) if (cp->type == IMX_MU_TYPE_TXDB) { tasklet_kill(&cp->txdb_tasklet); + pm_runtime_put_sync(priv->dev); return; } @@ -341,6 +355,7 @@ static void imx_mu_shutdown(struct mbox_chan *chan) } free_irq(priv->irq, chan); + pm_runtime_put_sync(priv->dev); } static const struct mbox_chan_ops imx_mu_ops = { @@ -374,7 +389,7 @@ static struct mbox_chan *imx_mu_scu_xlate(struct mbox_controller *mbox, break; default: dev_err(mbox->dev, "Invalid chan type: %d\n", type); - return NULL; + return ERR_PTR(-EINVAL); } if (chan >= mbox->num_chans) { @@ -508,14 +523,39 @@ static int imx_mu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - return devm_mbox_controller_register(dev, &priv->mbox); + ret = devm_mbox_controller_register(dev, &priv->mbox); + if (ret) { + clk_disable_unprepare(priv->clk); + return ret; + } + + pm_runtime_enable(dev); + + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + pm_runtime_put_noidle(dev); + goto disable_runtime_pm; + } + + ret = pm_runtime_put_sync(dev); + if (ret < 0) + goto disable_runtime_pm; + + clk_disable_unprepare(priv->clk); + + return 0; + +disable_runtime_pm: + pm_runtime_disable(dev); + clk_disable_unprepare(priv->clk); + return ret; } static int imx_mu_remove(struct platform_device *pdev) { struct imx_mu_priv *priv = platform_get_drvdata(pdev); - clk_disable_unprepare(priv->clk); + pm_runtime_disable(priv->dev); return 0; } @@ -558,12 +598,69 @@ static const struct of_device_id imx_mu_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, imx_mu_dt_ids); +static int imx_mu_suspend_noirq(struct device *dev) +{ + struct imx_mu_priv *priv = dev_get_drvdata(dev); + + if (!priv->clk) + priv->xcr = imx_mu_read(priv, priv->dcfg->xCR); + + return 0; +} + +static int imx_mu_resume_noirq(struct device *dev) +{ + struct imx_mu_priv *priv = dev_get_drvdata(dev); + + /* + * ONLY restore MU when context lost, the TIE could + * be set during noirq resume as there is MU data + * communication going on, and restore the saved + * value will overwrite the TIE and cause MU data + * send failed, may lead to system freeze. This issue + * is observed by testing freeze mode suspend. + */ + if (!imx_mu_read(priv, priv->dcfg->xCR) && !priv->clk) + imx_mu_write(priv, priv->xcr, priv->dcfg->xCR); + + return 0; +} + +static int imx_mu_runtime_suspend(struct device *dev) +{ + struct imx_mu_priv *priv = dev_get_drvdata(dev); + + clk_disable_unprepare(priv->clk); + + return 0; +} + +static int imx_mu_runtime_resume(struct device *dev) +{ + struct imx_mu_priv *priv = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(priv->clk); + if (ret) + dev_err(dev, "failed to enable clock\n"); + + return ret; +} + +static const struct dev_pm_ops imx_mu_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_mu_suspend_noirq, + imx_mu_resume_noirq) + SET_RUNTIME_PM_OPS(imx_mu_runtime_suspend, + imx_mu_runtime_resume, NULL) +}; + static struct platform_driver imx_mu_driver = { .probe = imx_mu_probe, .remove = imx_mu_remove, .driver = { .name = "imx_mu", .of_match_table = imx_mu_dt_ids, + .pm = &imx_mu_pm_ops, }, }; module_platform_driver(imx_mu_driver); diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 34844b7a3675..8c7fac38bb1c 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -568,7 +568,7 @@ static int pcc_mbox_probe(struct platform_device *pdev) return ret; } -struct platform_driver pcc_mbox_driver = { +static struct platform_driver pcc_mbox_driver = { .probe = pcc_mbox_probe, .driver = { .name = "PCCT", diff --git a/drivers/mailbox/qcom-apcs-ipc-mailbox.c b/drivers/mailbox/qcom-apcs-ipc-mailbox.c index eeebafd546e5..cec34f0af6ce 100644 --- a/drivers/mailbox/qcom-apcs-ipc-mailbox.c +++ b/drivers/mailbox/qcom-apcs-ipc-mailbox.c @@ -24,6 +24,35 @@ struct qcom_apcs_ipc { struct platform_device *clk; }; +struct qcom_apcs_ipc_data { + int offset; + char *clk_name; +}; + +static const struct qcom_apcs_ipc_data ipq6018_apcs_data = { + .offset = 8, .clk_name = "qcom,apss-ipq6018-clk" +}; + +static const struct qcom_apcs_ipc_data ipq8074_apcs_data = { + .offset = 8, .clk_name = NULL +}; + +static const struct qcom_apcs_ipc_data msm8916_apcs_data = { + .offset = 8, .clk_name = "qcom-apcs-msm8916-clk" +}; + +static const struct qcom_apcs_ipc_data msm8996_apcs_data = { + .offset = 16, .clk_name = NULL +}; + +static const struct qcom_apcs_ipc_data msm8998_apcs_data = { + .offset = 8, .clk_name = NULL +}; + +static const struct qcom_apcs_ipc_data apps_shared_apcs_data = { + .offset = 12, .clk_name = NULL +}; + static const struct regmap_config apcs_regmap_config = { .reg_bits = 32, .reg_stride = 4, @@ -48,17 +77,12 @@ static const struct mbox_chan_ops qcom_apcs_ipc_ops = { static int qcom_apcs_ipc_probe(struct platform_device *pdev) { struct qcom_apcs_ipc *apcs; + const struct qcom_apcs_ipc_data *apcs_data; struct regmap *regmap; struct resource *res; - unsigned long offset; void __iomem *base; unsigned long i; int ret; - const struct of_device_id apcs_clk_match_table[] = { - { .compatible = "qcom,msm8916-apcs-kpss-global", }, - { .compatible = "qcom,qcs404-apcs-apps-global", }, - {} - }; apcs = devm_kzalloc(&pdev->dev, sizeof(*apcs), GFP_KERNEL); if (!apcs) @@ -73,10 +97,10 @@ static int qcom_apcs_ipc_probe(struct platform_device *pdev) if (IS_ERR(regmap)) return PTR_ERR(regmap); - offset = (unsigned long)of_device_get_match_data(&pdev->dev); + apcs_data = of_device_get_match_data(&pdev->dev); apcs->regmap = regmap; - apcs->offset = offset; + apcs->offset = apcs_data->offset; /* Initialize channel identifiers */ for (i = 0; i < ARRAY_SIZE(apcs->mbox_chans); i++) @@ -93,9 +117,9 @@ static int qcom_apcs_ipc_probe(struct platform_device *pdev) return ret; } - if (of_match_device(apcs_clk_match_table, &pdev->dev)) { + if (apcs_data->clk_name) { apcs->clk = platform_device_register_data(&pdev->dev, - "qcom-apcs-msm8916-clk", + apcs_data->clk_name, PLATFORM_DEVID_NONE, NULL, 0); if (IS_ERR(apcs->clk)) @@ -119,14 +143,15 @@ static int qcom_apcs_ipc_remove(struct platform_device *pdev) /* .data is the offset of the ipc register within the global block */ static const struct of_device_id qcom_apcs_ipc_of_match[] = { - { .compatible = "qcom,msm8916-apcs-kpss-global", .data = (void *)8 }, - { .compatible = "qcom,msm8996-apcs-hmss-global", .data = (void *)16 }, - { .compatible = "qcom,msm8998-apcs-hmss-global", .data = (void *)8 }, - { .compatible = "qcom,qcs404-apcs-apps-global", .data = (void *)8 }, - { .compatible = "qcom,sc7180-apss-shared", .data = (void *)12 }, - { .compatible = "qcom,sdm845-apss-shared", .data = (void *)12 }, - { .compatible = "qcom,sm8150-apss-shared", .data = (void *)12 }, - { .compatible = "qcom,ipq8074-apcs-apps-global", .data = (void *)8 }, + { .compatible = "qcom,ipq6018-apcs-apps-global", .data = &ipq6018_apcs_data }, + { .compatible = "qcom,ipq8074-apcs-apps-global", .data = &ipq8074_apcs_data }, + { .compatible = "qcom,msm8916-apcs-kpss-global", .data = &msm8916_apcs_data }, + { .compatible = "qcom,msm8996-apcs-hmss-global", .data = &msm8996_apcs_data }, + { .compatible = "qcom,msm8998-apcs-hmss-global", .data = &msm8998_apcs_data }, + { .compatible = "qcom,qcs404-apcs-apps-global", .data = &msm8916_apcs_data }, + { .compatible = "qcom,sc7180-apss-shared", .data = &apps_shared_apcs_data }, + { .compatible = "qcom,sdm845-apss-shared", .data = &apps_shared_apcs_data }, + { .compatible = "qcom,sm8150-apss-shared", .data = &apps_shared_apcs_data }, {} }; MODULE_DEVICE_TABLE(of, qcom_apcs_ipc_of_match); diff --git a/drivers/mailbox/qcom-ipcc.c b/drivers/mailbox/qcom-ipcc.c new file mode 100644 index 000000000000..2d13c72944c6 --- /dev/null +++ b/drivers/mailbox/qcom-ipcc.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved. + */ + +#include <linux/bitfield.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/mailbox_controller.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include <dt-bindings/mailbox/qcom-ipcc.h> + +#define IPCC_MBOX_MAX_CHAN 48 + +/* IPCC Register offsets */ +#define IPCC_REG_SEND_ID 0x0c +#define IPCC_REG_RECV_ID 0x10 +#define IPCC_REG_RECV_SIGNAL_ENABLE 0x14 +#define IPCC_REG_RECV_SIGNAL_DISABLE 0x18 +#define IPCC_REG_RECV_SIGNAL_CLEAR 0x1c +#define IPCC_REG_CLIENT_CLEAR 0x38 + +#define IPCC_SIGNAL_ID_MASK GENMASK(15, 0) +#define IPCC_CLIENT_ID_MASK GENMASK(31, 16) + +#define IPCC_NO_PENDING_IRQ GENMASK(31, 0) + +/** + * struct qcom_ipcc_chan_info - Per-mailbox-channel info + * @client_id: The client-id to which the interrupt has to be triggered + * @signal_id: The signal-id to which the interrupt has to be triggered + */ +struct qcom_ipcc_chan_info { + u16 client_id; + u16 signal_id; +}; + +/** + * struct qcom_ipcc - Holder for the mailbox driver + * @dev: Device associated with this instance + * @base: Base address of the IPCC frame associated to APSS + * @irq_domain: The irq_domain associated with this instance + * @chan: The mailbox channels array + * @mchan: The per-mailbox channel info array + * @mbox: The mailbox controller + * @irq: Summary irq + */ +struct qcom_ipcc { + struct device *dev; + void __iomem *base; + struct irq_domain *irq_domain; + struct mbox_chan chan[IPCC_MBOX_MAX_CHAN]; + struct qcom_ipcc_chan_info mchan[IPCC_MBOX_MAX_CHAN]; + struct mbox_controller mbox; + int irq; +}; + +static inline struct qcom_ipcc *to_qcom_ipcc(struct mbox_controller *mbox) +{ + return container_of(mbox, struct qcom_ipcc, mbox); +} + +static inline u32 qcom_ipcc_get_hwirq(u16 client_id, u16 signal_id) +{ + return FIELD_PREP(IPCC_CLIENT_ID_MASK, client_id) | + FIELD_PREP(IPCC_SIGNAL_ID_MASK, signal_id); +} + +static irqreturn_t qcom_ipcc_irq_fn(int irq, void *data) +{ + struct qcom_ipcc *ipcc = data; + u32 hwirq; + int virq; + + for (;;) { + hwirq = readl(ipcc->base + IPCC_REG_RECV_ID); + if (hwirq == IPCC_NO_PENDING_IRQ) + break; + + virq = irq_find_mapping(ipcc->irq_domain, hwirq); + writel(hwirq, ipcc->base + IPCC_REG_RECV_SIGNAL_CLEAR); + generic_handle_irq(virq); + } + + return IRQ_HANDLED; +} + +static void qcom_ipcc_mask_irq(struct irq_data *irqd) +{ + struct qcom_ipcc *ipcc = irq_data_get_irq_chip_data(irqd); + irq_hw_number_t hwirq = irqd_to_hwirq(irqd); + + writel(hwirq, ipcc->base + IPCC_REG_RECV_SIGNAL_DISABLE); +} + +static void qcom_ipcc_unmask_irq(struct irq_data *irqd) +{ + struct qcom_ipcc *ipcc = irq_data_get_irq_chip_data(irqd); + irq_hw_number_t hwirq = irqd_to_hwirq(irqd); + + writel(hwirq, ipcc->base + IPCC_REG_RECV_SIGNAL_ENABLE); +} + +static struct irq_chip qcom_ipcc_irq_chip = { + .name = "ipcc", + .irq_mask = qcom_ipcc_mask_irq, + .irq_unmask = qcom_ipcc_unmask_irq, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static int qcom_ipcc_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + struct qcom_ipcc *ipcc = d->host_data; + + irq_set_chip_and_handler(irq, &qcom_ipcc_irq_chip, handle_level_irq); + irq_set_chip_data(irq, ipcc); + irq_set_noprobe(irq); + + return 0; +} + +static int qcom_ipcc_domain_xlate(struct irq_domain *d, + struct device_node *node, const u32 *intspec, + unsigned int intsize, + unsigned long *out_hwirq, + unsigned int *out_type) +{ + if (intsize != 3) + return -EINVAL; + + *out_hwirq = qcom_ipcc_get_hwirq(intspec[0], intspec[1]); + *out_type = intspec[2] & IRQ_TYPE_SENSE_MASK; + + return 0; +} + +static const struct irq_domain_ops qcom_ipcc_irq_ops = { + .map = qcom_ipcc_domain_map, + .xlate = qcom_ipcc_domain_xlate, +}; + +static int qcom_ipcc_mbox_send_data(struct mbox_chan *chan, void *data) +{ + struct qcom_ipcc *ipcc = to_qcom_ipcc(chan->mbox); + struct qcom_ipcc_chan_info *mchan = chan->con_priv; + u32 hwirq; + + hwirq = qcom_ipcc_get_hwirq(mchan->client_id, mchan->signal_id); + writel(hwirq, ipcc->base + IPCC_REG_SEND_ID); + + return 0; +} + +static struct mbox_chan *qcom_ipcc_mbox_xlate(struct mbox_controller *mbox, + const struct of_phandle_args *ph) +{ + struct qcom_ipcc *ipcc = to_qcom_ipcc(mbox); + struct qcom_ipcc_chan_info *mchan; + struct mbox_chan *chan; + unsigned int i; + + if (ph->args_count != 2) + return ERR_PTR(-EINVAL); + + for (i = 0; i < IPCC_MBOX_MAX_CHAN; i++) { + chan = &ipcc->chan[i]; + if (!chan->con_priv) { + mchan = &ipcc->mchan[i]; + mchan->client_id = ph->args[0]; + mchan->signal_id = ph->args[1]; + chan->con_priv = mchan; + break; + } + + chan = NULL; + } + + return chan ?: ERR_PTR(-EBUSY); +} + +static const struct mbox_chan_ops ipcc_mbox_chan_ops = { + .send_data = qcom_ipcc_mbox_send_data, +}; + +static int qcom_ipcc_setup_mbox(struct qcom_ipcc *ipcc) +{ + struct mbox_controller *mbox; + struct device *dev = ipcc->dev; + + mbox = &ipcc->mbox; + mbox->dev = dev; + mbox->num_chans = IPCC_MBOX_MAX_CHAN; + mbox->chans = ipcc->chan; + mbox->ops = &ipcc_mbox_chan_ops; + mbox->of_xlate = qcom_ipcc_mbox_xlate; + mbox->txdone_irq = false; + mbox->txdone_poll = false; + + return devm_mbox_controller_register(dev, mbox); +} + +static int qcom_ipcc_probe(struct platform_device *pdev) +{ + struct qcom_ipcc *ipcc; + int ret; + + ipcc = devm_kzalloc(&pdev->dev, sizeof(*ipcc), GFP_KERNEL); + if (!ipcc) + return -ENOMEM; + + ipcc->dev = &pdev->dev; + + ipcc->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ipcc->base)) + return PTR_ERR(ipcc->base); + + ipcc->irq = platform_get_irq(pdev, 0); + if (ipcc->irq < 0) + return ipcc->irq; + + ipcc->irq_domain = irq_domain_add_tree(pdev->dev.of_node, + &qcom_ipcc_irq_ops, ipcc); + if (!ipcc->irq_domain) + return -ENOMEM; + + ret = qcom_ipcc_setup_mbox(ipcc); + if (ret) + goto err_mbox; + + ret = devm_request_irq(&pdev->dev, ipcc->irq, qcom_ipcc_irq_fn, + IRQF_TRIGGER_HIGH, "ipcc", ipcc); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to register the irq: %d\n", ret); + goto err_mbox; + } + + enable_irq_wake(ipcc->irq); + platform_set_drvdata(pdev, ipcc); + + return 0; + +err_mbox: + irq_domain_remove(ipcc->irq_domain); + + return ret; +} + +static int qcom_ipcc_remove(struct platform_device *pdev) +{ + struct qcom_ipcc *ipcc = platform_get_drvdata(pdev); + + disable_irq_wake(ipcc->irq); + irq_domain_remove(ipcc->irq_domain); + + return 0; +} + +static const struct of_device_id qcom_ipcc_of_match[] = { + { .compatible = "qcom,ipcc"}, + {} +}; +MODULE_DEVICE_TABLE(of, qcom_ipcc_of_match); + +static struct platform_driver qcom_ipcc_driver = { + .probe = qcom_ipcc_probe, + .remove = qcom_ipcc_remove, + .driver = { + .name = "qcom-ipcc", + .of_match_table = qcom_ipcc_of_match, + }, +}; + +static int __init qcom_ipcc_init(void) +{ + return platform_driver_register(&qcom_ipcc_driver); +} +arch_initcall(qcom_ipcc_init); + +MODULE_AUTHOR("Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org>"); +MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>"); +MODULE_DESCRIPTION("Qualcomm Technologies, Inc. IPCC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mailbox/sprd-mailbox.c b/drivers/mailbox/sprd-mailbox.c new file mode 100644 index 000000000000..f6fab24ae8a9 --- /dev/null +++ b/drivers/mailbox/sprd-mailbox.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Spreadtrum mailbox driver + * + * Copyright (c) 2020 Spreadtrum Communications Inc. + */ + +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/mailbox_controller.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/clk.h> + +#define SPRD_MBOX_ID 0x0 +#define SPRD_MBOX_MSG_LOW 0x4 +#define SPRD_MBOX_MSG_HIGH 0x8 +#define SPRD_MBOX_TRIGGER 0xc +#define SPRD_MBOX_FIFO_RST 0x10 +#define SPRD_MBOX_FIFO_STS 0x14 +#define SPRD_MBOX_IRQ_STS 0x18 +#define SPRD_MBOX_IRQ_MSK 0x1c +#define SPRD_MBOX_LOCK 0x20 +#define SPRD_MBOX_FIFO_DEPTH 0x24 + +/* Bit and mask definiation for inbox's SPRD_MBOX_FIFO_STS register */ +#define SPRD_INBOX_FIFO_DELIVER_MASK GENMASK(23, 16) +#define SPRD_INBOX_FIFO_OVERLOW_MASK GENMASK(15, 8) +#define SPRD_INBOX_FIFO_DELIVER_SHIFT 16 +#define SPRD_INBOX_FIFO_BUSY_MASK GENMASK(7, 0) + +/* Bit and mask definiation for SPRD_MBOX_IRQ_STS register */ +#define SPRD_MBOX_IRQ_CLR BIT(0) + +/* Bit and mask definiation for outbox's SPRD_MBOX_FIFO_STS register */ +#define SPRD_OUTBOX_FIFO_FULL BIT(0) +#define SPRD_OUTBOX_FIFO_WR_SHIFT 16 +#define SPRD_OUTBOX_FIFO_RD_SHIFT 24 +#define SPRD_OUTBOX_FIFO_POS_MASK GENMASK(7, 0) + +/* Bit and mask definiation for inbox's SPRD_MBOX_IRQ_MSK register */ +#define SPRD_INBOX_FIFO_BLOCK_IRQ BIT(0) +#define SPRD_INBOX_FIFO_OVERFLOW_IRQ BIT(1) +#define SPRD_INBOX_FIFO_DELIVER_IRQ BIT(2) +#define SPRD_INBOX_FIFO_IRQ_MASK GENMASK(2, 0) + +/* Bit and mask definiation for outbox's SPRD_MBOX_IRQ_MSK register */ +#define SPRD_OUTBOX_FIFO_NOT_EMPTY_IRQ BIT(0) +#define SPRD_OUTBOX_FIFO_IRQ_MASK GENMASK(4, 0) + +#define SPRD_MBOX_CHAN_MAX 8 + +struct sprd_mbox_priv { + struct mbox_controller mbox; + struct device *dev; + void __iomem *inbox_base; + void __iomem *outbox_base; + struct clk *clk; + u32 outbox_fifo_depth; + + struct mbox_chan chan[SPRD_MBOX_CHAN_MAX]; +}; + +static struct sprd_mbox_priv *to_sprd_mbox_priv(struct mbox_controller *mbox) +{ + return container_of(mbox, struct sprd_mbox_priv, mbox); +} + +static u32 sprd_mbox_get_fifo_len(struct sprd_mbox_priv *priv, u32 fifo_sts) +{ + u32 wr_pos = (fifo_sts >> SPRD_OUTBOX_FIFO_WR_SHIFT) & + SPRD_OUTBOX_FIFO_POS_MASK; + u32 rd_pos = (fifo_sts >> SPRD_OUTBOX_FIFO_RD_SHIFT) & + SPRD_OUTBOX_FIFO_POS_MASK; + u32 fifo_len; + + /* + * If the read pointer is equal with write pointer, which means the fifo + * is full or empty. + */ + if (wr_pos == rd_pos) { + if (fifo_sts & SPRD_OUTBOX_FIFO_FULL) + fifo_len = priv->outbox_fifo_depth; + else + fifo_len = 0; + } else if (wr_pos > rd_pos) { + fifo_len = wr_pos - rd_pos; + } else { + fifo_len = priv->outbox_fifo_depth - rd_pos + wr_pos; + } + + return fifo_len; +} + +static irqreturn_t sprd_mbox_outbox_isr(int irq, void *data) +{ + struct sprd_mbox_priv *priv = data; + struct mbox_chan *chan; + u32 fifo_sts, fifo_len, msg[2]; + int i, id; + + fifo_sts = readl(priv->outbox_base + SPRD_MBOX_FIFO_STS); + + fifo_len = sprd_mbox_get_fifo_len(priv, fifo_sts); + if (!fifo_len) { + dev_warn_ratelimited(priv->dev, "spurious outbox interrupt\n"); + return IRQ_NONE; + } + + for (i = 0; i < fifo_len; i++) { + msg[0] = readl(priv->outbox_base + SPRD_MBOX_MSG_LOW); + msg[1] = readl(priv->outbox_base + SPRD_MBOX_MSG_HIGH); + id = readl(priv->outbox_base + SPRD_MBOX_ID); + + chan = &priv->chan[id]; + mbox_chan_received_data(chan, (void *)msg); + + /* Trigger to update outbox FIFO pointer */ + writel(0x1, priv->outbox_base + SPRD_MBOX_TRIGGER); + } + + /* Clear irq status after reading all message. */ + writel(SPRD_MBOX_IRQ_CLR, priv->outbox_base + SPRD_MBOX_IRQ_STS); + + return IRQ_HANDLED; +} + +static irqreturn_t sprd_mbox_inbox_isr(int irq, void *data) +{ + struct sprd_mbox_priv *priv = data; + struct mbox_chan *chan; + u32 fifo_sts, send_sts, busy, id; + + fifo_sts = readl(priv->inbox_base + SPRD_MBOX_FIFO_STS); + + /* Get the inbox data delivery status */ + send_sts = (fifo_sts & SPRD_INBOX_FIFO_DELIVER_MASK) >> + SPRD_INBOX_FIFO_DELIVER_SHIFT; + if (!send_sts) { + dev_warn_ratelimited(priv->dev, "spurious inbox interrupt\n"); + return IRQ_NONE; + } + + while (send_sts) { + id = __ffs(send_sts); + send_sts &= (send_sts - 1); + + chan = &priv->chan[id]; + + /* + * Check if the message was fetched by remote traget, if yes, + * that means the transmission has been completed. + */ + busy = fifo_sts & SPRD_INBOX_FIFO_BUSY_MASK; + if (!(busy & BIT(id))) + mbox_chan_txdone(chan, 0); + } + + /* Clear FIFO delivery and overflow status */ + writel(fifo_sts & + (SPRD_INBOX_FIFO_DELIVER_MASK | SPRD_INBOX_FIFO_OVERLOW_MASK), + priv->inbox_base + SPRD_MBOX_FIFO_RST); + + /* Clear irq status */ + writel(SPRD_MBOX_IRQ_CLR, priv->inbox_base + SPRD_MBOX_IRQ_STS); + + return IRQ_HANDLED; +} + +static int sprd_mbox_send_data(struct mbox_chan *chan, void *msg) +{ + struct sprd_mbox_priv *priv = to_sprd_mbox_priv(chan->mbox); + unsigned long id = (unsigned long)chan->con_priv; + u32 *data = msg; + + /* Write data into inbox FIFO, and only support 8 bytes every time */ + writel(data[0], priv->inbox_base + SPRD_MBOX_MSG_LOW); + writel(data[1], priv->inbox_base + SPRD_MBOX_MSG_HIGH); + + /* Set target core id */ + writel(id, priv->inbox_base + SPRD_MBOX_ID); + + /* Trigger remote request */ + writel(0x1, priv->inbox_base + SPRD_MBOX_TRIGGER); + + return 0; +} + +static int sprd_mbox_flush(struct mbox_chan *chan, unsigned long timeout) +{ + struct sprd_mbox_priv *priv = to_sprd_mbox_priv(chan->mbox); + unsigned long id = (unsigned long)chan->con_priv; + u32 busy; + + timeout = jiffies + msecs_to_jiffies(timeout); + + while (time_before(jiffies, timeout)) { + busy = readl(priv->inbox_base + SPRD_MBOX_FIFO_STS) & + SPRD_INBOX_FIFO_BUSY_MASK; + if (!(busy & BIT(id))) { + mbox_chan_txdone(chan, 0); + return 0; + } + + udelay(1); + } + + return -ETIME; +} + +static int sprd_mbox_startup(struct mbox_chan *chan) +{ + struct sprd_mbox_priv *priv = to_sprd_mbox_priv(chan->mbox); + u32 val; + + /* Select outbox FIFO mode and reset the outbox FIFO status */ + writel(0x0, priv->outbox_base + SPRD_MBOX_FIFO_RST); + + /* Enable inbox FIFO overflow and delivery interrupt */ + val = readl(priv->inbox_base + SPRD_MBOX_IRQ_MSK); + val &= ~(SPRD_INBOX_FIFO_OVERFLOW_IRQ | SPRD_INBOX_FIFO_DELIVER_IRQ); + writel(val, priv->inbox_base + SPRD_MBOX_IRQ_MSK); + + /* Enable outbox FIFO not empty interrupt */ + val = readl(priv->outbox_base + SPRD_MBOX_IRQ_MSK); + val &= ~SPRD_OUTBOX_FIFO_NOT_EMPTY_IRQ; + writel(val, priv->outbox_base + SPRD_MBOX_IRQ_MSK); + + return 0; +} + +static void sprd_mbox_shutdown(struct mbox_chan *chan) +{ + struct sprd_mbox_priv *priv = to_sprd_mbox_priv(chan->mbox); + + /* Disable inbox & outbox interrupt */ + writel(SPRD_INBOX_FIFO_IRQ_MASK, priv->inbox_base + SPRD_MBOX_IRQ_MSK); + writel(SPRD_OUTBOX_FIFO_IRQ_MASK, priv->outbox_base + SPRD_MBOX_IRQ_MSK); +} + +static const struct mbox_chan_ops sprd_mbox_ops = { + .send_data = sprd_mbox_send_data, + .flush = sprd_mbox_flush, + .startup = sprd_mbox_startup, + .shutdown = sprd_mbox_shutdown, +}; + +static void sprd_mbox_disable(void *data) +{ + struct sprd_mbox_priv *priv = data; + + clk_disable_unprepare(priv->clk); +} + +static int sprd_mbox_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sprd_mbox_priv *priv; + int ret, inbox_irq, outbox_irq; + unsigned long id; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->dev = dev; + + /* + * The Spreadtrum mailbox uses an inbox to send messages to the target + * core, and uses an outbox to receive messages from other cores. + * + * Thus the mailbox controller supplies 2 different register addresses + * and IRQ numbers for inbox and outbox. + */ + priv->inbox_base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->inbox_base)) + return PTR_ERR(priv->inbox_base); + + priv->outbox_base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(priv->outbox_base)) + return PTR_ERR(priv->outbox_base); + + priv->clk = devm_clk_get(dev, "enable"); + if (IS_ERR(priv->clk)) { + dev_err(dev, "failed to get mailbox clock\n"); + return PTR_ERR(priv->clk); + } + + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, sprd_mbox_disable, priv); + if (ret) { + dev_err(dev, "failed to add mailbox disable action\n"); + return ret; + } + + inbox_irq = platform_get_irq(pdev, 0); + if (inbox_irq < 0) + return inbox_irq; + + ret = devm_request_irq(dev, inbox_irq, sprd_mbox_inbox_isr, + IRQF_NO_SUSPEND, dev_name(dev), priv); + if (ret) { + dev_err(dev, "failed to request inbox IRQ: %d\n", ret); + return ret; + } + + outbox_irq = platform_get_irq(pdev, 1); + if (outbox_irq < 0) + return outbox_irq; + + ret = devm_request_irq(dev, outbox_irq, sprd_mbox_outbox_isr, + IRQF_NO_SUSPEND, dev_name(dev), priv); + if (ret) { + dev_err(dev, "failed to request outbox IRQ: %d\n", ret); + return ret; + } + + /* Get the default outbox FIFO depth */ + priv->outbox_fifo_depth = + readl(priv->outbox_base + SPRD_MBOX_FIFO_DEPTH) + 1; + priv->mbox.dev = dev; + priv->mbox.chans = &priv->chan[0]; + priv->mbox.num_chans = SPRD_MBOX_CHAN_MAX; + priv->mbox.ops = &sprd_mbox_ops; + priv->mbox.txdone_irq = true; + + for (id = 0; id < SPRD_MBOX_CHAN_MAX; id++) + priv->chan[id].con_priv = (void *)id; + + ret = devm_mbox_controller_register(dev, &priv->mbox); + if (ret) { + dev_err(dev, "failed to register mailbox: %d\n", ret); + return ret; + } + + return 0; +} + +static const struct of_device_id sprd_mbox_of_match[] = { + { .compatible = "sprd,sc9860-mailbox", }, + { }, +}; +MODULE_DEVICE_TABLE(of, sprd_mbox_of_match); + +static struct platform_driver sprd_mbox_driver = { + .driver = { + .name = "sprd-mailbox", + .of_match_table = sprd_mbox_of_match, + }, + .probe = sprd_mbox_probe, +}; +module_platform_driver(sprd_mbox_driver); + +MODULE_AUTHOR("Baolin Wang <baolin.wang@unisoc.com>"); +MODULE_DESCRIPTION("Spreadtrum mailbox driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mailbox/zynqmp-ipi-mailbox.c b/drivers/mailbox/zynqmp-ipi-mailbox.c index 86887c9a349a..f44079d62b1a 100644 --- a/drivers/mailbox/zynqmp-ipi-mailbox.c +++ b/drivers/mailbox/zynqmp-ipi-mailbox.c @@ -504,10 +504,9 @@ static int zynqmp_ipi_mbox_probe(struct zynqmp_ipi_mbox *ipi_mbox, mchan->req_buf_size = resource_size(&res); mchan->req_buf = devm_ioremap(mdev, res.start, mchan->req_buf_size); - if (IS_ERR(mchan->req_buf)) { + if (!mchan->req_buf) { dev_err(mdev, "Unable to map IPI buffer I/O memory\n"); - ret = PTR_ERR(mchan->req_buf); - return ret; + return -ENOMEM; } } else if (ret != -ENODEV) { dev_err(mdev, "Unmatched resource %s, %d.\n", name, ret); @@ -520,10 +519,9 @@ static int zynqmp_ipi_mbox_probe(struct zynqmp_ipi_mbox *ipi_mbox, mchan->resp_buf_size = resource_size(&res); mchan->resp_buf = devm_ioremap(mdev, res.start, mchan->resp_buf_size); - if (IS_ERR(mchan->resp_buf)) { + if (!mchan->resp_buf) { dev_err(mdev, "Unable to map IPI buffer I/O memory\n"); - ret = PTR_ERR(mchan->resp_buf); - return ret; + return -ENOMEM; } } else if (ret != -ENODEV) { dev_err(mdev, "Unmatched resource %s.\n", name); @@ -543,10 +541,9 @@ static int zynqmp_ipi_mbox_probe(struct zynqmp_ipi_mbox *ipi_mbox, mchan->req_buf_size = resource_size(&res); mchan->req_buf = devm_ioremap(mdev, res.start, mchan->req_buf_size); - if (IS_ERR(mchan->req_buf)) { + if (!mchan->req_buf) { dev_err(mdev, "Unable to map IPI buffer I/O memory\n"); - ret = PTR_ERR(mchan->req_buf); - return ret; + return -ENOMEM; } } else if (ret != -ENODEV) { dev_err(mdev, "Unmatched resource %s.\n", name); @@ -559,10 +556,9 @@ static int zynqmp_ipi_mbox_probe(struct zynqmp_ipi_mbox *ipi_mbox, mchan->resp_buf_size = resource_size(&res); mchan->resp_buf = devm_ioremap(mdev, res.start, mchan->resp_buf_size); - if (IS_ERR(mchan->resp_buf)) { + if (!mchan->resp_buf) { dev_err(mdev, "Unable to map IPI buffer I/O memory\n"); - ret = PTR_ERR(mchan->resp_buf); - return ret; + return -ENOMEM; } } else if (ret != -ENODEV) { dev_err(mdev, "Unmatched resource %s.\n", name); @@ -668,10 +664,9 @@ static int zynqmp_ipi_probe(struct platform_device *pdev) /* IPI IRQ */ ret = platform_get_irq(pdev, 0); - if (ret < 0) { - dev_err(dev, "unable to find IPI IRQ.\n"); + if (ret < 0) goto free_mbox_dev; - } + pdata->irq = ret; ret = devm_request_irq(dev, pdata->irq, zynqmp_ipi_interrupt, IRQF_SHARED, dev_name(dev), pdata); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0585efa47d8f..c2c5bc4fb702 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3669,7 +3669,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->disk = disk; if (__nvme_revalidate_disk(disk, id)) - goto out_free_disk; + goto out_put_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { ret = nvme_nvm_register(ns, disk_name, node); @@ -3696,8 +3696,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) /* prevent double queue cleanup */ ns->disk->queue = NULL; put_disk(ns->disk); - out_free_disk: - del_gendisk(ns->disk); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index cb0007592c12..e999a8c4b7e8 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2634,10 +2634,11 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); - if (!(op->flags & FCOP_FLAGS_AEN)) + if (!(op->flags & FCOP_FLAGS_AEN)) { nvme_fc_unmap_data(ctrl, op->rq, op); + nvme_cleanup_cmd(op->rq); + } - nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index fa5c75501049..c0f4226d3299 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -599,8 +599,7 @@ static inline void nvme_trace_bio_complete(struct request *req, struct nvme_ns *ns = req->q->queuedata; if (req->cmd_flags & REQ_NVME_MPATH) - trace_block_bio_complete(ns->head->disk->queue, - req->bio, status); + trace_block_bio_complete(ns->head->disk->queue, req->bio); } extern struct device_attribute dev_attr_ana_grpid; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d690d5593a80..e2bacd369a88 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2950,9 +2950,15 @@ static int nvme_suspend(struct device *dev) * the PCI bus layer to put it into D3 in order to take the PCIe link * down, so as to allow the platform to achieve its minimum low-power * state (which may not be possible if the link is up). + * + * If a host memory buffer is enabled, shut down the device as the NVMe + * specification allows the device to access the host memory buffer in + * host DRAM from all power states, but hosts will fail access to DRAM + * during S3. */ if (pm_suspend_via_firmware() || !ctrl->npss || !pcie_aspm_enabled(pdev) || + ndev->nr_host_mem_descs || (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) return nvme_disable_prepare_reset(ndev, true); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1843110ec34f..3345ec7efaff 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -131,8 +131,8 @@ struct nvme_tcp_ctrl { static LIST_HEAD(nvme_tcp_ctrl_list); static DEFINE_MUTEX(nvme_tcp_ctrl_mutex); static struct workqueue_struct *nvme_tcp_wq; -static struct blk_mq_ops nvme_tcp_mq_ops; -static struct blk_mq_ops nvme_tcp_admin_mq_ops; +static const struct blk_mq_ops nvme_tcp_mq_ops; +static const struct blk_mq_ops nvme_tcp_admin_mq_ops; static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) @@ -2301,7 +2301,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) return queue->nr_cqe; } -static struct blk_mq_ops nvme_tcp_mq_ops = { +static const struct blk_mq_ops nvme_tcp_mq_ops = { .queue_rq = nvme_tcp_queue_rq, .complete = nvme_complete_rq, .init_request = nvme_tcp_init_request, @@ -2312,7 +2312,7 @@ static struct blk_mq_ops nvme_tcp_mq_ops = { .poll = nvme_tcp_poll, }; -static struct blk_mq_ops nvme_tcp_admin_mq_ops = { +static const struct blk_mq_ops nvme_tcp_admin_mq_ops = { .queue_rq = nvme_tcp_queue_rq, .complete = nvme_complete_rq, .init_request = nvme_tcp_init_request, diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6392bcd30bd7..6e2f623e472e 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -129,7 +129,22 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen) return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); } -static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) +static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl) +{ + u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; + struct nvmet_req *req; + + mutex_lock(&ctrl->lock); + while (ctrl->nr_async_event_cmds) { + req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; + mutex_unlock(&ctrl->lock); + nvmet_req_complete(req, status); + mutex_lock(&ctrl->lock); + } + mutex_unlock(&ctrl->lock); +} + +static void nvmet_async_events_process(struct nvmet_ctrl *ctrl) { struct nvmet_async_event *aen; struct nvmet_req *req; @@ -139,15 +154,14 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) aen = list_first_entry(&ctrl->async_events, struct nvmet_async_event, entry); req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - if (status == 0) - nvmet_set_result(req, nvmet_async_event_result(aen)); + nvmet_set_result(req, nvmet_async_event_result(aen)); list_del(&aen->entry); kfree(aen); mutex_unlock(&ctrl->lock); trace_nvmet_async_event(ctrl, req->cqe->result.u32); - nvmet_req_complete(req, status); + nvmet_req_complete(req, 0); mutex_lock(&ctrl->lock); } mutex_unlock(&ctrl->lock); @@ -170,7 +184,7 @@ static void nvmet_async_event_work(struct work_struct *work) struct nvmet_ctrl *ctrl = container_of(work, struct nvmet_ctrl, async_event_work); - nvmet_async_events_process(ctrl, 0); + nvmet_async_events_process(ctrl); } void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, @@ -779,7 +793,6 @@ static void nvmet_confirm_sq(struct percpu_ref *ref) void nvmet_sq_destroy(struct nvmet_sq *sq) { - u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; struct nvmet_ctrl *ctrl = sq->ctrl; /* @@ -787,7 +800,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) * queue doesn't have outstanding requests on it. */ if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) - nvmet_async_events_process(ctrl, status); + nvmet_async_events_failall(ctrl); percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 1669177cd26c..de9217cfd22d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -153,7 +153,7 @@ static LIST_HEAD(nvmet_tcp_queue_list); static DEFINE_MUTEX(nvmet_tcp_queue_mutex); static struct workqueue_struct *nvmet_tcp_wq; -static struct nvmet_fabrics_ops nvmet_tcp_ops; +static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); @@ -1713,7 +1713,7 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req, } } -static struct nvmet_fabrics_ops nvmet_tcp_ops = { +static const struct nvmet_fabrics_ops nvmet_tcp_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_TCP, .msdbd = 1, diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 8dd1278bec04..7719ae4e2c56 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -35,7 +35,7 @@ /* L3C has 8-counters */ #define L3C_NR_COUNTERS 0x8 -#define L3C_PERF_CTRL_EN 0x20000 +#define L3C_PERF_CTRL_EN 0x10000 #define L3C_EVTYPE_NONE 0xff /* diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 494f853f2206..490d353d5fde 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -32,7 +32,7 @@ #include <linux/usb/functionfs.h> #include <linux/aio.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/poll.h> #include <linux/eventfd.h> @@ -824,13 +824,9 @@ static void ffs_user_copy_worker(struct work_struct *work) bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD; if (io_data->read && ret > 0) { - mm_segment_t oldfs = get_fs(); - - set_fs(USER_DS); - use_mm(io_data->mm); + kthread_use_mm(io_data->mm); ret = ffs_copy_to_iter(io_data->buf, ret, &io_data->data); - unuse_mm(io_data->mm); - set_fs(oldfs); + kthread_unuse_mm(io_data->mm); } io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 3afddd3bea6e..9ee0bfe7bcda 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -21,7 +21,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/poll.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/aio.h> #include <linux/uio.h> #include <linux/refcount.h> @@ -462,9 +462,9 @@ static void ep_user_copy_worker(struct work_struct *work) struct kiocb *iocb = priv->iocb; size_t ret; - use_mm(mm); + kthread_use_mm(mm); ret = copy_to_iter(priv->buf, priv->actual, &priv->to); - unuse_mm(mm); + kthread_unuse_mm(mm); if (!ret) ret = -EFAULT; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 186acd8675ff..5e556ac9102a 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -27,7 +27,7 @@ #include <linux/iommu.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/mmu_context.h> +#include <linux/kthread.h> #include <linux/rbtree.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> @@ -2817,7 +2817,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, return -EPERM; if (kthread) - use_mm(mm); + kthread_use_mm(mm); else if (current->mm != mm) goto out; @@ -2844,7 +2844,7 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; if (kthread) - unuse_mm(mm); + kthread_unuse_mm(mm); out: mmput(mm); return *copied ? 0 : -EFAULT; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 062595ee1f83..d7b8df3edffc 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -14,7 +14,6 @@ #include <linux/vhost.h> #include <linux/uio.h> #include <linux/mm.h> -#include <linux/mmu_context.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/poll.h> @@ -335,10 +334,8 @@ static int vhost_worker(void *data) struct vhost_dev *dev = data; struct vhost_work *work, *work_next; struct llist_node *node; - mm_segment_t oldfs = get_fs(); - set_fs(USER_DS); - use_mm(dev->mm); + kthread_use_mm(dev->mm); for (;;) { /* mb paired w/ kthread_stop */ @@ -366,8 +363,7 @@ static int vhost_worker(void *data) schedule(); } } - unuse_mm(dev->mm); - set_fs(oldfs); + kthread_unuse_mm(dev->mm); return 0; } diff --git a/fs/afs/write.c b/fs/afs/write.c index 97bccde3298b..768497f82aee 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -447,6 +447,7 @@ static int afs_store_data(struct address_space *mapping, op->store.last = last; op->store.first_offset = offset; op->store.last_to = to; + op->mtime = vnode->vfs_inode.i_mtime; op->ops = &afs_store_data_operation; try_next_key: @@ -27,7 +27,6 @@ #include <linux/file.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/mmu_context.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/timer.h> diff --git a/fs/io-wq.c b/fs/io-wq.c index 4023c9846860..0b65a912b036 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -10,7 +10,6 @@ #include <linux/errno.h> #include <linux/sched/signal.h> #include <linux/mm.h> -#include <linux/mmu_context.h> #include <linux/sched/mm.h> #include <linux/percpu.h> #include <linux/slab.h> @@ -112,6 +111,7 @@ struct io_wq { unsigned long state; free_work_fn *free_work; + io_wq_work_fn *do_work; struct task_struct *manager; struct user_struct *user; @@ -170,8 +170,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) dropped_lock = true; } __set_current_state(TASK_RUNNING); - set_fs(KERNEL_DS); - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } @@ -418,18 +417,15 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work) { if (worker->mm) { - unuse_mm(worker->mm); + kthread_unuse_mm(worker->mm); mmput(worker->mm); worker->mm = NULL; } - if (!work->mm) { - set_fs(KERNEL_DS); + if (!work->mm) return; - } + if (mmget_not_zero(work->mm)) { - use_mm(work->mm); - if (!worker->mm) - set_fs(USER_DS); + kthread_use_mm(work->mm); worker->mm = work->mm; /* hang on to this mm */ work->mm = NULL; @@ -528,7 +524,7 @@ get_next: hash = io_get_work_hash(work); linked = old_work = work; - linked->func(&linked); + wq->do_work(&linked); linked = (old_work == linked) ? NULL : linked; work = next_hashed; @@ -785,7 +781,7 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) struct io_wq_work *old_work = work; work->flags |= IO_WQ_WORK_CANCEL; - work->func(&work); + wq->do_work(&work); work = (work == old_work) ? NULL : work; wq->free_work(old_work); } while (work); @@ -1023,7 +1019,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) int ret = -ENOMEM, node; struct io_wq *wq; - if (WARN_ON_ONCE(!data->free_work)) + if (WARN_ON_ONCE(!data->free_work || !data->do_work)) return ERR_PTR(-EINVAL); wq = kzalloc(sizeof(*wq), GFP_KERNEL); @@ -1037,6 +1033,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) } wq->free_work = data->free_work; + wq->do_work = data->do_work; /* caller must already hold a reference to this */ wq->user = data->user; @@ -1093,7 +1090,7 @@ err: bool io_wq_get(struct io_wq *wq, struct io_wq_data *data) { - if (data->free_work != wq->free_work) + if (data->free_work != wq->free_work || data->do_work != wq->do_work) return false; return refcount_inc_not_zero(&wq->use_refs); diff --git a/fs/io-wq.h b/fs/io-wq.h index 5ba12de7572f..8e138fa88b9f 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -85,7 +85,6 @@ static inline void wq_list_del(struct io_wq_work_list *list, struct io_wq_work { struct io_wq_work_node list; - void (*func)(struct io_wq_work **); struct files_struct *files; struct mm_struct *mm; const struct cred *creds; @@ -94,11 +93,6 @@ struct io_wq_work { pid_t task_pid; }; -#define INIT_IO_WORK(work, _func) \ - do { \ - *(work) = (struct io_wq_work){ .func = _func }; \ - } while (0) \ - static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) { if (!work->list.next) @@ -108,10 +102,12 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) } typedef void (free_work_fn)(struct io_wq_work *); +typedef void (io_wq_work_fn)(struct io_wq_work **); struct io_wq_data { struct user_struct *user; + io_wq_work_fn *do_work; free_work_fn *free_work; }; diff --git a/fs/io_uring.c b/fs/io_uring.c index 9fb0dc6033ba..155f3d830ddb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -55,7 +55,6 @@ #include <linux/fdtable.h> #include <linux/mm.h> #include <linux/mman.h> -#include <linux/mmu_context.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/kthread.h> @@ -529,7 +528,6 @@ enum { REQ_F_INFLIGHT_BIT, REQ_F_CUR_POS_BIT, REQ_F_NOWAIT_BIT, - REQ_F_IOPOLL_COMPLETED_BIT, REQ_F_LINK_TIMEOUT_BIT, REQ_F_TIMEOUT_BIT, REQ_F_ISREG_BIT, @@ -541,6 +539,8 @@ enum { REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, REQ_F_NO_FILE_TABLE_BIT, + REQ_F_QUEUE_TIMEOUT_BIT, + REQ_F_WORK_INITIALIZED_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -572,8 +572,6 @@ enum { REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), /* must not punt to workers */ REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), - /* polled IO has completed */ - REQ_F_IOPOLL_COMPLETED = BIT(REQ_F_IOPOLL_COMPLETED_BIT), /* has linked timeout */ REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), /* timeout request */ @@ -596,6 +594,10 @@ enum { REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), /* doesn't need file table for this request */ REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), + /* needs to queue linked timeout */ + REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT), + /* io_wq_work is initialized */ + REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT), }; struct async_poll { @@ -634,6 +636,8 @@ struct io_kiocb { struct io_async_ctx *io; int cflags; u8 opcode; + /* polled IO has completed */ + u8 iopoll_completed; u16 buf_index; @@ -698,6 +702,8 @@ struct io_op_def { unsigned needs_mm : 1; /* needs req->file assigned */ unsigned needs_file : 1; + /* don't fail if file grab fails */ + unsigned needs_file_no_error : 1; /* hash wq insertion if file is a regular file */ unsigned hash_reg_file : 1; /* unbound wq insertion if file is a non-regular file */ @@ -804,6 +810,8 @@ static const struct io_op_def io_op_defs[] = { .needs_fs = 1, }, [IORING_OP_CLOSE] = { + .needs_file = 1, + .needs_file_no_error = 1, .file_table = 1, }, [IORING_OP_FILES_UPDATE] = { @@ -904,6 +912,19 @@ EXPORT_SYMBOL(io_uring_get_socket); static void io_file_put_work(struct work_struct *work); +/* + * Note: must call io_req_init_async() for the first time you + * touch any members of io_wq_work. + */ +static inline void io_req_init_async(struct io_kiocb *req) +{ + if (req->flags & REQ_F_WORK_INITIALIZED) + return; + + memset(&req->work, 0, sizeof(req->work)); + req->flags |= REQ_F_WORK_INITIALIZED; +} + static inline bool io_async_submit(struct io_ring_ctx *ctx) { return ctx->flags & IORING_SETUP_SQPOLL; @@ -1030,6 +1051,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req, static inline void io_req_work_drop_env(struct io_kiocb *req) { + if (!(req->flags & REQ_F_WORK_INITIALIZED)) + return; + if (req->work.mm) { mmdrop(req->work.mm); req->work.mm = NULL; @@ -1576,16 +1600,6 @@ static void io_free_req(struct io_kiocb *req) io_queue_async_work(nxt); } -static void io_link_work_cb(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - struct io_kiocb *link; - - link = list_first_entry(&req->link_list, struct io_kiocb, link_list); - io_queue_linked_timeout(link); - io_wq_submit_work(workptr); -} - static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt) { struct io_kiocb *link; @@ -1597,7 +1611,7 @@ static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt) *workptr = &nxt->work; link = io_prep_linked_timeout(nxt); if (link) - nxt->work.func = io_link_work_cb; + nxt->flags |= REQ_F_QUEUE_TIMEOUT; } /* @@ -1782,7 +1796,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, * If we find a request that requires polling, break out * and complete those lists first, if we have entries there. */ - if (req->flags & REQ_F_IOPOLL_COMPLETED) { + if (READ_ONCE(req->iopoll_completed)) { list_move_tail(&req->list, &done); continue; } @@ -1963,7 +1977,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) req_set_fail_links(req); req->result = res; if (res != -EAGAIN) - req->flags |= REQ_F_IOPOLL_COMPLETED; + WRITE_ONCE(req->iopoll_completed, 1); } /* @@ -1996,7 +2010,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req) * For fast devices, IO may have already completed. If it has, add * it to the front so we find it first. */ - if (req->flags & REQ_F_IOPOLL_COMPLETED) + if (READ_ONCE(req->iopoll_completed)) list_add(&req->list, &ctx->poll_list); else list_add_tail(&req->list, &ctx->poll_list); @@ -2064,6 +2078,10 @@ static bool io_file_supports_async(struct file *file, int rw) if (S_ISREG(mode) && file->f_op != &io_uring_fops) return true; + /* any ->read/write should understand O_NONBLOCK */ + if (file->f_flags & O_NONBLOCK) + return true; + if (!(file->f_mode & FMODE_NOWAIT)) return false; @@ -2106,8 +2124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, kiocb->ki_ioprio = get_current_ioprio(); /* don't allow async punt if RWF_NOWAIT was requested */ - if ((kiocb->ki_flags & IOCB_NOWAIT) || - (req->file->f_flags & O_NONBLOCK)) + if (kiocb->ki_flags & IOCB_NOWAIT) req->flags |= REQ_F_NOWAIT; if (force_nonblock) @@ -2121,6 +2138,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; req->result = 0; + req->iopoll_completed = 0; } else { if (kiocb->ki_flags & IOCB_HIPRI) return -EINVAL; @@ -2359,8 +2377,14 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, bool needs_lock) { - if (req->flags & REQ_F_BUFFER_SELECTED) + if (req->flags & REQ_F_BUFFER_SELECTED) { + struct io_buffer *kbuf; + + kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; + iov[0].iov_base = u64_to_user_ptr(kbuf->addr); + iov[0].iov_len = kbuf->len; return 0; + } if (!req->rw.len) return 0; else if (req->rw.len > 1) @@ -2742,7 +2766,8 @@ copy_iov: if (ret) goto out_free; /* any defer here is final, must blocking retry */ - if (!file_can_poll(req->file)) + if (!(req->flags & REQ_F_NOWAIT) && + !file_can_poll(req->file)) req->flags |= REQ_F_MUST_PUNT; return -EAGAIN; } @@ -2762,6 +2787,8 @@ static int __io_splice_prep(struct io_kiocb *req, if (req->flags & REQ_F_NEED_CLEANUP) return 0; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; sp->file_in = NULL; sp->len = READ_ONCE(sqe->len); @@ -2776,8 +2803,14 @@ static int __io_splice_prep(struct io_kiocb *req, return ret; req->flags |= REQ_F_NEED_CLEANUP; - if (!S_ISREG(file_inode(sp->file_in)->i_mode)) + if (!S_ISREG(file_inode(sp->file_in)->i_mode)) { + /* + * Splice operation will be punted aync, and here need to + * modify io_wq_work.flags, so initialize io_wq_work firstly. + */ + io_req_init_async(req); req->work.flags |= IO_WQ_WORK_UNBOUND; + } return 0; } @@ -2886,23 +2919,15 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static bool io_req_cancelled(struct io_kiocb *req) -{ - if (req->work.flags & IO_WQ_WORK_CANCEL) { - req_set_fail_links(req); - io_cqring_add_event(req, -ECANCELED); - io_put_req(req); - return true; - } - - return false; -} - -static void __io_fsync(struct io_kiocb *req) +static int io_fsync(struct io_kiocb *req, bool force_nonblock) { loff_t end = req->sync.off + req->sync.len; int ret; + /* fsync always requires a blocking context */ + if (force_nonblock) + return -EAGAIN; + ret = vfs_fsync_range(req->file, req->sync.off, end > 0 ? end : LLONG_MAX, req->sync.flags & IORING_FSYNC_DATASYNC); @@ -2910,58 +2935,16 @@ static void __io_fsync(struct io_kiocb *req) req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req(req); -} - -static void io_fsync_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - - if (io_req_cancelled(req)) - return; - __io_fsync(req); - io_steal_work(req, workptr); -} - -static int io_fsync(struct io_kiocb *req, bool force_nonblock) -{ - /* fsync always requires a blocking context */ - if (force_nonblock) { - req->work.func = io_fsync_finish; - return -EAGAIN; - } - __io_fsync(req); return 0; } -static void __io_fallocate(struct io_kiocb *req) -{ - int ret; - - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize; - ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, - req->sync.len); - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; - if (ret < 0) - req_set_fail_links(req); - io_cqring_add_event(req, ret); - io_put_req(req); -} - -static void io_fallocate_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - - if (io_req_cancelled(req)) - return; - __io_fallocate(req); - io_steal_work(req, workptr); -} - static int io_fallocate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (sqe->ioprio || sqe->buf_index || sqe->rw_flags) return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; req->sync.off = READ_ONCE(sqe->off); req->sync.len = READ_ONCE(sqe->addr); @@ -2972,66 +2955,74 @@ static int io_fallocate_prep(struct io_kiocb *req, static int io_fallocate(struct io_kiocb *req, bool force_nonblock) { + int ret; + /* fallocate always requiring blocking context */ - if (force_nonblock) { - req->work.func = io_fallocate_finish; + if (force_nonblock) return -EAGAIN; - } - __io_fallocate(req); + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize; + ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, + req->sync.len); + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; + if (ret < 0) + req_set_fail_links(req); + io_cqring_add_event(req, ret); + io_put_req(req); return 0; } -static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { const char __user *fname; int ret; - if (sqe->ioprio || sqe->buf_index) + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) return -EINVAL; - if (req->flags & REQ_F_FIXED_FILE) + if (unlikely(sqe->ioprio || sqe->buf_index)) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; - if (req->flags & REQ_F_NEED_CLEANUP) - return 0; - req->open.dfd = READ_ONCE(sqe->fd); - req->open.how.mode = READ_ONCE(sqe->len); - fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); - req->open.how.flags = READ_ONCE(sqe->open_flags); - if (force_o_largefile()) + /* open.how should be already initialised */ + if (!(req->open.how.flags & O_PATH) && force_o_largefile()) req->open.how.flags |= O_LARGEFILE; + req->open.dfd = READ_ONCE(sqe->fd); + fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); req->open.filename = getname(fname); if (IS_ERR(req->open.filename)) { ret = PTR_ERR(req->open.filename); req->open.filename = NULL; return ret; } - req->open.nofile = rlimit(RLIMIT_NOFILE); req->flags |= REQ_F_NEED_CLEANUP; return 0; } +static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + u64 flags, mode; + + if (req->flags & REQ_F_NEED_CLEANUP) + return 0; + mode = READ_ONCE(sqe->len); + flags = READ_ONCE(sqe->open_flags); + req->open.how = build_open_how(flags, mode); + return __io_openat_prep(req, sqe); +} + static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct open_how __user *how; - const char __user *fname; size_t len; int ret; - if (sqe->ioprio || sqe->buf_index) - return -EINVAL; - if (req->flags & REQ_F_FIXED_FILE) - return -EBADF; if (req->flags & REQ_F_NEED_CLEANUP) return 0; - - req->open.dfd = READ_ONCE(sqe->fd); - fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); len = READ_ONCE(sqe->len); - if (len < OPEN_HOW_SIZE_VER0) return -EINVAL; @@ -3040,19 +3031,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (ret) return ret; - if (!(req->open.how.flags & O_PATH) && force_o_largefile()) - req->open.how.flags |= O_LARGEFILE; - - req->open.filename = getname(fname); - if (IS_ERR(req->open.filename)) { - ret = PTR_ERR(req->open.filename); - req->open.filename = NULL; - return ret; - } - - req->open.nofile = rlimit(RLIMIT_NOFILE); - req->flags |= REQ_F_NEED_CLEANUP; - return 0; + return __io_openat_prep(req, sqe); } static int io_openat2(struct io_kiocb *req, bool force_nonblock) @@ -3092,7 +3071,6 @@ err: static int io_openat(struct io_kiocb *req, bool force_nonblock) { - req->open.how = build_open_how(req->open.how.flags, req->open.how.mode); return io_openat2(req, force_nonblock); } @@ -3181,7 +3159,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req, p->addr = READ_ONCE(sqe->addr); p->len = READ_ONCE(sqe->len); - if (!access_ok(u64_to_user_ptr(p->addr), p->len)) + if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs))) return -EFAULT; p->bgid = READ_ONCE(sqe->buf_group); @@ -3259,6 +3237,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req, #if defined(CONFIG_EPOLL) if (sqe->ioprio || sqe->buf_index) return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; req->epoll.epfd = READ_ONCE(sqe->fd); req->epoll.op = READ_ONCE(sqe->len); @@ -3303,6 +3283,8 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) if (sqe->ioprio || sqe->buf_index || sqe->off) return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; req->madvise.addr = READ_ONCE(sqe->addr); req->madvise.len = READ_ONCE(sqe->len); @@ -3337,6 +3319,8 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (sqe->ioprio || sqe->buf_index || sqe->addr) return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; req->fadvise.offset = READ_ONCE(sqe->off); req->fadvise.len = READ_ONCE(sqe->len); @@ -3370,6 +3354,8 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock) static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; if (sqe->ioprio || sqe->buf_index) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) @@ -3410,10 +3396,14 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { /* * If we queue this for async, it must not be cancellable. That would - * leave the 'file' in an undeterminate state. + * leave the 'file' in an undeterminate state, and here need to modify + * io_wq_work.flags, so initialize io_wq_work firstly. */ + io_req_init_async(req); req->work.flags |= IO_WQ_WORK_NO_CANCEL; + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + return -EINVAL; if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) return -EINVAL; @@ -3421,53 +3411,41 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EBADF; req->close.fd = READ_ONCE(sqe->fd); - return 0; -} - -/* only called when __close_fd_get_file() is done */ -static void __io_close_finish(struct io_kiocb *req) -{ - int ret; - - ret = filp_close(req->close.put_file, req->work.files); - if (ret < 0) - req_set_fail_links(req); - io_cqring_add_event(req, ret); - fput(req->close.put_file); - io_put_req(req); -} - -static void io_close_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); + if ((req->file && req->file->f_op == &io_uring_fops) || + req->close.fd == req->ctx->ring_fd) + return -EBADF; - /* not cancellable, don't do io_req_cancelled() */ - __io_close_finish(req); - io_steal_work(req, workptr); + req->close.put_file = NULL; + return 0; } static int io_close(struct io_kiocb *req, bool force_nonblock) { + struct io_close *close = &req->close; int ret; - req->close.put_file = NULL; - ret = __close_fd_get_file(req->close.fd, &req->close.put_file); - if (ret < 0) - return (ret == -ENOENT) ? -EBADF : ret; + /* might be already done during nonblock submission */ + if (!close->put_file) { + ret = __close_fd_get_file(close->fd, &close->put_file); + if (ret < 0) + return (ret == -ENOENT) ? -EBADF : ret; + } /* if the file has a flush method, be safe and punt to async */ - if (req->close.put_file->f_op->flush && force_nonblock) { + if (close->put_file->f_op->flush && force_nonblock) { /* avoid grabbing files - we don't need the files */ req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT; - req->work.func = io_close_finish; return -EAGAIN; } - /* - * No ->flush(), safely close from here and just punt the - * fput() to async context. - */ - __io_close_finish(req); + /* No ->flush() or already async, safely close from here */ + ret = filp_close(close->put_file, req->work.files); + if (ret < 0) + req_set_fail_links(req); + io_cqring_add_event(req, ret); + fput(close->put_file); + close->put_file = NULL; + io_put_req(req); return 0; } @@ -3489,38 +3467,20 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static void __io_sync_file_range(struct io_kiocb *req) +static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) { int ret; + /* sync_file_range always requires a blocking context */ + if (force_nonblock) + return -EAGAIN; + ret = sync_file_range(req->file, req->sync.off, req->sync.len, req->sync.flags); if (ret < 0) req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req(req); -} - - -static void io_sync_file_range_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - - if (io_req_cancelled(req)) - return; - __io_sync_file_range(req); - io_steal_work(req, workptr); -} - -static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) -{ - /* sync_file_range always requires a blocking context */ - if (force_nonblock) { - req->work.func = io_sync_file_range_finish; - return -EAGAIN; - } - - __io_sync_file_range(req); return 0; } @@ -3546,6 +3506,9 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_async_ctx *io = req->io; int ret; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); @@ -3575,9 +3538,6 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock) struct socket *sock; int ret; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - sock = sock_from_file(req->file, &ret); if (sock) { struct io_async_ctx io; @@ -3631,9 +3591,6 @@ static int io_send(struct io_kiocb *req, bool force_nonblock) struct socket *sock; int ret; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - sock = sock_from_file(req->file, &ret); if (sock) { struct io_sr_msg *sr = &req->sr_msg; @@ -3786,6 +3743,9 @@ static int io_recvmsg_prep(struct io_kiocb *req, struct io_async_ctx *io = req->io; int ret; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); @@ -3814,9 +3774,6 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock) struct socket *sock; int ret, cflags = 0; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - sock = sock_from_file(req->file, &ret); if (sock) { struct io_buffer *kbuf; @@ -3878,9 +3835,6 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock) struct socket *sock; int ret, cflags = 0; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - sock = sock_from_file(req->file, &ret); if (sock) { struct io_sr_msg *sr = &req->sr_msg; @@ -3948,49 +3902,30 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static int __io_accept(struct io_kiocb *req, bool force_nonblock) +static int io_accept(struct io_kiocb *req, bool force_nonblock) { struct io_accept *accept = &req->accept; - unsigned file_flags; + unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0; int ret; - file_flags = force_nonblock ? O_NONBLOCK : 0; + if (req->file->f_flags & O_NONBLOCK) + req->flags |= REQ_F_NOWAIT; + ret = __sys_accept4_file(req->file, file_flags, accept->addr, accept->addr_len, accept->flags, accept->nofile); if (ret == -EAGAIN && force_nonblock) return -EAGAIN; - if (ret == -ERESTARTSYS) - ret = -EINTR; - if (ret < 0) + if (ret < 0) { + if (ret == -ERESTARTSYS) + ret = -EINTR; req_set_fail_links(req); + } io_cqring_add_event(req, ret); io_put_req(req); return 0; } -static void io_accept_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - - if (io_req_cancelled(req)) - return; - __io_accept(req, false); - io_steal_work(req, workptr); -} - -static int io_accept(struct io_kiocb *req, bool force_nonblock) -{ - int ret; - - ret = __io_accept(req, force_nonblock); - if (ret == -EAGAIN && force_nonblock) { - req->work.func = io_accept_finish; - return -EAGAIN; - } - return 0; -} - static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_connect *conn = &req->connect; @@ -4329,7 +4264,8 @@ static void io_async_task_func(struct callback_head *cb) spin_unlock_irq(&ctx->completion_lock); /* restore ->work in case we need to retry again */ - memcpy(&req->work, &apoll->work, sizeof(req->work)); + if (req->flags & REQ_F_WORK_INITIALIZED) + memcpy(&req->work, &apoll->work, sizeof(req->work)); kfree(apoll); if (!canceled) { @@ -4426,7 +4362,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req) return false; req->flags |= REQ_F_POLLED; - memcpy(&apoll->work, &req->work, sizeof(req->work)); + if (req->flags & REQ_F_WORK_INITIALIZED) + memcpy(&apoll->work, &req->work, sizeof(req->work)); had_io = req->io != NULL; get_task_struct(current); @@ -4451,7 +4388,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req) if (!had_io) io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); - memcpy(&req->work, &apoll->work, sizeof(req->work)); + if (req->flags & REQ_F_WORK_INITIALIZED) + memcpy(&req->work, &apoll->work, sizeof(req->work)); kfree(apoll); return false; } @@ -4496,7 +4434,9 @@ static bool io_poll_remove_one(struct io_kiocb *req) * io_req_work_drop_env below when dropping the * final reference. */ - memcpy(&req->work, &apoll->work, sizeof(req->work)); + if (req->flags & REQ_F_WORK_INITIALIZED) + memcpy(&req->work, &apoll->work, + sizeof(req->work)); kfree(apoll); } } @@ -4945,6 +4885,8 @@ static int io_req_defer_prep(struct io_kiocb *req, if (!sqe) return 0; + io_req_init_async(req); + if (io_op_defs[req->opcode].file_table) { ret = io_grab_files(req); if (unlikely(ret)) @@ -5382,12 +5324,26 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, return 0; } +static void io_arm_async_linked_timeout(struct io_kiocb *req) +{ + struct io_kiocb *link; + + /* link head's timeout is queued in io_queue_async_work() */ + if (!(req->flags & REQ_F_QUEUE_TIMEOUT)) + return; + + link = list_first_entry(&req->link_list, struct io_kiocb, link_list); + io_queue_linked_timeout(link); +} + static void io_wq_submit_work(struct io_wq_work **workptr) { struct io_wq_work *work = *workptr; struct io_kiocb *req = container_of(work, struct io_kiocb, work); int ret = 0; + io_arm_async_linked_timeout(req); + /* if NO_CANCEL is set, we must still run the work */ if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) == IO_WQ_WORK_CANCEL) { @@ -5438,19 +5394,20 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, return -EBADF; fd = array_index_nospec(fd, ctx->nr_user_files); file = io_file_from_index(ctx, fd); - if (!file) - return -EBADF; - req->fixed_file_refs = ctx->file_data->cur_refs; - percpu_ref_get(req->fixed_file_refs); + if (file) { + req->fixed_file_refs = ctx->file_data->cur_refs; + percpu_ref_get(req->fixed_file_refs); + } } else { trace_io_uring_file_get(ctx, fd); file = __io_file_get(state, fd); - if (unlikely(!file)) - return -EBADF; } - *out_file = file; - return 0; + if (file || io_op_defs[req->opcode].needs_file_no_error) { + *out_file = file; + return 0; + } + return -EBADF; } static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, @@ -5584,7 +5541,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) again: linked_timeout = io_prep_linked_timeout(req); - if (req->work.creds && req->work.creds != current_cred()) { + if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds && + req->work.creds != current_cred()) { if (old_creds) revert_creds(old_creds); if (old_creds == req->work.creds) @@ -5607,6 +5565,8 @@ again: goto exit; } punt: + io_req_init_async(req); + if (io_op_defs[req->opcode].file_table) { ret = io_grab_files(req); if (ret) @@ -5859,7 +5819,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, refcount_set(&req->refs, 2); req->task = NULL; req->result = 0; - INIT_IO_WORK(&req->work, io_wq_submit_work); if (unlikely(req->opcode >= IORING_OP_LAST)) return -EINVAL; @@ -5867,7 +5826,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (io_op_defs[req->opcode].needs_mm && !current->mm) { if (unlikely(!mmget_not_zero(ctx->sqo_mm))) return -EFAULT; - use_mm(ctx->sqo_mm); + kthread_use_mm(ctx->sqo_mm); } sqe_flags = READ_ONCE(sqe->flags); @@ -5881,6 +5840,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, id = READ_ONCE(sqe->personality); if (id) { + io_req_init_async(req); req->work.creds = idr_find(&ctx->personality_idr, id); if (unlikely(!req->work.creds)) return -EINVAL; @@ -5981,7 +5941,7 @@ static inline void io_sq_thread_drop_mm(struct io_ring_ctx *ctx) struct mm_struct *mm = current->mm; if (mm) { - unuse_mm(mm); + kthread_unuse_mm(mm); mmput(mm); } } @@ -5990,15 +5950,12 @@ static int io_sq_thread(void *data) { struct io_ring_ctx *ctx = data; const struct cred *old_cred; - mm_segment_t old_fs; DEFINE_WAIT(wait); unsigned long timeout; int ret = 0; complete(&ctx->sq_thread_comp); - old_fs = get_fs(); - set_fs(USER_DS); old_cred = override_creds(ctx->creds); timeout = jiffies + ctx->sq_thread_idle; @@ -6103,7 +6060,6 @@ static int io_sq_thread(void *data) if (current->task_works) task_work_run(); - set_fs(old_fs); io_sq_thread_drop_mm(ctx); revert_creds(old_cred); @@ -6879,6 +6835,7 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, data.user = ctx->user; data.free_work = io_free_work; + data.do_work = io_wq_submit_work; if (!(p->flags & IORING_SETUP_ATTACH_WQ)) { /* Do QD, or 4 * CPUS, whatever is smallest */ @@ -7160,8 +7117,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, ret = 0; if (!pages || nr_pages > got_pages) { - kfree(vmas); - kfree(pages); + kvfree(vmas); + kvfree(pages); pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); vmas = kvmalloc_array(nr_pages, diff --git a/fs/locks.c b/fs/locks.c index 6fd1f6e83178..7df0f9fa66f4 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1557,6 +1557,9 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) { bool rc; + if (lease->fl_lmops->lm_breaker_owns_lease + && lease->fl_lmops->lm_breaker_owns_lease(lease)) + return false; if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) { rc = false; goto trace; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index a57e7c72c7f4..1b79dd5cf661 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -446,7 +446,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - ssize_t result = -EINVAL, requested; + ssize_t result, requested; size_t count = iov_iter_count(iter); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); @@ -731,6 +731,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (request_commit) { kref_get(&req->wb_kref); + memcpy(&req->wb_verf, &hdr->verf.verifier, + sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, hdr->ds_commit_idx); } diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 963800037609..e87d500ad95a 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -39,7 +39,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include <linux/string.h> #include <linux/kmod.h> #include <linux/slab.h> -#include <linux/module.h> #include <linux/socket.h> #include <linux/seq_file.h> #include <linux/inet.h> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b9d0921cb4fe..0bf1f835de01 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -833,6 +833,8 @@ int nfs_getattr(const struct path *path, struct kstat *stat, do_update |= cache_validity & NFS_INO_INVALID_ATIME; if (request_mask & (STATX_CTIME|STATX_MTIME)) do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE; + if (request_mask & STATX_BLOCKS) + do_update |= cache_validity & NFS_INO_INVALID_BLOCKS; if (do_update) { /* Update the attribute cache */ if (!(server->flags & NFS_MOUNT_NOAC)) @@ -1764,7 +1766,8 @@ out_noforce: status = nfs_post_op_update_inode_locked(inode, fattr, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME - | NFS_INO_INVALID_MTIME); + | NFS_INO_INVALID_MTIME + | NFS_INO_INVALID_BLOCKS); return status; } @@ -1871,7 +1874,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED - | NFS_INO_REVAL_PAGECACHE); + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_INVALID_BLOCKS); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); @@ -2033,8 +2037,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; - else + else { + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_BLOCKS + | NFS_INO_REVAL_FORCED); cache_revalidated = false; + } /* Update attrtimeo value if we're out of the unstable period */ if (attr_changed) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index a46d1d5d16d8..2397ceedba8a 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -179,11 +179,11 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, if (nfs_lookup_is_soft_revalidate(dentry)) task_flags |= RPC_TASK_TIMEOUT; - dprintk("NFS call lookup %pd2\n", dentry); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) return -ENOMEM; + dprintk("NFS call lookup %pd2\n", dentry); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); nfs_refresh_inode(dir, res.dir_attr); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9056f3dd380e..e32717fd1169 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7909,7 +7909,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) } static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = { - .rpc_call_done = &nfs4_bind_one_conn_to_session_done, + .rpc_call_done = nfs4_bind_one_conn_to_session_done, }; /* diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 7e7a97ae21ed..547cec79899f 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -961,6 +961,97 @@ TRACE_EVENT(nfs_readpage_done, ) ); +TRACE_EVENT(nfs_readpage_short, + TP_PROTO( + const struct rpc_task *task, + const struct nfs_pgio_header *hdr + ), + + TP_ARGS(task, hdr), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(bool, eof) + __field(int, status) + ), + + TP_fast_assign( + const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; + + __entry->status = task->tk_status; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->eof = hdr->res.eof; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); + ), + + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%u res=%u status=%d%s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + (long long)__entry->offset, __entry->arg_count, + __entry->res_count, __entry->status, + __entry->eof ? " eof" : "" + ) +); + +TRACE_EVENT(nfs_pgio_error, + TP_PROTO( + const struct nfs_pgio_header *hdr, + int error, + loff_t pos + ), + + TP_ARGS(hdr, error, pos), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(loff_t, pos) + __field(int, status) + ), + + TP_fast_assign( + const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; + + __entry->status = error; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); + ), + + TP_printk("fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%lld count=%u res=%u pos=%llu status=%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, __entry->fhandle, + (long long)__entry->offset, __entry->arg_count, __entry->res_count, + __entry->pos, __entry->status + ) +); + TRACE_DEFINE_ENUM(NFS_UNSTABLE); TRACE_DEFINE_ENUM(NFS_DATA_SYNC); TRACE_DEFINE_ENUM(NFS_FILE_SYNC); @@ -1312,7 +1403,12 @@ TRACE_EVENT(nfs_xdr_status, __field(unsigned int, task_id) __field(unsigned int, client_id) __field(u32, xid) + __field(int, version) __field(unsigned long, error) + __string(program, + xdr->rqst->rq_task->tk_client->cl_program->name) + __string(procedure, + xdr->rqst->rq_task->tk_msg.rpc_proc->p_name) ), TP_fast_assign( @@ -1322,13 +1418,19 @@ TRACE_EVENT(nfs_xdr_status, __entry->task_id = task->tk_pid; __entry->client_id = task->tk_client->cl_clid; __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->version = task->tk_client->cl_vers; __entry->error = error; + __assign_str(program, + task->tk_client->cl_program->name) + __assign_str(procedure, task->tk_msg.rpc_proc->p_name) ), TP_printk( - "task:%u@%d xid=0x%08x error=%ld (%s)", + "task:%u@%d xid=0x%08x %sv%d %s error=%ld (%s)", __entry->task_id, __entry->client_id, __entry->xid, - -__entry->error, nfs_show_status(__entry->error) + __get_str(program), __entry->version, + __get_str(procedure), -__entry->error, + nfs_show_status(__entry->error) ) ); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6ca421cbe19c..6ea4cac41e46 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -24,6 +24,7 @@ #include "internal.h" #include "pnfs.h" +#include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -64,6 +65,7 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { unsigned int new = pos - hdr->io_start; + trace_nfs_pgio_error(hdr, error, pos); if (hdr->good_bytes > new) { hdr->good_bytes = new; clear_bit(NFS_IOHDR_EOF, &hdr->flags); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 13b22e898116..eb854f1f86e2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -264,6 +264,8 @@ static void nfs_readpage_retry(struct rpc_task *task, /* This is a short read! */ nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD); + trace_nfs_readpage_short(task, hdr); + /* Has the server at least made some progress? */ if (resp->count == 0) { nfs_set_pgio_error(hdr, -EIO, argp->offset); diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index f1b27411dcc0..ebcbdc40483b 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2019 Hammerspace Inc */ diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 10ec5ecdf117..65c331f75e9c 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -78,6 +78,8 @@ enum { /* Checksum this amount of the request */ #define RC_CSUMLEN (256U) +int nfsd_drc_slab_create(void); +void nfsd_drc_slab_free(void); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 09aa545825bd..9217cb64bf0e 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -139,7 +139,6 @@ struct nfsd_net { * Duplicate reply cache */ struct nfsd_drc_bucket *drc_hashtbl; - struct kmem_cache *drc_slab; /* max number of entries allowed in the cache */ unsigned int max_drc_entries; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 5cf91322de0f..7fbe9840a03e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -38,6 +38,7 @@ #include "nfsd.h" #include "state.h" #include "netns.h" +#include "trace.h" #include "xdr4cb.h" #include "xdr4.h" @@ -904,16 +905,20 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && - (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) + (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) { + trace_nfsd_cb_setup_err(clp, -EINVAL); return -EINVAL; + } args.client_name = clp->cl_cred.cr_principal; args.prognumber = conn->cb_prog; args.protocol = XPRT_TRANSPORT_TCP; args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; } else { - if (!conn->cb_xprt) + if (!conn->cb_xprt) { + trace_nfsd_cb_setup_err(clp, -EINVAL); return -EINVAL; + } clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; @@ -925,32 +930,27 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { - dprintk("NFSD: couldn't create callback client: %ld\n", - PTR_ERR(client)); + trace_nfsd_cb_setup_err(clp, PTR_ERR(client)); return PTR_ERR(client); } cred = get_backchannel_cred(clp, client, ses); if (!cred) { + trace_nfsd_cb_setup_err(clp, -ENOMEM); rpc_shutdown_client(client); return -ENOMEM; } clp->cl_cb_client = client; clp->cl_cb_cred = cred; + trace_nfsd_cb_setup(clp); return 0; } -static void warn_no_callback_path(struct nfs4_client *clp, int reason) -{ - dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", - (int)clp->cl_name.len, clp->cl_name.data, reason); -} - static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) { if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; clp->cl_cb_state = NFSD4_CB_DOWN; - warn_no_callback_path(clp, reason); + trace_nfsd_cb_state(clp); } static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) @@ -958,17 +958,20 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; clp->cl_cb_state = NFSD4_CB_FAULT; - warn_no_callback_path(clp, reason); + trace_nfsd_cb_state(clp); } static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) { struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); + trace_nfsd_cb_done(clp, task->tk_status); if (task->tk_status) nfsd4_mark_cb_down(clp, task->tk_status); - else + else { clp->cl_cb_state = NFSD4_CB_UP; + trace_nfsd_cb_state(clp); + } } static void nfsd4_cb_probe_release(void *calldata) @@ -993,6 +996,7 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { void nfsd4_probe_callback(struct nfs4_client *clp) { clp->cl_cb_state = NFSD4_CB_UNKNOWN; + trace_nfsd_cb_state(clp); set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); nfsd4_run_cb(&clp->cl_cb_null); } @@ -1009,6 +1013,7 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) spin_lock(&clp->cl_lock); memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); spin_unlock(&clp->cl_lock); + trace_nfsd_cb_state(clp); } /* @@ -1165,8 +1170,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) struct nfsd4_callback *cb = calldata; struct nfs4_client *clp = cb->cb_clp; - dprintk("%s: minorversion=%d\n", __func__, - clp->cl_minorversion); + trace_nfsd_cb_done(clp, task->tk_status); if (!nfsd4_cb_sequence_done(task, cb)) return; @@ -1271,6 +1275,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) * kill the old client: */ if (clp->cl_cb_client) { + trace_nfsd_cb_shutdown(clp); rpc_shutdown_client(clp->cl_cb_client); clp->cl_cb_client = NULL; put_cred(clp->cl_cb_cred); @@ -1301,6 +1306,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) err = setup_callback_client(clp, &conn, ses); if (err) { nfsd4_mark_cb_down(clp, err); + if (c) + svc_xprt_put(c->cn_xprt); return; } } @@ -1314,6 +1321,8 @@ nfsd4_run_cb_work(struct work_struct *work) struct rpc_clnt *clnt; int flags; + trace_nfsd_cb_work(clp, cb->cb_msg.rpc_proc->p_name); + if (cb->cb_need_restart) { cb->cb_need_restart = false; } else { diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0e75f7fb5fec..a09c35f0f6f0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1155,7 +1155,7 @@ extern void nfs_sb_deactive(struct super_block *sb); #define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys" -/** +/* * Support one copy source server for now. */ static __be32 @@ -1245,10 +1245,9 @@ nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) mntput(ss_mnt); } -/** - * nfsd4_setup_inter_ssc - * +/* * Verify COPY destination stateid. + * * Connect to the source server with NFSv4.1. * Create the source struct file for nfsd_copy_range. * Called with COPY cstate: @@ -2302,6 +2301,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) } check_if_stalefh_allowed(args); + rqstp->rq_lease_breaker = (void **)&cstate->clp; + trace_nfsd_compound(rqstp, args->opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c107caa56525..bb3d2c32664a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -51,6 +51,7 @@ #include "netns.h" #include "pnfs.h" #include "filecache.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -167,9 +168,6 @@ renew_client_locked(struct nfs4_client *clp) return; } - dprintk("renewing client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = ktime_get_boottime_seconds(); } @@ -1922,8 +1920,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) */ if (clid->cl_boot == (u32)nn->boot_time) return 0; - dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n", - clid->cl_boot, clid->cl_id, nn->boot_time); + trace_nfsd_clid_stale(clid); return 1; } @@ -2406,6 +2403,11 @@ static void states_stop(struct seq_file *s, void *v) spin_unlock(&clp->cl_lock); } +static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f) +{ + seq_printf(s, "filename: \"%pD2\"", f->nf_file); +} + static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f) { struct inode *inode = f->nf_inode; @@ -2422,6 +2424,12 @@ static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo) seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len); } +static void nfs4_show_stateid(struct seq_file *s, stateid_t *stid) +{ + seq_printf(s, "0x%.8x", stid->si_generation); + seq_printf(s, "%12phN", &stid->si_opaque); +} + static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_ol_stateid *ols; @@ -2437,7 +2445,9 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) nf = st->sc_file; file = find_any_file(nf); - seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: open, "); access = bmap_to_share_mode(ols->st_access_bmap); deny = bmap_to_share_mode(ols->st_deny_bmap); @@ -2451,6 +2461,8 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) nfs4_show_superblock(s, file); seq_printf(s, ", "); + nfs4_show_fname(s, file); + seq_printf(s, ", "); nfs4_show_owner(s, oo); seq_printf(s, " }\n"); nfsd_file_put(file); @@ -2470,7 +2482,9 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) nf = st->sc_file; file = find_any_file(nf); - seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: lock, "); /* * Note: a lock stateid isn't really the same thing as a lock, @@ -2482,6 +2496,8 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) nfs4_show_superblock(s, file); /* XXX: open stateid? */ seq_printf(s, ", "); + nfs4_show_fname(s, file); + seq_printf(s, ", "); nfs4_show_owner(s, oo); seq_printf(s, " }\n"); nfsd_file_put(file); @@ -2499,7 +2515,9 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) nf = st->sc_file; file = nf->fi_deleg_file; - seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: deleg, "); /* Kinda dead code as long as we only support read delegs: */ seq_printf(s, "access: %s, ", @@ -2508,6 +2526,8 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) /* XXX: lease time, whether it's being recalled. */ nfs4_show_superblock(s, file); + seq_printf(s, ", "); + nfs4_show_fname(s, file); seq_printf(s, " }\n"); return 0; @@ -2521,11 +2541,15 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st) ls = container_of(st, struct nfs4_layout_stateid, ls_stid); file = ls->ls_file; - seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: layout, "); /* XXX: What else would be useful? */ nfs4_show_superblock(s, file); + seq_printf(s, ", "); + nfs4_show_fname(s, file); seq_printf(s, " }\n"); return 0; @@ -2845,14 +2869,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r conn->cb_prog = se->se_callback_prog; conn->cb_ident = se->se_callback_ident; memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen); + trace_nfsd_cb_args(clp, conn); return; out_err: conn->cb_addr.ss_family = AF_UNSPEC; conn->cb_addrlen = 0; - dprintk("NFSD: this client (clientid %08x/%08x) " - "will not receive delegations\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - + trace_nfsd_cb_nodelegs(clp); return; } @@ -3458,6 +3480,45 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, return nfs_ok; } +static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) +{ + struct nfsd4_conn *c; + + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_xprt == xpt) { + return c; + } + } + return NULL; +} + +static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst, + struct nfsd4_session *session, u32 req) +{ + struct nfs4_client *clp = session->se_client; + struct svc_xprt *xpt = rqst->rq_xprt; + struct nfsd4_conn *c; + __be32 status; + + /* Following the last paragraph of RFC 5661 Section 18.34.3: */ + spin_lock(&clp->cl_lock); + c = __nfsd4_find_conn(xpt, session); + if (!c) + status = nfserr_noent; + else if (req == c->cn_flags) + status = nfs_ok; + else if (req == NFS4_CDFC4_FORE_OR_BOTH && + c->cn_flags != NFS4_CDFC4_BACK) + status = nfs_ok; + else if (req == NFS4_CDFC4_BACK_OR_BOTH && + c->cn_flags != NFS4_CDFC4_FORE) + status = nfs_ok; + else + status = nfserr_inval; + spin_unlock(&clp->cl_lock); + return status; +} + __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -3479,6 +3540,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(session->se_client, rqstp)) goto out; + status = nfsd4_match_existing_connection(rqstp, session, bcts->dir); + if (status == nfs_ok || status == nfserr_inval) + goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) goto out; @@ -3544,18 +3608,6 @@ out: return status; } -static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) -{ - struct nfsd4_conn *c; - - list_for_each_entry(c, &s->se_conns, cn_persession) { - if (c->cn_xprt == xpt) { - return c; - } - } - return NULL; -} - static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; @@ -3879,23 +3931,18 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (clp_used_exchangeid(conf)) goto out; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - char addr_str[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, - sizeof(addr_str)); - dprintk("NFSD: setclientid: string in use by client " - "at %s\n", addr_str); + trace_nfsd_clid_inuse_err(conf); goto out; } } unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) unhash_client_locked(unconf); + /* We need to handle only case 1: probable callback update */ if (conf && same_verf(&conf->cl_verifier, &clverifier)) { - /* case 1: probable callback update */ copy_clid(new, conf); gen_confirm(new, nn); - } else /* case 4 (new client) or cases 2, 3 (client reboot): */ - ; + } new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); add_to_unconfirmed(new); @@ -4076,7 +4123,6 @@ out_free_openowner_slab: out_free_client_slab: kmem_cache_destroy(client_slab); out: - dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; } @@ -4508,6 +4554,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; struct nfs4_file *fp = dp->dl_stid.sc_file; + trace_nfsd_deleg_break(&dp->dl_stid.sc_stateid); + /* * We don't want the locks code to timeout the lease for us; * we'll remove it ourself if a delegation isn't returned @@ -4522,6 +4570,19 @@ nfsd_break_deleg_cb(struct file_lock *fl) return ret; } +static bool nfsd_breaker_owns_lease(struct file_lock *fl) +{ + struct nfs4_delegation *dl = fl->fl_owner; + struct svc_rqst *rqst; + struct nfs4_client *clp; + + if (!i_am_nfsd()) + return NULL; + rqst = kthread_data(current); + clp = *(rqst->rq_lease_breaker); + return dl->dl_stid.sc_client == clp; +} + static int nfsd_change_deleg_cb(struct file_lock *onlist, int arg, struct list_head *dispose) @@ -4533,6 +4594,7 @@ nfsd_change_deleg_cb(struct file_lock *onlist, int arg, } static const struct lock_manager_operations nfsd_lease_mng_ops = { + .lm_breaker_owns_lease = nfsd_breaker_owns_lease, .lm_break = nfsd_break_deleg_cb, .lm_change = nfsd_change_deleg_cb, }; @@ -5018,8 +5080,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); - dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", - STATEID_VAL(&dp->dl_stid.sc_stateid)); + trace_nfsd_deleg_open(&dp->dl_stid.sc_stateid); open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; nfs4_put_stid(&dp->dl_stid); return; @@ -5136,9 +5197,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf nfs4_open_delegation(current_fh, open, stp); nodeleg: status = nfs_ok; - - dprintk("%s: stateid=" STATEID_FMT "\n", __func__, - STATEID_VAL(&stp->st_stid.sc_stateid)); + trace_nfsd_deleg_none(&stp->st_stid.sc_stateid); out: /* 4.1 client trying to upgrade/downgrade delegation? */ if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp && @@ -5192,8 +5251,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("process_renew(%08x/%08x): starting\n", - clid->cl_boot, clid->cl_id); + trace_nfsd_clid_renew(clid); status = lookup_clientid(clid, cstate, nn, false); if (status) goto out; @@ -5214,6 +5272,7 @@ nfsd4_end_grace(struct nfsd_net *nn) if (nn->grace_ended) return; + trace_nfsd_grace_complete(nn); nn->grace_ended = true; /* * If the server goes down again right now, an NFSv4 @@ -5279,13 +5338,10 @@ nfs4_laundromat(struct nfsd_net *nn) copy_stateid_t *cps_t; int i; - dprintk("NFSD: laundromat service - starting\n"); - if (clients_still_reclaiming(nn)) { new_timeo = 0; goto out; } - dprintk("NFSD: end of grace period\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); @@ -5307,8 +5363,7 @@ nfs4_laundromat(struct nfsd_net *nn) break; } if (mark_client_expired_locked(clp)) { - dprintk("NFSD: client in use (clientid %08x)\n", - clp->cl_clientid.cl_id); + trace_nfsd_clid_expired(&clp->cl_clientid); continue; } list_add(&clp->cl_lru, &reaplist); @@ -5316,8 +5371,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); - dprintk("NFSD: purging unused client (clientid %08x)\n", - clp->cl_clientid.cl_id); + trace_nfsd_clid_purged(&clp->cl_clientid); list_del_init(&clp->cl_lru); expire_client(clp); } @@ -5407,7 +5461,6 @@ laundromat_main(struct work_struct *laundry) laundromat_work); t = nfs4_laundromat(nn); - dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t); queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } @@ -5948,8 +6001,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, struct nfs4_stid *s; struct nfs4_ol_stateid *stp = NULL; - dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, - seqid, STATEID_VAL(stateid)); + trace_nfsd_preprocess(seqid, stateid); *stpp = NULL; status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn); @@ -6018,9 +6070,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo->oo_flags |= NFS4_OO_CONFIRMED; nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); - dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", - __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); - + trace_nfsd_open_confirm(oc->oc_seqid, &stp->st_stid.sc_stateid); nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; put_stateid: @@ -7072,7 +7122,7 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, unsigned int strhashval; struct nfs4_client_reclaim *crp; - dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", name.len, name.data); + trace_nfsd_clid_reclaim(nn, name.len, name.data); crp = alloc_reclaim(); if (crp) { strhashval = clientstr_hashval(name); @@ -7122,7 +7172,7 @@ nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn) unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; - dprintk("NFSD: nfs4_find_reclaim_client for name %.*s\n", name.len, name.data); + trace_nfsd_clid_find(nn, name.len, name.data); strhashval = clientstr_hashval(name); list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { @@ -7686,6 +7736,9 @@ nfsd_recall_delegations(struct list_head *reaplist) list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { list_del_init(&dp->dl_recall_lru); clp = dp->dl_stid.sc_client; + + trace_nfsd_deleg_recall(&dp->dl_stid.sc_stateid); + /* * We skipped all entries that had a zero dl_time before, * so we can now reset the dl_time back to 0. If a delegation @@ -7868,6 +7921,7 @@ nfs4_state_start_net(struct net *net) goto skip_grace; printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); + trace_nfsd_grace_start(nn); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 96352ab7bd81..0a0cf1fd77d3 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -20,8 +20,7 @@ #include "nfsd.h" #include "cache.h" - -#define NFSDDBG_FACILITY NFSDDBG_REPCACHE +#include "trace.h" /* * We use this value to determine the number of hash buckets from the max @@ -36,6 +35,8 @@ struct nfsd_drc_bucket { spinlock_t cache_lock; }; +static struct kmem_cache *drc_slab; + static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); @@ -95,7 +96,7 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, { struct svc_cacherep *rp; - rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; @@ -129,7 +130,7 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, atomic_dec(&nn->num_drc_entries); nn->drc_mem_usage -= sizeof(*rp); } - kmem_cache_free(nn->drc_slab, rp); + kmem_cache_free(drc_slab, rp); } static void @@ -141,6 +142,18 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, spin_unlock(&b->cache_lock); } +int nfsd_drc_slab_create(void) +{ + drc_slab = kmem_cache_create("nfsd_drc", + sizeof(struct svc_cacherep), 0, 0, NULL); + return drc_slab ? 0: -ENOMEM; +} + +void nfsd_drc_slab_free(void) +{ + kmem_cache_destroy(drc_slab); +} + int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; @@ -159,18 +172,13 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) if (status) goto out_nomem; - nn->drc_slab = kmem_cache_create("nfsd_drc", - sizeof(struct svc_cacherep), 0, 0, NULL); - if (!nn->drc_slab) - goto out_shrinker; - nn->drc_hashtbl = kcalloc(hashsize, sizeof(*nn->drc_hashtbl), GFP_KERNEL); if (!nn->drc_hashtbl) { nn->drc_hashtbl = vzalloc(array_size(hashsize, sizeof(*nn->drc_hashtbl))); if (!nn->drc_hashtbl) - goto out_slab; + goto out_shrinker; } for (i = 0; i < hashsize; i++) { @@ -180,8 +188,6 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) nn->drc_hashsize = hashsize; return 0; -out_slab: - kmem_cache_destroy(nn->drc_slab); out_shrinker: unregister_shrinker(&nn->nfsd_reply_cache_shrinker); out_nomem: @@ -209,8 +215,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) nn->drc_hashtbl = NULL; nn->drc_hashsize = 0; - kmem_cache_destroy(nn->drc_slab); - nn->drc_slab = NULL; } /* @@ -323,8 +327,10 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp, struct nfsd_net *nn) { if (key->c_key.k_xid == rp->c_key.k_xid && - key->c_key.k_csum != rp->c_key.k_csum) + key->c_key.k_csum != rp->c_key.k_csum) { ++nn->payload_misses; + trace_nfsd_drc_mismatch(nn, key, rp); + } return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key)); } @@ -377,15 +383,22 @@ out: return ret; } -/* +/** + * nfsd_cache_lookup - Find an entry in the duplicate reply cache + * @rqstp: Incoming Call to find + * * Try to find an entry matching the current call in the cache. When none * is found, we try to grab the oldest expired entry off the LRU list. If * a suitable one isn't there, then drop the cache_lock and allocate a * new one, then search again in case one got inserted while this thread * didn't hold the lock. + * + * Return values: + * %RC_DOIT: Process the request normally + * %RC_REPLY: Reply from cache + * %RC_DROPIT: Do not process the request further */ -int -nfsd_cache_lookup(struct svc_rqst *rqstp) +int nfsd_cache_lookup(struct svc_rqst *rqstp) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp, *found; @@ -399,7 +412,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rqstp->rq_cacherep = NULL; if (type == RC_NOCACHE) { nfsdstats.rcnocache++; - return rtn; + goto out; } csum = nfsd_cache_csum(rqstp); @@ -409,10 +422,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * preallocate an entry. */ rp = nfsd_reply_cache_alloc(rqstp, csum, nn); - if (!rp) { - dprintk("nfsd: unable to allocate DRC entry!\n"); - return rtn; - } + if (!rp) + goto out; spin_lock(&b->cache_lock); found = nfsd_cache_insert(b, rp, nn); @@ -431,8 +442,10 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) /* go ahead and prune the cache */ prune_bucket(b, nn); - out: + +out_unlock: spin_unlock(&b->cache_lock); +out: return rtn; found_entry: @@ -442,13 +455,13 @@ found_entry: /* Request being processed */ if (rp->c_state == RC_INPROG) - goto out; + goto out_trace; /* From the hall of fame of impractical attacks: * Is this a user who tries to snoop on the cache? */ rtn = RC_DOIT; if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure) - goto out; + goto out_trace; /* Compose RPC reply header */ switch (rp->c_type) { @@ -460,21 +473,26 @@ found_entry: break; case RC_REPLBUFF: if (!nfsd_cache_append(rqstp, &rp->c_replvec)) - goto out; /* should not happen */ + goto out_unlock; /* should not happen */ rtn = RC_REPLY; break; default: - printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - nfsd_reply_cache_free_locked(b, rp, nn); + WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type); } - goto out; +out_trace: + trace_nfsd_drc_found(nn, rqstp, rtn); + goto out_unlock; } -/* - * Update a cache entry. This is called from nfsd_dispatch when - * the procedure has been executed and the complete reply is in - * rqstp->rq_res. +/** + * nfsd_cache_update - Update an entry in the duplicate reply cache. + * @rqstp: svc_rqst with a finished Reply + * @cachetype: which cache to update + * @statp: Reply's status code + * + * This is called from nfsd_dispatch when the procedure has been + * executed and the complete reply is in rqstp->rq_res. * * We're copying around data here rather than swapping buffers because * the toplevel loop requires max-sized buffers, which would be a waste @@ -487,8 +505,7 @@ found_entry: * nfsd failed to encode a reply that otherwise would have been cached. * In this case, nfsd_cache_update is called with statp == NULL. */ -void -nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) +void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp = rqstp->rq_cacherep; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3bb2db947d29..b68e96681522 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -238,7 +238,7 @@ static inline struct net *netns(struct file *file) return file_inode(file)->i_sb->s_fs_info; } -/** +/* * write_unlock_ip - Release all locks used by a client * * Experimental. @@ -277,7 +277,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) return nlmsvc_unlock_all_by_ip(sap); } -/** +/* * write_unlock_fs - Release all locks on a local file system * * Experimental. @@ -327,7 +327,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) return error; } -/** +/* * write_filehandle - Get a variable-length NFS file handle by path * * On input, the buffer contains a '\n'-terminated C string comprised of @@ -402,7 +402,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) return mesg - buf; } -/** +/* * write_threads - Start NFSD, or report the current number of running threads * * Input: @@ -452,7 +452,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } -/** +/* * write_pool_threads - Set or report the current number of threads per pool * * Input: @@ -661,7 +661,7 @@ out: return tlen + len; } -/** +/* * write_versions - Set or report the available NFS protocol versions * * Input: @@ -811,7 +811,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, return -EINVAL; } -/** +/* * write_ports - Pass a socket file descriptor or transport name to listen on * * Input: @@ -867,7 +867,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) int nfsd_max_blksize; -/** +/* * write_maxblksize - Set or report the current NFS blksize * * Input: @@ -917,7 +917,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize); } -/** +/* * write_maxconn - Set or report the current max number of connections * * Input: @@ -998,7 +998,7 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, return rv; } -/** +/* * write_leasetime - Set or report the current NFSv4 lease time * * Input: @@ -1025,7 +1025,7 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } -/** +/* * write_gracetime - Set or report current NFSv4 grace period time * * As above, but sets the time of the NFSv4 grace period. @@ -1069,7 +1069,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, nfs4_recoverydir()); } -/** +/* * write_recoverydir - Set or report the pathname of the recovery directory * * Input: @@ -1101,7 +1101,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) return rv; } -/** +/* * write_v4_end_grace - release grace period for nfsd's v4.x lock manager * * Input: @@ -1533,6 +1533,9 @@ static int __init init_nfsd(void) goto out_free_slabs; nfsd_fault_inject_init(); /* nfsd fault injection controls */ nfsd_stat_init(); /* Statistics */ + retval = nfsd_drc_slab_create(); + if (retval) + goto out_free_stat; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = create_proc_exports_entry(); if (retval) @@ -1546,6 +1549,8 @@ out_free_all: remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); + nfsd_drc_slab_free(); +out_free_stat: nfsd_stat_shutdown(); nfsd_fault_inject_cleanup(); nfsd4_exit_pnfs(); @@ -1560,6 +1565,7 @@ out_unregister_pernet: static void __exit exit_nfsd(void) { + nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 2ab5569126b8..36cdd81b6688 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -88,6 +88,8 @@ int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_destroy(struct net *net); +bool i_am_nfsd(void); + struct nfsdfs_client { struct kref cl_ref; void (*cl_release)(struct kref *kref); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ca9fd348548b..b603dfcdd361 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -601,6 +601,11 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_module = THIS_MODULE, }; +bool i_am_nfsd(void) +{ + return kthread_func(current) == nfsd; +} + int nfsd_create_serv(struct net *net) { int error; @@ -1011,6 +1016,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) *statp = rpc_garbage_args; return 1; } + rqstp->rq_lease_breaker = NULL; /* * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 68d3f30ee760..3b408532a5dc 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -64,13 +64,6 @@ typedef struct { refcount_t sc_count; } copy_stateid_t; -#define STATEID_FMT "(%08x/%08x/%08x/%08x)" -#define STATEID_VAL(s) \ - (s)->si_opaque.so_clid.cl_boot, \ - (s)->si_opaque.so_clid.cl_id, \ - (s)->si_opaque.so_id, \ - (s)->si_generation - struct nfsd4_callback { struct nfs4_client *cb_clp; struct rpc_message cb_msg; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 78c574251c60..1861db1bdc67 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -277,6 +277,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class, DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \ TP_PROTO(stateid_t *stp), \ TP_ARGS(stp)) + DEFINE_STATEID_EVENT(layoutstate_alloc); DEFINE_STATEID_EVENT(layoutstate_unhash); DEFINE_STATEID_EVENT(layoutstate_free); @@ -288,6 +289,138 @@ DEFINE_STATEID_EVENT(layout_recall_done); DEFINE_STATEID_EVENT(layout_recall_fail); DEFINE_STATEID_EVENT(layout_recall_release); +DEFINE_STATEID_EVENT(deleg_open); +DEFINE_STATEID_EVENT(deleg_none); +DEFINE_STATEID_EVENT(deleg_break); +DEFINE_STATEID_EVENT(deleg_recall); + +DECLARE_EVENT_CLASS(nfsd_stateseqid_class, + TP_PROTO(u32 seqid, const stateid_t *stp), + TP_ARGS(seqid, stp), + TP_STRUCT__entry( + __field(u32, seqid) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + ), + TP_fast_assign( + __entry->seqid = seqid; + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + ), + TP_printk("seqid=%u client %08x:%08x stateid %08x:%08x", + __entry->seqid, __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation) +) + +#define DEFINE_STATESEQID_EVENT(name) \ +DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ + TP_PROTO(u32 seqid, const stateid_t *stp), \ + TP_ARGS(seqid, stp)) + +DEFINE_STATESEQID_EVENT(preprocess); +DEFINE_STATESEQID_EVENT(open_confirm); + +DECLARE_EVENT_CLASS(nfsd_clientid_class, + TP_PROTO(const clientid_t *clid), + TP_ARGS(clid), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + ), + TP_fast_assign( + __entry->cl_boot = clid->cl_boot; + __entry->cl_id = clid->cl_id; + ), + TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id) +) + +#define DEFINE_CLIENTID_EVENT(name) \ +DEFINE_EVENT(nfsd_clientid_class, nfsd_clid_##name, \ + TP_PROTO(const clientid_t *clid), \ + TP_ARGS(clid)) + +DEFINE_CLIENTID_EVENT(expired); +DEFINE_CLIENTID_EVENT(purged); +DEFINE_CLIENTID_EVENT(renew); +DEFINE_CLIENTID_EVENT(stale); + +DECLARE_EVENT_CLASS(nfsd_net_class, + TP_PROTO(const struct nfsd_net *nn), + TP_ARGS(nn), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + ), + TP_printk("boot_time=%16llx", __entry->boot_time) +) + +#define DEFINE_NET_EVENT(name) \ +DEFINE_EVENT(nfsd_net_class, nfsd_##name, \ + TP_PROTO(const struct nfsd_net *nn), \ + TP_ARGS(nn)) + +DEFINE_NET_EVENT(grace_start); +DEFINE_NET_EVENT(grace_complete); + +DECLARE_EVENT_CLASS(nfsd_clid_class, + TP_PROTO(const struct nfsd_net *nn, + unsigned int namelen, + const unsigned char *namedata), + TP_ARGS(nn, namelen, namedata), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(unsigned int, namelen) + __dynamic_array(unsigned char, name, namelen) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->namelen = namelen; + memcpy(__get_dynamic_array(name), namedata, namelen); + ), + TP_printk("boot_time=%16llx nfs4_clientid=%.*s", + __entry->boot_time, __entry->namelen, __get_str(name)) +) + +#define DEFINE_CLID_EVENT(name) \ +DEFINE_EVENT(nfsd_clid_class, nfsd_clid_##name, \ + TP_PROTO(const struct nfsd_net *nn, \ + unsigned int namelen, \ + const unsigned char *namedata), \ + TP_ARGS(nn, namelen, namedata)) + +DEFINE_CLID_EVENT(find); +DEFINE_CLID_EVENT(reclaim); + +TRACE_EVENT(nfsd_clid_inuse_err, + TP_PROTO(const struct nfs4_client *clp), + TP_ARGS(clp), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __field(unsigned int, namelen) + __dynamic_array(unsigned char, name, clp->cl_name.len) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + memcpy(__entry->addr, &clp->cl_addr, + sizeof(struct sockaddr_in6)); + __entry->namelen = clp->cl_name.len; + memcpy(__get_dynamic_array(name), clp->cl_name.data, + clp->cl_name.len); + ), + TP_printk("nfs4_clientid %.*s already in use by %pISpc, client %08x:%08x", + __entry->namelen, __get_str(name), __entry->addr, + __entry->cl_boot, __entry->cl_id) +) + TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); @@ -432,6 +565,218 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->nlink, __entry->mode, __entry->mask) ); +#include "cache.h" + +TRACE_DEFINE_ENUM(RC_DROPIT); +TRACE_DEFINE_ENUM(RC_REPLY); +TRACE_DEFINE_ENUM(RC_DOIT); + +#define show_drc_retval(x) \ + __print_symbolic(x, \ + { RC_DROPIT, "DROPIT" }, \ + { RC_REPLY, "REPLY" }, \ + { RC_DOIT, "DOIT" }) + +TRACE_EVENT(nfsd_drc_found, + TP_PROTO( + const struct nfsd_net *nn, + const struct svc_rqst *rqstp, + int result + ), + TP_ARGS(nn, rqstp, result), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(unsigned long, result) + __field(u32, xid) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->result = result; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + ), + TP_printk("boot_time=%16llx xid=0x%08x result=%s", + __entry->boot_time, __entry->xid, + show_drc_retval(__entry->result)) + +); + +TRACE_EVENT(nfsd_drc_mismatch, + TP_PROTO( + const struct nfsd_net *nn, + const struct svc_cacherep *key, + const struct svc_cacherep *rp + ), + TP_ARGS(nn, key, rp), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(u32, xid) + __field(u32, cached) + __field(u32, ingress) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->xid = be32_to_cpu(key->c_key.k_xid); + __entry->cached = (__force u32)key->c_key.k_csum; + __entry->ingress = (__force u32)rp->c_key.k_csum; + ), + TP_printk("boot_time=%16llx xid=0x%08x cached-csum=0x%08x ingress-csum=0x%08x", + __entry->boot_time, __entry->xid, __entry->cached, + __entry->ingress) +); + +TRACE_EVENT(nfsd_cb_args, + TP_PROTO( + const struct nfs4_client *clp, + const struct nfs4_cb_conn *conn + ), + TP_ARGS(clp, conn), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, prog) + __field(u32, ident) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __entry->prog = conn->cb_prog; + __entry->ident = conn->cb_ident; + memcpy(__entry->addr, &conn->cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("client %08x:%08x callback addr=%pISpc prog=%u ident=%u", + __entry->cl_boot, __entry->cl_id, + __entry->addr, __entry->prog, __entry->ident) +); + +TRACE_EVENT(nfsd_cb_nodelegs, + TP_PROTO(const struct nfs4_client *clp), + TP_ARGS(clp), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + ), + TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id) +) + +TRACE_DEFINE_ENUM(NFSD4_CB_UP); +TRACE_DEFINE_ENUM(NFSD4_CB_UNKNOWN); +TRACE_DEFINE_ENUM(NFSD4_CB_DOWN); +TRACE_DEFINE_ENUM(NFSD4_CB_FAULT); + +#define show_cb_state(val) \ + __print_symbolic(val, \ + { NFSD4_CB_UP, "UP" }, \ + { NFSD4_CB_UNKNOWN, "UNKNOWN" }, \ + { NFSD4_CB_DOWN, "DOWN" }, \ + { NFSD4_CB_FAULT, "FAULT"}) + +DECLARE_EVENT_CLASS(nfsd_cb_class, + TP_PROTO(const struct nfs4_client *clp), + TP_ARGS(clp), + TP_STRUCT__entry( + __field(unsigned long, state) + __field(u32, cl_boot) + __field(u32, cl_id) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->state = clp->cl_cb_state; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("addr=%pISpc client %08x:%08x state=%s", + __entry->addr, __entry->cl_boot, __entry->cl_id, + show_cb_state(__entry->state)) +); + +#define DEFINE_NFSD_CB_EVENT(name) \ +DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \ + TP_PROTO(const struct nfs4_client *clp), \ + TP_ARGS(clp)) + +DEFINE_NFSD_CB_EVENT(setup); +DEFINE_NFSD_CB_EVENT(state); +DEFINE_NFSD_CB_EVENT(shutdown); + +TRACE_EVENT(nfsd_cb_setup_err, + TP_PROTO( + const struct nfs4_client *clp, + long error + ), + TP_ARGS(clp, error), + TP_STRUCT__entry( + __field(long, error) + __field(u32, cl_boot) + __field(u32, cl_id) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->error = error; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("addr=%pISpc client %08x:%08x error=%ld", + __entry->addr, __entry->cl_boot, __entry->cl_id, __entry->error) +); + +TRACE_EVENT(nfsd_cb_work, + TP_PROTO( + const struct nfs4_client *clp, + const char *procedure + ), + TP_ARGS(clp, procedure), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __string(procedure, procedure) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_str(procedure, procedure) + memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("addr=%pISpc client %08x:%08x procedure=%s", + __entry->addr, __entry->cl_boot, __entry->cl_id, + __get_str(procedure)) +); + +TRACE_EVENT(nfsd_cb_done, + TP_PROTO( + const struct nfs4_client *clp, + int status + ), + TP_ARGS(clp, status), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(int, status) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __entry->status = status; + memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("addr=%pISpc client %08x:%08x status=%d", + __entry->addr, __entry->cl_boot, __entry->cl_id, + __entry->status) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 445eef41bfaf..91b58c897f92 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2780,6 +2780,8 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (!nilfs->ns_writer) return -ENOMEM; + inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); + err = nilfs_segctor_start_thread(nilfs->ns_writer); if (err) { kfree(nilfs->ns_writer); diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig index 1177c33df895..aca16624b370 100644 --- a/fs/ocfs2/Kconfig +++ b/fs/ocfs2/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config OCFS2_FS tristate "OCFS2 file system support" - depends on NET && SYSFS && CONFIGFS_FS + depends on INET && SYSFS && CONFIGFS_FS select JBD2 select CRC32 select QUOTA diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 3a44e461828a..25cabbfe87fc 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -62,7 +62,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file, last_index = (size - 1) >> PAGE_SHIFT; /* - * There are cases that lead to the page no longer bebongs to the + * There are cases that lead to the page no longer belonging to the * mapping. * 1) pagecache truncates locally due to memory pressure. * 2) pagecache truncates when another is taking EX lock against diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a40f88cf3ab7..a190212ca85d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1236,64 +1236,26 @@ xfs_ioctl_setattr_xflags( return 0; } -/* - * If we are changing DAX flags, we have to ensure the file is clean and any - * cached objects in the address space are invalidated and removed. This - * requires us to lock out other IO and page faults similar to a truncate - * operation. The locks need to be held until the transaction has been committed - * so that the cache invalidation is atomic with respect to the DAX flag - * manipulation. - */ -static int -xfs_ioctl_setattr_dax_invalidate( +static void +xfs_ioctl_setattr_prepare_dax( struct xfs_inode *ip, - struct fsxattr *fa, - int *join_flags) + struct fsxattr *fa) { - struct inode *inode = VFS_I(ip); - struct super_block *sb = inode->i_sb; - int error; - - *join_flags = 0; - - /* - * It is only valid to set the DAX flag on regular files and - * directories on filesystems where the block size is equal to the page - * size. On directories it serves as an inherited hint so we don't - * have to check the device for dax support or flush pagecache. - */ - if (fa->fsx_xflags & FS_XFLAG_DAX) { - struct xfs_buftarg *target = xfs_inode_buftarg(ip); - - if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize)) - return -EINVAL; - } - - /* If the DAX state is not changing, we have nothing to do here. */ - if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode)) - return 0; - if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode)) - return 0; + struct xfs_mount *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); if (S_ISDIR(inode->i_mode)) - return 0; - - /* lock, flush and invalidate mapping in preparation for flag change */ - xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); - error = filemap_write_and_wait(inode->i_mapping); - if (error) - goto out_unlock; - error = invalidate_inode_pages2(inode->i_mapping); - if (error) - goto out_unlock; - - *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL; - return 0; + return; -out_unlock: - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); - return error; + if ((mp->m_flags & XFS_MOUNT_DAX_ALWAYS) || + (mp->m_flags & XFS_MOUNT_DAX_NEVER)) + return; + if (((fa->fsx_xflags & FS_XFLAG_DAX) && + !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)) || + (!(fa->fsx_xflags & FS_XFLAG_DAX) && + (ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))) + d_mark_dontcache(inode); } /* @@ -1301,17 +1263,10 @@ out_unlock: * have permission to do so. On success, return a clean transaction and the * inode locked exclusively ready for further operation specific checks. On * failure, return an error without modifying or locking the inode. - * - * The inode might already be IO locked on call. If this is the case, it is - * indicated in @join_flags and we take full responsibility for ensuring they - * are unlocked from now on. Hence if we have an error here, we still have to - * unlock them. Otherwise, once they are joined to the transaction, they will - * be unlocked on commit/cancel. */ static struct xfs_trans * xfs_ioctl_setattr_get_trans( - struct xfs_inode *ip, - int join_flags) + struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; @@ -1328,8 +1283,7 @@ xfs_ioctl_setattr_get_trans( goto out_unlock; xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags); - join_flags = 0; + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); /* * CAP_FOWNER overrides the following restrictions: @@ -1350,8 +1304,6 @@ xfs_ioctl_setattr_get_trans( out_cancel: xfs_trans_cancel(tp); out_unlock: - if (join_flags) - xfs_iunlock(ip, join_flags); return ERR_PTR(error); } @@ -1476,7 +1428,6 @@ xfs_ioctl_setattr( struct xfs_dquot *pdqp = NULL; struct xfs_dquot *olddquot = NULL; int code; - int join_flags = 0; trace_xfs_ioctl_setattr(ip); @@ -1500,18 +1451,9 @@ xfs_ioctl_setattr( return code; } - /* - * Changing DAX config may require inode locking for mapping - * invalidation. These need to be held all the way to transaction commit - * or cancel time, so need to be passed through to - * xfs_ioctl_setattr_get_trans() so it can apply them to the join call - * appropriately. - */ - code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags); - if (code) - goto error_free_dquots; + xfs_ioctl_setattr_prepare_dax(ip, fa); - tp = xfs_ioctl_setattr_get_trans(ip, join_flags); + tp = xfs_ioctl_setattr_get_trans(ip); if (IS_ERR(tp)) { code = PTR_ERR(tp); goto error_free_dquots; @@ -1639,7 +1581,6 @@ xfs_ioc_setxflags( struct fsxattr fa; struct fsxattr old_fa; unsigned int flags; - int join_flags = 0; int error; if (copy_from_user(&flags, arg, sizeof(flags))) @@ -1656,18 +1597,9 @@ xfs_ioc_setxflags( if (error) return error; - /* - * Changing DAX config may require inode locking for mapping - * invalidation. These need to be held all the way to transaction commit - * or cancel time, so need to be passed through to - * xfs_ioctl_setattr_get_trans() so it can apply them to the join call - * appropriately. - */ - error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags); - if (error) - goto out_drop_write; + xfs_ioctl_setattr_prepare_dax(ip, &fa); - tp = xfs_ioctl_setattr_get_trans(ip, join_flags); + tp = xfs_ioctl_setattr_get_trans(ip); if (IS_ERR(tp)) { error = PTR_ERR(tp); goto out_drop_write; diff --git a/include/dt-bindings/mailbox/qcom-ipcc.h b/include/dt-bindings/mailbox/qcom-ipcc.h new file mode 100644 index 000000000000..4c23eefed5f3 --- /dev/null +++ b/include/dt-bindings/mailbox/qcom-ipcc.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved. + */ + +#ifndef __DT_BINDINGS_MAILBOX_IPCC_H +#define __DT_BINDINGS_MAILBOX_IPCC_H + +/* Signal IDs for MPROC protocol */ +#define IPCC_MPROC_SIGNAL_GLINK_QMP 0 +#define IPCC_MPROC_SIGNAL_SMP2P 2 +#define IPCC_MPROC_SIGNAL_PING 3 + +/* Client IDs */ +#define IPCC_CLIENT_AOP 0 +#define IPCC_CLIENT_TZ 1 +#define IPCC_CLIENT_MPSS 2 +#define IPCC_CLIENT_LPASS 3 +#define IPCC_CLIENT_SLPI 4 +#define IPCC_CLIENT_SDC 5 +#define IPCC_CLIENT_CDSP 6 +#define IPCC_CLIENT_NPU 7 +#define IPCC_CLIENT_APSS 8 +#define IPCC_CLIENT_GPU 9 +#define IPCC_CLIENT_CVP 10 +#define IPCC_CLIENT_CAM 11 +#define IPCC_CLIENT_VPU 12 +#define IPCC_CLIENT_PCIE0 13 +#define IPCC_CLIENT_PCIE1 14 +#define IPCC_CLIENT_PCIE2 15 +#define IPCC_CLIENT_SPSS 16 + +#endif diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8377afef8806..191772d4a4d7 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -102,6 +102,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, + CPUHP_AP_IRQ_RISCV_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_MICROCODE_LOADER, diff --git a/include/linux/fs.h b/include/linux/fs.h index 19ef6c88c152..8e1f8f93108f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1048,6 +1048,7 @@ struct lock_manager_operations { bool (*lm_break)(struct file_lock *); int (*lm_change)(struct file_lock *, int, struct list_head *); void (*lm_setup)(struct file_lock *, void **); + bool (*lm_breaker_owns_lease)(struct file_lock *); }; struct lock_manager { diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8bbcaad7ef0f..65b81e0c494d 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -5,6 +5,8 @@ #include <linux/err.h> #include <linux/sched.h> +struct mm_struct; + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -57,6 +59,7 @@ bool kthread_should_stop(void); bool kthread_should_park(void); bool __kthread_should_park(struct task_struct *k); bool kthread_freezable_should_stop(bool *was_frozen); +void *kthread_func(struct task_struct *k); void *kthread_data(struct task_struct *k); void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); @@ -198,6 +201,9 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); +void kthread_use_mm(struct mm_struct *mm); +void kthread_unuse_mm(struct mm_struct *mm); + struct cgroup_subsys_state; #ifdef CONFIG_BLK_CGROUP diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index d9a543a9e1cc..c51a84132d7c 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -4,11 +4,6 @@ #include <asm/mmu_context.h> -struct mm_struct; - -void use_mm(struct mm_struct *mm); -void unuse_mm(struct mm_struct *mm); - /* Architectures that care about IRQ state in switch_mm can override this. */ #ifndef switch_mm_irqs_off # define switch_mm_irqs_off switch_mm diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index df1f08486d81..c4c37fd12104 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -660,9 +660,21 @@ struct deferred_split { * per-zone basis. */ typedef struct pglist_data { + /* + * node_zones contains just the zones for THIS node. Not all of the + * zones may be populated, but it is the full list. It is referenced by + * this node's node_zonelists as well as other node's node_zonelists. + */ struct zone node_zones[MAX_NR_ZONES]; + + /* + * node_zonelists contains references to all zones in all nodes. + * Generally the first zones will be references to this node's + * node_zones. + */ struct zonelist node_zonelists[MAX_ZONELISTS]; - int nr_zones; + + int nr_zones; /* number of populated zones in this node */ #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ struct page *node_mem_map; #ifdef CONFIG_PAGE_EXTENSION diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 82d8fb422092..4dba3c948932 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -38,7 +38,7 @@ struct nfs4_ace { struct nfs4_acl { uint32_t naces; - struct nfs4_ace aces[0]; + struct nfs4_ace aces[]; }; #define NFS4_MAXLABELLEN 2048 @@ -295,7 +295,7 @@ static inline bool seqid_mutating_err(u32 err) case NFS4ERR_NOFILEHANDLE: case NFS4ERR_MOVED: return false; - }; + } return true; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 73eda45f1cfd..6ee9119acc5d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -230,6 +230,7 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_OTHER BIT(12) /* other attrs are invalid */ #define NFS_INO_DATA_INVAL_DEFER \ BIT(13) /* Deferred cache invalidation */ +#define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e5f3e7d8d3d5..5fd0a9ef425f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1227,7 +1227,7 @@ struct nfs4_secinfo4 { struct nfs4_secinfo_flavors { unsigned int num_flavors; - struct nfs4_secinfo4 flavors[0]; + struct nfs4_secinfo4 flavors[]; }; struct nfs4_secinfo_arg { diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 83bd8cb475d7..b7af8cc13eda 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -64,7 +64,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, struct stack_trace { unsigned int nr_entries, max_entries; unsigned long *entries; - int skip; /* input argument: How many entries to skip */ + unsigned int skip; /* input argument: How many entries to skip */ }; extern void save_stack_trace(struct stack_trace *trace); diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 4f6b28487f28..98da816b5fc2 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -76,7 +76,7 @@ struct rpc_auth { unsigned int au_verfsize; /* size of reply verifier */ unsigned int au_ralign; /* words before UL header */ - unsigned int au_flags; + unsigned long au_flags; const struct rpc_authops *au_ops; rpc_authflavor_t au_flavor; /* pseudoflavor (note may * differ from the flavor in @@ -89,7 +89,8 @@ struct rpc_auth { }; /* rpc_auth au_flags */ -#define RPCAUTH_AUTH_DATATOUCH 0x00000002 +#define RPCAUTH_AUTH_DATATOUCH (1) +#define RPCAUTH_AUTH_UPDATE_SLACK (2) struct rpc_auth_create_args { rpc_authflavor_t pseudoflavor; diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index bc07e51f20d1..bf4ac8a0268c 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -84,6 +84,7 @@ struct pf_desc { u32 service; char *name; char *auth_domain_name; + struct auth_domain *domain; bool datatouch; }; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fd390894a584..386628b36bc7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -254,6 +254,7 @@ struct svc_rqst { struct page * *rq_page_end; /* one past the last page */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ + struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ @@ -299,6 +300,7 @@ struct svc_rqst { struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + void ** rq_lease_breaker; /* The v4 client breaking a lease */ }; #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cbcfbd0521e3..7ed82625dc0b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -48,7 +48,6 @@ #include <linux/sunrpc/rpc_rdma.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> -#define SVCRDMA_DEBUG /* Default and maximum inline threshold sizes */ enum { @@ -160,9 +159,8 @@ struct svc_rdma_send_ctxt { }; /* svc_rdma_backchannel.c */ -extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, - __be32 *rdma_resp, - struct xdr_buf *rcvbuf); +extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt); /* svc_rdma_recvfrom.c */ extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 9e1e046de176..aca35ab5cff2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -117,6 +117,12 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u return 0; } +static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt) +{ + return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) || + (test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0); +} + int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index ca39a388dc22..f09c82b0a7ae 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -20,7 +20,8 @@ int gss_svc_init(void); void gss_svc_shutdown(void); int gss_svc_init_net(struct net *net); void gss_svc_shutdown_net(struct net *net); -int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); +struct auth_domain *svcauth_gss_register_pseudoflavor(u32 pseudoflavor, + char *name); u32 svcauth_gss_flavor(struct auth_domain *dom); #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 771baadaee9d..b7ac7fe68306 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,7 +28,7 @@ struct svc_sock { /* private TCP part */ /* On-the-wire fragment header: */ - __be32 sk_reclen; + __be32 sk_marker; /* As we receive a record, this includes the length received so * far (including the fragment header): */ u32 sk_tcplen; @@ -41,12 +41,12 @@ struct svc_sock { static inline u32 svc_sock_reclen(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; + return be32_to_cpu(svsk->sk_marker) & RPC_FRAGMENT_SIZE_MASK; } static inline u32 svc_sock_final_rec(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; + return be32_to_cpu(svsk->sk_marker) & RPC_LAST_STREAM_FRAGMENT; } /* diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 81b43f5bdf23..1257f26bb887 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -261,9 +261,9 @@ TRACE_EVENT(block_bio_bounce, */ TRACE_EVENT(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio, int error), + TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio, error), + TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) @@ -277,7 +277,7 @@ TRACE_EVENT(block_bio_complete, __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); - __entry->error = error; + __entry->error = blk_status_to_errno(bio->bi_status); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index 32d88c4fb063..b9b51a4b1db1 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -17,6 +17,16 @@ ** GSS-API related trace events **/ +TRACE_DEFINE_ENUM(RPC_GSS_SVC_NONE); +TRACE_DEFINE_ENUM(RPC_GSS_SVC_INTEGRITY); +TRACE_DEFINE_ENUM(RPC_GSS_SVC_PRIVACY); + +#define show_gss_service(x) \ + __print_symbolic(x, \ + { RPC_GSS_SVC_NONE, "none" }, \ + { RPC_GSS_SVC_INTEGRITY, "integrity" }, \ + { RPC_GSS_SVC_PRIVACY, "privacy" }) + TRACE_DEFINE_ENUM(GSS_S_BAD_MECH); TRACE_DEFINE_ENUM(GSS_S_BAD_NAME); TRACE_DEFINE_ENUM(GSS_S_BAD_NAMETYPE); @@ -126,6 +136,40 @@ DEFINE_GSSAPI_EVENT(verify_mic); DEFINE_GSSAPI_EVENT(wrap); DEFINE_GSSAPI_EVENT(unwrap); +DECLARE_EVENT_CLASS(rpcgss_ctx_class, + TP_PROTO( + const struct gss_cred *gc + ), + + TP_ARGS(gc), + + TP_STRUCT__entry( + __field(const void *, cred) + __field(unsigned long, service) + __string(principal, gc->gc_principal) + ), + + TP_fast_assign( + __entry->cred = gc; + __entry->service = gc->gc_service; + __assign_str(principal, gc->gc_principal) + ), + + TP_printk("cred=%p service=%s principal='%s'", + __entry->cred, show_gss_service(__entry->service), + __get_str(principal)) +); + +#define DEFINE_CTX_EVENT(name) \ + DEFINE_EVENT(rpcgss_ctx_class, rpcgss_ctx_##name, \ + TP_PROTO( \ + const struct gss_cred *gc \ + ), \ + TP_ARGS(gc)) + +DEFINE_CTX_EVENT(init); +DEFINE_CTX_EVENT(destroy); + TRACE_EVENT(rpcgss_svc_accept_upcall, TP_PROTO( __be32 xid, @@ -291,6 +335,40 @@ TRACE_EVENT(rpcgss_need_reencode, __entry->ret ? "" : "un") ); +TRACE_EVENT(rpcgss_update_slack, + TP_PROTO( + const struct rpc_task *task, + const struct rpc_auth *auth + ), + + TP_ARGS(task, auth), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(const void *, auth) + __field(unsigned int, rslack) + __field(unsigned int, ralign) + __field(unsigned int, verfsize) + ), + + TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); + __entry->auth = auth; + __entry->rslack = auth->au_rslack; + __entry->ralign = auth->au_ralign; + __entry->verfsize = auth->au_verfsize; + ), + + TP_printk("task:%u@%u xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n", + __entry->task_id, __entry->client_id, __entry->xid, + __entry->auth, __entry->rslack, __entry->ralign, + __entry->verfsize) +); + DECLARE_EVENT_CLASS(rpcgss_svc_seqno_class, TP_PROTO( __be32 xid, @@ -371,6 +449,7 @@ TRACE_EVENT(rpcgss_upcall_result, TRACE_EVENT(rpcgss_context, TP_PROTO( + u32 window_size, unsigned long expiry, unsigned long now, unsigned int timeout, @@ -378,12 +457,13 @@ TRACE_EVENT(rpcgss_context, const u8 *data ), - TP_ARGS(expiry, now, timeout, len, data), + TP_ARGS(window_size, expiry, now, timeout, len, data), TP_STRUCT__entry( __field(unsigned long, expiry) __field(unsigned long, now) __field(unsigned int, timeout) + __field(u32, window_size) __field(int, len) __string(acceptor, data) ), @@ -392,13 +472,14 @@ TRACE_EVENT(rpcgss_context, __entry->expiry = expiry; __entry->now = now; __entry->timeout = timeout; + __entry->window_size = window_size; __entry->len = len; strncpy(__get_str(acceptor), data, len); ), - TP_printk("gc_expiry=%lu now=%lu timeout=%u acceptor=%.*s", - __entry->expiry, __entry->now, __entry->timeout, - __entry->len, __get_str(acceptor)) + TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s", + __entry->window_size, __entry->expiry, __entry->now, + __entry->timeout, __entry->len, __get_str(acceptor)) ); diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 132c3c778a43..0f05a6e2b9cb 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -380,12 +380,8 @@ TRACE_EVENT(xprtrdma_inline_thresh, DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); -DEFINE_CONN_EVENT(flush_dct); -DEFINE_RXPRT_EVENT(xprtrdma_create); -DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); -DEFINE_RXPRT_EVENT(xprtrdma_op_close); DEFINE_RXPRT_EVENT(xprtrdma_op_setport); TRACE_EVENT(xprtrdma_op_connect, @@ -1279,38 +1275,42 @@ TRACE_EVENT(xprtrdma_leaked_rep, ** Server-side RPC/RDMA events **/ -DECLARE_EVENT_CLASS(svcrdma_xprt_event, +DECLARE_EVENT_CLASS(svcrdma_accept_class, TP_PROTO( - const struct svc_xprt *xprt + const struct svcxprt_rdma *rdma, + long status ), - TP_ARGS(xprt), + TP_ARGS(rdma, status), TP_STRUCT__entry( - __field(const void *, xprt) - __string(addr, xprt->xpt_remotebuf) + __field(long, status) + __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = xprt; - __assign_str(addr, xprt->xpt_remotebuf); + __entry->status = status; + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s", - __entry->xprt, __get_str(addr) + TP_printk("addr=%s status=%ld", + __get_str(addr), __entry->status ) ); -#define DEFINE_XPRT_EVENT(name) \ - DEFINE_EVENT(svcrdma_xprt_event, svcrdma_xprt_##name, \ - TP_PROTO( \ - const struct svc_xprt *xprt \ - ), \ - TP_ARGS(xprt)) +#define DEFINE_ACCEPT_EVENT(name) \ + DEFINE_EVENT(svcrdma_accept_class, svcrdma_##name##_err, \ + TP_PROTO( \ + const struct svcxprt_rdma *rdma, \ + long status \ + ), \ + TP_ARGS(rdma, status)) -DEFINE_XPRT_EVENT(accept); -DEFINE_XPRT_EVENT(fail); -DEFINE_XPRT_EVENT(free); +DEFINE_ACCEPT_EVENT(pd); +DEFINE_ACCEPT_EVENT(qp); +DEFINE_ACCEPT_EVENT(fabric); +DEFINE_ACCEPT_EVENT(initdepth); +DEFINE_ACCEPT_EVENT(accept); TRACE_DEFINE_ENUM(RDMA_MSG); TRACE_DEFINE_ENUM(RDMA_NOMSG); @@ -1355,7 +1355,7 @@ TRACE_EVENT(svcrdma_decode_rqst, show_rpcrdma_proc(__entry->proc), __entry->hdrlen) ); -TRACE_EVENT(svcrdma_decode_short, +TRACE_EVENT(svcrdma_decode_short_err, TP_PROTO( unsigned int hdrlen ), @@ -1399,7 +1399,8 @@ DECLARE_EVENT_CLASS(svcrdma_badreq_event, ); #define DEFINE_BADREQ_EVENT(name) \ - DEFINE_EVENT(svcrdma_badreq_event, svcrdma_decode_##name,\ + DEFINE_EVENT(svcrdma_badreq_event, \ + svcrdma_decode_##name##_err, \ TP_PROTO( \ __be32 *p \ ), \ @@ -1583,28 +1584,117 @@ DECLARE_EVENT_CLASS(svcrdma_dma_map_class, DEFINE_SVC_DMA_EVENT(dma_map_page); DEFINE_SVC_DMA_EVENT(dma_unmap_page); -TRACE_EVENT(svcrdma_dma_map_rwctx, +TRACE_EVENT(svcrdma_dma_map_rw_err, TP_PROTO( const struct svcxprt_rdma *rdma, + unsigned int nents, int status ), - TP_ARGS(rdma, status), + TP_ARGS(rdma, nents, status), TP_STRUCT__entry( __field(int, status) + __field(unsigned int, nents) __string(device, rdma->sc_cm_id->device->name) __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( __entry->status = status; + __entry->nents = nents; + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s nents=%u status=%d", + __get_str(addr), __get_str(device), __entry->nents, + __entry->status + ) +); + +TRACE_EVENT(svcrdma_no_rwctx_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + unsigned int num_sges + ), + + TP_ARGS(rdma, num_sges), + + TP_STRUCT__entry( + __field(unsigned int, num_sges) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->num_sges = num_sges; + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s num_sges=%d", + __get_str(addr), __get_str(device), __entry->num_sges + ) +); + +TRACE_EVENT(svcrdma_page_overrun_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + const struct svc_rqst *rqst, + unsigned int pageno + ), + + TP_ARGS(rdma, rqst, pageno), + + TP_STRUCT__entry( + __field(unsigned int, pageno) + __field(u32, xid) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->pageno = pageno; + __entry->xid = __be32_to_cpu(rqst->rq_xid); + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s xid=0x%08x pageno=%u", __get_str(addr), + __get_str(device), __entry->xid, __entry->pageno + ) +); + +TRACE_EVENT(svcrdma_small_wrch_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + unsigned int remaining, + unsigned int seg_no, + unsigned int num_segs + ), + + TP_ARGS(rdma, remaining, seg_no, num_segs), + + TP_STRUCT__entry( + __field(unsigned int, remaining) + __field(unsigned int, seg_no) + __field(unsigned int, num_segs) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->remaining = remaining; + __entry->seg_no = seg_no; + __entry->num_segs = num_segs; __assign_str(device, rdma->sc_cm_id->device->name); __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s device=%s status=%d", - __get_str(addr), __get_str(device), __entry->status + TP_printk("addr=%s device=%s remaining=%u seg_no=%u num_segs=%u", + __get_str(addr), __get_str(device), __entry->remaining, + __entry->seg_no, __entry->num_segs ) ); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ffd2215950dc..6a12935b8b14 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,14 +14,50 @@ #include <linux/net.h> #include <linux/tracepoint.h> -DECLARE_EVENT_CLASS(xdr_buf_class, +TRACE_DEFINE_ENUM(SOCK_STREAM); +TRACE_DEFINE_ENUM(SOCK_DGRAM); +TRACE_DEFINE_ENUM(SOCK_RAW); +TRACE_DEFINE_ENUM(SOCK_RDM); +TRACE_DEFINE_ENUM(SOCK_SEQPACKET); +TRACE_DEFINE_ENUM(SOCK_DCCP); +TRACE_DEFINE_ENUM(SOCK_PACKET); + +#define show_socket_type(type) \ + __print_symbolic(type, \ + { SOCK_STREAM, "STREAM" }, \ + { SOCK_DGRAM, "DGRAM" }, \ + { SOCK_RAW, "RAW" }, \ + { SOCK_RDM, "RDM" }, \ + { SOCK_SEQPACKET, "SEQPACKET" }, \ + { SOCK_DCCP, "DCCP" }, \ + { SOCK_PACKET, "PACKET" }) + +/* This list is known to be incomplete, add new enums as needed. */ +TRACE_DEFINE_ENUM(AF_UNSPEC); +TRACE_DEFINE_ENUM(AF_UNIX); +TRACE_DEFINE_ENUM(AF_LOCAL); +TRACE_DEFINE_ENUM(AF_INET); +TRACE_DEFINE_ENUM(AF_INET6); + +#define rpc_show_address_family(family) \ + __print_symbolic(family, \ + { AF_UNSPEC, "AF_UNSPEC" }, \ + { AF_UNIX, "AF_UNIX" }, \ + { AF_LOCAL, "AF_LOCAL" }, \ + { AF_INET, "AF_INET" }, \ + { AF_INET6, "AF_INET6" }) + +DECLARE_EVENT_CLASS(rpc_xdr_buf_class, TP_PROTO( + const struct rpc_task *task, const struct xdr_buf *xdr ), - TP_ARGS(xdr), + TP_ARGS(task, xdr), TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) __field(const void *, head_base) __field(size_t, head_len) __field(const void *, tail_base) @@ -31,6 +67,8 @@ DECLARE_EVENT_CLASS(xdr_buf_class, ), TP_fast_assign( + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; __entry->head_base = xdr->head[0].iov_base; __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; @@ -39,23 +77,137 @@ DECLARE_EVENT_CLASS(xdr_buf_class, __entry->msg_len = xdr->len; ), - TP_printk("head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + TP_printk("task:%u@%u head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + __entry->task_id, __entry->client_id, __entry->head_base, __entry->head_len, __entry->page_len, __entry->tail_base, __entry->tail_len, __entry->msg_len ) ); -#define DEFINE_XDRBUF_EVENT(name) \ - DEFINE_EVENT(xdr_buf_class, name, \ +#define DEFINE_RPCXDRBUF_EVENT(name) \ + DEFINE_EVENT(rpc_xdr_buf_class, \ + rpc_xdr_##name, \ TP_PROTO( \ + const struct rpc_task *task, \ const struct xdr_buf *xdr \ ), \ - TP_ARGS(xdr)) + TP_ARGS(task, xdr)) + +DEFINE_RPCXDRBUF_EVENT(sendto); +DEFINE_RPCXDRBUF_EVENT(recvfrom); +DEFINE_RPCXDRBUF_EVENT(reply_pages); + + +DECLARE_EVENT_CLASS(rpc_clnt_class, + TP_PROTO( + const struct rpc_clnt *clnt + ), + + TP_ARGS(clnt), + + TP_STRUCT__entry( + __field(unsigned int, client_id) + ), + + TP_fast_assign( + __entry->client_id = clnt->cl_clid; + ), + + TP_printk("clid=%u", __entry->client_id) +); + +#define DEFINE_RPC_CLNT_EVENT(name) \ + DEFINE_EVENT(rpc_clnt_class, \ + rpc_clnt_##name, \ + TP_PROTO( \ + const struct rpc_clnt *clnt \ + ), \ + TP_ARGS(clnt)) + +DEFINE_RPC_CLNT_EVENT(free); +DEFINE_RPC_CLNT_EVENT(killall); +DEFINE_RPC_CLNT_EVENT(shutdown); +DEFINE_RPC_CLNT_EVENT(release); +DEFINE_RPC_CLNT_EVENT(replace_xprt); +DEFINE_RPC_CLNT_EVENT(replace_xprt_err); + +TRACE_EVENT(rpc_clnt_new, + TP_PROTO( + const struct rpc_clnt *clnt, + const struct rpc_xprt *xprt, + const char *program, + const char *server + ), + + TP_ARGS(clnt, xprt, program, server), + + TP_STRUCT__entry( + __field(unsigned int, client_id) + __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) + __string(program, program) + __string(server, server) + ), + + TP_fast_assign( + __entry->client_id = clnt->cl_clid; + __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); + __assign_str(program, program) + __assign_str(server, server) + ), + + TP_printk("client=%u peer=[%s]:%s program=%s server=%s", + __entry->client_id, __get_str(addr), __get_str(port), + __get_str(program), __get_str(server)) +); + +TRACE_EVENT(rpc_clnt_new_err, + TP_PROTO( + const char *program, + const char *server, + int error + ), + + TP_ARGS(program, server, error), + + TP_STRUCT__entry( + __field(int, error) + __string(program, program) + __string(server, server) + ), + + TP_fast_assign( + __entry->error = error; + __assign_str(program, program) + __assign_str(server, server) + ), + + TP_printk("program=%s server=%s error=%d", + __get_str(program), __get_str(server), __entry->error) +); + +TRACE_EVENT(rpc_clnt_clone_err, + TP_PROTO( + const struct rpc_clnt *clnt, + int error + ), + + TP_ARGS(clnt, error), + + TP_STRUCT__entry( + __field(unsigned int, client_id) + __field(int, error) + ), + + TP_fast_assign( + __entry->client_id = clnt->cl_clid; + __entry->error = error; + ), + + TP_printk("client=%u error=%d", __entry->client_id, __entry->error) +); -DEFINE_XDRBUF_EVENT(xprt_sendto); -DEFINE_XDRBUF_EVENT(xprt_recvfrom); -DEFINE_XDRBUF_EVENT(svc_recvfrom); -DEFINE_XDRBUF_EVENT(svc_sendto); TRACE_DEFINE_ENUM(RPC_AUTH_OK); TRACE_DEFINE_ENUM(RPC_AUTH_BADCRED); @@ -142,29 +294,35 @@ TRACE_EVENT(rpc_request, TRACE_DEFINE_ENUM(RPC_TASK_ASYNC); TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER); +TRACE_DEFINE_ENUM(RPC_TASK_NULLCREDS); TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN); TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS); TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC); +TRACE_DEFINE_ENUM(RPC_TASK_NO_ROUND_ROBIN); TRACE_DEFINE_ENUM(RPC_TASK_SOFT); TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN); TRACE_DEFINE_ENUM(RPC_TASK_SENT); TRACE_DEFINE_ENUM(RPC_TASK_TIMEOUT); TRACE_DEFINE_ENUM(RPC_TASK_NOCONNECT); TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT); +TRACE_DEFINE_ENUM(RPC_TASK_CRED_NOREF); #define rpc_show_task_flags(flags) \ __print_flags(flags, "|", \ { RPC_TASK_ASYNC, "ASYNC" }, \ { RPC_TASK_SWAPPER, "SWAPPER" }, \ + { RPC_TASK_NULLCREDS, "NULLCREDS" }, \ { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \ { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \ { RPC_TASK_DYNAMIC, "DYNAMIC" }, \ + { RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN" }, \ { RPC_TASK_SOFT, "SOFT" }, \ { RPC_TASK_SOFTCONN, "SOFTCONN" }, \ { RPC_TASK_SENT, "SENT" }, \ { RPC_TASK_TIMEOUT, "TIMEOUT" }, \ { RPC_TASK_NOCONNECT, "NOCONNECT" }, \ - { RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" }) + { RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" }, \ + { RPC_TASK_CRED_NOREF, "CRED_NOREF" }) TRACE_DEFINE_ENUM(RPC_TASK_RUNNING); TRACE_DEFINE_ENUM(RPC_TASK_QUEUED); @@ -359,6 +517,34 @@ DEFINE_RPC_REPLY_EVENT(stale_creds); DEFINE_RPC_REPLY_EVENT(bad_creds); DEFINE_RPC_REPLY_EVENT(auth_tooweak); +TRACE_EVENT(rpc_call_rpcerror, + TP_PROTO( + const struct rpc_task *task, + int tk_status, + int rpc_status + ), + + TP_ARGS(task, tk_status, rpc_status), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(int, tk_status) + __field(int, rpc_status) + ), + + TP_fast_assign( + __entry->client_id = task->tk_client->cl_clid; + __entry->task_id = task->tk_pid; + __entry->tk_status = tk_status; + __entry->rpc_status = rpc_status; + ), + + TP_printk("task:%u@%u tk_status=%d rpc_status=%d", + __entry->task_id, __entry->client_id, + __entry->tk_status, __entry->rpc_status) +); + TRACE_EVENT(rpc_stats_latency, TP_PROTO( @@ -526,43 +712,6 @@ TRACE_EVENT(rpc_xdr_alignment, ) ); -TRACE_EVENT(rpc_reply_pages, - TP_PROTO( - const struct rpc_rqst *req - ), - - TP_ARGS(req), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, head_base) - __field(size_t, head_len) - __field(const void *, tail_base) - __field(size_t, tail_len) - __field(unsigned int, page_len) - ), - - TP_fast_assign( - __entry->task_id = req->rq_task->tk_pid; - __entry->client_id = req->rq_task->tk_client->cl_clid; - - __entry->head_base = req->rq_rcv_buf.head[0].iov_base; - __entry->head_len = req->rq_rcv_buf.head[0].iov_len; - __entry->page_len = req->rq_rcv_buf.page_len; - __entry->tail_base = req->rq_rcv_buf.tail[0].iov_base; - __entry->tail_len = req->rq_rcv_buf.tail[0].iov_len; - ), - - TP_printk( - "task:%u@%u xdr=[%p,%zu]/%u/[%p,%zu]\n", - __entry->task_id, __entry->client_id, - __entry->head_base, __entry->head_len, - __entry->page_len, - __entry->tail_base, __entry->tail_len - ) -); - /* * First define the enums in the below macros to be exported to userspace * via TRACE_DEFINE_ENUM(). @@ -575,9 +724,9 @@ TRACE_EVENT(rpc_reply_pages, #define RPC_SHOW_SOCKET \ EM( SS_FREE, "FREE" ) \ EM( SS_UNCONNECTED, "UNCONNECTED" ) \ - EM( SS_CONNECTING, "CONNECTING," ) \ - EM( SS_CONNECTED, "CONNECTED," ) \ - EMe(SS_DISCONNECTING, "DISCONNECTING" ) + EM( SS_CONNECTING, "CONNECTING" ) \ + EM( SS_CONNECTED, "CONNECTED" ) \ + EMe( SS_DISCONNECTING, "DISCONNECTING" ) #define rpc_show_socket_state(state) \ __print_symbolic(state, RPC_SHOW_SOCKET) @@ -719,6 +868,69 @@ DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection); DEFINE_RPC_SOCKET_EVENT(rpc_socket_close); DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown); +TRACE_DEFINE_ENUM(XPRT_LOCKED); +TRACE_DEFINE_ENUM(XPRT_CONNECTED); +TRACE_DEFINE_ENUM(XPRT_CONNECTING); +TRACE_DEFINE_ENUM(XPRT_CLOSE_WAIT); +TRACE_DEFINE_ENUM(XPRT_BOUND); +TRACE_DEFINE_ENUM(XPRT_BINDING); +TRACE_DEFINE_ENUM(XPRT_CLOSING); +TRACE_DEFINE_ENUM(XPRT_CONGESTED); +TRACE_DEFINE_ENUM(XPRT_CWND_WAIT); +TRACE_DEFINE_ENUM(XPRT_WRITE_SPACE); + +#define rpc_show_xprt_state(x) \ + __print_flags(x, "|", \ + { (1UL << XPRT_LOCKED), "LOCKED"}, \ + { (1UL << XPRT_CONNECTED), "CONNECTED"}, \ + { (1UL << XPRT_CONNECTING), "CONNECTING"}, \ + { (1UL << XPRT_CLOSE_WAIT), "CLOSE_WAIT"}, \ + { (1UL << XPRT_BOUND), "BOUND"}, \ + { (1UL << XPRT_BINDING), "BINDING"}, \ + { (1UL << XPRT_CLOSING), "CLOSING"}, \ + { (1UL << XPRT_CONGESTED), "CONGESTED"}, \ + { (1UL << XPRT_CWND_WAIT), "CWND_WAIT"}, \ + { (1UL << XPRT_WRITE_SPACE), "WRITE_SPACE"}) + +DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, + TP_PROTO( + const struct rpc_xprt *xprt + ), + + TP_ARGS(xprt), + + TP_STRUCT__entry( + __field(unsigned long, state) + __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) + __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) + ), + + TP_fast_assign( + __entry->state = xprt->state; + __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); + __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); + ), + + TP_printk("peer=[%s]:%s state=%s", + __get_str(addr), __get_str(port), + rpc_show_xprt_state(__entry->state)) +); + +#define DEFINE_RPC_XPRT_LIFETIME_EVENT(name) \ + DEFINE_EVENT(rpc_xprt_lifetime_class, \ + xprt_##name, \ + TP_PROTO( \ + const struct rpc_xprt *xprt \ + ), \ + TP_ARGS(xprt)) + +DEFINE_RPC_XPRT_LIFETIME_EVENT(create); +DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_auto); +DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_done); +DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_force); +DEFINE_RPC_XPRT_LIFETIME_EVENT(disconnect_cleanup); +DEFINE_RPC_XPRT_LIFETIME_EVENT(destroy); + DECLARE_EVENT_CLASS(rpc_xprt_event, TP_PROTO( const struct rpc_xprt *xprt, @@ -990,6 +1202,54 @@ TRACE_EVENT(xs_stream_read_request, __entry->copied, __entry->reclen, __entry->offset) ); + +DECLARE_EVENT_CLASS(svc_xdr_buf_class, + TP_PROTO( + const struct svc_rqst *rqst, + const struct xdr_buf *xdr + ), + + TP_ARGS(rqst, xdr), + + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, head_base) + __field(size_t, head_len) + __field(const void *, tail_base) + __field(size_t, tail_len) + __field(unsigned int, page_len) + __field(unsigned int, msg_len) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->head_base = xdr->head[0].iov_base; + __entry->head_len = xdr->head[0].iov_len; + __entry->tail_base = xdr->tail[0].iov_base; + __entry->tail_len = xdr->tail[0].iov_len; + __entry->page_len = xdr->page_len; + __entry->msg_len = xdr->len; + ), + + TP_printk("xid=0x%08x head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + __entry->xid, + __entry->head_base, __entry->head_len, __entry->page_len, + __entry->tail_base, __entry->tail_len, __entry->msg_len + ) +); + +#define DEFINE_SVCXDRBUF_EVENT(name) \ + DEFINE_EVENT(svc_xdr_buf_class, \ + svc_xdr_##name, \ + TP_PROTO( \ + const struct svc_rqst *rqst, \ + const struct xdr_buf *xdr \ + ), \ + TP_ARGS(rqst, xdr)) + +DEFINE_SVCXDRBUF_EVENT(recvfrom); +DEFINE_SVCXDRBUF_EVENT(sendto); + #define show_rqstp_flags(flags) \ __print_flags(flags, "|", \ { (1UL << RQ_SECURE), "RQ_SECURE"}, \ @@ -1024,6 +1284,17 @@ TRACE_EVENT(svc_recv, show_rqstp_flags(__entry->flags)) ); +TRACE_DEFINE_ENUM(SVC_GARBAGE); +TRACE_DEFINE_ENUM(SVC_SYSERR); +TRACE_DEFINE_ENUM(SVC_VALID); +TRACE_DEFINE_ENUM(SVC_NEGATIVE); +TRACE_DEFINE_ENUM(SVC_OK); +TRACE_DEFINE_ENUM(SVC_DROP); +TRACE_DEFINE_ENUM(SVC_CLOSE); +TRACE_DEFINE_ENUM(SVC_DENIED); +TRACE_DEFINE_ENUM(SVC_PENDING); +TRACE_DEFINE_ENUM(SVC_COMPLETE); + #define svc_show_status(status) \ __print_symbolic(status, \ { SVC_GARBAGE, "SVC_GARBAGE" }, \ @@ -1167,28 +1438,54 @@ DEFINE_EVENT(svc_rqst_status, svc_send, { (1UL << XPT_KILL_TEMP), "XPT_KILL_TEMP"}, \ { (1UL << XPT_CONG_CTRL), "XPT_CONG_CTRL"}) +TRACE_EVENT(svc_xprt_create_err, + TP_PROTO( + const char *program, + const char *protocol, + struct sockaddr *sap, + const struct svc_xprt *xprt + ), + + TP_ARGS(program, protocol, sap, xprt), + + TP_STRUCT__entry( + __field(long, error) + __string(program, program) + __string(protocol, protocol) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->error = PTR_ERR(xprt); + __assign_str(program, program); + __assign_str(protocol, protocol); + memcpy(__entry->addr, sap, sizeof(__entry->addr)); + ), + + TP_printk("addr=%pISpc program=%s protocol=%s error=%ld", + __entry->addr, __get_str(program), __get_str(protocol), + __entry->error) +); + TRACE_EVENT(svc_xprt_do_enqueue, TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), TP_ARGS(xprt, rqst), TP_STRUCT__entry( - __field(struct svc_xprt *, xprt) __field(int, pid) __field(unsigned long, flags) __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = xprt; __entry->pid = rqst? rqst->rq_task->pid : 0; __entry->flags = xprt->xpt_flags; __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s pid=%d flags=%s", - __entry->xprt, __get_str(addr), - __entry->pid, show_svc_xprt_flags(__entry->flags)) + TP_printk("addr=%s pid=%d flags=%s", __get_str(addr), + __entry->pid, show_svc_xprt_flags(__entry->flags)) ); DECLARE_EVENT_CLASS(svc_xprt_event, @@ -1197,25 +1494,55 @@ DECLARE_EVENT_CLASS(svc_xprt_event, TP_ARGS(xprt), TP_STRUCT__entry( - __field(struct svc_xprt *, xprt) __field(unsigned long, flags) __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = xprt; __entry->flags = xprt->xpt_flags; __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s flags=%s", - __entry->xprt, __get_str(addr), - show_svc_xprt_flags(__entry->flags)) + TP_printk("addr=%s flags=%s", __get_str(addr), + show_svc_xprt_flags(__entry->flags)) ); -DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space, - TP_PROTO(struct svc_xprt *xprt), - TP_ARGS(xprt)); +#define DEFINE_SVC_XPRT_EVENT(name) \ + DEFINE_EVENT(svc_xprt_event, svc_xprt_##name, \ + TP_PROTO( \ + struct svc_xprt *xprt \ + ), \ + TP_ARGS(xprt)) + +DEFINE_SVC_XPRT_EVENT(no_write_space); +DEFINE_SVC_XPRT_EVENT(close); +DEFINE_SVC_XPRT_EVENT(detach); +DEFINE_SVC_XPRT_EVENT(free); + +TRACE_EVENT(svc_xprt_accept, + TP_PROTO( + const struct svc_xprt *xprt, + const char *service + ), + + TP_ARGS(xprt, service), + + TP_STRUCT__entry( + __string(addr, xprt->xpt_remotebuf) + __string(protocol, xprt->xpt_class->xcl_name) + __string(service, service) + ), + + TP_fast_assign( + __assign_str(addr, xprt->xpt_remotebuf); + __assign_str(protocol, xprt->xpt_class->xcl_name) + __assign_str(service, service); + ), + + TP_printk("addr=%s protocol=%s service=%s", + __get_str(addr), __get_str(protocol), __get_str(service) + ) +); TRACE_EVENT(svc_xprt_dequeue, TP_PROTO(struct svc_rqst *rqst), @@ -1223,24 +1550,20 @@ TRACE_EVENT(svc_xprt_dequeue, TP_ARGS(rqst), TP_STRUCT__entry( - __field(struct svc_xprt *, xprt) __field(unsigned long, flags) __field(unsigned long, wakeup) __string(addr, rqst->rq_xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = rqst->rq_xprt; __entry->flags = rqst->rq_xprt->xpt_flags; __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), rqst->rq_qtime)); __assign_str(addr, rqst->rq_xprt->xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s flags=%s wakeup-us=%lu", - __entry->xprt, __get_str(addr), - show_svc_xprt_flags(__entry->flags), - __entry->wakeup) + TP_printk("addr=%s flags=%s wakeup-us=%lu", __get_str(addr), + show_svc_xprt_flags(__entry->flags), __entry->wakeup) ); TRACE_EVENT(svc_wake_up, @@ -1265,21 +1588,18 @@ TRACE_EVENT(svc_handle_xprt, TP_ARGS(xprt, len), TP_STRUCT__entry( - __field(struct svc_xprt *, xprt) __field(int, len) __field(unsigned long, flags) __string(addr, xprt->xpt_remotebuf) ), TP_fast_assign( - __entry->xprt = xprt; __entry->len = len; __entry->flags = xprt->xpt_flags; __assign_str(addr, xprt->xpt_remotebuf); ), - TP_printk("xprt=%p addr=%s len=%d flags=%s", - __entry->xprt, __get_str(addr), + TP_printk("addr=%s len=%d flags=%s", __get_str(addr), __entry->len, show_svc_xprt_flags(__entry->flags)) ); @@ -1313,27 +1633,221 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_ARGS(dr), TP_STRUCT__entry( + __field(const void *, dr) __field(u32, xid) __string(addr, dr->xprt->xpt_remotebuf) ), TP_fast_assign( + __entry->dr = dr; __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); __assign_str(addr, dr->xprt->xpt_remotebuf); ), - TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid) + TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, + __entry->xid) ); + #define DEFINE_SVC_DEFERRED_EVENT(name) \ - DEFINE_EVENT(svc_deferred_event, svc_##name##_deferred, \ + DEFINE_EVENT(svc_deferred_event, svc_defer_##name, \ TP_PROTO( \ const struct svc_deferred_req *dr \ ), \ TP_ARGS(dr)) DEFINE_SVC_DEFERRED_EVENT(drop); -DEFINE_SVC_DEFERRED_EVENT(revisit); +DEFINE_SVC_DEFERRED_EVENT(queue); +DEFINE_SVC_DEFERRED_EVENT(recv); + +TRACE_EVENT(svcsock_new_socket, + TP_PROTO( + const struct socket *socket + ), + + TP_ARGS(socket), + + TP_STRUCT__entry( + __field(unsigned long, type) + __field(unsigned long, family) + __field(bool, listener) + ), + + TP_fast_assign( + __entry->type = socket->type; + __entry->family = socket->sk->sk_family; + __entry->listener = (socket->sk->sk_state == TCP_LISTEN); + ), + + TP_printk("type=%s family=%s%s", + show_socket_type(__entry->type), + rpc_show_address_family(__entry->family), + __entry->listener ? " (listener)" : "" + ) +); + +TRACE_EVENT(svcsock_marker, + TP_PROTO( + const struct svc_xprt *xprt, + __be32 marker + ), + + TP_ARGS(xprt, marker), + + TP_STRUCT__entry( + __field(unsigned int, length) + __field(bool, last) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->length = be32_to_cpu(marker) & RPC_FRAGMENT_SIZE_MASK; + __entry->last = be32_to_cpu(marker) & RPC_LAST_STREAM_FRAGMENT; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s length=%u%s", __get_str(addr), + __entry->length, __entry->last ? " (last)" : "") +); + +DECLARE_EVENT_CLASS(svcsock_class, + TP_PROTO( + const struct svc_xprt *xprt, + ssize_t result + ), + + TP_ARGS(xprt, result), + + TP_STRUCT__entry( + __field(ssize_t, result) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->result = result; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s result=%zd flags=%s", __get_str(addr), + __entry->result, show_svc_xprt_flags(__entry->flags) + ) +); + +#define DEFINE_SVCSOCK_EVENT(name) \ + DEFINE_EVENT(svcsock_class, svcsock_##name, \ + TP_PROTO( \ + const struct svc_xprt *xprt, \ + ssize_t result \ + ), \ + TP_ARGS(xprt, result)) + +DEFINE_SVCSOCK_EVENT(udp_send); +DEFINE_SVCSOCK_EVENT(udp_recv); +DEFINE_SVCSOCK_EVENT(udp_recv_err); +DEFINE_SVCSOCK_EVENT(tcp_send); +DEFINE_SVCSOCK_EVENT(tcp_recv); +DEFINE_SVCSOCK_EVENT(tcp_recv_eagain); +DEFINE_SVCSOCK_EVENT(tcp_recv_err); +DEFINE_SVCSOCK_EVENT(data_ready); +DEFINE_SVCSOCK_EVENT(write_space); + +TRACE_EVENT(svcsock_tcp_recv_short, + TP_PROTO( + const struct svc_xprt *xprt, + u32 expected, + u32 received + ), + + TP_ARGS(xprt, expected, received), + + TP_STRUCT__entry( + __field(u32, expected) + __field(u32, received) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->expected = expected; + __entry->received = received; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s flags=%s expected=%u received=%u", + __get_str(addr), show_svc_xprt_flags(__entry->flags), + __entry->expected, __entry->received + ) +); + +TRACE_EVENT(svcsock_tcp_state, + TP_PROTO( + const struct svc_xprt *xprt, + const struct socket *socket + ), + + TP_ARGS(xprt, socket), + + TP_STRUCT__entry( + __field(unsigned long, socket_state) + __field(unsigned long, sock_state) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->socket_state = socket->state; + __entry->sock_state = socket->sk->sk_state; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s state=%s sk_state=%s flags=%s", __get_str(addr), + rpc_show_socket_state(__entry->socket_state), + rpc_show_sock_state(__entry->sock_state), + show_svc_xprt_flags(__entry->flags) + ) +); + +DECLARE_EVENT_CLASS(svcsock_accept_class, + TP_PROTO( + const struct svc_xprt *xprt, + const char *service, + long status + ), + + TP_ARGS(xprt, service, status), + + TP_STRUCT__entry( + __field(long, status) + __string(service, service) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->status = status; + __assign_str(service, service); + memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr)); + ), + + TP_printk("listener=%pISpc service=%s status=%ld", + __entry->addr, __get_str(service), __entry->status + ) +); + +#define DEFINE_ACCEPT_EVENT(name) \ + DEFINE_EVENT(svcsock_accept_class, svcsock_##name##_err, \ + TP_PROTO( \ + const struct svc_xprt *xprt, \ + const char *service, \ + long status \ + ), \ + TP_ARGS(xprt, service, status)) + +DEFINE_ACCEPT_EVENT(accept); +DEFINE_ACCEPT_EVENT(getpeername); DECLARE_EVENT_CLASS(cache_event, TP_PROTO( @@ -1368,6 +1882,86 @@ DEFINE_CACHE_EVENT(cache_entry_update); DEFINE_CACHE_EVENT(cache_entry_make_negative); DEFINE_CACHE_EVENT(cache_entry_no_listener); +DECLARE_EVENT_CLASS(register_class, + TP_PROTO( + const char *program, + const u32 version, + const int family, + const unsigned short protocol, + const unsigned short port, + int error + ), + + TP_ARGS(program, version, family, protocol, port, error), + + TP_STRUCT__entry( + __field(u32, version) + __field(unsigned long, family) + __field(unsigned short, protocol) + __field(unsigned short, port) + __field(int, error) + __string(program, program) + ), + + TP_fast_assign( + __entry->version = version; + __entry->family = family; + __entry->protocol = protocol; + __entry->port = port; + __entry->error = error; + __assign_str(program, program); + ), + + TP_printk("program=%sv%u proto=%s port=%u family=%s error=%d", + __get_str(program), __entry->version, + __entry->protocol == IPPROTO_UDP ? "udp" : "tcp", + __entry->port, rpc_show_address_family(__entry->family), + __entry->error + ) +); + +#define DEFINE_REGISTER_EVENT(name) \ + DEFINE_EVENT(register_class, svc_##name, \ + TP_PROTO( \ + const char *program, \ + const u32 version, \ + const int family, \ + const unsigned short protocol, \ + const unsigned short port, \ + int error \ + ), \ + TP_ARGS(program, version, family, protocol, \ + port, error)) + +DEFINE_REGISTER_EVENT(register); +DEFINE_REGISTER_EVENT(noregister); + +TRACE_EVENT(svc_unregister, + TP_PROTO( + const char *program, + const u32 version, + int error + ), + + TP_ARGS(program, version, error), + + TP_STRUCT__entry( + __field(u32, version) + __field(int, error) + __string(program, program) + ), + + TP_fast_assign( + __entry->version = version; + __entry->error = error; + __assign_str(program, program); + ), + + TP_printk("program=%sv%u error=%d", + __get_str(program), __entry->version, __entry->error + ) +); + #endif /* _TRACE_SUNRPC_H */ #include <trace/define_trace.h> diff --git a/kernel/kcov.c b/kernel/kcov.c index 55c5d883a93e..6afae0bcbac4 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -427,7 +427,8 @@ void kcov_task_exit(struct task_struct *t) * WARN_ON(!kcov->remote && kcov->t != t); * * For KCOV_REMOTE_ENABLE devices, the exiting task is either: - * 2. A remote task between kcov_remote_start() and kcov_remote_stop(). + * + * 1. A remote task between kcov_remote_start() and kcov_remote_stop(). * In this case we should print a warning right away, since a task * shouldn't be exiting when it's in a kcov coverage collection * section. Here t points to the task that is collecting remote @@ -437,7 +438,7 @@ void kcov_task_exit(struct task_struct *t) * WARN_ON(kcov->remote && kcov->t != t); * * 2. The task that created kcov exiting without calling KCOV_DISABLE, - * and then again we can make sure that t->kcov->t == t: + * and then again we make sure that t->kcov->t == t: * WARN_ON(kcov->remote && kcov->t != t); * * By combining all three checks into one we get: @@ -764,7 +765,7 @@ static const struct file_operations kcov_fops = { * Internally, kcov_remote_start() looks up the kcov device associated with the * provided handle, allocates an area for coverage collection, and saves the * pointers to kcov and area into the current task_struct to allow coverage to - * be collected via __sanitizer_cov_trace_pc() + * be collected via __sanitizer_cov_trace_pc(). * In turns kcov_remote_stop() clears those pointers from task_struct to stop * collecting coverage and copies all collected coverage into the kcov area. */ @@ -972,16 +973,25 @@ void kcov_remote_stop(void) local_irq_restore(flags); return; } - kcov = t->kcov; - area = t->kcov_area; - size = t->kcov_size; - sequence = t->kcov_sequence; - + /* + * When in softirq, check if the corresponding kcov_remote_start() + * actually found the remote handle and started collecting coverage. + */ + if (in_serving_softirq() && !t->kcov_softirq) { + local_irq_restore(flags); + return; + } + /* Make sure that kcov_softirq is only set when in softirq. */ if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) { local_irq_restore(flags); return; } + kcov = t->kcov; + area = t->kcov_area; + size = t->kcov_size; + sequence = t->kcov_sequence; + kcov_stop(t); if (in_serving_softirq()) { t->kcov_softirq = 0; diff --git a/kernel/kthread.c b/kernel/kthread.c index bfbfa481be3a..8e3d2d7fdf5e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1,13 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* Kernel thread helper functions. * Copyright (C) 2004 IBM Corporation, Rusty Russell. + * Copyright (C) 2009 Red Hat, Inc. * * Creation is done via kthreadd, so that we get a clean environment * even if we're invoked from userspace (think modprobe, hotplug cpu, * etc.). */ #include <uapi/linux/sched/types.h> +#include <linux/mm.h> +#include <linux/mmu_context.h> #include <linux/sched.h> +#include <linux/sched/mm.h> #include <linux/sched/task.h> #include <linux/kthread.h> #include <linux/completion.h> @@ -25,6 +29,7 @@ #include <linux/numa.h> #include <trace/events/sched.h> + static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; @@ -46,7 +51,9 @@ struct kthread_create_info struct kthread { unsigned long flags; unsigned int cpu; + int (*threadfn)(void *); void *data; + mm_segment_t oldfs; struct completion parked; struct completion exited; #ifdef CONFIG_BLK_CGROUP @@ -153,6 +160,20 @@ bool kthread_freezable_should_stop(bool *was_frozen) EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); /** + * kthread_func - return the function specified on kthread creation + * @task: kthread task in question + * + * Returns NULL if the task is not a kthread. + */ +void *kthread_func(struct task_struct *task) +{ + if (task->flags & PF_KTHREAD) + return to_kthread(task)->threadfn; + return NULL; +} +EXPORT_SYMBOL_GPL(kthread_func); + +/** * kthread_data - return data value specified on kthread creation * @task: kthread task in question * @@ -164,6 +185,7 @@ void *kthread_data(struct task_struct *task) { return to_kthread(task)->data; } +EXPORT_SYMBOL_GPL(kthread_data); /** * kthread_probe_data - speculative version of kthread_data() @@ -244,6 +266,7 @@ static int kthread(void *_create) do_exit(-ENOMEM); } + self->threadfn = threadfn; self->data = data; init_completion(&self->exited); init_completion(&self->parked); @@ -1203,6 +1226,61 @@ void kthread_destroy_worker(struct kthread_worker *worker) } EXPORT_SYMBOL(kthread_destroy_worker); +/** + * kthread_use_mm - make the calling kthread operate on an address space + * @mm: address space to operate on + */ +void kthread_use_mm(struct mm_struct *mm) +{ + struct mm_struct *active_mm; + struct task_struct *tsk = current; + + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(tsk->mm); + + task_lock(tsk); + active_mm = tsk->active_mm; + if (active_mm != mm) { + mmgrab(mm); + tsk->active_mm = mm; + } + tsk->mm = mm; + switch_mm(active_mm, mm, tsk); + task_unlock(tsk); +#ifdef finish_arch_post_lock_switch + finish_arch_post_lock_switch(); +#endif + + if (active_mm != mm) + mmdrop(active_mm); + + to_kthread(tsk)->oldfs = get_fs(); + set_fs(USER_DS); +} +EXPORT_SYMBOL_GPL(kthread_use_mm); + +/** + * kthread_unuse_mm - reverse the effect of kthread_use_mm() + * @mm: address space to operate on + */ +void kthread_unuse_mm(struct mm_struct *mm) +{ + struct task_struct *tsk = current; + + WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); + WARN_ON_ONCE(!tsk->mm); + + set_fs(to_kthread(tsk)->oldfs); + + task_lock(tsk); + sync_mm_rss(mm); + tsk->mm = NULL; + /* active_mm is still 'mm' */ + enter_lazy_tlb(mm, tsk); + task_unlock(tsk); +} +EXPORT_SYMBOL_GPL(kthread_unuse_mm); + #ifdef CONFIG_BLK_CGROUP /** * kthread_associate_blkcg - associate blkcg to current kthread diff --git a/kernel/scs.c b/kernel/scs.c index 222a7a9ad543..5d4d9bbdec36 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -74,7 +74,7 @@ static void scs_check_usage(struct task_struct *tsk) for (p = task_scs(tsk); p < __scs_magic(tsk); ++p) { if (!READ_ONCE_NOCHECK(*p)) break; - used++; + used += sizeof(*p); } while (used > curr) { diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7cb09c4cf21c..02441ead3c3b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -928,14 +928,12 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) clocksource_arch_init(cs); -#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE if (cs->vdso_clock_mode < 0 || cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) { pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n", cs->name, cs->vdso_clock_mode); cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE; } -#endif /* Initialize mult/shift and max_idle_ns */ __clocksource_update_freq_scale(cs, scale, freq); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index ea47f2084087..5773f0ba7e76 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -885,10 +885,10 @@ static void blk_add_trace_bio_bounce(void *ignore, } static void blk_add_trace_bio_complete(void *ignore, - struct request_queue *q, struct bio *bio, - int error) + struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, + blk_status_to_errno(bio->bi_status)); } static void blk_add_trace_bio_backmerge(void *ignore, @@ -995,8 +995,10 @@ static void blk_add_trace_split(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, - BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu), - &rpdu, blk_trace_bio_get_cgid(q, bio)); + BLK_TA_SPLIT, + blk_status_to_errno(bio->bi_status), + sizeof(rpdu), &rpdu, + blk_trace_bio_get_cgid(q, bio)); } rcu_read_unlock(); } @@ -1033,7 +1035,8 @@ static void blk_add_trace_bio_remap(void *ignore, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status, + bio_op(bio), bio->bi_opf, BLK_TA_REMAP, + blk_status_to_errno(bio->bi_status), sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); } @@ -1253,21 +1256,10 @@ static inline __u16 t_error(const struct trace_entry *ent) static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg) { - const __u64 *val = pdu_start(ent, has_cg); + const __be64 *val = pdu_start(ent, has_cg); return be64_to_cpu(*val); } -static void get_pdu_remap(const struct trace_entry *ent, - struct blk_io_trace_remap *r, bool has_cg) -{ - const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg); - __u64 sector_from = __r->sector_from; - - r->device_from = be32_to_cpu(__r->device_from); - r->device_to = be32_to_cpu(__r->device_to); - r->sector_from = be64_to_cpu(sector_from); -} - typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act, bool has_cg); @@ -1407,13 +1399,13 @@ static void blk_log_with_error(struct trace_seq *s, static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { - struct blk_io_trace_remap r = { .device_from = 0, }; + const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg); - get_pdu_remap(ent, &r, has_cg); trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", t_sector(ent), t_sec(ent), - MAJOR(r.device_from), MINOR(r.device_from), - (unsigned long long)r.sector_from); + MAJOR(be32_to_cpu(__r->device_from)), + MINOR(be32_to_cpu(__r->device_from)), + be64_to_cpu(__r->sector_from)); } static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cb98741601bd..5beec9c833ce 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2052,15 +2052,15 @@ config TEST_LKM If unsure, say N. config TEST_BITOPS - tristate "Test module for compilation of clear_bit/set_bit operations" + tristate "Test module for compilation of bitops operations" depends on m help This builds the "test_bitops" module that is much like the TEST_LKM module except that it does a basic exercise of the - clear_bit and set_bit macros to make sure there are no compiler - warnings from C=1 sparse checker or -Wextra compilations. It has - no dependencies and doesn't run or load unless explicitly requested - by name. for example: modprobe test_bitops. + set/clear_bit macros and get_count_order/long to make sure there are + no compiler warnings from C=1 sparse checker or -Wextra + compilations. It has no dependencies and doesn't run or load unless + explicitly requested by name. for example: modprobe test_bitops. If unsure, say N. diff --git a/lib/bitmap.c b/lib/bitmap.c index 21a7640c5eed..0364452b1617 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -741,8 +741,9 @@ int bitmap_parse(const char *start, unsigned int buflen, int chunks = BITS_TO_U32(nmaskbits); u32 *bitmap = (u32 *)maskp; int unset_bit; + int chunk; - while (1) { + for (chunk = 0; ; chunk++) { end = bitmap_find_region_reverse(start, end); if (start > end) break; @@ -750,7 +751,11 @@ int bitmap_parse(const char *start, unsigned int buflen, if (!chunks--) return -EOVERFLOW; - end = bitmap_get_x32_reverse(start, end, bitmap++); +#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) + end = bitmap_get_x32_reverse(start, end, &bitmap[chunk ^ 1]); +#else + end = bitmap_get_x32_reverse(start, end, &bitmap[chunk]); +#endif if (IS_ERR(end)) return PTR_ERR(end); } diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 0c9d3ad17e0f..5371dab6b481 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -141,6 +141,9 @@ static FORCE_INLINE int LZ4_decompress_generic( * space in the output for those 18 bytes earlier, upon * entering the shortcut (in other words, there is a * combined check for both stages). + * + * The & in the likely() below is intentionally not && so that + * some compilers can produce better parallelized runtime code */ if ((endOnInput ? length != RUN_MASK : length <= 8) /* diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 717c940112f9..8ad5ba2b86e2 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -268,6 +268,19 @@ m_len_done: *op++ = (M4_MARKER | ((m_off >> 11) & 8) | (m_len - 2)); else { + if (unlikely(((m_off & 0x403f) == 0x403f) + && (m_len >= 261) + && (m_len <= 264)) + && likely(bitstream_version)) { + // Under lzo-rle, block copies + // for 261 <= length <= 264 and + // (distance & 0x80f3) == 0x80f3 + // can result in ambiguous + // output. Adjust length + // to 260 to prevent ambiguity. + ip -= m_len - 260; + m_len = 260; + } m_len -= M4_MAX_LEN; *op++ = (M4_MARKER | ((m_off >> 11) & 8)); while (unlikely(m_len > 255)) { diff --git a/lib/test_bitops.c b/lib/test_bitops.c index fd50b3ae4a14..ced25e3a779b 100644 --- a/lib/test_bitops.c +++ b/lib/test_bitops.c @@ -9,7 +9,11 @@ #include <linux/module.h> #include <linux/printk.h> -/* a tiny module only meant to test set/clear_bit */ +/* a tiny module only meant to test + * + * set/clear_bit + * get_count_order/long + */ /* use an enum because thats the most common BITMAP usage */ enum bitops_fun { @@ -24,14 +28,59 @@ enum bitops_fun { static DECLARE_BITMAP(g_bitmap, BITOPS_LENGTH); +static unsigned int order_comb[][2] = { + {0x00000003, 2}, + {0x00000004, 2}, + {0x00001fff, 13}, + {0x00002000, 13}, + {0x50000000, 31}, + {0x80000000, 31}, + {0x80003000, 32}, +}; + +#ifdef CONFIG_64BIT +static unsigned long order_comb_long[][2] = { + {0x0000000300000000, 34}, + {0x0000000400000000, 34}, + {0x00001fff00000000, 45}, + {0x0000200000000000, 45}, + {0x5000000000000000, 63}, + {0x8000000000000000, 63}, + {0x8000300000000000, 64}, +}; +#endif + static int __init test_bitops_startup(void) { + int i; + pr_warn("Loaded test module\n"); set_bit(BITOPS_4, g_bitmap); set_bit(BITOPS_7, g_bitmap); set_bit(BITOPS_11, g_bitmap); set_bit(BITOPS_31, g_bitmap); set_bit(BITOPS_88, g_bitmap); + + for (i = 0; i < ARRAY_SIZE(order_comb); i++) { + if (order_comb[i][1] != get_count_order(order_comb[i][0])) + pr_warn("get_count_order wrong for %x\n", + order_comb[i][0]); + } + + for (i = 0; i < ARRAY_SIZE(order_comb); i++) { + if (order_comb[i][1] != get_count_order_long(order_comb[i][0])) + pr_warn("get_count_order_long wrong for %x\n", + order_comb[i][0]); + } + +#ifdef CONFIG_64BIT + for (i = 0; i < ARRAY_SIZE(order_comb_long); i++) { + if (order_comb_long[i][1] != + get_count_order_long(order_comb_long[i][0])) + pr_warn("get_count_order_long wrong for %lx\n", + order_comb_long[i][0]); + } +#endif return 0; } @@ -55,6 +104,6 @@ static void __exit test_bitops_unstartup(void) module_init(test_bitops_startup); module_exit(test_bitops_unstartup); -MODULE_AUTHOR("Jesse Brandeburg <jesse.brandeburg@intel.com>"); +MODULE_AUTHOR("Jesse Brandeburg <jesse.brandeburg@intel.com>, Wei Yang <richard.weiyang@gmail.com>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Bit testing module"); diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index a2909af4b924..bcc9a98a0524 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -38,6 +38,13 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd) } #endif +#ifndef vdso_cycles_ok +static inline bool vdso_cycles_ok(u64 cycles) +{ + return true; +} +#endif + #ifdef CONFIG_TIME_NS static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, struct __kernel_timespec *ts) @@ -62,6 +69,8 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, return -1; cycles = __arch_get_hw_counter(vd->clock_mode); + if (unlikely(!vdso_cycles_ok(cycles))) + return -1; ns = vdso_ts->nsec; last = vd->cycle_last; ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); @@ -130,6 +139,8 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, return -1; cycles = __arch_get_hw_counter(vd->clock_mode); + if (unlikely(!vdso_cycles_ok(cycles))) + return -1; ns = vdso_ts->nsec; last = vd->cycle_last; ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); @@ -210,7 +221,7 @@ static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk, return 0; } -static __maybe_unused int +static __always_inline int __cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock, struct __kernel_timespec *ts) { diff --git a/mm/Makefile b/mm/Makefile index 662fd1504646..cc8f897dfac0 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -41,7 +41,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ - mm_init.o mmu_context.o percpu.o slab_common.o \ + mm_init.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ debug.o gup.o $(mmu-y) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 9ec59c38d6a2..e45623016aea 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -60,6 +60,9 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot) { pmd_t pmd = pfn_pmd(pfn, prot); + if (!has_transparent_hugepage()) + return; + WARN_ON(!pmd_same(pmd, pmd)); WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd)))); WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd)))); @@ -79,6 +82,9 @@ static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { pud_t pud = pfn_pud(pfn, prot); + if (!has_transparent_hugepage()) + return; + WARN_ON(!pud_same(pud, pud)); WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud)))); WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud)))); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ababa368cb68..47b8ccb1fb9b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -212,15 +212,13 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags) short addr_lsb = tk->size_shift; int ret = 0; - if ((t->mm == current->mm) || !(flags & MF_ACTION_REQUIRED)) - pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n", + pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n", pfn, t->comm, t->pid); if (flags & MF_ACTION_REQUIRED) { - if (t->mm == current->mm) - ret = force_sig_mceerr(BUS_MCEERR_AR, + WARN_ON_ONCE(t != current); + ret = force_sig_mceerr(BUS_MCEERR_AR, (void __user *)tk->addr, addr_lsb); - /* send no signal to non-current processes */ } else { /* * Don't use force here, it's convenient if the signal @@ -402,9 +400,15 @@ static struct task_struct *find_early_kill_thread(struct task_struct *tsk) { struct task_struct *t; - for_each_thread(tsk, t) - if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY)) - return t; + for_each_thread(tsk, t) { + if (t->flags & PF_MCE_PROCESS) { + if (t->flags & PF_MCE_EARLY) + return t; + } else { + if (sysctl_memory_failure_early_kill) + return t; + } + } return NULL; } @@ -413,21 +417,26 @@ static struct task_struct *find_early_kill_thread(struct task_struct *tsk) * to be signaled when some page under the process is hwpoisoned. * Return task_struct of the dedicated thread (main thread unless explicitly * specified) if the process is "early kill," and otherwise returns NULL. + * + * Note that the above is true for Action Optional case, but not for Action + * Required case where SIGBUS should sent only to the current thread. */ static struct task_struct *task_early_kill(struct task_struct *tsk, int force_early) { - struct task_struct *t; if (!tsk->mm) return NULL; - if (force_early) - return tsk; - t = find_early_kill_thread(tsk); - if (t) - return t; - if (sysctl_memory_failure_early_kill) - return tsk; - return NULL; + if (force_early) { + /* + * Comparing ->mm here because current task might represent + * a subthread, while tsk always points to the main thread. + */ + if (tsk->mm == current->mm) + return current; + else + return NULL; + } + return find_early_kill_thread(tsk); } /* diff --git a/mm/mmu_context.c b/mm/mmu_context.c deleted file mode 100644 index 3e612ae748e9..000000000000 --- a/mm/mmu_context.c +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (C) 2009 Red Hat, Inc. - * - * See ../COPYING for licensing terms. - */ - -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/sched/mm.h> -#include <linux/sched/task.h> -#include <linux/mmu_context.h> -#include <linux/export.h> - -#include <asm/mmu_context.h> - -/* - * use_mm - * Makes the calling kernel thread take on the specified - * mm context. - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void use_mm(struct mm_struct *mm) -{ - struct mm_struct *active_mm; - struct task_struct *tsk = current; - - task_lock(tsk); - active_mm = tsk->active_mm; - if (active_mm != mm) { - mmgrab(mm); - tsk->active_mm = mm; - } - tsk->mm = mm; - switch_mm(active_mm, mm, tsk); - task_unlock(tsk); -#ifdef finish_arch_post_lock_switch - finish_arch_post_lock_switch(); -#endif - - if (active_mm != mm) - mmdrop(active_mm); -} -EXPORT_SYMBOL_GPL(use_mm); - -/* - * unuse_mm - * Reverses the effect of use_mm, i.e. releases the - * specified mm context which was earlier taken on - * by the calling kernel thread - * (Note: this routine is intended to be called only - * from a kernel thread context) - */ -void unuse_mm(struct mm_struct *mm) -{ - struct task_struct *tsk = current; - - task_lock(tsk); - sync_mm_rss(mm); - tsk->mm = NULL; - /* active_mm is still 'mm' */ - enter_lazy_tlb(mm, tsk); - task_unlock(tsk); -} -EXPORT_SYMBOL_GPL(unuse_mm); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b4e9491cb320..6e94962893ee 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -126,7 +126,7 @@ static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc) /* * The process p may have detached its own ->mm while exiting or through - * use_mm(), but one or more of its subthreads may still have a valid + * kthread_use_mm(), but one or more of its subthreads may still have a valid * pointer. Return p, or any of its subthreads with a valid ->mm, with * task_lock() held. */ @@ -919,8 +919,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message) continue; } /* - * No use_mm() user needs to read from the userspace so we are - * ok to reap it. + * No kthead_use_mm() user needs to read from the userspace so + * we are ok to reap it. */ if (unlikely(p->flags & PF_KTHREAD)) continue; diff --git a/mm/vmacache.c b/mm/vmacache.c index d9092814c772..01a6e6688ec1 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -24,8 +24,8 @@ * task's vmacache pertains to a different mm (ie, its own). There is * nothing we can do here. * - * Also handle the case where a kernel thread has adopted this mm via use_mm(). - * That kernel thread's vmacache is not applicable to this mm. + * Also handle the case where a kernel thread has adopted this mm via + * kthread_use_mm(). That kernel thread's vmacache is not applicable to this mm. */ static inline bool vmacache_valid_mm(struct mm_struct *mm) { diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 8b4d72b1a066..010dcb876f9d 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -82,11 +82,11 @@ static size_t rpc_ntop6(const struct sockaddr *sap, rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id); - if (unlikely((size_t)rc > sizeof(scopebuf))) + if (unlikely((size_t)rc >= sizeof(scopebuf))) return 0; len += rc; - if (unlikely(len > buflen)) + if (unlikely(len >= buflen)) return 0; strcat(buf, scopebuf); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 5748ad0ba1bd..a9f0d17fdb0d 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -81,7 +81,7 @@ static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp) unsigned int nbits; nbits = *(unsigned int *)kp->arg; - return sprintf(buffer, "%u", 1U << nbits); + return sprintf(buffer, "%u\n", 1U << nbits); } #define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index ac5cac0dd24b..4ecc2a959567 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -254,7 +254,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct if (IS_ERR(p)) goto err; done: - trace_rpcgss_context(ctx->gc_expiry, now, timeout, + trace_rpcgss_context(window_size, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len, ctx->gc_acceptor.data); err: return p; @@ -697,10 +697,12 @@ retry: } schedule(); } - if (gss_msg->ctx) + if (gss_msg->ctx) { + trace_rpcgss_ctx_init(gss_cred); gss_cred_set_ctx(cred, gss_msg->ctx); - else + } else { err = gss_msg->msg.errno; + } spin_unlock(&pipe->lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); @@ -1054,11 +1056,11 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) auth->au_rslack = GSS_KRB5_MAX_SLACK_NEEDED >> 2; auth->au_verfsize = GSS_VERF_SLACK >> 2; auth->au_ralign = GSS_VERF_SLACK >> 2; - auth->au_flags = 0; + __set_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags); auth->au_ops = &authgss_ops; auth->au_flavor = flavor; if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor)) - auth->au_flags |= RPCAUTH_AUTH_DATATOUCH; + __set_bit(RPCAUTH_AUTH_DATATOUCH, &auth->au_flags); refcount_set(&auth->au_count, 1); kref_init(&gss_auth->kref); @@ -1284,8 +1286,9 @@ gss_send_destroy_context(struct rpc_cred *cred) if (new) { ctx->gc_proc = RPC_GSS_PROC_DESTROY; + trace_rpcgss_ctx_destroy(gss_cred); task = rpc_call_null(gss_auth->client, &new->gc_base, - RPC_TASK_ASYNC|RPC_TASK_SOFT); + RPC_TASK_ASYNC); if (!IS_ERR(task)) rpc_put_task(task); @@ -1349,7 +1352,6 @@ gss_destroy_nullcred(struct rpc_cred *cred) static void gss_destroy_cred(struct rpc_cred *cred) { - if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) gss_send_destroy_context(cred); gss_destroy_nullcred(cred); @@ -1613,6 +1615,7 @@ static int gss_renew_cred(struct rpc_task *task) new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); if (IS_ERR(new)) return PTR_ERR(new); + task->tk_rqstp->rq_cred = new; put_rpccred(oldcred); return 0; @@ -1709,7 +1712,8 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr) /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ - cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; + if (test_bit(RPCAUTH_AUTH_UPDATE_SLACK, &cred->cr_auth->au_flags)) + cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; status = 0; out: gss_put_ctx(ctx); @@ -1927,13 +1931,30 @@ out: return status; } -static int -gss_unwrap_resp_auth(struct rpc_cred *cred) +/** + * gss_update_rslack - Possibly update RPC receive buffer size estimates + * @task: rpc_task for incoming RPC Reply being unwrapped + * @cred: controlling rpc_cred for @task + * @before: XDR words needed before each RPC Reply message + * @after: XDR words needed following each RPC Reply message + * + */ +static void gss_update_rslack(struct rpc_task *task, struct rpc_cred *cred, + unsigned int before, unsigned int after) { struct rpc_auth *auth = cred->cr_auth; - auth->au_rslack = auth->au_verfsize; - auth->au_ralign = auth->au_verfsize; + if (test_and_clear_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags)) { + auth->au_ralign = auth->au_verfsize + before; + auth->au_rslack = auth->au_verfsize + after; + trace_rpcgss_update_slack(task, auth); + } +} + +static int +gss_unwrap_resp_auth(struct rpc_task *task, struct rpc_cred *cred) +{ + gss_update_rslack(task, cred, 0, 0); return 0; } @@ -1956,7 +1977,6 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, struct xdr_stream *xdr) { struct xdr_buf gss_data, *rcv_buf = &rqstp->rq_rcv_buf; - struct rpc_auth *auth = cred->cr_auth; u32 len, offset, seqno, maj_stat; struct xdr_netobj mic; int ret; @@ -2005,8 +2025,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, if (maj_stat != GSS_S_COMPLETE) goto bad_mic; - auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len); - auth->au_ralign = auth->au_verfsize + 2; + gss_update_rslack(task, cred, 2, 2 + 1 + XDR_QUADLEN(mic.len)); ret = 0; out: @@ -2031,7 +2050,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct kvec *head = rqstp->rq_rcv_buf.head; - struct rpc_auth *auth = cred->cr_auth; u32 offset, opaque_len, maj_stat; __be32 *p; @@ -2058,8 +2076,8 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, */ xdr_init_decode(xdr, rcv_buf, p, rqstp); - auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack; - auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align; + gss_update_rslack(task, cred, 2 + ctx->gc_gss_ctx->align, + 2 + ctx->gc_gss_ctx->slack); return 0; unwrap_failed: @@ -2130,7 +2148,7 @@ gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr) goto out_decode; switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: - status = gss_unwrap_resp_auth(cred); + status = gss_unwrap_resp_auth(task, cred); break; case RPC_GSS_SVC_INTEGRITY: status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr); diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 69316ab1b9fa..fae632da1058 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -37,6 +37,8 @@ gss_mech_free(struct gss_api_mech *gm) for (i = 0; i < gm->gm_pf_num; i++) { pf = &gm->gm_pfs[i]; + if (pf->domain) + auth_domain_put(pf->domain); kfree(pf->auth_domain_name); pf->auth_domain_name = NULL; } @@ -59,6 +61,7 @@ make_auth_domain_name(char *name) static int gss_mech_svc_setup(struct gss_api_mech *gm) { + struct auth_domain *dom; struct pf_desc *pf; int i, status; @@ -68,10 +71,13 @@ gss_mech_svc_setup(struct gss_api_mech *gm) status = -ENOMEM; if (pf->auth_domain_name == NULL) goto out; - status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor, - pf->auth_domain_name); - if (status) + dom = svcauth_gss_register_pseudoflavor( + pf->pseudoflavor, pf->auth_domain_name); + if (IS_ERR(dom)) { + status = PTR_ERR(dom); goto out; + } + pf->domain = dom; } return 0; out: diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 0349f455a862..af9c7f43859c 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -223,7 +223,7 @@ static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) static char *gssp_stringify(struct xdr_netobj *netobj) { - return kstrndup(netobj->data, netobj->len, GFP_KERNEL); + return kmemdup_nul(netobj->data, netobj->len, GFP_KERNEL); } static void gssp_hostbased_service(char **principal) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 50d93c49ef1a..46027d0c903f 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -809,7 +809,7 @@ u32 svcauth_gss_flavor(struct auth_domain *dom) EXPORT_SYMBOL_GPL(svcauth_gss_flavor); -int +struct auth_domain * svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) { struct gss_domain *new; @@ -826,21 +826,23 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) new->h.flavour = &svcauthops_gss; new->pseudoflavor = pseudoflavor; - stat = 0; test = auth_domain_lookup(name, &new->h); - if (test != &new->h) { /* Duplicate registration */ + if (test != &new->h) { + pr_warn("svc: duplicate registration of gss pseudo flavour %s.\n", + name); + stat = -EADDRINUSE; auth_domain_put(test); - kfree(new->h.name); - goto out_free_dom; + goto out_free_name; } - return 0; + return test; +out_free_name: + kfree(new->h.name); out_free_dom: kfree(new); out: - return stat; + return ERR_PTR(stat); } - EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor); static inline int diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c index 5576f1e66de9..49fa583d7f91 100644 --- a/net/sunrpc/auth_gss/trace.c +++ b/net/sunrpc/auth_gss/trace.c @@ -6,6 +6,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/gss_err.h> +#include <linux/sunrpc/auth_gss.h> #define CREATE_TRACE_POINTS #include <trace/events/rpcgss.h> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 61b21dafd7c0..a91d1cdad9d7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -370,10 +370,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, const char *nodename = args->nodename; int err; - /* sanity check the name before trying to print it */ - dprintk("RPC: creating %s client for %s (xprt %p)\n", - program->name, args->servername, xprt); - err = rpciod_up(); if (err) goto out_no_rpciod; @@ -436,6 +432,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, goto out_no_path; if (parent) atomic_inc(&parent->cl_count); + + trace_rpc_clnt_new(clnt, xprt, program->name, args->servername); return clnt; out_no_path: @@ -450,6 +448,7 @@ out_err: out_no_rpciod: xprt_switch_put(xps); xprt_put(xprt); + trace_rpc_clnt_new_err(program->name, args->servername, err); return ERR_PTR(err); } @@ -634,10 +633,8 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, args->nodename = clnt->cl_nodename; new = rpc_new_client(args, xps, xprt, clnt); - if (IS_ERR(new)) { - err = PTR_ERR(new); - goto out_err; - } + if (IS_ERR(new)) + return new; /* Turn off autobind on clones */ new->cl_autobind = 0; @@ -650,7 +647,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, return new; out_err: - dprintk("RPC: %s: returned error %d\n", __func__, err); + trace_rpc_clnt_clone_err(clnt, err); return ERR_PTR(err); } @@ -723,11 +720,8 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, int err; xprt = xprt_create_transport(args); - if (IS_ERR(xprt)) { - dprintk("RPC: failed to create new xprt for clnt %p\n", - clnt); + if (IS_ERR(xprt)) return PTR_ERR(xprt); - } xps = xprt_switch_alloc(xprt, GFP_KERNEL); if (xps == NULL) { @@ -767,7 +761,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, rpc_release_client(parent); xprt_switch_put(oldxps); xprt_put(old); - dprintk("RPC: replaced xprt for clnt %p\n", clnt); + trace_rpc_clnt_replace_xprt(clnt); return 0; out_revert: @@ -777,7 +771,7 @@ out_revert: rpc_client_register(clnt, pseudoflavor, NULL); xprt_switch_put(xps); xprt_put(xprt); - dprintk("RPC: failed to switch xprt for clnt %p\n", clnt); + trace_rpc_clnt_replace_xprt_err(clnt); return err; } EXPORT_SYMBOL_GPL(rpc_switch_client_transport); @@ -844,10 +838,11 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) return; - dprintk("RPC: killing all tasks for client %p\n", clnt); + /* * Spin lock all_tasks to prevent changes... */ + trace_rpc_clnt_killall(clnt); spin_lock(&clnt->cl_lock); list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) rpc_signal_task(rovr); @@ -863,9 +858,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt) { might_sleep(); - dprintk_rcu("RPC: shutting down %s client for %s\n", - clnt->cl_program->name, - rcu_dereference(clnt->cl_xprt)->servername); + trace_rpc_clnt_shutdown(clnt); while (!list_empty(&clnt->cl_tasks)) { rpc_killall_tasks(clnt); @@ -884,6 +877,8 @@ static void rpc_free_client_work(struct work_struct *work) { struct rpc_clnt *clnt = container_of(work, struct rpc_clnt, cl_work); + trace_rpc_clnt_free(clnt); + /* These might block on processes that might allocate memory, * so they cannot be called in rpciod, so they are handled separately * here. @@ -901,9 +896,7 @@ rpc_free_client(struct rpc_clnt *clnt) { struct rpc_clnt *parent = NULL; - dprintk_rcu("RPC: destroying %s client for %s\n", - clnt->cl_program->name, - rcu_dereference(clnt->cl_xprt)->servername); + trace_rpc_clnt_release(clnt); if (clnt->cl_parent != clnt) parent = clnt->cl_parent; rpc_unregister_client(clnt); @@ -945,8 +938,6 @@ rpc_free_auth(struct rpc_clnt *clnt) void rpc_release_client(struct rpc_clnt *clnt) { - dprintk("RPC: rpc_release_client(%p)\n", clnt); - do { if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); @@ -1270,7 +1261,7 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages, hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1; xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len); - trace_rpc_reply_pages(req); + trace_rpc_xdr_reply_pages(req->rq_task, &req->rq_rcv_buf); } EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages); @@ -1624,6 +1615,7 @@ const char static void __rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status) { + trace_rpc_call_rpcerror(task, tk_status, rpc_status); task->tk_rpc_status = rpc_status; rpc_exit(task, tk_status); } @@ -2531,7 +2523,7 @@ call_decode(struct rpc_task *task) goto out; req->rq_rcv_buf.len = req->rq_private_buf.len; - trace_xprt_recvfrom(&req->rq_rcv_buf); + trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf); /* Check that the softirq receive buffer is valid */ WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf, @@ -2760,7 +2752,8 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, .rpc_op_cred = cred, .callback_ops = (ops != NULL) ? ops : &rpc_default_ops, .callback_data = data, - .flags = flags | RPC_TASK_NULLCREDS, + .flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN | + RPC_TASK_NULLCREDS, }; return rpc_run_task(&task_setup_data); @@ -2823,8 +2816,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, goto success; } - task = rpc_call_null_helper(clnt, xprt, NULL, - RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS, + task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC, &rpc_cb_add_xprt_call_ops, data); rpc_put_task(task); @@ -2867,9 +2859,7 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt, goto out_err; /* Test the connection */ - task = rpc_call_null_helper(clnt, xprt, NULL, - RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS, - NULL, NULL); + task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL); if (IS_ERR(task)) { status = PTR_ERR(task); goto out_err; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 4a020b688860..c27123e6ba80 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -795,12 +795,6 @@ void rpcb_getport_async(struct rpc_task *task) child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); - if (IS_ERR(child)) { - /* rpcb_map_release() has freed the arguments */ - dprintk("RPC: %5u %s: rpc_run_task failed\n", - task->tk_pid, __func__); - return; - } xprt->stat.bind_count++; rpc_put_task(child); diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h index 47a756503d11..f6fe2e6cd65a 100644 --- a/net/sunrpc/sunrpc.h +++ b/net/sunrpc/sunrpc.h @@ -52,4 +52,5 @@ static inline int sock_is_loopback(struct sock *sk) int rpc_clients_notifier_register(void); void rpc_clients_notifier_unregister(void); +void auth_domain_cleanup(void); #endif /* _NET_SUNRPC_SUNRPC_H */ diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f9edaa9174a4..236fadc4a439 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -23,6 +23,7 @@ #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/xprtsock.h> +#include "sunrpc.h" #include "netns.h" unsigned int sunrpc_net_id; @@ -131,6 +132,7 @@ cleanup_sunrpc(void) unregister_rpc_pipefs(); rpc_destroy_mempool(); unregister_pernet_subsys(&sunrpc_net_ops); + auth_domain_cleanup(); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_unregister_sysctl(); #endif diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9ed3126600ce..c211b607239e 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -88,15 +88,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp) switch (*ip) { case SVC_POOL_AUTO: - return strlcpy(buf, "auto", 20); + return strlcpy(buf, "auto\n", 20); case SVC_POOL_GLOBAL: - return strlcpy(buf, "global", 20); + return strlcpy(buf, "global\n", 20); case SVC_POOL_PERCPU: - return strlcpy(buf, "percpu", 20); + return strlcpy(buf, "percpu\n", 20); case SVC_POOL_PERNODE: - return strlcpy(buf, "pernode", 20); + return strlcpy(buf, "pernode\n", 20); default: - return sprintf(buf, "%d", *ip); + return sprintf(buf, "%d\n", *ip); } } @@ -991,6 +991,7 @@ static int __svc_register(struct net *net, const char *progname, #endif } + trace_svc_register(progname, version, protocol, port, family, error); return error; } @@ -1000,11 +1001,6 @@ int svc_rpcbind_set_version(struct net *net, unsigned short proto, unsigned short port) { - dprintk("svc: svc_register(%sv%d, %s, %u, %u)\n", - progp->pg_name, version, - proto == IPPROTO_UDP? "udp" : "tcp", - port, family); - return __svc_register(net, progp->pg_name, progp->pg_prog, version, family, proto, port); @@ -1024,11 +1020,8 @@ int svc_generic_rpcbind_set(struct net *net, return 0; if (vers->vs_hidden) { - dprintk("svc: svc_register(%sv%d, %s, %u, %u)" - " (but not telling portmap)\n", - progp->pg_name, version, - proto == IPPROTO_UDP? "udp" : "tcp", - port, family); + trace_svc_noregister(progp->pg_name, version, proto, + port, family, 0); return 0; } @@ -1106,8 +1099,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi if (error == -EPROTONOSUPPORT) error = rpcb_register(net, program, version, 0, 0); - dprintk("svc: %s(%sv%u), error %d\n", - __func__, progname, version, error); + trace_svc_unregister(progname, version, error); } /* @@ -1132,9 +1124,6 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) continue; if (progp->pg_vers[i]->vs_hidden) continue; - - dprintk("svc: attempting to unregister %sv%u\n", - progp->pg_name, i); __svc_unregister(net, progp->pg_prog, i, progp->pg_name); } } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2284ff038dad..43cf8dbde898 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -153,6 +153,7 @@ static void svc_xprt_free(struct kref *kref) xprt_put(xprt->xpt_bc_xprt); if (xprt->xpt_bc_xps) xprt_switch_put(xprt->xpt_bc_xps); + trace_svc_xprt_free(xprt); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } @@ -206,6 +207,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, .sin6_port = htons(port), }; #endif + struct svc_xprt *xprt; struct sockaddr *sap; size_t len; @@ -224,7 +226,11 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return ERR_PTR(-EAFNOSUPPORT); } - return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); + xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); + if (IS_ERR(xprt)) + trace_svc_xprt_create_err(serv->sv_program->pg_name, + xcl->xcl_name, sap, xprt); + return xprt; } /* @@ -304,15 +310,11 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, { int err; - dprintk("svc: creating transport %s[%d]\n", xprt_name, port); err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); if (err == -EPROTONOSUPPORT) { request_module("svc%s", xprt_name); err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); } - if (err < 0) - dprintk("svc: transport %s not found, err %d\n", - xprt_name, -err); return err; } EXPORT_SYMBOL_GPL(svc_create_xprt); @@ -780,7 +782,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) int len = 0; if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { - dprintk("svc_recv: found XPT_CLOSE\n"); if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags)) xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_delete_xprt(xprt); @@ -799,6 +800,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) if (newxpt) { newxpt->xpt_cred = get_cred(xprt->xpt_cred); svc_add_new_temp_xprt(serv, newxpt); + trace_svc_xprt_accept(newxpt, serv->sv_name); } else module_put(xprt->xpt_class->xcl_owner); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { @@ -812,7 +814,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) else len = xprt->xpt_ops->xpo_recvfrom(rqstp); if (len > 0) - trace_svc_recvfrom(&rqstp->rq_arg); + trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg); rqstp->rq_stime = ktime_get(); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); @@ -835,14 +837,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) struct svc_serv *serv = rqstp->rq_server; int len, err; - dprintk("svc: server %p waiting for data (to = %ld)\n", - rqstp, timeout); - - if (rqstp->rq_xprt) - printk(KERN_ERR - "svc_recv: service %p, transport not NULL!\n", - rqstp); - err = svc_alloc_arg(rqstp); if (err) goto out; @@ -890,7 +884,6 @@ EXPORT_SYMBOL_GPL(svc_recv); void svc_drop(struct svc_rqst *rqstp) { trace_svc_drop(rqstp); - dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt); svc_xprt_release(rqstp); } EXPORT_SYMBOL_GPL(svc_drop); @@ -913,17 +906,11 @@ int svc_send(struct svc_rqst *rqstp) xb->len = xb->head[0].iov_len + xb->page_len + xb->tail[0].iov_len; - trace_svc_sendto(xb); - - /* Grab mutex to serialize outgoing data. */ - mutex_lock(&xprt->xpt_mutex); + trace_svc_xdr_sendto(rqstp, xb); trace_svc_stats_latency(rqstp); - if (test_bit(XPT_DEAD, &xprt->xpt_flags) - || test_bit(XPT_CLOSE, &xprt->xpt_flags)) - len = -ENOTCONN; - else - len = xprt->xpt_ops->xpo_sendto(rqstp); - mutex_unlock(&xprt->xpt_mutex); + + len = xprt->xpt_ops->xpo_sendto(rqstp); + trace_svc_send(rqstp, len); svc_xprt_release(rqstp); @@ -1031,11 +1018,10 @@ static void svc_delete_xprt(struct svc_xprt *xprt) struct svc_serv *serv = xprt->xpt_server; struct svc_deferred_req *dr; - /* Only do this once */ if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) - BUG(); + return; - dprintk("svc: svc_delete_xprt(%p)\n", xprt); + trace_svc_xprt_detach(xprt); xprt->xpt_ops->xpo_detach(xprt); if (xprt->xpt_bc_xprt) xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt); @@ -1056,6 +1042,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt) void svc_close_xprt(struct svc_xprt *xprt) { + trace_svc_xprt_close(xprt); set_bit(XPT_CLOSE, &xprt->xpt_flags); if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) /* someone else will have to effect the close */ @@ -1158,16 +1145,15 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many) set_bit(XPT_DEFERRED, &xprt->xpt_flags); if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) { spin_unlock(&xprt->xpt_lock); - dprintk("revisit canceled\n"); + trace_svc_defer_drop(dr); svc_xprt_put(xprt); - trace_svc_drop_deferred(dr); kfree(dr); return; } - dprintk("revisit queued\n"); dr->xprt = NULL; list_add(&dr->handle.recent, &xprt->xpt_deferred); spin_unlock(&xprt->xpt_lock); + trace_svc_defer_queue(dr); svc_xprt_enqueue(xprt); svc_xprt_put(xprt); } @@ -1213,22 +1199,24 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip, dr->argslen << 2); } + trace_svc_defer(rqstp); svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; - trace_svc_defer(rqstp); return &dr->handle; } /* * recv data from a deferred request into an active one */ -static int svc_deferred_recv(struct svc_rqst *rqstp) +static noinline int svc_deferred_recv(struct svc_rqst *rqstp) { struct svc_deferred_req *dr = rqstp->rq_deferred; + trace_svc_defer_recv(dr); + /* setup iov_base past transport header */ rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2); /* The iov_len does not include the transport header bytes */ @@ -1259,7 +1247,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) struct svc_deferred_req, handle.recent); list_del_init(&dr->handle.recent); - trace_svc_revisit_deferred(dr); } else clear_bit(XPT_DEFERRED, &xprt->xpt_flags); spin_unlock(&xprt->xpt_lock); diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 552617e3467b..998b196b6176 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -21,6 +21,8 @@ #include <trace/events/sunrpc.h> +#include "sunrpc.h" + #define RPCDBG_FACILITY RPCDBG_AUTH @@ -205,3 +207,26 @@ struct auth_domain *auth_domain_find(char *name) return NULL; } EXPORT_SYMBOL_GPL(auth_domain_find); + +/** + * auth_domain_cleanup - check that the auth_domain table is empty + * + * On module unload the auth_domain_table must be empty. To make it + * easier to catch bugs which don't clean up domains properly, we + * warn if anything remains in the table at cleanup time. + * + * Note that we cannot proactively remove the domains at this stage. + * The ->release() function might be in a module that has already been + * unloaded. + */ + +void auth_domain_cleanup(void) +{ + int h; + struct auth_domain *hp; + + for (h = 0; h < DN_HASHMAX; h++) + hlist_for_each_entry(hp, &auth_domain_table[h], hash) + pr_warn("svc: domain %s still present at module unload.\n", + hp->name); +} diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 6c8f802c4261..97c0bddba7a3 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -332,15 +332,6 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, return 0; } -static inline int ip_map_update(struct net *net, struct ip_map *ipm, - struct unix_domain *udom, time64_t expiry) -{ - struct sunrpc_net *sn; - - sn = net_generic(net, sunrpc_net_id); - return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry); -} - void svcauth_unix_purge(struct net *net) { struct sunrpc_net *sn; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e7a0037d9b56..5c4ec9386f81 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -45,7 +45,6 @@ #include <net/tcp_states.h> #include <linux/uaccess.h> #include <asm/ioctls.h> -#include <trace/events/skb.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/clnt.h> @@ -55,6 +54,8 @@ #include <linux/sunrpc/stats.h> #include <linux/sunrpc/xprt.h> +#include <trace/events/sunrpc.h> + #include "socklib.h" #include "sunrpc.h" @@ -108,31 +109,35 @@ static void svc_reclassify_socket(struct socket *sock) } #endif -/* - * Release an skbuff after use +/** + * svc_tcp_release_rqst - Release transport-related resources + * @rqstp: request structure with resources to be released + * */ -static void svc_release_skb(struct svc_rqst *rqstp) +static void svc_tcp_release_rqst(struct svc_rqst *rqstp) { struct sk_buff *skb = rqstp->rq_xprt_ctxt; if (skb) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); - rqstp->rq_xprt_ctxt = NULL; - dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); + rqstp->rq_xprt_ctxt = NULL; skb_free_datagram_locked(svsk->sk_sk, skb); } } -static void svc_release_udp_skb(struct svc_rqst *rqstp) +/** + * svc_udp_release_rqst - Release transport-related resources + * @rqstp: request structure with resources to be released + * + */ +static void svc_udp_release_rqst(struct svc_rqst *rqstp) { struct sk_buff *skb = rqstp->rq_xprt_ctxt; if (skb) { rqstp->rq_xprt_ctxt = NULL; - - dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); consume_skb(skb); } } @@ -218,34 +223,68 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) return len; } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) +{ + struct bvec_iter bi = { + .bi_size = size, + }; + struct bio_vec bv; + + bvec_iter_advance(bvec, &bi, seek & PAGE_MASK); + for_each_bvec(bv, bvec, bi, bi) + flush_dcache_page(bv.bv_page); +} +#else +static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size, + size_t seek) +{ +} +#endif + /* - * Generic recvfrom routine. + * Read from @rqstp's transport socket. The incoming message fills whole + * pages in @rqstp's rq_pages array until the last page of the message + * has been received into a partial page. */ -static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, - unsigned int nr, size_t buflen, unsigned int base) +static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, + size_t seek) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); + struct bio_vec *bvec = rqstp->rq_bvec; struct msghdr msg = { NULL }; + unsigned int i; ssize_t len; + size_t t; rqstp->rq_xprt_hlen = 0; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen); - if (base != 0) { - iov_iter_advance(&msg.msg_iter, base); - buflen -= base; + + for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) { + bvec[i].bv_page = rqstp->rq_pages[i]; + bvec[i].bv_len = PAGE_SIZE; + bvec[i].bv_offset = 0; + } + rqstp->rq_respages = &rqstp->rq_pages[i]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + + iov_iter_bvec(&msg.msg_iter, READ, bvec, i, buflen); + if (seek) { + iov_iter_advance(&msg.msg_iter, seek); + buflen -= seek; } len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); + if (len > 0) + svc_flush_bvec(bvec, len, seek); + /* If we read a full record, then assume there may be more * data to read (stream based sockets only!) */ if (len == buflen) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n", - svsk, iov[0].iov_base, iov[0].iov_len, len); return len; } @@ -282,13 +321,10 @@ static void svc_data_ready(struct sock *sk) struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; if (svsk) { - dprintk("svc: socket %p(inet %p), busy=%d\n", - svsk, sk, - test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); - /* Refer to svc_setup_socket() for details. */ rmb(); svsk->sk_odata(sk); + trace_svcsock_data_ready(&svsk->sk_xprt, 0); if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); } @@ -302,11 +338,9 @@ static void svc_write_space(struct sock *sk) struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); if (svsk) { - dprintk("svc: socket %p(inet %p), write_space busy=%d\n", - svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); - /* Refer to svc_setup_socket() for details. */ rmb(); + trace_svcsock_write_space(&svsk->sk_xprt, 0); svsk->sk_owspace(sk); svc_xprt_enqueue(&svsk->sk_xprt); } @@ -383,8 +417,15 @@ static int svc_udp_get_dest_address(struct svc_rqst *rqstp, return 0; } -/* - * Receive a datagram from a UDP socket. +/** + * svc_udp_recvfrom - Receive a datagram from a UDP socket. + * @rqstp: request structure into which to receive an RPC Call + * + * Called in a loop when XPT_DATA has been set. + * + * Returns: + * On success, the number of bytes in a received RPC Call, or + * %0 if a complete RPC Call message was not ready to return */ static int svc_udp_recvfrom(struct svc_rqst *rqstp) { @@ -418,20 +459,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3); clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - skb = NULL; err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 0, 0, MSG_PEEK | MSG_DONTWAIT); - if (err >= 0) - skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); - - if (skb == NULL) { - if (err != -EAGAIN) { - /* possibly an icmp error */ - dprintk("svc: recvfrom returned error %d\n", -err); - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - } - return 0; - } + if (err < 0) + goto out_recv_err; + skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); + if (!skb) + goto out_recv_err; + len = svc_addr_len(svc_addr(rqstp)); rqstp->rq_addrlen = len; if (skb->tstamp == 0) { @@ -442,26 +477,21 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) sock_write_timestamp(svsk->sk_sk, skb->tstamp); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ - len = skb->len; + len = skb->len; rqstp->rq_arg.len = len; + trace_svcsock_udp_recv(&svsk->sk_xprt, len); rqstp->rq_prot = IPPROTO_UDP; - if (!svc_udp_get_dest_address(rqstp, cmh)) { - net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", - cmh->cmsg_level, cmh->cmsg_type); - goto out_free; - } + if (!svc_udp_get_dest_address(rqstp, cmh)) + goto out_cmsg_err; rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp)); if (skb_is_nonlinear(skb)) { /* we have to copy */ local_bh_disable(); - if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { - local_bh_enable(); - /* checksum error */ - goto out_free; - } + if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) + goto out_bh_enable; local_bh_enable(); consume_skb(skb); } else { @@ -489,6 +519,20 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) serv->sv_stats->netudpcnt++; return len; + +out_recv_err: + if (err != -EAGAIN) { + /* possibly an icmp error */ + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + } + trace_svcsock_udp_recv_err(&svsk->sk_xprt, err); + return 0; +out_cmsg_err: + net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", + cmh->cmsg_level, cmh->cmsg_type); + goto out_free; +out_bh_enable: + local_bh_enable(); out_free: kfree_skb(skb); return 0; @@ -498,6 +542,9 @@ out_free: * svc_udp_sendto - Send out a reply on a UDP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_udp_sendto(struct svc_rqst *rqstp) @@ -519,10 +566,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; - svc_release_udp_skb(rqstp); + svc_udp_release_rqst(rqstp); svc_set_cmsg_data(rqstp, cmh); + mutex_lock(&xprt->xpt_mutex); + + if (svc_xprt_is_dead(xprt)) + goto out_notconn; + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); if (err == -ECONNREFUSED) { @@ -530,9 +582,16 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); } + trace_svcsock_udp_send(xprt, err); + + mutex_unlock(&xprt->xpt_mutex); if (err < 0) return err; return sent; + +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; } static int svc_udp_has_wspace(struct svc_xprt *xprt) @@ -576,7 +635,7 @@ static const struct svc_xprt_ops svc_udp_ops = { .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, .xpo_read_payload = svc_sock_read_payload, - .xpo_release_rqst = svc_release_udp_skb, + .xpo_release_rqst = svc_udp_release_rqst, .xpo_detach = svc_sock_detach, .xpo_free = svc_sock_free, .xpo_has_wspace = svc_udp_has_wspace, @@ -632,9 +691,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - dprintk("svc: socket %p TCP (listen) state change %d\n", - sk, sk->sk_state); - if (svsk) { /* Refer to svc_setup_socket() for details. */ rmb(); @@ -655,8 +711,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk) if (svsk) { set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); - } else - printk("svc: socket %p: no user data\n", sk); + } } } @@ -667,15 +722,11 @@ static void svc_tcp_state_change(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", - sk, sk->sk_state, sk->sk_user_data); - - if (!svsk) - printk("svc: socket %p: no user data\n", sk); - else { + if (svsk) { /* Refer to svc_setup_socket() for details. */ rmb(); svsk->sk_ostate(sk); + trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock); if (sk->sk_state != TCP_ESTABLISHED) { set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); @@ -696,9 +747,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) struct socket *newsock; struct svc_sock *newsvsk; int err, slen; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); if (!sock) return NULL; @@ -711,30 +760,18 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) else if (err != -EAGAIN) net_warn_ratelimited("%s: accept failed (err %d)!\n", serv->sv_name, -err); + trace_svcsock_accept_err(xprt, serv->sv_name, err); return NULL; } set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); err = kernel_getpeername(newsock, sin); if (err < 0) { - net_warn_ratelimited("%s: peername failed (err %d)!\n", - serv->sv_name, -err); + trace_svcsock_getpeername_err(xprt, serv->sv_name, err); goto failed; /* aborted connection or whatever */ } slen = err; - /* Ideally, we would want to reject connections from unauthorized - * hosts here, but when we get encryption, the IP of the host won't - * tell us anything. For now just warn about unpriv connections. - */ - if (!svc_port_is_privileged(sin)) { - dprintk("%s: connect from unprivileged port: %s\n", - serv->sv_name, - __svc_print_addr(sin, buf, sizeof(buf))); - } - dprintk("%s: connect from %s\n", serv->sv_name, - __svc_print_addr(sin, buf, sizeof(buf))); - /* Reset the inherited callbacks before calling svc_setup_socket */ newsock->sk->sk_state_change = svsk->sk_ostate; newsock->sk->sk_data_ready = svsk->sk_odata; @@ -752,10 +789,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); err = kernel_getsockname(newsock, sin); slen = err; - if (unlikely(err < 0)) { - dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); + if (unlikely(err < 0)) slen = offsetof(struct sockaddr, sa_data); - } svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); if (sock_is_loopback(newsock->sk)) @@ -772,13 +807,14 @@ failed: return NULL; } -static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +static size_t svc_tcp_restore_pages(struct svc_sock *svsk, + struct svc_rqst *rqstp) { - unsigned int i, len, npages; + size_t len = svsk->sk_datalen; + unsigned int i, npages; - if (svsk->sk_datalen == 0) + if (!len) return 0; - len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) @@ -827,47 +863,45 @@ out: } /* - * Receive fragment record header. - * If we haven't gotten the record length yet, get the next four bytes. + * Receive fragment record header into sk_marker. */ -static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) +static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, + struct svc_rqst *rqstp) { - struct svc_serv *serv = svsk->sk_xprt.xpt_server; - unsigned int want; - int len; + ssize_t want, len; + /* If we haven't gotten the record length yet, + * get the next four bytes. + */ if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { + struct msghdr msg = { NULL }; struct kvec iov; want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; - iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; + iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; - len = svc_recvfrom(rqstp, &iov, 1, want, 0); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, want); + len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); if (len < 0) - goto error; + return len; svsk->sk_tcplen += len; - if (len < want) { - dprintk("svc: short recvfrom while reading record " - "length (%d of %d)\n", len, want); - return -EAGAIN; + /* call again to read the remaining bytes */ + goto err_short; } - - dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk)); + trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker); if (svc_sock_reclen(svsk) + svsk->sk_datalen > - serv->sv_max_mesg) { - net_notice_ratelimited("RPC: fragment too large: %d\n", - svc_sock_reclen(svsk)); - goto err_delete; - } + svsk->sk_xprt.xpt_server->sv_max_mesg) + goto err_too_large; } - return svc_sock_reclen(svsk); -error: - dprintk("RPC: TCP recv_record got %d\n", len); - return len; -err_delete: + +err_too_large: + net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n", + __func__, svsk->sk_xprt.xpt_server->sv_name, + svc_sock_reclen(svsk)); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_short: return -EAGAIN; } @@ -916,87 +950,58 @@ unlock_eagain: return -EAGAIN; } -static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) -{ - int i = 0; - int t = 0; - - while (t < len) { - vec[i].iov_base = page_address(pages[i]); - vec[i].iov_len = PAGE_SIZE; - i++; - t += PAGE_SIZE; - } - return i; -} - static void svc_tcp_fragment_received(struct svc_sock *svsk) { /* If we have more data, signal svc_xprt_enqueue() to try again */ - dprintk("svc: TCP %s record (%d bytes)\n", - svc_sock_final_rec(svsk) ? "final" : "nonfinal", - svc_sock_reclen(svsk)); svsk->sk_tcplen = 0; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; } -/* - * Receive data from a TCP socket. +/** + * svc_tcp_recvfrom - Receive data from a TCP socket + * @rqstp: request structure into which to receive an RPC Call + * + * Called in a loop when XPT_DATA has been set. + * + * Read the 4-byte stream record marker, then use the record length + * in that marker to set up exactly the resources needed to receive + * the next RPC message into @rqstp. + * + * Returns: + * On success, the number of bytes in a received RPC Call, or + * %0 if a complete RPC Call message was not ready to return + * + * The zero return case handles partial receives and callback Replies. + * The state of a partial receive is preserved in the svc_sock for + * the next call to svc_tcp_recvfrom. */ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); struct svc_serv *serv = svsk->sk_xprt.xpt_server; - int len; - struct kvec *vec; - unsigned int want, base; + size_t want, base; + ssize_t len; __be32 *p; __be32 calldir; - int pnum; - - dprintk("svc: tcp_recv %p data %d conn %d close %d\n", - svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - len = svc_tcp_recv_record(svsk, rqstp); + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + len = svc_tcp_read_marker(svsk, rqstp); if (len < 0) goto error; base = svc_tcp_restore_pages(svsk, rqstp); - want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); - - vec = rqstp->rq_vec; - - pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want); - - rqstp->rq_respages = &rqstp->rq_pages[pnum]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - - /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, base + want, base); + want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); + len = svc_tcp_read_msg(rqstp, base + want, base); if (len >= 0) { + trace_svcsock_tcp_recv(&svsk->sk_xprt, len); svsk->sk_tcplen += len; svsk->sk_datalen += len; } - if (len != want || !svc_sock_final_rec(svsk)) { - svc_tcp_save_pages(svsk, rqstp); - if (len < 0 && len != -EAGAIN) - goto err_delete; - if (len == want) - svc_tcp_fragment_received(svsk); - else - dprintk("svc: incomplete TCP record (%d of %d)\n", - (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)), - svc_sock_reclen(svsk)); - goto err_noclose; - } - - if (svsk->sk_datalen < 8) { - svsk->sk_datalen = 0; - goto err_delete; /* client is nuts. */ - } + if (len != want || !svc_sock_final_rec(svsk)) + goto err_incomplete; + if (svsk->sk_datalen < 8) + goto err_nuts; rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; @@ -1031,14 +1036,26 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) return rqstp->rq_arg.len; +err_incomplete: + svc_tcp_save_pages(svsk, rqstp); + if (len < 0 && len != -EAGAIN) + goto err_delete; + if (len == want) + svc_tcp_fragment_received(svsk); + else + trace_svcsock_tcp_recv_short(&svsk->sk_xprt, + svc_sock_reclen(svsk), + svsk->sk_tcplen - sizeof(rpc_fraghdr)); + goto err_noclose; error: if (len != -EAGAIN) goto err_delete; - dprintk("RPC: TCP recvfrom got EAGAIN\n"); + trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0); return 0; +err_nuts: + svsk->sk_datalen = 0; err_delete: - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); + trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); err_noclose: return 0; /* record not complete */ @@ -1048,6 +1065,9 @@ err_noclose: * svc_tcp_sendto - Send out a reply on a TCP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_tcp_sendto(struct svc_rqst *rqstp) @@ -1063,14 +1083,22 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; - svc_release_skb(rqstp); + svc_tcp_release_rqst(rqstp); + mutex_lock(&xprt->xpt_mutex); + if (svc_xprt_is_dead(xprt)) + goto out_notconn; err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); + trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; + mutex_unlock(&xprt->xpt_mutex); return sent; +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; out_close: pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", xprt->xpt_server->sv_name, @@ -1078,6 +1106,7 @@ out_close: (err < 0) ? err : sent, xdr->len); set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; } @@ -1094,7 +1123,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, .xpo_read_payload = svc_sock_read_payload, - .xpo_release_rqst = svc_release_skb, + .xpo_release_rqst = svc_tcp_release_rqst, .xpo_detach = svc_tcp_sock_detach, .xpo_free = svc_sock_free, .xpo_has_wspace = svc_tcp_has_wspace, @@ -1132,18 +1161,16 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { - dprintk("setting up TCP socket for listening\n"); strcpy(svsk->sk_xprt.xpt_remotebuf, "listener"); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); sk->sk_data_ready = svc_tcp_listen_data_ready; set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); } else { - dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_data_ready; sk->sk_write_space = svc_write_space; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; svsk->sk_tcplen = 0; svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); @@ -1188,7 +1215,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); int err = 0; - dprintk("svc: svc_setup_socket %p\n", sock); svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); if (!svsk) return ERR_PTR(-ENOMEM); @@ -1225,12 +1251,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); - dprintk("svc: svc_setup_socket created %p (inet %p), " - "listen %d close %d\n", - svsk, svsk->sk_sk, - test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - + trace_svcsock_new_socket(sock); return svsk; } @@ -1322,11 +1343,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, struct sockaddr *newsin = (struct sockaddr *)&addr; int newlen; int family; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - - dprintk("svc: svc_create_socket(%s, %d, %s)\n", - serv->sv_program->pg_name, protocol, - __svc_print_addr(sin, buf, sizeof(buf))); if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { printk(KERN_WARNING "svc: only UDP and TCP " @@ -1383,7 +1399,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); return (struct svc_xprt *)svsk; bummer: - dprintk("svc: svc_create_socket error = %d\n", -error); sock_release(sock); return ERR_PTR(error); } @@ -1397,8 +1412,6 @@ static void svc_sock_detach(struct svc_xprt *xprt) struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct sock *sk = svsk->sk_sk; - dprintk("svc: svc_sock_detach(%p)\n", svsk); - /* put back the old socket callbacks */ lock_sock(sk); sk->sk_state_change = svsk->sk_ostate; @@ -1415,8 +1428,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk); - svc_sock_detach(xprt); if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { @@ -1431,7 +1442,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - dprintk("svc: svc_sock_free(%p)\n", svsk); if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 493a30a296fc..d5cc5db9dbf3 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -663,6 +663,7 @@ static void xprt_autoclose(struct work_struct *work) container_of(work, struct rpc_xprt, task_cleanup); unsigned int pflags = memalloc_nofs_save(); + trace_xprt_disconnect_auto(xprt); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt->ops->close(xprt); xprt_release_write(xprt, NULL); @@ -677,7 +678,7 @@ static void xprt_autoclose(struct work_struct *work) */ void xprt_disconnect_done(struct rpc_xprt *xprt) { - dprintk("RPC: disconnected transport %p\n", xprt); + trace_xprt_disconnect_done(xprt); spin_lock(&xprt->transport_lock); xprt_clear_connected(xprt); xprt_clear_write_space_locked(xprt); @@ -694,6 +695,8 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done); */ void xprt_force_disconnect(struct rpc_xprt *xprt) { + trace_xprt_disconnect_force(xprt); + /* Don't race with the test_bit() in xprt_clear_locked() */ spin_lock(&xprt->transport_lock); set_bit(XPRT_CLOSE_WAIT, &xprt->state); @@ -832,8 +835,10 @@ void xprt_connect(struct rpc_task *task) if (!xprt_lock_write(xprt, task)) return; - if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) + if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) { + trace_xprt_disconnect_cleanup(xprt); xprt->ops->close(xprt); + } if (!xprt_connected(xprt)) { task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie; @@ -1460,7 +1465,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task) */ req->rq_ntrans++; - trace_xprt_sendto(&req->rq_snd_buf); + trace_rpc_xdr_sendto(task, &req->rq_snd_buf); connect_cookie = xprt->connect_cookie; status = xprt->ops->send_request(req); if (status != 0) { @@ -1903,11 +1908,8 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) found: xprt = t->setup(args); - if (IS_ERR(xprt)) { - dprintk("RPC: xprt_create_transport: failed, %ld\n", - -PTR_ERR(xprt)); + if (IS_ERR(xprt)) goto out; - } if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT) xprt->idle_timeout = 0; INIT_WORK(&xprt->task_cleanup, xprt_autoclose); @@ -1928,8 +1930,7 @@ found: rpc_xprt_debugfs_register(xprt); - dprintk("RPC: created transport %p with %u slots\n", xprt, - xprt->max_reqs); + trace_xprt_create(xprt); out: return xprt; } @@ -1939,6 +1940,8 @@ static void xprt_destroy_cb(struct work_struct *work) struct rpc_xprt *xprt = container_of(work, struct rpc_xprt, task_cleanup); + trace_xprt_destroy(xprt); + rpc_xprt_debugfs_unregister(xprt); rpc_destroy_wait_queue(&xprt->binding); rpc_destroy_wait_queue(&xprt->pending); @@ -1963,8 +1966,6 @@ static void xprt_destroy_cb(struct work_struct *work) */ static void xprt_destroy(struct rpc_xprt *xprt) { - dprintk("RPC: destroying transport %p\n", xprt); - /* * Exclude transport connect/disconnect handlers and autoclose */ diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 3c627dc685cc..2081c8fbfa48 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -892,8 +892,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) * or privacy, direct data placement of individual data items * is not allowed. */ - ddp_allowed = !(rqst->rq_cred->cr_auth->au_flags & - RPCAUTH_AUTH_DATATOUCH); + ddp_allowed = !test_bit(RPCAUTH_AUTH_DATATOUCH, + &rqst->rq_cred->cr_auth->au_flags); /* * Chunks needed for results? diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index af7eb8d202ae..1ee73f7cf931 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -10,59 +10,34 @@ #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - -#undef SVCRDMA_BACKCHANNEL_DEBUG - /** - * svc_rdma_handle_bc_reply - Process incoming backchannel reply - * @xprt: controlling backchannel transport - * @rdma_resp: pointer to incoming transport header - * @rcvbuf: XDR buffer into which to decode the reply + * svc_rdma_handle_bc_reply - Process incoming backchannel Reply + * @rqstp: resources for handling the Reply + * @rctxt: Received message * - * Returns: - * %0 if @rcvbuf is filled in, xprt_complete_rqst called, - * %-EAGAIN if server should call ->recvfrom again. */ -int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, - struct xdr_buf *rcvbuf) +void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt) { + struct svc_xprt *sxprt = rqstp->rq_xprt; + struct rpc_xprt *xprt = sxprt->xpt_bc_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct xdr_buf *rcvbuf = &rqstp->rq_arg; struct kvec *dst, *src = &rcvbuf->head[0]; + __be32 *rdma_resp = rctxt->rc_recv_buf; struct rpc_rqst *req; u32 credits; - size_t len; - __be32 xid; - __be32 *p; - int ret; - - p = (__be32 *)src->iov_base; - len = src->iov_len; - xid = *rdma_resp; - -#ifdef SVCRDMA_BACKCHANNEL_DEBUG - pr_info("%s: xid=%08x, length=%zu\n", - __func__, be32_to_cpu(xid), len); - pr_info("%s: RPC/RDMA: %*ph\n", - __func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp); - pr_info("%s: RPC: %*ph\n", - __func__, (int)len, p); -#endif - - ret = -EAGAIN; - if (src->iov_len < 24) - goto out_shortreply; spin_lock(&xprt->queue_lock); - req = xprt_lookup_rqst(xprt, xid); + req = xprt_lookup_rqst(xprt, *rdma_resp); if (!req) - goto out_notfound; + goto out_unlock; dst = &req->rq_private_buf.head[0]; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); - if (dst->iov_len < len) + if (dst->iov_len < src->iov_len) goto out_unlock; - memcpy(dst->iov_base, p, len); + memcpy(dst->iov_base, src->iov_base, src->iov_len); xprt_pin_rqst(req); spin_unlock(&xprt->queue_lock); @@ -71,31 +46,17 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, credits = 1; /* don't deadlock */ else if (credits > r_xprt->rx_buf.rb_bc_max_requests) credits = r_xprt->rx_buf.rb_bc_max_requests; - spin_lock(&xprt->transport_lock); xprt->cwnd = credits << RPC_CWNDSHIFT; spin_unlock(&xprt->transport_lock); spin_lock(&xprt->queue_lock); - ret = 0; xprt_complete_rqst(req->rq_task, rcvbuf->len); xprt_unpin_rqst(req); rcvbuf->len = 0; out_unlock: spin_unlock(&xprt->queue_lock); -out: - return ret; - -out_shortreply: - dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n", - xprt, src->iov_len); - goto out; - -out_notfound: - dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n", - xprt, be32_to_cpu(xid)); - goto out_unlock; } /* Send a backwards direction RPC call. @@ -192,10 +153,6 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) *p++ = xdr_zero; *p = xdr_zero; -#ifdef SVCRDMA_BACKCHANNEL_DEBUG - pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); -#endif - rqst->rq_xtime = ktime_get(); rc = svc_rdma_bc_sendto(rdma, rqst, ctxt); if (rc) @@ -206,45 +163,36 @@ put_ctxt: svc_rdma_send_ctxt_put(rdma, ctxt); drop_connection: - dprintk("svcrdma: failed to send bc call\n"); return -ENOTCONN; } -/* Send an RPC call on the passive end of a transport - * connection. +/** + * xprt_rdma_bc_send_request - Send a reverse-direction Call + * @rqst: rpc_rqst containing Call message to be sent + * + * Return values: + * %0 if the message was sent successfully + * %ENOTCONN if the message was not sent */ -static int -xprt_rdma_bc_send_request(struct rpc_rqst *rqst) +static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) { struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; - struct svcxprt_rdma *rdma; + struct svcxprt_rdma *rdma = + container_of(sxprt, struct svcxprt_rdma, sc_xprt); int ret; - dprintk("svcrdma: sending bc call with xid: %08x\n", - be32_to_cpu(rqst->rq_xid)); + if (test_bit(XPT_DEAD, &sxprt->xpt_flags)) + return -ENOTCONN; - mutex_lock(&sxprt->xpt_mutex); - - ret = -ENOTCONN; - rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); - if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) { - ret = rpcrdma_bc_send_request(rdma, rqst); - if (ret == -ENOTCONN) - svc_close_xprt(sxprt); - } - - mutex_unlock(&sxprt->xpt_mutex); - - if (ret < 0) - return ret; - return 0; + ret = rpcrdma_bc_send_request(rdma, rqst); + if (ret == -ENOTCONN) + svc_close_xprt(sxprt); + return ret; } static void xprt_rdma_bc_close(struct rpc_xprt *xprt) { - dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); - xprt_disconnect_done(xprt); xprt->cwnd = RPC_CWNDSHIFT; } @@ -252,8 +200,6 @@ xprt_rdma_bc_close(struct rpc_xprt *xprt) static void xprt_rdma_bc_put(struct rpc_xprt *xprt) { - dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); - xprt_rdma_free_addresses(xprt); xprt_free(xprt); } @@ -288,19 +234,14 @@ xprt_setup_rdma_bc(struct xprt_create *args) struct rpc_xprt *xprt; struct rpcrdma_xprt *new_xprt; - if (args->addrlen > sizeof(xprt->addr)) { - dprintk("RPC: %s: address too large\n", __func__); + if (args->addrlen > sizeof(xprt->addr)) return ERR_PTR(-EBADF); - } xprt = xprt_alloc(args->net, sizeof(*new_xprt), RPCRDMA_MAX_BC_REQUESTS, RPCRDMA_MAX_BC_REQUESTS); - if (!xprt) { - dprintk("RPC: %s: couldn't allocate rpc_xprt\n", - __func__); + if (!xprt) return ERR_PTR(-ENOMEM); - } xprt->timeout = &xprt_rdma_bc_timeout; xprt_set_bound(xprt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index efa5fcb5793f..e426fedb9524 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -665,23 +665,23 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg, return hdr_len; out_short: - trace_svcrdma_decode_short(rq_arg->len); + trace_svcrdma_decode_short_err(rq_arg->len); return -EINVAL; out_version: - trace_svcrdma_decode_badvers(rdma_argp); + trace_svcrdma_decode_badvers_err(rdma_argp); return -EPROTONOSUPPORT; out_drop: - trace_svcrdma_decode_drop(rdma_argp); + trace_svcrdma_decode_drop_err(rdma_argp); return 0; out_proc: - trace_svcrdma_decode_badproc(rdma_argp); + trace_svcrdma_decode_badproc_err(rdma_argp); return -EINVAL; out_inval: - trace_svcrdma_decode_parse(rdma_argp); + trace_svcrdma_decode_parse_err(rdma_argp); return -EINVAL; } @@ -878,12 +878,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_drop; rqstp->rq_xprt_hlen = ret; - if (svc_rdma_is_backchannel_reply(xprt, p)) { - ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p, - &rqstp->rq_arg); - svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - return ret; - } + if (svc_rdma_is_backchannel_reply(xprt, p)) + goto out_backchannel; + svc_rdma_get_inv_rkey(rdma_xprt, ctxt); p += rpcrdma_fixed_maxsz; @@ -913,6 +910,8 @@ out_postfail: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return ret; +out_backchannel: + svc_rdma_handle_bc_reply(rqstp, ctxt); out_drop: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 23c2d3ce0dc9..5eb35309ecef 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -9,13 +9,10 @@ #include <linux/sunrpc/rpc_rdma.h> #include <linux/sunrpc/svc_rdma.h> -#include <linux/sunrpc/debug.h> #include "xprt_rdma.h" #include <trace/events/rpcrdma.h> -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc); static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc); @@ -39,7 +36,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc); struct svc_rdma_rw_ctxt { struct list_head rw_list; struct rdma_rw_ctx rw_ctx; - int rw_nents; + unsigned int rw_nents; struct sg_table rw_sg_table; struct scatterlist rw_first_sgl[]; }; @@ -67,19 +64,22 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), GFP_KERNEL); if (!ctxt) - goto out; + goto out_noctx; INIT_LIST_HEAD(&ctxt->rw_list); } ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl; if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges, ctxt->rw_sg_table.sgl, - SG_CHUNK_SIZE)) { - kfree(ctxt); - ctxt = NULL; - } -out: + SG_CHUNK_SIZE)) + goto out_free; return ctxt; + +out_free: + kfree(ctxt); +out_noctx: + trace_svcrdma_no_rwctx_err(rdma, sges); + return NULL; } static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, @@ -107,6 +107,34 @@ void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma) } } +/** + * svc_rdma_rw_ctx_init - Prepare a R/W context for I/O + * @rdma: controlling transport instance + * @ctxt: R/W context to prepare + * @offset: RDMA offset + * @handle: RDMA tag/handle + * @direction: I/O direction + * + * Returns on success, the number of WQEs that will be needed + * on the workqueue, or a negative errno. + */ +static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma, + struct svc_rdma_rw_ctxt *ctxt, + u64 offset, u32 handle, + enum dma_data_direction direction) +{ + int ret; + + ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num, + ctxt->rw_sg_table.sgl, ctxt->rw_nents, + 0, offset, handle, direction); + if (unlikely(ret < 0)) { + svc_rdma_put_rw_ctxt(rdma, ctxt); + trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret); + } + return ret; +} + /* A chunk context tracks all I/O for moving one Read or Write * chunk. This is a a set of rdma_rw's that handle data movement * for all segments of one chunk. @@ -428,15 +456,13 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, ctxt = svc_rdma_get_rw_ctxt(rdma, (write_len >> PAGE_SHIFT) + 2); if (!ctxt) - goto out_noctx; + return -ENOMEM; constructor(info, write_len, ctxt); - ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, - rdma->sc_port_num, ctxt->rw_sg_table.sgl, - ctxt->rw_nents, 0, seg_offset, - seg_handle, DMA_TO_DEVICE); + ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle, + DMA_TO_DEVICE); if (ret < 0) - goto out_initerr; + return -EIO; trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset); @@ -455,18 +481,9 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, return 0; out_overflow: - dprintk("svcrdma: inadequate space in Write chunk (%u)\n", - info->wi_nsegs); + trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no, + info->wi_nsegs); return -E2BIG; - -out_noctx: - dprintk("svcrdma: no R/W ctxs available\n"); - return -ENOMEM; - -out_initerr: - svc_rdma_put_rw_ctxt(rdma, ctxt); - trace_svcrdma_dma_map_rwctx(rdma, ret); - return -EIO; } /* Send one of an xdr_buf's kvecs by itself. To send a Reply @@ -616,7 +633,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT; ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no); if (!ctxt) - goto out_noctx; + return -ENOMEM; ctxt->rw_nents = sge_no; sg = ctxt->rw_sg_table.sgl; @@ -646,29 +663,18 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, goto out_overrun; } - ret = rdma_rw_ctx_init(&ctxt->rw_ctx, cc->cc_rdma->sc_qp, - cc->cc_rdma->sc_port_num, - ctxt->rw_sg_table.sgl, ctxt->rw_nents, - 0, offset, rkey, DMA_FROM_DEVICE); + ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, offset, rkey, + DMA_FROM_DEVICE); if (ret < 0) - goto out_initerr; + return -EIO; list_add(&ctxt->rw_list, &cc->cc_rwctxts); cc->cc_sqecount += ret; return 0; -out_noctx: - dprintk("svcrdma: no R/W ctxs available\n"); - return -ENOMEM; - out_overrun: - dprintk("svcrdma: request overruns rq_pages\n"); + trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno); return -EINVAL; - -out_initerr: - trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret); - svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt); - return -EIO; } /* Walk the segments in the Read chunk starting at @p and construct diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index b6c8643867f2..38e7c3c8c4a9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -868,12 +868,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) __be32 *p; int ret; - /* Create the RDMA response header. xprt->xpt_mutex, - * acquired in svc_send(), serializes RPC replies. The - * code path below that inserts the credit grant value - * into each transport header runs only inside this - * critical section. - */ + ret = -ENOTCONN; + if (svc_xprt_is_dead(xprt)) + goto err0; + ret = -ENOMEM; sctxt = svc_rdma_send_ctxt_get(rdma); if (!sctxt) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ea54785db4f8..d38be57b00ed 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -211,7 +211,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, newxprt->sc_ord = param->initiator_depth; sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; - svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); + newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa); + memcpy(&newxprt->sc_xprt.xpt_remote, sa, + newxprt->sc_xprt.xpt_remotelen); + snprintf(newxprt->sc_xprt.xpt_remotebuf, + sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa); + /* The remote port is arbitrary and not under the control of the * client ULP. Set it to a fixed value so that the DRC continues * to be effective after a reconnect. @@ -309,11 +314,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct svcxprt_rdma *cma_xprt; int ret; - dprintk("svcrdma: Creating RDMA listener\n"); - if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) { - dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family); + if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) return ERR_PTR(-EAFNOSUPPORT); - } cma_xprt = svc_rdma_create_xprt(serv, net); if (!cma_xprt) return ERR_PTR(-ENOMEM); @@ -324,7 +326,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); - dprintk("svcrdma: rdma_create_id failed = %d\n", ret); goto err0; } @@ -333,23 +334,17 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, */ #if IS_ENABLED(CONFIG_IPV6) ret = rdma_set_afonly(listen_id, 1); - if (ret) { - dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret); + if (ret) goto err1; - } #endif ret = rdma_bind_addr(listen_id, sa); - if (ret) { - dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); + if (ret) goto err1; - } cma_xprt->sc_cm_id = listen_id; ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); - if (ret) { - dprintk("svcrdma: rdma_listen failed = %d\n", ret); + if (ret) goto err1; - } /* * We need to use the address from the cm_id in case the @@ -405,9 +400,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!newxprt) return NULL; - dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n", - newxprt, newxprt->sc_cm_id); - dev = newxprt->sc_cm_id->device; newxprt->sc_port_num = newxprt->sc_cm_id->port_num; @@ -443,21 +435,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_pd = ib_alloc_pd(dev, 0); if (IS_ERR(newxprt->sc_pd)) { - dprintk("svcrdma: error creating PD for connect request\n"); + trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd)); goto errout; } newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth, IB_POLL_WORKQUEUE); - if (IS_ERR(newxprt->sc_sq_cq)) { - dprintk("svcrdma: error creating SQ CQ for connect request\n"); + if (IS_ERR(newxprt->sc_sq_cq)) goto errout; - } newxprt->sc_rq_cq = ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE); - if (IS_ERR(newxprt->sc_rq_cq)) { - dprintk("svcrdma: error creating RQ CQ for connect request\n"); + if (IS_ERR(newxprt->sc_rq_cq)) goto errout; - } memset(&qp_attr, 0, sizeof qp_attr); qp_attr.event_handler = qp_event_handler; @@ -481,7 +469,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); if (ret) { - dprintk("svcrdma: failed to create QP, ret=%d\n", ret); + trace_svcrdma_qp_err(newxprt, ret); goto errout; } newxprt->sc_qp = newxprt->sc_cm_id->qp; @@ -489,8 +477,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) newxprt->sc_snd_w_inv = false; if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) && - !rdma_ib_or_roce(dev, newxprt->sc_port_num)) + !rdma_ib_or_roce(dev, newxprt->sc_port_num)) { + trace_svcrdma_fabric_err(newxprt, -EINVAL); goto errout; + } if (!svc_rdma_post_recvs(newxprt)) goto errout; @@ -512,15 +502,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) conn_param.initiator_depth = min_t(int, newxprt->sc_ord, dev->attrs.max_qp_init_rd_atom); if (!conn_param.initiator_depth) { - dprintk("svcrdma: invalid ORD setting\n"); ret = -EINVAL; + trace_svcrdma_initdepth_err(newxprt, ret); goto errout; } conn_param.private_data = &pmsg; conn_param.private_data_len = sizeof(pmsg); ret = rdma_accept(newxprt->sc_cm_id, &conn_param); - if (ret) + if (ret) { + trace_svcrdma_accept_err(newxprt, ret); goto errout; + } #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) dprintk("svcrdma: new connection %p accepted:\n", newxprt); @@ -535,12 +527,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk(" ord : %d\n", conn_param.initiator_depth); #endif - trace_svcrdma_xprt_accept(&newxprt->sc_xprt); return &newxprt->sc_xprt; errout: - dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); - trace_svcrdma_xprt_fail(&newxprt->sc_xprt); /* Take a reference in case the DTO handler runs */ svc_xprt_get(&newxprt->sc_xprt); if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) @@ -578,8 +567,6 @@ static void __svc_rdma_free(struct work_struct *work) container_of(work, struct svcxprt_rdma, sc_work); struct svc_xprt *xprt = &rdma->sc_xprt; - trace_svcrdma_xprt_free(xprt); - if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) ib_drain_qp(rdma->sc_qp); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 659da37020a4..0c4af7f5e241 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -68,7 +68,7 @@ * tunables */ -unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; +static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; @@ -281,8 +281,6 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - trace_xprtrdma_op_destroy(r_xprt); - cancel_delayed_work_sync(&r_xprt->rx_connect_worker); rpcrdma_xprt_disconnect(r_xprt); @@ -365,10 +363,6 @@ xprt_setup_rdma(struct xprt_create *args) xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; - dprintk("RPC: %s: %s:%s\n", __func__, - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT]); - trace_xprtrdma_create(new_xprt); return xprt; } @@ -385,8 +379,6 @@ void xprt_rdma_close(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - trace_xprtrdma_op_close(r_xprt); - rpcrdma_xprt_disconnect(r_xprt); xprt->reestablish_timeout = 0; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 05c4d3a9cda2..2ae348377806 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -141,7 +141,6 @@ void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc) if (wc->status != IB_WC_SUCCESS && r_xprt->rx_ep->re_connect_status == 1) { r_xprt->rx_ep->re_connect_status = -ECONNABORTED; - trace_xprtrdma_flush_dct(r_xprt, wc->status); xprt_force_disconnect(xprt); } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3a143e250b9a..914508ea9b84 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2528,8 +2528,16 @@ static int bc_sendto(struct rpc_rqst *req) return sent; } -/* - * The send routine. Borrows from svc_send +/** + * bc_send_request - Send a backchannel Call on a TCP socket + * @req: rpc_rqst containing Call message to be sent + * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * + * Return values: + * %0 if the message was sent successfully + * %ENOTCONN if the message was not sent */ static int bc_send_request(struct rpc_rqst *req) { diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 197436b20288..e23c912548f5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2407,7 +2407,7 @@ sub process { if ($rawline=~/^\+\+\+\s+(\S+)/) { $setup_docs = 0; - if ($1 =~ m@Documentation/admin-guide/kernel-parameters.rst$@) { + if ($1 =~ m@Documentation/admin-guide/kernel-parameters.txt$@) { $setup_docs = 1; } #next; @@ -6388,7 +6388,7 @@ sub process { if (!grep(/$name/, @setup_docs)) { CHK("UNDOCUMENTED_SETUP", - "__setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.rst\n" . $herecurr); + "__setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.txt\n" . $herecurr); } } diff --git a/scripts/spelling.txt b/scripts/spelling.txt index d9cd24cf0d40..c45e9afaab2d 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -59,6 +59,7 @@ actualy||actually acumulating||accumulating acumulative||accumulative acumulator||accumulator +acutally||actually adapater||adapter addional||additional additionaly||additionally @@ -249,6 +250,7 @@ calescing||coalescing calle||called callibration||calibration callled||called +callser||caller calucate||calculate calulate||calculate cancelation||cancellation @@ -671,6 +673,7 @@ hanlde||handle hanled||handled happend||happened harware||hardware +havind||having heirarchically||hierarchically helpfull||helpful hexdecimal||hexadecimal @@ -845,6 +848,7 @@ logile||logfile loobpack||loopback loosing||losing losted||lost +maangement||management machinary||machinery maibox||mailbox maintainance||maintenance @@ -905,6 +909,7 @@ modfiy||modify modulues||modules momery||memory memomry||memory +monitring||monitoring monochorome||monochrome monochromo||monochrome monocrome||monochrome @@ -1010,6 +1015,7 @@ partiton||partition pased||passed passin||passing pathes||paths +pattrns||patterns pecularities||peculiarities peformance||performance peforming||performing @@ -1256,6 +1262,7 @@ shoule||should shrinked||shrunk siginificantly||significantly signabl||signal +significanly||significantly similary||similarly similiar||similar simlar||similar @@ -1371,6 +1378,7 @@ thead||thread therfore||therefore thier||their threds||threads +threee||three threshhold||threshold thresold||threshold throught||through @@ -1410,6 +1418,7 @@ tyep||type udpate||update uesd||used uknown||unknown +usccess||success usupported||unsupported uncommited||uncommitted unconditionaly||unconditionally diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 47838f57a647..9630d2523948 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -138,6 +138,16 @@ void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) } EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); +static void snd_pcm_stream_lock_nested(struct snd_pcm_substream *substream) +{ + struct snd_pcm_group *group = &substream->self_group; + + if (substream->pcm->nonatomic) + mutex_lock_nested(&group->mutex, SINGLE_DEPTH_NESTING); + else + spin_lock_nested(&group->lock, SINGLE_DEPTH_NESTING); +} + /** * snd_pcm_stream_unlock_irq - Unlock the PCM stream * @substream: PCM substream @@ -2166,6 +2176,12 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) } pcm_file = f.file->private_data; substream1 = pcm_file->substream; + + if (substream == substream1) { + res = -EINVAL; + goto _badf; + } + group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) { res = -ENOMEM; @@ -2194,7 +2210,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) snd_pcm_stream_unlock_irq(substream); snd_pcm_group_lock_irq(target_group, nonatomic); - snd_pcm_stream_lock(substream1); + snd_pcm_stream_lock_nested(substream1); snd_pcm_group_assign(substream1, target_group); refcount_inc(&target_group->refs); snd_pcm_stream_unlock(substream1); @@ -2210,7 +2226,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) static void relink_to_local(struct snd_pcm_substream *substream) { - snd_pcm_stream_lock(substream); + snd_pcm_stream_lock_nested(substream); snd_pcm_group_assign(substream, &substream->self_group); snd_pcm_stream_unlock(substream); } diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index ddb7c2ce3f7c..def8161cde4c 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -1040,7 +1040,7 @@ static void snd_emu10k1x_proc_reg_write(struct snd_info_entry *entry, if (sscanf(line, "%x %x %x", ®, &channel_id, &val) != 3) continue; - if (reg < 0x49 && val <= 0xffffffff && channel_id <= 2) + if (reg < 0x49 && channel_id <= 2) snd_emu10k1x_ptr_write(emu, reg, channel_id, val); } } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0aa778ff7f2b..6d73f8beadb6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8161,6 +8161,12 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { ALC225_STANDARD_PINS, {0x12, 0xb7a60130}, {0x17, 0x90170110}), + SND_HDA_PIN_QUIRK(0x10ec0623, 0x17aa, "Lenovo", ALC283_FIXUP_HEADSET_MIC, + {0x14, 0x01014010}, + {0x17, 0x90170120}, + {0x18, 0x02a11030}, + {0x19, 0x02a1103f}, + {0x21, 0x0221101f}), {} }; diff --git a/sound/soc/codecs/max98390.c b/sound/soc/codecs/max98390.c index b9ce44dda886..0d63ebfbff2f 100644 --- a/sound/soc/codecs/max98390.c +++ b/sound/soc/codecs/max98390.c @@ -754,6 +754,7 @@ static struct snd_soc_dai_driver max98390_dai[] = { static int max98390_dsm_init(struct snd_soc_component *component) { int ret; + int param_size, param_start_addr; char filename[128]; const char *vendor, *product; struct max98390_priv *max98390 = @@ -778,16 +779,31 @@ static int max98390_dsm_init(struct snd_soc_component *component) } dev_dbg(component->dev, - "max98390: param fw size %ld\n", + "max98390: param fw size %zd\n", fw->size); + if (fw->size < MAX98390_DSM_PARAM_MIN_SIZE) { + dev_err(component->dev, + "param fw is invalid.\n"); + goto err_alloc; + } dsm_param = (char *)fw->data; + param_start_addr = (dsm_param[0] & 0xff) | (dsm_param[1] & 0xff) << 8; + param_size = (dsm_param[2] & 0xff) | (dsm_param[3] & 0xff) << 8; + if (param_size > MAX98390_DSM_PARAM_MAX_SIZE || + param_start_addr < DSM_STBASS_HPF_B0_BYTE0 || + fw->size < param_size + MAX98390_DSM_PAYLOAD_OFFSET) { + dev_err(component->dev, + "param fw is invalid.\n"); + goto err_alloc; + } + regmap_write(max98390->regmap, MAX98390_R203A_AMP_EN, 0x80); dsm_param += MAX98390_DSM_PAYLOAD_OFFSET; - regmap_bulk_write(max98390->regmap, DSM_EQ_BQ1_B0_BYTE0, - dsm_param, - fw->size - MAX98390_DSM_PAYLOAD_OFFSET); - release_firmware(fw); + regmap_bulk_write(max98390->regmap, param_start_addr, + dsm_param, param_size); regmap_write(max98390->regmap, MAX98390_R23E1_DSP_GLOBAL_EN, 0x01); +err_alloc: + release_firmware(fw); err: return ret; } diff --git a/sound/soc/codecs/max98390.h b/sound/soc/codecs/max98390.h index f59cb114d957..5f444e7779b0 100644 --- a/sound/soc/codecs/max98390.h +++ b/sound/soc/codecs/max98390.h @@ -650,7 +650,8 @@ /* DSM register offset */ #define MAX98390_DSM_PAYLOAD_OFFSET 16 -#define MAX98390_DSM_PAYLOAD_OFFSET_2 495 +#define MAX98390_DSM_PARAM_MAX_SIZE 770 +#define MAX98390_DSM_PARAM_MIN_SIZE 670 struct max98390_priv { struct regmap *regmap; diff --git a/sound/soc/codecs/rl6231.c b/sound/soc/codecs/rl6231.c index 2586d1cafc0c..8c9daf32bab8 100644 --- a/sound/soc/codecs/rl6231.c +++ b/sound/soc/codecs/rl6231.c @@ -80,8 +80,8 @@ int rl6231_calc_dmic_clk(int rate) for (i = 0; i < ARRAY_SIZE(div); i++) { if ((div[i] % 3) == 0) continue; - /* find divider that gives DMIC frequency below 3.072MHz */ - if (3072000 * div[i] >= rate) + /* find divider that gives DMIC frequency below 1.536MHz */ + if (1536000 * div[i] >= rate) return i; } diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 6ba1849a77b0..e2e1d5b03b38 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3625,6 +3625,12 @@ static const struct rt5645_platform_data asus_t100ha_platform_data = { .inv_jd1_1 = true, }; +static const struct rt5645_platform_data asus_t101ha_platform_data = { + .dmic1_data_pin = RT5645_DMIC_DATA_IN2N, + .dmic2_data_pin = RT5645_DMIC2_DISABLE, + .jd_mode = 3, +}; + static const struct rt5645_platform_data lenovo_ideapad_miix_310_pdata = { .jd_mode = 3, .in2_diff = true, @@ -3709,6 +3715,14 @@ static const struct dmi_system_id dmi_platform_data[] = { .driver_data = (void *)&asus_t100ha_platform_data, }, { + .ident = "ASUS T101HA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "T101HA"), + }, + .driver_data = (void *)&asus_t101ha_platform_data, + }, + { .ident = "MINIX Z83-4", .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "MINIX"), diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index cf4feb835743..00be73900888 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -581,7 +581,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) if (!fsl_asoc_card_is_ac97(priv) && !codec_dev) { dev_err(&pdev->dev, "failed to find codec device\n"); - ret = -EINVAL; + ret = -EPROBE_DEFER; goto asrc_fail; } diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 30f70bbdf89c..1fdb70b9e478 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -754,6 +754,18 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_JD_NOT_INV | BYT_RT5640_MCLK_EN), }, + { /* Toshiba Encore WT10-A */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "TOSHIBA WT10-A-103"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD1_IN4P | + BYT_RT5640_OVCD_TH_2000UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_SSP0_AIF2 | + BYT_RT5640_MCLK_EN), + }, { /* Catch-all for generic Insyde tablets, must be last */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), diff --git a/sound/soc/intel/boards/glk_rt5682_max98357a.c b/sound/soc/intel/boards/glk_rt5682_max98357a.c index 48eda1a8aa6c..954ab01f695b 100644 --- a/sound/soc/intel/boards/glk_rt5682_max98357a.c +++ b/sound/soc/intel/boards/glk_rt5682_max98357a.c @@ -407,7 +407,7 @@ static struct snd_soc_dai_link geminilake_dais[] = { .name = "Glk Audio Echo Reference cap", .stream_name = "Echoreference Capture", .init = NULL, - .capture_only = 1, + .dpcm_capture = 1, .nonatomic = 1, .dynamic = 1, SND_SOC_DAILINK_REG(echoref, dummy, platform), diff --git a/sound/soc/intel/boards/kbl_da7219_max98927.c b/sound/soc/intel/boards/kbl_da7219_max98927.c index cc9b5eab8b4a..e29c31ffd241 100644 --- a/sound/soc/intel/boards/kbl_da7219_max98927.c +++ b/sound/soc/intel/boards/kbl_da7219_max98927.c @@ -692,7 +692,7 @@ static struct snd_soc_dai_link kabylake_dais[] = { .name = "Kbl Audio Echo Reference cap", .stream_name = "Echoreference Capture", .init = NULL, - .capture_only = 1, + .dpcm_capture = 1, .nonatomic = 1, SND_SOC_DAILINK_REG(echoref, dummy, platform), }, @@ -858,7 +858,7 @@ static struct snd_soc_dai_link kabylake_max98_927_373_dais[] = { .name = "Kbl Audio Echo Reference cap", .stream_name = "Echoreference Capture", .init = NULL, - .capture_only = 1, + .dpcm_capture = 1, .nonatomic = 1, SND_SOC_DAILINK_REG(echoref, dummy, platform), }, diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c index 658a9da3a40f..09ba55fc36d5 100644 --- a/sound/soc/intel/boards/kbl_rt5663_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c @@ -672,7 +672,7 @@ static struct snd_soc_dai_link kabylake_dais[] = { .name = "Kbl Audio Echo Reference cap", .stream_name = "Echoreference Capture", .init = NULL, - .capture_only = 1, + .dpcm_capture = 1, .nonatomic = 1, SND_SOC_DAILINK_REG(echoref, dummy, platform), }, diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 1b1f8d7a4ea3..b34cf6cf1139 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -566,7 +566,7 @@ static struct snd_soc_dai_link kabylake_dais[] = { .name = "Kbl Audio Echo Reference cap", .stream_name = "Echoreference Capture", .init = NULL, - .capture_only = 1, + .dpcm_capture = 1, .nonatomic = 1, SND_SOC_DAILINK_REG(echoref, dummy, platform), }, diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index 2e9b56b29d31..b2e867113226 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -249,7 +249,7 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, /* Enable pclk to access registers and clock the fifo ip */ ret = clk_prepare_enable(fifo->pclk); if (ret) - return ret; + goto free_irq; /* Setup status2 so it reports the memory pointer */ regmap_update_bits(fifo->map, FIFO_CTRL1, @@ -269,8 +269,14 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, /* Take memory arbitror out of reset */ ret = reset_control_deassert(fifo->arb); if (ret) - clk_disable_unprepare(fifo->pclk); + goto free_clk; + + return 0; +free_clk: + clk_disable_unprepare(fifo->pclk); +free_irq: + free_irq(fifo->irq, ss); return ret; } EXPORT_SYMBOL_GPL(axg_fifo_pcm_open); diff --git a/sound/soc/meson/meson-card-utils.c b/sound/soc/meson/meson-card-utils.c index 2ca8c98e204f..5a4a91c88734 100644 --- a/sound/soc/meson/meson-card-utils.c +++ b/sound/soc/meson/meson-card-utils.c @@ -49,19 +49,26 @@ int meson_card_reallocate_links(struct snd_soc_card *card, links = krealloc(priv->card.dai_link, num_links * sizeof(*priv->card.dai_link), GFP_KERNEL | __GFP_ZERO); + if (!links) + goto err_links; + ldata = krealloc(priv->link_data, num_links * sizeof(*priv->link_data), GFP_KERNEL | __GFP_ZERO); - - if (!links || !ldata) { - dev_err(priv->card.dev, "failed to allocate links\n"); - return -ENOMEM; - } + if (!ldata) + goto err_ldata; priv->card.dai_link = links; priv->link_data = ldata; priv->card.num_links = num_links; return 0; + +err_ldata: + kfree(links); +err_links: + dev_err(priv->card.dev, "failed to allocate links\n"); + return -ENOMEM; + } EXPORT_SYMBOL_GPL(meson_card_reallocate_links); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index b07eca2c6ccc..7b387202c5db 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1648,9 +1648,25 @@ match: dai_link->platforms->name = component->name; /* convert non BE into BE */ - dai_link->no_pcm = 1; - dai_link->dpcm_playback = 1; - dai_link->dpcm_capture = 1; + if (!dai_link->no_pcm) { + dai_link->no_pcm = 1; + + if (dai_link->dpcm_playback) + dev_warn(card->dev, + "invalid configuration, dailink %s has flags no_pcm=0 and dpcm_playback=1\n", + dai_link->name); + if (dai_link->dpcm_capture) + dev_warn(card->dev, + "invalid configuration, dailink %s has flags no_pcm=0 and dpcm_capture=1\n", + dai_link->name); + + /* convert normal link into DPCM one */ + if (!(dai_link->dpcm_playback || + dai_link->dpcm_capture)) { + dai_link->dpcm_playback = !dai_link->capture_only; + dai_link->dpcm_capture = !dai_link->playback_only; + } + } /* * override any BE fixups diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 276505fb9d50..2c114b4542ce 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -2789,20 +2789,44 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num) struct snd_pcm *pcm; char new_name[64]; int ret = 0, playback = 0, capture = 0; + int stream; int i; + if (rtd->dai_link->dynamic && rtd->num_cpus > 1) { + dev_err(rtd->dev, + "DPCM doesn't support Multi CPU for Front-Ends yet\n"); + return -EINVAL; + } + if (rtd->dai_link->dynamic || rtd->dai_link->no_pcm) { - cpu_dai = asoc_rtd_to_cpu(rtd, 0); - if (rtd->num_cpus > 1) { - dev_err(rtd->dev, - "DPCM doesn't support Multi CPU yet\n"); - return -EINVAL; + if (rtd->dai_link->dpcm_playback) { + stream = SNDRV_PCM_STREAM_PLAYBACK; + + for_each_rtd_cpu_dais(rtd, i, cpu_dai) + if (!snd_soc_dai_stream_valid(cpu_dai, + stream)) { + dev_err(rtd->card->dev, + "CPU DAI %s for rtd %s does not support playback\n", + cpu_dai->name, + rtd->dai_link->stream_name); + return -EINVAL; + } + playback = 1; + } + if (rtd->dai_link->dpcm_capture) { + stream = SNDRV_PCM_STREAM_CAPTURE; + + for_each_rtd_cpu_dais(rtd, i, cpu_dai) + if (!snd_soc_dai_stream_valid(cpu_dai, + stream)) { + dev_err(rtd->card->dev, + "CPU DAI %s for rtd %s does not support capture\n", + cpu_dai->name, + rtd->dai_link->stream_name); + return -EINVAL; + } + capture = 1; } - - playback = rtd->dai_link->dpcm_playback && - snd_soc_dai_stream_valid(cpu_dai, SNDRV_PCM_STREAM_PLAYBACK); - capture = rtd->dai_link->dpcm_capture && - snd_soc_dai_stream_valid(cpu_dai, SNDRV_PCM_STREAM_CAPTURE); } else { /* Adapt stream for codec2codec links */ int cpu_capture = rtd->dai_link->params ? diff --git a/sound/soc/sof/nocodec.c b/sound/soc/sof/nocodec.c index ce053ba8f2e8..d03b5be31255 100644 --- a/sound/soc/sof/nocodec.c +++ b/sound/soc/sof/nocodec.c @@ -52,8 +52,10 @@ static int sof_nocodec_bes_setup(struct device *dev, links[i].platforms->name = dev_name(dev); links[i].codecs->dai_name = "snd-soc-dummy-dai"; links[i].codecs->name = "snd-soc-dummy"; - links[i].dpcm_playback = 1; - links[i].dpcm_capture = 1; + if (ops->drv[i].playback.channels_min) + links[i].dpcm_playback = 1; + if (ops->drv[i].capture.channels_min) + links[i].dpcm_capture = 1; } card->dai_link = links; diff --git a/sound/usb/card.c b/sound/usb/card.c index fd6fd1726ea0..162bdd6eb4d4 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -634,7 +634,6 @@ static int usb_audio_probe(struct usb_interface *intf, id, &chip); if (err < 0) goto __error; - chip->pm_intf = intf; break; } else if (vid[i] != -1 || pid[i] != -1) { dev_info(&dev->dev, @@ -651,6 +650,13 @@ static int usb_audio_probe(struct usb_interface *intf, goto __error; } } + + if (chip->num_interfaces >= MAX_CARD_INTERFACES) { + dev_info(&dev->dev, "Too many interfaces assigned to the single USB-audio card\n"); + err = -EINVAL; + goto __error; + } + dev_set_drvdata(&dev->dev, chip); /* @@ -703,6 +709,7 @@ static int usb_audio_probe(struct usb_interface *intf, } usb_chip[chip->index] = chip; + chip->intf[chip->num_interfaces] = intf; chip->num_interfaces++; usb_set_intfdata(intf, chip); atomic_dec(&chip->active); @@ -818,19 +825,37 @@ void snd_usb_unlock_shutdown(struct snd_usb_audio *chip) int snd_usb_autoresume(struct snd_usb_audio *chip) { + int i, err; + if (atomic_read(&chip->shutdown)) return -EIO; - if (atomic_inc_return(&chip->active) == 1) - return usb_autopm_get_interface(chip->pm_intf); + if (atomic_inc_return(&chip->active) != 1) + return 0; + + for (i = 0; i < chip->num_interfaces; i++) { + err = usb_autopm_get_interface(chip->intf[i]); + if (err < 0) { + /* rollback */ + while (--i >= 0) + usb_autopm_put_interface(chip->intf[i]); + atomic_dec(&chip->active); + return err; + } + } return 0; } void snd_usb_autosuspend(struct snd_usb_audio *chip) { + int i; + if (atomic_read(&chip->shutdown)) return; - if (atomic_dec_and_test(&chip->active)) - usb_autopm_put_interface(chip->pm_intf); + if (!atomic_dec_and_test(&chip->active)) + return; + + for (i = 0; i < chip->num_interfaces; i++) + usb_autopm_put_interface(chip->intf[i]); } static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message) @@ -843,9 +868,6 @@ static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message) if (chip == (void *)-1L) return 0; - chip->autosuspended = !!PMSG_IS_AUTO(message); - if (!chip->autosuspended) - snd_power_change_state(chip->card, SNDRV_CTL_POWER_D3hot); if (!chip->num_suspended_intf++) { list_for_each_entry(as, &chip->pcm_list, list) { snd_usb_pcm_suspend(as); @@ -858,6 +880,11 @@ static int usb_audio_suspend(struct usb_interface *intf, pm_message_t message) snd_usb_mixer_suspend(mixer); } + if (!PMSG_IS_AUTO(message) && !chip->system_suspend) { + snd_power_change_state(chip->card, SNDRV_CTL_POWER_D3hot); + chip->system_suspend = chip->num_suspended_intf; + } + return 0; } @@ -871,10 +898,10 @@ static int __usb_audio_resume(struct usb_interface *intf, bool reset_resume) if (chip == (void *)-1L) return 0; - if (--chip->num_suspended_intf) - return 0; atomic_inc(&chip->active); /* avoid autopm */ + if (chip->num_suspended_intf > 1) + goto out; list_for_each_entry(as, &chip->pcm_list, list) { err = snd_usb_pcm_resume(as); @@ -896,9 +923,12 @@ static int __usb_audio_resume(struct usb_interface *intf, bool reset_resume) snd_usbmidi_resume(p); } - if (!chip->autosuspended) + out: + if (chip->num_suspended_intf == chip->system_suspend) { snd_power_change_state(chip->card, SNDRV_CTL_POWER_D0); - chip->autosuspended = 0; + chip->system_suspend = 0; + } + chip->num_suspended_intf--; err_out: atomic_dec(&chip->active); /* allow autopm after this point */ diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 6d6492195bdc..4ec491011b19 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -40,6 +40,18 @@ .ifnum = QUIRK_NO_INTERFACE \ } +/* HP Thunderbolt Dock Audio Headset */ +{ + USB_DEVICE(0x03f0, 0x0269), + QUIRK_DEVICE_PROFILE("HP", "Thunderbolt Dock Audio Headset", + "HP-Thunderbolt-Dock-Audio-Headset"), +}, +/* HP Thunderbolt Dock Audio Module */ +{ + USB_DEVICE(0x03f0, 0x0567), + QUIRK_DEVICE_PROFILE("HP", "Thunderbolt Dock Audio Module", + "HP-Thunderbolt-Dock-Audio-Module"), +}, /* FTDI devices */ { USB_DEVICE(0x0403, 0xb8d8), diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 1c892c7f14d7..b91c4c0807ec 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -19,14 +19,16 @@ struct media_device; struct media_intf_devnode; +#define MAX_CARD_INTERFACES 16 + struct snd_usb_audio { int index; struct usb_device *dev; struct snd_card *card; - struct usb_interface *pm_intf; + struct usb_interface *intf[MAX_CARD_INTERFACES]; u32 usb_id; struct mutex mutex; - unsigned int autosuspended:1; + unsigned int system_suspend; atomic_t active; atomic_t shutdown; atomic_t usage_count; diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c index 51b89cedd09d..8b75821302a7 100644 --- a/tools/testing/selftests/vm/khugepaged.c +++ b/tools/testing/selftests/vm/khugepaged.c @@ -502,7 +502,7 @@ static bool wait_for_scan(const char *msg, char *p) madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE); - return !timeout; + return timeout == -1; } static void alloc_at_fault(void) |