diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-12-07 17:47:58 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-12-07 17:53:17 -0800 |
commit | 2483e7f04ce0e97c69b27d28ebce7a2320b7a7a6 (patch) | |
tree | a45d2e6897a32eaee4b6a5b3b84617d5ca52f4b2 | |
parent | 87e839c82cc36346a2cd183ca941316902110716 (diff) | |
parent | 5e3f5b81de80c98338bcb47c233aebefee5a4801 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:
drivers/net/ethernet/stmicro/stmmac/dwmac5.c
drivers/net/ethernet/stmicro/stmmac/dwmac5.h
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
drivers/net/ethernet/stmicro/stmmac/hwif.h
37e4b8df27bc ("net: stmmac: fix FPE events losing")
c3f3b97238f6 ("net: stmmac: Refactor EST implementation")
https://lore.kernel.org/all/20231206110306.01e91114@canb.auug.org.au/
Adjacent changes:
net/ipv4/tcp_ao.c
9396c4ee93f9 ("net/tcp: Don't store TCP-AO maclen on reqsk")
7b0f570f879a ("tcp: Move TCP-AO bits from cookie_v[46]_check() to tcp_ao_syncookie().")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
345 files changed, 3496 insertions, 1611 deletions
diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml index 987aa83c2649..df20a3c9c744 100644 --- a/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml +++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7533.yaml @@ -9,6 +9,9 @@ title: Analog Devices ADV7533/35 HDMI Encoders maintainers: - Laurent Pinchart <laurent.pinchart@ideasonboard.com> +allOf: + - $ref: /schemas/sound/dai-common.yaml# + description: | The ADV7533 and ADV7535 are HDMI audio and video transmitters compatible with HDMI 1.4 and DVI 1.0. They support color space @@ -89,6 +92,9 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 enum: [ 1, 2, 3, 4 ] + "#sound-dai-cells": + const: 0 + ports: description: The ADV7533/35 has two video ports and one audio port. diff --git a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml index fc11ab5fc465..1c2be8d6f633 100644 --- a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml +++ b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml @@ -51,7 +51,10 @@ properties: minItems: 1 interrupts: - maxItems: 1 + items: + - description: LCDIF DMA interrupt + - description: LCDIF Error interrupt + minItems: 1 power-domains: maxItems: 1 @@ -131,6 +134,21 @@ allOf: then: required: - power-domains + - if: + properties: + compatible: + contains: + enum: + - fsl,imx23-lcdif + then: + properties: + interrupts: + minItems: 2 + maxItems: 2 + else: + properties: + interrupts: + maxItems: 1 examples: - | diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml index 537e5304b730..ed24b617090b 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml @@ -10,7 +10,6 @@ maintainers: - Chun-Kuang Hu <chunkuang.hu@kernel.org> - Philipp Zabel <p.zabel@pengutronix.de> - Jitao Shi <jitao.shi@mediatek.com> - - Xinlei Lee <xinlei.lee@mediatek.com> description: | The MediaTek DSI function block is a sink of the display subsystem and can diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml index 509d20c091af..6a206111d4e0 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,mpm.yaml @@ -62,6 +62,9 @@ properties: - description: MPM pin number - description: GIC SPI number for the MPM pin + '#power-domain-cells': + const: 0 + required: - compatible - reg @@ -93,4 +96,5 @@ examples: <86 183>, <90 260>, <91 260>; + #power-domain-cells = <0>; }; diff --git a/Documentation/networking/tcp_ao.rst b/Documentation/networking/tcp_ao.rst index cfa5bf1cc542..8a58321acce7 100644 --- a/Documentation/networking/tcp_ao.rst +++ b/Documentation/networking/tcp_ao.rst @@ -99,7 +99,7 @@ also [6.1]:: when it is no longer considered permitted. Linux TCP-AO will try its best to prevent you from removing a key that's -being used, considering it a key management failure. But sine keeping +being used, considering it a key management failure. But since keeping an outdated key may become a security issue and as a peer may unintentionally prevent the removal of an old key by always setting it as RNextKeyID - a forced key removal mechanism is provided, where diff --git a/MAINTAINERS b/MAINTAINERS index 2d2865c1e479..7fb66210069b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5068,7 +5068,6 @@ CLANG CONTROL FLOW INTEGRITY SUPPORT M: Sami Tolvanen <samitolvanen@google.com> M: Kees Cook <keescook@chromium.org> R: Nathan Chancellor <nathan@kernel.org> -R: Nick Desaulniers <ndesaulniers@google.com> L: llvm@lists.linux.dev S: Supported B: https://github.com/ClangBuiltLinux/linux/issues @@ -5083,8 +5082,9 @@ F: .clang-format CLANG/LLVM BUILD SUPPORT M: Nathan Chancellor <nathan@kernel.org> -M: Nick Desaulniers <ndesaulniers@google.com> -R: Tom Rix <trix@redhat.com> +R: Nick Desaulniers <ndesaulniers@google.com> +R: Bill Wendling <morbo@google.com> +R: Justin Stitt <justinstitt@google.com> L: llvm@lists.linux.dev S: Supported W: https://clangbuiltlinux.github.io/ @@ -5234,7 +5234,6 @@ F: drivers/platform/x86/compal-laptop.c COMPILER ATTRIBUTES M: Miguel Ojeda <ojeda@kernel.org> -R: Nick Desaulniers <ndesaulniers@google.com> S: Maintained F: include/linux/compiler_attributes.h @@ -11504,7 +11503,6 @@ F: fs/autofs/ KERNEL BUILD + files below scripts/ (unless maintained elsewhere) M: Masahiro Yamada <masahiroy@kernel.org> R: Nathan Chancellor <nathan@kernel.org> -R: Nick Desaulniers <ndesaulniers@google.com> R: Nicolas Schier <nicolas@fjasle.eu> L: linux-kbuild@vger.kernel.org S: Maintained @@ -15059,6 +15057,7 @@ F: lib/random32.c F: net/ F: tools/net/ F: tools/testing/selftests/net/ +X: net/9p/ X: net/bluetooth/ NETWORKING [IPSEC] @@ -17933,6 +17932,8 @@ L: iommu@lists.linux.dev L: linux-arm-msm@vger.kernel.org S: Maintained F: drivers/iommu/arm/arm-smmu/qcom_iommu.c +F: drivers/iommu/arm/arm-smmu/arm-smmu-qcom* +F: drivers/iommu/msm_iommu* QUALCOMM IPC ROUTER (QRTR) DRIVER M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 646591c67e7a..91d2d6714969 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1839,6 +1839,10 @@ static int __init __kpti_install_ng_mappings(void *__unused) static void __init kpti_install_ng_mappings(void) { + /* Check whether KPTI is going to be used */ + if (!cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0)) + return; + /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 1641ff9a8b83..833555f74ffa 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -71,7 +71,7 @@ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %2\n" \ + "\t.align 4\n" \ "2:\t" __BUG_REL(1b) "\n" \ "\t.short %0\n" \ "\t.blockz %1-4-2\n" \ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6a9acfb690c9..2f8f3f93cbb6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -23,6 +23,15 @@ #include <asm/feature-fixups.h> #ifdef CONFIG_VSX +#define __REST_1FPVSR(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_FPR(n,base); \ + b 3f; \ +2: REST_VSR(n,c,base); \ +3: + #define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) SAVE_32FPVSRS(0, R4, R3) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) + REST_1FPVSR(0, R4, R3) blr EXPORT_SYMBOL(store_fp_state) @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) 2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) + REST_1FPVSR(0, R4, R6) blr diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 392404688cec..9452a54d356c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1198,11 +1198,11 @@ void kvmppc_save_user_regs(void) usermsr = current->thread.regs->msr; + /* Caller has enabled FP/VEC/VSX/TM in MSR */ if (usermsr & MSR_FP) - save_fpu(current); - + __giveup_fpu(current); if (usermsr & MSR_VEC) - save_altivec(current); + __giveup_altivec(current); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (usermsr & MSR_TM) { diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 4094e4c4c77a..80b3f6e476b6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -33,6 +33,7 @@ _GLOBAL(store_vr_state) mfvscr v0 li r4, VRSTATE_VSCR stvx v0, r4, r3 + lvx v0, 0, r3 blr EXPORT_SYMBOL(store_vr_state) @@ -109,6 +110,7 @@ _GLOBAL(save_altivec) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 + lvx v0,0,r7 blr #ifdef CONFIG_VSX diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 1b5d17a9f70d..cf1f13c82175 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -10,6 +10,7 @@ #include <asm/coco.h> #include <asm/tdx.h> #include <asm/vmx.h> +#include <asm/ia32.h> #include <asm/insn.h> #include <asm/insn-eval.h> #include <asm/pgtable.h> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index d813160b14d8..6356060caaf3 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include <xen/events.h> #endif +#include <asm/apic.h> #include <asm/desc.h> #include <asm/traps.h> #include <asm/vdso.h> @@ -167,7 +168,96 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } } -/* Handles int $0x80 */ +#ifdef CONFIG_IA32_EMULATION +static __always_inline bool int80_is_external(void) +{ + const unsigned int offs = (0x80 / 32) * 0x10; + const u32 bit = BIT(0x80 % 32); + + /* The local APIC on XENPV guests is fake */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* + * If vector 0x80 is set in the APIC ISR then this is an external + * interrupt. Either from broken hardware or injected by a VMM. + * + * Note: In guest mode this is only valid for secure guests where + * the secure module fully controls the vAPIC exposed to the guest. + */ + return apic_read(APIC_ISR + offs) & bit; +} + +/** + * int80_emulation - 32-bit legacy syscall entry + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * The arguments for the INT $0x80 based syscall are on stack in the + * pt_regs structure: + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +DEFINE_IDTENTRY_RAW(int80_emulation) +{ + int nr; + + /* Kernel does not use INT $0x80! */ + if (unlikely(!user_mode(regs))) { + irqentry_enter(regs); + instrumentation_begin(); + panic("Unexpected external interrupt 0x80\n"); + } + + /* + * Establish kernel context for instrumentation, including for + * int80_is_external() below which calls into the APIC driver. + * Identical for soft and external interrupts. + */ + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* Validate that this is a soft interrupt to the extent possible */ + if (unlikely(int80_is_external())) + panic("Unexpected external interrupt 0x80\n"); + + /* + * The low level idtentry code pushed -1 into regs::orig_ax + * and regs::ax contains the syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#else /* CONFIG_IA32_EMULATION */ + +/* Handles int $0x80 on a 32bit kernel */ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { int nr = syscall_32_enter(regs); @@ -186,6 +276,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) instrumentation_end(); syscall_exit_to_user_mode(regs); } +#endif /* !CONFIG_IA32_EMULATION */ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 27c05d08558a..de94e2e84ecc 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -275,80 +275,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) - -/* - * 32-bit legacy system call entry. - * - * 32-bit x86 Linux system calls traditionally used the INT $0x80 - * instruction. INT $0x80 lands here. - * - * This entry point can be used by 32-bit and 64-bit programs to perform - * 32-bit system calls. Instances of INT $0x80 can be found inline in - * various programs and libraries. It is also used by the vDSO's - * __kernel_vsyscall fallback for hardware that doesn't support a faster - * entry method. Restarted 32-bit system calls also fall back to INT - * $0x80 regardless of what instruction was originally used to do the - * system call. - * - * This is considered a slow path. It is not used by most libc - * implementations on modern hardware except during process startup. - * - * Arguments: - * eax system call number - * ebx arg1 - * ecx arg2 - * edx arg3 - * esi arg4 - * edi arg5 - * ebp arg6 - */ -SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_ENTRY - ENDBR - /* - * Interrupts are off on entry. - */ - ASM_CLAC /* Do this early to minimize exposure */ - ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV - - /* - * User tracing code (ptrace or signal handlers) might assume that - * the saved RAX contains a 32-bit number when we're invoking a 32-bit - * syscall. Just in case the high bits are nonzero, zero-extend - * the syscall number. (This could almost certainly be deleted - * with no ill effects.) - */ - movl %eax, %eax - - /* switch to thread stack expects orig_ax and rdi to be pushed */ - pushq %rax /* pt_regs->orig_ax */ - - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rax - - /* In the Xen PV case we already run on the thread stack. */ - ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV - - movq %rsp, %rax - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp - - pushq 5*8(%rax) /* regs->ss */ - pushq 4*8(%rax) /* regs->rsp */ - pushq 3*8(%rax) /* regs->eflags */ - pushq 2*8(%rax) /* regs->cs */ - pushq 1*8(%rax) /* regs->ip */ - pushq 0*8(%rax) /* regs->orig_ax */ -.Lint80_keep_stack: - - PUSH_AND_CLEAR_REGS rax=$-ENOSYS - UNWIND_HINT_REGS - - cld - - IBRS_ENTER - UNTRAIN_RET - - movq %rsp, %rdi - call do_int80_syscall_32 - jmp swapgs_restore_regs_and_return_to_usermode -SYM_CODE_END(entry_INT80_compat) diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 5a2ae24b1204..9805629479d9 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -75,6 +75,11 @@ static inline bool ia32_enabled(void) return __ia32_enabled; } +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + #else /* !CONFIG_IA32_EMULATION */ static inline bool ia32_enabled(void) @@ -82,6 +87,8 @@ static inline bool ia32_enabled(void) return IS_ENABLED(CONFIG_X86_32); } +static inline void ia32_disable(void) {} + #endif #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 05fd175cec7d..13639e57e1f8 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + #ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_64 DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 4d84122bd643..484f4f0131a5 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void); void entry_SYSCALL_compat_safe_stack(void); void entry_SYSRETL_compat_unsafe_stack(void); void entry_SYSRETL_compat_end(void); -void entry_INT80_compat(void); -#ifdef CONFIG_XEN_PV -void xen_entry_INT80_compat(void); -#endif #else /* !CONFIG_IA32_EMULATION */ #define entry_SYSCALL_compat NULL #define entry_SYSENTER_compat NULL diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 8857abc706e4..660b601f1d6c 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -121,7 +121,7 @@ static const __initconst struct idt_data def_idts[] = { static const struct idt_data ia32_idt[] __initconst = { #if defined(CONFIG_IA32_EMULATION) - SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), + SYSG(IA32_SYSCALL_VECTOR, asm_int80_emulation), #elif defined(CONFIG_X86_32) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32), #endif diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index a68f2dda0948..70b91de2e053 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -32,6 +32,7 @@ #include <asm/msr.h> #include <asm/cmdline.h> #include <asm/sev.h> +#include <asm/ia32.h> #include "mm_internal.h" @@ -481,6 +482,16 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ES_ENABLED) x86_cpuinit.parallel_bringup = false; + + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + if (sev_status & MSR_AMD64_SEV_ENABLED) + ia32_disable(); } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8c10d9abc239..e89e415aa743 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp #endif WARN(1, "verification of programs using bpf_throw should have failed\n"); } + +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + u8 *old_addr, *new_addr, *old_bypass_addr; + int ret; + + old_bypass_addr = old ? NULL : poke->bypass_addr; + old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; + new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; + + /* + * On program loading or teardown, the program's kallsym entry + * might not be in place, so we use __bpf_arch_text_poke to skip + * the kallsyms check. + */ + if (new) { + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, new_addr); + BUG_ON(ret < 0); + if (!old) { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + poke->bypass_addr, + NULL); + BUG_ON(ret < 0); + } + } else { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + old_bypass_addr, + poke->bypass_addr); + BUG_ON(ret < 0); + /* let other CPUs finish the execution of program + * so that it will not possible to expose them + * to invalid nop, stack unwind, nop state + */ + if (!ret) + synchronize_rcu(); + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, NULL); + BUG_ON(ret < 0); + } +} diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0337392a3121..3c61bb98c10e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,9 +33,12 @@ EXPORT_SYMBOL_GPL(hypercall_page); * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. + * Make sure that xen_vcpu_info doesn't cross a page boundary by making it + * cache-line aligned (the struct is guaranteed to have a size of 64 bytes, + * which matches the cache line size of 64-bit x86 processors). */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); -DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DEFINE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ DEFINE_PER_CPU(uint32_t, xen_vcpu_id); @@ -160,6 +163,7 @@ void xen_vcpu_setup(int cpu) int err; struct vcpu_info *vcpup; + BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES); BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); /* diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index bbbfdd495ebd..aeb33e0a3f76 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION - { entry_INT80_compat, xen_entry_INT80_compat, false }, + TRAP_ENTRY(int80_emulation, false ), #endif TRAP_ENTRY(exc_page_fault, false ), TRAP_ENTRY(exc_divide_error, false ), diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 9e5e68008785..1a9cd18dfbd3 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat +xen_pv_trap asm_int80_emulation #endif xen_pv_trap asm_exc_xen_unknown_trap xen_pv_trap asm_exc_xen_hypervisor_callback diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 408a2aa66c69..a87ab36889e7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -21,7 +21,7 @@ extern void *xen_initial_gdt; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); -DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DECLARE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); diff --git a/block/blk-core.c b/block/blk-core.c index fdf25b8d6e78..2eca76ccf4ee 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -501,9 +501,17 @@ static inline void bio_check_ro(struct bio *bio) if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) { if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return; - pr_warn_ratelimited("Trying to write to read-only block-device %pg\n", - bio->bi_bdev); - /* Older lvm-tools actually trigger this */ + + if (bio->bi_bdev->bd_ro_warned) + return; + + bio->bi_bdev->bd_ro_warned = true; + /* + * Use ioctl to set underlying disk of raid/dm to read-only + * will trigger this. + */ + pr_warn("Trying to write to read-only block-device %pg\n", + bio->bi_bdev); } } diff --git a/block/blk-mq.c b/block/blk-mq.c index 900c1be1fee1..ac18f802c027 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1512,14 +1512,26 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); +static bool blk_is_flush_data_rq(struct request *rq) +{ + return (rq->rq_flags & RQF_FLUSH_SEQ) && !is_flush_rq(rq); +} + static bool blk_mq_rq_inflight(struct request *rq, void *priv) { /* * If we find a request that isn't idle we know the queue is busy * as it's checked in the iter. * Return false to stop the iteration. + * + * In case of queue quiesce, if one flush data request is completed, + * don't count it as inflight given the flush sequence is suspended, + * and the original flush data request is invisible to driver, just + * like other pending requests because of quiesce */ - if (blk_mq_request_started(rq)) { + if (blk_mq_request_started(rq) && !(blk_queue_quiesced(rq->q) && + blk_is_flush_data_rq(rq) && + blk_mq_request_completed(rq))) { bool *busy = priv; *busy = true; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 63e481262336..0b2d04766324 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -615,6 +615,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec"); #endif +/* Common attributes for bio-based and request-based queues. */ static struct attribute *queue_attrs[] = { &queue_ra_entry.attr, &queue_max_hw_sectors_entry.attr, @@ -659,6 +660,7 @@ static struct attribute *queue_attrs[] = { NULL, }; +/* Request-based queue attributes that are not relevant for bio-based queues. */ static struct attribute *blk_mq_queue_attrs[] = { &queue_requests_entry.attr, &elv_iosched_entry.attr, diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index d321ca7160d9..6cee536c229a 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -253,8 +253,7 @@ static const struct backlight_ops acpi_backlight_ops = { static int video_get_max_state(struct thermal_cooling_device *cooling_dev, unsigned long *state) { - struct acpi_device *device = cooling_dev->devdata; - struct acpi_video_device *video = acpi_driver_data(device); + struct acpi_video_device *video = cooling_dev->devdata; *state = video->brightness->count - ACPI_VIDEO_FIRST_LEVEL - 1; return 0; @@ -263,8 +262,7 @@ static int video_get_max_state(struct thermal_cooling_device *cooling_dev, static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long *state) { - struct acpi_device *device = cooling_dev->devdata; - struct acpi_video_device *video = acpi_driver_data(device); + struct acpi_video_device *video = cooling_dev->devdata; unsigned long long level; int offset; @@ -283,8 +281,7 @@ static int video_get_cur_state(struct thermal_cooling_device *cooling_dev, static int video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long state) { - struct acpi_device *device = cooling_dev->devdata; - struct acpi_video_device *video = acpi_driver_data(device); + struct acpi_video_device *video = cooling_dev->devdata; int level; if (state >= video->brightness->count - ACPI_VIDEO_FIRST_LEVEL) @@ -1125,7 +1122,6 @@ static int acpi_video_bus_get_one_device(struct acpi_device *device, void *arg) strcpy(acpi_device_name(device), ACPI_VIDEO_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_VIDEO_CLASS); - device->driver_data = data; data->device_id = device_id; data->video = video; @@ -1747,8 +1743,8 @@ static void acpi_video_dev_register_backlight(struct acpi_video_device *device) device->backlight->props.brightness = acpi_video_get_brightness(device->backlight); - device->cooling_dev = thermal_cooling_device_register("LCD", - device->dev, &video_cooling_ops); + device->cooling_dev = thermal_cooling_device_register("LCD", device, + &video_cooling_ops); if (IS_ERR(device->cooling_dev)) { /* * Set cooling_dev to NULL so we don't crash trying to free it. diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index fa5dd71a80fa..02bb2cce423f 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1568,17 +1568,22 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, int err; const struct iommu_ops *ops; + /* Serialise to make dev->iommu stable under our potential fwspec */ + mutex_lock(&iommu_probe_device_lock); /* * If we already translated the fwspec there is nothing left to do, * return the iommu_ops. */ ops = acpi_iommu_fwspec_ops(dev); - if (ops) + if (ops) { + mutex_unlock(&iommu_probe_device_lock); return ops; + } err = iort_iommu_configure_id(dev, id_in); if (err && err != -EPROBE_DEFER) err = viot_iommu_configure(dev); + mutex_unlock(&iommu_probe_device_lock); /* * If we have reason to believe the IOMMU driver missed the initial diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index c10ff8985203..0a0f483124c3 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1055,9 +1055,14 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) * Ask the sd driver to issue START STOP UNIT on runtime suspend * and resume and shutdown only. For system level suspend/resume, * devices power state is handled directly by libata EH. + * Given that disks are always spun up on system resume, also + * make sure that the sd driver forces runtime suspended disks + * to be resumed to correctly reflect the power state of the + * device. */ - sdev->manage_runtime_start_stop = true; - sdev->manage_shutdown = true; + sdev->manage_runtime_start_stop = 1; + sdev->manage_shutdown = 1; + sdev->force_runtime_start_on_system_start = 1; } /* diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index 92592f944a3d..ac63a73ccdaa 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -410,8 +410,7 @@ out: rb_entry(node, struct regmap_range_node, node); /* If there's nothing in the cache there's nothing to sync */ - ret = regcache_read(map, this->selector_reg, &i); - if (ret != 0) + if (regcache_read(map, this->selector_reg, &i) != 0) continue; ret = _regmap_write(map, this->selector_reg, i); diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 9a1e194d5cf8..1f6186475715 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -307,11 +307,11 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); WRITE_ONCE(cpudata->highest_perf, highest_perf); - + WRITE_ONCE(cpudata->max_limit_perf, highest_perf); WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); - + WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); return 0; } @@ -329,11 +329,12 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) highest_perf = cppc_perf.highest_perf; WRITE_ONCE(cpudata->highest_perf, highest_perf); - + WRITE_ONCE(cpudata->max_limit_perf, highest_perf); WRITE_ONCE(cpudata->nominal_perf, cppc_perf.nominal_perf); WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) return 0; @@ -432,6 +433,10 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u64 prev = READ_ONCE(cpudata->cppc_req_cached); u64 value = prev; + min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, + cpudata->max_limit_perf); + max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, + cpudata->max_limit_perf); des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); if ((cppc_state == AMD_PSTATE_GUIDED) && (gov_flags & CPUFREQ_GOV_DYNAMIC_SWITCHING)) { @@ -470,6 +475,22 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy) return 0; } +static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) +{ + u32 max_limit_perf, min_limit_perf; + struct amd_cpudata *cpudata = policy->driver_data; + + max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); + min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); + WRITE_ONCE(cpudata->max_limit_freq, policy->max); + WRITE_ONCE(cpudata->min_limit_freq, policy->min); + + return 0; +} + static int amd_pstate_update_freq(struct cpufreq_policy *policy, unsigned int target_freq, bool fast_switch) { @@ -480,6 +501,9 @@ static int amd_pstate_update_freq(struct cpufreq_policy *policy, if (!cpudata->max_freq) return -ENODEV; + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); + cap_perf = READ_ONCE(cpudata->highest_perf); min_perf = READ_ONCE(cpudata->lowest_perf); max_perf = cap_perf; @@ -518,7 +542,9 @@ static int amd_pstate_target(struct cpufreq_policy *policy, static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { - return amd_pstate_update_freq(policy, target_freq, true); + if (!amd_pstate_update_freq(policy, target_freq, true)) + return target_freq; + return policy->cur; } static void amd_pstate_adjust_perf(unsigned int cpu, @@ -532,6 +558,10 @@ static void amd_pstate_adjust_perf(unsigned int cpu, struct amd_cpudata *cpudata = policy->driver_data; unsigned int target_freq; + if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) + amd_pstate_update_min_max_limit(policy); + + cap_perf = READ_ONCE(cpudata->highest_perf); lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); max_freq = READ_ONCE(cpudata->max_freq); @@ -745,6 +775,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) /* Initial processor data capability frequencies */ cpudata->max_freq = max_freq; cpudata->min_freq = min_freq; + cpudata->max_limit_freq = max_freq; + cpudata->min_limit_freq = min_freq; cpudata->nominal_freq = nominal_freq; cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; @@ -850,11 +882,16 @@ static ssize_t show_energy_performance_available_preferences( { int i = 0; int offset = 0; + struct amd_cpudata *cpudata = policy->driver_data; + + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + return sysfs_emit_at(buf, offset, "%s\n", + energy_perf_strings[EPP_INDEX_PERFORMANCE]); while (energy_perf_strings[i] != NULL) offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]); - sysfs_emit_at(buf, offset, "\n"); + offset += sysfs_emit_at(buf, offset, "\n"); return offset; } @@ -1183,16 +1220,25 @@ static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) return 0; } -static void amd_pstate_epp_init(unsigned int cpu) +static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); struct amd_cpudata *cpudata = policy->driver_data; - u32 max_perf, min_perf; + u32 max_perf, min_perf, min_limit_perf, max_limit_perf; u64 value; s16 epp; max_perf = READ_ONCE(cpudata->highest_perf); min_perf = READ_ONCE(cpudata->lowest_perf); + max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); + min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + + max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, + cpudata->max_limit_perf); + min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, + cpudata->max_limit_perf); + + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); + WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); value = READ_ONCE(cpudata->cppc_req_cached); @@ -1210,9 +1256,6 @@ static void amd_pstate_epp_init(unsigned int cpu) value &= ~AMD_CPPC_DES_PERF(~0L); value |= AMD_CPPC_DES_PERF(0); - if (cpudata->epp_policy == cpudata->policy) - goto skip_epp; - cpudata->epp_policy = cpudata->policy; /* Get BIOS pre-defined epp value */ @@ -1222,7 +1265,7 @@ static void amd_pstate_epp_init(unsigned int cpu) * This return value can only be negative for shared_memory * systems where EPP register read/write not supported. */ - goto skip_epp; + return; } if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) @@ -1236,8 +1279,6 @@ static void amd_pstate_epp_init(unsigned int cpu) WRITE_ONCE(cpudata->cppc_req_cached, value); amd_pstate_set_epp(cpudata, epp); -skip_epp: - cpufreq_cpu_put(policy); } static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) @@ -1252,7 +1293,7 @@ static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) cpudata->policy = policy->policy; - amd_pstate_epp_init(policy->cpu); + amd_pstate_epp_update_limit(policy); return 0; } diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 494d044b9e72..33728c242f66 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -327,7 +327,7 @@ static int imx6ul_opp_check_speed_grading(struct device *dev) imx6x_disable_freq_in_opp(dev, 696000000); if (of_machine_is_compatible("fsl,imx6ull")) { - if (val != OCOTP_CFG3_6ULL_SPEED_792MHZ) + if (val < OCOTP_CFG3_6ULL_SPEED_792MHZ) imx6x_disable_freq_in_opp(dev, 792000000); if (val != OCOTP_CFG3_6ULL_SPEED_900MHZ) diff --git a/drivers/cpufreq/qcom-cpufreq-nvmem.c b/drivers/cpufreq/qcom-cpufreq-nvmem.c index 6355a39418c5..ea05d9d67490 100644 --- a/drivers/cpufreq/qcom-cpufreq-nvmem.c +++ b/drivers/cpufreq/qcom-cpufreq-nvmem.c @@ -23,8 +23,10 @@ #include <linux/nvmem-consumer.h> #include <linux/of.h> #include <linux/platform_device.h> +#include <linux/pm.h> #include <linux/pm_domain.h> #include <linux/pm_opp.h> +#include <linux/pm_runtime.h> #include <linux/slab.h> #include <linux/soc/qcom/smem.h> @@ -55,6 +57,7 @@ struct qcom_cpufreq_match_data { struct qcom_cpufreq_drv_cpu { int opp_token; + struct device **virt_devs; }; struct qcom_cpufreq_drv { @@ -424,6 +427,30 @@ static const struct qcom_cpufreq_match_data match_data_ipq8074 = { .get_version = qcom_cpufreq_ipq8074_name_version, }; +static void qcom_cpufreq_suspend_virt_devs(struct qcom_cpufreq_drv *drv, unsigned int cpu) +{ + const char * const *name = drv->data->genpd_names; + int i; + + if (!drv->cpus[cpu].virt_devs) + return; + + for (i = 0; *name; i++, name++) + device_set_awake_path(drv->cpus[cpu].virt_devs[i]); +} + +static void qcom_cpufreq_put_virt_devs(struct qcom_cpufreq_drv *drv, unsigned int cpu) +{ + const char * const *name = drv->data->genpd_names; + int i; + + if (!drv->cpus[cpu].virt_devs) + return; + + for (i = 0; *name; i++, name++) + pm_runtime_put(drv->cpus[cpu].virt_devs[i]); +} + static int qcom_cpufreq_probe(struct platform_device *pdev) { struct qcom_cpufreq_drv *drv; @@ -478,6 +505,7 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) of_node_put(np); for_each_possible_cpu(cpu) { + struct device **virt_devs = NULL; struct dev_pm_opp_config config = { .supported_hw = NULL, }; @@ -498,7 +526,7 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) if (drv->data->genpd_names) { config.genpd_names = drv->data->genpd_names; - config.virt_devs = NULL; + config.virt_devs = &virt_devs; } if (config.supported_hw || config.genpd_names) { @@ -509,6 +537,27 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) goto free_opp; } } + + if (virt_devs) { + const char * const *name = config.genpd_names; + int i, j; + + for (i = 0; *name; i++, name++) { + ret = pm_runtime_resume_and_get(virt_devs[i]); + if (ret) { + dev_err(cpu_dev, "failed to resume %s: %d\n", + *name, ret); + + /* Rollback previous PM runtime calls */ + name = config.genpd_names; + for (j = 0; *name && j < i; j++, name++) + pm_runtime_put(virt_devs[j]); + + goto free_opp; + } + } + drv->cpus[cpu].virt_devs = virt_devs; + } } cpufreq_dt_pdev = platform_device_register_simple("cpufreq-dt", -1, @@ -522,8 +571,10 @@ static int qcom_cpufreq_probe(struct platform_device *pdev) dev_err(cpu_dev, "Failed to register platform device\n"); free_opp: - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + qcom_cpufreq_put_virt_devs(drv, cpu); dev_pm_opp_clear_config(drv->cpus[cpu].opp_token); + } return ret; } @@ -534,15 +585,31 @@ static void qcom_cpufreq_remove(struct platform_device *pdev) platform_device_unregister(cpufreq_dt_pdev); - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + qcom_cpufreq_put_virt_devs(drv, cpu); dev_pm_opp_clear_config(drv->cpus[cpu].opp_token); + } } +static int qcom_cpufreq_suspend(struct device *dev) +{ + struct qcom_cpufreq_drv *drv = dev_get_drvdata(dev); + unsigned int cpu; + + for_each_possible_cpu(cpu) + qcom_cpufreq_suspend_virt_devs(drv, cpu); + + return 0; +} + +static DEFINE_SIMPLE_DEV_PM_OPS(qcom_cpufreq_pm_ops, qcom_cpufreq_suspend, NULL); + static struct platform_driver qcom_cpufreq_driver = { .probe = qcom_cpufreq_probe, .remove_new = qcom_cpufreq_remove, .driver = { .name = "qcom-cpufreq-nvmem", + .pm = pm_sleep_ptr(&qcom_cpufreq_pm_ops), }, }; diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 38b4110378de..eb8b733065b2 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -301,7 +301,7 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, dma_resv_list_entry(fobj, i, obj, &old, &old_usage); if ((old->context == fence->context && old_usage >= usage && - dma_fence_is_later(fence, old)) || + dma_fence_is_later_or_same(fence, old)) || dma_fence_is_signaled(old)) { dma_resv_list_set(fobj, i, fence, usage); dma_fence_put(old); diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index aa597cda0d88..2828e9573e90 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -717,14 +717,11 @@ static void create_units(struct fw_device *device) fw_unit_attributes, &unit->attribute_group); - if (device_register(&unit->device) < 0) - goto skip_unit; - fw_device_get(device); - continue; - - skip_unit: - kfree(unit); + if (device_register(&unit->device) < 0) { + put_device(&unit->device); + continue; + } } } diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 7edf2c95282f..e779d866022b 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1519,9 +1519,9 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev) sdev->use_10_for_rw = 1; if (sbp2_param_exclusive_login) { - sdev->manage_system_start_stop = true; - sdev->manage_runtime_start_stop = true; - sdev->manage_shutdown = true; + sdev->manage_system_start_stop = 1; + sdev->manage_runtime_start_stop = 1; + sdev->manage_shutdown = 1; } if (sdev->type == TYPE_ROM) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b8412202a1b0..75dc58470393 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -547,7 +547,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, struct amdgpu_device *adev = dst, *peer_adev; int num_links; - if (adev->asic_type != CHIP_ALDEBARAN) + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)) return 0; if (src) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a53f436fa9f1..0e61ebdb3f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -638,6 +638,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_rreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -694,6 +697,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_wreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7eeaf0aa7f81..5c0817cbc7c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4538,6 +4538,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); + r = amdgpu_dpm_notify_rlc_state(adev, false); + if (r) + return r; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 0cacd0b9f8be..b8fbe97efe1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -340,14 +340,11 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, adev->have_disp_power_ref = true; return ret; } - /* if we have no active crtcs, then drop the power ref - * we got before + /* if we have no active crtcs, then go to + * drop the power ref we got before */ - if (!active && adev->have_disp_power_ref) { - pm_runtime_put_autosuspend(dev->dev); + if (!active && adev->have_disp_power_ref) adev->have_disp_power_ref = false; - } - out: /* drop the power reference we got coming in here */ pm_runtime_put_autosuspend(dev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8f24cabe2155..8b33b130ea36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2263,6 +2263,8 @@ retry_init: pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + pci_wake_from_d3(pdev, TRUE); + /* * For runpm implemented via BACO, PMFW will handle the * timing for BACO in and out: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 5f71414190e9..d2f273d77e59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -181,6 +181,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + if (!bo->ttm) + return AMDGPU_BO_INVALID_OFFSET; + if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached) return AMDGPU_BO_INVALID_OFFSET; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cef920a93924..d79b4ca1ecfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1527,10 +1527,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - uint64_t offset; + uint64_t offset = AMDGPU_BO_INVALID_OFFSET; - offset = (bo->tbo.resource->start << PAGE_SHIFT) + - amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); + if (bo->tbo.resource->mem_type == TTM_PL_TT) + offset = amdgpu_gmc_agp_addr(&bo->tbo); + + if (offset == AMDGPU_BO_INVALID_OFFSET) + offset = (bo->tbo.resource->start << PAGE_SHIFT) + + amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); return amdgpu_gmc_sign_extend(offset); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 65aa218380be..2fde93b00cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -214,6 +214,12 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, control->i2c_address = EEPROM_I2C_MADDR_0; return true; case IP_VERSION(13, 0, 0): + if (strnstr(atom_ctx->vbios_pn, "D707", + sizeof(atom_ctx->vbios_pn))) + control->i2c_address = EEPROM_I2C_MADDR_0; + else + control->i2c_address = EEPROM_I2C_MADDR_4; + return true; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): control->i2c_address = EEPROM_I2C_MADDR_4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 05991c5c8ddb..ab4a762aed5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -959,10 +959,8 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; addr = amdgpu_gmc_agp_addr(bo); - if (addr != AMDGPU_BO_INVALID_OFFSET) { - bo->resource->start = addr >> PAGE_SHIFT; + if (addr != AMDGPU_BO_INVALID_OFFSET) return 0; - } /* allocate GART space */ placement.num_placement = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0c6133cc5e57..8ed4a6fb147a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -89,6 +89,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); +static const struct soc15_reg_golden golden_settings_gc_11_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) +}; + static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010), @@ -304,6 +308,10 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) default: break; } + soc15_program_register_sequence(adev, + golden_settings_gc_11_0, + (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); + } static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, @@ -419,7 +427,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 885ebd703260..1943beb135c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e3ff6e46f3f7..69c500910746 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1039,8 +1039,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 40d06d32bb74..4a09cc0d8ce0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -297,8 +297,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 676ab1d20d2f..1f52b4b1db03 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -259,17 +259,17 @@ const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = { static void nbio_v7_11_init_registers(struct amdgpu_device *adev) { -/* uint32_t def, data; + uint32_t def, data; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); - def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3); - data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, - CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); - data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, - CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); - if (def != data) - WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); -*/ } static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 23f26f8caad4..25a3da83e0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -611,11 +611,6 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device dev_info(adev->dev, "RAS controller interrupt triggered " "by NBIF error\n"); - - /* ras_controller_int is dedicated for nbif ras error, - * not the global interrupt for sync flood - */ - amdgpu_ras_reset_gpu(adev); } amdgpu_ras_error_data_fini(&err_data); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index d4b8d62f4294..c82776e5e9aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1161,6 +1161,11 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x46; + /* GC 9.4.3 uses MMIO register region hole at a different offset */ + if (!amdgpu_sriov_vf(adev)) { + adev->rmmio_remap.reg_offset = 0x1A000; + adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000; + } break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9cc32f577e38..4c8e278a0d0c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1128,7 +1128,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev, struct kfd_dev *dev = adev->kfd.dev; uint32_t i; - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3)) return dev->nodes[0]; for (i = 0; i < dev->num_nodes; i++) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 77649392e233..77f493262e05 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -169,16 +169,43 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) return 0; } +static void pqm_clean_queue_resource(struct process_queue_manager *pqm, + struct process_queue_node *pqn) +{ + struct kfd_node *dev; + struct kfd_process_device *pdd; + + dev = pqn->q->device; + + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return; + } + + if (pqn->q->gws) { + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && + !dev->kfd->shared_resources.enable_mes) + amdgpu_amdkfd_remove_gws_from_process( + pqm->process->kgd_process_info, pqn->q->gws); + pdd->qpd.num_gws = 0; + } + + if (dev->kfd->shared_resources.enable_mes) { + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo); + if (pqn->q->wptr_bo) + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); + } +} + void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - if (pqn->q && pqn->q->gws && - KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && - !pqn->q->device->kfd->shared_resources.enable_mes) - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, - pqn->q->gws); + if (pqn->q) + pqm_clean_queue_resource(pqm, pqn); + kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); list_del(&pqn->process_queue_list); @@ -461,22 +488,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) goto err_destroy_queue; } - if (pqn->q->gws) { - if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && - !dev->kfd->shared_resources.enable_mes) - amdgpu_amdkfd_remove_gws_from_process( - pqm->process->kgd_process_info, - pqn->q->gws); - pdd->qpd.num_gws = 0; - } - - if (dev->kfd->shared_resources.enable_mes) { - amdgpu_amdkfd_free_gtt_mem(dev->adev, - pqn->q->gang_ctx_bo); - if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); - - } + pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ee97814ebd99..b452796fc6d3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6267,7 +6267,7 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector, dm_new_state->underscan_enable = val; ret = 0; } else if (property == adev->mode_info.abm_level_property) { - dm_new_state->abm_level = val; + dm_new_state->abm_level = val ?: ABM_LEVEL_IMMEDIATE_DISABLE; ret = 0; } @@ -6312,7 +6312,8 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector, *val = dm_state->underscan_enable; ret = 0; } else if (property == adev->mode_info.abm_level_property) { - *val = dm_state->abm_level; + *val = (dm_state->abm_level != ABM_LEVEL_IMMEDIATE_DISABLE) ? + dm_state->abm_level : 0; ret = 0; } @@ -6385,7 +6386,8 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->pbn = 0; if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) - state->abm_level = amdgpu_dm_abm_level; + state->abm_level = amdgpu_dm_abm_level ?: + ABM_LEVEL_IMMEDIATE_DISABLE; __drm_atomic_helper_connector_reset(connector, &state->base); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index b2c4f97afc8b..8776055bbeaa 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -334,7 +334,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, @@ -342,7 +342,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, @@ -350,7 +350,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, @@ -358,7 +358,7 @@ static struct wm_table lpddr5_wm_table = { { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, + .pstate_latency_us = 129.0, .sr_exit_time_us = 11.5, .sr_enter_plus_exit_time_us = 14.5, .valid = true, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 507a7cf56711..d5fde7d23fbf 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -232,6 +232,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (dc->work_arounds.skip_clock_update) return; + /* DTBCLK is fixed, so set a default if unspecified. */ + if (new_clocks->dtbclk_en && !new_clocks->ref_dtbclk_khz) + new_clocks->ref_dtbclk_khz = 600000; + /* * if it is safe to lower, but we are already in the lower state, we don't have to do anything * also if safe to lower is false, we just go in the higher state @@ -265,8 +269,10 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { dcn35_smu_set_dtbclk(clk_mgr, true); - dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en; + + dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz); + clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; } /* check that we're not already in D0 */ @@ -314,17 +320,12 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base, update_dispclk = true; } - if (!new_clocks->dtbclk_en) { - new_clocks->ref_dtbclk_khz = 600000; - } - /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */ if (!dc->debug.disable_dtb_ref_clk_switch && - should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000)) { - /* DCCG requires KHz precision for DTBCLK */ - dcn35_smu_set_dtbclk(clk_mgr, true); - - dcn35_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz); + should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, + clk_mgr_base->clks.ref_dtbclk_khz / 1000)) { + dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz); + clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz; } if (dpp_clock_lowered) { @@ -443,32 +444,32 @@ static struct wm_table ddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 9, - .sr_enter_plus_exit_time_us = 11, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, } @@ -480,32 +481,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 11.5, - .sr_enter_plus_exit_time_us = 14.5, + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, .valid = true, }, } @@ -515,11 +516,6 @@ static DpmClocks_t_dcn35 dummy_clocks; static struct dcn35_watermarks dummy_wms = { 0 }; -static struct dcn35_ss_info_table ss_info_table = { - .ss_divider = 1000, - .ss_percentage = {0, 0, 375, 375, 375} -}; - static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table) { int i, num_valid_sets; @@ -653,27 +649,47 @@ static unsigned int convert_wck_ratio(uint8_t wck_ratio) return 1; } +static inline uint32_t calc_dram_speed_mts(const MemPstateTable_t *entry) +{ + return entry->UClk * convert_wck_ratio(entry->WckRatio) * 2; +} + static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr, struct integrated_info *bios_info, DpmClocks_t_dcn35 *clock_table) { struct clk_bw_params *bw_params = clk_mgr->base.bw_params; struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; - uint32_t max_pstate = 0, max_uclk = 0, max_fclk = 0; - uint32_t min_pstate = 0, max_dispclk = 0, max_dppclk = 0; + uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; + uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0; int i; + /* Determine min/max p-state values. */ for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { - if (is_valid_clock_value(clock_table->MemPstateTable[i].UClk) && - clock_table->MemPstateTable[i].UClk > max_uclk) { - max_uclk = clock_table->MemPstateTable[i].UClk; + uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); + + if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) { + max_dram_speed_mts = dram_speed_mts; max_pstate = i; } } - /* We expect the table to contain at least one valid Uclk entry. */ - ASSERT(is_valid_clock_value(max_uclk)); + min_dram_speed_mts = max_dram_speed_mts; + min_pstate = max_pstate; + + for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { + uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); + + if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) { + min_dram_speed_mts = dram_speed_mts; + min_pstate = i; + } + } + /* We expect the table to contain at least one valid P-state entry. */ + ASSERT(clock_table->NumMemPstatesEnabled && + is_valid_clock_value(max_dram_speed_mts) && + is_valid_clock_value(min_dram_speed_mts)); /* dispclk and dppclk can be max at any voltage, same number of levels for both */ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && @@ -683,47 +699,46 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); } else { + /* Invalid number of entries in the table from PMFW. */ ASSERT(0); } - if (clock_table->NumFclkLevelsEnabled <= NUM_FCLK_DPM_LEVELS) - max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, - clock_table->NumFclkLevelsEnabled); - for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { - uint32_t min_uclk = clock_table->MemPstateTable[0].UClk; - int j; + /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ + ASSERT(clock_table->NumDcfClkLevelsEnabled > 0); - for (j = 1; j < clock_table->NumMemPstatesEnabled; j++) { - if (is_valid_clock_value(clock_table->MemPstateTable[j].UClk) && - clock_table->MemPstateTable[j].UClk < min_uclk && - clock_table->MemPstateTable[j].Voltage <= clock_table->SocVoltage[i]) { - min_uclk = clock_table->MemPstateTable[j].UClk; - min_pstate = j; - } - } + max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled); + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + int j; + + /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ for (j = bw_params->clk_table.num_entries - 1; j > 0; j--) if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i]) - break; + break; bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz; bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz; bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz; - bw_params->clk_table.entries[i].fclk_mhz = max_fclk; + + /* Now update clocks we do read */ bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[min_pstate].MemClk; bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[min_pstate].Voltage; bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; - bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( - clock_table->MemPstateTable[min_pstate].WckRatio); - } + bw_params->clk_table.entries[i].wck_ratio = + convert_wck_ratio(clock_table->MemPstateTable[min_pstate].WckRatio); + + /* Dcfclk and Fclk are tied, but at a different ratio */ + bw_params->clk_table.entries[i].fclk_mhz = min(max_fclk, 2 * clock_table->DcfClocks[i]); + } /* Make sure to include at least one entry at highest pstate */ if (max_pstate != min_pstate || i == 0) { if (i > MAX_NUM_DPM_LVL - 1) i = MAX_NUM_DPM_LVL - 1; + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemPstateTable[max_pstate].MemClk; bw_params->clk_table.entries[i].voltage = clock_table->MemPstateTable[max_pstate].Voltage; @@ -739,6 +754,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk } bw_params->clk_table.num_entries = i--; + /* Make sure all highest clocks are included*/ bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); bw_params->clk_table.entries[i].dispclk_mhz = @@ -757,6 +773,11 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk bw_params->clk_table.num_entries_per_clk.num_fclk_levels = clock_table->NumFclkLevelsEnabled; bw_params->clk_table.num_entries_per_clk.num_memclk_levels = clock_table->NumMemPstatesEnabled; bw_params->clk_table.num_entries_per_clk.num_socclk_levels = clock_table->NumSocClkLevelsEnabled; + + /* + * Set any 0 clocks to max default setting. Not an issue for + * power since we aren't doing switching in such case anyway + */ for (i = 0; i < bw_params->clk_table.num_entries; i++) { if (!bw_params->clk_table.entries[i].fclk_mhz) { bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; @@ -965,21 +986,6 @@ struct clk_mgr_funcs dcn35_fpga_funcs = { .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, }; -static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) -{ - uint32_t clock_source; - struct dc_context *ctx = clk_mgr->base.ctx; - - REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); - - clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; - - if (clk_mgr->dprefclk_ss_percentage != 0) { - clk_mgr->ss_on_dprefclk = true; - clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider; - } -} - void dcn35_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_dcn35 *clk_mgr, @@ -1043,17 +1049,11 @@ void dcn35_clk_mgr_construct( dcn35_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info); clk_mgr->base.base.dprefclk_khz = dcn35_smu_get_dprefclk(&clk_mgr->base); - clk_mgr->base.base.clks.ref_dtbclk_khz = dcn35_smu_get_dtbclk(&clk_mgr->base); + clk_mgr->base.base.clks.ref_dtbclk_khz = 600000; - if (!clk_mgr->base.base.clks.ref_dtbclk_khz) - dcn35_smu_set_dtbclk(&clk_mgr->base, true); - - clk_mgr->base.base.clks.dtbclk_en = true; dce_clock_read_ss_info(&clk_mgr->base); /*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/ - dcn35_read_ss_info_from_lut(&clk_mgr->base); - clk_mgr->base.base.bw_params = &dcn35_bw_params; if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { @@ -1129,7 +1129,6 @@ void dcn35_clk_mgr_construct( ctx->dc->debug.disable_dpp_power_gate = false; ctx->dc->debug.disable_hubp_power_gate = false; ctx->dc->debug.disable_dsc_power_gate = false; - ctx->dc->debug.disable_hpo_power_gate = false; } else { /*let's reset the config control flag*/ ctx->dc->config.disable_ips = DMUB_IPS_DISABLE_ALL; /*pmfw not support it, disable it all*/ diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9316b737a8ba..2cafd644baff 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -874,6 +874,7 @@ struct dc_debug_options { unsigned int seamless_boot_odm_combine; unsigned int force_odm_combine_4to1; //bit vector based on otg inst int minimum_z8_residency_time; + int minimum_z10_residency_time; bool disable_z9_mpc; unsigned int force_fclk_khz; bool enable_tri_buf; @@ -1608,7 +1609,6 @@ struct dc_link { enum edp_revision edp_revision; union dpcd_sink_ext_caps dpcd_sink_ext_caps; - struct backlight_settings backlight_settings; struct psr_settings psr_settings; struct replay_settings replay_settings; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index fcb825e4f1bb..35d146217aef 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -991,10 +991,6 @@ struct link_mst_stream_allocation_table { struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM]; }; -struct backlight_settings { - uint32_t backlight_millinits; -}; - /* PSR feature flags */ struct psr_settings { bool psr_feature_enabled; // PSR is supported by sink diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 677361d74a4e..c97391edb5ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -871,7 +871,7 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .enable_z9_disable_interface = true, - .minimum_z8_residency_time = 2000, + .minimum_z8_residency_time = 2100, .psr_skip_crtc_disable = true, .replay_skip_crtc_disabled = true, .disable_dmcu = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c index 46f71ff08fd1..d19db8e9b8a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_pg_cntl.c @@ -261,6 +261,7 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on) uint32_t power_gate = power_on ? 0 : 1; uint32_t pwr_status = power_on ? 0 : 2; uint32_t org_ip_request_cntl; + uint32_t power_forceon; bool block_enabled; if (pg_cntl->ctx->dc->debug.ignore_pg || @@ -277,6 +278,10 @@ void pg_cntl35_hpo_pg_control(struct pg_cntl *pg_cntl, bool power_on) return; } + REG_GET(DOMAIN25_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon); + if (power_forceon) + return; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); @@ -304,6 +309,7 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) uint32_t power_gate = power_on ? 0 : 1; uint32_t pwr_status = power_on ? 0 : 2; uint32_t org_ip_request_cntl; + uint32_t power_forceon; bool block_enabled; if (pg_cntl->ctx->dc->debug.ignore_pg || @@ -319,6 +325,10 @@ void pg_cntl35_io_clk_pg_control(struct pg_cntl *pg_cntl, bool power_on) return; } + REG_GET(DOMAIN22_PG_CONFIG, DOMAIN_POWER_FORCEON, &power_forceon); + if (power_forceon) + return; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c index c7e011d26d41..70ef1e7ff841 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_resource.c @@ -1712,6 +1712,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc, out = dml2_validate(dc, context, fast_validate); + if (fast_validate) + return out; + + DC_FP_START(); + dcn35_decide_zstate_support(dc, context); + DC_FP_END(); + return out; } @@ -1857,7 +1864,7 @@ static bool dcn35_resource_construct( /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; - dc->config.use_default_clock_table = false; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h index 2cbdd75429ff..6e669a2c5b2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h @@ -36,7 +36,7 @@ * Define the maximum amount of states supported by the ASIC. Every ASIC has a * specific number of states; this macro defines the maximum number of states. */ -#define DC__VOLTAGE_STATES 20 +#define DC__VOLTAGE_STATES 40 #define DC__NUM_DPP__4 1 #define DC__NUM_DPP__0_PRESENT 1 #define DC__NUM_DPP__1_PRESENT 1 diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 7fc8b18096ba..ec77b2b41ba3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -950,10 +950,8 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc { int plane_count; int i; - unsigned int min_dst_y_next_start_us; plane_count = 0; - min_dst_y_next_start_us = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; @@ -975,26 +973,15 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { struct dc_link *link = context->streams[0]->sink->link; struct dc_stream_status *stream_status = &context->stream_status[0]; - struct dc_stream_state *current_stream = context->streams[0]; int minmum_z8_residency = dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; bool is_pwrseq0 = link->link_index == 0; - bool isFreesyncVideo; - - isFreesyncVideo = current_stream->adjust.v_total_min == current_stream->adjust.v_total_max; - isFreesyncVideo = isFreesyncVideo && current_stream->timing.v_total < current_stream->adjust.v_total_min; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream == current_stream && isFreesyncVideo) { - min_dst_y_next_start_us = context->res_ctx.pipe_ctx[i].dlg_regs.min_dst_y_next_start_us; - break; - } - } /* Don't support multi-plane configurations */ if (stream_status->plane_count > 1) return DCN_ZSTATE_SUPPORT_DISALLOW; - if (is_pwrseq0 && (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || min_dst_y_next_start_us > 5000)) + if (is_pwrseq0 && context->bw_ctx.dml.vba.StutterPeriod > 5000.0) return DCN_ZSTATE_SUPPORT_ALLOW; else if (is_pwrseq0 && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr) return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9ec4172d1c2d..b46cde525066 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1192,13 +1192,16 @@ static bool update_pipe_slice_table_with_split_flags( */ struct pipe_ctx *pipe; bool odm; - int i; + int dc_pipe_idx, dml_pipe_idx = 0; bool updated = false; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; + for (dc_pipe_idx = 0; + dc_pipe_idx < dc->res_pool->pipe_count; dc_pipe_idx++) { + pipe = &context->res_ctx.pipe_ctx[dc_pipe_idx]; + if (resource_is_pipe_type(pipe, FREE_PIPE)) + continue; - if (merge[i]) { + if (merge[dc_pipe_idx]) { if (resource_is_pipe_type(pipe, OPP_HEAD)) /* merging OPP head means reducing ODM slice * count by 1 @@ -1213,17 +1216,18 @@ static bool update_pipe_slice_table_with_split_flags( updated = true; } - if (split[i]) { - odm = vba->ODMCombineEnabled[vba->pipe_plane[i]] != + if (split[dc_pipe_idx]) { + odm = vba->ODMCombineEnabled[vba->pipe_plane[dml_pipe_idx]] != dm_odm_combine_mode_disabled; if (odm && resource_is_pipe_type(pipe, OPP_HEAD)) update_slice_table_for_stream( - table, pipe->stream, split[i] - 1); + table, pipe->stream, split[dc_pipe_idx] - 1); else if (!odm && resource_is_pipe_type(pipe, DPP_PIPE)) update_slice_table_for_plane(table, pipe, - pipe->plane_state, split[i] - 1); + pipe->plane_state, split[dc_pipe_idx] - 1); updated = true; } + dml_pipe_idx++; } return updated; } @@ -2231,6 +2235,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, int i, pipe_idx, vlevel_temp = 0; double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + double dram_speed_from_validation = context->bw_ctx.dml.vba.DRAMSpeed; double dcfclk_from_fw_based_mclk_switching = dcfclk_from_validation; bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_unsupported; @@ -2418,7 +2423,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, } if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + min_dram_speed_mts = dram_speed_from_validation; min_dram_speed_mts_margin = 160; context->bw_ctx.dml.soc.dram_clock_change_latency_us = diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index a5fe523668e9..39cf1ae3a3e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -164,10 +164,10 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 50.0, /*changed from 442.0*/ - .sr_enter_plus_exit_z8_time_us = 50.0,/*changed from 560.0*/ + .sr_exit_time_us = 14.0, + .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_z8_time_us = 525.0, + .sr_enter_plus_exit_z8_time_us = 715.0, .fclk_change_latency_us = 20.0, .usr_retraining_latency_us = 2, .writeback_latency_us = 12.0, @@ -329,6 +329,48 @@ void dcn35_update_bw_bounding_box_fpu(struct dc *dc, /*temp till dml2 fully work without dml1*/ dml_init_instance(&dc->dml, &dcn3_5_soc, &dcn3_5_ip, DML_PROJECT_DCN31); + + /*copy to dml2, before dml2_create*/ + if (clk_table->num_entries > 2) { + + for (i = 0; i < clk_table->num_entries; i++) { + dc->dml2_options.bbox_overrides.clks_table.num_states = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz = + clock_limits[i].dcfclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz = + clock_limits[i].fabricclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz = + clock_limits[i].dispclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz = + clock_limits[i].dppclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz = + clock_limits[i].socclk_mhz; + dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz = + clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dispclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dppclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_socclk_levels = + clk_table->num_entries; + dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels = + clk_table->num_entries; + } + } + + /* Update latency values */ + dc->dml2_options.bbox_overrides.dram_clock_change_latency_us = dcn3_5_soc.dram_clock_change_latency_us; + + dc->dml2_options.bbox_overrides.sr_exit_latency_us = dcn3_5_soc.sr_exit_time_us; + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_latency_us = dcn3_5_soc.sr_enter_plus_exit_time_us; + + dc->dml2_options.bbox_overrides.sr_exit_z8_time_us = dcn3_5_soc.sr_exit_z8_time_us; + dc->dml2_options.bbox_overrides.sr_enter_plus_exit_z8_time_us = dcn3_5_soc.sr_enter_plus_exit_z8_time_us; } static bool is_dual_plane(enum surface_pixel_format format) @@ -507,3 +549,37 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, return pipe_cnt; } + +void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context) +{ + enum dcn_zstate_support_state support = DCN_ZSTATE_SUPPORT_DISALLOW; + unsigned int i, plane_count = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + if (plane_count == 0) { + support = DCN_ZSTATE_SUPPORT_ALLOW; + } else if (plane_count == 1 && context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { + struct dc_link *link = context->streams[0]->sink->link; + bool is_pwrseq0 = link && link->link_index == 0; + bool is_psr1 = link && link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr; + int minmum_z8_residency = + dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000; + bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency; + int minmum_z10_residency = + dc->debug.minimum_z10_residency_time > 0 ? dc->debug.minimum_z10_residency_time : 5000; + bool allow_z10 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z10_residency; + + if (is_pwrseq0 && allow_z10) + support = DCN_ZSTATE_SUPPORT_ALLOW; + else if (is_pwrseq0 && is_psr1) + support = allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; + else if (allow_z8) + support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY; + } + + context->bw_ctx.bw.dcn.clk.zstate_support = support; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h index e8d5a170893e..067480fc3691 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.h @@ -39,4 +39,6 @@ int dcn35_populate_dml_pipes_from_context_fpu(struct dc *dc, display_e2e_pipe_params_st *pipes, bool fast_validate); +void dcn35_decide_zstate_support(struct dc *dc, struct dc_state *context); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 510be909cd75..59718ee33e51 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6329,7 +6329,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.NoOfDPPThisState, mode_lib->ms.dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); s->NextMaxVStartup = s->MaxVStartupAllPlanes[j]; @@ -6542,7 +6542,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k], mode_lib->ms.MetaRowBytes[j][k], mode_lib->ms.DPTEBytesPerRow[j][k], @@ -7687,7 +7687,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState; @@ -7957,7 +7957,7 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes; - UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal; @@ -8699,7 +8699,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels; CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes; - CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn; CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode; CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0]; @@ -8805,7 +8805,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.hw.DPPPerSurface, locals->dpte_group_bytes, s->HostVMInefficiencyFactor, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels); locals->TCalc = 24.0 / locals->DCFCLKDeepSleep; @@ -8995,7 +8995,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable; CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable; CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels; - CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes; + CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024; CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k]; CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled; CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k]; @@ -9240,7 +9240,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels, mode_lib->ms.cache_display_cfg.plane.GPUVMEnable, - mode_lib->ms.soc.hostvm_min_page_size_kbytes, + mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024, locals->PDEAndMetaPTEBytesFrame[k], locals->MetaRowByte[k], locals->PixelPTEBytesPerRow[k], diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 75171bee6f71..fa8fe5bf7e57 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -341,25 +341,42 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, break; } - /* Override from passed values, mainly for debugging purposes, if available */ - if (dml2->config.bbox_overrides.sr_exit_latency_us) { - p->in_states->state_array[0].sr_exit_time_us = dml2->config.bbox_overrides.sr_exit_latency_us; - } + /* Override from passed values, if available */ + for (i = 0; i < p->in_states->num_states; i++) { + if (dml2->config.bbox_overrides.sr_exit_latency_us) { + p->in_states->state_array[i].sr_exit_time_us = + dml2->config.bbox_overrides.sr_exit_latency_us; + } - if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) { - p->in_states->state_array[0].sr_enter_plus_exit_time_us = dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us; - } + if (dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us) { + p->in_states->state_array[i].sr_enter_plus_exit_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_latency_us; + } - if (dml2->config.bbox_overrides.urgent_latency_us) { - p->in_states->state_array[0].urgent_latency_pixel_data_only_us = dml2->config.bbox_overrides.urgent_latency_us; - } + if (dml2->config.bbox_overrides.sr_exit_z8_time_us) { + p->in_states->state_array[i].sr_exit_z8_time_us = + dml2->config.bbox_overrides.sr_exit_z8_time_us; + } - if (dml2->config.bbox_overrides.dram_clock_change_latency_us) { - p->in_states->state_array[0].dram_clock_change_latency_us = dml2->config.bbox_overrides.dram_clock_change_latency_us; - } + if (dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us) { + p->in_states->state_array[i].sr_enter_plus_exit_z8_time_us = + dml2->config.bbox_overrides.sr_enter_plus_exit_z8_time_us; + } + + if (dml2->config.bbox_overrides.urgent_latency_us) { + p->in_states->state_array[i].urgent_latency_pixel_data_only_us = + dml2->config.bbox_overrides.urgent_latency_us; + } - if (dml2->config.bbox_overrides.fclk_change_latency_us) { - p->in_states->state_array[0].fclk_change_latency_us = dml2->config.bbox_overrides.fclk_change_latency_us; + if (dml2->config.bbox_overrides.dram_clock_change_latency_us) { + p->in_states->state_array[i].dram_clock_change_latency_us = + dml2->config.bbox_overrides.dram_clock_change_latency_us; + } + + if (dml2->config.bbox_overrides.fclk_change_latency_us) { + p->in_states->state_array[i].fclk_change_latency_us = + dml2->config.bbox_overrides.fclk_change_latency_us; + } } /* DCFCLK stas values are project specific */ @@ -498,8 +515,8 @@ void dml2_translate_socbb_params(const struct dc *in, struct soc_bounding_box_st out->do_urgent_latency_adjustment = in_soc_params->do_urgent_latency_adjustment; out->dram_channel_width_bytes = (dml_uint_t)in_soc_params->dram_channel_width_bytes; out->fabric_datapath_to_dcn_data_return_bytes = (dml_uint_t)in_soc_params->fabric_datapath_to_dcn_data_return_bytes; - out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes * 1024; - out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes * 1024; + out->gpuvm_min_page_size_kbytes = in_soc_params->gpuvm_min_page_size_bytes / 1024; + out->hostvm_min_page_size_kbytes = in_soc_params->hostvm_min_page_size_bytes / 1024; out->mall_allocated_for_dcn_mbytes = (dml_uint_t)in_soc_params->mall_allocated_for_dcn_mbytes; out->max_avg_dram_bw_use_normal_percent = in_soc_params->max_avg_dram_bw_use_normal_percent; out->max_avg_fabric_bw_use_normal_percent = in_soc_params->max_avg_fabric_bw_use_normal_percent; @@ -1040,9 +1057,12 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat } //Generally these are set by referencing our latest BB/IP params in dcn32_resource.c file - dml_dispcfg->plane.GPUVMEnable = true; - dml_dispcfg->plane.GPUVMMaxPageTableLevels = 4; - dml_dispcfg->plane.HostVMEnable = false; + dml_dispcfg->plane.GPUVMEnable = dml2->v20.dml_core_ctx.ip.gpuvm_enable; + dml_dispcfg->plane.GPUVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.gpuvm_max_page_table_levels; + dml_dispcfg->plane.HostVMEnable = dml2->v20.dml_core_ctx.ip.hostvm_enable; + dml_dispcfg->plane.HostVMMaxPageTableLevels = dml2->v20.dml_core_ctx.ip.hostvm_max_page_table_levels; + if (dml2->v20.dml_core_ctx.ip.hostvm_enable) + dml2->v20.dml_core_ctx.policy.AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter; dml2_populate_pipe_to_plane_index_mapping(dml2, context); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h index 317f90776d97..fe15baa4bf09 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.h @@ -139,6 +139,8 @@ struct dml2_soc_bbox_overrides { double urgent_latency_us; double sr_exit_latency_us; double sr_enter_plus_exit_latency_us; + double sr_exit_z8_time_us; + double sr_enter_plus_exit_z8_time_us; double dram_clock_change_latency_us; double fclk_change_latency_us; unsigned int dram_num_chan; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 6a65af8c36b9..5f7f474ef51c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -487,8 +487,7 @@ bool dcn32_set_mcm_luts( if (plane_state->blend_tf->type == TF_TYPE_HWPWL) lut_params = &plane_state->blend_tf->pwl; else if (plane_state->blend_tf->type == TF_TYPE_DISTRIBUTED_POINTS) { - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - plane_state->blend_tf, + cm3_helper_translate_curve_to_hw_format(plane_state->blend_tf, &dpp_base->regamma_params, false); lut_params = &dpp_base->regamma_params; } @@ -503,8 +502,7 @@ bool dcn32_set_mcm_luts( else if (plane_state->in_shaper_func->type == TF_TYPE_DISTRIBUTED_POINTS) { // TODO: dpp_base replace ASSERT(false); - cm_helper_translate_curve_to_hw_format(plane_state->ctx, - plane_state->in_shaper_func, + cm3_helper_translate_curve_to_hw_format(plane_state->in_shaper_func, &dpp_base->shaper_params, true); lut_params = &dpp_base->shaper_params; } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index f2fe523f914f..24153b0df503 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -879,7 +879,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, (link->dpcd_sink_ext_caps.bits.oled == 1)) { dpcd_set_source_specific_data(link); msleep(post_oui_delay); - set_cached_brightness_aux(link); + set_default_brightness_aux(link); } return true; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 34a4a8c0e18c..f8e01ca09d96 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2142,8 +2142,7 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (link->dpcd_sink_ext_caps.bits.oled == 1 || link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 || link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) { - set_cached_brightness_aux(link); - + set_default_brightness_aux(link); if (link->dpcd_sink_ext_caps.bits.oled == 1) msleep(bl_oled_enable_delay); edp_backlight_enable_aux(link, true); diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c index fd8f6f198146..68096d12f52f 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_fixed_vs_pe_retimer.c @@ -115,7 +115,7 @@ static enum link_training_result perform_fixed_vs_pe_nontransparent_training_seq lt_settings->cr_pattern_time = 16000; /* Fixed VS/PE specific: Toggle link rate */ - apply_toggle_rate_wa = (link->vendor_specific_lttpr_link_rate_wa == target_rate); + apply_toggle_rate_wa = ((link->vendor_specific_lttpr_link_rate_wa == target_rate) || (link->vendor_specific_lttpr_link_rate_wa == 0)); target_rate = get_dpcd_link_rate(<_settings->link_settings); toggle_rate = (target_rate == 0x6) ? 0xA : 0x6; @@ -271,7 +271,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence_legacy( /* Vendor specific: Toggle link rate */ toggle_rate = (rate == 0x6) ? 0xA : 0x6; - if (link->vendor_specific_lttpr_link_rate_wa == rate) { + if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) { core_link_write_dpcd( link, DP_LINK_BW_SET, @@ -617,7 +617,7 @@ enum link_training_result dp_perform_fixed_vs_pe_training_sequence( /* Vendor specific: Toggle link rate */ toggle_rate = (rate == 0x6) ? 0xA : 0x6; - if (link->vendor_specific_lttpr_link_rate_wa == rate) { + if (link->vendor_specific_lttpr_link_rate_wa == rate || link->vendor_specific_lttpr_link_rate_wa == 0) { core_link_write_dpcd( link, DP_LINK_BW_SET, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e32a7974a4bc..996e4ee99023 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -170,7 +170,6 @@ bool edp_set_backlight_level_nits(struct dc_link *link, *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits; *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms; - link->backlight_settings.backlight_millinits = backlight_millinits; if (!link->dpcd_caps.panel_luminance_control) { if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_LEVEL, @@ -288,9 +287,9 @@ bool set_default_brightness_aux(struct dc_link *link) if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; - // if < 1 nits or > 5000, it might be wrong readback - if (default_backlight < 1000 || default_backlight > 5000000) - default_backlight = 150000; // + // if > 5000, it might be wrong readback + if (default_backlight > 5000000) + default_backlight = 150000; return edp_set_backlight_level_nits(link, true, default_backlight, 0); @@ -298,15 +297,6 @@ bool set_default_brightness_aux(struct dc_link *link) return false; } -bool set_cached_brightness_aux(struct dc_link *link) -{ - if (link->backlight_settings.backlight_millinits) - return edp_set_backlight_level_nits(link, true, - link->backlight_settings.backlight_millinits, 0); - else - return set_default_brightness_aux(link); - return false; -} bool edp_is_ilr_optimization_enabled(struct dc_link *link) { if (link->dpcd_caps.edp_supported_link_rates_count == 0 || !link->panel_config.ilr.optimize_edp_link_rate) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index ebf7deb63d13..a034288ad75d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -30,7 +30,6 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); bool set_default_brightness_aux(struct dc_link *link); -bool set_cached_brightness_aux(struct dc_link *link); void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd); int edp_get_backlight_level(const struct dc_link *link); bool edp_get_backlight_level_nits(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 22fc4ba96def..38360adc53d9 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -1077,6 +1077,7 @@ enum dmub_status dmub_srv_wait_for_inbox0_ack(struct dmub_srv *dmub, uint32_t ti ack = dmub->hw_funcs.read_inbox0_ack_register(dmub); if (ack) return DMUB_STATUS_OK; + udelay(1); } return DMUB_STATUS_TIMEOUT; } diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h index c92c4b83253f..4bff1ef8a9a6 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_11_0_0_offset.h @@ -6369,6 +6369,8 @@ #define regTCP_INVALIDATE_BASE_IDX 1 #define regTCP_STATUS 0x19a1 #define regTCP_STATUS_BASE_IDX 1 +#define regTCP_CNTL 0x19a2 +#define regTCP_CNTL_BASE_IDX 1 #define regTCP_CNTL2 0x19a3 #define regTCP_CNTL2_BASE_IDX 1 #define regTCP_DEBUG_INDEX 0x19a5 diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h index ff30f04be591..7ee3d291120d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_offset.h @@ -781,6 +781,8 @@ #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2_BASE_IDX 5 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1 0x420187 #define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1_BASE_IDX 5 +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3 0x4201c6 +#define regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3_BASE_IDX 5 // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h index 7f131999a263..eb8c556d9c93 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_11_0_sh_mask.h @@ -24646,6 +24646,35 @@ //BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1 #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L #define BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L +//BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE__SHIFT 0x8 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE__SHIFT 0x9 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE__SHIFT 0xb +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE__SHIFT 0xd +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN__SHIFT 0xf +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE__SHIFT 0x10 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE__SHIFT 0x11 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE__SHIFT 0x14 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE__SHIFT 0x15 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE__SHIFT 0x18 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE__SHIFT 0x19 +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE__SHIFT 0x1b +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV__SHIFT 0x1c +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE__SHIFT 0x1e +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_PAYLOAD_SIZE_MODE_MASK 0x00000100L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_PRIV_MAX_PAYLOAD_SIZE_MASK 0x00000600L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_10BIT_TAG_EN_OVERRIDE_MASK 0x00001800L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_10BIT_TAG_EN_OVERRIDE_MASK 0x00006000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__MST_DROP_SYNC_FLOOD_EN_MASK 0x00008000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_PAYLOAD_SIZE_MODE_MASK 0x00010000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_PAYLOAD_SIZE_MASK 0x000E0000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_REQUEST_SIZE_MODE_MASK 0x00100000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_PRIV_MAX_READ_REQUEST_SIZE_MASK 0x00E00000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_MAX_READ_SAFE_MODE_MASK 0x01000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_EXTENDED_TAG_EN_OVERRIDE_MASK 0x06000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_MODE_MASK 0x08000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV_MASK 0x30000000L +#define BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3__CI_SWUS_EXTENDED_TAG_EN_OVERRIDE_MASK 0xC0000000L // addressBlock: nbio_nbif0_bif_cfg_dev0_rc_bifcfgdecp //BIF_CFG_DEV0_RC0_VENDOR_ID diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index cd3c40a86029..0d1209f2cf31 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -444,6 +444,7 @@ struct amd_pm_funcs { struct dpm_clocks *clock_table); int (*get_smu_prv_buf_details)(void *handle, void **addr, size_t *size); void (*pm_compute_clocks)(void *handle); + int (*notify_rlc_state)(void *handle, bool en); }; struct metrics_table_header { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 08cb79401410..8ec11da0319f 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -181,6 +181,24 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, return ret; } +int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en) +{ + int ret = 0; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (pp_funcs && pp_funcs->notify_rlc_state) { + mutex_lock(&adev->pm.mutex); + + ret = pp_funcs->notify_rlc_state( + adev->powerplay.pp_handle, + en); + + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index feccd2a7120d..482ea30147ab 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -415,6 +415,8 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, enum pp_mp1_state mp1_state); +int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en); + int amdgpu_dpm_set_gfx_power_up_by_imu(struct amdgpu_device *adev); int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 1ead323f1c78..e1a5ee911dbb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1710,6 +1710,16 @@ static int smu_disable_dpms(struct smu_context *smu) } } + /* Notify SMU RLC is going to be off, stop RLC and SMU interaction. + * otherwise SMU will hang while interacting with RLC if RLC is halted + * this is a WA for Vangogh asic which fix the SMU hang issue. + */ + ret = smu_notify_rlc_state(smu, false); + if (ret) { + dev_err(adev->dev, "Fail to notify rlc status!\n"); + return ret; + } + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) && !((adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs) && !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 8def291b18bc..23fa71cafb14 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1360,6 +1360,11 @@ struct pptable_funcs { * management. */ int (*dpm_set_umsch_mm_enable)(struct smu_context *smu, bool enable); + + /** + * @notify_rlc_state: Notify RLC power state to SMU. + */ + int (*notify_rlc_state)(struct smu_context *smu, bool en); }; typedef enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 762b31455a0b..2ff6deedef95 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -2193,8 +2193,7 @@ static int vangogh_get_dpm_clock_table(struct smu_context *smu, struct dpm_clock return 0; } - -static int vangogh_system_features_control(struct smu_context *smu, bool en) +static int vangogh_notify_rlc_state(struct smu_context *smu, bool en) { struct amdgpu_device *adev = smu->adev; int ret = 0; @@ -2523,7 +2522,7 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .print_clk_levels = vangogh_common_print_clk_levels, .set_default_dpm_table = vangogh_set_default_dpm_tables, .set_fine_grain_gfx_freq_parameters = vangogh_set_fine_grain_gfx_freq_parameters, - .system_features_control = vangogh_system_features_control, + .notify_rlc_state = vangogh_notify_rlc_state, .feature_is_enabled = smu_cmn_feature_is_enabled, .set_power_profile_mode = vangogh_set_power_profile_mode, .get_power_profile_mode = vangogh_get_power_profile_mode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 1a6675d70a4b..f1440869d1ce 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -257,8 +257,11 @@ static int aldebaran_tables_init(struct smu_context *smu) } smu_table->ecc_table = kzalloc(tables[SMU_TABLE_ECCINFO].size, GFP_KERNEL); - if (!smu_table->ecc_table) + if (!smu_table->ecc_table) { + kfree(smu_table->metrics_table); + kfree(smu_table->gpu_metrics_table); return -ENOMEM; + } return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h index 80b3c3efc006..64766ac69c53 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h @@ -97,6 +97,7 @@ #define smu_get_default_config_table_settings(smu, config_table) smu_ppt_funcs(get_default_config_table_settings, -EOPNOTSUPP, smu, config_table) #define smu_set_config_table(smu, config_table) smu_ppt_funcs(set_config_table, -EOPNOTSUPP, smu, config_table) #define smu_init_pptable_microcode(smu) smu_ppt_funcs(init_pptable_microcode, 0, smu) +#define smu_notify_rlc_state(smu, en) smu_ppt_funcs(notify_rlc_state, 0, smu, en) #endif #endif diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index e48823a4f1ed..7f41525f7a6e 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -4,8 +4,6 @@ * Copyright (C) 2017 Broadcom */ -#include <linux/device.h> - #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> #include <drm/drm_connector.h> @@ -21,7 +19,6 @@ struct panel_bridge { struct drm_bridge bridge; struct drm_connector connector; struct drm_panel *panel; - struct device_link *link; u32 connector_type; }; @@ -63,24 +60,13 @@ static int panel_bridge_attach(struct drm_bridge *bridge, { struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge); struct drm_connector *connector = &panel_bridge->connector; - struct drm_panel *panel = panel_bridge->panel; - struct drm_device *drm_dev = bridge->dev; int ret; - panel_bridge->link = device_link_add(drm_dev->dev, panel->dev, - DL_FLAG_STATELESS); - if (!panel_bridge->link) { - DRM_ERROR("Failed to add device link between %s and %s\n", - dev_name(drm_dev->dev), dev_name(panel->dev)); - return -EINVAL; - } - if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR) return 0; if (!bridge->encoder) { DRM_ERROR("Missing encoder\n"); - device_link_del(panel_bridge->link); return -ENODEV; } @@ -92,7 +78,6 @@ static int panel_bridge_attach(struct drm_bridge *bridge, panel_bridge->connector_type); if (ret) { DRM_ERROR("Failed to initialize connector\n"); - device_link_del(panel_bridge->link); return ret; } @@ -115,8 +100,6 @@ static void panel_bridge_detach(struct drm_bridge *bridge) struct panel_bridge *panel_bridge = drm_bridge_to_panel_bridge(bridge); struct drm_connector *connector = &panel_bridge->connector; - device_link_del(panel_bridge->link); - /* * Cleanup the connector if we know it was initialized. * diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 08c088319652..b80d4e1cc9b7 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT +// SPDX-License-Identifier: GPL-2.0-only OR MIT /* * Copyright (c) 2022 Red Hat. * diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 63b709a67471..834a5e28abbe 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -278,7 +278,7 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf) } EXPORT_SYMBOL(drm_gem_dmabuf_release); -/* +/** * drm_gem_prime_fd_to_handle - PRIME import function for GEM drivers * @dev: drm_device to import into * @file_priv: drm file-private structure @@ -292,9 +292,9 @@ EXPORT_SYMBOL(drm_gem_dmabuf_release); * * Returns 0 on success or a negative error code on failure. */ -static int drm_gem_prime_fd_to_handle(struct drm_device *dev, - struct drm_file *file_priv, int prime_fd, - uint32_t *handle) +int drm_gem_prime_fd_to_handle(struct drm_device *dev, + struct drm_file *file_priv, int prime_fd, + uint32_t *handle) { struct dma_buf *dma_buf; struct drm_gem_object *obj; @@ -360,6 +360,7 @@ out_put: dma_buf_put(dma_buf); return ret; } +EXPORT_SYMBOL(drm_gem_prime_fd_to_handle); int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -408,7 +409,7 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev, return dmabuf; } -/* +/** * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers * @dev: dev to export the buffer from * @file_priv: drm file-private structure @@ -421,10 +422,10 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev, * The actual exporting from GEM object to a dma-buf is done through the * &drm_gem_object_funcs.export callback. */ -static int drm_gem_prime_handle_to_fd(struct drm_device *dev, - struct drm_file *file_priv, uint32_t handle, - uint32_t flags, - int *prime_fd) +int drm_gem_prime_handle_to_fd(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags, + int *prime_fd) { struct drm_gem_object *obj; int ret = 0; @@ -506,6 +507,7 @@ out_unlock: return ret; } +EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -864,9 +866,9 @@ EXPORT_SYMBOL(drm_prime_get_contiguous_size); * @obj: GEM object to export * @flags: flags like DRM_CLOEXEC and DRM_RDWR * - * This is the implementation of the &drm_gem_object_funcs.export functions - * for GEM drivers using the PRIME helpers. It is used as the default for - * drivers that do not set their own. + * This is the implementation of the &drm_gem_object_funcs.export functions for GEM drivers + * using the PRIME helpers. It is used as the default in + * drm_gem_prime_handle_to_fd(). */ struct dma_buf *drm_gem_prime_export(struct drm_gem_object *obj, int flags) @@ -962,9 +964,10 @@ EXPORT_SYMBOL(drm_gem_prime_import_dev); * @dev: drm_device to import into * @dma_buf: dma-buf object to import * - * This is the implementation of the gem_prime_import functions for GEM - * drivers using the PRIME helpers. It is the default for drivers that do - * not set their own &drm_driver.gem_prime_import. + * This is the implementation of the gem_prime_import functions for GEM drivers + * using the PRIME helpers. Drivers can use this as their + * &drm_driver.gem_prime_import implementation. It is used as the default + * implementation in drm_gem_prime_fd_to_handle(). * * Drivers must arrange to call drm_prime_gem_destroy() from their * &drm_gem_object_funcs.free hook when using this function. diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 28d85e1e858e..a2a806262c9e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6853,10 +6853,11 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state) if (!intel_crtc_needs_modeset(new_crtc_state)) continue; + intel_pre_plane_update(state, crtc); + if (!old_crtc_state->hw.active) continue; - intel_pre_plane_update(state, crtc); intel_crtc_disable_planes(state, crtc); } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2c1034578984..2852958dd4e7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6037,8 +6037,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * (eg. Acer Chromebook C710), so we'll check it only if multiple * ports are attempting to use the same AUX CH, according to VBT. */ - if (intel_bios_dp_has_shared_aux_ch(encoder->devdata) && - !intel_digital_port_connected(encoder)) { + if (intel_bios_dp_has_shared_aux_ch(encoder->devdata)) { /* * If this fails, presume the DPCD answer came * from some other port using the same AUX CH. @@ -6046,10 +6045,27 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, * FIXME maybe cleaner to check this before the * DPCD read? Would need sort out the VDD handling... */ - drm_info(&dev_priv->drm, - "[ENCODER:%d:%s] HPD is down, disabling eDP\n", - encoder->base.base.id, encoder->base.name); - goto out_vdd_off; + if (!intel_digital_port_connected(encoder)) { + drm_info(&dev_priv->drm, + "[ENCODER:%d:%s] HPD is down, disabling eDP\n", + encoder->base.base.id, encoder->base.name); + goto out_vdd_off; + } + + /* + * Unfortunately even the HPD based detection fails on + * eg. Asus B360M-A (CFL+CNP), so as a last resort fall + * back to checking for a VGA branch device. Only do this + * on known affected platforms to minimize false positives. + */ + if (DISPLAY_VER(dev_priv) == 9 && drm_dp_is_branch(intel_dp->dpcd) && + (intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_TYPE_MASK) == + DP_DWN_STRM_PORT_TYPE_ANALOG) { + drm_info(&dev_priv->drm, + "[ENCODER:%d:%s] VGA converter detected, disabling eDP\n", + encoder->base.base.id, encoder->base.name); + goto out_vdd_off; + } } mutex_lock(&dev_priv->drm.mode_config.mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 118164ddbb2e..833987015b8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -41,12 +41,15 @@ void intel_engine_add_user(struct intel_engine_cs *engine) llist_add(&engine->uabi_llist, &engine->i915->uabi_engines_llist); } -static const u8 uabi_classes[] = { +#define I915_NO_UABI_CLASS ((u16)(-1)) + +static const u16 uabi_classes[] = { [RENDER_CLASS] = I915_ENGINE_CLASS_RENDER, [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY, [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO, [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE, [COMPUTE_CLASS] = I915_ENGINE_CLASS_COMPUTE, + [OTHER_CLASS] = I915_NO_UABI_CLASS, /* Not exposed to users, no uabi class. */ }; static int engine_cmp(void *priv, const struct list_head *A, @@ -200,6 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16 void intel_engines_driver_register(struct drm_i915_private *i915) { + u16 name_instance, other_instance = 0; struct legacy_ring ring = {}; struct list_head *it, *next; struct rb_node **p, *prev; @@ -216,27 +220,28 @@ void intel_engines_driver_register(struct drm_i915_private *i915) if (intel_gt_has_unrecoverable_error(engine->gt)) continue; /* ignore incomplete engines */ - /* - * We don't want to expose the GSC engine to the users, but we - * still rename it so it is easier to identify in the debug logs - */ - if (engine->id == GSC0) { - engine_rename(engine, "gsc", 0); - continue; - } - GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class]; + if (engine->uabi_class == I915_NO_UABI_CLASS) { + name_instance = other_instance++; + } else { + GEM_BUG_ON(engine->uabi_class >= + ARRAY_SIZE(i915->engine_uabi_class_count)); + name_instance = + i915->engine_uabi_class_count[engine->uabi_class]++; + } + engine->uabi_instance = name_instance; - GEM_BUG_ON(engine->uabi_class >= - ARRAY_SIZE(i915->engine_uabi_class_count)); - engine->uabi_instance = - i915->engine_uabi_class_count[engine->uabi_class]++; - - /* Replace the internal name with the final user facing name */ + /* + * Replace the internal name with the final user and log facing + * name. + */ engine_rename(engine, intel_engine_class_repr(engine->class), - engine->uabi_instance); + name_instance); + + if (engine->uabi_class == I915_NO_UABI_CLASS) + continue; rb_link_node(&engine->uabi_node, prev, p); rb_insert_color(&engine->uabi_node, &i915->uabi_engines); diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h index 754c6af42f30..10121218f4d3 100644 --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h @@ -38,7 +38,7 @@ typedef struct PACKED_REGISTRY_TABLE { NvU32 size; NvU32 numEntries; - PACKED_REGISTRY_ENTRY entries[0]; + PACKED_REGISTRY_ENTRY entries[] __counted_by(numEntries); } PACKED_REGISTRY_TABLE; #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0f3bd187ede6..280d1d9a559b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -318,8 +318,9 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) continue; - if (pi < 0) - pi = i; + /* pick the last one as it will be smallest. */ + pi = i; + /* Stop once the buffer is larger than the current page size. */ if (*size >= 1ULL << vmm->page[i].shift) break; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index dc44f5c7833f..f6725a5f5bfb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -365,10 +365,8 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc) } ret = r535_gsp_cmdq_push(gsp, rpc); - if (ret) { - mutex_unlock(&gsp->cmdq.mutex); + if (ret) return ERR_PTR(ret); - } if (wait) { msg = r535_gsp_msg_recv(gsp, fn, repc); @@ -1048,7 +1046,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) char *strings; int str_offset; int i; - size_t rpc_size = sizeof(*rpc) + sizeof(rpc->entries[0]) * NV_GSP_REG_NUM_ENTRIES; + size_t rpc_size = struct_size(rpc, entries, NV_GSP_REG_NUM_ENTRIES); /* add strings + null terminator */ for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index be8f48e3c1db..c4c0f08e9202 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1764,6 +1764,7 @@ static const struct panel_desc starry_qfh032011_53g_desc = { .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_LPM, .init_cmds = starry_qfh032011_53g_init_cmd, + .lp11_before_reset = true, }; static const struct drm_display_mode starry_himax83102_j02_default_mode = { diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36523.c b/drivers/gpu/drm/panel/panel-novatek-nt36523.c index 9b9a7eb1bc60..a189ce236328 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt36523.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt36523.c @@ -1254,9 +1254,9 @@ static int nt36523_probe(struct mipi_dsi_device *dsi) return dev_err_probe(dev, -EPROBE_DEFER, "cannot get secondary DSI host\n"); pinfo->dsi[1] = mipi_dsi_device_register_full(dsi1_host, info); - if (!pinfo->dsi[1]) { + if (IS_ERR(pinfo->dsi[1])) { dev_err(dev, "cannot get secondary DSI device\n"); - return -ENODEV; + return PTR_ERR(pinfo->dsi[1]); } } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index a3414afe11b0..23cb80d62a9a 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1522,6 +1522,15 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, { struct qi_desc desc; + /* + * VT-d spec, section 4.3: + * + * Software is recommended to not submit any Device-TLB invalidation + * requests while address remapping hardware is disabled. + */ + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + if (mask) { addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; @@ -1587,6 +1596,15 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; + /* + * VT-d spec, section 4.3: + * + * Software is recommended to not submit any Device-TLB invalidation + * requests while address remapping hardware is disabled. + */ + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 3531b956556c..897159dba47d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -299,7 +299,7 @@ static int iommu_skip_te_disable; #define IDENTMAP_AZALIA 4 const struct iommu_ops intel_iommu_ops; -const struct iommu_dirty_ops intel_dirty_ops; +static const struct iommu_dirty_ops intel_dirty_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) { @@ -2207,6 +2207,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, attr |= DMA_FL_PTE_DIRTY; } + domain->has_mappings = true; + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; while (nr_pages > 0) { @@ -2490,7 +2492,8 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, return ret; } - iommu_enable_pci_caps(info); + if (sm_supported(info->iommu) || !domain_type_is_si(info->domain)) + iommu_enable_pci_caps(info); return 0; } @@ -3925,8 +3928,8 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op */ static void domain_context_clear(struct device_domain_info *info) { - if (!info->iommu || !info->dev || !dev_is_pci(info->dev)) - return; + if (!dev_is_pci(info->dev)) + domain_context_clear_one(info, info->bus, info->devfn); pci_for_each_dma_alias(to_pci_dev(info->dev), &domain_context_clear_one_cb, info); @@ -4360,7 +4363,8 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) return true; spin_lock_irqsave(&dmar_domain->lock, flags); - if (!domain_support_force_snooping(dmar_domain)) { + if (!domain_support_force_snooping(dmar_domain) || + (!dmar_domain->use_first_level && dmar_domain->has_mappings)) { spin_unlock_irqrestore(&dmar_domain->lock, flags); return false; } @@ -4925,7 +4929,7 @@ static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain, return 0; } -const struct iommu_dirty_ops intel_dirty_ops = { +static const struct iommu_dirty_ops intel_dirty_ops = { .set_dirty_tracking = intel_iommu_set_dirty_tracking, .read_and_clear_dirty = intel_iommu_read_and_clear_dirty, }; @@ -5073,7 +5077,7 @@ static void quirk_igfx_skip_te_disable(struct pci_dev *dev) ver = (dev->device >> 8) & 0xff; if (ver != 0x45 && ver != 0x46 && ver != 0x4c && ver != 0x4e && ver != 0x8a && ver != 0x98 && - ver != 0x9a && ver != 0xa7) + ver != 0x9a && ver != 0xa7 && ver != 0x7d) return; if (risky_device(dev)) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 65d37a138c75..ce030c5b5772 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -602,6 +602,9 @@ struct dmar_domain { */ u8 dirty_tracking:1; /* Dirty tracking is enabled */ u8 nested_parent:1; /* Has other domains nested on it */ + u8 has_mappings:1; /* Has mappings configured through + * iommu_map() interface. + */ spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 50a481c895b8..ac12f76c1212 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -216,6 +216,27 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, rcu_read_unlock(); } +static void intel_flush_svm_all(struct intel_svm *svm) +{ + struct device_domain_info *info; + struct intel_svm_dev *sdev; + + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) { + info = dev_iommu_priv_get(sdev->dev); + + qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0); + if (info->ats_enabled) { + qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, + svm->pasid, sdev->qdep, + 0, 64 - VTD_PAGE_SHIFT); + quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT, + svm->pasid, sdev->qdep); + } + } + rcu_read_unlock(); +} + /* Pages have been freed at this point */ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, struct mm_struct *mm, @@ -223,6 +244,11 @@ static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + if (start == 0 && end == -1UL) { + intel_flush_svm_all(svm); + return; + } + intel_flush_svm_range(svm, start, (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f17a1113f3d6..33e2a9b5d339 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -485,11 +485,12 @@ static void iommu_deinit_device(struct device *dev) dev_iommu_free(dev); } +DEFINE_MUTEX(iommu_probe_device_lock); + static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; - static DEFINE_MUTEX(iommu_probe_device_lock); struct group_device *gdev; int ret; @@ -502,17 +503,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list * probably be able to use device_lock() here to minimise the scope, * but for now enforcing a simple global ordering is fine. */ - mutex_lock(&iommu_probe_device_lock); + lockdep_assert_held(&iommu_probe_device_lock); /* Device is probed already if in a group */ - if (dev->iommu_group) { - ret = 0; - goto out_unlock; - } + if (dev->iommu_group) + return 0; ret = iommu_init_device(dev, ops); if (ret) - goto out_unlock; + return ret; group = dev->iommu_group; gdev = iommu_group_alloc_device(group, dev); @@ -548,7 +547,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list list_add_tail(&group->entry, group_list); } mutex_unlock(&group->mutex); - mutex_unlock(&iommu_probe_device_lock); if (dev_is_pci(dev)) iommu_dma_set_pci_32bit_workaround(dev); @@ -562,8 +560,6 @@ err_put_group: iommu_deinit_device(dev); mutex_unlock(&group->mutex); iommu_group_put(group); -out_unlock: - mutex_unlock(&iommu_probe_device_lock); return ret; } @@ -573,7 +569,9 @@ int iommu_probe_device(struct device *dev) const struct iommu_ops *ops; int ret; + mutex_lock(&iommu_probe_device_lock); ret = __iommu_probe_device(dev, NULL); + mutex_unlock(&iommu_probe_device_lock); if (ret) return ret; @@ -1788,7 +1786,7 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) */ if (ops->default_domain) { if (req_type) - return NULL; + return ERR_PTR(-EINVAL); return ops->default_domain; } @@ -1797,15 +1795,15 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) /* The driver gave no guidance on what type to use, try the default */ dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type); - if (dom) + if (!IS_ERR(dom)) return dom; /* Otherwise IDENTITY and DMA_FQ defaults will try DMA */ if (iommu_def_domain_type == IOMMU_DOMAIN_DMA) - return NULL; + return ERR_PTR(-EINVAL); dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA); - if (!dom) - return NULL; + if (IS_ERR(dom)) + return dom; pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", iommu_def_domain_type, group->name); @@ -1822,7 +1820,9 @@ static int probe_iommu_group(struct device *dev, void *data) struct list_head *group_list = data; int ret; + mutex_lock(&iommu_probe_device_lock); ret = __iommu_probe_device(dev, group_list); + mutex_unlock(&iommu_probe_device_lock); if (ret == -ENODEV) ret = 0; @@ -2094,10 +2094,17 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, else if (ops->domain_alloc) domain = ops->domain_alloc(alloc_type); else - return NULL; + return ERR_PTR(-EOPNOTSUPP); + /* + * Many domain_alloc ops now return ERR_PTR, make things easier for the + * driver by accepting ERR_PTR from all domain_alloc ops instead of + * having two rules. + */ + if (IS_ERR(domain)) + return domain; if (!domain) - return NULL; + return ERR_PTR(-ENOMEM); domain->type = type; /* @@ -2110,9 +2117,14 @@ static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops, if (!domain->ops) domain->ops = ops->default_domain_ops; - if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) { - iommu_domain_free(domain); - domain = NULL; + if (iommu_is_dma_domain(domain)) { + int rc; + + rc = iommu_get_dma_cookie(domain); + if (rc) { + iommu_domain_free(domain); + return ERR_PTR(rc); + } } return domain; } @@ -2129,10 +2141,15 @@ __iommu_group_domain_alloc(struct iommu_group *group, unsigned int type) struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { + struct iommu_domain *domain; + if (bus == NULL || bus->iommu_ops == NULL) return NULL; - return __iommu_domain_alloc(bus->iommu_ops, NULL, + domain = __iommu_domain_alloc(bus->iommu_ops, NULL, IOMMU_DOMAIN_UNMANAGED); + if (IS_ERR(domain)) + return NULL; + return domain; } EXPORT_SYMBOL_GPL(iommu_domain_alloc); @@ -3041,8 +3058,8 @@ static int iommu_setup_default_domain(struct iommu_group *group, return -EINVAL; dom = iommu_group_alloc_default_domain(group, req_type); - if (!dom) - return -ENODEV; + if (IS_ERR(dom)) + return PTR_ERR(dom); if (group->default_domain == dom) return 0; @@ -3243,21 +3260,23 @@ void iommu_device_unuse_default_domain(struct device *dev) static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) { + struct iommu_domain *domain; + if (group->blocking_domain) return 0; - group->blocking_domain = - __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED); - if (!group->blocking_domain) { + domain = __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED); + if (IS_ERR(domain)) { /* * For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED * create an empty domain instead. */ - group->blocking_domain = __iommu_group_domain_alloc( - group, IOMMU_DOMAIN_UNMANAGED); - if (!group->blocking_domain) - return -EINVAL; + domain = __iommu_group_domain_alloc(group, + IOMMU_DOMAIN_UNMANAGED); + if (IS_ERR(domain)) + return PTR_ERR(domain); } + group->blocking_domain = domain; return 0; } diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 59d3a07300d9..873630c111c1 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -571,7 +571,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, continue; destroy_hwpt = (*do_attach)(idev, hwpt); if (IS_ERR(destroy_hwpt)) { - iommufd_put_object(&hwpt->obj); + iommufd_put_object(idev->ictx, &hwpt->obj); /* * -EINVAL means the domain is incompatible with the * device. Other error codes should propagate to @@ -583,7 +583,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, goto out_unlock; } *pt_id = hwpt->obj.id; - iommufd_put_object(&hwpt->obj); + iommufd_put_object(idev->ictx, &hwpt->obj); goto out_unlock; } @@ -652,7 +652,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, destroy_hwpt = ERR_PTR(-EINVAL); goto out_put_pt_obj; } - iommufd_put_object(pt_obj); + iommufd_put_object(idev->ictx, pt_obj); /* This destruction has to be after we unlock everything */ if (destroy_hwpt) @@ -660,7 +660,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, return 0; out_put_pt_obj: - iommufd_put_object(pt_obj); + iommufd_put_object(idev->ictx, pt_obj); return PTR_ERR(destroy_hwpt); } @@ -792,7 +792,7 @@ static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id) if (IS_ERR(ioas)) return PTR_ERR(ioas); rc = iommufd_access_change_ioas(access, ioas); - iommufd_put_object(&ioas->obj); + iommufd_put_object(access->ictx, &ioas->obj); return rc; } @@ -941,7 +941,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, access->ops->unmap(access->data, iova, length); - iommufd_put_object(&access->obj); + iommufd_put_object(access->ictx, &access->obj); xa_lock(&ioas->iopt.access_list); } xa_unlock(&ioas->iopt.access_list); @@ -1243,6 +1243,6 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) out_free: kfree(data); out_put: - iommufd_put_object(&idev->obj); + iommufd_put_object(ucmd->ictx, &idev->obj); return rc; } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 2abbeafdbd22..cbb5df0a6c32 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -318,9 +318,9 @@ out_unlock: if (ioas) mutex_unlock(&ioas->mutex); out_put_pt: - iommufd_put_object(pt_obj); + iommufd_put_object(ucmd->ictx, pt_obj); out_put_idev: - iommufd_put_object(&idev->obj); + iommufd_put_object(ucmd->ictx, &idev->obj); return rc; } @@ -345,7 +345,7 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd) rc = iopt_set_dirty_tracking(&ioas->iopt, hwpt_paging->common.domain, enable); - iommufd_put_object(&hwpt_paging->common.obj); + iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); return rc; } @@ -368,6 +368,6 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd) rc = iopt_read_and_clear_dirty_data( &ioas->iopt, hwpt_paging->common.domain, cmd->flags, cmd); - iommufd_put_object(&hwpt_paging->common.obj); + iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); return rc; } diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index d5624577f79f..742248276548 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -105,7 +105,7 @@ int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd) rc = -EMSGSIZE; out_put: up_read(&ioas->iopt.iova_rwsem); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -175,7 +175,7 @@ out_free: interval_tree_remove(node, &allowed_iova); kfree(container_of(node, struct iopt_allowed, node)); } - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -228,7 +228,7 @@ int iommufd_ioas_map(struct iommufd_ucmd *ucmd) cmd->iova = iova; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_put: - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -258,7 +258,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) return PTR_ERR(src_ioas); rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length, &pages_list); - iommufd_put_object(&src_ioas->obj); + iommufd_put_object(ucmd->ictx, &src_ioas->obj); if (rc) return rc; @@ -279,7 +279,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) cmd->dst_iova = iova; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_put_dst: - iommufd_put_object(&dst_ioas->obj); + iommufd_put_object(ucmd->ictx, &dst_ioas->obj); out_pages: iopt_free_pages_list(&pages_list); return rc; @@ -315,7 +315,7 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_put: - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -393,6 +393,6 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd) rc = -EOPNOTSUPP; } - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index a74cfefffbc6..abae041e256f 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -21,6 +21,7 @@ struct iommufd_ctx { struct file *file; struct xarray objects; struct xarray groups; + wait_queue_head_t destroy_wait; u8 account_mode; /* Compatibility with VFIO no iommu */ @@ -135,7 +136,7 @@ enum iommufd_object_type { /* Base struct for all objects with a userspace ID handle. */ struct iommufd_object { - struct rw_semaphore destroy_rwsem; + refcount_t shortterm_users; refcount_t users; enum iommufd_object_type type; unsigned int id; @@ -143,10 +144,15 @@ struct iommufd_object { static inline bool iommufd_lock_obj(struct iommufd_object *obj) { - if (!down_read_trylock(&obj->destroy_rwsem)) + if (!refcount_inc_not_zero(&obj->users)) return false; - if (!refcount_inc_not_zero(&obj->users)) { - up_read(&obj->destroy_rwsem); + if (!refcount_inc_not_zero(&obj->shortterm_users)) { + /* + * If the caller doesn't already have a ref on obj this must be + * called under the xa_lock. Otherwise the caller is holding a + * ref on users. Thus it cannot be one before this decrement. + */ + refcount_dec(&obj->users); return false; } return true; @@ -154,10 +160,16 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj) struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, enum iommufd_object_type type); -static inline void iommufd_put_object(struct iommufd_object *obj) +static inline void iommufd_put_object(struct iommufd_ctx *ictx, + struct iommufd_object *obj) { + /* + * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees + * a spurious !0 users with a 0 shortterm_users. + */ refcount_dec(&obj->users); - up_read(&obj->destroy_rwsem); + if (refcount_dec_and_test(&obj->shortterm_users)) + wake_up_interruptible_all(&ictx->destroy_wait); } void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj); @@ -165,17 +177,49 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj); void iommufd_object_finalize(struct iommufd_ctx *ictx, struct iommufd_object *obj); -void __iommufd_object_destroy_user(struct iommufd_ctx *ictx, - struct iommufd_object *obj, bool allow_fail); + +enum { + REMOVE_WAIT_SHORTTERM = 1, +}; +int iommufd_object_remove(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy, u32 id, + unsigned int flags); + +/* + * The caller holds a users refcount and wants to destroy the object. At this + * point the caller has no shortterm_users reference and at least the xarray + * will be holding one. + */ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx, struct iommufd_object *obj) { - __iommufd_object_destroy_user(ictx, obj, false); + int ret; + + ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM); + + /* + * If there is a bug and we couldn't destroy the object then we did put + * back the caller's users refcount and will eventually try to free it + * again during close. + */ + WARN_ON(ret); } -static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx, - struct iommufd_object *obj) + +/* + * The HWPT allocated by autodomains is used in possibly many devices and + * is automatically destroyed when its refcount reaches zero. + * + * If userspace uses the HWPT manually, even for a short term, then it will + * disrupt this refcounting and the auto-free in the kernel will not work. + * Userspace that tries to use the automatically allocated HWPT must be careful + * to ensure that it is consistently destroyed, eg by not racing accesses + * and by not attaching an automatic HWPT to a device manually. + */ +static inline void +iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, + struct iommufd_object *obj) { - __iommufd_object_destroy_user(ictx, obj, true); + iommufd_object_remove(ictx, obj, obj->id, 0); } struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, @@ -311,7 +355,7 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, lockdep_assert_not_held(&hwpt_paging->ioas->mutex); if (hwpt_paging->auto_domain) { - iommufd_object_deref_user(ictx, &hwpt->obj); + iommufd_object_put_and_try_destroy(ictx, &hwpt->obj); return; } } diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 45b9d40773b1..c9091e46d208 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -33,7 +33,6 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type) { - static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX]; struct iommufd_object *obj; int rc; @@ -41,15 +40,8 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, if (!obj) return ERR_PTR(-ENOMEM); obj->type = type; - /* - * In most cases the destroy_rwsem is obtained with try so it doesn't - * interact with lockdep, however on destroy we have to sleep. This - * means if we have to destroy an object while holding a get on another - * object it triggers lockdep. Using one locking class per object type - * is a simple and reasonable way to avoid this. - */ - __init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem", - &obj_keys[type]); + /* Starts out bias'd by 1 until it is removed from the xarray */ + refcount_set(&obj->shortterm_users, 1); refcount_set(&obj->users, 1); /* @@ -129,92 +121,113 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, return obj; } +static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy) +{ + if (refcount_dec_and_test(&to_destroy->shortterm_users)) + return 0; + + if (wait_event_timeout(ictx->destroy_wait, + refcount_read(&to_destroy->shortterm_users) == + 0, + msecs_to_jiffies(10000))) + return 0; + + pr_crit("Time out waiting for iommufd object to become free\n"); + refcount_inc(&to_destroy->shortterm_users); + return -EBUSY; +} + /* * Remove the given object id from the xarray if the only reference to the - * object is held by the xarray. The caller must call ops destroy(). + * object is held by the xarray. */ -static struct iommufd_object *iommufd_object_remove(struct iommufd_ctx *ictx, - u32 id, bool extra_put) +int iommufd_object_remove(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy, u32 id, + unsigned int flags) { struct iommufd_object *obj; XA_STATE(xas, &ictx->objects, id); - - xa_lock(&ictx->objects); - obj = xas_load(&xas); - if (xa_is_zero(obj) || !obj) { - obj = ERR_PTR(-ENOENT); - goto out_xa; - } + bool zerod_shortterm = false; + int ret; /* - * If the caller is holding a ref on obj we put it here under the - * spinlock. + * The purpose of the shortterm_users is to ensure deterministic + * destruction of objects used by external drivers and destroyed by this + * function. Any temporary increment of the refcount must increment + * shortterm_users, such as during ioctl execution. */ - if (extra_put) + if (flags & REMOVE_WAIT_SHORTTERM) { + ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy); + if (ret) { + /* + * We have a bug. Put back the callers reference and + * defer cleaning this object until close. + */ + refcount_dec(&to_destroy->users); + return ret; + } + zerod_shortterm = true; + } + + xa_lock(&ictx->objects); + obj = xas_load(&xas); + if (to_destroy) { + /* + * If the caller is holding a ref on obj we put it here under + * the spinlock. + */ refcount_dec(&obj->users); + if (WARN_ON(obj != to_destroy)) { + ret = -ENOENT; + goto err_xa; + } + } else if (xa_is_zero(obj) || !obj) { + ret = -ENOENT; + goto err_xa; + } + if (!refcount_dec_if_one(&obj->users)) { - obj = ERR_PTR(-EBUSY); - goto out_xa; + ret = -EBUSY; + goto err_xa; } xas_store(&xas, NULL); if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj)) ictx->vfio_ioas = NULL; - -out_xa: xa_unlock(&ictx->objects); - /* The returned object reference count is zero */ - return obj; -} - -/* - * The caller holds a users refcount and wants to destroy the object. Returns - * true if the object was destroyed. In all cases the caller no longer has a - * reference on obj. - */ -void __iommufd_object_destroy_user(struct iommufd_ctx *ictx, - struct iommufd_object *obj, bool allow_fail) -{ - struct iommufd_object *ret; - /* - * The purpose of the destroy_rwsem is to ensure deterministic - * destruction of objects used by external drivers and destroyed by this - * function. Any temporary increment of the refcount must hold the read - * side of this, such as during ioctl execution. - */ - down_write(&obj->destroy_rwsem); - ret = iommufd_object_remove(ictx, obj->id, true); - up_write(&obj->destroy_rwsem); - - if (allow_fail && IS_ERR(ret)) - return; - - /* - * If there is a bug and we couldn't destroy the object then we did put - * back the caller's refcount and will eventually try to free it again - * during close. + * Since users is zero any positive users_shortterm must be racing + * iommufd_put_object(), or we have a bug. */ - if (WARN_ON(IS_ERR(ret))) - return; + if (!zerod_shortterm) { + ret = iommufd_object_dec_wait_shortterm(ictx, obj); + if (WARN_ON(ret)) + return ret; + } iommufd_object_ops[obj->type].destroy(obj); kfree(obj); + return 0; + +err_xa: + if (zerod_shortterm) { + /* Restore the xarray owned reference */ + refcount_set(&obj->shortterm_users, 1); + } + xa_unlock(&ictx->objects); + + /* The returned object reference count is zero */ + return ret; } static int iommufd_destroy(struct iommufd_ucmd *ucmd) { struct iommu_destroy *cmd = ucmd->cmd; - struct iommufd_object *obj; - obj = iommufd_object_remove(ucmd->ictx, cmd->id, false); - if (IS_ERR(obj)) - return PTR_ERR(obj); - iommufd_object_ops[obj->type].destroy(obj); - kfree(obj); - return 0; + return iommufd_object_remove(ucmd->ictx, NULL, cmd->id, 0); } static int iommufd_fops_open(struct inode *inode, struct file *filp) @@ -238,6 +251,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); xa_init(&ictx->groups); ictx->file = filp; + init_waitqueue_head(&ictx->destroy_wait); filp->private_data = ictx; return 0; } diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 5d93434003d8..022ef8f55088 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -86,7 +86,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, if (IS_ERR(ioas)) return; *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); } struct mock_iommu_domain { @@ -500,7 +500,7 @@ get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, return hwpt; if (hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED || hwpt->domain->ops != mock_ops.default_domain_ops) { - iommufd_put_object(&hwpt->obj); + iommufd_put_object(ucmd->ictx, &hwpt->obj); return ERR_PTR(-EINVAL); } *mock = container_of(hwpt->domain, struct mock_iommu_domain, domain); @@ -518,7 +518,7 @@ get_md_pagetable_nested(struct iommufd_ucmd *ucmd, u32 mockpt_id, return hwpt; if (hwpt->domain->type != IOMMU_DOMAIN_NESTED || hwpt->domain->ops != &domain_nested_ops) { - iommufd_put_object(&hwpt->obj); + iommufd_put_object(ucmd->ictx, &hwpt->obj); return ERR_PTR(-EINVAL); } *mock_nested = container_of(hwpt->domain, @@ -681,7 +681,7 @@ static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_dev_obj: - iommufd_put_object(dev_obj); + iommufd_put_object(ucmd->ictx, dev_obj); return rc; } @@ -699,7 +699,7 @@ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd, down_write(&ioas->iopt.iova_rwsem); rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL); up_write(&ioas->iopt.iova_rwsem); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -754,7 +754,7 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd, rc = 0; out_put: - iommufd_put_object(&hwpt->obj); + iommufd_put_object(ucmd->ictx, &hwpt->obj); return rc; } @@ -1233,7 +1233,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, out_free: kvfree(tmp); out_put: - iommufd_put_object(&hwpt->obj); + iommufd_put_object(ucmd->ictx, &hwpt->obj); return rc; } diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index 538fbf76354d..a3ad5f0b6c59 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -41,7 +41,7 @@ int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) if (IS_ERR(ioas)) return PTR_ERR(ioas); *out_ioas_id = ioas->obj.id; - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); @@ -98,7 +98,7 @@ int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) { ret = 0; - iommufd_put_object(&ictx->vfio_ioas->obj); + iommufd_put_object(ictx, &ictx->vfio_ioas->obj); goto out_abort; } ictx->vfio_ioas = ioas; @@ -133,7 +133,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) if (IS_ERR(ioas)) return PTR_ERR(ioas); cmd->ioas_id = ioas->obj.id; - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); case IOMMU_VFIO_IOAS_SET: @@ -143,7 +143,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) xa_lock(&ucmd->ictx->objects); ucmd->ictx->vfio_ioas = ioas; xa_unlock(&ucmd->ictx->objects); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return 0; case IOMMU_VFIO_IOAS_CLEAR: @@ -190,7 +190,7 @@ static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd, iova = map.iova; rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr), map.size, iommu_prot, 0); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -249,7 +249,7 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd, rc = -EFAULT; err_put: - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -272,7 +272,7 @@ static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx) } mutex_unlock(&ioas->mutex); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -349,7 +349,7 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) */ if (type == VFIO_TYPE1_IOMMU) rc = iopt_disable_large_pages(&ioas->iopt); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -511,7 +511,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, out_put: up_read(&ioas->iopt.iova_rwsem); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 157b286e36bf..35ba090f3b5e 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -112,16 +112,20 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, const u32 *id) { const struct iommu_ops *ops = NULL; - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct iommu_fwspec *fwspec; int err = NO_IOMMU; if (!master_np) return NULL; + /* Serialise to make dev->iommu stable under our potential fwspec */ + mutex_lock(&iommu_probe_device_lock); + fwspec = dev_iommu_fwspec_get(dev); if (fwspec) { - if (fwspec->ops) + if (fwspec->ops) { + mutex_unlock(&iommu_probe_device_lock); return fwspec->ops; - + } /* In the deferred case, start again from scratch */ iommu_fwspec_free(dev); } @@ -155,6 +159,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, fwspec = dev_iommu_fwspec_get(dev); ops = fwspec->ops; } + mutex_unlock(&iommu_probe_device_lock); + /* * If we have reason to believe the IOMMU driver missed the initial * probe for dev, replay it to get things in order. @@ -191,7 +197,7 @@ iommu_resv_region_get_type(struct device *dev, if (start == phys->start && end == phys->end) return IOMMU_RESV_DIRECT; - dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", &phys, + dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", phys, &start, &end); return IOMMU_RESV_RESERVED; } diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c index e358e77e4b38..d76214fa9ad8 100644 --- a/drivers/leds/trigger/ledtrig-netdev.c +++ b/drivers/leds/trigger/ledtrig-netdev.c @@ -226,6 +226,11 @@ static int set_device_name(struct led_netdev_data *trigger_data, cancel_delayed_work_sync(&trigger_data->work); + /* + * Take RTNL lock before trigger_data lock to prevent potential + * deadlock with netdev notifier registration. + */ + rtnl_lock(); mutex_lock(&trigger_data->lock); if (trigger_data->net_dev) { @@ -245,16 +250,14 @@ static int set_device_name(struct led_netdev_data *trigger_data, trigger_data->carrier_link_up = false; trigger_data->link_speed = SPEED_UNKNOWN; trigger_data->duplex = DUPLEX_UNKNOWN; - if (trigger_data->net_dev != NULL) { - rtnl_lock(); + if (trigger_data->net_dev) get_device_state(trigger_data); - rtnl_unlock(); - } trigger_data->last_activity = 0; set_baseline_state(trigger_data); mutex_unlock(&trigger_data->lock); + rtnl_unlock(); return 0; } diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index de3019972b35..196cdacce38f 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -293,16 +293,16 @@ static void btree_complete_write(struct btree *b, struct btree_write *w) w->journal = NULL; } -static void btree_node_write_unlock(struct closure *cl) +static CLOSURE_CALLBACK(btree_node_write_unlock) { - struct btree *b = container_of(cl, struct btree, io); + closure_type(b, struct btree, io); up(&b->io_mutex); } -static void __btree_node_write_done(struct closure *cl) +static CLOSURE_CALLBACK(__btree_node_write_done) { - struct btree *b = container_of(cl, struct btree, io); + closure_type(b, struct btree, io); struct btree_write *w = btree_prev_write(b); bch_bbio_free(b->bio, b->c); @@ -315,12 +315,12 @@ static void __btree_node_write_done(struct closure *cl) closure_return_with_destructor(cl, btree_node_write_unlock); } -static void btree_node_write_done(struct closure *cl) +static CLOSURE_CALLBACK(btree_node_write_done) { - struct btree *b = container_of(cl, struct btree, io); + closure_type(b, struct btree, io); bio_free_pages(b->bio); - __btree_node_write_done(cl); + __btree_node_write_done(&cl->work); } static void btree_node_write_endio(struct bio *bio) @@ -1522,7 +1522,7 @@ out_nocoalesce: bch_keylist_free(&keylist); for (i = 0; i < nodes; i++) - if (!IS_ERR(new_nodes[i])) { + if (!IS_ERR_OR_NULL(new_nodes[i])) { btree_node_free(new_nodes[i]); rw_unlock(true, new_nodes[i]); } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index c182c21de2e8..7ff14bd2feb8 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -723,11 +723,11 @@ static void journal_write_endio(struct bio *bio) closure_put(&w->c->journal.io); } -static void journal_write(struct closure *cl); +static CLOSURE_CALLBACK(journal_write); -static void journal_write_done(struct closure *cl) +static CLOSURE_CALLBACK(journal_write_done) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct journal_write *w = (j->cur == j->w) ? &j->w[1] : &j->w[0]; @@ -736,19 +736,19 @@ static void journal_write_done(struct closure *cl) continue_at_nobarrier(cl, journal_write, bch_journal_wq); } -static void journal_write_unlock(struct closure *cl) +static CLOSURE_CALLBACK(journal_write_unlock) __releases(&c->journal.lock) { - struct cache_set *c = container_of(cl, struct cache_set, journal.io); + closure_type(c, struct cache_set, journal.io); c->journal.io_in_flight = 0; spin_unlock(&c->journal.lock); } -static void journal_write_unlocked(struct closure *cl) +static CLOSURE_CALLBACK(journal_write_unlocked) __releases(c->journal.lock) { - struct cache_set *c = container_of(cl, struct cache_set, journal.io); + closure_type(c, struct cache_set, journal.io); struct cache *ca = c->cache; struct journal_write *w = c->journal.cur; struct bkey *k = &c->journal.key; @@ -823,12 +823,12 @@ static void journal_write_unlocked(struct closure *cl) continue_at(cl, journal_write_done, NULL); } -static void journal_write(struct closure *cl) +static CLOSURE_CALLBACK(journal_write) { - struct cache_set *c = container_of(cl, struct cache_set, journal.io); + closure_type(c, struct cache_set, journal.io); spin_lock(&c->journal.lock); - journal_write_unlocked(cl); + journal_write_unlocked(&cl->work); } static void journal_try_write(struct cache_set *c) diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 9f32901fdad1..ebd500bdf0b2 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -35,16 +35,16 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k) /* Moving GC - IO loop */ -static void moving_io_destructor(struct closure *cl) +static CLOSURE_CALLBACK(moving_io_destructor) { - struct moving_io *io = container_of(cl, struct moving_io, cl); + closure_type(io, struct moving_io, cl); kfree(io); } -static void write_moving_finish(struct closure *cl) +static CLOSURE_CALLBACK(write_moving_finish) { - struct moving_io *io = container_of(cl, struct moving_io, cl); + closure_type(io, struct moving_io, cl); struct bio *bio = &io->bio.bio; bio_free_pages(bio); @@ -89,9 +89,9 @@ static void moving_init(struct moving_io *io) bch_bio_map(bio, NULL); } -static void write_moving(struct closure *cl) +static CLOSURE_CALLBACK(write_moving) { - struct moving_io *io = container_of(cl, struct moving_io, cl); + closure_type(io, struct moving_io, cl); struct data_insert_op *op = &io->op; if (!op->status) { @@ -113,9 +113,9 @@ static void write_moving(struct closure *cl) continue_at(cl, write_moving_finish, op->wq); } -static void read_moving_submit(struct closure *cl) +static CLOSURE_CALLBACK(read_moving_submit) { - struct moving_io *io = container_of(cl, struct moving_io, cl); + closure_type(io, struct moving_io, cl); struct bio *bio = &io->bio.bio; bch_submit_bbio(bio, io->op.c, &io->w->key, 0); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index a9b1f3896249..83d112bd2b1c 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -25,7 +25,7 @@ struct kmem_cache *bch_search_cache; -static void bch_data_insert_start(struct closure *cl); +static CLOSURE_CALLBACK(bch_data_insert_start); static unsigned int cache_mode(struct cached_dev *dc) { @@ -55,9 +55,9 @@ static void bio_csum(struct bio *bio, struct bkey *k) /* Insert data into cache */ -static void bch_data_insert_keys(struct closure *cl) +static CLOSURE_CALLBACK(bch_data_insert_keys) { - struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); + closure_type(op, struct data_insert_op, cl); atomic_t *journal_ref = NULL; struct bkey *replace_key = op->replace ? &op->replace_key : NULL; int ret; @@ -136,9 +136,9 @@ out: continue_at(cl, bch_data_insert_keys, op->wq); } -static void bch_data_insert_error(struct closure *cl) +static CLOSURE_CALLBACK(bch_data_insert_error) { - struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); + closure_type(op, struct data_insert_op, cl); /* * Our data write just errored, which means we've got a bunch of keys to @@ -163,7 +163,7 @@ static void bch_data_insert_error(struct closure *cl) op->insert_keys.top = dst; - bch_data_insert_keys(cl); + bch_data_insert_keys(&cl->work); } static void bch_data_insert_endio(struct bio *bio) @@ -184,9 +184,9 @@ static void bch_data_insert_endio(struct bio *bio) bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache"); } -static void bch_data_insert_start(struct closure *cl) +static CLOSURE_CALLBACK(bch_data_insert_start) { - struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); + closure_type(op, struct data_insert_op, cl); struct bio *bio = op->bio, *n; if (op->bypass) @@ -305,16 +305,16 @@ err: * If op->bypass is true, instead of inserting the data it invalidates the * region of the cache represented by op->bio and op->inode. */ -void bch_data_insert(struct closure *cl) +CLOSURE_CALLBACK(bch_data_insert) { - struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); + closure_type(op, struct data_insert_op, cl); trace_bcache_write(op->c, op->inode, op->bio, op->writeback, op->bypass); bch_keylist_init(&op->insert_keys); bio_get(op->bio); - bch_data_insert_start(cl); + bch_data_insert_start(&cl->work); } /* @@ -575,9 +575,9 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k) return n == bio ? MAP_DONE : MAP_CONTINUE; } -static void cache_lookup(struct closure *cl) +static CLOSURE_CALLBACK(cache_lookup) { - struct search *s = container_of(cl, struct search, iop.cl); + closure_type(s, struct search, iop.cl); struct bio *bio = &s->bio.bio; struct cached_dev *dc; int ret; @@ -698,9 +698,9 @@ static void do_bio_hook(struct search *s, bio_cnt_set(bio, 3); } -static void search_free(struct closure *cl) +static CLOSURE_CALLBACK(search_free) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); atomic_dec(&s->iop.c->search_inflight); @@ -749,20 +749,20 @@ static inline struct search *search_alloc(struct bio *bio, /* Cached devices */ -static void cached_dev_bio_complete(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_bio_complete) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); cached_dev_put(dc); - search_free(cl); + search_free(&cl->work); } /* Process reads */ -static void cached_dev_read_error_done(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_read_error_done) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); if (s->iop.replace_collision) bch_mark_cache_miss_collision(s->iop.c, s->d); @@ -770,12 +770,12 @@ static void cached_dev_read_error_done(struct closure *cl) if (s->iop.bio) bio_free_pages(s->iop.bio); - cached_dev_bio_complete(cl); + cached_dev_bio_complete(&cl->work); } -static void cached_dev_read_error(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_read_error) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); struct bio *bio = &s->bio.bio; /* @@ -801,9 +801,9 @@ static void cached_dev_read_error(struct closure *cl) continue_at(cl, cached_dev_read_error_done, NULL); } -static void cached_dev_cache_miss_done(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_cache_miss_done) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); struct bcache_device *d = s->d; if (s->iop.replace_collision) @@ -812,13 +812,13 @@ static void cached_dev_cache_miss_done(struct closure *cl) if (s->iop.bio) bio_free_pages(s->iop.bio); - cached_dev_bio_complete(cl); + cached_dev_bio_complete(&cl->work); closure_put(&d->cl); } -static void cached_dev_read_done(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_read_done) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); /* @@ -858,9 +858,9 @@ static void cached_dev_read_done(struct closure *cl) continue_at(cl, cached_dev_cache_miss_done, NULL); } -static void cached_dev_read_done_bh(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_read_done_bh) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); bch_mark_cache_accounting(s->iop.c, s->d, @@ -955,13 +955,13 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s) /* Process writes */ -static void cached_dev_write_complete(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_write_complete) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); up_read_non_owner(&dc->writeback_lock); - cached_dev_bio_complete(cl); + cached_dev_bio_complete(&cl->work); } static void cached_dev_write(struct cached_dev *dc, struct search *s) @@ -1048,9 +1048,9 @@ insert_data: continue_at(cl, cached_dev_write_complete, NULL); } -static void cached_dev_nodata(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_nodata) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); struct bio *bio = &s->bio.bio; if (s->iop.flush_journal) @@ -1265,9 +1265,9 @@ static int flash_dev_cache_miss(struct btree *b, struct search *s, return MAP_CONTINUE; } -static void flash_dev_nodata(struct closure *cl) +static CLOSURE_CALLBACK(flash_dev_nodata) { - struct search *s = container_of(cl, struct search, cl); + closure_type(s, struct search, cl); if (s->iop.flush_journal) bch_journal_meta(s->iop.c, cl); diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h index 38ab4856eaab..46bbef00aebb 100644 --- a/drivers/md/bcache/request.h +++ b/drivers/md/bcache/request.h @@ -34,7 +34,7 @@ struct data_insert_op { }; unsigned int bch_get_congested(const struct cache_set *c); -void bch_data_insert(struct closure *cl); +CLOSURE_CALLBACK(bch_data_insert); void bch_cached_dev_request_init(struct cached_dev *dc); void cached_dev_submit_bio(struct bio *bio); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index bfe1685dbae5..1402096b8076 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -327,9 +327,9 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out, submit_bio(bio); } -static void bch_write_bdev_super_unlock(struct closure *cl) +static CLOSURE_CALLBACK(bch_write_bdev_super_unlock) { - struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write); + closure_type(dc, struct cached_dev, sb_write); up(&dc->sb_write_mutex); } @@ -363,9 +363,9 @@ static void write_super_endio(struct bio *bio) closure_put(&ca->set->sb_write); } -static void bcache_write_super_unlock(struct closure *cl) +static CLOSURE_CALLBACK(bcache_write_super_unlock) { - struct cache_set *c = container_of(cl, struct cache_set, sb_write); + closure_type(c, struct cache_set, sb_write); up(&c->sb_write_mutex); } @@ -407,9 +407,9 @@ static void uuid_endio(struct bio *bio) closure_put(cl); } -static void uuid_io_unlock(struct closure *cl) +static CLOSURE_CALLBACK(uuid_io_unlock) { - struct cache_set *c = container_of(cl, struct cache_set, uuid_write); + closure_type(c, struct cache_set, uuid_write); up(&c->uuid_write_mutex); } @@ -1344,9 +1344,9 @@ void bch_cached_dev_release(struct kobject *kobj) module_put(THIS_MODULE); } -static void cached_dev_free(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_free) { - struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); + closure_type(dc, struct cached_dev, disk.cl); if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) cancel_writeback_rate_update_dwork(dc); @@ -1378,9 +1378,9 @@ static void cached_dev_free(struct closure *cl) kobject_put(&dc->disk.kobj); } -static void cached_dev_flush(struct closure *cl) +static CLOSURE_CALLBACK(cached_dev_flush) { - struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); + closure_type(dc, struct cached_dev, disk.cl); struct bcache_device *d = &dc->disk; mutex_lock(&bch_register_lock); @@ -1499,9 +1499,9 @@ void bch_flash_dev_release(struct kobject *kobj) kfree(d); } -static void flash_dev_free(struct closure *cl) +static CLOSURE_CALLBACK(flash_dev_free) { - struct bcache_device *d = container_of(cl, struct bcache_device, cl); + closure_type(d, struct bcache_device, cl); mutex_lock(&bch_register_lock); atomic_long_sub(bcache_dev_sectors_dirty(d), @@ -1512,9 +1512,9 @@ static void flash_dev_free(struct closure *cl) kobject_put(&d->kobj); } -static void flash_dev_flush(struct closure *cl) +static CLOSURE_CALLBACK(flash_dev_flush) { - struct bcache_device *d = container_of(cl, struct bcache_device, cl); + closure_type(d, struct bcache_device, cl); mutex_lock(&bch_register_lock); bcache_device_unlink(d); @@ -1670,9 +1670,9 @@ void bch_cache_set_release(struct kobject *kobj) module_put(THIS_MODULE); } -static void cache_set_free(struct closure *cl) +static CLOSURE_CALLBACK(cache_set_free) { - struct cache_set *c = container_of(cl, struct cache_set, cl); + closure_type(c, struct cache_set, cl); struct cache *ca; debugfs_remove(c->debug); @@ -1711,9 +1711,9 @@ static void cache_set_free(struct closure *cl) kobject_put(&c->kobj); } -static void cache_set_flush(struct closure *cl) +static CLOSURE_CALLBACK(cache_set_flush) { - struct cache_set *c = container_of(cl, struct cache_set, caching); + closure_type(c, struct cache_set, caching); struct cache *ca = c->cache; struct btree *b; @@ -1808,9 +1808,9 @@ static void conditional_stop_bcache_device(struct cache_set *c, } } -static void __cache_set_unregister(struct closure *cl) +static CLOSURE_CALLBACK(__cache_set_unregister) { - struct cache_set *c = container_of(cl, struct cache_set, caching); + closure_type(c, struct cache_set, caching); struct cached_dev *dc; struct bcache_device *d; size_t i; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 3accfdaee6b1..8827a6f130ad 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -341,16 +341,16 @@ static void dirty_init(struct keybuf_key *w) bch_bio_map(bio, NULL); } -static void dirty_io_destructor(struct closure *cl) +static CLOSURE_CALLBACK(dirty_io_destructor) { - struct dirty_io *io = container_of(cl, struct dirty_io, cl); + closure_type(io, struct dirty_io, cl); kfree(io); } -static void write_dirty_finish(struct closure *cl) +static CLOSURE_CALLBACK(write_dirty_finish) { - struct dirty_io *io = container_of(cl, struct dirty_io, cl); + closure_type(io, struct dirty_io, cl); struct keybuf_key *w = io->bio.bi_private; struct cached_dev *dc = io->dc; @@ -400,9 +400,9 @@ static void dirty_endio(struct bio *bio) closure_put(&io->cl); } -static void write_dirty(struct closure *cl) +static CLOSURE_CALLBACK(write_dirty) { - struct dirty_io *io = container_of(cl, struct dirty_io, cl); + closure_type(io, struct dirty_io, cl); struct keybuf_key *w = io->bio.bi_private; struct cached_dev *dc = io->dc; @@ -462,9 +462,9 @@ static void read_dirty_endio(struct bio *bio) dirty_endio(bio); } -static void read_dirty_submit(struct closure *cl) +static CLOSURE_CALLBACK(read_dirty_submit) { - struct dirty_io *io = container_of(cl, struct dirty_io, cl); + closure_type(io, struct dirty_io, cl); closure_bio_submit(io->dc->disk.c, &io->bio, cl); diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 120153e44ae0..f57fb821528d 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -434,7 +434,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b remaining_size = size; - order = MAX_ORDER - 1; + order = MAX_ORDER; while (remaining_size) { struct page *pages; unsigned size_to_add, to_copy; diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 2099c755119e..b475200d8586 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -24,7 +24,8 @@ bool verity_fec_is_enabled(struct dm_verity *v) */ static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io) { - return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io); + return (struct dm_verity_fec_io *) + ((char *)io + io->v->ti->per_io_data_size - sizeof(struct dm_verity_fec_io)); } /* diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index e115fcfe723c..14e58ae70521 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -642,7 +642,6 @@ static void verity_work(struct work_struct *w) io->in_tasklet = false; - verity_fec_init_io(io); verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } @@ -668,7 +667,9 @@ static void verity_end_io(struct bio *bio) struct dm_verity_io *io = bio->bi_private; if (bio->bi_status && - (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) { + (!verity_fec_is_enabled(io->v) || + verity_is_system_shutting_down() || + (bio->bi_opf & REQ_RAHEAD))) { verity_finish_io(io, bio->bi_status); return; } @@ -792,6 +793,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio) bio->bi_private = io; io->iter = bio->bi_iter; + verity_fec_init_io(io); + verity_submit_prefetch(v, io); submit_bio_noacct(bio); diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f96f4e281ee4..f9d522c870e6 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -115,12 +115,6 @@ static inline u8 *verity_io_want_digest(struct dm_verity *v, return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size; } -static inline u8 *verity_io_digest_end(struct dm_verity *v, - struct dm_verity_io *io) -{ - return verity_io_want_digest(v, io) + v->digest_size; -} - extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, struct bvec_iter *iter, int (*process)(struct dm_verity *v, diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index 19e996a829c9..b54275389f8a 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -186,6 +186,8 @@ do { \ #define ARC_IS_5MBIT 1 /* card default speed is 5MBit */ #define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, but default is 2.5MBit. */ +#define ARC_HAS_LED 4 /* card has software controlled LEDs */ +#define ARC_HAS_ROTARY 8 /* card has rotary encoder */ /* information needed to define an encapsulation driver */ struct ArcProto { diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index c580acb8b1d3..7b5c8bb02f11 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -213,12 +213,13 @@ static int com20020pci_probe(struct pci_dev *pdev, if (!strncmp(ci->name, "EAE PLX-PCI FB2", 15)) lp->backplane = 1; - /* Get the dev_id from the PLX rotary coder */ - if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) - dev_id_mask = 0x3; - dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; - - snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); + if (ci->flags & ARC_HAS_ROTARY) { + /* Get the dev_id from the PLX rotary coder */ + if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15)) + dev_id_mask = 0x3; + dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask; + snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i); + } if (arcnet_inb(ioaddr, COM20020_REG_R_STATUS) == 0xFF) { pr_err("IO address %Xh is empty!\n", ioaddr); @@ -230,6 +231,10 @@ static int com20020pci_probe(struct pci_dev *pdev, goto err_free_arcdev; } + ret = com20020_found(dev, IRQF_SHARED); + if (ret) + goto err_free_arcdev; + card = devm_kzalloc(&pdev->dev, sizeof(struct com20020_dev), GFP_KERNEL); if (!card) { @@ -239,41 +244,39 @@ static int com20020pci_probe(struct pci_dev *pdev, card->index = i; card->pci_priv = priv; - card->tx_led.brightness_set = led_tx_set; - card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, - GFP_KERNEL, "arc%d-%d-tx", - dev->dev_id, i); - card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "pci:green:tx:%d-%d", - dev->dev_id, i); - - card->tx_led.dev = &dev->dev; - card->recon_led.brightness_set = led_recon_set; - card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, - GFP_KERNEL, "arc%d-%d-recon", - dev->dev_id, i); - card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "pci:red:recon:%d-%d", - dev->dev_id, i); - card->recon_led.dev = &dev->dev; - card->dev = dev; - - ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); - if (ret) - goto err_free_arcdev; - ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); - if (ret) - goto err_free_arcdev; - - dev_set_drvdata(&dev->dev, card); - - ret = com20020_found(dev, IRQF_SHARED); - if (ret) - goto err_free_arcdev; - - devm_arcnet_led_init(dev, dev->dev_id, i); + if (ci->flags & ARC_HAS_LED) { + card->tx_led.brightness_set = led_tx_set; + card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-tx", + dev->dev_id, i); + card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:green:tx:%d-%d", + dev->dev_id, i); + + card->tx_led.dev = &dev->dev; + card->recon_led.brightness_set = led_recon_set; + card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, + GFP_KERNEL, "arc%d-%d-recon", + dev->dev_id, i); + card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, + "pci:red:recon:%d-%d", + dev->dev_id, i); + card->recon_led.dev = &dev->dev; + + ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); + if (ret) + goto err_free_arcdev; + + ret = devm_led_classdev_register(&pdev->dev, &card->recon_led); + if (ret) + goto err_free_arcdev; + + dev_set_drvdata(&dev->dev, card); + devm_arcnet_led_init(dev, dev->dev_id, i); + } + card->dev = dev; list_add(&card->list, &priv->list_dev); continue; @@ -329,7 +332,7 @@ static struct com20020_pci_card_info card_info_5mbit = { }; static struct com20020_pci_card_info card_info_sohard = { - .name = "PLX-PCI", + .name = "SOHARD SH ARC-PCI", .devcount = 1, /* SOHARD needs PCI base addr 4 */ .chan_map_tbl = { @@ -364,7 +367,7 @@ static struct com20020_pci_card_info card_info_eae_arc1 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static struct com20020_pci_card_info card_info_eae_ma1 = { @@ -396,7 +399,7 @@ static struct com20020_pci_card_info card_info_eae_ma1 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static struct com20020_pci_card_info card_info_eae_fb2 = { @@ -421,7 +424,7 @@ static struct com20020_pci_card_info card_info_eae_fb2 = { }, }, .rotary = 0x0, - .flags = ARC_CAN_10MBIT, + .flags = ARC_HAS_ROTARY | ARC_HAS_LED | ARC_CAN_10MBIT, }; static const struct pci_device_id com20020pci_id_table[] = { diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index db1bbcf3a5f2..c2cc9083e726 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2722,10 +2722,18 @@ static int ksz_connect_tag_protocol(struct dsa_switch *ds, { struct ksz_tagger_data *tagger_data; - tagger_data = ksz_tagger_data(ds); - tagger_data->xmit_work_fn = ksz_port_deferred_xmit; - - return 0; + switch (proto) { + case DSA_TAG_PROTO_KSZ8795: + return 0; + case DSA_TAG_PROTO_KSZ9893: + case DSA_TAG_PROTO_KSZ9477: + case DSA_TAG_PROTO_LAN937X: + tagger_data = ksz_tagger_data(ds); + tagger_data->xmit_work_fn = ksz_port_deferred_xmit; + return 0; + default: + return -EPROTONOSUPPORT; + } } static int ksz_port_vlan_filtering(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c index 9a8429f5d09c..d758a6c1b226 100644 --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c @@ -465,6 +465,7 @@ mv88e639x_pcs_select(struct mv88e6xxx_chip *chip, int port, case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XAUI: case PHY_INTERFACE_MODE_RXAUI: + case PHY_INTERFACE_MODE_USXGMII: return &mpcs->xg_pcs; default: @@ -873,7 +874,8 @@ static int mv88e6393x_xg_pcs_post_config(struct phylink_pcs *pcs, struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs); int err; - if (interface == PHY_INTERFACE_MODE_10GBASER) { + if (interface == PHY_INTERFACE_MODE_10GBASER || + interface == PHY_INTERFACE_MODE_USXGMII) { err = mv88e6393x_erratum_5_2(mpcs); if (err) return err; @@ -886,12 +888,37 @@ static int mv88e6393x_xg_pcs_post_config(struct phylink_pcs *pcs, return mv88e639x_xg_pcs_enable(mpcs); } +static void mv88e6393x_xg_pcs_get_state(struct phylink_pcs *pcs, + struct phylink_link_state *state) +{ + struct mv88e639x_pcs *mpcs = xg_pcs_to_mv88e639x_pcs(pcs); + u16 status, lp_status; + int err; + + if (state->interface != PHY_INTERFACE_MODE_USXGMII) + return mv88e639x_xg_pcs_get_state(pcs, state); + + state->link = false; + + err = mv88e639x_read(mpcs, MV88E6390_USXGMII_PHY_STATUS, &status); + err = err ? : mv88e639x_read(mpcs, MV88E6390_USXGMII_LP_STATUS, &lp_status); + if (err) { + dev_err(mpcs->mdio.dev.parent, + "can't read USXGMII status: %pe\n", ERR_PTR(err)); + return; + } + + state->link = !!(status & MDIO_USXGMII_LINK); + state->an_complete = state->link; + phylink_decode_usxgmii_word(state, lp_status); +} + static const struct phylink_pcs_ops mv88e6393x_xg_pcs_ops = { .pcs_enable = mv88e6393x_xg_pcs_enable, .pcs_disable = mv88e6393x_xg_pcs_disable, .pcs_pre_config = mv88e6393x_xg_pcs_pre_config, .pcs_post_config = mv88e6393x_xg_pcs_post_config, - .pcs_get_state = mv88e639x_xg_pcs_get_state, + .pcs_get_state = mv88e6393x_xg_pcs_get_state, .pcs_config = mv88e639x_xg_pcs_config, }; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 80b44043e6c5..28c9b6f1a54f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -553,17 +553,17 @@ void aq_ptp_tx_hwtstamp(struct aq_nic_s *aq_nic, u64 timestamp) /* aq_ptp_rx_hwtstamp - utility function which checks for RX time stamp * @adapter: pointer to adapter struct - * @skb: particular skb to send timestamp with + * @shhwtstamps: particular skb_shared_hwtstamps to save timestamp * * if the timestamp is valid, we convert it into the timecounter ns * value, then store that result into the hwtstamps structure which * is passed up the network stack */ -static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct sk_buff *skb, +static void aq_ptp_rx_hwtstamp(struct aq_ptp_s *aq_ptp, struct skb_shared_hwtstamps *shhwtstamps, u64 timestamp) { timestamp -= atomic_read(&aq_ptp->offset_ingress); - aq_ptp_convert_to_hwtstamp(aq_ptp, skb_hwtstamps(skb), timestamp); + aq_ptp_convert_to_hwtstamp(aq_ptp, shhwtstamps, timestamp); } void aq_ptp_hwtstamp_config_get(struct aq_ptp_s *aq_ptp, @@ -639,7 +639,7 @@ bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring) &aq_ptp->ptp_rx == ring || &aq_ptp->hwts_rx == ring; } -u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p, +u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct skb_shared_hwtstamps *shhwtstamps, u8 *p, unsigned int len) { struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp; @@ -648,7 +648,7 @@ u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p, p, len, ×tamp); if (ret > 0) - aq_ptp_rx_hwtstamp(aq_ptp, skb, timestamp); + aq_ptp_rx_hwtstamp(aq_ptp, shhwtstamps, timestamp); return ret; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h index 28ccb7ca2df9..210b723f2207 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.h @@ -67,7 +67,7 @@ int aq_ptp_hwtstamp_config_set(struct aq_ptp_s *aq_ptp, /* Return either ring is belong to PTP or not*/ bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring); -u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct sk_buff *skb, u8 *p, +u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, struct skb_shared_hwtstamps *shhwtstamps, u8 *p, unsigned int len); struct ptp_clock *aq_ptp_get_ptp_clock(struct aq_ptp_s *aq_ptp); @@ -143,7 +143,7 @@ static inline bool aq_ptp_ring(struct aq_nic_s *aq_nic, struct aq_ring_s *ring) } static inline u16 aq_ptp_extract_ts(struct aq_nic_s *aq_nic, - struct sk_buff *skb, u8 *p, + struct skb_shared_hwtstamps *shhwtstamps, u8 *p, unsigned int len) { return 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 4de22eed099a..694daeaf3e61 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -647,7 +647,7 @@ static int __aq_ring_rx_clean(struct aq_ring_s *self, struct napi_struct *napi, } if (is_ptp_ring) buff->len -= - aq_ptp_extract_ts(self->aq_nic, skb, + aq_ptp_extract_ts(self->aq_nic, skb_hwtstamps(skb), aq_buf_vaddr(&buff->rxdata), buff->len); @@ -742,6 +742,8 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring, struct aq_ring_buff_s *buff = &rx_ring->buff_ring[rx_ring->sw_head]; bool is_ptp_ring = aq_ptp_ring(rx_ring->aq_nic, rx_ring); struct aq_ring_buff_s *buff_ = NULL; + u16 ptp_hwtstamp_len = 0; + struct skb_shared_hwtstamps shhwtstamps; struct sk_buff *skb = NULL; unsigned int next_ = 0U; struct xdp_buff xdp; @@ -810,11 +812,12 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring, hard_start = page_address(buff->rxdata.page) + buff->rxdata.pg_off - rx_ring->page_offset; - if (is_ptp_ring) - buff->len -= - aq_ptp_extract_ts(rx_ring->aq_nic, skb, - aq_buf_vaddr(&buff->rxdata), - buff->len); + if (is_ptp_ring) { + ptp_hwtstamp_len = aq_ptp_extract_ts(rx_ring->aq_nic, &shhwtstamps, + aq_buf_vaddr(&buff->rxdata), + buff->len); + buff->len -= ptp_hwtstamp_len; + } xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); xdp_prepare_buff(&xdp, hard_start, rx_ring->page_offset, @@ -834,6 +837,9 @@ static int __aq_ring_xdp_clean(struct aq_ring_s *rx_ring, if (IS_ERR(skb) || !skb) continue; + if (ptp_hwtstamp_len > 0) + *skb_hwtstamps(skb) = shhwtstamps; + if (buff->is_vlan) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), buff->vlan_rx_tag); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 38d89d80b4a9..273c9ba48f09 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -2075,6 +2075,7 @@ destroy_flow_table: rhashtable_destroy(&tc_info->flow_table); free_tc_info: kfree(tc_info); + bp->tc_info = NULL; return rc; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 48b6191efa56..f52830dfb26a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6474,6 +6474,14 @@ static void tg3_dump_state(struct tg3 *tp) int i; u32 *regs; + /* If it is a PCI error, all registers will be 0xffff, + * we don't dump them out, just report the error and return + */ + if (tp->pdev->error_state != pci_channel_io_normal) { + netdev_err(tp->dev, "PCI channel ERROR!\n"); + return; + } + regs = kzalloc(TG3_REG_BLK_SIZE, GFP_ATOMIC); if (!regs) return; @@ -11259,7 +11267,8 @@ static void tg3_reset_task(struct work_struct *work) rtnl_lock(); tg3_full_lock(tp, 0); - if (tp->pcierr_recovery || !netif_running(tp->dev)) { + if (tp->pcierr_recovery || !netif_running(tp->dev) || + tp->pdev->error_state != pci_channel_io_normal) { tg3_flag_clear(tp, RESET_TASK_PENDING); tg3_full_unlock(tp); rtnl_unlock(); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 928d934cb21a..f75668c47935 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -66,6 +66,27 @@ static enum mac_mode hns_get_enet_interface(const struct hns_mac_cb *mac_cb) } } +static u32 hns_mac_link_anti_shake(struct mac_driver *mac_ctrl_drv) +{ +#define HNS_MAC_LINK_WAIT_TIME 5 +#define HNS_MAC_LINK_WAIT_CNT 40 + + u32 link_status = 0; + int i; + + if (!mac_ctrl_drv->get_link_status) + return link_status; + + for (i = 0; i < HNS_MAC_LINK_WAIT_CNT; i++) { + msleep(HNS_MAC_LINK_WAIT_TIME); + mac_ctrl_drv->get_link_status(mac_ctrl_drv, &link_status); + if (!link_status) + break; + } + + return link_status; +} + void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) { struct mac_driver *mac_ctrl_drv; @@ -83,6 +104,14 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) &sfp_prsnt); if (!ret) *link_status = *link_status && sfp_prsnt; + + /* for FIBER port, it may have a fake link up. + * when the link status changes from down to up, we need to do + * anti-shake. the anti-shake time is base on tests. + * only FIBER port need to do this. + */ + if (*link_status && !mac_cb->link) + *link_status = hns_mac_link_anti_shake(mac_ctrl_drv); } mac_cb->link = *link_status; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 0900abf5c508..8a713eed4465 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -142,7 +142,8 @@ MODULE_DEVICE_TABLE(acpi, hns_enet_acpi_match); static void fill_desc(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu) + int buf_num, enum hns_desc_type type, int mtu, + bool is_gso) { struct hnae_desc *desc = &ring->desc[ring->next_to_use]; struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -275,6 +276,15 @@ static int hns_nic_maybe_stop_tso( return 0; } +static int hns_nic_maybe_stop_tx_v2(struct sk_buff **out_skb, int *bnum, + struct hnae_ring *ring) +{ + if (skb_is_gso(*out_skb)) + return hns_nic_maybe_stop_tso(out_skb, bnum, ring); + else + return hns_nic_maybe_stop_tx(out_skb, bnum, ring); +} + static void fill_tso_desc(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, int buf_num, enum hns_desc_type type, int mtu) @@ -300,6 +310,19 @@ static void fill_tso_desc(struct hnae_ring *ring, void *priv, mtu); } +static void fill_desc_v2(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu, + bool is_gso) +{ + if (is_gso) + fill_tso_desc(ring, priv, size, dma, frag_end, buf_num, type, + mtu); + else + fill_v2_desc(ring, priv, size, dma, frag_end, buf_num, type, + mtu); +} + netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, struct sk_buff *skb, struct hns_nic_ring_data *ring_data) @@ -313,6 +336,7 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, int seg_num; dma_addr_t dma; int size, next_to_use; + bool is_gso; int i; switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) { @@ -339,8 +363,9 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, ring->stats.sw_err_cnt++; goto out_err_tx_ok; } + is_gso = skb_is_gso(skb); priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0, - buf_num, DESC_TYPE_SKB, ndev->mtu); + buf_num, DESC_TYPE_SKB, ndev->mtu, is_gso); /* fill the fragments */ for (i = 1; i < seg_num; i++) { @@ -354,7 +379,7 @@ netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, } priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma, seg_num - 1 == i ? 1 : 0, buf_num, - DESC_TYPE_PAGE, ndev->mtu); + DESC_TYPE_PAGE, ndev->mtu, is_gso); } /*complete translate all packets*/ @@ -1776,15 +1801,6 @@ static int hns_nic_set_features(struct net_device *netdev, netdev_info(netdev, "enet v1 do not support tso!\n"); break; default: - if (features & (NETIF_F_TSO | NETIF_F_TSO6)) { - priv->ops.fill_desc = fill_tso_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; - /* The chip only support 7*4096 */ - netif_set_tso_max_size(netdev, 7 * 4096); - } else { - priv->ops.fill_desc = fill_v2_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; - } break; } netdev->features = features; @@ -2159,16 +2175,9 @@ static void hns_nic_set_priv_ops(struct net_device *netdev) priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; } else { priv->ops.get_rxd_bnum = get_v2rx_desc_bnum; - if ((netdev->features & NETIF_F_TSO) || - (netdev->features & NETIF_F_TSO6)) { - priv->ops.fill_desc = fill_tso_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tso; - /* This chip only support 7*4096 */ - netif_set_tso_max_size(netdev, 7 * 4096); - } else { - priv->ops.fill_desc = fill_v2_desc; - priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx; - } + priv->ops.fill_desc = fill_desc_v2; + priv->ops.maybe_stop_tx = hns_nic_maybe_stop_tx_v2; + netif_set_tso_max_size(netdev, 7 * 4096); /* enable tso when init * control tso on/off through TSE bit in bd */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index ffa9d6573f54..3f3ee032f631 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -44,7 +44,8 @@ struct hns_nic_ring_data { struct hns_nic_ops { void (*fill_desc)(struct hnae_ring *ring, void *priv, int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu); + int buf_num, enum hns_desc_type type, int mtu, + bool is_gso); int (*maybe_stop_tx)(struct sk_buff **out_skb, int *bnum, struct hnae_ring *ring); void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9eeea8d9ab67..dc642efe1cfa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16176,7 +16176,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT; if (val < MAX_FRAME_SIZE_DEFAULT) dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n", - i, val); + pf->hw.port, val); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 6b3d3e54b8b7..f520855daf58 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -827,18 +827,10 @@ static int __iavf_set_coalesce(struct net_device *netdev, struct iavf_adapter *adapter = netdev_priv(netdev); int i; - if (ec->rx_coalesce_usecs == 0) { - if (ec->use_adaptive_rx_coalesce) - netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else if ((ec->rx_coalesce_usecs < IAVF_MIN_ITR) || - (ec->rx_coalesce_usecs > IAVF_MAX_ITR)) { + if (ec->rx_coalesce_usecs > IAVF_MAX_ITR) { netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; - } else if (ec->tx_coalesce_usecs == 0) { - if (ec->use_adaptive_tx_coalesce) - netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); - } else if ((ec->tx_coalesce_usecs < IAVF_MIN_ITR) || - (ec->tx_coalesce_usecs > IAVF_MAX_ITR)) { + } else if (ec->tx_coalesce_usecs > IAVF_MAX_ITR) { netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 7e6ee32d19b6..10ba36602c0c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -15,7 +15,6 @@ */ #define IAVF_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define IAVF_ITR_MASK 0x1FFE /* mask for ITR register value */ -#define IAVF_MIN_ITR 2 /* reg uses 2 usec resolution */ #define IAVF_ITR_100K 10 /* all values below must be even */ #define IAVF_ITR_50K 20 #define IAVF_ITR_20K 50 diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 5a45bd5ce6ad..6d33dd647c78 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -375,16 +375,11 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) */ int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) { - struct ice_pf *pf; - if (!vf || !q_vector) return -EINVAL; - pf = vf->pf; - /* always add one to account for the OICR being the first MSIX */ - return pf->sriov_base_vector + pf->vfs.num_msix_per * vf->vf_id + - q_vector->v_idx + 1; + return vf->first_vector_idx + q_vector->v_idx + 1; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c index d7b10dc67f03..80dc4bcdd3a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c @@ -32,7 +32,6 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) /* setup outer VLAN ops */ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan; vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; - vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan; /* setup inner VLAN ops */ vlan_ops = &vsi->inner_vlan_ops; @@ -47,8 +46,13 @@ static void ice_port_vlan_on(struct ice_vsi *vsi) vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan; vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; - vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan; } + + /* all Rx traffic should be in the domain of the assigned port VLAN, + * so prevent disabling Rx VLAN filtering + */ + vlan_ops->dis_rx_filtering = noop_vlan; + vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering; } @@ -77,6 +81,8 @@ static void ice_port_vlan_off(struct ice_vsi *vsi) vlan_ops->del_vlan = ice_vsi_del_vlan; } + vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; + if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags)) vlan_ops->ena_rx_filtering = noop_vlan; else @@ -141,7 +147,6 @@ void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) &vsi->outer_vlan_ops : &vsi->inner_vlan_ops; vlan_ops->add_vlan = ice_vsi_add_vlan; - vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering; vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering; vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index de11b3186bd7..1c7b4ded948b 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -1523,7 +1523,6 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) u16 num_q_vectors_mapped, vsi_id, vector_id; struct virtchnl_irq_map_info *irqmap_info; struct virtchnl_vector_map *map; - struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; int i; @@ -1535,7 +1534,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) * there is actually at least a single VF queue vector mapped */ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - pf->vfs.num_msix_per < num_q_vectors_mapped || + vf->num_msix < num_q_vectors_mapped || !num_q_vectors_mapped) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -1557,7 +1556,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) /* vector_id is always 0-based for each VF, and can never be * larger than or equal to the max allowed interrupts per VF */ - if (!(vector_id < pf->vfs.num_msix_per) || + if (!(vector_id < vf->num_msix) || !ice_vc_isvalid_vsi_id(vf, vsi_id) || (!vector_id && (map->rxq_map || map->txq_map))) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 1a6e04b9c1ef..edeb0f737312 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -2019,7 +2019,7 @@ struct mcs_hw_info { u8 tcam_entries; /* RX/TX Tcam entries per mcs block */ u8 secy_entries; /* RX/TX SECY entries per mcs block */ u8 sc_entries; /* RX/TX SC CAM entries per mcs block */ - u8 sa_entries; /* PN table entries = SA entries */ + u16 sa_entries; /* PN table entries = SA entries */ u64 rsvd[16]; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index c43f19dfbd74..c1775bd01c2b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -117,7 +117,7 @@ void mcs_get_rx_secy_stats(struct mcs *mcs, struct mcs_secy_stats *stats, int id reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYTAGGEDCTLX(id); stats->pkt_tagged_ctl_cnt = mcs_reg_read(mcs, reg); - reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDORNOTAGX(id); + reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(id); stats->pkt_untaged_cnt = mcs_reg_read(mcs, reg); reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYCTLX(id); @@ -215,7 +215,7 @@ void mcs_get_sc_stats(struct mcs *mcs, struct mcs_sc_stats *stats, reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCNOTVALIDX(id); stats->pkt_notvalid_cnt = mcs_reg_read(mcs, reg); - reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDOROKX(id); + reg = MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDX(id); stats->pkt_unchecked_cnt = mcs_reg_read(mcs, reg); if (mcs->hw->mcs_blks > 1) { @@ -1219,6 +1219,17 @@ struct mcs *mcs_get_pdata(int mcs_id) return NULL; } +bool is_mcs_bypass(int mcs_id) +{ + struct mcs *mcs_dev; + + list_for_each_entry(mcs_dev, &mcs_list, mcs_list) { + if (mcs_dev->mcs_id == mcs_id) + return mcs_dev->bypass; + } + return true; +} + void mcs_set_port_cfg(struct mcs *mcs, struct mcs_port_cfg_set_req *req) { u64 val = 0; @@ -1436,7 +1447,7 @@ static int mcs_x2p_calibration(struct mcs *mcs) return err; } -static void mcs_set_external_bypass(struct mcs *mcs, u8 bypass) +static void mcs_set_external_bypass(struct mcs *mcs, bool bypass) { u64 val; @@ -1447,6 +1458,7 @@ static void mcs_set_external_bypass(struct mcs *mcs, u8 bypass) else val &= ~BIT_ULL(6); mcs_reg_write(mcs, MCSX_MIL_GLOBAL, val); + mcs->bypass = bypass; } static void mcs_global_cfg(struct mcs *mcs) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h index 0f89dcb76465..f927cc61dfd2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.h @@ -149,6 +149,7 @@ struct mcs { u16 num_vec; void *rvu; u16 *tx_sa_active; + bool bypass; }; struct mcs_ops { @@ -206,6 +207,7 @@ void mcs_get_custom_tag_cfg(struct mcs *mcs, struct mcs_custom_tag_cfg_get_req * int mcs_alloc_ctrlpktrule(struct rsrc_bmap *rsrc, u16 *pf_map, u16 offset, u16 pcifunc); int mcs_free_ctrlpktrule(struct mcs *mcs, struct mcs_free_ctrl_pkt_rule_req *req); int mcs_ctrlpktrule_write(struct mcs *mcs, struct mcs_ctrl_pkt_rule_write_req *req); +bool is_mcs_bypass(int mcs_id); /* CN10K-B APIs */ void cn10kb_mcs_set_hw_capabilities(struct mcs *mcs); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h index f3ab01fc363c..f4c6de89002c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_reg.h @@ -810,14 +810,37 @@ offset = 0x9d8ull; \ offset; }) +#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDX(a) ({ \ + u64 offset; \ + \ + offset = 0xee80ull; \ + if (mcs->hw->mcs_blks > 1) \ + offset = 0xe818ull; \ + offset += (a) * 0x8ull; \ + offset; }) + +#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(a) ({ \ + u64 offset; \ + \ + offset = 0xa680ull; \ + if (mcs->hw->mcs_blks > 1) \ + offset = 0xd018ull; \ + offset += (a) * 0x8ull; \ + offset; }) + +#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCLATEORDELAYEDX(a) ({ \ + u64 offset; \ + \ + offset = 0xf680ull; \ + if (mcs->hw->mcs_blks > 1) \ + offset = 0xe018ull; \ + offset += (a) * 0x8ull; \ + offset; }) + #define MCSX_CSE_RX_MEM_SLAVE_INOCTETSSCDECRYPTEDX(a) (0xe680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INOCTETSSCVALIDATEX(a) (0xde80ull + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDORNOTAGX(a) (0xa680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYNOTAGX(a) (0xd218 + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYUNTAGGEDX(a) (0xd018ull + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCUNCHECKEDOROKX(a) (0xee80ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSECYCTLX(a) (0xb680ull + (a) * 0x8ull) -#define MCSX_CSE_RX_MEM_SLAVE_INPKTSSCLATEORDELAYEDX(a) (0xf680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSAINVALIDX(a) (0x12680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSANOTUSINGSAERRORX(a) (0x15680ull + (a) * 0x8ull) #define MCSX_CSE_RX_MEM_SLAVE_INPKTSSANOTVALIDX(a) (0x13680ull + (a) * 0x8ull) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index c1e868a2610e..5c1d04a3c559 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2638,6 +2638,9 @@ static void __rvu_flr_handler(struct rvu *rvu, u16 pcifunc) rvu_npc_free_mcam_entries(rvu, pcifunc, -1); rvu_mac_reset(rvu, pcifunc); + if (rvu->mcs_blk_cnt) + rvu_mcs_flr_handler(rvu, pcifunc); + mutex_unlock(&rvu->flr_lock); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 814945aa619e..1f3ff4bb8753 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -364,6 +364,7 @@ struct nix_hw { struct nix_txvlan txvlan; struct nix_ipolicer *ipolicer; u64 *tx_credits; + u8 cc_mcs_cnt; }; /* RVU block's capabilities or functionality, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index c70932625d0d..62780d8b4f9a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1087,7 +1087,7 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq"); if (!rvu_dl->devlink_wq) - goto err; + return -ENOMEM; INIT_WORK(&rvu_reporters->intr_work, rvu_npa_intr_work); INIT_WORK(&rvu_reporters->err_work, rvu_npa_err_work); @@ -1095,9 +1095,6 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) INIT_WORK(&rvu_reporters->ras_work, rvu_npa_ras_work); return 0; -err: - rvu_npa_health_reporters_destroy(rvu_dl); - return -ENOMEM; } static int rvu_npa_health_reporters_create(struct rvu_devlink *rvu_dl) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 475a08e4830e..b01503acd520 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -12,6 +12,7 @@ #include "rvu_reg.h" #include "rvu.h" #include "npc.h" +#include "mcs.h" #include "cgx.h" #include "lmac_common.h" #include "rvu_npc_hash.h" @@ -4645,6 +4646,12 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, SDP_HW_MAX_FRS << 16 | NIC_HW_MIN_FRS); } + /* Get MCS external bypass status for CN10K-B */ + if (mcs_get_blkcnt() == 1) { + /* Adjust for 2 credits when external bypass is disabled */ + nix_hw->cc_mcs_cnt = is_mcs_bypass(0) ? 0 : 2; + } + /* Set credits for Tx links assuming max packet length allowed. * This will be reconfigured based on MTU set for PF/VF. */ @@ -4668,6 +4675,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, tx_credits = (lmac_fifo_len - lmac_max_frs) / 16; /* Enable credits and set credit pkt count to max allowed */ cfg = (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1); + cfg |= FIELD_PREP(NIX_AF_LINKX_MCS_CNT_MASK, nix_hw->cc_mcs_cnt); link = iter + slink; nix_hw->tx_credits[link] = tx_credits; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 49babf968c1b..cd60073a268c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -389,7 +389,13 @@ static u64 npc_get_default_entry_action(struct rvu *rvu, struct npc_mcam *mcam, int bank, nixlf, index; /* get ucast entry rule entry index */ - nix_get_nixlf(rvu, pf_func, &nixlf, NULL); + if (nix_get_nixlf(rvu, pf_func, &nixlf, NULL)) { + dev_err(rvu->dev, "%s: nixlf not attached to pcifunc:0x%x\n", + __func__, pf_func); + /* Action 0 is drop */ + return 0; + } + index = npc_get_nixlf_mcam_index(mcam, pf_func, nixlf, NIXLF_UCAST_ENTRY); bank = npc_get_bank(mcam, index); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c index b3150f053291..d46ac29adb96 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c @@ -31,8 +31,8 @@ static struct hw_reg_map txsch_reg_map[NIX_TXSCH_LVL_CNT] = { {NIX_TXSCH_LVL_TL4, 3, 0xFFFF, {{0x0B00, 0x0B08}, {0x0B10, 0x0B18}, {0x1200, 0x12E0} } }, {NIX_TXSCH_LVL_TL3, 4, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608}, - {0x1610, 0x1618}, {0x1700, 0x17B0} } }, - {NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17B0} } }, + {0x1610, 0x1618}, {0x1700, 0x17C8} } }, + {NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17C8} } }, {NIX_TXSCH_LVL_TL1, 1, 0xFFFF, {{0x0C00, 0x0D98} } }, }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 189f0eafd5ff..6f73ad9807f0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -437,6 +437,7 @@ #define NIX_AF_LINKX_BASE_MASK GENMASK_ULL(11, 0) #define NIX_AF_LINKX_RANGE_MASK GENMASK_ULL(19, 16) +#define NIX_AF_LINKX_MCS_CNT_MASK GENMASK_ULL(33, 32) /* SSO */ #define SSO_AF_CONST (0x1000) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 9efcec549834..53f6258a973c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -334,9 +334,12 @@ static void otx2_get_pauseparam(struct net_device *netdev, if (is_otx2_lbkvf(pfvf->pdev)) return; + mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_cgx_cfg_pause_frm(&pfvf->mbox); - if (!req) + if (!req) { + mutex_unlock(&pfvf->mbox.lock); return; + } if (!otx2_sync_mbox_msg(&pfvf->mbox)) { rsp = (struct cgx_pause_frm_cfg *) @@ -344,6 +347,7 @@ static void otx2_get_pauseparam(struct net_device *netdev, pause->rx_pause = rsp->rx_pause; pause->tx_pause = rsp->tx_pause; } + mutex_unlock(&pfvf->mbox.lock); } static int otx2_set_pauseparam(struct net_device *netdev, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 532e324bdcc8..0c17ebdda148 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1688,6 +1688,14 @@ static void otx2_do_set_rx_mode(struct otx2_nic *pf) mutex_unlock(&pf->mbox.lock); } +static void otx2_set_irq_coalesce(struct otx2_nic *pfvf) +{ + int cint; + + for (cint = 0; cint < pfvf->hw.cint_cnt; cint++) + otx2_config_irq_coalescing(pfvf, cint); +} + static void otx2_dim_work(struct work_struct *w) { struct dim_cq_moder cur_moder; @@ -1703,6 +1711,7 @@ static void otx2_dim_work(struct work_struct *w) CQ_TIMER_THRESH_MAX : cur_moder.usec; pfvf->hw.cq_ecount_wait = (cur_moder.pkts > NAPI_POLL_WEIGHT) ? NAPI_POLL_WEIGHT : cur_moder.pkts; + otx2_set_irq_coalesce(pfvf); dim->state = DIM_START_MEASURE; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 6ee15f3c25ed..4d519ea833b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -512,11 +512,18 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p { struct dim_sample dim_sample; u64 rx_frames, rx_bytes; + u64 tx_frames, tx_bytes; rx_frames = OTX2_GET_RX_STATS(RX_BCAST) + OTX2_GET_RX_STATS(RX_MCAST) + OTX2_GET_RX_STATS(RX_UCAST); rx_bytes = OTX2_GET_RX_STATS(RX_OCTS); - dim_update_sample(pfvf->napi_events, rx_frames, rx_bytes, &dim_sample); + tx_bytes = OTX2_GET_TX_STATS(TX_OCTS); + tx_frames = OTX2_GET_TX_STATS(TX_UCAST); + + dim_update_sample(pfvf->napi_events, + rx_frames + tx_frames, + rx_bytes + tx_bytes, + &dim_sample); net_dim(&cq_poll->dim, dim_sample); } @@ -558,16 +565,9 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) if (pfvf->flags & OTX2_FLAG_INTF_DOWN) return workdone; - /* Check for adaptive interrupt coalesce */ - if (workdone != 0 && - ((pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED) == - OTX2_FLAG_ADPTV_INT_COAL_ENABLED)) { - /* Adjust irq coalese using net_dim */ + /* Adjust irq coalese using net_dim */ + if (pfvf->flags & OTX2_FLAG_ADPTV_INT_COAL_ENABLED) otx2_adjust_adaptive_coalese(pfvf, cq_poll); - /* Update irq coalescing */ - for (i = 0; i < pfvf->hw.cint_cnt; i++) - otx2_config_irq_coalescing(pfvf, i); - } if (unlikely(!filled_cnt)) { struct refill_work *work; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 060a77f2265d..e522845c7c21 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -160,6 +160,18 @@ struct nfp_tun_mac_addr_offload { u8 addr[ETH_ALEN]; }; +/** + * struct nfp_neigh_update_work - update neighbour information to nfp + * @work: Work queue for writing neigh to the nfp + * @n: neighbour entry + * @app: Back pointer to app + */ +struct nfp_neigh_update_work { + struct work_struct work; + struct neighbour *n; + struct nfp_app *app; +}; + enum nfp_flower_mac_offload_cmd { NFP_TUNNEL_MAC_OFFLOAD_ADD = 0, NFP_TUNNEL_MAC_OFFLOAD_DEL = 1, @@ -607,38 +619,30 @@ err: nfp_flower_cmsg_warn(app, "Neighbour configuration failed.\n"); } -static int -nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, - void *ptr) +static void +nfp_tun_release_neigh_update_work(struct nfp_neigh_update_work *update_work) { - struct nfp_flower_priv *app_priv; - struct netevent_redirect *redir; - struct neighbour *n; + neigh_release(update_work->n); + kfree(update_work); +} + +static void nfp_tun_neigh_update(struct work_struct *work) +{ + struct nfp_neigh_update_work *update_work; struct nfp_app *app; + struct neighbour *n; bool neigh_invalid; int err; - switch (event) { - case NETEVENT_REDIRECT: - redir = (struct netevent_redirect *)ptr; - n = redir->neigh; - break; - case NETEVENT_NEIGH_UPDATE: - n = (struct neighbour *)ptr; - break; - default: - return NOTIFY_DONE; - } - - neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; - - app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); - app = app_priv->app; + update_work = container_of(work, struct nfp_neigh_update_work, work); + app = update_work->app; + n = update_work->n; if (!nfp_flower_get_port_id_from_netdev(app, n->dev)) - return NOTIFY_DONE; + goto out; #if IS_ENABLED(CONFIG_INET) + neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; if (n->tbl->family == AF_INET6) { #if IS_ENABLED(CONFIG_IPV6) struct flowi6 flow6 = {}; @@ -655,13 +659,11 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, dst = ip6_dst_lookup_flow(dev_net(n->dev), NULL, &flow6, NULL); if (IS_ERR(dst)) - return NOTIFY_DONE; + goto out; dst_release(dst); } nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false); -#else - return NOTIFY_DONE; #endif /* CONFIG_IPV6 */ } else { struct flowi4 flow4 = {}; @@ -678,17 +680,71 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, rt = ip_route_output_key(dev_net(n->dev), &flow4); err = PTR_ERR_OR_ZERO(rt); if (err) - return NOTIFY_DONE; + goto out; ip_rt_put(rt); } nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false); } -#else - return NOTIFY_DONE; #endif /* CONFIG_INET */ +out: + nfp_tun_release_neigh_update_work(update_work); +} - return NOTIFY_OK; +static struct nfp_neigh_update_work * +nfp_tun_alloc_neigh_update_work(struct nfp_app *app, struct neighbour *n) +{ + struct nfp_neigh_update_work *update_work; + + update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); + if (!update_work) + return NULL; + + INIT_WORK(&update_work->work, nfp_tun_neigh_update); + neigh_hold(n); + update_work->n = n; + update_work->app = app; + + return update_work; +} + +static int +nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nfp_neigh_update_work *update_work; + struct nfp_flower_priv *app_priv; + struct netevent_redirect *redir; + struct neighbour *n; + struct nfp_app *app; + + switch (event) { + case NETEVENT_REDIRECT: + redir = (struct netevent_redirect *)ptr; + n = redir->neigh; + break; + case NETEVENT_NEIGH_UPDATE: + n = (struct neighbour *)ptr; + break; + default: + return NOTIFY_DONE; + } +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); + app = app_priv->app; + update_work = nfp_tun_alloc_neigh_update_work(app, n); + if (!update_work) + return NOTIFY_DONE; + + queue_work(system_highpri_wq, &update_work->work); + + return NOTIFY_DONE; } void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) @@ -706,6 +762,7 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); if (!netdev) goto fail_rcu_unlock; + dev_hold(netdev); flow.daddr = payload->ipv4_addr; flow.flowi4_proto = IPPROTO_UDP; @@ -725,13 +782,16 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; + rcu_read_unlock(); + nfp_tun_write_neigh(n->dev, app, &flow, n, false, true); neigh_release(n); - rcu_read_unlock(); + dev_put(netdev); return; fail_rcu_unlock: rcu_read_unlock(); + dev_put(netdev); nfp_flower_cmsg_warn(app, "Requested route not found.\n"); } @@ -749,6 +809,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL); if (!netdev) goto fail_rcu_unlock; + dev_hold(netdev); flow.daddr = payload->ipv6_addr; flow.flowi6_proto = IPPROTO_UDP; @@ -766,14 +827,16 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) dst_release(dst); if (!n) goto fail_rcu_unlock; + rcu_read_unlock(); nfp_tun_write_neigh(n->dev, app, &flow, n, true, true); neigh_release(n); - rcu_read_unlock(); + dev_put(netdev); return; fail_rcu_unlock: rcu_read_unlock(); + dev_put(netdev); nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n"); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index cee4e5c3d09a..c47c059465ae 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -223,7 +223,7 @@ struct ionic_desc_info { void *cb_arg; }; -#define IONIC_QUEUE_NAME_MAX_SZ 32 +#define IONIC_QUEUE_NAME_MAX_SZ 16 struct ionic_queue { struct device *dev; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 6842a31fc04b..3bb0cfc40576 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -49,24 +49,24 @@ static void ionic_lif_queue_identify(struct ionic_lif *lif); static void ionic_dim_work(struct work_struct *work) { struct dim *dim = container_of(work, struct dim, work); + struct ionic_intr_info *intr; struct dim_cq_moder cur_moder; struct ionic_qcq *qcq; + struct ionic_lif *lif; u32 new_coal; cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); qcq = container_of(dim, struct ionic_qcq, dim); - new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec); + lif = qcq->q.lif; + new_coal = ionic_coal_usec_to_hw(lif->ionic, cur_moder.usec); new_coal = new_coal ? new_coal : 1; - if (qcq->intr.dim_coal_hw != new_coal) { - unsigned int qi = qcq->cq.bound_q->index; - struct ionic_lif *lif = qcq->q.lif; - - qcq->intr.dim_coal_hw = new_coal; + intr = &qcq->intr; + if (intr->dim_coal_hw != new_coal) { + intr->dim_coal_hw = new_coal; ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->rxqcqs[qi]->intr.index, - qcq->intr.dim_coal_hw); + intr->index, intr->dim_coal_hw); } dim->state = DIM_START_MEASURE; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 91d9dc5a72a6..c07f82ca9755 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -192,6 +192,7 @@ enum rtl_registers { /* No threshold before first PCI xfer */ #define RX_FIFO_THRESH (7 << RXCFG_FIFO_SHIFT) #define RX_EARLY_OFF (1 << 11) +#define RX_PAUSE_SLOT_ON (1 << 11) /* 8125b and later */ #define RXCFG_DMA_SHIFT 8 /* Unlimited maximum PCI burst. */ #define RX_DMA_BURST (7 << RXCFG_DMA_SHIFT) @@ -2305,9 +2306,13 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); break; - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_63: + case RTL_GIGA_MAC_VER_61: RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST); break; + case RTL_GIGA_MAC_VER_63: + RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST | + RX_PAUSE_SLOT_ON); + break; default: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST); break; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index ea92650f5c97..e02cebc3f1b7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -573,28 +573,22 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, return 0; } -void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, +void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable) { u32 value; - if (!enable) { - value = readl(ioaddr + MAC_FPE_CTRL_STS); - - value &= ~EFPE; - - writel(value, ioaddr + MAC_FPE_CTRL_STS); - return; + if (enable) { + cfg->fpe_csr = EFPE; + value = readl(ioaddr + GMAC_RXQ_CTRL1); + value &= ~GMAC_RXQCTRL_FPRQ; + value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT; + writel(value, ioaddr + GMAC_RXQ_CTRL1); + } else { + cfg->fpe_csr = 0; } - - value = readl(ioaddr + GMAC_RXQ_CTRL1); - value &= ~GMAC_RXQCTRL_FPRQ; - value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT; - writel(value, ioaddr + GMAC_RXQ_CTRL1); - - value = readl(ioaddr + MAC_FPE_CTRL_STS); - value |= EFPE; - writel(value, ioaddr + MAC_FPE_CTRL_STS); + writel(cfg->fpe_csr, ioaddr + MAC_FPE_CTRL_STS); } int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) @@ -604,6 +598,9 @@ int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) status = FPE_EVENT_UNKNOWN; + /* Reads from the MAC_FPE_CTRL_STS register should only be performed + * here, since the status flags of MAC_FPE_CTRL_STS are "clear on read" + */ value = readl(ioaddr + MAC_FPE_CTRL_STS); if (value & TRSP) { @@ -629,19 +626,15 @@ int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) return status; } -void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, enum stmmac_mpacket_type type) +void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + enum stmmac_mpacket_type type) { - u32 value; + u32 value = cfg->fpe_csr; - value = readl(ioaddr + MAC_FPE_CTRL_STS); - - if (type == MPACKET_VERIFY) { - value &= ~SRSP; + if (type == MPACKET_VERIFY) value |= SVER; - } else { - value &= ~SVER; + else if (type == MPACKET_RESPONSE) value |= SRSP; - } writel(value, ioaddr + MAC_FPE_CTRL_STS); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 8b0f2c90faef..bf33a51d229e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -102,9 +102,11 @@ int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries, int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, struct stmmac_pps_cfg *cfg, bool enable, u32 sub_second_inc, u32 systime_flags); -void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, +void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable); void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, + struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index f33f73de5cfe..eb48211d9b0e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1433,7 +1433,8 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, writel(value, ioaddr + XGMAC_RX_CONFIG); } -static void dwxgmac3_fpe_configure(void __iomem *ioaddr, u32 num_txq, +static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable) { u32 value; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 72412d733856..7be04b54738b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -419,9 +419,11 @@ struct stmmac_ops { bool en, bool udp, bool sa, bool inv, u32 match); void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr); - void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, + void (*fpe_configure)(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, bool enable); void (*fpe_send_mpacket)(void __iomem *ioaddr, + struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev); }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5b7d45f2e2a9..47de466e432c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -964,7 +964,8 @@ static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up) bool *hs_enable = &fpe_cfg->hs_enable; if (is_up && *hs_enable) { - stmmac_fpe_send_mpacket(priv, priv->ioaddr, MPACKET_VERIFY); + stmmac_fpe_send_mpacket(priv, priv->ioaddr, fpe_cfg, + MPACKET_VERIFY); } else { *lo_state = FPE_STATE_OFF; *lp_state = FPE_STATE_OFF; @@ -5921,6 +5922,7 @@ static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status) /* If user has requested FPE enable, quickly response */ if (*hs_enable) stmmac_fpe_send_mpacket(priv, priv->ioaddr, + fpe_cfg, MPACKET_RESPONSE); } @@ -7338,6 +7340,7 @@ static void stmmac_fpe_lp_task(struct work_struct *work) if (*lo_state == FPE_STATE_ENTERING_ON && *lp_state == FPE_STATE_ENTERING_ON) { stmmac_fpe_configure(priv, priv->ioaddr, + fpe_cfg, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, *enable); @@ -7356,6 +7359,7 @@ static void stmmac_fpe_lp_task(struct work_struct *work) netdev_info(priv->dev, SEND_VERIFY_MPAKCET_FMT, *lo_state, *lp_state); stmmac_fpe_send_mpacket(priv, priv->ioaddr, + fpe_cfg, MPACKET_VERIFY); } /* Sleep then retry */ @@ -7370,6 +7374,7 @@ void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable) if (priv->plat->fpe_cfg->hs_enable != enable) { if (enable) { stmmac_fpe_send_mpacket(priv, priv->ioaddr, + priv->plat->fpe_cfg, MPACKET_VERIFY); } else { priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF; @@ -7834,6 +7839,7 @@ int stmmac_suspend(struct device *dev) if (priv->dma_cap.fpesel) { /* Disable FPE */ stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->fpe_cfg, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, false); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 287767f6d671..26fa33e5ec34 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -1083,6 +1083,7 @@ disable: priv->plat->fpe_cfg->enable = false; stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->fpe_cfg, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, false); diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig index ca7bf7f897d3..c8cbd85adcf9 100644 --- a/drivers/net/hyperv/Kconfig +++ b/drivers/net/hyperv/Kconfig @@ -3,5 +3,6 @@ config HYPERV_NET tristate "Microsoft Hyper-V virtual network driver" depends on HYPERV select UCS2_STRING + select NLS help Select this option to enable the Hyper-V virtual network driver. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2c5c1e91ded6..9bf2140fd0a1 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3000,6 +3000,8 @@ static void rtl8152_nic_reset(struct r8152 *tp) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, CR_RST); for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; if (!(ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR) & CR_RST)) break; usleep_range(100, 400); @@ -3329,6 +3331,8 @@ static void rtl_disable(struct r8152 *tp) rxdy_gated_en(tp, true); for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); if ((ocp_data & FIFO_EMPTY) == FIFO_EMPTY) break; @@ -3336,6 +3340,8 @@ static void rtl_disable(struct r8152 *tp) } for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0) & TCR0_TX_EMPTY) break; usleep_range(1000, 2000); @@ -5499,6 +5505,8 @@ static void wait_oob_link_list_ready(struct r8152 *tp) int i; for (i = 0; i < 1000; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); if (ocp_data & LINK_LIST_READY) break; @@ -5513,6 +5521,8 @@ static void r8156b_wait_loading_flash(struct r8152 *tp) int i; for (i = 0; i < 100; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + break; if (ocp_read_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL) & GPHY_PATCH_DONE) break; usleep_range(1000, 2000); @@ -5635,6 +5645,8 @@ static int r8153_pre_firmware_1(struct r8152 *tp) for (i = 0; i < 104; i++) { u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_WDT1_CTRL); + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return -ENODEV; if (!(ocp_data & WTD1_EN)) break; usleep_range(1000, 2000); @@ -5791,6 +5803,8 @@ static void r8153_aldps_en(struct r8152 *tp, bool enable) data &= ~EN_ALDPS; ocp_reg_write(tp, OCP_POWER_CFG, data); for (i = 0; i < 20; i++) { + if (test_bit(RTL8152_INACCESSIBLE, &tp->flags)) + return; usleep_range(1000, 2000); if (ocp_read_word(tp, MCU_TYPE_PLA, 0xe000) & 0x0100) break; @@ -8397,6 +8411,8 @@ static int rtl8152_pre_reset(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); struct net_device *netdev; + rtnl_lock(); + if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) return 0; @@ -8428,20 +8444,17 @@ static int rtl8152_post_reset(struct usb_interface *intf) struct sockaddr sa; if (!tp || !test_bit(PROBED_WITH_NO_ERRORS, &tp->flags)) - return 0; + goto exit; rtl_set_accessible(tp); /* reset the MAC address in case of policy change */ - if (determine_ethernet_addr(tp, &sa) >= 0) { - rtnl_lock(); + if (determine_ethernet_addr(tp, &sa) >= 0) dev_set_mac_address (tp->netdev, &sa, NULL); - rtnl_unlock(); - } netdev = tp->netdev; if (!netif_running(netdev)) - return 0; + goto exit; set_bit(WORK_ENABLE, &tp->flags); if (netif_carrier_ok(netdev)) { @@ -8460,6 +8473,8 @@ static int rtl8152_post_reset(struct usb_interface *intf) if (!list_empty(&tp->rx_done)) napi_schedule(&tp->napi); +exit: + rtnl_unlock(); return 0; } @@ -10034,6 +10049,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, + { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, {} }; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 57efb3454c57..977861c46b1f 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -790,7 +790,8 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, skb_add_rx_frag(nskb, i, page, page_offset, size, truesize); - if (skb_copy_bits(skb, off, page_address(page), + if (skb_copy_bits(skb, off, + page_address(page) + page_offset, size)) { consume_skb(nskb); goto drop; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 46a4c9c5ea96..1be1ce522896 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1192,8 +1192,16 @@ static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl) static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) { - queue_delayed_work(nvme_wq, &ctrl->ka_work, - nvme_keep_alive_work_period(ctrl)); + unsigned long now = jiffies; + unsigned long delay = nvme_keep_alive_work_period(ctrl); + unsigned long ka_next_check_tm = ctrl->ka_last_check_time + delay; + + if (time_after(now, ka_next_check_tm)) + delay = 0; + else + delay = ka_next_check_tm - now; + + queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); } static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, @@ -1479,7 +1487,8 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl, if (id->ncap == 0) { /* namespace not allocated or attached */ info->is_removed = true; - return -ENODEV; + ret = -ENODEV; + goto error; } info->anagrpid = id->anagrpid; @@ -1497,8 +1506,10 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl, !memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) memcpy(ids->nguid, id->nguid, sizeof(ids->nguid)); } + +error: kfree(id); - return 0; + return ret; } static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl, @@ -1890,9 +1901,10 @@ static void nvme_update_disk_info(struct gendisk *disk, /* * The block layer can't support LBA sizes larger than the page size - * yet, so catch this early and don't allow block I/O. + * or smaller than a sector size yet, so catch this early and don't + * allow block I/O. */ - if (ns->lba_shift > PAGE_SHIFT) { + if (ns->lba_shift > PAGE_SHIFT || ns->lba_shift < SECTOR_SHIFT) { capacity = 0; bs = (1 << 9); } @@ -2029,6 +2041,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (ret) return ret; + if (id->ncap == 0) { + /* namespace not allocated or attached */ + info->is_removed = true; + ret = -ENODEV; + goto error; + } + blk_mq_freeze_queue(ns->disk->queue); lbaf = nvme_lbaf_index(id->flbas); ns->lba_shift = id->lbaf[lbaf].ds; @@ -2090,6 +2109,8 @@ out: set_bit(NVME_NS_READY, &ns->flags); ret = 0; } + +error: kfree(id); return ret; } @@ -4471,6 +4492,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work); memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; + ctrl->ka_last_check_time = jiffies; BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) > PAGE_SIZE); diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index f63250c650ca..3bf27052832f 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -98,8 +98,9 @@ int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p) * * Returns the new state of a device based on the notifier used. * - * Return: 0 on device going from enabled to disabled, 1 on device - * going from disabled to enabled and -1 on no change. + * Return: OF_RECONFIG_CHANGE_REMOVE on device going from enabled to + * disabled, OF_RECONFIG_CHANGE_ADD on device going from disabled to + * enabled and OF_RECONFIG_NO_CHANGE on no change. */ int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *pr) { diff --git a/drivers/platform/mellanox/mlxbf-bootctl.c b/drivers/platform/mellanox/mlxbf-bootctl.c index 1ac7dab22c63..c1aef3a8fb2d 100644 --- a/drivers/platform/mellanox/mlxbf-bootctl.c +++ b/drivers/platform/mellanox/mlxbf-bootctl.c @@ -20,6 +20,7 @@ #define MLXBF_BOOTCTL_SB_SECURE_MASK 0x03 #define MLXBF_BOOTCTL_SB_TEST_MASK 0x0c +#define MLXBF_BOOTCTL_SB_DEV_MASK BIT(4) #define MLXBF_SB_KEY_NUM 4 @@ -40,11 +41,18 @@ static struct mlxbf_bootctl_name boot_names[] = { { MLXBF_BOOTCTL_NONE, "none" }, }; +enum { + MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION = 0, + MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE = 1, + MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE = 2, + MLXBF_BOOTCTL_SB_LIFECYCLE_RMA = 3 +}; + static const char * const mlxbf_bootctl_lifecycle_states[] = { - [0] = "Production", - [1] = "GA Secured", - [2] = "GA Non-Secured", - [3] = "RMA", + [MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION] = "Production", + [MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE] = "GA Secured", + [MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE] = "GA Non-Secured", + [MLXBF_BOOTCTL_SB_LIFECYCLE_RMA] = "RMA", }; /* Log header format. */ @@ -247,25 +255,30 @@ static ssize_t second_reset_action_store(struct device *dev, static ssize_t lifecycle_state_show(struct device *dev, struct device_attribute *attr, char *buf) { + int status_bits; + int use_dev_key; + int test_state; int lc_state; - lc_state = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS, - MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE); - if (lc_state < 0) - return lc_state; + status_bits = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS, + MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE); + if (status_bits < 0) + return status_bits; - lc_state &= - MLXBF_BOOTCTL_SB_TEST_MASK | MLXBF_BOOTCTL_SB_SECURE_MASK; + use_dev_key = status_bits & MLXBF_BOOTCTL_SB_DEV_MASK; + test_state = status_bits & MLXBF_BOOTCTL_SB_TEST_MASK; + lc_state = status_bits & MLXBF_BOOTCTL_SB_SECURE_MASK; /* * If the test bits are set, we specify that the current state may be * due to using the test bits. */ - if (lc_state & MLXBF_BOOTCTL_SB_TEST_MASK) { - lc_state &= MLXBF_BOOTCTL_SB_SECURE_MASK; - + if (test_state) { return sprintf(buf, "%s(test)\n", mlxbf_bootctl_lifecycle_states[lc_state]); + } else if (use_dev_key && + (lc_state == MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE)) { + return sprintf(buf, "Secured (development)\n"); } return sprintf(buf, "%s\n", mlxbf_bootctl_lifecycle_states[lc_state]); diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 0b427fc24a96..1dd84c7a79de 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -1771,6 +1771,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->dev_attr.show = mlxbf_pmc_event_list_show; attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "event_list"); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[i] = &attr->dev_attr.attr; attr = NULL; @@ -1784,6 +1786,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "enable"); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr; attr = NULL; } @@ -1810,6 +1814,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "counter%d", j); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr; attr = NULL; @@ -1821,6 +1827,8 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, "event%d", j); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[++i] = &attr->dev_attr.attr; attr = NULL; } @@ -1853,6 +1861,8 @@ static int mlxbf_pmc_init_perftype_reg(struct device *dev, int blk_num) attr->nr = blk_num; attr->dev_attr.attr.name = devm_kasprintf(dev, GFP_KERNEL, events[j].evt_name); + if (!attr->dev_attr.attr.name) + return -ENOMEM; pmc->block[blk_num].block_attr[i] = &attr->dev_attr.attr; attr = NULL; i++; @@ -1882,6 +1892,8 @@ static int mlxbf_pmc_create_groups(struct device *dev, int blk_num) pmc->block[blk_num].block_attr_grp.attrs = pmc->block[blk_num].block_attr; pmc->block[blk_num].block_attr_grp.name = devm_kasprintf( dev, GFP_KERNEL, pmc->block_name[blk_num]); + if (!pmc->block[blk_num].block_attr_grp.name) + return -ENOMEM; pmc->groups[pmc->group_num] = &pmc->block[blk_num].block_attr_grp; pmc->group_num++; @@ -2063,6 +2075,8 @@ static int mlxbf_pmc_probe(struct platform_device *pdev) pmc->hwmon_dev = devm_hwmon_device_register_with_groups( dev, "bfperf", pmc, pmc->groups); + if (IS_ERR(pmc->hwmon_dev)) + return PTR_ERR(pmc->hwmon_dev); platform_set_drvdata(pdev, pmc); return 0; diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c index 1a6373dea109..6152be38398c 100644 --- a/drivers/platform/surface/aggregator/core.c +++ b/drivers/platform/surface/aggregator/core.c @@ -231,9 +231,12 @@ static int ssam_receive_buf(struct serdev_device *dev, const unsigned char *buf, size_t n) { struct ssam_controller *ctrl; + int ret; ctrl = serdev_device_get_drvdata(dev); - return ssam_controller_receive_buf(ctrl, buf, n); + ret = ssam_controller_receive_buf(ctrl, buf, n); + + return ret < 0 ? 0 : ret; } static void ssam_write_wakeup(struct serdev_device *dev) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 7e69fdaccdd5..c94f31a5c6a3 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -263,6 +263,7 @@ config ASUS_WMI depends on RFKILL || RFKILL = n depends on HOTPLUG_PCI depends on ACPI_VIDEO || ACPI_VIDEO = n + depends on SERIO_I8042 || SERIO_I8042 = n select INPUT_SPARSEKMAP select LEDS_CLASS select NEW_LEDS @@ -279,7 +280,6 @@ config ASUS_WMI config ASUS_NB_WMI tristate "Asus Notebook WMI Driver" depends on ASUS_WMI - depends on SERIO_I8042 || SERIO_I8042 = n help This is a driver for newer Asus notebooks. It adds extra features like wireless radio and bluetooth control, leds, hotkeys, backlight... diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 9aa1226e74e6..fceffe2082ec 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -48,25 +48,43 @@ module_param(tablet_mode_sw, uint, 0444); MODULE_PARM_DESC(tablet_mode_sw, "Tablet mode detect: -1:auto 0:disable 1:kbd-dock 2:lid-flip 3:lid-flip-rog"); static struct quirk_entry *quirks; +static bool atkbd_reports_vol_keys; -static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str, - struct serio *port) +static bool asus_i8042_filter(unsigned char data, unsigned char str, struct serio *port) { - static bool extended; - bool ret = false; + static bool extended_e0; + static bool extended_e1; if (str & I8042_STR_AUXDATA) return false; - if (unlikely(data == 0xe1)) { - extended = true; - ret = true; - } else if (unlikely(extended)) { - extended = false; - ret = true; + if (quirks->filter_i8042_e1_extended_codes) { + if (data == 0xe1) { + extended_e1 = true; + return true; + } + + if (extended_e1) { + extended_e1 = false; + return true; + } } - return ret; + if (data == 0xe0) { + extended_e0 = true; + } else if (extended_e0) { + extended_e0 = false; + + switch (data & 0x7f) { + case 0x20: /* e0 20 / e0 a0, Volume Mute press / release */ + case 0x2e: /* e0 2e / e0 ae, Volume Down press / release */ + case 0x30: /* e0 30 / e0 b0, Volume Up press / release */ + atkbd_reports_vol_keys = true; + break; + } + } + + return false; } static struct quirk_entry quirk_asus_unknown = { @@ -75,7 +93,7 @@ static struct quirk_entry quirk_asus_unknown = { }; static struct quirk_entry quirk_asus_q500a = { - .i8042_filter = asus_q500a_i8042_filter, + .filter_i8042_e1_extended_codes = true, .wmi_backlight_set_devstate = true, }; @@ -503,8 +521,6 @@ static const struct dmi_system_id asus_quirks[] = { static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) { - int ret; - quirks = &quirk_asus_unknown; dmi_check_system(asus_quirks); @@ -519,15 +535,6 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) if (tablet_mode_sw != -1) quirks->tablet_switch_mode = tablet_mode_sw; - - if (quirks->i8042_filter) { - ret = i8042_install_filter(quirks->i8042_filter); - if (ret) { - pr_warn("Unable to install key filter\n"); - return; - } - pr_info("Using i8042 filter function for receiving events\n"); - } } static const struct key_entry asus_nb_wmi_keymap[] = { @@ -618,6 +625,13 @@ static void asus_nb_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code, *code = ASUS_WMI_KEY_IGNORE; break; + case 0x30: /* Volume Up */ + case 0x31: /* Volume Down */ + case 0x32: /* Volume Mute */ + if (atkbd_reports_vol_keys) + *code = ASUS_WMI_KEY_IGNORE; + + break; } } @@ -630,6 +644,7 @@ static struct asus_wmi_driver asus_nb_wmi_driver = { .input_phys = ASUS_NB_WMI_FILE "/input0", .detect_quirks = asus_nb_wmi_quirks, .key_filter = asus_nb_wmi_key_filter, + .i8042_filter = asus_i8042_filter, }; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 6a79f16233ab..9f7e23c5c6b4 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -16,6 +16,7 @@ #include <linux/acpi.h> #include <linux/backlight.h> #include <linux/debugfs.h> +#include <linux/delay.h> #include <linux/dmi.h> #include <linux/fb.h> #include <linux/hwmon.h> @@ -132,6 +133,11 @@ module_param(fnlock_default, bool, 0444); #define ASUS_SCREENPAD_BRIGHT_MAX 255 #define ASUS_SCREENPAD_BRIGHT_DEFAULT 60 +/* Controls the power state of the USB0 hub on ROG Ally which input is on */ +#define ASUS_USB0_PWR_EC0_CSEE "\\_SB.PCI0.SBRG.EC0.CSEE" +/* 300ms so far seems to produce a reliable result on AC and battery */ +#define ASUS_USB0_PWR_EC0_CSEE_WAIT 300 + static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static int throttle_thermal_policy_write(struct asus_wmi *); @@ -300,6 +306,9 @@ struct asus_wmi { bool fnlock_locked; + /* The ROG Ally device requires the MCU USB device be disconnected before suspend */ + bool ally_mcu_usb_switch; + struct asus_wmi_debug debug; struct asus_wmi_driver *driver; @@ -4488,6 +4497,8 @@ static int asus_wmi_add(struct platform_device *pdev) asus->nv_temp_tgt_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_NV_THERM_TARGET); asus->panel_overdrive_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_PANEL_OD); asus->mini_led_mode_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE); + asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) + && dmi_match(DMI_BOARD_NAME, "RC71L"); err = fan_boost_mode_check_present(asus); if (err) @@ -4567,6 +4578,12 @@ static int asus_wmi_add(struct platform_device *pdev) goto fail_wmi_handler; } + if (asus->driver->i8042_filter) { + err = i8042_install_filter(asus->driver->i8042_filter); + if (err) + pr_warn("Unable to install key filter - %d\n", err); + } + asus_wmi_battery_init(asus); asus_wmi_debugfs_init(asus); @@ -4603,6 +4620,8 @@ static int asus_wmi_remove(struct platform_device *device) struct asus_wmi *asus; asus = platform_get_drvdata(device); + if (asus->driver->i8042_filter) + i8042_remove_filter(asus->driver->i8042_filter); wmi_remove_notify_handler(asus->driver->event_guid); asus_wmi_backlight_exit(asus); asus_screenpad_exit(asus); @@ -4654,6 +4673,43 @@ static int asus_hotk_resume(struct device *device) asus_wmi_fnlock_update(asus); asus_wmi_tablet_mode_get_state(asus); + + return 0; +} + +static int asus_hotk_resume_early(struct device *device) +{ + struct asus_wmi *asus = dev_get_drvdata(device); + + if (asus->ally_mcu_usb_switch) { + if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB8))) + dev_err(device, "ROG Ally MCU failed to connect USB dev\n"); + else + msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT); + } + return 0; +} + +static int asus_hotk_prepare(struct device *device) +{ + struct asus_wmi *asus = dev_get_drvdata(device); + int result, err; + + if (asus->ally_mcu_usb_switch) { + /* When powersave is enabled it causes many issues with resume of USB hub */ + result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_MCU_POWERSAVE); + if (result == 1) { + dev_warn(device, "MCU powersave enabled, disabling to prevent resume issues"); + err = asus_wmi_set_devstate(ASUS_WMI_DEVID_MCU_POWERSAVE, 0, &result); + if (err || result != 1) + dev_err(device, "Failed to set MCU powersave mode: %d\n", err); + } + /* sleep required to ensure USB0 is disabled before sleep continues */ + if (ACPI_FAILURE(acpi_execute_simple_method(NULL, ASUS_USB0_PWR_EC0_CSEE, 0xB7))) + dev_err(device, "ROG Ally MCU failed to disconnect USB dev\n"); + else + msleep(ASUS_USB0_PWR_EC0_CSEE_WAIT); + } return 0; } @@ -4701,6 +4757,8 @@ static const struct dev_pm_ops asus_pm_ops = { .thaw = asus_hotk_thaw, .restore = asus_hotk_restore, .resume = asus_hotk_resume, + .resume_early = asus_hotk_resume_early, + .prepare = asus_hotk_prepare, }; /* Registration ***************************************************************/ diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index adb67c925724..cc30f1853847 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -39,6 +39,7 @@ struct quirk_entry { bool wmi_backlight_set_devstate; bool wmi_force_als_set; bool wmi_ignore_fan; + bool filter_i8042_e1_extended_codes; enum asus_wmi_tablet_switch_mode tablet_switch_mode; int wapf; /* @@ -49,9 +50,6 @@ struct quirk_entry { */ int no_display_toggle; u32 xusb2pr; - - bool (*i8042_filter)(unsigned char data, unsigned char str, - struct serio *serio); }; struct asus_wmi_driver { @@ -73,6 +71,9 @@ struct asus_wmi_driver { * Return ASUS_WMI_KEY_IGNORE in code if event should be ignored. */ void (*key_filter) (struct asus_wmi_driver *driver, int *code, unsigned int *value, bool *autorelease); + /* Optional standard i8042 filter */ + bool (*i8042_filter)(unsigned char data, unsigned char str, + struct serio *serio); int (*probe) (struct platform_device *device); void (*detect_quirks) (struct asus_wmi_driver *driver); diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 5c27b4aa9690..5dd22258cb3b 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -1340,6 +1340,11 @@ static int parse_wdg(struct device *wmi_bus_dev, struct platform_device *pdev) if (debug_dump_wdg) wmi_dump_wdg(&gblock[i]); + if (!gblock[i].instance_count) { + dev_info(wmi_bus_dev, FW_INFO "%pUL has zero instances\n", &gblock[i].guid); + continue; + } + if (guid_already_parsed_for_legacy(device, &gblock[i].guid)) continue; diff --git a/drivers/pmdomain/qcom/rpmpd.c b/drivers/pmdomain/qcom/rpmpd.c index 07590a3ef19c..7796d65f96e8 100644 --- a/drivers/pmdomain/qcom/rpmpd.c +++ b/drivers/pmdomain/qcom/rpmpd.c @@ -1044,6 +1044,7 @@ static int rpmpd_probe(struct platform_device *pdev) rpmpds[i]->pd.power_off = rpmpd_power_off; rpmpds[i]->pd.power_on = rpmpd_power_on; rpmpds[i]->pd.set_performance_state = rpmpd_set_performance; + rpmpds[i]->pd.flags = GENPD_FLAG_ACTIVE_WAKEUP; pm_genpd_init(&rpmpds[i]->pd, NULL, true); data->domains[i] = &rpmpds[i]->pd; diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 2ff7717530bf..8a2f18fa3faf 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -24,7 +24,6 @@ #include <linux/of.h> #include <linux/pm_qos.h> #include <linux/slab.h> -#include <linux/units.h> struct dtpm_cpu { struct dtpm dtpm; @@ -104,8 +103,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) if (pd->table[i].frequency < freq) continue; - return scale_pd_power_uw(pd_mask, pd->table[i].power * - MICROWATT_PER_MILLIWATT); + return scale_pd_power_uw(pd_mask, pd->table[i].power); } return 0; @@ -122,11 +120,9 @@ static int update_pd_power_uw(struct dtpm *dtpm) nr_cpus = cpumask_weight(&cpus); dtpm->power_min = em->table[0].power; - dtpm->power_min *= MICROWATT_PER_MILLIWATT; dtpm->power_min *= nr_cpus; dtpm->power_max = em->table[em->nr_perf_states - 1].power; - dtpm->power_max *= MICROWATT_PER_MILLIWATT; dtpm->power_max *= nr_cpus; return 0; diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c index 91276761a31d..612c3b59dd5b 100644 --- a/drivers/powercap/dtpm_devfreq.c +++ b/drivers/powercap/dtpm_devfreq.c @@ -39,10 +39,8 @@ static int update_pd_power_uw(struct dtpm *dtpm) struct em_perf_domain *pd = em_pd_get(dev); dtpm->power_min = pd->table[0].power; - dtpm->power_min *= MICROWATT_PER_MILLIWATT; dtpm->power_max = pd->table[pd->nr_perf_states - 1].power; - dtpm->power_max *= MICROWATT_PER_MILLIWATT; return 0; } @@ -54,13 +52,10 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) struct device *dev = devfreq->dev.parent; struct em_perf_domain *pd = em_pd_get(dev); unsigned long freq; - u64 power; int i; for (i = 0; i < pd->nr_perf_states; i++) { - - power = pd->table[i].power * MICROWATT_PER_MILLIWATT; - if (power > power_limit) + if (pd->table[i].power > power_limit) break; } @@ -68,7 +63,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq); - power_limit = pd->table[i - 1].power * MICROWATT_PER_MILLIWATT; + power_limit = pd->table[i - 1].power; return power_limit; } @@ -110,7 +105,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) if (pd->table[i].frequency < freq) continue; - power = pd->table[i].power * MICROWATT_PER_MILLIWATT; + power = pd->table[i].power; power *= status.busy_time; power >>= 10; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fa00dd503cbf..542a4bbb21bc 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3949,8 +3949,15 @@ static int sd_resume(struct device *dev, bool runtime) static int sd_resume_system(struct device *dev) { - if (pm_runtime_suspended(dev)) + if (pm_runtime_suspended(dev)) { + struct scsi_disk *sdkp = dev_get_drvdata(dev); + struct scsi_device *sdp = sdkp ? sdkp->device : NULL; + + if (sdp && sdp->force_runtime_start_on_system_start) + pm_request_resume(dev); + return 0; + } return sd_resume(dev, false); } diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 8b1031fb0a44..bce0d2a9a7f3 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6444,11 +6444,24 @@ static bool ufshcd_abort_one(struct request *rq, void *priv) struct scsi_device *sdev = cmd->device; struct Scsi_Host *shost = sdev->host; struct ufs_hba *hba = shost_priv(shost); + struct ufshcd_lrb *lrbp = &hba->lrb[tag]; + struct ufs_hw_queue *hwq; + unsigned long flags; *ret = ufshcd_try_to_abort_task(hba, tag); dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag, hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1, *ret ? "failed" : "succeeded"); + + /* Release cmd in MCQ mode if abort succeeds */ + if (is_mcq_enabled(hba) && (*ret == 0)) { + hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd)); + spin_lock_irqsave(&hwq->cq_lock, flags); + if (ufshcd_cmd_inflight(lrbp->cmd)) + ufshcd_release_scsi_cmd(hba, lrbp); + spin_unlock_irqrestore(&hwq->cq_lock, flags); + } + return *ret == 0; } diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 12ac3397f39b..26ba7da6b410 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2815,13 +2815,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) struct mlx5_control_vq *cvq = &mvdev->cvq; int err = 0; - if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { + u16 idx = cvq->vring.last_avail_idx; + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, MLX5_CVQ_MAX_ENT, false, (struct vring_desc *)(uintptr_t)cvq->desc_addr, (struct vring_avail *)(uintptr_t)cvq->driver_addr, (struct vring_used *)(uintptr_t)cvq->device_addr); + if (!err) + cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; + } return err; } diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c index 9b04aad6ec35..c328e694f6e7 100644 --- a/drivers/vdpa/pds/debugfs.c +++ b/drivers/vdpa/pds/debugfs.c @@ -261,7 +261,7 @@ void pds_vdpa_debugfs_add_vdpadev(struct pds_vdpa_aux *vdpa_aux) debugfs_create_file("config", 0400, vdpa_aux->dentry, vdpa_aux->pdsv, &config_fops); for (i = 0; i < vdpa_aux->pdsv->num_vqs; i++) { - char name[8]; + char name[16]; snprintf(name, sizeof(name), "vq%02d", i); debugfs_create_file(name, 0400, vdpa_aux->dentry, diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c index 52b2449182ad..25c0fe5ec3d5 100644 --- a/drivers/vdpa/pds/vdpa_dev.c +++ b/drivers/vdpa/pds/vdpa_dev.c @@ -318,9 +318,8 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur return -EOPNOTSUPP; } - pdsv->negotiated_features = nego_features; - driver_features = pds_vdpa_get_driver_features(vdpa_dev); + pdsv->negotiated_features = nego_features; dev_dbg(dev, "%s: %#llx => %#llx\n", __func__, driver_features, nego_features); @@ -461,8 +460,10 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) pds_vdpa_cmd_set_status(pdsv, status); - /* Note: still working with FW on the need for this reset cmd */ if (status == 0) { + struct vdpa_callback null_cb = { }; + + pds_vdpa_set_config_cb(vdpa_dev, &null_cb); pds_vdpa_cmd_reset(pdsv); for (i = 0; i < pdsv->num_vqs; i++) { diff --git a/drivers/vfio/pci/pds/pci_drv.c b/drivers/vfio/pci/pds/pci_drv.c index dd8c00c895a2..a34dda516629 100644 --- a/drivers/vfio/pci/pds/pci_drv.c +++ b/drivers/vfio/pci/pds/pci_drv.c @@ -55,10 +55,10 @@ static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio) * VFIO_DEVICE_STATE_RUNNING. */ if (deferred_reset_needed) { - spin_lock(&pds_vfio->reset_lock); + mutex_lock(&pds_vfio->reset_mutex); pds_vfio->deferred_reset = true; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR; - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); } } diff --git a/drivers/vfio/pci/pds/vfio_dev.c b/drivers/vfio/pci/pds/vfio_dev.c index 649b18ee394b..4c351c59d05a 100644 --- a/drivers/vfio/pci/pds/vfio_dev.c +++ b/drivers/vfio/pci/pds/vfio_dev.c @@ -29,7 +29,7 @@ struct pds_vfio_pci_device *pds_vfio_pci_drvdata(struct pci_dev *pdev) void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio) { again: - spin_lock(&pds_vfio->reset_lock); + mutex_lock(&pds_vfio->reset_mutex); if (pds_vfio->deferred_reset) { pds_vfio->deferred_reset = false; if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) { @@ -39,23 +39,23 @@ again: } pds_vfio->state = pds_vfio->deferred_reset_state; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); goto again; } mutex_unlock(&pds_vfio->state_mutex); - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); } void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio) { - spin_lock(&pds_vfio->reset_lock); + mutex_lock(&pds_vfio->reset_mutex); pds_vfio->deferred_reset = true; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; if (!mutex_trylock(&pds_vfio->state_mutex)) { - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); return; } - spin_unlock(&pds_vfio->reset_lock); + mutex_unlock(&pds_vfio->reset_mutex); pds_vfio_state_mutex_unlock(pds_vfio); } @@ -155,6 +155,9 @@ static int pds_vfio_init_device(struct vfio_device *vdev) pds_vfio->vf_id = vf_id; + mutex_init(&pds_vfio->state_mutex); + mutex_init(&pds_vfio->reset_mutex); + vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; vdev->mig_ops = &pds_vfio_lm_ops; vdev->log_ops = &pds_vfio_log_ops; @@ -168,6 +171,17 @@ static int pds_vfio_init_device(struct vfio_device *vdev) return 0; } +static void pds_vfio_release_device(struct vfio_device *vdev) +{ + struct pds_vfio_pci_device *pds_vfio = + container_of(vdev, struct pds_vfio_pci_device, + vfio_coredev.vdev); + + mutex_destroy(&pds_vfio->state_mutex); + mutex_destroy(&pds_vfio->reset_mutex); + vfio_pci_core_release_dev(vdev); +} + static int pds_vfio_open_device(struct vfio_device *vdev) { struct pds_vfio_pci_device *pds_vfio = @@ -179,7 +193,6 @@ static int pds_vfio_open_device(struct vfio_device *vdev) if (err) return err; - mutex_init(&pds_vfio->state_mutex); pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING; @@ -199,14 +212,13 @@ static void pds_vfio_close_device(struct vfio_device *vdev) pds_vfio_put_save_file(pds_vfio); pds_vfio_dirty_disable(pds_vfio, true); mutex_unlock(&pds_vfio->state_mutex); - mutex_destroy(&pds_vfio->state_mutex); vfio_pci_core_close_device(vdev); } static const struct vfio_device_ops pds_vfio_ops = { .name = "pds-vfio", .init = pds_vfio_init_device, - .release = vfio_pci_core_release_dev, + .release = pds_vfio_release_device, .open_device = pds_vfio_open_device, .close_device = pds_vfio_close_device, .ioctl = vfio_pci_core_ioctl, diff --git a/drivers/vfio/pci/pds/vfio_dev.h b/drivers/vfio/pci/pds/vfio_dev.h index b8f2d667608f..e7b01080a1ec 100644 --- a/drivers/vfio/pci/pds/vfio_dev.h +++ b/drivers/vfio/pci/pds/vfio_dev.h @@ -18,7 +18,7 @@ struct pds_vfio_pci_device { struct pds_vfio_dirty dirty; struct mutex state_mutex; /* protect migration state */ enum vfio_device_mig_state state; - spinlock_t reset_lock; /* protect reset_done flow */ + struct mutex reset_mutex; /* protect reset_done flow */ u8 deferred_reset; enum vfio_device_mig_state deferred_reset_state; struct notifier_block nb; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index f5edb9e27e3c..b8cfea7812d6 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1110,8 +1110,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, for (i = 0; i < nvec; i++) { info = xen_irq_init(irq + i); - if (!info) + if (!info) { + ret = -ENOMEM; goto error_irq; + } irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name); diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index c08c2c7d6fbb..fddc7be58022 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -33,6 +33,18 @@ config BCACHEFS_QUOTA depends on BCACHEFS_FS select QUOTACTL +config BCACHEFS_ERASURE_CODING + bool "bcachefs erasure coding (RAID5/6) support (EXPERIMENTAL)" + depends on BCACHEFS_FS + select QUOTACTL + help + This enables the "erasure_code" filesysystem and inode option, which + organizes data into reed-solomon stripes instead of ordinary + replication. + + WARNING: this feature is still undergoing on disk format changes, and + should only be enabled for testing purposes. + config BCACHEFS_POSIX_ACL bool "bcachefs POSIX ACL support" depends on BCACHEFS_FS diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index b85c7765272f..1ba0eeb7552a 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1297,6 +1297,30 @@ out: return wp; } +static noinline void +deallocate_extra_replicas(struct bch_fs *c, + struct open_buckets *ptrs, + struct open_buckets *ptrs_no_use, + unsigned extra_replicas) +{ + struct open_buckets ptrs2 = { 0 }; + struct open_bucket *ob; + unsigned i; + + open_bucket_for_each(c, ptrs, ob, i) { + unsigned d = bch_dev_bkey_exists(c, ob->dev)->mi.durability; + + if (d && d <= extra_replicas) { + extra_replicas -= d; + ob_push(c, ptrs_no_use, ob); + } else { + ob_push(c, &ptrs2, ob); + } + } + + *ptrs = ptrs2; +} + /* * Get us an open_bucket we can allocate from, return with it locked: */ @@ -1321,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, int ret; int i; + if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING)) + erasure_code = false; + BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS); BUG_ON(!nr_replicas || !nr_replicas_required); @@ -1382,6 +1409,9 @@ alloc_done: if (ret) goto err; + if (nr_effective > nr_replicas) + deallocate_extra_replicas(c, &ptrs, &wp->ptrs, nr_effective - nr_replicas); + /* Free buckets we didn't use: */ open_bucket_for_each(c, &wp->ptrs, ob, i) open_bucket_free_unused(c, ob); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 403aa3389fcc..dfa22f9d9a1d 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -638,6 +638,8 @@ struct journal_keys { size_t gap; size_t nr; size_t size; + atomic_t ref; + bool initial_ref_held; }; struct btree_trans_buf { @@ -929,7 +931,7 @@ struct bch_fs { mempool_t compression_bounce[2]; mempool_t compress_workspace[BCH_COMPRESSION_TYPE_NR]; mempool_t decompress_workspace; - ZSTD_parameters zstd_params; + size_t zstd_workspace_size; struct crypto_shash *sha256; struct crypto_sync_skcipher *chacha20; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 0a750953ff92..1ab1f08d763b 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -151,7 +151,11 @@ struct bpos { #else #error edit for your odd byteorder. #endif -} __packed __aligned(4); +} __packed +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +__aligned(4) +#endif +; #define KEY_INODE_MAX ((__u64)~0ULL) #define KEY_OFFSET_MAX ((__u64)~0ULL) @@ -1528,7 +1532,7 @@ struct bch_sb_field_disk_groups { x(move_extent_write, 36) \ x(move_extent_finish, 37) \ x(move_extent_fail, 38) \ - x(move_extent_alloc_mem_fail, 39) \ + x(move_extent_start_fail, 39) \ x(copygc, 40) \ x(copygc_wait, 41) \ x(gc_gens_end, 42) \ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 0b5d09c8475d..30ab78a24517 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1541,8 +1541,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) rcu_assign_pointer(ca->buckets_gc, buckets); } - for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ ca = bch_dev_bkey_exists(c, k.k->p.inode); g = gc_bucket(ca, k.k->p.offset); @@ -1561,8 +1561,9 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) g->stripe = a->stripe; g->stripe_redundancy = a->stripe_redundancy; } - } - bch2_trans_iter_exit(trans, &iter); + + 0; + })); err: bch2_trans_put(trans); if (ret) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 37d896edb06e..57c20390e10e 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1358,10 +1358,9 @@ static bool btree_node_has_extra_bsets(struct bch_fs *c, unsigned offset, void * return offset; } -static void btree_node_read_all_replicas_done(struct closure *cl) +static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) { - struct btree_node_read_all *ra = - container_of(cl, struct btree_node_read_all, cl); + closure_type(ra, struct btree_node_read_all, cl); struct bch_fs *c = ra->c; struct btree *b = ra->b; struct printbuf buf = PRINTBUF; @@ -1567,7 +1566,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool if (sync) { closure_sync(&ra->cl); - btree_node_read_all_replicas_done(&ra->cl); + btree_node_read_all_replicas_done(&ra->cl.work); } else { continue_at(&ra->cl, btree_node_read_all_replicas_done, c->io_complete_wq); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 6fa90bcd7016..8e0fe65f6101 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2981,7 +2981,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx) trans->fn_idx = fn_idx; trans->locking_wait.task = current; trans->journal_replay_not_finished = - !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags); + unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) && + atomic_inc_not_zero(&c->journal_keys.ref); closure_init_stack(&trans->ref); s = btree_trans_stats(trans); @@ -3098,6 +3099,9 @@ void bch2_trans_put(struct btree_trans *trans) kfree(trans->fs_usage_deltas); } + if (unlikely(trans->journal_replay_not_finished)) + bch2_journal_keys_put(c); + if (trans->mem_bytes == BTREE_TRANS_MEM_MAX) mempool_free(trans->mem, &c->btree_trans_mem_pool); else diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 58a981bcf3aa..ec52f50d249d 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -80,6 +80,8 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree struct journal_keys *keys = &c->journal_keys; unsigned iters = 0; struct journal_key *k; + + BUG_ON(*idx > keys->nr); search: if (!*idx) *idx = __bch2_journal_key_search(keys, btree_id, level, pos); @@ -189,10 +191,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, /* Since @keys was full, there was no gap: */ memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr); kvfree(keys->d); - *keys = new_keys; + keys->d = new_keys.d; + keys->nr = new_keys.nr; + keys->size = new_keys.size; /* And now the gap is at the end: */ - keys->gap = keys->nr; + keys->gap = keys->nr; } journal_iters_move_gap(c, keys->gap, idx); @@ -415,10 +419,16 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) cmp_int(l->journal_offset, r->journal_offset); } -void bch2_journal_keys_free(struct journal_keys *keys) +void bch2_journal_keys_put(struct bch_fs *c) { + struct journal_keys *keys = &c->journal_keys; struct journal_key *i; + BUG_ON(atomic_read(&keys->ref) <= 0); + + if (!atomic_dec_and_test(&keys->ref)) + return; + move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr); keys->gap = keys->nr; @@ -429,6 +439,8 @@ void bch2_journal_keys_free(struct journal_keys *keys) kvfree(keys->d); keys->d = NULL; keys->nr = keys->gap = keys->size = 0; + + bch2_journal_entries_free(c); } static void __journal_keys_sort(struct journal_keys *keys) diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h index 5d64e7e22f26..8ca4c100b2e3 100644 --- a/fs/bcachefs/btree_journal_iter.h +++ b/fs/bcachefs/btree_journal_iter.h @@ -49,7 +49,15 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *, struct bch_fs *, struct btree *); -void bch2_journal_keys_free(struct journal_keys *); +void bch2_journal_keys_put(struct bch_fs *); + +static inline void bch2_journal_keys_put_initial(struct bch_fs *c) +{ + if (c->journal_keys.initial_ref_held) + bch2_journal_keys_put(c); + c->journal_keys.initial_ref_held = false; +} + void bch2_journal_entries_free(struct bch_fs *); int bch2_journal_keys_sort(struct bch_fs *); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 76f27bc9fa24..6697417273aa 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -778,9 +778,9 @@ static void btree_interior_update_work(struct work_struct *work) } } -static void btree_update_set_nodes_written(struct closure *cl) +static CLOSURE_CALLBACK(btree_update_set_nodes_written) { - struct btree_update *as = container_of(cl, struct btree_update, cl); + closure_type(as, struct btree_update, cl); struct bch_fs *c = as->c; mutex_lock(&c->btree_interior_update_lock); @@ -1071,8 +1071,12 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, break; } + /* + * Always check for space for two keys, even if we won't have to + * split at prior level - it might have been a merge instead: + */ if (bch2_btree_node_insert_fits(c, path->l[update_level].b, - BKEY_BTREE_PTR_U64s_MAX * (1 + split))) + BKEY_BTREE_PTR_U64s_MAX * 2)) break; split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c); @@ -2266,6 +2270,10 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, BUG_ON(!btree_node_hashed(b)); + struct bch_extent_ptr *ptr; + bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr, + !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev)); + ret = bch2_btree_node_update_key(trans, &iter, b, new_key, commit_flags, skip_triggers); out: diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 58d8c6ffd955..5a91d3189fcf 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -854,8 +854,12 @@ static int __mark_pointer(struct btree_trans *trans, return ret; *dst_sectors += sectors; - *bucket_data_type = *dirty_sectors || *cached_sectors - ? ptr_data_type : 0; + + if (!*dirty_sectors && !*cached_sectors) + *bucket_data_type = 0; + else if (*bucket_data_type != BCH_DATA_stripe) + *bucket_data_type = ptr_data_type; + return 0; } @@ -2091,8 +2095,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) bucket_gens->first_bucket = ca->mi.first_bucket; bucket_gens->nbuckets = nbuckets; - bch2_copygc_stop(c); - if (resize) { down_write(&c->gc_lock); down_write(&ca->bucket_lock); diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index a8b148ec2a2b..51af8ea230ed 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c, */ unsigned level = min((compression.level * 3) / 2, zstd_max_clevel()); ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max); - ZSTD_CCtx *ctx = zstd_init_cctx(workspace, - zstd_cctx_workspace_bound(¶ms.cParams)); + ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size); /* * ZSTD requires that when we decompress we pass in the exact @@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c, size_t len = zstd_compress_cctx(ctx, dst + 4, dst_len - 4 - 7, src, src_len, - &c->zstd_params); + ¶ms); if (zstd_is_error(len)) return 0; @@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) size_t decompress_workspace_size = 0; ZSTD_parameters params = zstd_get_params(zstd_max_clevel(), c->opts.encoded_extent_max); + + /* + * ZSTD is lying: if we allocate the size of the workspace it says it + * requires, it returns memory allocation errors + */ + c->zstd_workspace_size = zstd_cctx_workspace_bound(¶ms.cParams); + struct { unsigned feature; enum bch_compression_type type; @@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), zlib_inflate_workspacesize(), }, { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, - zstd_cctx_workspace_bound(¶ms.cParams), + c->zstd_workspace_size, zstd_dctx_workspace_bound() }, }, *i; bool have_compressed = false; - c->zstd_params = params; - for (i = compression_types; i < compression_types + ARRAY_SIZE(compression_types); i++) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 5ed66202c226..71aa5e59787b 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -356,7 +356,7 @@ void bch2_data_update_exit(struct data_update *update) bch2_bio_free_pages_pool(c, &update->op.wbio.bio); } -void bch2_update_unwritten_extent(struct btree_trans *trans, +static void bch2_update_unwritten_extent(struct btree_trans *trans, struct data_update *update) { struct bch_fs *c = update->op.c; @@ -436,7 +436,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans, } } +int bch2_extent_drop_ptrs(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + struct data_update_opts data_opts) +{ + struct bch_fs *c = trans->c; + struct bkey_i *n; + int ret; + + n = bch2_bkey_make_mut_noupdate(trans, k); + ret = PTR_ERR_OR_ZERO(n); + if (ret) + return ret; + + while (data_opts.kill_ptrs) { + unsigned i = 0, drop = __fls(data_opts.kill_ptrs); + struct bch_extent_ptr *ptr; + + bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop); + data_opts.kill_ptrs ^= 1U << drop; + } + + /* + * If the new extent no longer has any pointers, bch2_extent_normalize() + * will do the appropriate thing with it (turning it into a + * KEY_TYPE_error key, or just a discard if it was a cached extent) + */ + bch2_extent_normalize(c, bkey_i_to_s(n)); + + /* + * Since we're not inserting through an extent iterator + * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators), + * we aren't using the extent overwrite path to delete, we're + * just using the normal key deletion path: + */ + if (bkey_deleted(&n->k)) + n->k.size = 0; + + return bch2_trans_relock(trans) ?: + bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: + bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); +} + int bch2_data_update_init(struct btree_trans *trans, + struct btree_iter *iter, struct moving_context *ctxt, struct data_update *m, struct write_point_specifier wp, @@ -452,7 +496,7 @@ int bch2_data_update_init(struct btree_trans *trans, const struct bch_extent_ptr *ptr; unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; unsigned ptrs_locked = 0; - int ret; + int ret = 0; bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); @@ -478,6 +522,8 @@ int bch2_data_update_init(struct btree_trans *trans, bkey_for_each_ptr(ptrs, ptr) percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref); + unsigned durability_have = 0, durability_removing = 0; + i = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { bool locked; @@ -489,8 +535,11 @@ int bch2_data_update_init(struct btree_trans *trans, reserve_sectors += k.k->size; m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); - } else if (!p.ptr.cached) { + durability_removing += bch2_extent_ptr_desired_durability(c, &p); + } else if (!p.ptr.cached && + !((1U << i) & m->data_opts.kill_ptrs)) { bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); + durability_have += bch2_extent_ptr_durability(c, &p); } /* @@ -529,6 +578,29 @@ int bch2_data_update_init(struct btree_trans *trans, i++; } + /* + * If current extent durability is less than io_opts.data_replicas, + * we're not trying to rereplicate the extent up to data_replicas here - + * unless extra_replicas was specified + * + * Increasing replication is an explicit operation triggered by + * rereplicate, currently, so that users don't get an unexpected -ENOSPC + */ + if (durability_have >= io_opts.data_replicas) { + m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; + m->data_opts.rewrite_ptrs = 0; + /* if iter == NULL, it's just a promote */ + if (iter) + ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts); + goto done; + } + + m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) + + m->data_opts.extra_replicas; + m->op.nr_replicas_required = m->op.nr_replicas; + + BUG_ON(!m->op.nr_replicas); + if (reserve_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, m->data_opts.extra_replicas @@ -538,14 +610,11 @@ int bch2_data_update_init(struct btree_trans *trans, goto err; } - m->op.nr_replicas += m->data_opts.extra_replicas; - m->op.nr_replicas_required = m->op.nr_replicas; - - BUG_ON(!m->op.nr_replicas); + if (bkey_extent_is_unwritten(k)) { + bch2_update_unwritten_extent(trans, m); + goto done; + } - /* Special handling required: */ - if (bkey_extent_is_unwritten(k)) - return -BCH_ERR_unwritten_extent_update; return 0; err: i = 0; @@ -560,6 +629,9 @@ err: bch2_bkey_buf_exit(&m->k, c); bch2_bio_free_pages_pool(c, &m->op.wbio.bio); return ret; +done: + bch2_data_update_exit(m); + return ret ?: -BCH_ERR_data_update_done; } void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts) diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 9dc17b9d8379..991095bbd469 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *); void bch2_data_update_read_done(struct data_update *, struct bch_extent_crc_unpacked); +int bch2_extent_drop_ptrs(struct btree_trans *, + struct btree_iter *, + struct bkey_s_c, + struct data_update_opts); + void bch2_data_update_exit(struct data_update *); -void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *); -int bch2_data_update_init(struct btree_trans *, struct moving_context *, +int bch2_data_update_init(struct btree_trans *, struct btree_iter *, + struct moving_context *, struct data_update *, struct write_point_specifier, struct bch_io_opts, struct data_update_opts, diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 68a1a96bb7ca..ae7910bf2228 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -162,7 +162,7 @@ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ x(BCH_ERR_fsck, fsck_repair_impossible) \ x(0, restart_recovery) \ - x(0, unwritten_extent_update) \ + x(0, data_update_done) \ x(EINVAL, device_state_not_allowed) \ x(EINVAL, member_info_missing) \ x(EINVAL, mismatched_block_size) \ @@ -210,6 +210,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_members) \ x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \ x(BCH_ERR_invalid_sb, invalid_sb_replicas) \ + x(BCH_ERR_invalid_sb, invalid_replicas_entry) \ x(BCH_ERR_invalid_sb, invalid_sb_journal) \ x(BCH_ERR_invalid_sb, invalid_sb_journal_seq_blacklist) \ x(BCH_ERR_invalid_sb, invalid_sb_crypt) \ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index a864de231b69..f6c92df55270 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) return replicas; } -unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p) +static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p) { - struct bch_dev *ca; - if (p->ptr.cached) return 0; - ca = bch_dev_bkey_exists(c, p->ptr.dev); - - return ca->mi.durability + - (p->has_ec - ? p->ec.redundancy - : 0); + return p->has_ec + ? p->ec.redundancy + 1 + : ca->mi.durability; } -unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p) +unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p) { - struct bch_dev *ca; + struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev); - if (p->ptr.cached) - return 0; + return __extent_ptr_durability(ca, p); +} - ca = bch_dev_bkey_exists(c, p->ptr.dev); +unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p) +{ + struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev); if (ca->mi.state == BCH_MEMBER_STATE_failed) return 0; - return ca->mi.durability + - (p->has_ec - ? p->ec.redundancy - : 0); + return __extent_ptr_durability(ca, p); } unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 5b42a76c4796..9a479e4de6b3 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -35,9 +35,9 @@ static void bio_check_or_release(struct bio *bio, bool check_dirty) } } -static void bch2_dio_read_complete(struct closure *cl) +static CLOSURE_CALLBACK(bch2_dio_read_complete) { - struct dio_read *dio = container_of(cl, struct dio_read, cl); + closure_type(dio, struct dio_read, cl); dio->req->ki_complete(dio->req, dio->ret); bio_check_or_release(&dio->rbio.bio, dio->should_dirty); @@ -325,9 +325,9 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) return 0; } -static void bch2_dio_write_flush_done(struct closure *cl) +static CLOSURE_CALLBACK(bch2_dio_write_flush_done) { - struct dio_write *dio = container_of(cl, struct dio_write, op.cl); + closure_type(dio, struct dio_write, op.cl); struct bch_fs *c = dio->op.c; closure_debug_destroy(cl); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 8ef817304e4a..4d51be813509 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1667,8 +1667,7 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root) if (!first) seq_putc(seq, ':'); first = false; - seq_puts(seq, "/dev/"); - seq_puts(seq, ca->name); + seq_puts(seq, ca->disk_sb.sb_name); } return 0; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index a56ed553dc15..36763865facd 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans, bio = &op->write.op.wbio.bio; bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0); - ret = bch2_data_update_init(trans, NULL, &op->write, + ret = bch2_data_update_init(trans, NULL, NULL, &op->write, writepoint_hashed((unsigned long) current), opts, (struct data_update_opts) { diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d704a8f829c8..8ede46b1e354 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -580,9 +580,9 @@ static inline void wp_update_state(struct write_point *wp, bool running) __wp_update_state(wp, state); } -static void bch2_write_index(struct closure *cl) +static CLOSURE_CALLBACK(bch2_write_index) { - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + closure_type(op, struct bch_write_op, cl); struct write_point *wp = op->wp; struct workqueue_struct *wq = index_update_wq(op); unsigned long flags; @@ -1208,9 +1208,9 @@ static void __bch2_nocow_write_done(struct bch_write_op *op) bch2_nocow_write_convert_unwritten(op); } -static void bch2_nocow_write_done(struct closure *cl) +static CLOSURE_CALLBACK(bch2_nocow_write_done) { - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + closure_type(op, struct bch_write_op, cl); __bch2_nocow_write_done(op); bch2_write_done(cl); @@ -1363,7 +1363,7 @@ err: op->insert_keys.top = op->insert_keys.keys; } else if (op->flags & BCH_WRITE_SYNC) { closure_sync(&op->cl); - bch2_nocow_write_done(&op->cl); + bch2_nocow_write_done(&op->cl.work); } else { /* * XXX @@ -1566,9 +1566,9 @@ err: * If op->discard is true, instead of inserting the data it invalidates the * region of the cache represented by op->bio and op->inode. */ -void bch2_write(struct closure *cl) +CLOSURE_CALLBACK(bch2_write) { - struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); + closure_type(op, struct bch_write_op, cl); struct bio *bio = &op->wbio.bio; struct bch_fs *c = op->c; unsigned data_len; diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h index 9323167229ee..6c276a48f95d 100644 --- a/fs/bcachefs/io_write.h +++ b/fs/bcachefs/io_write.h @@ -90,8 +90,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c, op->devs_need_flush = NULL; } -void bch2_write(struct closure *); - +CLOSURE_CALLBACK(bch2_write); void bch2_write_point_do_index_updates(struct work_struct *); static inline struct bch_write_bio *wbio_init(struct bio *bio) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 23a9b7845d11..489b34046e78 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -321,6 +321,8 @@ static int journal_entry_open(struct journal *j) atomic64_inc(&j->seq); journal_pin_list_init(fifo_push_ref(&j->pin), 1); + BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); + BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf); bkey_extent_init(&buf->key); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index c85d01cf4948..4c513fca5ef2 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -136,9 +136,7 @@ static inline u64 journal_last_seq(struct journal *j) static inline u64 journal_cur_seq(struct journal *j) { - EBUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); - - return j->pin.back - 1; + return atomic64_read(&j->seq); } static inline u64 journal_last_unwritten_seq(struct journal *j) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 786a09285509..0f17fc5f8d68 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -547,6 +547,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, struct jset_entry_data_usage *u = container_of(entry, struct jset_entry_data_usage, entry); unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); + struct printbuf err = PRINTBUF; int ret = 0; if (journal_entry_err_on(bytes < sizeof(*u) || @@ -555,10 +556,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, journal_entry_data_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); - return ret; + goto out; } + if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err), + c, version, jset, entry, + journal_entry_data_usage_bad_size, + "invalid journal entry usage: %s", err.buf)) { + journal_entry_null_range(entry, vstruct_next(entry)); + goto out; + } +out: fsck_err: + printbuf_exit(&err); return ret; } @@ -1025,10 +1035,9 @@ next_block: return 0; } -static void bch2_journal_read_device(struct closure *cl) +static CLOSURE_CALLBACK(bch2_journal_read_device) { - struct journal_device *ja = - container_of(cl, struct journal_device, read); + closure_type(ja, struct journal_device, read); struct bch_dev *ca = container_of(ja, struct bch_dev, journal); struct bch_fs *c = ca->fs; struct journal_list *jlist = @@ -1520,9 +1529,9 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j) return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK); } -static void journal_write_done(struct closure *cl) +static CLOSURE_CALLBACK(journal_write_done) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_buf *w = journal_last_unwritten_buf(j); struct bch_replicas_padded replicas; @@ -1638,9 +1647,9 @@ static void journal_write_endio(struct bio *bio) percpu_ref_put(&ca->io_ref); } -static void do_journal_write(struct closure *cl) +static CLOSURE_CALLBACK(do_journal_write) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_dev *ca; struct journal_buf *w = journal_last_unwritten_buf(j); @@ -1850,9 +1859,9 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf * return 0; } -void bch2_journal_write(struct closure *cl) +CLOSURE_CALLBACK(bch2_journal_write) { - struct journal *j = container_of(cl, struct journal, io); + closure_type(j, struct journal, io); struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_dev *ca; struct journal_buf *w = journal_last_unwritten_buf(j); diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h index a88d097b13f1..c035e7c108e1 100644 --- a/fs/bcachefs/journal_io.h +++ b/fs/bcachefs/journal_io.h @@ -60,6 +60,6 @@ void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *, int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *); -void bch2_journal_write(struct closure *); +CLOSURE_CALLBACK(bch2_journal_write); #endif /* _BCACHEFS_JOURNAL_IO_H */ diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index ab749bf2fcbc..54830ee0ed88 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -49,17 +49,6 @@ static void trace_move_extent_read2(struct bch_fs *c, struct bkey_s_c k) } } -static void trace_move_extent_alloc_mem_fail2(struct bch_fs *c, struct bkey_s_c k) -{ - if (trace_move_extent_alloc_mem_fail_enabled()) { - struct printbuf buf = PRINTBUF; - - bch2_bkey_val_to_text(&buf, c, k); - trace_move_extent_alloc_mem_fail(c, buf.buf); - printbuf_exit(&buf); - } -} - struct moving_io { struct list_head read_list; struct list_head io_list; @@ -163,12 +152,18 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) atomic_read(&ctxt->write_sectors) != sectors_pending); } +static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt) +{ + move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); + bch2_trans_unlock_long(ctxt->trans); + closure_sync(&ctxt->cl); +} + void bch2_moving_ctxt_exit(struct moving_context *ctxt) { struct bch_fs *c = ctxt->trans->c; - move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); - closure_sync(&ctxt->cl); + bch2_moving_ctxt_flush_all(ctxt); EBUG_ON(atomic_read(&ctxt->write_sectors)); EBUG_ON(atomic_read(&ctxt->write_ios)); @@ -223,49 +218,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, char *name) scnprintf(stats->name, sizeof(stats->name), "%s", name); } -static int bch2_extent_drop_ptrs(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_s_c k, - struct data_update_opts data_opts) -{ - struct bch_fs *c = trans->c; - struct bkey_i *n; - int ret; - - n = bch2_bkey_make_mut_noupdate(trans, k); - ret = PTR_ERR_OR_ZERO(n); - if (ret) - return ret; - - while (data_opts.kill_ptrs) { - unsigned i = 0, drop = __fls(data_opts.kill_ptrs); - struct bch_extent_ptr *ptr; - - bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop); - data_opts.kill_ptrs ^= 1U << drop; - } - - /* - * If the new extent no longer has any pointers, bch2_extent_normalize() - * will do the appropriate thing with it (turning it into a - * KEY_TYPE_error key, or just a discard if it was a cached extent) - */ - bch2_extent_normalize(c, bkey_i_to_s(n)); - - /* - * Since we're not inserting through an extent iterator - * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators), - * we aren't using the extent overwrite path to delete, we're - * just using the normal key deletion path: - */ - if (bkey_deleted(&n->k)) - n->k.size = 0; - - return bch2_trans_relock(trans) ?: - bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: - bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); -} - int bch2_move_extent(struct moving_context *ctxt, struct move_bucket_in_flight *bucket_in_flight, struct btree_iter *iter, @@ -335,19 +287,11 @@ int bch2_move_extent(struct moving_context *ctxt, io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); io->rbio.bio.bi_end_io = move_read_endio; - ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp, + ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, io_opts, data_opts, iter->btree_id, k); - if (ret && ret != -BCH_ERR_unwritten_extent_update) + if (ret) goto err_free_pages; - if (ret == -BCH_ERR_unwritten_extent_update) { - bch2_update_unwritten_extent(trans, &io->write); - move_free(io); - return 0; - } - - BUG_ON(ret); - io->write.op.end_io = move_write_done; if (ctxt->rate) @@ -391,8 +335,23 @@ err_free_pages: err_free: kfree(io); err: - this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]); - trace_move_extent_alloc_mem_fail2(c, k); + if (ret == -BCH_ERR_data_update_done) + return 0; + + if (bch2_err_matches(ret, EROFS) || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; + + this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]); + if (trace_move_extent_start_fail_enabled()) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, ": "); + prt_str(&buf, bch2_err_str(ret)); + trace_move_extent_start_fail(c, buf.buf); + printbuf_exit(&buf); + } return ret; } @@ -482,37 +441,30 @@ int bch2_move_get_io_opts_one(struct btree_trans *trans, int bch2_move_ratelimit(struct moving_context *ctxt) { struct bch_fs *c = ctxt->trans->c; + bool is_kthread = current->flags & PF_KTHREAD; u64 delay; - if (ctxt->wait_on_copygc && !c->copygc_running) { - bch2_trans_unlock_long(ctxt->trans); + if (ctxt->wait_on_copygc && c->copygc_running) { + bch2_moving_ctxt_flush_all(ctxt); wait_event_killable(c->copygc_running_wq, !c->copygc_running || - kthread_should_stop()); + (is_kthread && kthread_should_stop())); } do { delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0; - - if (delay) { - if (delay > HZ / 10) - bch2_trans_unlock_long(ctxt->trans); - else - bch2_trans_unlock(ctxt->trans); - set_current_state(TASK_INTERRUPTIBLE); - } - - if ((current->flags & PF_KTHREAD) && kthread_should_stop()) { - __set_current_state(TASK_RUNNING); + if (is_kthread && kthread_should_stop()) return 1; - } if (delay) - schedule_timeout(delay); + move_ctxt_wait_event_timeout(ctxt, + freezing(current) || + (is_kthread && kthread_should_stop()), + delay); if (unlikely(freezing(current))) { - move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); + bch2_moving_ctxt_flush_all(ctxt); try_to_freeze(); } } while (delay); @@ -683,6 +635,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; + bool is_kthread = current->flags & PF_KTHREAD; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_iter iter; struct bkey_buf sk; @@ -728,6 +681,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, } while (!(ret = bch2_move_ratelimit(ctxt))) { + if (is_kthread && kthread_should_stop()) + break; + bch2_trans_begin(trans); ret = bch2_get_next_backpointer(trans, bucket, gen, diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 07cf9d42643b..0906aa2d1de2 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -38,6 +38,25 @@ struct moving_context { wait_queue_head_t wait; }; +#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout) \ +({ \ + int _ret = 0; \ + while (true) { \ + bool cond_finished = false; \ + bch2_moving_ctxt_do_pending_writes(_ctxt); \ + \ + if (_cond) \ + break; \ + bch2_trans_unlock_long((_ctxt)->trans); \ + _ret = __wait_event_timeout((_ctxt)->wait, \ + bch2_moving_ctxt_next_pending_write(_ctxt) || \ + (cond_finished = (_cond)), _timeout); \ + if (_ret || ( cond_finished)) \ + break; \ + } \ + _ret; \ +}) + #define move_ctxt_wait_event(_ctxt, _cond) \ do { \ bool cond_finished = false; \ diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 0a0576326c5b..a84e79f79e5e 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -207,7 +207,7 @@ static int bch2_copygc(struct moving_context *ctxt, goto err; darray_for_each(buckets, i) { - if (unlikely(freezing(current))) + if (kthread_should_stop() || freezing(current)) break; f = move_bucket_in_flight_add(buckets_in_flight, *i); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 9c30500ce920..770ced1c6285 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -167,6 +167,8 @@ static int bch2_journal_replay(struct bch_fs *c) goto err; } + BUG_ON(!atomic_read(&keys->ref)); + for (i = 0; i < keys->nr; i++) { k = keys_sorted[i]; @@ -188,6 +190,9 @@ static int bch2_journal_replay(struct bch_fs *c) } } + if (!c->opts.keep_journal) + bch2_journal_keys_put_initial(c); + replay_now_at(j, j->replay_journal_seq_end); j->replay_journal_seq = 0; @@ -909,10 +914,8 @@ out: bch2_flush_fsck_errs(c); if (!c->opts.keep_journal && - test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) { - bch2_journal_keys_free(&c->journal_keys); - bch2_journal_entries_free(c); - } + test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)) + bch2_journal_keys_put_initial(c); kfree(clean); if (!ret && test_bit(BCH_FS_NEED_DELETE_DEAD_SNAPSHOTS, &c->flags)) { diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 1c3ae13bfced..2008fe8bf706 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out, prt_printf(out, "]"); } +int bch2_replicas_entry_validate(struct bch_replicas_entry *r, + struct bch_sb *sb, + struct printbuf *err) +{ + if (!r->nr_devs) { + prt_printf(err, "no devices in entry "); + goto bad; + } + + if (r->nr_required > 1 && + r->nr_required >= r->nr_devs) { + prt_printf(err, "bad nr_required in entry "); + goto bad; + } + + for (unsigned i = 0; i < r->nr_devs; i++) + if (!bch2_dev_exists(sb, r->devs[i])) { + prt_printf(err, "invalid device %u in entry ", r->devs[i]); + goto bad; + } + + return 0; +bad: + bch2_replicas_entry_to_text(err, r); + return -BCH_ERR_invalid_replicas_entry; +} + void bch2_cpu_replicas_to_text(struct printbuf *out, struct bch_replicas_cpu *r) { @@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry *e, } static struct bch_replicas_cpu -cpu_replicas_add_entry(struct bch_replicas_cpu *old, +cpu_replicas_add_entry(struct bch_fs *c, + struct bch_replicas_cpu *old, struct bch_replicas_entry *new_entry) { unsigned i; @@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old, replicas_entry_bytes(new_entry)), }; + for (i = 0; i < new_entry->nr_devs; i++) + BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i])); + BUG_ON(!new_entry->data_type); verify_replicas_entry(new_entry); @@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, if (c->replicas_gc.entries && !__replicas_has_entry(&c->replicas_gc, new_entry)) { - new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry); + new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry); if (!new_gc.entries) { ret = -BCH_ERR_ENOMEM_cpu_replicas; goto err; @@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, } if (!__replicas_has_entry(&c->replicas, new_entry)) { - new_r = cpu_replicas_add_entry(&c->replicas, new_entry); + new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry); if (!new_r.entries) { ret = -BCH_ERR_ENOMEM_cpu_replicas; goto err; @@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c, if (idx < 0) { struct bch_replicas_cpu n; - n = cpu_replicas_add_entry(&c->replicas, r); + n = cpu_replicas_add_entry(c, &c->replicas, r); if (!n.entries) return -BCH_ERR_ENOMEM_cpu_replicas; @@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, struct bch_sb *sb, struct printbuf *err) { - unsigned i, j; + unsigned i; sort_cmp_size(cpu_r->entries, cpu_r->nr, @@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, struct bch_replicas_entry *e = cpu_replicas_entry(cpu_r, i); - if (e->data_type >= BCH_DATA_NR) { - prt_printf(err, "invalid data type in entry "); - bch2_replicas_entry_to_text(err, e); - return -BCH_ERR_invalid_sb_replicas; - } - - if (!e->nr_devs) { - prt_printf(err, "no devices in entry "); - bch2_replicas_entry_to_text(err, e); - return -BCH_ERR_invalid_sb_replicas; - } - - if (e->nr_required > 1 && - e->nr_required >= e->nr_devs) { - prt_printf(err, "bad nr_required in entry "); - bch2_replicas_entry_to_text(err, e); - return -BCH_ERR_invalid_sb_replicas; - } - - for (j = 0; j < e->nr_devs; j++) - if (!bch2_dev_exists(sb, e->devs[j])) { - prt_printf(err, "invalid device %u in entry ", e->devs[j]); - bch2_replicas_entry_to_text(err, e); - return -BCH_ERR_invalid_sb_replicas; - } + int ret = bch2_replicas_entry_validate(e, sb, err); + if (ret) + return ret; if (i + 1 < cpu_r->nr) { struct bch_replicas_entry *n = diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index 4887675a86f0..f70a642775d1 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -9,6 +9,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry *); void bch2_replicas_entry_to_text(struct printbuf *, struct bch_replicas_entry *); +int bch2_replicas_entry_validate(struct bch_replicas_entry *, + struct bch_sb *, struct printbuf *); void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); static inline struct bch_replicas_entry * diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index e9af77b384c7..5dac038f0851 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -959,7 +959,7 @@ static int bch2_snapshot_node_delete(struct btree_trans *trans, u32 id) parent_id, id)) goto err; - parent->v.children[i] = le32_to_cpu(child_id); + parent->v.children[i] = cpu_to_le32(child_id); normalize_snapshot_child_pointers(&parent->v); } diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index f4cad903f4d6..f3e12f7979d5 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -166,6 +166,7 @@ void bch2_free_super(struct bch_sb_handle *sb) if (!IS_ERR_OR_NULL(sb->bdev)) blkdev_put(sb->bdev, sb->holder); kfree(sb->holder); + kfree(sb->sb_name); kfree(sb->sb); memset(sb, 0, sizeof(*sb)); @@ -675,6 +676,10 @@ retry: if (!sb->holder) return -ENOMEM; + sb->sb_name = kstrdup(path, GFP_KERNEL); + if (!sb->sb_name) + return -ENOMEM; + #ifndef __KERNEL__ if (opt_get(*opts, direct_io) == false) sb->mode |= BLK_OPEN_BUFFERED; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 24672bb31cbe..f63474c5c5a2 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -423,6 +423,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); + set_bit(BCH_FS_RW, &c->flags); + set_bit(BCH_FS_WAS_RW, &c->flags); + +#ifndef BCH_WRITE_REF_DEBUG + percpu_ref_reinit(&c->writes); +#else + for (i = 0; i < BCH_WRITE_REF_NR; i++) { + BUG_ON(atomic_long_read(&c->writes[i])); + atomic_long_inc(&c->writes[i]); + } +#endif + ret = bch2_gc_thread_start(c); if (ret) { bch_err(c, "error starting gc thread"); @@ -439,24 +451,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) goto err; } -#ifndef BCH_WRITE_REF_DEBUG - percpu_ref_reinit(&c->writes); -#else - for (i = 0; i < BCH_WRITE_REF_NR; i++) { - BUG_ON(atomic_long_read(&c->writes[i])); - atomic_long_inc(&c->writes[i]); - } -#endif - set_bit(BCH_FS_RW, &c->flags); - set_bit(BCH_FS_WAS_RW, &c->flags); - bch2_do_discards(c); bch2_do_invalidates(c); bch2_do_stripe_deletes(c); bch2_do_pending_node_rewrites(c); return 0; err: - __bch2_fs_read_only(c); + if (test_bit(BCH_FS_RW, &c->flags)) + bch2_fs_read_only(c); + else + __bch2_fs_read_only(c); return ret; } @@ -504,8 +508,8 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_io_clock_exit(&c->io_clock[WRITE]); bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_compress_exit(c); - bch2_journal_keys_free(&c->journal_keys); - bch2_journal_entries_free(c); + bch2_journal_keys_put_initial(c); + BUG_ON(atomic_read(&c->journal_keys.ref)); bch2_fs_btree_write_buffer_exit(c); percpu_free_rwsem(&c->mark_lock); free_percpu(c->online_reserved); @@ -702,6 +706,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) init_rwsem(&c->gc_lock); mutex_init(&c->gc_gens_lock); + atomic_set(&c->journal_keys.ref, 1); + c->journal_keys.initial_ref_held = true; for (i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_init(&c->times[i]); diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h index 7dda4985b99f..9c1fd4ca2b10 100644 --- a/fs/bcachefs/super_types.h +++ b/fs/bcachefs/super_types.h @@ -5,6 +5,7 @@ struct bch_sb_handle { struct bch_sb *sb; struct block_device *bdev; + char *sb_name; struct bio *bio; void *holder; size_t buffer_size; diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 7857671159b4..fd49b63562c3 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -754,9 +754,9 @@ TRACE_EVENT(move_extent_fail, TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg)) ); -DEFINE_EVENT(bkey, move_extent_alloc_mem_fail, - TP_PROTO(struct bch_fs *c, const char *k), - TP_ARGS(c, k) +DEFINE_EVENT(bkey, move_extent_start_fail, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); TRACE_EVENT(move_data, diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 1039e5bf90af..4ddc36f4dbd4 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -258,7 +258,6 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out_unlock; } - iocb->ki_pos += status; ret += status; endbyte = pos + status - 1; ret2 = filemap_write_and_wait_range(inode->i_mapping, pos, diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 83ccc51a54d0..c0513fbb8a59 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -882,11 +882,13 @@ typedef struct smb_com_open_rsp { __u8 OplockLevel; __u16 Fid; __le32 CreateAction; - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le32 FileAttributes; + struct_group(common_attributes, + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le32 FileAttributes; + ); __le64 AllocationSize; __le64 EndOfFile; __le16 FileType; @@ -2264,11 +2266,13 @@ typedef struct { /* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */ /******************************************************************************/ typedef struct { /* data block encoding of response to level 263 QPathInfo */ - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le32 Attributes; + struct_group(common_attributes, + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le32 Attributes; + ); __u32 Pad1; __le64 AllocationSize; __le64 EndOfFile; /* size ie offset to first free byte in file */ diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index bad91ba6c3a9..9ee348e6d106 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1244,8 +1244,10 @@ openRetry: *oplock |= CIFS_CREATE_ACTION; if (buf) { - /* copy from CreationTime to Attributes */ - memcpy((char *)buf, (char *)&rsp->CreationTime, 36); + /* copy commonly used attributes */ + memcpy(&buf->common_attributes, + &rsp->common_attributes, + sizeof(buf->common_attributes)); /* the file_info buf is endian converted by caller */ buf->AllocationSize = rsp->AllocationSize; buf->EndOfFile = rsp->EndOfFile; diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 47f49be69ced..09c5c0f5c96e 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -790,7 +790,7 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, case 0: /* SMB1 symlink */ case IO_REPARSE_TAG_SYMLINK: case IO_REPARSE_TAG_NFS: - fattr->cf_mode = S_IFLNK; + fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode; fattr->cf_dtype = DT_LNK; break; default: @@ -865,6 +865,8 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr, out_reparse: if (S_ISLNK(fattr->cf_mode)) { + if (likely(data->symlink_target)) + fattr->cf_eof = strnlen(data->symlink_target, PATH_MAX); fattr->cf_symlink_target = data->symlink_target; data->symlink_target = NULL; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 82ab62fd0040..45931115f475 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3311,6 +3311,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, struct inode *inode = file_inode(file); struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; + unsigned long long new_size; long rc; unsigned int xid; __le64 eof; @@ -3341,10 +3342,15 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, /* * do we also need to change the size of the file? */ - if (keep_size == false && i_size_read(inode) < offset + len) { - eof = cpu_to_le64(offset + len); + new_size = offset + len; + if (keep_size == false && (unsigned long long)i_size_read(inode) < new_size) { + eof = cpu_to_le64(new_size); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, &eof); + if (rc >= 0) { + truncate_setsize(inode, new_size); + fscache_resize_cookie(cifs_inode_cookie(inode), new_size); + } } zero_range_exit: @@ -3739,6 +3745,9 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, if (rc < 0) goto out_2; + truncate_setsize(inode, old_eof + len); + fscache_resize_cookie(cifs_inode_cookie(inode), i_size_read(inode)); + rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len); if (rc < 0) goto out_2; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 2eb29fa278c3..395e1230ddbc 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3472,12 +3472,10 @@ __SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, } else { trace_smb3_close_done(xid, persistent_fid, tcon->tid, ses->Suid); - /* - * Note that have to subtract 4 since struct network_open_info - * has a final 4 byte pad that close response does not have - */ if (pbuf) - memcpy(pbuf, (char *)&rsp->CreationTime, sizeof(*pbuf) - 4); + memcpy(&pbuf->network_open_info, + &rsp->network_open_info, + sizeof(pbuf->network_open_info)); } atomic_dec(&tcon->num_remote_opens); diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 220994d0a0f7..db08194484e0 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -319,13 +319,15 @@ struct smb2_file_reparse_point_info { } __packed; struct smb2_file_network_open_info { - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le64 AllocationSize; - __le64 EndOfFile; - __le32 Attributes; + struct_group(network_open_info, + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + __le64 AllocationSize; + __le64 EndOfFile; + __le32 Attributes; + ); __le32 Reserved; } __packed; /* level 34 Query also similar returned in close rsp and open rsp */ diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 8983f45f8430..9fbaaa387dcc 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -702,13 +702,16 @@ struct smb2_close_rsp { __le16 StructureSize; /* 60 */ __le16 Flags; __le32 Reserved; - __le64 CreationTime; - __le64 LastAccessTime; - __le64 LastWriteTime; - __le64 ChangeTime; - __le64 AllocationSize; /* Beginning of FILE_STANDARD_INFO equivalent */ - __le64 EndOfFile; - __le32 Attributes; + struct_group(network_open_info, + __le64 CreationTime; + __le64 LastAccessTime; + __le64 LastWriteTime; + __le64 ChangeTime; + /* Beginning of FILE_STANDARD_INFO equivalent */ + __le64 AllocationSize; + __le64 EndOfFile; + __le32 Attributes; + ); } __packed; diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index bdfafc4a7705..c7a0594bdab1 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ #ifndef __DRM_GPUVM_H__ #define __DRM_GPUVM_H__ diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index a7abf9f3e697..2a1d01e5b56b 100644 --- a/include/drm/drm_prime.h +++ b/include/drm/drm_prime.h @@ -60,12 +60,19 @@ enum dma_data_direction; struct drm_device; struct drm_gem_object; +struct drm_file; /* core prime functions */ struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev, struct dma_buf_export_info *exp_info); void drm_gem_dmabuf_release(struct dma_buf *dma_buf); +int drm_gem_prime_fd_to_handle(struct drm_device *dev, + struct drm_file *file_priv, int prime_fd, uint32_t *handle); +int drm_gem_prime_handle_to_fd(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, uint32_t flags, + int *prime_fd); + /* helper functions for exporting */ int drm_gem_map_attach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 54189e0e5f41..4db54e928b36 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -15,7 +15,6 @@ #include <linux/mod_devicetable.h> #include <linux/property.h> #include <linux/uuid.h> -#include <linux/fw_table.h> struct irq_domain; struct irq_domain_ops; @@ -25,22 +24,13 @@ struct irq_domain_ops; #endif #include <acpi/acpi.h> -#ifdef CONFIG_ACPI_TABLE_LIB -#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) -#define __init_or_acpilib -#define __initdata_or_acpilib -#else -#define EXPORT_SYMBOL_ACPI_LIB(x) -#define __init_or_acpilib __init -#define __initdata_or_acpilib __initdata -#endif - #ifdef CONFIG_ACPI #include <linux/list.h> #include <linux/dynamic_debug.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/fw_table.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> @@ -48,6 +38,16 @@ struct irq_domain_ops; #include <acpi/acpi_io.h> #include <asm/acpi.h> +#ifdef CONFIG_ACPI_TABLE_LIB +#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) +#define __init_or_acpilib +#define __initdata_or_acpilib +#else +#define EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_acpilib __init +#define __initdata_or_acpilib __initdata +#endif + static inline acpi_handle acpi_device_handle(struct acpi_device *adev) { return adev ? adev->handle : NULL; diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 446394f84606..6ad02ad9c7b4 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -70,6 +70,10 @@ struct amd_cpudata { u32 nominal_perf; u32 lowest_nonlinear_perf; u32 lowest_perf; + u32 min_limit_perf; + u32 max_limit_perf; + u32 min_limit_freq; + u32 max_limit_freq; u32 max_freq; u32 min_freq; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d5c5e59ddbd2..b29ebd53417d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -49,9 +49,10 @@ struct block_device { bool bd_write_holder; bool bd_has_submit_bio; dev_t bd_dev; + struct inode *bd_inode; /* will die */ + atomic_t bd_openers; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ - struct inode * bd_inode; /* will die */ void * bd_claiming; void * bd_holder; const struct blk_holder_ops *bd_holder_ops; @@ -69,6 +70,7 @@ struct block_device { #ifdef CONFIG_FAIL_MAKE_REQUEST bool bd_make_it_fail; #endif + bool bd_ro_warned; /* * keep this out-of-line as it's both big and not needed in the fast * path diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eb447b0a9423..d5d8e2083610 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3179,6 +3179,9 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old); + void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); diff --git a/include/linux/closure.h b/include/linux/closure.h index de7bb47d8a46..c554c6a08768 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -104,7 +104,7 @@ struct closure; struct closure_syncer; -typedef void (closure_fn) (struct closure *); +typedef void (closure_fn) (struct work_struct *); extern struct dentry *bcache_debug; struct closure_waitlist { @@ -254,7 +254,7 @@ static inline void closure_queue(struct closure *cl) INIT_WORK(&cl->work, cl->work.func); BUG_ON(!queue_work(wq, &cl->work)); } else - cl->fn(cl); + cl->fn(&cl->work); } /** @@ -309,6 +309,11 @@ static inline void closure_wake_up(struct closure_waitlist *list) __closure_wake_up(list); } +#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws) +#define closure_type(name, type, member) \ + struct closure *cl = container_of(ws, struct closure, work); \ + type *name = container_of(cl, type, member) + /** * continue_at - jump to another function with barrier * diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index ebe78bd3d121..b3772edca2e6 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -499,6 +499,21 @@ static inline bool dma_fence_is_later(struct dma_fence *f1, } /** + * dma_fence_is_later_or_same - return true if f1 is later or same as f2 + * @f1: the first fence from the same context + * @f2: the second fence from the same context + * + * Returns true if f1 is chronologically later than f2 or the same fence. Both + * fences must be from the same context, since a seqno is not re-used across + * contexts. + */ +static inline bool dma_fence_is_later_or_same(struct dma_fence *f1, + struct dma_fence *f2) +{ + return f1 == f2 || dma_fence_is_later(f1, f2); +} + +/** * dma_fence_later - return the chronologically later fence * @f1: the first fence from the same context * @f2: the second fence from the same context diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h index ff8fa58d5818..ca49947f0a77 100644 --- a/include/linux/fw_table.h +++ b/include/linux/fw_table.h @@ -25,9 +25,6 @@ struct acpi_subtable_proc { int count; }; -#include <linux/acpi.h> -#include <acpi/acpi.h> - union acpi_subtable_headers { struct acpi_subtable_header common; struct acpi_hmat_structure hmat; diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index d3009d56af0b..805bb635cdf5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -340,6 +340,9 @@ struct io_ring_ctx { struct list_head io_buffers_cache; + /* deferred free list, protected by ->uring_lock */ + struct hlist_head io_buf_list; + /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ec289c1016f5..6291aa7b079b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -845,6 +845,7 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv) dev->iommu->priv = priv; } +extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index ab1da3142b06..0ff44d6633e3 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -139,7 +139,7 @@ static inline bool kprobe_ftrace(struct kprobe *p) * */ struct kretprobe_holder { - struct kretprobe *rp; + struct kretprobe __rcu *rp; struct objpool_head pool; }; @@ -197,10 +197,8 @@ extern int arch_trampoline_kprobe(struct kprobe *p); #ifdef CONFIG_KRETPROBE_ON_RETHOOK static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return (struct kretprobe *)READ_ONCE(ri->node.rethook->data); + /* rethook::data is non-changed field, so that you can access it freely. */ + return (struct kretprobe *)ri->node.rethook->data; } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) { @@ -245,10 +243,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) { - RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), - "Kretprobe is accessed from instance under preemptive context"); - - return READ_ONCE(ri->rph->rp); + return rcu_dereference_check(ri->rph->rp, rcu_read_lock_any_held()); } static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 63e630276499..ab1c7deff118 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -114,6 +114,9 @@ /* Charging mode - 1=Barrel, 2=USB */ #define ASUS_WMI_DEVID_CHARGE_MODE 0x0012006C +/* MCU powersave mode */ +#define ASUS_WMI_DEVID_MCU_POWERSAVE 0x001200E2 + /* epu is connected? 1 == true */ #define ASUS_WMI_DEVID_EGPU_CONNECTED 0x00090018 /* egpu on/off */ diff --git a/include/linux/rethook.h b/include/linux/rethook.h index ce69b2b7bc35..ba60962805f6 100644 --- a/include/linux/rethook.h +++ b/include/linux/rethook.h @@ -28,7 +28,12 @@ typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long, */ struct rethook { void *data; - rethook_handler_t handler; + /* + * To avoid sparse warnings, this uses a raw function pointer with + * __rcu, instead of rethook_handler_t. But this must be same as + * rethook_handler_t. + */ + void (__rcu *handler) (struct rethook_node *, void *, unsigned long, struct pt_regs *); struct objpool_head pool; struct rcu_head rcu; }; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 0b4658a7eceb..dee5ad6e48c5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -175,6 +175,7 @@ struct stmmac_fpe_cfg { bool hs_enable; /* FPE handshake enable */ enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ + u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ }; struct stmmac_safety_feature_cfg { diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f55ec155f5b7..89b290d8c8dc 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -169,7 +169,7 @@ struct tcp_request_sock { #ifdef CONFIG_TCP_AO u8 ao_keyid; u8 ao_rcv_next; - u8 maclen; + bool used_tcp_ao; #endif }; @@ -180,14 +180,10 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) static inline bool tcp_rsk_used_ao(const struct request_sock *req) { - /* The real length of MAC is saved in the request socket, - * signing anything with zero-length makes no sense, so here is - * a little hack.. - */ #ifndef CONFIG_TCP_AO return false; #else - return tcp_rsk(req)->maclen != 0; + return tcp_rsk(req)->used_tcp_ao; #endif } diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 287e9d83fb8b..33a4c146dc19 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -30,6 +30,7 @@ #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 #define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 454e9295970c..a65b2513f8cd 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -289,16 +289,12 @@ void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes, /* * External user API */ -#if IS_ENABLED(CONFIG_VFIO_GROUP) struct iommu_group *vfio_file_iommu_group(struct file *file); + +#if IS_ENABLED(CONFIG_VFIO_GROUP) bool vfio_file_is_group(struct file *file); bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #else -static inline struct iommu_group *vfio_file_iommu_group(struct file *file) -{ - return NULL; -} - static inline bool vfio_file_is_group(struct file *file) { return false; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e18a4c0d69ee..c53244f20437 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -12,10 +12,12 @@ * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) + * @cap_sys_admin: whether %CAP_SYS_ADMIN is required for binding */ struct genl_multicast_group { char name[GENL_NAMSIZ]; u8 flags; + u8 cap_sys_admin:1; }; struct genl_split_ops; diff --git a/include/net/tcp.h b/include/net/tcp.h index 973555cb1d3f..f5ca4abaee8b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1518,17 +1518,22 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +static inline void __tcp_adjust_rcv_ssthresh(struct sock *sk, u32 new_ssthresh) { int unused_mem = sk_unused_reserved_mem(sk); struct tcp_sock *tp = tcp_sk(sk); - tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + tp->rcv_ssthresh = min(tp->rcv_ssthresh, new_ssthresh); if (unused_mem) tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh, tcp_win_from_space(sk, unused_mem)); } +static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +{ + __tcp_adjust_rcv_ssthresh(sk, 4U * tcp_sk(sk)->advmss); +} + void tcp_cleanup_rbuf(struct sock *sk, int copied); void __tcp_cleanup_rbuf(struct sock *sk, int copied); diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 4d900ef8dfc3..0f2dcc9e8d46 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -62,11 +62,17 @@ static inline int tcp_ao_maclen(const struct tcp_ao_key *key) return key->maclen; } +/* Use tcp_ao_len_aligned() for TCP header calculations */ static inline int tcp_ao_len(const struct tcp_ao_key *key) { return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr); } +static inline int tcp_ao_len_aligned(const struct tcp_ao_key *key) +{ + return round_up(tcp_ao_len(key), 4); +} + static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key) { return key->digest_size; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 10480eb582b2..5ec1e71a09de 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -167,19 +167,25 @@ struct scsi_device { * power state for system suspend/resume (suspend to RAM and * hibernation) operations. */ - bool manage_system_start_stop; + unsigned manage_system_start_stop:1; /* * If true, let the high-level device driver (sd) manage the device * power state for runtime device suspand and resume operations. */ - bool manage_runtime_start_stop; + unsigned manage_runtime_start_stop:1; /* * If true, let the high-level device driver (sd) manage the device * power state for system shutdown (power off) operations. */ - bool manage_shutdown; + unsigned manage_shutdown:1; + + /* + * If set and if the device is runtime suspended, ask the high-level + * device driver (sd) to force a runtime resume of the device. + */ + unsigned force_runtime_start_on_system_start:1; unsigned removable:1; unsigned changed:1; /* Data invalid due to media change */ diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index 043f8ac65dbf..68e053fe7340 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -906,6 +906,6 @@ int cs35l41_init_boost(struct device *dev, struct regmap *regmap, bool cs35l41_safe_reset(struct regmap *regmap, enum cs35l41_boost_type b_type); int cs35l41_mdsync_up(struct regmap *regmap); int cs35l41_global_enable(struct device *dev, struct regmap *regmap, enum cs35l41_boost_type b_type, - int enable, bool firmware_running); + int enable, struct cs_dsp *dsp); #endif /* __CS35L41_H */ diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index 5c6c4269f7ef..2ec6f35cda32 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -27,7 +27,7 @@ union { \ struct { MEMBERS } ATTRS; \ struct TAG { MEMBERS } ATTRS NAME; \ - } + } ATTRS #ifdef __cplusplus /* sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 3c19cccb1aec..8a8b07dfc444 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -273,7 +273,7 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) }; ktime_t timeout = KTIME_MAX; struct io_uring_sync_cancel_reg sc; - struct fd f = { }; + struct file *file = NULL; DEFINE_WAIT(wait); int ret, i; @@ -295,10 +295,10 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) /* we can grab a normal file descriptor upfront */ if ((cd.flags & IORING_ASYNC_CANCEL_FD) && !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) { - f = fdget(sc.fd); - if (!f.file) + file = fget(sc.fd); + if (!file) return -EBADF; - cd.file = f.file; + cd.file = file; } ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); @@ -348,6 +348,7 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) if (ret == -ENOENT || ret > 0) ret = 0; out: - fdput(f); + if (file) + fput(file); return ret; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ed254076c723..aba5657d287e 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -325,6 +325,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->sqd_list); INIT_LIST_HEAD(&ctx->cq_overflow_list); INIT_LIST_HEAD(&ctx->io_buffers_cache); + INIT_HLIST_HEAD(&ctx->io_buf_list); io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX, sizeof(struct io_rsrc_node)); io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX, @@ -2666,7 +2667,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; } -static void io_mem_free(void *ptr) +void io_mem_free(void *ptr) { if (!ptr) return; @@ -2697,6 +2698,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, { struct page **page_array; unsigned int nr_pages; + void *page_addr; int ret, i; *npages = 0; @@ -2718,27 +2720,29 @@ err: io_pages_free(&page_array, ret > 0 ? ret : 0); return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT); } - /* - * Should be a single page. If the ring is small enough that we can - * use a normal page, that is fine. If we need multiple pages, then - * userspace should use a huge page. That's the only way to guarantee - * that we get contigious memory, outside of just being lucky or - * (currently) having low memory fragmentation. - */ - if (page_array[0] != page_array[ret - 1]) - goto err; - /* - * Can't support mapping user allocated ring memory on 32-bit archs - * where it could potentially reside in highmem. Just fail those with - * -EINVAL, just like we did on kernels that didn't support this - * feature. - */ + page_addr = page_address(page_array[0]); for (i = 0; i < nr_pages; i++) { - if (PageHighMem(page_array[i])) { - ret = -EINVAL; + ret = -EINVAL; + + /* + * Can't support mapping user allocated ring memory on 32-bit + * archs where it could potentially reside in highmem. Just + * fail those with -EINVAL, just like we did on kernels that + * didn't support this feature. + */ + if (PageHighMem(page_array[i])) goto err; - } + + /* + * No support for discontig pages for now, should either be a + * single normal page, or a huge page. Later on we can add + * support for remapping discontig pages, for now we will + * just fail them with EINVAL. + */ + if (page_address(page_array[i]) != page_addr) + goto err; + page_addr += PAGE_SIZE; } *pages = page_array; @@ -2775,7 +2779,7 @@ static void io_rings_free(struct io_ring_ctx *ctx) } } -static void *io_mem_alloc(size_t size) +void *io_mem_alloc(size_t size) { gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; void *ret; @@ -2947,6 +2951,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) ctx->mm_account = NULL; } io_rings_free(ctx); + io_kbuf_mmap_list_free(ctx); percpu_ref_exit(&ctx->refs); free_uid(ctx->user); @@ -3475,25 +3480,27 @@ static void *io_uring_validate_mmap_request(struct file *file, struct page *page; void *ptr; - /* Don't allow mmap if the ring was setup without it */ - if (ctx->flags & IORING_SETUP_NO_MMAP) - return ERR_PTR(-EINVAL); - switch (offset & IORING_OFF_MMAP_MASK) { case IORING_OFF_SQ_RING: case IORING_OFF_CQ_RING: + /* Don't allow mmap if the ring was setup without it */ + if (ctx->flags & IORING_SETUP_NO_MMAP) + return ERR_PTR(-EINVAL); ptr = ctx->rings; break; case IORING_OFF_SQES: + /* Don't allow mmap if the ring was setup without it */ + if (ctx->flags & IORING_SETUP_NO_MMAP) + return ERR_PTR(-EINVAL); ptr = ctx->sq_sqes; break; case IORING_OFF_PBUF_RING: { unsigned int bgid; bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; - mutex_lock(&ctx->uring_lock); + rcu_read_lock(); ptr = io_pbuf_get_address(ctx, bgid); - mutex_unlock(&ctx->uring_lock); + rcu_read_unlock(); if (!ptr) return ERR_PTR(-EINVAL); break; @@ -3645,7 +3652,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, size_t, argsz) { struct io_ring_ctx *ctx; - struct fd f; + struct file *file; long ret; if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | @@ -3663,20 +3670,19 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX)) return -EINVAL; fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); - f.file = tctx->registered_rings[fd]; - f.flags = 0; - if (unlikely(!f.file)) + file = tctx->registered_rings[fd]; + if (unlikely(!file)) return -EBADF; } else { - f = fdget(fd); - if (unlikely(!f.file)) + file = fget(fd); + if (unlikely(!file)) return -EBADF; ret = -EOPNOTSUPP; - if (unlikely(!io_is_uring_fops(f.file))) + if (unlikely(!io_is_uring_fops(file))) goto out; } - ctx = f.file->private_data; + ctx = file->private_data; ret = -EBADFD; if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED)) goto out; @@ -3770,7 +3776,8 @@ iopoll_locked: } } out: - fdput(f); + if (!(flags & IORING_ENTER_REGISTERED_RING)) + fput(file); return ret; } @@ -4611,7 +4618,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, { struct io_ring_ctx *ctx; long ret = -EBADF; - struct fd f; + struct file *file; bool use_registered_ring; use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING); @@ -4630,27 +4637,27 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX)) return -EINVAL; fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); - f.file = tctx->registered_rings[fd]; - f.flags = 0; - if (unlikely(!f.file)) + file = tctx->registered_rings[fd]; + if (unlikely(!file)) return -EBADF; } else { - f = fdget(fd); - if (unlikely(!f.file)) + file = fget(fd); + if (unlikely(!file)) return -EBADF; ret = -EOPNOTSUPP; - if (!io_is_uring_fops(f.file)) + if (!io_is_uring_fops(file)) goto out_fput; } - ctx = f.file->private_data; + ctx = file->private_data; mutex_lock(&ctx->uring_lock); ret = __io_uring_register(ctx, opcode, arg, nr_args); mutex_unlock(&ctx->uring_lock); trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret); out_fput: - fdput(f); + if (!use_registered_ring) + fput(file); return ret; } diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index dc6d779b452b..ed84f2737b3a 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -86,6 +86,9 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx); bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all); +void *io_mem_alloc(size_t size); +void io_mem_free(void *ptr); + #if defined(CONFIG_PROVE_LOCKING) static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) { diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index a1e4239c7d75..268788305b61 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -33,19 +33,42 @@ struct io_provide_buf { __u16 bid; }; +struct io_buf_free { + struct hlist_node list; + void *mem; + size_t size; + int inuse; +}; + +static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, + struct io_buffer_list *bl, + unsigned int bgid) +{ + if (bl && bgid < BGID_ARRAY) + return &bl[bgid]; + + return xa_load(&ctx->io_bl_xa, bgid); +} + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, unsigned int bgid) { - if (ctx->io_bl && bgid < BGID_ARRAY) - return &ctx->io_bl[bgid]; + lockdep_assert_held(&ctx->uring_lock); - return xa_load(&ctx->io_bl_xa, bgid); + return __io_buffer_get_list(ctx, ctx->io_bl, bgid); } static int io_buffer_add_list(struct io_ring_ctx *ctx, struct io_buffer_list *bl, unsigned int bgid) { + /* + * Store buffer group ID and finally mark the list as visible. + * The normal lookup doesn't care about the visibility as we're + * always under the ->uring_lock, but the RCU lookup from mmap does. + */ bl->bgid = bgid; + smp_store_release(&bl->is_ready, 1); + if (bgid < BGID_ARRAY) return 0; @@ -196,21 +219,40 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, static __cold int io_init_bl_list(struct io_ring_ctx *ctx) { + struct io_buffer_list *bl; int i; - ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), - GFP_KERNEL); - if (!ctx->io_bl) + bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); + if (!bl) return -ENOMEM; for (i = 0; i < BGID_ARRAY; i++) { - INIT_LIST_HEAD(&ctx->io_bl[i].buf_list); - ctx->io_bl[i].bgid = i; + INIT_LIST_HEAD(&bl[i].buf_list); + bl[i].bgid = i; } + smp_store_release(&ctx->io_bl, bl); return 0; } +/* + * Mark the given mapped range as free for reuse + */ +static void io_kbuf_mark_free(struct io_ring_ctx *ctx, struct io_buffer_list *bl) +{ + struct io_buf_free *ibf; + + hlist_for_each_entry(ibf, &ctx->io_buf_list, list) { + if (bl->buf_ring == ibf->mem) { + ibf->inuse = 0; + return; + } + } + + /* can't happen... */ + WARN_ON_ONCE(1); +} + static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer_list *bl, unsigned nbufs) { @@ -223,7 +265,11 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, if (bl->is_mapped) { i = bl->buf_ring->tail - bl->head; if (bl->is_mmap) { - folio_put(virt_to_folio(bl->buf_ring)); + /* + * io_kbuf_list_free() will free the page(s) at + * ->release() time. + */ + io_kbuf_mark_free(ctx, bl); bl->buf_ring = NULL; bl->is_mmap = 0; } else if (bl->buf_nr_pages) { @@ -274,9 +320,17 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) xa_for_each(&ctx->io_bl_xa, index, bl) { xa_erase(&ctx->io_bl_xa, bl->bgid); __io_remove_buffers(ctx, bl, -1U); - kfree(bl); + kfree_rcu(bl, rcu); } + /* + * Move deferred locked entries to cache before pruning + */ + spin_lock(&ctx->completion_lock); + if (!list_empty(&ctx->io_buffers_comp)) + list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache); + spin_unlock(&ctx->completion_lock); + list_for_each_safe(item, tmp, &ctx->io_buffers_cache) { buf = list_entry(item, struct io_buffer, list); kmem_cache_free(io_buf_cachep, buf); @@ -460,7 +514,16 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) INIT_LIST_HEAD(&bl->buf_list); ret = io_buffer_add_list(ctx, bl, p->bgid); if (ret) { - kfree(bl); + /* + * Doesn't need rcu free as it was never visible, but + * let's keep it consistent throughout. Also can't + * be a lower indexed array group, as adding one + * where lookup failed cannot happen. + */ + if (p->bgid >= BGID_ARRAY) + kfree_rcu(bl, rcu); + else + WARN_ON_ONCE(1); goto err; } } @@ -531,19 +594,63 @@ error_unpin: return -EINVAL; } -static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg, +/* + * See if we have a suitable region that we can reuse, rather than allocate + * both a new io_buf_free and mem region again. We leave it on the list as + * even a reused entry will need freeing at ring release. + */ +static struct io_buf_free *io_lookup_buf_free_entry(struct io_ring_ctx *ctx, + size_t ring_size) +{ + struct io_buf_free *ibf, *best = NULL; + size_t best_dist; + + hlist_for_each_entry(ibf, &ctx->io_buf_list, list) { + size_t dist; + + if (ibf->inuse || ibf->size < ring_size) + continue; + dist = ibf->size - ring_size; + if (!best || dist < best_dist) { + best = ibf; + if (!dist) + break; + best_dist = dist; + } + } + + return best; +} + +static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx, + struct io_uring_buf_reg *reg, struct io_buffer_list *bl) { - gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; + struct io_buf_free *ibf; size_t ring_size; void *ptr; ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring); - ptr = (void *) __get_free_pages(gfp, get_order(ring_size)); - if (!ptr) - return -ENOMEM; - bl->buf_ring = ptr; + /* Reuse existing entry, if we can */ + ibf = io_lookup_buf_free_entry(ctx, ring_size); + if (!ibf) { + ptr = io_mem_alloc(ring_size); + if (!ptr) + return -ENOMEM; + + /* Allocate and store deferred free entry */ + ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT); + if (!ibf) { + io_mem_free(ptr); + return -ENOMEM; + } + ibf->mem = ptr; + ibf->size = ring_size; + hlist_add_head(&ibf->list, &ctx->io_buf_list); + } + ibf->inuse = 1; + bl->buf_ring = ibf->mem; bl->is_mapped = 1; bl->is_mmap = 1; return 0; @@ -555,6 +662,8 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct io_buffer_list *bl, *free_bl = NULL; int ret; + lockdep_assert_held(&ctx->uring_lock); + if (copy_from_user(®, arg, sizeof(reg))) return -EFAULT; @@ -599,7 +708,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (!(reg.flags & IOU_PBUF_RING_MMAP)) ret = io_pin_pbuf_ring(®, bl); else - ret = io_alloc_pbuf_ring(®, bl); + ret = io_alloc_pbuf_ring(ctx, ®, bl); if (!ret) { bl->nr_entries = reg.ring_entries; @@ -609,7 +718,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) return 0; } - kfree(free_bl); + kfree_rcu(free_bl, rcu); return ret; } @@ -618,6 +727,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) struct io_uring_buf_reg reg; struct io_buffer_list *bl; + lockdep_assert_held(&ctx->uring_lock); + if (copy_from_user(®, arg, sizeof(reg))) return -EFAULT; if (reg.resv[0] || reg.resv[1] || reg.resv[2]) @@ -634,7 +745,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) __io_remove_buffers(ctx, bl, -1U); if (bl->bgid >= BGID_ARRAY) { xa_erase(&ctx->io_bl_xa, bl->bgid); - kfree(bl); + kfree_rcu(bl, rcu); } return 0; } @@ -643,9 +754,33 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) { struct io_buffer_list *bl; - bl = io_buffer_get_list(ctx, bgid); + bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); + + /* + * Ensure the list is fully setup. Only strictly needed for RCU lookup + * via mmap, and in that case only for the array indexed groups. For + * the xarray lookups, it's either visible and ready, or not at all. + */ + if (!smp_load_acquire(&bl->is_ready)) + return NULL; if (!bl || !bl->is_mmap) return NULL; return bl->buf_ring; } + +/* + * Called at or after ->release(), free the mmap'ed buffers that we used + * for memory mapped provided buffer rings. + */ +void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx) +{ + struct io_buf_free *ibf; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) { + hlist_del(&ibf->list); + io_mem_free(ibf->mem); + kfree(ibf); + } +} diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index f2d615236b2c..9be5960817ea 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -15,6 +15,7 @@ struct io_buffer_list { struct page **buf_pages; struct io_uring_buf_ring *buf_ring; }; + struct rcu_head rcu; }; __u16 bgid; @@ -28,6 +29,8 @@ struct io_buffer_list { __u8 is_mapped; /* ring mapped provided buffers, but mmap'ed by application */ __u8 is_mmap; + /* bl is visible from an RCU point of view for lookup */ + __u8 is_ready; }; struct io_buffer { @@ -51,6 +54,8 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags); int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); +void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx); + unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 2058e89b5ddd..c85ff9162a5c 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1012,11 +1012,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map, mutex_unlock(&aux->poke_mutex); } +void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + WARN_ON_ONCE(1); +} + static void prog_array_map_poke_run(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new) { - u8 *old_addr, *new_addr, *old_bypass_addr; struct prog_poke_elem *elem; struct bpf_array_aux *aux; @@ -1025,7 +1030,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, list_for_each_entry(elem, &aux->poke_progs, list) { struct bpf_jit_poke_descriptor *poke; - int i, ret; + int i; for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; @@ -1044,21 +1049,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, * activated, so tail call updates can arrive from here * while JIT is still finishing its final fixup for * non-activated poke entries. - * 3) On program teardown, the program's kallsym entry gets - * removed out of RCU callback, but we can only untrack - * from sleepable context, therefore bpf_arch_text_poke() - * might not see that this is in BPF text section and - * bails out with -EINVAL. As these are unreachable since - * RCU grace period already passed, we simply skip them. - * 4) Also programs reaching refcount of zero while patching + * 3) Also programs reaching refcount of zero while patching * is in progress is okay since we're protected under * poke_mutex and untrack the programs before the JIT - * buffer is freed. When we're still in the middle of - * patching and suddenly kallsyms entry of the program - * gets evicted, we just skip the rest which is fine due - * to point 3). - * 5) Any other error happening below from bpf_arch_text_poke() - * is a unexpected bug. + * buffer is freed. */ if (!READ_ONCE(poke->tailcall_target_stable)) continue; @@ -1068,39 +1062,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, poke->tail_call.key != key) continue; - old_bypass_addr = old ? NULL : poke->bypass_addr; - old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; - new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; - - if (new) { - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, new_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - if (!old) { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - poke->bypass_addr, - NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } - } else { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - old_bypass_addr, - poke->bypass_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - /* let other CPUs finish the execution of program - * so that it will not possible to expose them - * to invalid nop, stack unwind, nop state - */ - if (!ret) - synchronize_rcu(); - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } + bpf_arch_poke_desc_update(poke, new, old); } } } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index cd3afe57ece3..fe254ae035fe 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -371,14 +371,18 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, s32 end_new, s32 curr, const bool probe_pass) { - const s32 off_min = S16_MIN, off_max = S16_MAX; + s64 off_min, off_max, off; s32 delta = end_new - end_old; - s32 off; - if (insn->code == (BPF_JMP32 | BPF_JA)) + if (insn->code == (BPF_JMP32 | BPF_JA)) { off = insn->imm; - else + off_min = S32_MIN; + off_max = S32_MAX; + } else { off = insn->off; + off_min = S16_MIN; + off_max = S16_MAX; + } if (curr < pos && curr + off + 1 >= end_old) off += delta; diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 122dacb3a443..66d1708042a7 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -66,9 +66,15 @@ static struct freezer *parent_freezer(struct freezer *freezer) bool cgroup_freezing(struct task_struct *task) { bool ret; + unsigned int state; rcu_read_lock(); - ret = task_freezer(task)->state & CGROUP_FREEZING; + /* Check if the cgroup is still FREEZING, but not FROZEN. The extra + * !FROZEN check is required, because the FREEZING bit is not cleared + * when the state FROZEN is reached. + */ + state = task_freezer(task)->state; + ret = (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN); rcu_read_unlock(); return ret; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 075a632e6c7c..d5a0ee40bf66 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2252,7 +2252,7 @@ int register_kretprobe(struct kretprobe *rp) rp->rph = NULL; return -ENOMEM; } - rp->rph->rp = rp; + rcu_assign_pointer(rp->rph->rp, rp); rp->nmissed = 0; /* Establish function entry probe point */ ret = register_kprobe(&rp->kp); @@ -2300,7 +2300,7 @@ void unregister_kretprobes(struct kretprobe **rps, int num) #ifdef CONFIG_KRETPROBE_ON_RETHOOK rethook_free(rps[i]->rh); #else - rps[i]->rph->rp = NULL; + rcu_assign_pointer(rps[i]->rph->rp, NULL); #endif } mutex_unlock(&kprobe_mutex); diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index 6fd7d4ecbbc6..fa03094e9e69 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -48,7 +48,7 @@ static void rethook_free_rcu(struct rcu_head *head) */ void rethook_stop(struct rethook *rh) { - WRITE_ONCE(rh->handler, NULL); + rcu_assign_pointer(rh->handler, NULL); } /** @@ -63,7 +63,7 @@ void rethook_stop(struct rethook *rh) */ void rethook_free(struct rethook *rh) { - WRITE_ONCE(rh->handler, NULL); + rethook_stop(rh); call_rcu(&rh->rcu, rethook_free_rcu); } @@ -82,6 +82,12 @@ static int rethook_fini_pool(struct objpool_head *head, void *context) return 0; } +static inline rethook_handler_t rethook_get_handler(struct rethook *rh) +{ + return (rethook_handler_t)rcu_dereference_check(rh->handler, + rcu_read_lock_any_held()); +} + /** * rethook_alloc() - Allocate struct rethook. * @data: a data to pass the @handler when hooking the return. @@ -107,7 +113,7 @@ struct rethook *rethook_alloc(void *data, rethook_handler_t handler, return ERR_PTR(-ENOMEM); rh->data = data; - rh->handler = handler; + rcu_assign_pointer(rh->handler, handler); /* initialize the objpool for rethook nodes */ if (objpool_init(&rh->pool, num, size, GFP_KERNEL, rh, @@ -135,9 +141,10 @@ static void free_rethook_node_rcu(struct rcu_head *head) */ void rethook_recycle(struct rethook_node *node) { - lockdep_assert_preemption_disabled(); + rethook_handler_t handler; - if (likely(READ_ONCE(node->rethook->handler))) + handler = rethook_get_handler(node->rethook); + if (likely(handler)) objpool_push(node, &node->rethook->pool); else call_rcu(&node->rcu, free_rethook_node_rcu); @@ -153,9 +160,7 @@ NOKPROBE_SYMBOL(rethook_recycle); */ struct rethook_node *rethook_try_get(struct rethook *rh) { - rethook_handler_t handler = READ_ONCE(rh->handler); - - lockdep_assert_preemption_disabled(); + rethook_handler_t handler = rethook_get_handler(rh); /* Check whether @rh is going to be freed. */ if (unlikely(!handler)) @@ -300,7 +305,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs, rhn = container_of(first, struct rethook_node, llist); if (WARN_ON_ONCE(rhn->frame != frame)) break; - handler = READ_ONCE(rhn->rethook->handler); + handler = rethook_get_handler(rhn->rethook); if (handler) handler(rhn, rhn->rethook->data, correct_ret_addr, regs); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6e578f576a6f..2989b57e154a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1684,9 +1684,6 @@ static int wq_select_unbound_cpu(int cpu) pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n"); } - if (cpumask_empty(wq_unbound_cpumask)) - return cpu; - new_cpu = __this_cpu_read(wq_rr_cpu_last); new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask); if (unlikely(new_cpu >= nr_cpu_ids)) { @@ -6515,6 +6512,17 @@ static inline void wq_watchdog_init(void) { } #endif /* CONFIG_WQ_WATCHDOG */ +static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask) +{ + if (!cpumask_intersects(wq_unbound_cpumask, mask)) { + pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n", + cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask)); + return; + } + + cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask); +} + /** * workqueue_init_early - early init for workqueue subsystem * @@ -6534,11 +6542,11 @@ void __init workqueue_init_early(void) BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); - cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ)); - cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN)); - + cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); + restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ)); + restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN)); if (!cpumask_empty(&wq_cmdline_cpumask)) - cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, &wq_cmdline_cpumask); + restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); diff --git a/lib/closure.c b/lib/closure.c index f86c9eeafb35..c16540552d61 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -36,7 +36,7 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) closure_debug_destroy(cl); if (destructor) - destructor(cl); + destructor(&cl->work); if (parent) closure_put(parent); @@ -108,8 +108,9 @@ struct closure_syncer { int done; }; -static void closure_sync_fn(struct closure *cl) +static CLOSURE_CALLBACK(closure_sync_fn) { + struct closure *cl = container_of(ws, struct closure, work); struct closure_syncer *s = cl->s; struct task_struct *p; diff --git a/lib/fw_table.c b/lib/fw_table.c index b51f30a28e47..294df54e33b6 100644 --- a/lib/fw_table.c +++ b/lib/fw_table.c @@ -7,7 +7,7 @@ * Copyright (C) 2023 Intel Corp. */ #include <linux/errno.h> -#include <linux/fw_table.h> +#include <linux/acpi.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 99d2a3a528e1..de2113a58fa0 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -562,7 +562,7 @@ static void kunit_log_test(struct kunit *test) KUNIT_EXPECT_TRUE(test, test->log->append_newlines); full_log = string_stream_get_string(test->log); - kunit_add_action(test, (kunit_action_t *)kfree, full_log); + kunit_add_action(test, kfree_wrapper, full_log); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, strstr(full_log, "put this in log.")); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, diff --git a/lib/kunit/test.c b/lib/kunit/test.c index f2eb71f1a66c..7aceb07a1af9 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -338,6 +338,36 @@ void kunit_init_test(struct kunit *test, const char *name, struct string_stream } EXPORT_SYMBOL_GPL(kunit_init_test); +/* Only warn when a test takes more than twice the threshold */ +#define KUNIT_SPEED_WARNING_MULTIPLIER 2 + +/* Slow tests are defined as taking more than 1s */ +#define KUNIT_SPEED_SLOW_THRESHOLD_S 1 + +#define KUNIT_SPEED_SLOW_WARNING_THRESHOLD_S \ + (KUNIT_SPEED_WARNING_MULTIPLIER * KUNIT_SPEED_SLOW_THRESHOLD_S) + +#define s_to_timespec64(s) ns_to_timespec64((s) * NSEC_PER_SEC) + +static void kunit_run_case_check_speed(struct kunit *test, + struct kunit_case *test_case, + struct timespec64 duration) +{ + struct timespec64 slow_thr = + s_to_timespec64(KUNIT_SPEED_SLOW_WARNING_THRESHOLD_S); + enum kunit_speed speed = test_case->attr.speed; + + if (timespec64_compare(&duration, &slow_thr) < 0) + return; + + if (speed == KUNIT_SPEED_VERY_SLOW || speed == KUNIT_SPEED_SLOW) + return; + + kunit_warn(test, + "Test should be marked slow (runtime: %lld.%09lds)", + duration.tv_sec, duration.tv_nsec); +} + /* * Initializes and runs test case. Does not clean up or do post validations. */ @@ -345,6 +375,8 @@ static void kunit_run_case_internal(struct kunit *test, struct kunit_suite *suite, struct kunit_case *test_case) { + struct timespec64 start, end; + if (suite->init) { int ret; @@ -356,7 +388,13 @@ static void kunit_run_case_internal(struct kunit *test, } } + ktime_get_ts64(&start); + test_case->run_case(test); + + ktime_get_ts64(&end); + + kunit_run_case_check_speed(test, test_case, timespec64_sub(end, start)); } static void kunit_case_internal_cleanup(struct kunit *test) @@ -670,6 +708,8 @@ int __kunit_test_suites_init(struct kunit_suite * const * const suites, int num_ return 0; } + kunit_suite_counter = 1; + static_branch_inc(&kunit_running); for (i = 0; i < num_suites; i++) { @@ -696,8 +736,6 @@ void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites) for (i = 0; i < num_suites; i++) kunit_exit_suite(suites[i]); - - kunit_suite_counter = 1; } EXPORT_SYMBOL_GPL(__kunit_test_suites_exit); diff --git a/lib/objpool.c b/lib/objpool.c index ce0087f64400..cfdc02420884 100644 --- a/lib/objpool.c +++ b/lib/objpool.c @@ -201,6 +201,23 @@ static inline void *objpool_try_get_slot(struct objpool_head *pool, int cpu) while (head != READ_ONCE(slot->last)) { void *obj; + /* + * data visibility of 'last' and 'head' could be out of + * order since memory updating of 'last' and 'head' are + * performed in push() and pop() independently + * + * before any retrieving attempts, pop() must guarantee + * 'last' is behind 'head', that is to say, there must + * be available objects in slot, which could be ensured + * by condition 'last != head && last - head <= nr_objs' + * that is equivalent to 'last - head - 1 < nr_objs' as + * 'last' and 'head' are both unsigned int32 + */ + if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) { + head = READ_ONCE(slot->head); + continue; + } + /* obj must be retrieved before moving forward head */ obj = READ_ONCE(slot->entries[head & slot->mask]); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index aff31cd944c2..b240d9aae4a6 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -183,7 +183,7 @@ out: } static const struct genl_multicast_group dropmon_mcgrps[] = { - { .name = "events", }, + { .name = "events", .cap_sys_admin = 1 }, }; static void send_dm_alert(struct work_struct *work) @@ -1619,11 +1619,13 @@ static const struct genl_small_ops dropmon_ops[] = { .cmd = NET_DM_CMD_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_STOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_CONFIG_GET, diff --git a/net/core/filter.c b/net/core/filter.c index 0adaa4afa35f..eedb33f3e998 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2602,6 +2602,22 @@ BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes) return 0; } +static void sk_msg_reset_curr(struct sk_msg *msg) +{ + u32 i = msg->sg.start; + u32 len = 0; + + do { + len += sk_msg_elem(msg, i)->length; + sk_msg_iter_var_next(i); + if (len >= msg->sg.size) + break; + } while (i != msg->sg.end); + + msg->sg.curr = i; + msg->sg.copybreak = 0; +} + static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { .func = bpf_msg_cork_bytes, .gpl_only = false, @@ -2721,6 +2737,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, msg->sg.end - shift + NR_MSG_FRAG_IDS : msg->sg.end - shift; out: + sk_msg_reset_curr(msg); msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; msg->data_end = msg->data + bytes; return 0; @@ -2857,6 +2874,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, msg->sg.data[new] = rsge; } + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } @@ -3025,6 +3043,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, sk_mem_uncharge(msg->sk, len - pop); msg->sg.size -= (len - pop); + sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); return 0; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 22a26d1d29a0..5169c3c72cff 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -635,15 +635,18 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, } if (dev->header_ops) { + int pull_len = tunnel->hlen + sizeof(struct iphdr); + if (skb_cow_head(skb, 0)) goto free_skb; tnl_params = (const struct iphdr *)skb->data; - /* Pull skb since ip_tunnel_xmit() needs skb->data pointing - * to gre header. - */ - skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + if (!pskb_network_may_pull(skb, pull_len)) + goto free_skb; + + /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ + skb_pull(skb, pull_len); skb_reset_mac_header(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 70a1bafbefba..1d6b80145efb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3369,9 +3369,25 @@ int tcp_set_window_clamp(struct sock *sk, int val) return -EINVAL; tp->window_clamp = 0; } else { - tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? - SOCK_MIN_RCVBUF / 2 : val; - tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); + u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp; + u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ? + SOCK_MIN_RCVBUF / 2 : val; + + if (new_window_clamp == old_window_clamp) + return 0; + + tp->window_clamp = new_window_clamp; + if (new_window_clamp < old_window_clamp) { + /* need to apply the reserved mem provisioning only + * when shrinking the window clamp + */ + __tcp_adjust_rcv_ssthresh(sk, tp->window_clamp); + + } else { + new_rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); + tp->rcv_ssthresh = max(new_rcv_ssthresh, + tp->rcv_ssthresh); + } } return 0; } @@ -3595,6 +3611,10 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, break; case TCP_AO_REPAIR: + if (!tcp_can_repair_sock(sk)) { + err = -EPERM; + break; + } err = tcp_ao_set_repair(sk, optval, optlen); break; #ifdef CONFIG_TCP_AO @@ -4294,6 +4314,8 @@ zerocopy_rcv_out: } #endif case TCP_AO_REPAIR: + if (!tcp_can_repair_sock(sk)) + return -EPERM; return tcp_ao_get_repair(sk, optval, optlen); case TCP_AO_GET_KEYS: case TCP_AO_INFO: { diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index c4cd1e09eb6b..87db432c6bb4 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -862,7 +862,7 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, #endif treq->af_specific = &tcp_request_sock_ipv4_ops; - treq->maclen = 0; + treq->used_tcp_ao = false; if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) return; @@ -875,7 +875,7 @@ void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, treq->ao_rcv_next = aoh->keyid; treq->ao_keyid = aoh->rnext_keyid; - treq->maclen = tcp_ao_maclen(key); + treq->used_tcp_ao = true; } static enum skb_drop_reason @@ -1112,7 +1112,7 @@ void tcp_ao_connect_init(struct sock *sk) ao_info->current_key = key; if (!ao_info->rnext_key) ao_info->rnext_key = key; - tp->tcp_header_len += tcp_ao_len(key); + tp->tcp_header_len += tcp_ao_len_aligned(key); ao_info->lisn = htonl(tp->write_seq); ao_info->snd_sne = 0; @@ -1358,7 +1358,7 @@ static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) syn_tcp_option_space -= TCPOLEN_MSS_ALIGNED; syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED; syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED; - if (tcp_ao_len(key) > syn_tcp_option_space) { + if (tcp_ao_len_aligned(key) > syn_tcp_option_space) { err = -EMSGSIZE; goto err_kfree; } @@ -1620,6 +1620,15 @@ static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, if (!dev || !l3index) return -EINVAL; + if (!bound_dev_if || bound_dev_if != cmd.ifindex) { + /* tcp_ao_established_key() doesn't expect having + * non peer-matching key on an established TCP-AO + * connection. + */ + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) + return -EINVAL; + } + /* It's still possible to bind after adding keys or even * re-bind to a different dev (with CAP_NET_RAW). * So, no reason to return error here, rather try to be diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0548f0c12155..7990f4939e8d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3864,8 +3864,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * then we can probably ignore it. */ if (before(ack, prior_snd_una)) { + u32 max_window; + + /* do not accept ACK for bytes we never sent. */ + max_window = min_t(u64, tp->max_window, tp->bytes_acked); /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ - if (before(ack, prior_snd_una - tp->max_window)) { + if (before(ack, prior_snd_una - max_window)) { if (!(flag & FLAG_NO_CHALLENGE_ACK)) tcp_send_challenge_ack(sk); return -SKB_DROP_REASON_TCP_TOO_OLD_ACK; @@ -7175,11 +7179,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) goto drop_and_release; /* Invalid TCP options */ if (aoh) { - tcp_rsk(req)->maclen = aoh->length - sizeof(struct tcp_ao_hdr); + tcp_rsk(req)->used_tcp_ao = true; tcp_rsk(req)->ao_rcv_next = aoh->keyid; tcp_rsk(req)->ao_keyid = aoh->rnext_keyid; + } else { - tcp_rsk(req)->maclen = 0; + tcp_rsk(req)->used_tcp_ao = false; } #endif tcp_rsk(req)->snt_isn = isn; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 86cc6d36f818..4bac6e319aca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -696,7 +696,7 @@ static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) | (aoh->rnext_keyid << 8) | keyid); - arg->iov[0].iov_len += round_up(tcp_ao_len(key), 4); + arg->iov[0].iov_len += tcp_ao_len_aligned(key); reply->doff = arg->iov[0].iov_len / 4; if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1], @@ -984,7 +984,7 @@ static void tcp_v4_send_ack(const struct sock *sk, (tcp_ao_len(key->ao_key) << 16) | (key->ao_key->sndid << 8) | key->rcv_next); - arg.iov[0].iov_len += round_up(tcp_ao_len(key->ao_key), 4); + arg.iov[0].iov_len += tcp_ao_len_aligned(key->ao_key); rep.th.doff = arg.iov[0].iov_len / 4; tcp_ao_hash_hdr(AF_INET, (char *)&rep.opt[offset], diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a9807eeb311c..9e85f2a0bddd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -615,7 +615,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, ao_key = treq->af_specific->ao_lookup(sk, req, tcp_rsk(req)->ao_keyid, -1); if (ao_key) - newtp->tcp_header_len += tcp_ao_len(ao_key); + newtp->tcp_header_len += tcp_ao_len_aligned(ao_key); #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index eb13a55d660c..f5ef15e1d9ac 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -825,7 +825,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, timestamps = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps); if (tcp_key_is_ao(key)) { opts->options |= OPTION_AO; - remaining -= tcp_ao_len(key->ao_key); + remaining -= tcp_ao_len_aligned(key->ao_key); } } @@ -915,7 +915,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, ireq->tstamp_ok &= !ireq->sack_ok; } else if (tcp_key_is_ao(key)) { opts->options |= OPTION_AO; - remaining -= tcp_ao_len(key->ao_key); + remaining -= tcp_ao_len_aligned(key->ao_key); ireq->tstamp_ok &= !ireq->sack_ok; } @@ -982,7 +982,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb size += TCPOLEN_MD5SIG_ALIGNED; } else if (tcp_key_is_ao(key)) { opts->options |= OPTION_AO; - size += tcp_ao_len(key->ao_key); + size += tcp_ao_len_aligned(key->ao_key); } if (likely(tp->rx_opt.tstamp_ok)) { @@ -3720,7 +3720,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, if (tcp_rsk_used_ao(req)) { #ifdef CONFIG_TCP_AO struct tcp_ao_key *ao_key = NULL; - u8 maclen = tcp_rsk(req)->maclen; u8 keyid = tcp_rsk(req)->ao_keyid; ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req), @@ -3730,13 +3729,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, * for another peer-matching key, but the peer has requested * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here. */ - if (unlikely(!ao_key || tcp_ao_maclen(ao_key) != maclen)) { - u8 key_maclen = ao_key ? tcp_ao_maclen(ao_key) : 0; - + if (unlikely(!ao_key)) { rcu_read_unlock(); kfree_skb(skb); - net_warn_ratelimited("TCP-AO: the keyid %u with maclen %u|%u from SYN packet is not present - not sending SYNACK\n", - keyid, maclen, key_maclen); + net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n", + keyid); return NULL; } key.ao_key = ao_key; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 28b01a068412..7772f42ff2b9 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1511,13 +1511,9 @@ out: if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); + if (!pn_leaf) pn_leaf = info->nl_net->ipv6.fib6_null_entry; - } -#endif fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 43deda49cc52..06a19fe2afd1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -884,7 +884,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; if (tcp_key_is_ao(key)) - tot_len += tcp_ao_len(key->ao_key); + tot_len += tcp_ao_len_aligned(key->ao_key); #ifdef CONFIG_MPTCP if (rst && !tcp_key_is_md5(key)) { diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index e502ec00b2fe..0e4beae421f8 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -31,7 +31,7 @@ struct bpf_nf_link { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) static const struct nf_defrag_hook * get_proto_defrag_hook(struct bpf_nf_link *link, - const struct nf_defrag_hook __rcu *global_hook, + const struct nf_defrag_hook __rcu **ptr_global_hook, const char *mod) { const struct nf_defrag_hook *hook; @@ -39,7 +39,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link, /* RCU protects us from races against module unloading */ rcu_read_lock(); - hook = rcu_dereference(global_hook); + hook = rcu_dereference(*ptr_global_hook); if (!hook) { rcu_read_unlock(); err = request_module(mod); @@ -47,7 +47,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link, return ERR_PTR(err < 0 ? err : -EINVAL); rcu_read_lock(); - hook = rcu_dereference(global_hook); + hook = rcu_dereference(*ptr_global_hook); } if (hook && try_module_get(hook->owner)) { @@ -78,7 +78,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link) switch (link->hook_ops.pf) { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) case NFPROTO_IPV4: - hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4"); + hook = get_proto_defrag_hook(link, &nf_defrag_v4_hook, "nf_defrag_ipv4"); if (IS_ERR(hook)) return PTR_ERR(hook); @@ -87,7 +87,7 @@ static int bpf_nf_enable_defrag(struct bpf_nf_link *link) #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) case NFPROTO_IPV6: - hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6"); + hook = get_proto_defrag_hook(link, &nf_defrag_v6_hook, "nf_defrag_ipv6"); if (IS_ERR(hook)) return PTR_ERR(hook); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c0a42989b982..c5c17c6e80ed 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -803,7 +803,7 @@ static struct nft_table *nft_table_lookup(const struct net *net, static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, - u8 genmask, u32 nlpid) + int family, u8 genmask, u32 nlpid) { struct nftables_pernet *nft_net; struct nft_table *table; @@ -811,6 +811,7 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, nft_net = nft_pernet(net); list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && + table->family == family && nft_active_genmask(table, genmask)) { if (nft_table_has_owner(table) && nlpid && table->nlpid != nlpid) @@ -1544,7 +1545,7 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_TABLE_HANDLE]) { attr = nla[NFTA_TABLE_HANDLE]; - table = nft_table_lookup_byhandle(net, attr, genmask, + table = nft_table_lookup_byhandle(net, attr, family, genmask, NETLINK_CB(skb).portid); } else { attr = nla[NFTA_TABLE_NAME]; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index b18a79039125..c09dba57354c 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -280,10 +280,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->expr_array[i] = dynset_expr; priv->num_exprs++; - if (set->num_exprs && - dynset_expr->ops != set->exprs[i]->ops) { - err = -EOPNOTSUPP; - goto err_expr_free; + if (set->num_exprs) { + if (i >= set->num_exprs) { + err = -EINVAL; + goto err_expr_free; + } + if (dynset_expr->ops != set->exprs[i]->ops) { + err = -EOPNOTSUPP; + goto err_expr_free; + } } i++; } diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 3fbaa7bf41f9..6eb571d0c3fd 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -214,7 +214,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, offset = i + priv->offset; if (priv->flags & NFT_EXTHDR_F_PRESENT) { - *dest = 1; + nft_reg_store8(dest, 1); } else { if (priv->len % NFT_REG32_SIZE) dest[priv->len / NFT_REG32_SIZE] = 0; @@ -461,7 +461,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr, type = bufp[0]; if (type == priv->type) { - *dest = 1; + nft_reg_store8(dest, 1); return; } diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 1bfe258018da..37cfe6dd712d 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -145,11 +145,15 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv, switch (priv->result) { case NFT_FIB_RESULT_OIF: index = dev ? dev->ifindex : 0; - *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; + if (priv->flags & NFTA_FIB_F_PRESENT) + nft_reg_store8(dreg, !!index); + else + *dreg = index; + break; case NFT_FIB_RESULT_OIFNAME: if (priv->flags & NFTA_FIB_F_PRESENT) - *dreg = !!dev; + nft_reg_store8(dreg, !!dev); else strscpy_pad(reg, dev ? dev->name : "", IFNAMSIZ); break; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 701977af3ee8..7252fcdae349 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2043,6 +2043,9 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, e = f->mt[r].e; + if (!nft_set_elem_active(&e->ext, iter->genmask)) + goto cont; + iter->err = iter->fn(ctx, set, iter, &e->priv); if (iter->err < 0) goto out; diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index e85ce69924ae..50332888c8d2 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -76,18 +76,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - filp = sk->sk_socket->file; - if (filp == NULL) + read_lock_bh(&sk->sk_callback_lock); + filp = sk->sk_socket ? sk->sk_socket->file : NULL; + if (filp == NULL) { + read_unlock_bh(&sk->sk_callback_lock); return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; + } if (info->match & XT_OWNER_UID) { kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ - !(info->invert & XT_OWNER_UID)) + !(info->invert & XT_OWNER_UID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } if (info->match & XT_OWNER_GID) { @@ -112,10 +117,13 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } } - if (match ^ !(info->invert & XT_OWNER_GID)) + if (match ^ !(info->invert & XT_OWNER_GID)) { + read_unlock_bh(&sk->sk_callback_lock); return false; + } } + read_unlock_bh(&sk->sk_callback_lock); return true; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 92ef5ed2e7b0..9c7ffd10df2a 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1691,6 +1691,9 @@ static int genl_bind(struct net *net, int group) if ((grp->flags & GENL_UNS_ADMIN_PERM) && !ns_capable(net->user_ns, CAP_NET_ADMIN)) ret = -EPERM; + if (grp->cap_sys_admin && + !ns_capable(net->user_ns, CAP_SYS_ADMIN)) + ret = -EPERM; break; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f92edba4c40f..5f1757a32842 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4294,7 +4294,7 @@ static void packet_mm_open(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_inc(&pkt_sk(sk)->mapped); + atomic_long_inc(&pkt_sk(sk)->mapped); } static void packet_mm_close(struct vm_area_struct *vma) @@ -4304,7 +4304,7 @@ static void packet_mm_close(struct vm_area_struct *vma) struct sock *sk = sock->sk; if (sk) - atomic_dec(&pkt_sk(sk)->mapped); + atomic_long_dec(&pkt_sk(sk)->mapped); } static const struct vm_operations_struct packet_mmap_ops = { @@ -4399,7 +4399,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; if (!closing) { - if (atomic_read(&po->mapped)) + if (atomic_long_read(&po->mapped)) goto out; if (packet_read_pending(rb)) goto out; @@ -4502,7 +4502,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = -EBUSY; mutex_lock(&po->pg_vec_lock); - if (closing || atomic_read(&po->mapped) == 0) { + if (closing || atomic_long_read(&po->mapped) == 0) { err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); @@ -4520,9 +4520,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, po->prot_hook.func = (po->rx_ring.pg_vec) ? tpacket_rcv : packet_rcv; skb_queue_purge(rb_queue); - if (atomic_read(&po->mapped)) - pr_err("packet_mmap: vma is busy: %d\n", - atomic_read(&po->mapped)); + if (atomic_long_read(&po->mapped)) + pr_err("packet_mmap: vma is busy: %ld\n", + atomic_long_read(&po->mapped)); } mutex_unlock(&po->pg_vec_lock); @@ -4600,7 +4600,7 @@ static int packet_mmap(struct file *file, struct socket *sock, } } - atomic_inc(&po->mapped); + atomic_long_inc(&po->mapped); vma->vm_ops = &packet_mmap_ops; err = 0; diff --git a/net/packet/internal.h b/net/packet/internal.h index d29c94c45159..d5d70712007a 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -122,7 +122,7 @@ struct packet_sock { __be16 num; struct packet_rollover *rollover; struct packet_mclist *mclist; - atomic_t mapped; + atomic_long_t mapped; enum tpacket_versions tp_version; unsigned int tp_hdrlen; unsigned int tp_reserve; diff --git a/net/psample/psample.c b/net/psample/psample.c index 81a794e36f53..c34e902855db 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -31,7 +31,8 @@ enum psample_nl_multicast_groups { static const struct genl_multicast_group psample_nl_mcgrps[] = { [PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME }, - [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME }, + [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME, + .flags = GENL_UNS_ADMIN_PERM }, }; static struct genl_family psample_nl_family __ro_after_init; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c61666e5beed..7fc2f3c6d248 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -723,7 +723,7 @@ static void smcd_conn_save_peer_info(struct smc_sock *smc, int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size); smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx; - smc->conn.peer_token = clc->d0.token; + smc->conn.peer_token = ntohll(clc->d0.token); /* msg header takes up space in the buffer */ smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); @@ -1415,7 +1415,7 @@ static int smc_connect_ism(struct smc_sock *smc, if (rc) return rc; } - ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid; + ini->ism_peer_gid[ini->ism_selected] = ntohll(aclc->d0.gid); /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 0fda5156eef0..95e19aa3e769 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -1005,6 +1005,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, { struct smc_connection *conn = &smc->conn; struct smc_clc_first_contact_ext_v2x fce; + struct smcd_dev *smcd = conn->lgr->smcd; struct smc_clc_msg_accept_confirm *clc; struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; @@ -1022,17 +1023,15 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); clc->hdr.typev1 = SMC_TYPE_D; - clc->d0.gid = - conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd); - clc->d0.token = conn->rmb_desc->token; + clc->d0.gid = htonll(smcd->ops->get_local_gid(smcd)); + clc->d0.token = htonll(conn->rmb_desc->token); clc->d0.dmbe_size = conn->rmbe_size_comp; clc->d0.dmbe_idx = 0; memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); if (version == SMC_V1) { clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); } else { - clc_v2->d1.chid = - htons(smc_ism_get_chid(conn->lgr->smcd)); + clc_v2->d1.chid = htons(smc_ism_get_chid(smcd)); if (eid && eid[0]) memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 89b258cedffe..1697b84d85be 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -204,8 +204,8 @@ struct smcr_clc_msg_accept_confirm { /* SMCR accept/confirm */ } __packed; struct smcd_clc_msg_accept_confirm_common { /* SMCD accept/confirm */ - u64 gid; /* Sender GID */ - u64 token; /* DMB token */ + __be64 gid; /* Sender GID */ + __be64 token; /* DMB token */ u8 dmbe_idx; /* DMBE index */ #if defined(__BIG_ENDIAN_BITFIELD) u8 dmbe_size : 4, /* buf size (compressed) */ diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 316f76187962..e37b4d2e2acd 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -952,6 +952,8 @@ static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg, } sk_msg_page_add(msg_pl, page, part, off); + msg_pl->sg.copybreak = 0; + msg_pl->sg.curr = msg_pl->sg.end; sk_mem_charge(sk, part); *copied += part; try_to_copy -= part; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index f6dc896bf44c..c8e162c9d1df 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -59,8 +59,7 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, t_ops = virtio_transport_get_ops(info->vsk); if (t_ops->can_msgzerocopy) { - int pages_in_iov = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); - int pages_to_send = min(pages_in_iov, MAX_SKB_FRAGS); + int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); /* +1 is for packet header. */ return t_ops->can_msgzerocopy(pages_to_send + 1); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 281d49b4fca4..9f13aa3353e3 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -991,7 +991,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, rcu_read_lock(); if (xsk_check_common(xs)) - goto skip_tx; + goto out; pool = xs->pool; @@ -1003,12 +1003,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, xsk_generic_xmit(sk); } -skip_tx: if (xs->rx && !xskq_prod_is_empty(xs->rx)) mask |= EPOLLIN | EPOLLRDNORM; if (xs->tx && xsk_tx_writeable(xs)) mask |= EPOLLOUT | EPOLLWRNORM; - +out: rcu_read_unlock(); return mask; } diff --git a/scripts/dtc/dt-extract-compatibles b/scripts/dtc/dt-extract-compatibles index bd07477dd144..5ffb2364409b 100755 --- a/scripts/dtc/dt-extract-compatibles +++ b/scripts/dtc/dt-extract-compatibles @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-only +import fnmatch import os -import glob import re import argparse @@ -81,10 +81,20 @@ def print_compat(filename, compatibles): else: print(*compatibles, sep='\n') +def glob_without_symlinks(root, glob): + for path, dirs, files in os.walk(root): + # Ignore hidden directories + for d in dirs: + if fnmatch.fnmatch(d, ".*"): + dirs.remove(d) + for f in files: + if fnmatch.fnmatch(f, glob): + yield os.path.join(path, f) + def files_to_parse(path_args): for f in path_args: if os.path.isdir(f): - for filename in glob.iglob(f + "/**/*.c", recursive=True): + for filename in glob_without_symlinks(f, "*.c"): yield filename else: yield f diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 910bd21d08f4..746ff2d272f2 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -339,8 +339,7 @@ static int relayout_struct(tree type) /* * enforce that we don't randomize the layout of the last - * element of a struct if it's a 0 or 1-length array - * or a proper flexible array + * element of a struct if it's a proper flexible array */ if (is_flexible_array(newtree[num_fields - 1])) { has_flexarray = true; diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 2c4dfc0b7e34..696a958d93e9 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -238,7 +238,7 @@ EXPORT_SYMBOL(intel_nhlt_ssp_mclk_mask); static struct nhlt_specific_cfg * nhlt_get_specific_cfg(struct device *dev, struct nhlt_fmt *fmt, u8 num_ch, - u32 rate, u8 vbps, u8 bps) + u32 rate, u8 vbps, u8 bps, bool ignore_vbps) { struct nhlt_fmt_cfg *cfg = fmt->fmt_config; struct wav_fmt *wfmt; @@ -255,8 +255,12 @@ nhlt_get_specific_cfg(struct device *dev, struct nhlt_fmt *fmt, u8 num_ch, dev_dbg(dev, "Endpoint format: ch=%d fmt=%d/%d rate=%d\n", wfmt->channels, _vbps, _bps, wfmt->samples_per_sec); + /* + * When looking for exact match of configuration ignore the vbps + * from NHLT table when ignore_vbps is true + */ if (wfmt->channels == num_ch && wfmt->samples_per_sec == rate && - vbps == _vbps && bps == _bps) + (ignore_vbps || vbps == _vbps) && bps == _bps) return &cfg->config; cfg = (struct nhlt_fmt_cfg *)(cfg->config.caps + cfg->config.size); @@ -289,6 +293,7 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, { struct nhlt_specific_cfg *cfg; struct nhlt_endpoint *epnt; + bool ignore_vbps = false; struct nhlt_fmt *fmt; int i; @@ -298,7 +303,26 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, dev_dbg(dev, "Looking for configuration:\n"); dev_dbg(dev, " vbus_id=%d link_type=%d dir=%d, dev_type=%d\n", bus_id, link_type, dir, dev_type); - dev_dbg(dev, " ch=%d fmt=%d/%d rate=%d\n", num_ch, vbps, bps, rate); + if (link_type == NHLT_LINK_DMIC && bps == 32 && (vbps == 24 || vbps == 32)) { + /* + * The DMIC hardware supports only one type of 32 bits sample + * size, which is 24 bit sampling on the MSB side and bits[1:0] + * are used for indicating the channel number. + * It has been observed that some NHLT tables have the vbps + * specified as 32 while some uses 24. + * The format these variations describe are identical, the + * hardware is configured and behaves the same way. + * Note: when the samples assumed to be vbps=32 then the 'noise' + * introduced by the lower two bits (channel number) have no + * real life implication on audio quality. + */ + dev_dbg(dev, + " ch=%d fmt=%d rate=%d (vbps is ignored for DMIC 32bit format)\n", + num_ch, bps, rate); + ignore_vbps = true; + } else { + dev_dbg(dev, " ch=%d fmt=%d/%d rate=%d\n", num_ch, vbps, bps, rate); + } dev_dbg(dev, "Endpoint count=%d\n", nhlt->endpoint_count); epnt = (struct nhlt_endpoint *)nhlt->desc; @@ -307,7 +331,8 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt, if (nhlt_check_ep_match(dev, epnt, bus_id, link_type, dir, dev_type)) { fmt = (struct nhlt_fmt *)(epnt->config.caps + epnt->config.size); - cfg = nhlt_get_specific_cfg(dev, fmt, num_ch, rate, vbps, bps); + cfg = nhlt_get_specific_cfg(dev, fmt, num_ch, rate, + vbps, bps, ignore_vbps); if (cfg) return cfg; } diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index b2db8091f0ed..cbd7d8badf91 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -504,7 +504,6 @@ static void cs35l41_shutdown_dsp(struct cs35l41_hda *cs35l41) cs_dsp_stop(dsp); cs_dsp_power_down(dsp); - cs35l41->firmware_running = false; dev_dbg(cs35l41->dev, "Unloaded Firmware\n"); } @@ -550,7 +549,7 @@ static void cs35l41_hda_play_start(struct device *dev) cs35l41->playback_started = true; - if (cs35l41->firmware_running) { + if (cs35l41->cs_dsp.running) { regmap_multi_reg_write(reg, cs35l41_hda_config_dsp, ARRAY_SIZE(cs35l41_hda_config_dsp)); regmap_update_bits(reg, CS35L41_PWR_CTRL2, @@ -580,7 +579,7 @@ static void cs35l41_mute(struct device *dev, bool mute) regmap_multi_reg_write(reg, cs35l41_hda_mute, ARRAY_SIZE(cs35l41_hda_mute)); } else { dev_dbg(dev, "Unmuting\n"); - if (cs35l41->firmware_running) { + if (cs35l41->cs_dsp.running) { regmap_multi_reg_write(reg, cs35l41_hda_unmute_dsp, ARRAY_SIZE(cs35l41_hda_unmute_dsp)); } else { @@ -599,7 +598,7 @@ static void cs35l41_hda_play_done(struct device *dev) dev_dbg(dev, "Play (Complete)\n"); cs35l41_global_enable(dev, reg, cs35l41->hw_cfg.bst_type, 1, - cs35l41->firmware_running); + &cs35l41->cs_dsp); cs35l41_mute(dev, false); } @@ -612,7 +611,7 @@ static void cs35l41_hda_pause_start(struct device *dev) cs35l41_mute(dev, true); cs35l41_global_enable(dev, reg, cs35l41->hw_cfg.bst_type, 0, - cs35l41->firmware_running); + &cs35l41->cs_dsp); } static void cs35l41_hda_pause_done(struct device *dev) @@ -625,7 +624,7 @@ static void cs35l41_hda_pause_done(struct device *dev) regmap_update_bits(reg, CS35L41_PWR_CTRL2, CS35L41_AMP_EN_MASK, 0 << CS35L41_AMP_EN_SHIFT); if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST) regmap_write(reg, CS35L41_GPIO1_CTRL1, 0x00000001); - if (cs35l41->firmware_running) { + if (cs35l41->cs_dsp.running) { cs35l41_set_cspl_mbox_cmd(dev, reg, CSPL_MBOX_CMD_PAUSE); regmap_update_bits(reg, CS35L41_PWR_CTRL2, CS35L41_VMON_EN_MASK | CS35L41_IMON_EN_MASK, @@ -675,7 +674,7 @@ static void cs35l41_hda_playback_hook(struct device *dev, int action) break; case HDA_GEN_PCM_ACT_CLOSE: mutex_lock(&cs35l41->fw_mutex); - if (!cs35l41->firmware_running && cs35l41->request_fw_load && + if (!cs35l41->cs_dsp.running && cs35l41->request_fw_load && !cs35l41->fw_request_ongoing) { dev_info(dev, "Requesting Firmware Load after HDA_GEN_PCM_ACT_CLOSE\n"); cs35l41->fw_request_ongoing = true; @@ -761,10 +760,9 @@ static int cs35l41_verify_id(struct cs35l41_hda *cs35l41, unsigned int *regid, u static int cs35l41_ready_for_reset(struct cs35l41_hda *cs35l41) { mutex_lock(&cs35l41->fw_mutex); - if (cs35l41->firmware_running) { + if (cs35l41->cs_dsp.running) { cs35l41->cs_dsp.running = false; cs35l41->cs_dsp.booted = false; - cs35l41->firmware_running = false; } regcache_mark_dirty(cs35l41->regmap); mutex_unlock(&cs35l41->fw_mutex); @@ -925,7 +923,7 @@ static int cs35l41_runtime_suspend(struct device *dev) mutex_lock(&cs35l41->fw_mutex); - if (cs35l41->firmware_running) { + if (cs35l41->cs_dsp.running) { ret = cs35l41_enter_hibernate(cs35l41->dev, cs35l41->regmap, cs35l41->hw_cfg.bst_type); if (ret) @@ -960,7 +958,7 @@ static int cs35l41_runtime_resume(struct device *dev) regcache_cache_only(cs35l41->regmap, false); - if (cs35l41->firmware_running) { + if (cs35l41->cs_dsp.running) { ret = cs35l41_exit_hibernate(cs35l41->dev, cs35l41->regmap); if (ret) { dev_warn(cs35l41->dev, "Unable to exit Hibernate."); @@ -1052,8 +1050,6 @@ static int cs35l41_smart_amp(struct cs35l41_hda *cs35l41) goto clean_dsp; } - cs35l41->firmware_running = true; - return 0; clean_dsp: @@ -1063,10 +1059,10 @@ clean_dsp: static void cs35l41_load_firmware(struct cs35l41_hda *cs35l41, bool load) { - if (cs35l41->firmware_running && !load) { + if (cs35l41->cs_dsp.running && !load) { dev_dbg(cs35l41->dev, "Unloading Firmware\n"); cs35l41_shutdown_dsp(cs35l41); - } else if (!cs35l41->firmware_running && load) { + } else if (!cs35l41->cs_dsp.running && load) { dev_dbg(cs35l41->dev, "Loading Firmware\n"); cs35l41_smart_amp(cs35l41); } else { @@ -1346,7 +1342,7 @@ static int cs35l41_hda_bind(struct device *dev, struct device *master, void *mas cs35l41->acpi_subsystem_id, cs35l41->hw_cfg.bst_type, cs35l41->hw_cfg.gpio1.func == CS35l41_VSPK_SWITCH, cs35l41->hw_cfg.spk_pos ? 'R' : 'L', - cs35l41->firmware_running, cs35l41->speaker_id); + cs35l41->cs_dsp.running, cs35l41->speaker_id); return ret; } diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c index 756aec342eab..27d7fbc56b4c 100644 --- a/sound/pci/hda/cs35l56_hda_spi.c +++ b/sound/pci/hda/cs35l56_hda_spi.c @@ -21,6 +21,10 @@ static int cs35l56_hda_spi_probe(struct spi_device *spi) return -ENOMEM; cs35l56->base.dev = &spi->dev; + +#ifdef CS35L56_WAKE_HOLD_TIME_US + cs35l56->base.can_hibernate = true; +#endif cs35l56->base.regmap = devm_regmap_init_spi(spi, &cs35l56_regmap_spi); if (IS_ERR(cs35l56->base.regmap)) { ret = PTR_ERR(cs35l56->base.regmap); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index db90feb49c16..2d1df3654424 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2242,6 +2242,8 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0), /* https://bugs.launchpad.net/bugs/1821663 */ SND_PCI_QUIRK(0x1631, 0xe017, "Packard Bell NEC IMEDIA 5204", 0), + /* KONTRON SinglePC may cause a stall at runtime resume */ + SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0), {} }; #endif /* CONFIG_PM */ diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5618b1d9bfd1..f9ddacfd920e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1987,6 +1987,7 @@ enum { ALC887_FIXUP_ASUS_AUDIO, ALC887_FIXUP_ASUS_HMIC, ALCS1200A_FIXUP_MIC_VREF, + ALC888VD_FIXUP_MIC_100VREF, }; static void alc889_fixup_coef(struct hda_codec *codec, @@ -2540,6 +2541,13 @@ static const struct hda_fixup alc882_fixups[] = { {} } }, + [ALC888VD_FIXUP_MIC_100VREF] = { + .type = HDA_FIXUP_PINCTLS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, PIN_VREF100 }, /* headset mic */ + {} + } + }, }; static const struct snd_pci_quirk alc882_fixup_tbl[] = { @@ -2609,6 +2617,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF), SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), + SND_PCI_QUIRK(0x10ec, 0x12d8, "iBase Elo Touch", ALC888VD_FIXUP_MIC_100VREF), SND_PCI_QUIRK(0x13fe, 0x1009, "Advantech MIT-W101", ALC886_FIXUP_EAPD), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), @@ -3256,6 +3265,7 @@ static void alc_disable_headset_jack_key(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x10ec0257: case 0x19e58326: alc_write_coef_idx(codec, 0x48, 0x0); alc_update_coef_idx(codec, 0x49, 0x0045, 0x0); @@ -3285,6 +3295,7 @@ static void alc_enable_headset_jack_key(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0236: case 0x10ec0256: + case 0x10ec0257: case 0x19e58326: alc_write_coef_idx(codec, 0x48, 0xd011); alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045); @@ -6496,6 +6507,7 @@ static void alc_combo_jack_hp_jd_restart(struct hda_codec *codec) case 0x10ec0236: case 0x10ec0255: case 0x10ec0256: + case 0x10ec0257: case 0x19e58326: alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */ alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15); @@ -7249,6 +7261,7 @@ enum { ALC290_FIXUP_SUBWOOFER_HSJACK, ALC269_FIXUP_THINKPAD_ACPI, ALC269_FIXUP_DMIC_THINKPAD_ACPI, + ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO, ALC255_FIXUP_ACER_MIC_NO_PRESENCE, ALC255_FIXUP_ASUS_MIC_NO_PRESENCE, ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -7601,6 +7614,14 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_pincfg_U7x7_headset_mic, }, + [ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x03a19020 }, /* headset mic */ + { 0x1b, 0x90170150 }, /* speaker */ + { } + }, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -10244,6 +10265,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), diff --git a/sound/soc/codecs/cs35l41-lib.c b/sound/soc/codecs/cs35l41-lib.c index 4569e4f7cf7e..e9993a39f7d0 100644 --- a/sound/soc/codecs/cs35l41-lib.c +++ b/sound/soc/codecs/cs35l41-lib.c @@ -16,6 +16,8 @@ #include <sound/cs35l41.h> +#define CS35L41_FIRMWARE_OLD_VERSION 0x001C00 /* v0.28.0 */ + static const struct reg_default cs35l41_reg[] = { { CS35L41_PWR_CTRL1, 0x00000000 }, { CS35L41_PWR_CTRL2, 0x00000000 }, @@ -1214,7 +1216,7 @@ EXPORT_SYMBOL_GPL(cs35l41_safe_reset); * the PLL Lock interrupt, in the IRQ handler. */ int cs35l41_global_enable(struct device *dev, struct regmap *regmap, enum cs35l41_boost_type b_type, - int enable, bool firmware_running) + int enable, struct cs_dsp *dsp) { int ret; unsigned int gpio1_func, pad_control, pwr_ctrl1, pwr_ctrl3, int_status, pup_pdn_mask; @@ -1309,7 +1311,7 @@ int cs35l41_global_enable(struct device *dev, struct regmap *regmap, enum cs35l4 } regmap_write(regmap, CS35L41_IRQ1_STATUS1, CS35L41_PUP_DONE_MASK); - if (firmware_running) + if (dsp->running && dsp->fw_id_version > CS35L41_FIRMWARE_OLD_VERSION) ret = cs35l41_set_cspl_mbox_cmd(dev, regmap, CSPL_MBOX_CMD_SPK_OUT_ENABLE); else diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index d0e9128ac6d0..dfb4ce53491b 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -519,11 +519,11 @@ static int cs35l41_main_amp_event(struct snd_soc_dapm_widget *w, ARRAY_SIZE(cs35l41_pup_patch)); ret = cs35l41_global_enable(cs35l41->dev, cs35l41->regmap, cs35l41->hw_cfg.bst_type, - 1, cs35l41->dsp.cs_dsp.running); + 1, &cs35l41->dsp.cs_dsp); break; case SND_SOC_DAPM_POST_PMD: ret = cs35l41_global_enable(cs35l41->dev, cs35l41->regmap, cs35l41->hw_cfg.bst_type, - 0, cs35l41->dsp.cs_dsp.running); + 0, &cs35l41->dsp.cs_dsp); regmap_multi_reg_write_bypassed(cs35l41->regmap, cs35l41_pdn_patch, diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 5f6f84837a49..7c7493cb571f 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -79,13 +79,15 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B -#define APM_CPU_PART_POTENZA 0x000 +#define APM_CPU_PART_XGENE 0x000 +#define APM_CPU_VAR_POTENZA 0x00 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 @@ -148,6 +150,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index f7ddd73a8c0f..89d2fc872d9f 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -505,6 +505,38 @@ struct kvm_smccc_filter { #define KVM_HYPERCALL_EXIT_SMC (1U << 0) #define KVM_HYPERCALL_EXIT_16BIT (1U << 1) +/* + * Get feature ID registers userspace writable mask. + * + * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model + * Feature Register 2"): + * + * "The Feature ID space is defined as the System register space in + * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, + * op2=={0-7}." + * + * This covers all currently known R/O registers that indicate + * anything useful feature wise, including the ID registers. + * + * If we ever need to introduce a new range, it will be described as + * such in the range field. + */ +#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2) \ + ({ \ + __u64 __op1 = (op1) & 3; \ + __op1 -= (__op1 == 3); \ + (__op1 << 6 | ((crm) & 7) << 3 | (op2)); \ + }) + +#define KVM_ARM_FEATURE_ID_RANGE 0 +#define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) + +struct reg_mask_range { + __u64 addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; +}; + #endif #endif /* __ARM_KVM_H__ */ diff --git a/tools/arch/arm64/include/uapi/asm/perf_regs.h b/tools/arch/arm64/include/uapi/asm/perf_regs.h index fd157f46727e..86e556429e0e 100644 --- a/tools/arch/arm64/include/uapi/asm/perf_regs.h +++ b/tools/arch/arm64/include/uapi/asm/perf_regs.h @@ -36,11 +36,13 @@ enum perf_event_arm_regs { PERF_REG_ARM64_LR, PERF_REG_ARM64_SP, PERF_REG_ARM64_PC, + PERF_REG_ARM64_MAX, /* Extended/pseudo registers */ - PERF_REG_ARM64_VG = 46, // SVE Vector Granule - - PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1, - PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1 + PERF_REG_ARM64_VG = 46, /* SVE Vector Granule */ + PERF_REG_ARM64_EXTENDED_MAX }; + +#define PERF_REG_EXTENDED_MASK (1ULL << PERF_REG_ARM64_VG) + #endif /* _ASM_ARM64_PERF_REGS_H */ diff --git a/tools/arch/arm64/tools/Makefile b/tools/arch/arm64/tools/Makefile index 7f64b8bb5107..7b42feedf647 100644 --- a/tools/arch/arm64/tools/Makefile +++ b/tools/arch/arm64/tools/Makefile @@ -22,7 +22,7 @@ endif arm64_tools_dir = $(top_srcdir)/arch/arm64/tools arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk -arm64_generated_dir = $(top_srcdir)/tools/arch/arm64/include/generated +arm64_generated_dir = $(OUTPUT)arch/arm64/include/generated arm64_sysreg_defs = $(arm64_generated_dir)/asm/sysreg-defs.h all: $(arm64_sysreg_defs) diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index a73cf01a1606..abe926d43cbe 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc { __u8 reserved[1728]; }; +#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6 +#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7 + +#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64 +struct kvm_s390_vm_cpu_uv_feat { + union { + struct { + __u64 : 4; + __u64 ap : 1; /* bit 4 */ + __u64 ap_intr : 1; /* bit 5 */ + __u64 : 58; + }; + __u64 feat; + }; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 798e60b5454b..4af140cf5719 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,6 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ @@ -308,6 +307,11 @@ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ + +#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -380,6 +384,7 @@ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ #define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ @@ -438,11 +443,16 @@ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ + /* * BUG word(s) */ @@ -484,5 +494,9 @@ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ +/* BUG word 2 */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index fafe9be7a6f4..702d93fdd10e 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -105,6 +105,18 @@ # define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31)) #endif +#ifdef CONFIG_X86_USER_SHADOW_STACK +#define DISABLE_USER_SHSTK 0 +#else +#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31)) +#endif + +#ifdef CONFIG_X86_KERNEL_IBT +#define DISABLE_IBT 0 +#else +#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -120,7 +132,7 @@ #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ - DISABLE_CALL_DEPTH_TRACKING) + DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK) #define DISABLED_MASK12 (DISABLE_LAM) #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 @@ -128,7 +140,7 @@ #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ DISABLE_ENQCMD) #define DISABLED_MASK17 0 -#define DISABLED_MASK18 0 +#define DISABLED_MASK18 (DISABLE_IBT) #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 1d111350197f..1d51e1850ed0 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -222,6 +222,7 @@ #define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) +#define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9) #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 @@ -553,6 +554,7 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_TW_CFG 0xc0011023 #define MSR_AMD64_DE_CFG 0xc0011029 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 @@ -637,12 +639,21 @@ /* AMD Last Branch Record MSRs */ #define MSR_AMD64_LBR_SELECT 0xc000010e +/* Zen4 */ +#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 + +/* Fam 19h MSRs */ +#define MSR_F19H_UMC_PERF_CTL 0xc0010800 +#define MSR_F19H_UMC_PERF_CTR 0xc0010801 + +/* Zen 2 */ +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 -#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 -#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) - /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 @@ -1112,12 +1123,16 @@ #define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F -/* AMD-V MSRs */ +/* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + /* Hardware Feedback Interface */ #define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 #define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h index e8d7ebbca1a4..384e2cc6ac19 100644 --- a/tools/arch/x86/include/uapi/asm/prctl.h +++ b/tools/arch/x86/include/uapi/asm/prctl.h @@ -23,9 +23,21 @@ #define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_64 0x2003 +/* Don't use 0x3001-0x3004 because of old glibcs */ + #define ARCH_GET_UNTAG_MASK 0x4001 #define ARCH_ENABLE_TAGGED_ADDR 0x4002 #define ARCH_GET_MAX_TAG_BITS 0x4003 #define ARCH_FORCE_TAGGED_SVA 0x4004 +#define ARCH_SHSTK_ENABLE 0x5001 +#define ARCH_SHSTK_DISABLE 0x5002 +#define ARCH_SHSTK_LOCK 0x5003 +#define ARCH_SHSTK_UNLOCK 0x5004 +#define ARCH_SHSTK_STATUS 0x5005 + +/* ARCH_SHSTK_ features bits */ +#define ARCH_SHSTK_SHSTK (1ULL << 0) +#define ARCH_SHSTK_WRSS (1ULL << 1) + #endif /* _ASM_X86_PRCTL_H */ diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h index 156743d399ae..2fd551915c20 100644 --- a/tools/include/asm-generic/unaligned.h +++ b/tools/include/asm-generic/unaligned.h @@ -8,6 +8,7 @@ */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" #define __get_unaligned_t(type, ptr) ({ \ const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 76d946445391..756b013fb832 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -816,15 +816,21 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) - #define __NR_cachestat 451 __SYSCALL(__NR_cachestat, sys_cachestat) - #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_map_shadow_stack 453 +__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) #undef __NR_syscalls -#define __NR_syscalls 453 +#define __NR_syscalls 457 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 794c1d857677..de723566c5ae 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1134,6 +1134,26 @@ extern "C" { #define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) #define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) +/** + * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. + * + * KMS dumb buffers provide a very primitive way to allocate a buffer object + * suitable for scanout and map it for software rendering. KMS dumb buffers are + * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb + * buffers are not suitable to be displayed on any other device than the KMS + * device where they were allocated from. Also see + * :ref:`kms_dumb_buffer_objects`. + * + * The IOCTL argument is a struct drm_mode_create_dumb. + * + * User-space is expected to create a KMS dumb buffer via this IOCTL, then add + * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via + * &DRM_IOCTL_MODE_MAP_DUMB. + * + * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. + * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate + * driver preferences for dumb buffers. + */ #define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) #define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) #define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 7000e5910a1d..218edb0a96f8 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -38,13 +38,13 @@ extern "C" { */ /** - * DOC: uevents generated by i915 on it's device node + * DOC: uevents generated by i915 on its device node * * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch - * event from the gpu l3 cache. Additional information supplied is ROW, + * event from the GPU L3 cache. Additional information supplied is ROW, * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep - * track of these events and if a specific cache-line seems to have a - * persistent error remap it with the l3 remapping tool supplied in + * track of these events, and if a specific cache-line seems to have a + * persistent error, remap it with the L3 remapping tool supplied in * intel-gpu-tools. The value supplied with the event is always 1. * * I915_ERROR_UEVENT - Generated upon error detection, currently only via diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index fd1fb0d5389d..7a8f4c290187 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -71,7 +71,8 @@ struct fscrypt_policy_v2 { __u8 contents_encryption_mode; __u8 filenames_encryption_mode; __u8 flags; - __u8 __reserved[4]; + __u8 log2_data_unit_size; + __u8 __reserved[3]; __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; }; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f089ab290978..211b86de35ac 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -264,6 +264,7 @@ struct kvm_xen_exit { #define KVM_EXIT_RISCV_SBI 35 #define KVM_EXIT_RISCV_CSR 36 #define KVM_EXIT_NOTIFY 37 +#define KVM_EXIT_LOONGARCH_IOCSR 38 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -336,6 +337,13 @@ struct kvm_run { __u32 len; __u8 is_write; } mmio; + /* KVM_EXIT_LOONGARCH_IOCSR */ + struct { + __u64 phys_addr; + __u8 data[8]; + __u32 len; + __u8 is_write; + } iocsr_io; /* KVM_EXIT_HYPERCALL */ struct { __u64 nr; @@ -1192,6 +1200,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_COUNTER_OFFSET 227 #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 +#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 #ifdef KVM_CAP_IRQ_ROUTING @@ -1362,6 +1371,7 @@ struct kvm_dirty_tlb { #define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL #define KVM_REG_RISCV 0x8000000000000000ULL +#define KVM_REG_LOONGARCH 0x9000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL @@ -1418,9 +1428,16 @@ struct kvm_device_attr { __u64 addr; /* userspace address of attr data */ }; -#define KVM_DEV_VFIO_GROUP 1 -#define KVM_DEV_VFIO_GROUP_ADD 1 -#define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_VFIO_FILE 1 + +#define KVM_DEV_VFIO_FILE_ADD 1 +#define KVM_DEV_VFIO_FILE_DEL 2 + +/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */ +#define KVM_DEV_VFIO_GROUP KVM_DEV_VFIO_FILE + +#define KVM_DEV_VFIO_GROUP_ADD KVM_DEV_VFIO_FILE_ADD +#define KVM_DEV_VFIO_GROUP_DEL KVM_DEV_VFIO_FILE_DEL #define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 enum kvm_device_type { @@ -1555,6 +1572,7 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* Available with KVM_CAP_COUNTER_OFFSET */ #define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) +#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO, 0xb6, struct reg_mask_range) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 8eb0d7b758d2..bb242fdcfe6b 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -100,8 +100,9 @@ enum fsconfig_command { FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ - FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ }; /* diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index f5c48b61ab62..649560c685f1 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -219,4 +219,12 @@ */ #define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) +/* Get the group for the descriptor table including driver & device areas + * of a virtqueue: read index, write group in num. + * The virtqueue index is stored in the index field of vhost_vring_state. + * The group ID of the descriptor table for this specific virtqueue + * is returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) #endif diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 1da7f4b91b4f..dc42de1785ce 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,3 +1,5 @@ +arch/arm64/tools/gen-sysreg.awk +arch/arm64/tools/sysreg tools/perf tools/arch tools/scripts diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d88da787e815..058c9aecf608 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -434,6 +434,21 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) +arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools +ifneq ($(OUTPUT),) + arm64_gen_sysreg_outdir := $(OUTPUT) +else + arm64_gen_sysreg_outdir := $(CURDIR) +endif + +arm64-sysreg-defs: FORCE + $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) + +arm64-sysreg-defs-clean: + $(call QUIET_CLEAN,arm64-sysreg-defs) + $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \ + clean > /dev/null + beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/ linux_uapi_dir := $(srctree)/tools/include/uapi/linux asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic @@ -450,15 +465,6 @@ drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh # Create output directory if not already present _dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)') -arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools - -arm64-sysreg-defs: FORCE - $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) - -arm64-sysreg-defs-clean: - $(call QUIET_CLEAN,arm64-sysreg-defs) - $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) clean > /dev/null - $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl) $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@ diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 80be0e98ea0c..116ff501bf92 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -367,3 +367,7 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 +453 n64 map_shadow_stack sys_map_shadow_stack +454 n64 futex_wake sys_futex_wake +455 n64 futex_wait sys_futex_wait +456 n64 futex_requeue sys_futex_requeue diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index e1412519b4ad..7fab411378f2 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -539,3 +539,7 @@ 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_ni_syscall +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index cc0bc144b661..86fec9b080f6 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -455,3 +455,7 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue sys_futex_requeue diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 2a62eaf30d69..8cb8bf68721c 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -375,6 +375,9 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index f007a9b27065..0092b9b39611 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -1643,7 +1643,7 @@ static int top_print_work(struct perf_kwork *kwork __maybe_unused, struct kwork_ /* * pid */ - ret += printf(" %*ld ", PRINT_PID_WIDTH, work->id); + ret += printf(" %*" PRIu64 " ", PRINT_PID_WIDTH, work->id); /* * tgid diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 39b74d83c7c4..cfcb7e2c3813 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 diff --git a/tools/perf/util/Build b/tools/perf/util/Build index fb661c48992f..988473bf907a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -347,7 +347,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE -CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ -I$(srctree)/tools/arch/arm64/include/generated/ +CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ -I$(OUTPUT)arch/arm64/include/generated/ $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE $(call rule_mkdir) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index e105245eb905..f1716c089c99 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -12,6 +12,7 @@ #include <linux/zalloc.h> #include <linux/string.h> #include <bpf/bpf.h> +#include <inttypes.h> #include "bpf_skel/lock_contention.skel.h" #include "bpf_skel/lock_data.h" @@ -250,7 +251,7 @@ static const char *lock_contention_get_name(struct lock_contention *con, if (cgrp) return cgrp->name; - snprintf(name_buf, sizeof(name_buf), "cgroup:%lu", cgrp_id); + snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id); return name_buf; } diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index fc6b2954e8f5..59993fc9c0d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#include <unistd.h> #include <test_progs.h> #include <network_helpers.h> +#include "tailcall_poke.skel.h" + /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -1105,6 +1108,85 @@ out: bpf_object__close(tgt_obj); } +#define JMP_TABLE "/sys/fs/bpf/jmp_table" + +static int poke_thread_exit; + +static void *poke_update(void *arg) +{ + __u32 zero = 0, prog1_fd, prog2_fd, map_fd; + struct tailcall_poke *call = arg; + + map_fd = bpf_map__fd(call->maps.jmp_table); + prog1_fd = bpf_program__fd(call->progs.call1); + prog2_fd = bpf_program__fd(call->progs.call2); + + while (!poke_thread_exit) { + bpf_map_update_elem(map_fd, &zero, &prog1_fd, BPF_ANY); + bpf_map_update_elem(map_fd, &zero, &prog2_fd, BPF_ANY); + } + + return NULL; +} + +/* + * We are trying to hit prog array update during another program load + * that shares the same prog array map. + * + * For that we share the jmp_table map between two skeleton instances + * by pinning the jmp_table to same path. Then first skeleton instance + * periodically updates jmp_table in 'poke update' thread while we load + * the second skeleton instance in the main thread. + */ +static void test_tailcall_poke(void) +{ + struct tailcall_poke *call, *test; + int err, cnt = 10; + pthread_t thread; + + unlink(JMP_TABLE); + + call = tailcall_poke__open_and_load(); + if (!ASSERT_OK_PTR(call, "tailcall_poke__open")) + return; + + err = bpf_map__pin(call->maps.jmp_table, JMP_TABLE); + if (!ASSERT_OK(err, "bpf_map__pin")) + goto out; + + err = pthread_create(&thread, NULL, poke_update, call); + if (!ASSERT_OK(err, "new toggler")) + goto out; + + while (cnt--) { + test = tailcall_poke__open(); + if (!ASSERT_OK_PTR(test, "tailcall_poke__open")) + break; + + err = bpf_map__set_pin_path(test->maps.jmp_table, JMP_TABLE); + if (!ASSERT_OK(err, "bpf_map__pin")) { + tailcall_poke__destroy(test); + break; + } + + bpf_program__set_autoload(test->progs.test, true); + bpf_program__set_autoload(test->progs.call1, false); + bpf_program__set_autoload(test->progs.call2, false); + + err = tailcall_poke__load(test); + tailcall_poke__destroy(test); + if (!ASSERT_OK(err, "tailcall_poke__load")) + break; + } + + poke_thread_exit = 1; + ASSERT_OK(pthread_join(thread, NULL), "pthread_join"); + +out: + bpf_map__unpin(call->maps.jmp_table, JMP_TABLE); + tailcall_poke__destroy(call); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -1139,4 +1221,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_fentry_fexit(); if (test__start_subtest("tailcall_bpf2bpf_fentry_entry")) test_tailcall_bpf2bpf_fentry_entry(); + if (test__start_subtest("tailcall_poke")) + test_tailcall_poke(); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_poke.c b/tools/testing/selftests/bpf/progs/tailcall_poke.c new file mode 100644 index 000000000000..c78b94b75e83 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_poke.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +SEC("?fentry/bpf_fentry_test1") +int BPF_PROG(test, int a) +{ + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(call1, int a) +{ + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(call2, int a) +{ + return 0; +} diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 050e9751321c..ad9202335656 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -293,15 +293,13 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, __u64 bitmap_size, __u32 flags, struct __test_metadata *_metadata) { - unsigned long i, count, nbits = bitmap_size * BITS_PER_BYTE; + unsigned long i, nbits = bitmap_size * BITS_PER_BYTE; unsigned long nr = nbits / 2; __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ - for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) - if (!(i % 2)) - set_bit(i, (unsigned long *)bitmap); - ASSERT_EQ(nr, count); + for (i = 0; i < nbits; i += 2) + set_bit(i, (unsigned long *)bitmap); test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, bitmap, &out_dirty); @@ -311,9 +309,10 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, memset(bitmap, 0, bitmap_size); test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, flags); - for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) + /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */ + for (i = 0; i < nbits; i++) { ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap)); - ASSERT_EQ(count, out_dirty); + } memset(bitmap, 0, bitmap_size); test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a5963ab9215b..52c59bad7213 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -18,12 +18,13 @@ else endif ifeq ($(ARCH),arm64) -arm64_tools_dir := $(top_srcdir)/tools/arch/arm64/tools/ +tools_dir := $(top_srcdir)/tools +arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ GEN_HDRS := $(top_srcdir)/tools/arch/arm64/include/generated/ CFLAGS += -I$(GEN_HDRS) $(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) - $(MAKE) -C $(arm64_tools_dir) + $(MAKE) -C $(arm64_tools_dir) O=$(tools_dir) endif LIBKVM += lib/assert.c |